ROSE  0.9.9.109
DisassemblerX86.h
1 /* Disassembly specific to the x86 architecture. */
2 
3 #ifndef ROSE_DISASSEMBLER_X86_H
4 #define ROSE_DISASSEMBLER_X86_H
5 
6 #include "Disassembler.h"
7 #include "InstructionEnumsX86.h"
8 #include "Cxx_GrammarSerialization.h"
9 
10 #include <boost/serialization/access.hpp>
11 #include <boost/serialization/base_object.hpp>
12 #include <boost/serialization/export.hpp>
13 #include <boost/serialization/split_member.hpp>
14 
15 namespace Rose {
16 namespace BinaryAnalysis {
17 
21  /* Per-disassembler settings; see init() */
22  X86InstructionSize insnSize;
24  /* Per-instruction settings; see startInstruction() */
25  uint64_t ip;
26  SgUnsignedCharList insnbuf;
27  size_t insnbufat;
29  /* Temporary flags set by the instruction; initialized by startInstruction() */
30  X86SegmentRegister segOverride;
31  X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
32  bool branchPredictionEnabled;
33  bool rexPresent, rexW, rexR, rexX, rexB;
34  bool sizeMustBe64Bit;
35  bool operandSizeOverride;
36  bool addressSizeOverride;
37  bool lock;
38  X86RepeatPrefix repeatPrefix;
39  bool modregrmByteSet;
40  uint8_t modregrmByte;
41  uint8_t modeField;
42  uint8_t regField;
43  uint8_t rmField;
44  SgAsmExpression *modrm;
45  SgAsmExpression *reg;
46  bool isUnconditionalJump;
47  size_t wordSize;
49  // Serialization
52 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
53 private:
54  friend class boost::serialization::access;
55 
56  template<class S>
57  void serialize_common(S &s, const unsigned version) {
58  // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
59  // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
60  // constructed in their initial state by a combination of default constructor and init().
61  s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Disassembler);
62  s & BOOST_SERIALIZATION_NVP(wordSize);
63  }
64 
65  template<class S>
66  void save(S &s, const unsigned version) const {
67  serialize_common(s, version);
68  }
69 
70  template<class S>
71  void load(S &s, const unsigned version) {
72  serialize_common(s, version);
73  init(wordSize);
74  }
75 
76  BOOST_SERIALIZATION_SPLIT_MEMBER();
77 #endif
78 
80  // Constructors
82 
83 protected:
84  // Default constructor for serialization
86  : insnSize(x86_insnsize_none), ip(0), insnbufat(0), segOverride(x86_segreg_none),
87  branchPrediction(x86_branch_prediction_none), branchPredictionEnabled(false), rexPresent(false), rexW(false),
88  rexR(false), rexX(false), rexB(false), sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false),
89  lock(false), repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), rmField(0),
90  modrm(NULL), reg(NULL), isUnconditionalJump(false) {}
91 
92 public:
93  explicit DisassemblerX86(size_t wordsize)
94  : insnSize(x86_insnsize_none), ip(0), insnbufat(0), segOverride(x86_segreg_none),
95  branchPrediction(x86_branch_prediction_none), branchPredictionEnabled(false), rexPresent(false), rexW(false),
96  rexR(false), rexX(false), rexB(false), sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false),
97  lock(false), repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), rmField(0),
98  modrm(NULL), reg(NULL), isUnconditionalJump(false) {
99  init(wordsize);
100  }
101 
102  virtual ~DisassemblerX86() {}
103 
104  virtual DisassemblerX86 *clone() const ROSE_OVERRIDE { return new DisassemblerX86(*this); }
105 
107  // Public methods
109 public:
110  virtual bool canDisassemble(SgAsmGenericHeader*) const ROSE_OVERRIDE;
111 
112  virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE;
113 
114  virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va,
115  AddressSet *successors=NULL) ROSE_OVERRIDE;
116 
117  virtual SgAsmInstruction *makeUnknownInstruction(const Exception&) ROSE_OVERRIDE;
118 
119 
120  /*========================================================================================================================
121  * Data types
122  *========================================================================================================================*/
123 private:
124 
128  class ExceptionX86: public Exception {
129  public:
130  ExceptionX86(const std::string &mesg, const DisassemblerX86 *d)
131  : Exception(mesg, d->ip, d->insnbuf, 8*d->insnbufat)
132  {}
133  ExceptionX86(const std::string &mesg, const DisassemblerX86 *d, size_t bit)
134  : Exception(mesg, d->ip, d->insnbuf, bit)
135  {}
136  };
137 
/* Register-mode selector passed to makeRegister() and the ModR/M helpers, and returned by sizeToMode().
 * Enumerator values are implicit and consecutive starting at zero; their order must not change. */
enum RegisterMode {
    rmLegacyByte,
    rmRexByte,
    rmWord,
    rmDWord,
    rmQWord,
    rmSegment,
    rmST,
    rmMM,
    rmXMM,
    rmControl,
    rmDebug,
    rmReturnNull
};
142 
/* Result type of the mmPrefix() method (MMX registers? -- see mmPrefix). Enumerator values are implicit and
 * consecutive starting at zero. */
enum MMPrefix {
    mmNone,
    mmF3,
    mm66,
    mmF2
};
147 
148 
149  /*========================================================================================================================
150  * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
151  *========================================================================================================================*/
152 private:
153 
157  uint8_t getByte();
158 
162  uint16_t getWord();
163 
167  uint32_t getDWord();
168 
172  uint64_t getQWord();
173 
174  /*========================================================================================================================
175  * Miscellaneous helper methods
176  *========================================================================================================================*/
177 private:
180  SgAsmExpression *currentDataSegment() const;
181 
185  X86InstructionSize effectiveAddressSize() const;
186 
188  RegisterMode effectiveOperandMode() const {
189  return sizeToMode(effectiveOperandSize());
190  }
191 
195  X86InstructionSize effectiveOperandSize() const;
196 
198  SgAsmType *effectiveOperandType() const {
199  return sizeToType(effectiveOperandSize());
200  }
201 
203  bool longMode() const {
204  return insnSize == x86_insnsize_64;
205  }
206 
207  /* FIXME: documentation? */
208  MMPrefix mmPrefix() const;
209 
211  void not64() const {
212  if (longMode())
213  throw ExceptionX86("not valid for 64-bit code", this);
214  }
215 
218  void setRex(uint8_t prefix);
219 
221  static RegisterMode sizeToMode(X86InstructionSize);
222 
225  static SgAsmType *sizeToType(X86InstructionSize s);
226 
227 
228 
229  /*========================================================================================================================
230  * Methods that construct something. (Their names all start with "make".)
231  *========================================================================================================================*/
232 private:
233 
236  SgAsmExpression *makeAddrSizeValue(int64_t val, size_t bit_offset, size_t bit_size);
237 
242  SgAsmX86Instruction *makeInstruction(X86InstructionKind kind, const std::string &mnemonic,
243  SgAsmExpression *op1=NULL, SgAsmExpression *op2=NULL,
244  SgAsmExpression *op3=NULL, SgAsmExpression *op4=NULL);
245 
248 
249  /* FIXME: documentation? */
250  SgAsmRegisterReferenceExpression *makeOperandRegisterByte(bool rexExtension, uint8_t registerNumber);
251 
252  /* FIXME: documentation? */
253  SgAsmRegisterReferenceExpression *makeOperandRegisterFull(bool rexExtension, uint8_t registerNumber);
254 
257  SgAsmRegisterReferenceExpression *makeRegister(uint8_t fullRegisterNumber, RegisterMode,
258  SgAsmType *registerType=NULL) const;
259 
260  /* FIXME: documentation? */
261  SgAsmRegisterReferenceExpression *makeRegisterEffective(uint8_t fullRegisterNumber) {
262  return makeRegister(fullRegisterNumber, effectiveOperandMode());
263  }
264 
265  /* FIXME: documentation? */
266  SgAsmRegisterReferenceExpression *makeRegisterEffective(bool rexExtension, uint8_t registerNumber) {
267  return makeRegister(registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode());
268  }
269 
271  SgAsmExpression *makeSegmentRegister(X86SegmentRegister so, bool insn64) const;
272 
273 
274 
275  /*========================================================================================================================
276  * Methods for operating on the ModR/M byte.
277  *========================================================================================================================*/
278 private:
279 
293  void getModRegRM(RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = NULL);
294 
296  SgAsmMemoryReferenceExpression *decodeModrmMemory();
297 
300  void fillInModRM(RegisterMode rmMode, SgAsmType *t);
301 
303  SgAsmExpression *makeModrmNormal(RegisterMode, SgAsmType *mrType);
304 
307  SgAsmRegisterReferenceExpression *makeModrmRegister(RegisterMode, SgAsmType* mrType=NULL);
308 
310  void requireMemory() const {
311  if (!modregrmByteSet)
312  throw ExceptionX86("requires Mod/RM byte", this);
313  if (modeField == 3)
314  throw ExceptionX86("requires memory", this);
315  }
316 
317 
318 
319  /*========================================================================================================================
320  * Methods that construct an SgAsmExpression for an immediate operand.
321  *========================================================================================================================*/
322 private:
323 
324  SgAsmExpression *getImmByte();
325  SgAsmExpression *getImmWord();
326  SgAsmExpression* getImmDWord();
327  SgAsmExpression* getImmQWord();
328  SgAsmExpression *getImmForAddr();
329  SgAsmExpression *getImmIv();
330  SgAsmExpression *getImmJz();
331  SgAsmExpression *getImmByteAsIv();
332  SgAsmExpression *getImmIzAsIv();
333  SgAsmExpression *getImmJb();
334 
335 
336 
337 
338  /*========================================================================================================================
339  * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
340  *========================================================================================================================*/
341 private:
342 
345  SgAsmX86Instruction *disassemble();
346 
348  SgAsmX86Instruction *decodeOpcode0F();
349 
351  SgAsmX86Instruction *decodeOpcode0F38();
352 
354  SgAsmX86Instruction *decodeX87InstructionD8();
355 
357  SgAsmX86Instruction *decodeX87InstructionD9();
358 
360  SgAsmX86Instruction *decodeX87InstructionDA();
361 
363  SgAsmX86Instruction *decodeX87InstructionDB();
364 
366  SgAsmX86Instruction *decodeX87InstructionDC();
367 
369  SgAsmX86Instruction *decodeX87InstructionDD();
370 
372  SgAsmX86Instruction *decodeX87InstructionDE();
373 
375  SgAsmX86Instruction *decodeX87InstructionDF();
376 
378  SgAsmX86Instruction *decodeGroup1(SgAsmExpression *imm);
379 
381  SgAsmX86Instruction *decodeGroup1a();
382 
384  SgAsmX86Instruction *decodeGroup2(SgAsmExpression *count);
385 
387  SgAsmX86Instruction *decodeGroup3(SgAsmExpression *immMaybe);
388 
390  SgAsmX86Instruction *decodeGroup4();
391 
393  SgAsmX86Instruction *decodeGroup5();
394 
396  SgAsmX86Instruction *decodeGroup6();
397 
400  SgAsmX86Instruction *decodeGroup7();
401 
403  SgAsmX86Instruction *decodeGroup8(SgAsmExpression *imm);
404 
406  SgAsmX86Instruction *decodeGroup11(SgAsmExpression *imm);
407 
409  SgAsmX86Instruction *decodeGroup15();
410 
412  SgAsmX86Instruction *decodeGroup16();
413 
415  SgAsmX86Instruction *decodeGroupP();
416 
417 
418 
419  /*========================================================================================================================
420  * Supporting functions
421  *========================================================================================================================*/
422 private:
423 
425  void init(size_t wordsize);
426 
428  void startInstruction(SgAsmX86Instruction *insn) {
429  startInstruction(insn->get_address(), NULL, 0);
430  insnSize = insn->get_baseSize();
431  lock = insn->get_lockPrefix();
432  branchPrediction = insn->get_branchPrediction();
433  branchPredictionEnabled = branchPrediction != x86_branch_prediction_none;
434  segOverride = insn->get_segmentOverride();
435  }
436 
438  void startInstruction(rose_addr_t start_va, const uint8_t *buf, size_t bufsz) {
439  ip = start_va;
440  insnbuf = SgUnsignedCharList(buf, buf+bufsz);
441  insnbufat = 0;
442 
443  /* Prefix flags */
444  segOverride = x86_segreg_none;
445  branchPrediction = x86_branch_prediction_none;
446  branchPredictionEnabled = false;
447  rexPresent = rexW = rexR = rexX = rexB = false;
448  sizeMustBe64Bit = false;
449  operandSizeOverride = false;
450  addressSizeOverride = false;
451  lock = false;
452  repeatPrefix = x86_repeat_none;
453  modregrmByteSet = false;
454  modregrmByte = modeField = regField = rmField = 0; /*arbitrary since modregrmByteSet is false*/
455  modrm = reg = NULL;
456  isUnconditionalJump = false;
457  }
458 };
459 
460 } // namespace
461 } // namespace
462 
463 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
464 BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::DisassemblerX86);
465 #endif
466 
467 #endif
Base class for references to a machine register.
Base class for machine instructions.
X86SegmentRegister get_segmentOverride() const
Property: The segment override register.
X86InstructionSize get_baseSize() const
Property: An enum constant describing the base size of an x86 instruction.
Main namespace for the ROSE library.
virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE
Unparser.
virtual bool canDisassemble(SgAsmGenericHeader *) const ROSE_OVERRIDE
Predicate determining the suitability of a disassembler for a specific file header.
Reference to memory locations.
Base class for container file headers.
bool get_lockPrefix() const
Property: Whether the x86 lock prefix was present.
An efficient mapping from an address space to stored data.
Definition: MemoryMap.h:96
virtual DisassemblerX86 * clone() const ROSE_OVERRIDE
Creates a new copy of a disassembler.
Represents one Intel x86 machine instruction.
Base class for expressions.
Base class for binary types.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) ROSE_OVERRIDE
Makes an unknown instruction from an exception.
Disassembler for the x86 architecture.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va, AddressSet *successors=NULL) ROSE_OVERRIDE
This is the lowest level disassembly function and is implemented in the architecture-specific subclas...
Virtual base class for instruction disassemblers.
Definition: Disassembler.h:41
X86BranchPrediction get_branchPrediction() const
Property: An enum constant describing branch prediction.
std::set< rose_addr_t > AddressSet
An AddressSet contains virtual addresses (alternatively, relative virtual addresses) for such things ...
Definition: Disassembler.h:82