ROSE  0.11.98.0
Disassembler/X86.h
1 /* Disassembly specific to the x86 architecture. */
2 #ifndef ROSE_BinaryAnalysis_Disassembler_X86_H
3 #define ROSE_BinaryAnalysis_Disassembler_X86_H
4 #include <featureTests.h>
5 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 #include <Rose/BinaryAnalysis/Disassembler/Base.h>
7 
8 #include <Rose/BinaryAnalysis/InstructionEnumsX86.h>
9 #include "Cxx_GrammarSerialization.h"
10 
11 #include <boost/serialization/access.hpp>
12 #include <boost/serialization/base_object.hpp>
13 #include <boost/serialization/export.hpp>
14 #include <boost/serialization/split_member.hpp>
15 
16 namespace Rose {
17 namespace BinaryAnalysis {
18 namespace Disassembler {
19 
22 class X86: public Base {
23 public:
// Reference counting pointer to an X86 disassembler.
25  using Ptr = X86Ptr;
26 
27 private:
28  /* Per-disassembler settings; see init() */
// Instruction size of the code being decoded; longMode() compares it against x86_insnsize_64.
29  X86InstructionSize insnSize;
// Word size handed to init(). It is the only data member written by serialize_common(), and load() passes the
// restored value back through init(). NOTE(review): the unit (bytes vs. bits) is not visible in this header.
30  size_t wordSize;
32  /* Per-instruction settings; see startInstruction() */
33  struct State {
34  uint64_t ip;
35  SgUnsignedCharList insnbuf;
36  size_t insnbufat;
38  /* Temporary flags set by the instruction; initialized by startInstruction() */
39  X86SegmentRegister segOverride;
40  X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
41  bool branchPredictionEnabled;
42  bool rexPresent, rexW, rexR, rexX, rexB;
43  bool sizeMustBe64Bit;
44  bool operandSizeOverride;
45  bool addressSizeOverride;
46  bool lock;
47  X86RepeatPrefix repeatPrefix;
48  bool modregrmByteSet;
49  uint8_t modregrmByte;
50  uint8_t modeField;
51  uint8_t regField;
52  uint8_t rmField;
53  SgAsmExpression *modrm;
54  SgAsmExpression *reg;
55  bool isUnconditionalJump;
57  State()
58  : ip(0), insnbufat(0), segOverride(x86_segreg_none), branchPrediction(x86_branch_prediction_none),
59  branchPredictionEnabled(false), rexPresent(false), rexW(false), rexR(false), rexX(false), rexB(false),
60  sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false), lock(false),
61  repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), regField(0),
62  rmField(0), modrm(NULL), reg(NULL), isUnconditionalJump(false) {}
63  };
64 
66  // Serialization
68 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
69 private:
70  friend class boost::serialization::access;
71 
72  template<class S>
73  void serialize_common(S &s, const unsigned /*version*/) {
74  // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
75  // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
76  // constructed in their initial state by a combination of default constructor and init().
77  s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Base);
78  s & BOOST_SERIALIZATION_NVP(wordSize);
79  }
80 
81  template<class S>
82  void save(S &s, const unsigned version) const {
83  serialize_common(s, version);
84  }
85 
86  template<class S>
87  void load(S &s, const unsigned version) {
88  serialize_common(s, version);
89  init(wordSize);
90  }
91 
92  BOOST_SERIALIZATION_SPLIT_MEMBER();
93 #endif
94 
96  // Constructors
98 
99 protected:
100  // Default constructor for serialization
101  X86();
102 
// Constructs a disassembler for the given word size. NOTE(review): presumably forwards to init(wordsize), which is
// documented as "called by constructor" -- confirm in the implementation file.
103  explicit X86(size_t wordsize);
104 
105 public:
// Allocating constructor: returns a reference counting pointer to a new X86 disassembler.
107  static Ptr instance(size_t wordSize);
108 
109  virtual ~X86() {}
110 
// Creates a new copy of a disassembler.
111  virtual Base::Ptr clone() const override;
112 
114  // Public methods
116 public:
// Predicate determining the suitability of a disassembler for a specific file header.
117  virtual bool canDisassemble(SgAsmGenericHeader*) const override;
118 
// Returns the unparser associated with this disassembler.
119  virtual Unparser::BasePtr unparser() const override;
120 
// Lowest-level disassembly function, implemented by each architecture-specific subclass. Takes the memory map to
// read from and the virtual address at which to start; `successors` is optional and defaults to NULL.
// NOTE(review): what is stored through `successors` is not visible in this header.
121  virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va,
122  AddressSet *successors=NULL) override;
123 
// Makes an unknown instruction from an exception.
124  virtual SgAsmInstruction *makeUnknownInstruction(const Exception&) override;
125 
126 
127  /*========================================================================================================================
128  * Data types
129  *========================================================================================================================*/
130 private:
131 
135  class ExceptionX86: public Exception {
136  public:
137  ExceptionX86(const std::string &mesg, const State &state)
138  : Exception(mesg, state.ip) {
139  ASSERT_require(state.insnbufat <= state.insnbuf.size());
140  if (state.insnbufat > 0)
141  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
142  bit = 8 * state.insnbufat;
143  }
144 
145  ExceptionX86(const std::string &mesg, const State &state, size_t bit)
146  : Exception(mesg, state.ip) {
147  ASSERT_require(state.insnbufat <= state.insnbuf.size());
148  if (state.insnbufat > 0)
149  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
150  this->bit = bit;
151  }
152  };
153 
// Selects the register class/width to use when turning a register number into a register expression. Values of this
// type are passed to getModRegRM(), makeRegister() and the makeModrm*() helpers, and are produced from an
// instruction size by sizeToMode(). NOTE(review): the individual enumerators are undocumented here; their names
// suggest byte (legacy vs. REX encoding), word, dword, qword, segment, x87 ST, MMX, XMM, control and debug
// registers, plus a "return null" request -- confirm against makeRegister().
155  enum RegisterMode {
156  rmLegacyByte, rmRexByte, rmWord, rmDWord, rmQWord, rmSegment, rmST, rmMM, rmXMM, rmControl, rmDebug, rmReturnNull
157  };
158 
159  /* MMX registers? See mmPrefix method */
// Result type of mmPrefix(). NOTE(review): the enumerator names suggest "no prefix" and the 0xF3, 0x66 and 0xF2
// prefix bytes respectively -- confirm against the mmPrefix() implementation.
160  enum MMPrefix {
161  mmNone, mmF3, mm66, mmF2
162  };
163 
164 
165  /*========================================================================================================================
166  * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
167  *========================================================================================================================*/
168 private:
169 
// The four readers below fetch a value of the named width from the instruction being decoded and, per the section
// header, keep track of how much has been read. NOTE(review): presumably they consume from state.insnbuf at
// state.insnbufat and throw ExceptionX86 when too few bytes remain; byte order is not visible here -- confirm in the
// implementation.

// Reads an 8-bit value.
173  uint8_t getByte(State &state) const;
174 
// Reads a 16-bit value.
178  uint16_t getWord(State &state) const;
179 
// Reads a 32-bit value.
183  uint32_t getDWord(State &state) const;
184 
// Reads a 64-bit value.
188  uint64_t getQWord(State &state) const;
189 
190  /*========================================================================================================================
191  * Miscellaneous helper methods
192  *========================================================================================================================*/
193 private:
// Returns an expression for the data segment in effect for the current instruction. NOTE(review): presumably honors
// state.segOverride -- confirm in the implementation.
196  SgAsmExpression *currentDataSegment(State &state) const;
197 
// Returns the address size in effect for the current instruction. NOTE(review): presumably derived from insnSize
// and state.addressSizeOverride -- confirm in the implementation.
201  X86InstructionSize effectiveAddressSize(State &state) const;
202 
204  RegisterMode effectiveOperandMode(State &state) const {
205  return sizeToMode(effectiveOperandSize(state));
206  }
207 
// Returns the operand size in effect for the current instruction; effectiveOperandMode() and effectiveOperandType()
// are both derived from it. NOTE(review): presumably combines insnSize with the operand-size override and REX.W
// flags held in `state` -- confirm in the implementation.
211  X86InstructionSize effectiveOperandSize(State &state) const;
212 
214  SgAsmType *effectiveOperandType(State &state) const {
215  return sizeToType(effectiveOperandSize(state));
216  }
217 
219  bool longMode() const {
220  return insnSize == x86_insnsize_64;
221  }
222 
223  /* FIXME: documentation? */
// Classifies the current instruction's prefix as one of the MMPrefix values. NOTE(review): which fields of `state`
// are consulted, and what happens when several prefixes are present, is not visible in this header.
224  MMPrefix mmPrefix(State &state) const;
225 
227  void not64(State &state) const {
228  if (longMode())
229  throw ExceptionX86("not valid for 64-bit code", state);
230  }
231 
// Records a REX prefix byte in `state`. NOTE(review): presumably sets state.rexPresent and splits the byte into
// state.rexW / rexR / rexX / rexB -- confirm in the implementation.
234  void setRex(State &state, uint8_t prefix) const;
235 
// Maps an instruction size to a RegisterMode; used by effectiveOperandMode().
237  static RegisterMode sizeToMode(X86InstructionSize);
238 
// Maps an instruction size to a type node; used by effectiveOperandType().
241  static SgAsmType *sizeToType(X86InstructionSize s);
242 
243 
244 
245  /*========================================================================================================================
246  * Methods that construct something. (Their names all start with "make".)
247  *========================================================================================================================*/
248 private:
249 
// Builds a value expression for `val`. NOTE(review): the name suggests the value is sized according to the effective
// address size, and bit_offset/bit_size presumably locate the value within the instruction encoding -- confirm.
252  SgAsmExpression *makeAddrSizeValue(State &state, int64_t val, size_t bit_offset, size_t bit_size) const;
253 
// Builds an x86 instruction node of the given kind and mnemonic with up to four operands; operands left at their
// NULL default are presumably omitted (TODO confirm).
258  SgAsmX86Instruction *makeInstruction(State &state, X86InstructionKind kind, const std::string &mnemonic,
259  SgAsmExpression *op1=NULL, SgAsmExpression *op2=NULL,
260  SgAsmExpression *op3=NULL, SgAsmExpression *op4=NULL) const;
261 
// Builds a register reference for the instruction pointer. Takes no State, so the result depends only on
// per-disassembler settings.
263  SgAsmRegisterReferenceExpression *makeIP() const;
264 
265  /* FIXME: documentation? */
// NOTE(review): presumably builds a byte-sized register operand from a 3-bit register number plus the REX extension
// bit -- confirm in the implementation.
266  SgAsmRegisterReferenceExpression *makeOperandRegisterByte(State &state, bool rexExtension, uint8_t registerNumber) const;
267 
268  /* FIXME: documentation? */
// NOTE(review): presumably the full-width counterpart of makeOperandRegisterByte() -- confirm in the implementation.
269  SgAsmRegisterReferenceExpression *makeOperandRegisterFull(State &state, bool rexExtension, uint8_t registerNumber) const;
270 
// Builds a register reference for `fullRegisterNumber` interpreted according to the given RegisterMode. The optional
// registerType defaults to NULL. Both makeRegisterEffective() overloads forward to this function.
273  SgAsmRegisterReferenceExpression *makeRegister(State &state, uint8_t fullRegisterNumber, RegisterMode,
274  SgAsmType *registerType=NULL) const;
275 
276  /* FIXME: documentation? */
277  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, uint8_t fullRegisterNumber) const {
278  return makeRegister(state, fullRegisterNumber, effectiveOperandMode(state));
279  }
280 
281  /* FIXME: documentation? */
282  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, bool rexExtension, uint8_t registerNumber) const {
283  return makeRegister(state, registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode(state));
284  }
285 
// Builds an expression for segment register `so`. NOTE(review): the effect of `insn64` is not visible in this
// header; presumably it selects 64-bit-mode handling -- confirm in the implementation.
287  SgAsmExpression *makeSegmentRegister(State &state, X86SegmentRegister so, bool insn64) const;
288 
289 
290 
291  /*========================================================================================================================
292  * Methods for operating on the ModR/M byte.
293  *========================================================================================================================*/
294 private:
295 
// Decodes the ModR/M byte of the current instruction. `regMode` and `rmMode` select how the reg and r/m fields are
// turned into registers; `t` and the optional `tForReg` are operand types. NOTE(review): presumably fills
// state.modregrmByte, the three field members, state.modrm and state.reg, and sets state.modregrmByteSet -- confirm.
309  void getModRegRM(State &state, RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = NULL) const;
310 
// Builds the memory reference described by the ModR/M byte held in `state`.
312  SgAsmMemoryReferenceExpression *decodeModrmMemory(State &state) const;
313 
// NOTE(review): presumably (re)computes state.modrm for the given r/m register mode and type -- confirm.
316  void fillInModRM(State &state, RegisterMode rmMode, SgAsmType *t) const;
317 
// Builds the r/m operand as a general expression. NOTE(review): presumably a register or a memory reference
// depending on state.modeField -- confirm.
319  SgAsmExpression *makeModrmNormal(State &state, RegisterMode, SgAsmType *mrType) const;
320 
// Builds the r/m operand as a register reference; mrType is optional and defaults to NULL.
323  SgAsmRegisterReferenceExpression *makeModrmRegister(State &state, RegisterMode, SgAsmType* mrType=NULL) const;
324 
326  void requireMemory(State &state) const {
327  if (!state.modregrmByteSet)
328  throw ExceptionX86("requires Mod/RM byte", state);
329  if (state.modeField == 3)
330  throw ExceptionX86("requires memory", state);
331  }
332 
333 
334 
335  /*========================================================================================================================
336  * Methods that construct an SgAsmExpression for an immediate operand.
337  *========================================================================================================================*/
338 private:
339 
// Each helper below constructs an SgAsmExpression for an immediate operand of the current instruction (see the
// section header). NOTE(review): the suffixes appear to follow the usual x86 opcode-map operand notation (Iv, Iz,
// Jb, Jz) and the fixed-width names mirror getByte()/getWord()/getDWord()/getQWord(); the exact widths, sign
// handling and branch-target computation are not visible in this header -- confirm in the implementation.
340  SgAsmExpression *getImmByte(State &state) const;
341  SgAsmExpression *getImmWord(State &state) const;
342  SgAsmExpression* getImmDWord(State &state) const;
343  SgAsmExpression* getImmQWord(State &state) const;
344  SgAsmExpression *getImmForAddr(State &state) const;
345  SgAsmExpression *getImmIv(State &state) const;
346  SgAsmExpression *getImmJz(State &state) const;
347  SgAsmExpression *getImmByteAsIv(State &state) const;
348  SgAsmExpression *getImmIzAsIv(State &state) const;
349  SgAsmExpression *getImmJb(State &state) const;
350 
351 
352 
353 
354  /*========================================================================================================================
355  * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
356  *========================================================================================================================*/
357 private:
358 
// Decodes one instruction from `state`. NOTE(review): presumably the entry point used by disassembleOne() after
// startInstruction() has prepared `state` -- confirm in the implementation.
361  SgAsmX86Instruction *disassemble(State &state) const;
362 
// NOTE(review): presumably decodes instructions whose first opcode byte is 0x0F -- confirm.
364  SgAsmX86Instruction *decodeOpcode0F(State &state) const;
365 
// NOTE(review): presumably decodes instructions whose opcode begins 0x0F 0x38 -- confirm.
367  SgAsmX86Instruction *decodeOpcode0F38(State &state) const;
368 
// One decoder per x87 escape opcode. NOTE(review): the D8..DF suffix presumably names the opcode byte (0xD8 through
// 0xDF) each function handles -- confirm in the implementation.
370  SgAsmX86Instruction *decodeX87InstructionD8(State &state) const;
371 
373  SgAsmX86Instruction *decodeX87InstructionD9(State &state) const;
374 
376  SgAsmX86Instruction *decodeX87InstructionDA(State &state) const;
377 
379  SgAsmX86Instruction *decodeX87InstructionDB(State &state) const;
380 
382  SgAsmX86Instruction *decodeX87InstructionDC(State &state) const;
383 
385  SgAsmX86Instruction *decodeX87InstructionDD(State &state) const;
386 
388  SgAsmX86Instruction *decodeX87InstructionDE(State &state) const;
389 
391  SgAsmX86Instruction *decodeX87InstructionDF(State &state) const;
392 
// Decoders for the opcode "groups". NOTE(review): presumably each selects the concrete instruction from a field of
// the ModR/M byte held in `state`, following the group numbering of the x86 opcode maps; the overloads that take an
// expression receive an operand already decoded by the caller (an immediate, a shift count, or an immediate that
// may be absent, judging by the parameter names) -- confirm in the implementation.
394  SgAsmX86Instruction *decodeGroup1(State &state, SgAsmExpression *imm) const;
395 
397  SgAsmX86Instruction *decodeGroup1a(State &state) const;
398 
400  SgAsmX86Instruction *decodeGroup2(State &state, SgAsmExpression *count) const;
401 
403  SgAsmX86Instruction *decodeGroup3(State &state, SgAsmExpression *immMaybe) const;
404 
406  SgAsmX86Instruction *decodeGroup4(State &state) const;
407 
409  SgAsmX86Instruction *decodeGroup5(State &state) const;
410 
412  SgAsmX86Instruction *decodeGroup6(State &state) const;
413 
416  SgAsmX86Instruction *decodeGroup7(State &state) const;
417 
419  SgAsmX86Instruction *decodeGroup8(State &state, SgAsmExpression *imm) const;
420 
422  SgAsmX86Instruction *decodeGroup11(State &state, SgAsmExpression *imm) const;
423 
425  SgAsmX86Instruction *decodeGroup15(State &state) const;
426 
428  SgAsmX86Instruction *decodeGroup16(State &state) const;
429 
431  SgAsmX86Instruction *decodeGroupP(State &state) const;
432 
433 
434 
435  /*========================================================================================================================
436  * Supporting functions
437  *========================================================================================================================*/
438 private:
439  // Initialize instances of this class. Called by constructor.
// Also called by load() with the deserialized wordSize, so it must establish every per-disassembler setting that
// serialize_common() does not archive.
440  void init(size_t wordsize);
441 
// Disabled code, kept for reference only: an overload that would seed the decoder state from an existing
// instruction node. It cannot simply be re-enabled as written: it calls startInstruction() with three arguments,
// whereas the only other overload in this class takes four (including the State), and it assigns to the data
// member insnSize from inside a const member function.
442 #if 0 // is this ever used?
443 
444  void startInstruction(State &state, SgAsmX86Instruction *insn) const {
445  startInstruction(insn->get_address(), NULL, 0);
446  insnSize = insn->get_baseSize();
447  state.lock = insn->get_lockPrefix();
448  state.branchPrediction = insn->get_branchPrediction();
449  state.branchPredictionEnabled = state.branchPrediction != x86_branch_prediction_none;
450  state.segOverride = insn->get_segmentOverride();
451  }
452 #endif
453 
454  // Resets disassembler state to beginning of an instruction for disassembly.
455  void startInstruction(State &state, rose_addr_t start_va, const uint8_t *buf, size_t bufsz) const {
456  state.ip = start_va;
457  state.insnbuf = SgUnsignedCharList(buf, buf+bufsz);
458  state.insnbufat = 0;
459 
460  // Prefix flags
461  state.segOverride = x86_segreg_none;
462  state.branchPrediction = x86_branch_prediction_none;
463  state.branchPredictionEnabled = false;
464  state.rexPresent = state.rexW = state.rexR = state.rexX = state.rexB = false;
465  state.sizeMustBe64Bit = false;
466  state.operandSizeOverride = false;
467  state.addressSizeOverride = false;
468  state.lock = false;
469  state.repeatPrefix = x86_repeat_none;
470  state.modregrmByteSet = false;
471  state.modregrmByte = state.modeField = state.regField = state.rmField = 0; /*arbitrary since modregrmByteSet is false*/
472  state.modrm = state.reg = NULL;
473  state.isUnconditionalJump = false;
474  }
475 
476  // Add comments to any IP relative addition expressions. We're not constant folding these because it's sometimes useful to
477  // know that the address is relative to the instruction address, but the comment is useful for understanding the disassembly.
// Unlike the decoding helpers above, this member is not const and takes no State; it operates on the finished
// instruction passed as its argument.
478  void commentIpRelative(SgAsmInstruction*);
479 };
480 
481 } // namespace
482 } // namespace
483 } // namespace
484 
485 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
486 BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::Disassembler::X86);
487 #endif
488 
489 #endif
490 #endif
X86Ptr Ptr
Reference counting pointer.
static Ptr instance(size_t wordSize)
Allocating constructor.
Base class for references to a machine register.
virtual Base::Ptr clone() const override
Creates a new copy of a disassembler.
virtual bool canDisassemble(SgAsmGenericHeader *) const override
Predicate determining the suitability of a disassembler for a specific file header.
rose_addr_t ip
Virtual address where failure occurred; zero if no associated instruction.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) override
Makes an unknown instruction from an exception.
Base class for machine instructions.
Rose::BinaryAnalysis::X86InstructionSize get_baseSize() const
Property: An enum constant describing the base size of an x86 instruction.
size_t bit
Bit offset in instruction byte sequence where disassembly failed (bit/8 is the index into the "bytes" list).
Rose::BinaryAnalysis::X86BranchPrediction get_branchPrediction() const
Property: An enum constant describing branch prediction.
Rose::BinaryAnalysis::X86SegmentRegister get_segmentOverride() const
Property: The segment override register.
Main namespace for the ROSE library.
MemoryMapPtr Ptr
Reference counting pointer.
Definition: MemoryMap.h:115
Sawyer::SharedPointer< X86 > X86Ptr
Reference counted pointer for Intel X86 decoder.
Reference to memory locations.
Base class for container file headers.
Exception(const std::string &reason)
A bare exception not bound to any particular instruction.
Disassembler for the x86 architecture.
bool get_lockPrefix() const
Property: Whether the x86 lock prefix was present.
virtual Unparser::BasePtr unparser() const override
Unparser.
Represents one Intel x86 machine instruction.
Base class for expressions.
Binary analysis.
Base class for binary types.
SgUnsignedCharList bytes
Bytes (partial) of failed disassembly, including byte at failure.
rose_addr_t get_address() const
Property: Starting virtual address.
Virtual base class for instruction disassemblers.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va, AddressSet *successors=NULL) override
This is the lowest level disassembly function and is implemented in the architecture-specific subclasses.