ROSE  0.11.2.0
DisassemblerX86.h
1 /* Disassembly specific to the x86 architecture. */
2 
3 #ifndef ROSE_DISASSEMBLER_X86_H
4 #define ROSE_DISASSEMBLER_X86_H
5 
6 #include "Disassembler.h"
7 #ifdef ROSE_BUILD_BINARY_ANALYSIS_SUPPORT
8 
9 #include "InstructionEnumsX86.h"
10 #include "Cxx_GrammarSerialization.h"
11 
12 #include <boost/serialization/access.hpp>
13 #include <boost/serialization/base_object.hpp>
14 #include <boost/serialization/export.hpp>
15 #include <boost/serialization/split_member.hpp>
16 
17 namespace Rose {
18 namespace BinaryAnalysis {
19 
23  /* Per-disassembler settings; see init() */
/* Instruction-size mode of this disassembler. The constructors start it at x86_insnsize_none and longMode() compares it
 * against x86_insnsize_64. NOTE(review): presumably assigned by init() -- confirm in the implementation file. */
24  X86InstructionSize insnSize;
/* Word size of the disassembler. Apart from the base-class portion this is the only member written to a serialization
 * archive; load() feeds the restored value back into init(). Units are not visible in this header -- TODO confirm. */
25  size_t wordSize;
27  /* Per-instruction settings; see startInstruction() */
28  struct State {
29  uint64_t ip;
30  SgUnsignedCharList insnbuf;
31  size_t insnbufat;
33  /* Temporary flags set by the instruction; initialized by startInstruction() */
34  X86SegmentRegister segOverride;
35  X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
36  bool branchPredictionEnabled;
37  bool rexPresent, rexW, rexR, rexX, rexB;
38  bool sizeMustBe64Bit;
39  bool operandSizeOverride;
40  bool addressSizeOverride;
41  bool lock;
42  X86RepeatPrefix repeatPrefix;
43  bool modregrmByteSet;
44  uint8_t modregrmByte;
45  uint8_t modeField;
46  uint8_t regField;
47  uint8_t rmField;
48  SgAsmExpression *modrm;
49  SgAsmExpression *reg;
50  bool isUnconditionalJump;
52  State()
53  : ip(0), insnbufat(0), segOverride(x86_segreg_none), branchPrediction(x86_branch_prediction_none),
54  branchPredictionEnabled(false), rexPresent(false), rexW(false), rexR(false), rexX(false), rexB(false),
55  sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false), lock(false),
56  repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), rmField(0),
57  modrm(NULL), reg(NULL), isUnconditionalJump(false) {}
58  };
59 
61  // Serialization
63 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
64 private:
65  friend class boost::serialization::access;
66 
/* Serialization body shared by save() and load(). It archives the Disassembler base-class portion followed by wordSize;
 * the order of the two statements defines the archive layout. The version argument is unused. */
67  template<class S>
68  void serialize_common(S &s, const unsigned /*version*/) {
69  // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
70  // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
71  // constructed in their initial state by a combination of default constructor and init().
72  s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Disassembler);
73  s & BOOST_SERIALIZATION_NVP(wordSize);
74  }
75 
76  template<class S>
77  void save(S &s, const unsigned version) const {
78  serialize_common(s, version);
79  }
80 
/* "Load" half of the split serialization: restores the archived members through serialize_common(), then calls
 * init() with the restored wordSize to set up the members that are not archived. */
81  template<class S>
82  void load(S &s, const unsigned version) {
83  serialize_common(s, version);
84  init(wordSize);
85  }
86 
/* Boost macro that supplies the serialize() entry point and routes it to the save()/load() pair defined above. */
87  BOOST_SERIALIZATION_SPLIT_MEMBER();
88 #endif
89 
91  // Constructors
93 
94 protected:
95  // Default constructor for serialization
97  : insnSize(x86_insnsize_none), wordSize(0) {}
98 
99 public:
/* Constructs a disassembler for the given word size. The members first receive placeholder values
 * (x86_insnsize_none and 0); the real setup is delegated to init(wordsize). */
100  explicit DisassemblerX86(size_t wordsize)
101  : insnSize(x86_insnsize_none), wordSize(0) {
102  init(wordsize);
103  }
104 
/* Virtual destructor; the body is empty. */
105  virtual ~DisassemblerX86() {}
106 
107  virtual DisassemblerX86 *clone() const ROSE_OVERRIDE { return new DisassemblerX86(*this); }
108 
110  // Public methods
112 public:
/* Predicate determining the suitability of this disassembler for a specific file header. */
113  virtual bool canDisassemble(SgAsmGenericHeader*) const ROSE_OVERRIDE;
114 
/* Returns an unparser object. NOTE(review): presumably the unparser for x86 instructions -- confirm in the
 * implementation file. */
115  virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE;
116 
/* Lowest-level disassembly function: decodes the single instruction at virtual address start_va using the bytes in
 * map. The successors argument is optional and defaults to NULL. */
117  virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va,
118  AddressSet *successors=NULL) ROSE_OVERRIDE;
119 
/* Makes an "unknown" instruction from an exception. */
120  virtual SgAsmInstruction *makeUnknownInstruction(const Exception&) ROSE_OVERRIDE;
121 
122 
123  /*========================================================================================================================
124  * Data types
125  *========================================================================================================================*/
126 private:
127 
131  class ExceptionX86: public Exception {
132  public:
133  ExceptionX86(const std::string &mesg, const State &state)
134  : Exception(mesg, state.ip) {
135  ASSERT_require(state.insnbufat <= state.insnbuf.size());
136  if (state.insnbufat > 0)
137  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
138  bit = 8 * state.insnbufat;
139  }
140 
141  ExceptionX86(const std::string &mesg, const State &state, size_t bit)
142  : Exception(mesg, state.ip) {
143  ASSERT_require(state.insnbufat <= state.insnbuf.size());
144  if (state.insnbufat > 0)
145  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
146  this->bit = bit;
147  }
148  };
149 
/* Selects the kind of register an operand refers to; passed to makeRegister(), getModRegRM() and related helpers.
 * Enumerator order (and therefore the numeric values, starting at zero) is unchanged. */
enum RegisterMode {
    rmLegacyByte,
    rmRexByte,
    rmWord,
    rmDWord,
    rmQWord,
    rmSegment,
    rmST,
    rmMM,
    rmXMM,
    rmControl,
    rmDebug,
    rmReturnNull
};
154 
155  /* MMX registers? See mmPrefix method */
/* Result type of mmPrefix(). NOTE(review): the enumerator names suggest the F3, 66 and F2 prefix bytes -- confirm
 * against the mmPrefix() implementation. */
enum MMPrefix {
    mmNone,
    mmF3,
    mm66,
    mmF2
};
159 
160 
161  /*========================================================================================================================
162  * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
163  *========================================================================================================================*/
164 private:
165 
/* The four readers below return an 8-, 16-, 32- and 64-bit value respectively. NOTE(review): per the section header
 * they read instruction bytes and track how much has been read, presumably via state.insnbuf and state.insnbufat;
 * byte order and end-of-buffer behavior are defined in the implementation file -- confirm there. */
169  uint8_t getByte(State &state) const;
170 
174  uint16_t getWord(State &state) const;
175 
179  uint32_t getDWord(State &state) const;
180 
184  uint64_t getQWord(State &state) const;
185 
186  /*========================================================================================================================
187  * Miscellaneous helper methods
188  *========================================================================================================================*/
189 private:
/* Returns an expression for the data segment in effect. NOTE(review): presumably honors state.segOverride --
 * confirm in the implementation file. */
192  SgAsmExpression *currentDataSegment(State &state) const;
193 
/* Returns the effective address size for the instruction being decoded. NOTE(review): presumably derived from
 * insnSize and state.addressSizeOverride -- confirm in the implementation file. */
197  X86InstructionSize effectiveAddressSize(State &state) const;
198 
200  RegisterMode effectiveOperandMode(State &state) const {
201  return sizeToMode(effectiveOperandSize(state));
202  }
203 
/* Returns the effective operand size for the instruction being decoded. NOTE(review): presumably derived from
 * insnSize, the REX.W flag and state.operandSizeOverride -- confirm in the implementation file. */
207  X86InstructionSize effectiveOperandSize(State &state) const;
208 
210  SgAsmType *effectiveOperandType(State &state) const {
211  return sizeToType(effectiveOperandSize(state));
212  }
213 
215  bool longMode() const {
216  return insnSize == x86_insnsize_64;
217  }
218 
219  /* FIXME: documentation? */
/* Returns an MMPrefix value for the instruction being decoded. NOTE(review): presumably classifies the F3/66/F2
 * prefixes recorded in state -- confirm in the implementation file. */
220  MMPrefix mmPrefix(State &state) const;
221 
223  void not64(State &state) const {
224  if (longMode())
225  throw ExceptionX86("not valid for 64-bit code", state);
226  }
227 
/* Records a REX prefix byte in state. NOTE(review): presumably sets rexPresent, rexW, rexR, rexX and rexB from the
 * bits of `prefix` -- confirm in the implementation file. */
230  void setRex(State &state, uint8_t prefix) const;
231 
/* Converts an instruction size to a register mode (used by effectiveOperandMode()). */
233  static RegisterMode sizeToMode(X86InstructionSize);
234 
/* Converts an instruction size to a data type (used by effectiveOperandType()). */
237  static SgAsmType *sizeToType(X86InstructionSize s);
238 
239 
240 
241  /*========================================================================================================================
242  * Methods that construct something. (Their names all start with "make".)
243  *========================================================================================================================*/
244 private:
245 
/* Builds a value expression for `val`. NOTE(review): presumably sized by the effective address size, with
 * bit_offset/bit_size locating the value within the instruction encoding -- confirm in the implementation file. */
248  SgAsmExpression *makeAddrSizeValue(State &state, int64_t val, size_t bit_offset, size_t bit_size) const;
249 
/* Builds an x86 instruction node of the given kind and mnemonic with up to four operands; operands left at NULL
 * are presumably omitted -- confirm in the implementation file. */
254  SgAsmX86Instruction *makeInstruction(State &state, X86InstructionKind kind, const std::string &mnemonic,
255  SgAsmExpression *op1=NULL, SgAsmExpression *op2=NULL,
256  SgAsmExpression *op3=NULL, SgAsmExpression *op4=NULL) const;
257 
/* Builds a register reference. NOTE(review): presumably for the instruction pointer -- confirm. */
259  SgAsmRegisterReferenceExpression *makeIP() const;
260 
261  /* FIXME: documentation? */
/* NOTE(review): presumably builds a byte-sized register operand, with rexExtension selecting the extended
 * registers -- confirm in the implementation file. */
262  SgAsmRegisterReferenceExpression *makeOperandRegisterByte(State &state, bool rexExtension, uint8_t registerNumber) const;
263 
264  /* FIXME: documentation? */
/* NOTE(review): presumably builds a full-width register operand, with rexExtension selecting the extended
 * registers -- confirm in the implementation file. */
265  SgAsmRegisterReferenceExpression *makeOperandRegisterFull(State &state, bool rexExtension, uint8_t registerNumber) const;
266 
/* Builds a register reference for fullRegisterNumber in the given register mode. The optional registerType defaults
 * to NULL; its effect is defined in the implementation file. */
269  SgAsmRegisterReferenceExpression *makeRegister(State &state, uint8_t fullRegisterNumber, RegisterMode,
270  SgAsmType *registerType=NULL) const;
271 
272  /* FIXME: documentation? */
273  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, uint8_t fullRegisterNumber) const {
274  return makeRegister(state, fullRegisterNumber, effectiveOperandMode(state));
275  }
276 
277  /* FIXME: documentation? */
278  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, bool rexExtension, uint8_t registerNumber) const {
279  return makeRegister(state, registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode(state));
280  }
281 
/* Builds an expression for segment register `so`. NOTE(review): insn64 presumably indicates 64-bit code -- confirm
 * in the implementation file. */
283  SgAsmExpression *makeSegmentRegister(State &state, X86SegmentRegister so, bool insn64) const;
284 
285 
286 
287  /*========================================================================================================================
288  * Methods for operating on the ModR/M byte.
289  *========================================================================================================================*/
290 private:
291 
/* Processes the ModR/M byte for the instruction being decoded. NOTE(review): presumably fills state.modregrmByte,
 * modeField, regField, rmField, modrm and reg, with regMode/rmMode selecting the register kinds and t/tForReg the
 * operand types -- confirm in the implementation file. */
305  void getModRegRM(State &state, RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = NULL) const;
306 
/* Builds a memory reference expression. NOTE(review): presumably decoded from the ModR/M fields held in state. */
308  SgAsmMemoryReferenceExpression *decodeModrmMemory(State &state) const;
309 
/* NOTE(review): presumably (re)builds state.modrm for the given register mode and type -- confirm. */
312  void fillInModRM(State &state, RegisterMode rmMode, SgAsmType *t) const;
313 
/* Builds the expression for the r/m operand. NOTE(review): presumably a register or a memory reference depending on
 * state.modeField -- confirm in the implementation file. */
315  SgAsmExpression *makeModrmNormal(State &state, RegisterMode, SgAsmType *mrType) const;
316 
/* Builds a register reference for the r/m operand; the optional mrType defaults to NULL. */
319  SgAsmRegisterReferenceExpression *makeModrmRegister(State &state, RegisterMode, SgAsmType* mrType=NULL) const;
320 
322  void requireMemory(State &state) const {
323  if (!state.modregrmByteSet)
324  throw ExceptionX86("requires Mod/RM byte", state);
325  if (state.modeField == 3)
326  throw ExceptionX86("requires memory", state);
327  }
328 
329 
330 
331  /*========================================================================================================================
332  * Methods that construct an SgAsmExpression for an immediate operand.
333  *========================================================================================================================*/
334 private:
335 
/* Each method below returns an expression for an immediate operand of the instruction being decoded.
 * NOTE(review): the suffixes presumably name the operand width or encoding (Byte/Word/DWord/QWord; Iv, Iz, Jb, Jz as
 * in the Intel opcode-map notation) -- confirm against the implementation file. */
336  SgAsmExpression *getImmByte(State &state) const;
337  SgAsmExpression *getImmWord(State &state) const;
338  SgAsmExpression* getImmDWord(State &state) const;
339  SgAsmExpression* getImmQWord(State &state) const;
340  SgAsmExpression *getImmForAddr(State &state) const;
341  SgAsmExpression *getImmIv(State &state) const;
342  SgAsmExpression *getImmJz(State &state) const;
343  SgAsmExpression *getImmByteAsIv(State &state) const;
344  SgAsmExpression *getImmIzAsIv(State &state) const;
345  SgAsmExpression *getImmJb(State &state) const;
346 
347 
348 
349 
350  /*========================================================================================================================
351  * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
352  *========================================================================================================================*/
353 private:
354 
/* Decoding entry point for one instruction, operating on a State prepared by startInstruction().
 * NOTE(review): presumably dispatches on the first opcode byte to the decode* methods below -- confirm. */
357  SgAsmX86Instruction *disassemble(State &state) const;
358 
/* NOTE(review): the two methods below presumably decode opcodes that follow the 0F and 0F 38 escape bytes. */
360  SgAsmX86Instruction *decodeOpcode0F(State &state) const;
361 
363  SgAsmX86Instruction *decodeOpcode0F38(State &state) const;
364 
/* NOTE(review): the eight methods below presumably decode x87 instructions whose first opcode byte is the value
 * named in the method (D8 through DF). */
366  SgAsmX86Instruction *decodeX87InstructionD8(State &state) const;
367 
369  SgAsmX86Instruction *decodeX87InstructionD9(State &state) const;
370 
372  SgAsmX86Instruction *decodeX87InstructionDA(State &state) const;
373 
375  SgAsmX86Instruction *decodeX87InstructionDB(State &state) const;
376 
378  SgAsmX86Instruction *decodeX87InstructionDC(State &state) const;
379 
381  SgAsmX86Instruction *decodeX87InstructionDD(State &state) const;
382 
384  SgAsmX86Instruction *decodeX87InstructionDE(State &state) const;
385 
387  SgAsmX86Instruction *decodeX87InstructionDF(State &state) const;
388 
/* NOTE(review): the methods below presumably decode the opcode-extension groups named in their identifiers; the
 * extra SgAsmExpression argument, where present, is an already-built immediate or count operand. */
390  SgAsmX86Instruction *decodeGroup1(State &state, SgAsmExpression *imm) const;
391 
393  SgAsmX86Instruction *decodeGroup1a(State &state) const;
394 
396  SgAsmX86Instruction *decodeGroup2(State &state, SgAsmExpression *count) const;
397 
399  SgAsmX86Instruction *decodeGroup3(State &state, SgAsmExpression *immMaybe) const;
400 
402  SgAsmX86Instruction *decodeGroup4(State &state) const;
403 
405  SgAsmX86Instruction *decodeGroup5(State &state) const;
406 
408  SgAsmX86Instruction *decodeGroup6(State &state) const;
409 
412  SgAsmX86Instruction *decodeGroup7(State &state) const;
413 
415  SgAsmX86Instruction *decodeGroup8(State &state, SgAsmExpression *imm) const;
416 
418  SgAsmX86Instruction *decodeGroup11(State &state, SgAsmExpression *imm) const;
419 
421  SgAsmX86Instruction *decodeGroup15(State &state) const;
422 
424  SgAsmX86Instruction *decodeGroup16(State &state) const;
425 
427  SgAsmX86Instruction *decodeGroupP(State &state) const;
428 
429 
430 
431  /*========================================================================================================================
432  * Supporting functions
433  *========================================================================================================================*/
434 private:
435  // Initialize instances of this class. Called by constructor.
/* Besides the constructor, load() also calls this with the deserialized wordSize. */
436  void init(size_t wordsize);
437 
/* Disabled overload that would seed the decoding state from an existing instruction node. NOTE(review): as written it
 * would not compile if enabled: the inner startInstruction() call omits the State argument required by the live
 * overload, and the assignment to insnSize occurs inside a const member function. */
438 #if 0 // is this ever used?
439 
440  void startInstruction(State &state, SgAsmX86Instruction *insn) const {
441  startInstruction(insn->get_address(), NULL, 0);
442  insnSize = insn->get_baseSize();
443  state.lock = insn->get_lockPrefix();
444  state.branchPrediction = insn->get_branchPrediction();
445  state.branchPredictionEnabled = state.branchPrediction != x86_branch_prediction_none;
446  state.segOverride = insn->get_segmentOverride();
447  }
448 #endif
449 
450  // Resets disassembler state to beginning of an instruction for disassembly.
451  void startInstruction(State &state, rose_addr_t start_va, const uint8_t *buf, size_t bufsz) const {
452  state.ip = start_va;
453  state.insnbuf = SgUnsignedCharList(buf, buf+bufsz);
454  state.insnbufat = 0;
455 
456  // Prefix flags
457  state.segOverride = x86_segreg_none;
458  state.branchPrediction = x86_branch_prediction_none;
459  state.branchPredictionEnabled = false;
460  state.rexPresent = state.rexW = state.rexR = state.rexX = state.rexB = false;
461  state.sizeMustBe64Bit = false;
462  state.operandSizeOverride = false;
463  state.addressSizeOverride = false;
464  state.lock = false;
465  state.repeatPrefix = x86_repeat_none;
466  state.modregrmByteSet = false;
467  state.modregrmByte = state.modeField = state.regField = state.rmField = 0; /*arbitrary since modregrmByteSet is false*/
468  state.modrm = state.reg = NULL;
469  state.isUnconditionalJump = false;
470  }
471 
472  // Add comments to any IP relative addition expressions. We're not constant folding these because it's sometimes useful to
473  // know that the address is relative to the instruction address, but the comment is useful for understanding the disassembly.
/* Takes the instruction to annotate. Unlike most helpers in this class it is declared non-const. */
474  void commentIpRelative(SgAsmInstruction*);
475 };
476 
477 } // namespace
478 } // namespace
479 
480 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
481 BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::DisassemblerX86);
482 #endif
483 
484 #endif
485 #endif
Instruction is for a 64-bit architecture.
Base class for references to a machine register.
Base class for machine instructions.
X86BranchPrediction
Intel x86 branch prediction types.
Rose::BinaryAnalysis::X86InstructionSize get_baseSize() const
Property: An enum constant describing the base size of an x86 instruction.
Rose::BinaryAnalysis::X86BranchPrediction get_branchPrediction() const
Property: An enum constant describing branch prediction.
Rose::BinaryAnalysis::X86SegmentRegister get_segmentOverride() const
Property: The segment override register.
Main namespace for the ROSE library.
virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE
Unparser.
virtual bool canDisassemble(SgAsmGenericHeader *) const ROSE_OVERRIDE
Predicate determining the suitability of a disassembler for a specific file header.
Reference to memory locations.
Base class for container file headers.
bool get_lockPrefix() const
Property: Whether the x86 lock prefix was present.
An efficient mapping from an address space to stored data.
Definition: MemoryMap.h:112
virtual DisassemblerX86 * clone() const ROSE_OVERRIDE
Creates a new copy of a disassembler.
Represents one Intel x86 machine instruction.
Base class for expressions.
Base class for binary types.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) ROSE_OVERRIDE
Makes an unknown instruction from an exception.
Disassembler for the x86 architecture.
X86SegmentRegister
Intel x86 segment registers.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va, AddressSet *successors=NULL) ROSE_OVERRIDE
This is the lowest level disassembly function and is implemented in the architecture-specific subclasses.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
rose_addr_t get_address() const
Property: Starting virtual address.
Base class for all ROSE exceptions.
Definition: RoseException.h:9
Virtual base class for instruction disassemblers.
Definition: Disassembler.h:50
X86InstructionSize
Intel x86 instruction size constants.
X86RepeatPrefix
Intel x86 instruction repeat prefix.