ROSE  0.11.50.0
DisassemblerX86.h
1 /* Disassembly specific to the x86 architecture. */
2 #ifndef ROSE_BinaryAnalysis_DisassemblerX86_H
3 #define ROSE_BinaryAnalysis_DisassemblerX86_H
4 #include <featureTests.h>
5 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 #include <Rose/BinaryAnalysis/Disassembler.h>
7 
8 #include <Rose/BinaryAnalysis/InstructionEnumsX86.h>
9 #include "Cxx_GrammarSerialization.h"
10 
11 #include <boost/serialization/access.hpp>
12 #include <boost/serialization/base_object.hpp>
13 #include <boost/serialization/export.hpp>
14 #include <boost/serialization/split_member.hpp>
15 
16 namespace Rose {
17 namespace BinaryAnalysis {
18 
22  /* Per-disassembler settings; see init() */
23  X86InstructionSize insnSize;
24  size_t wordSize;
26  /* Per-instruction settings; see startInstruction() */
27  struct State {
28  uint64_t ip;
29  SgUnsignedCharList insnbuf;
30  size_t insnbufat;
32  /* Temporary flags set by the instruction; initialized by startInstruction() */
33  X86SegmentRegister segOverride;
34  X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
35  bool branchPredictionEnabled;
36  bool rexPresent, rexW, rexR, rexX, rexB;
37  bool sizeMustBe64Bit;
38  bool operandSizeOverride;
39  bool addressSizeOverride;
40  bool lock;
41  X86RepeatPrefix repeatPrefix;
42  bool modregrmByteSet;
43  uint8_t modregrmByte;
44  uint8_t modeField;
45  uint8_t regField;
46  uint8_t rmField;
47  SgAsmExpression *modrm;
48  SgAsmExpression *reg;
49  bool isUnconditionalJump;
51  State()
52  : ip(0), insnbufat(0), segOverride(x86_segreg_none), branchPrediction(x86_branch_prediction_none),
53  branchPredictionEnabled(false), rexPresent(false), rexW(false), rexR(false), rexX(false), rexB(false),
54  sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false), lock(false),
55  repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), rmField(0),
56  modrm(NULL), reg(NULL), isUnconditionalJump(false) {}
57  };
58 
60  // Serialization
62 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
63 private:
64  friend class boost::serialization::access;
65 
66  template<class S>
67  void serialize_common(S &s, const unsigned /*version*/) {
68  // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
69  // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
70  // constructed in their initial state by a combination of default constructor and init().
71  s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Disassembler);
72  s & BOOST_SERIALIZATION_NVP(wordSize);
73  }
74 
75  template<class S>
76  void save(S &s, const unsigned version) const {
77  serialize_common(s, version);
78  }
79 
80  template<class S>
81  void load(S &s, const unsigned version) {
82  serialize_common(s, version);
83  init(wordSize);
84  }
85 
86  BOOST_SERIALIZATION_SPLIT_MEMBER();
87 #endif
88 
90  // Constructors
92 
93 protected:
94  // Default constructor for serialization
96  : insnSize(x86_insnsize_none), wordSize(0) {}
97 
98 public:
99  explicit DisassemblerX86(size_t wordsize)
100  : insnSize(x86_insnsize_none), wordSize(0) {
101  init(wordsize);
102  }
103 
104  virtual ~DisassemblerX86() {}
105 
106  virtual DisassemblerX86 *clone() const ROSE_OVERRIDE { return new DisassemblerX86(*this); }
107 
109  // Public methods
111 public:
112  virtual bool canDisassemble(SgAsmGenericHeader*) const ROSE_OVERRIDE;
113 
114  virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE;
115 
116  virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va,
117  AddressSet *successors=NULL) ROSE_OVERRIDE;
118 
119  virtual SgAsmInstruction *makeUnknownInstruction(const Exception&) ROSE_OVERRIDE;
120 
121 
122  /*========================================================================================================================
123  * Data types
124  *========================================================================================================================*/
125 private:
126 
130  class ExceptionX86: public Exception {
131  public:
132  ExceptionX86(const std::string &mesg, const State &state)
133  : Exception(mesg, state.ip) {
134  ASSERT_require(state.insnbufat <= state.insnbuf.size());
135  if (state.insnbufat > 0)
136  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
137  bit = 8 * state.insnbufat;
138  }
139 
140  ExceptionX86(const std::string &mesg, const State &state, size_t bit)
141  : Exception(mesg, state.ip) {
142  ASSERT_require(state.insnbufat <= state.insnbuf.size());
143  if (state.insnbufat > 0)
144  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
145  this->bit = bit;
146  }
147  };
148 
// Selector passed to makeRegister(), getModRegRM() and related methods, and returned by sizeToMode().
// NOTE(review): the enumerator names suggest register classes (byte, word, segment, x87 stack, MMX, XMM, control, debug);
// the exact meaning of each, including rmReturnNull, is defined by the implementation file -- confirm there.
enum RegisterMode {
    rmLegacyByte,
    rmRexByte,
    rmWord,
    rmDWord,
    rmQWord,
    rmSegment,
    rmST,
    rmMM,
    rmXMM,
    rmControl,
    rmDebug,
    rmReturnNull
};
153 
/* MMX registers? See mmPrefix method.
 * NOTE(review): the names look like the prefix bytes F3, 66 and F2 (plus "none") -- confirm against mmPrefix(). */
enum MMPrefix {
    mmNone,
    mmF3,
    mm66,
    mmF2
};
158 
159 
160  /*========================================================================================================================
161  * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
162  *========================================================================================================================*/
163 private:
164 
168  uint8_t getByte(State &state) const;
169 
173  uint16_t getWord(State &state) const;
174 
178  uint32_t getDWord(State &state) const;
179 
183  uint64_t getQWord(State &state) const;
184 
185  /*========================================================================================================================
186  * Miscellaneous helper methods
187  *========================================================================================================================*/
188 private:
191  SgAsmExpression *currentDataSegment(State &state) const;
192 
196  X86InstructionSize effectiveAddressSize(State &state) const;
197 
199  RegisterMode effectiveOperandMode(State &state) const {
200  return sizeToMode(effectiveOperandSize(state));
201  }
202 
206  X86InstructionSize effectiveOperandSize(State &state) const;
207 
209  SgAsmType *effectiveOperandType(State &state) const {
210  return sizeToType(effectiveOperandSize(state));
211  }
212 
214  bool longMode() const {
215  return insnSize == x86_insnsize_64;
216  }
217 
218  /* FIXME: documentation? */
219  MMPrefix mmPrefix(State &state) const;
220 
222  void not64(State &state) const {
223  if (longMode())
224  throw ExceptionX86("not valid for 64-bit code", state);
225  }
226 
229  void setRex(State &state, uint8_t prefix) const;
230 
232  static RegisterMode sizeToMode(X86InstructionSize);
233 
236  static SgAsmType *sizeToType(X86InstructionSize s);
237 
238 
239 
240  /*========================================================================================================================
241  * Methods that construct something. (Their names all start with "make".)
242  *========================================================================================================================*/
243 private:
244 
247  SgAsmExpression *makeAddrSizeValue(State &state, int64_t val, size_t bit_offset, size_t bit_size) const;
248 
253  SgAsmX86Instruction *makeInstruction(State &state, X86InstructionKind kind, const std::string &mnemonic,
254  SgAsmExpression *op1=NULL, SgAsmExpression *op2=NULL,
255  SgAsmExpression *op3=NULL, SgAsmExpression *op4=NULL) const;
256 
258  SgAsmRegisterReferenceExpression *makeIP() const;
259 
260  /* FIXME: documentation? */
261  SgAsmRegisterReferenceExpression *makeOperandRegisterByte(State &state, bool rexExtension, uint8_t registerNumber) const;
262 
263  /* FIXME: documentation? */
264  SgAsmRegisterReferenceExpression *makeOperandRegisterFull(State &state, bool rexExtension, uint8_t registerNumber) const;
265 
268  SgAsmRegisterReferenceExpression *makeRegister(State &state, uint8_t fullRegisterNumber, RegisterMode,
269  SgAsmType *registerType=NULL) const;
270 
271  /* FIXME: documentation? */
272  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, uint8_t fullRegisterNumber) const {
273  return makeRegister(state, fullRegisterNumber, effectiveOperandMode(state));
274  }
275 
276  /* FIXME: documentation? */
277  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, bool rexExtension, uint8_t registerNumber) const {
278  return makeRegister(state, registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode(state));
279  }
280 
282  SgAsmExpression *makeSegmentRegister(State &state, X86SegmentRegister so, bool insn64) const;
283 
284 
285 
286  /*========================================================================================================================
287  * Methods for operating on the ModR/M byte.
288  *========================================================================================================================*/
289 private:
290 
304  void getModRegRM(State &state, RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = NULL) const;
305 
307  SgAsmMemoryReferenceExpression *decodeModrmMemory(State &state) const;
308 
311  void fillInModRM(State &state, RegisterMode rmMode, SgAsmType *t) const;
312 
314  SgAsmExpression *makeModrmNormal(State &state, RegisterMode, SgAsmType *mrType) const;
315 
318  SgAsmRegisterReferenceExpression *makeModrmRegister(State &state, RegisterMode, SgAsmType* mrType=NULL) const;
319 
321  void requireMemory(State &state) const {
322  if (!state.modregrmByteSet)
323  throw ExceptionX86("requires Mod/RM byte", state);
324  if (state.modeField == 3)
325  throw ExceptionX86("requires memory", state);
326  }
327 
328 
329 
330  /*========================================================================================================================
331  * Methods that construct an SgAsmExpression for an immediate operand.
332  *========================================================================================================================*/
333 private:
334 
335  SgAsmExpression *getImmByte(State &state) const;
336  SgAsmExpression *getImmWord(State &state) const;
337  SgAsmExpression* getImmDWord(State &state) const;
338  SgAsmExpression* getImmQWord(State &state) const;
339  SgAsmExpression *getImmForAddr(State &state) const;
340  SgAsmExpression *getImmIv(State &state) const;
341  SgAsmExpression *getImmJz(State &state) const;
342  SgAsmExpression *getImmByteAsIv(State &state) const;
343  SgAsmExpression *getImmIzAsIv(State &state) const;
344  SgAsmExpression *getImmJb(State &state) const;
345 
346 
347 
348 
349  /*========================================================================================================================
350  * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
351  *========================================================================================================================*/
352 private:
353 
356  SgAsmX86Instruction *disassemble(State &state) const;
357 
359  SgAsmX86Instruction *decodeOpcode0F(State &state) const;
360 
362  SgAsmX86Instruction *decodeOpcode0F38(State &state) const;
363 
365  SgAsmX86Instruction *decodeX87InstructionD8(State &state) const;
366 
368  SgAsmX86Instruction *decodeX87InstructionD9(State &state) const;
369 
371  SgAsmX86Instruction *decodeX87InstructionDA(State &state) const;
372 
374  SgAsmX86Instruction *decodeX87InstructionDB(State &state) const;
375 
377  SgAsmX86Instruction *decodeX87InstructionDC(State &state) const;
378 
380  SgAsmX86Instruction *decodeX87InstructionDD(State &state) const;
381 
383  SgAsmX86Instruction *decodeX87InstructionDE(State &state) const;
384 
386  SgAsmX86Instruction *decodeX87InstructionDF(State &state) const;
387 
389  SgAsmX86Instruction *decodeGroup1(State &state, SgAsmExpression *imm) const;
390 
392  SgAsmX86Instruction *decodeGroup1a(State &state) const;
393 
395  SgAsmX86Instruction *decodeGroup2(State &state, SgAsmExpression *count) const;
396 
398  SgAsmX86Instruction *decodeGroup3(State &state, SgAsmExpression *immMaybe) const;
399 
401  SgAsmX86Instruction *decodeGroup4(State &state) const;
402 
404  SgAsmX86Instruction *decodeGroup5(State &state) const;
405 
407  SgAsmX86Instruction *decodeGroup6(State &state) const;
408 
411  SgAsmX86Instruction *decodeGroup7(State &state) const;
412 
414  SgAsmX86Instruction *decodeGroup8(State &state, SgAsmExpression *imm) const;
415 
417  SgAsmX86Instruction *decodeGroup11(State &state, SgAsmExpression *imm) const;
418 
420  SgAsmX86Instruction *decodeGroup15(State &state) const;
421 
423  SgAsmX86Instruction *decodeGroup16(State &state) const;
424 
426  SgAsmX86Instruction *decodeGroupP(State &state) const;
427 
428 
429 
430  /*========================================================================================================================
431  * Supporting functions
432  *========================================================================================================================*/
433 private:
// Initialize instances of this class. Called by the public constructor with the requested word size, and by load() with
// the word size restored from an archive.
void init(size_t wordsize);
436 
#if 0 // is this ever used?
// Disabled variant that seeds the decoder state from an existing instruction node. It is kept for reference only and
// would not compile if re-enabled as written: the inner call passes three arguments although the only visible
// startInstruction() overload also takes a State&, and it assigns the data member insnSize from a const member function.
void startInstruction(State &state, SgAsmX86Instruction *insn) const {
    startInstruction(insn->get_address(), NULL, 0);
    insnSize = insn->get_baseSize();
    state.lock = insn->get_lockPrefix();
    state.branchPrediction = insn->get_branchPrediction();
    state.branchPredictionEnabled = state.branchPrediction != x86_branch_prediction_none;
    state.segOverride = insn->get_segmentOverride();
}
#endif
448 
449  // Resets disassembler state to beginning of an instruction for disassembly.
450  void startInstruction(State &state, rose_addr_t start_va, const uint8_t *buf, size_t bufsz) const {
451  state.ip = start_va;
452  state.insnbuf = SgUnsignedCharList(buf, buf+bufsz);
453  state.insnbufat = 0;
454 
455  // Prefix flags
456  state.segOverride = x86_segreg_none;
457  state.branchPrediction = x86_branch_prediction_none;
458  state.branchPredictionEnabled = false;
459  state.rexPresent = state.rexW = state.rexR = state.rexX = state.rexB = false;
460  state.sizeMustBe64Bit = false;
461  state.operandSizeOverride = false;
462  state.addressSizeOverride = false;
463  state.lock = false;
464  state.repeatPrefix = x86_repeat_none;
465  state.modregrmByteSet = false;
466  state.modregrmByte = state.modeField = state.regField = state.rmField = 0; /*arbitrary since modregrmByteSet is false*/
467  state.modrm = state.reg = NULL;
468  state.isUnconditionalJump = false;
469  }
470 
471  // Add comments to any IP relative addition expressions. We're not constant folding these because it's sometimes useful to
472  // know that the address is relative to the instruction address, but the comment is useful for understanding the disassembly.
473  void commentIpRelative(SgAsmInstruction*);
474 };
475 
476 } // namespace
477 } // namespace
478 
479 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
480 BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::DisassemblerX86);
481 #endif
482 
483 #endif
484 #endif
Instruction is for a 64-bit architecture.
Base class for references to a machine register.
Base class for machine instructions.
X86BranchPrediction
Intel x86 branch prediction types.
Rose::BinaryAnalysis::X86InstructionSize get_baseSize() const
Property: An enum constant describing the base size of an x86 instruction.
Rose::BinaryAnalysis::X86BranchPrediction get_branchPrediction() const
Property: An enum constant describing branch prediction.
Rose::BinaryAnalysis::X86SegmentRegister get_segmentOverride() const
Property: The segment override register.
Main namespace for the ROSE library.
virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE
Unparser.
virtual bool canDisassemble(SgAsmGenericHeader *) const ROSE_OVERRIDE
Predicate determining the suitability of a disassembler for a specific file header.
Reference to memory locations.
Base class for container file headers.
bool get_lockPrefix() const
Property: Whether the x86 lock prefix was present.
An efficient mapping from an address space to stored data.
Definition: MemoryMap.h:111
virtual DisassemblerX86 * clone() const ROSE_OVERRIDE
Creates a new copy of a disassembler.
Represents one Intel x86 machine instruction.
Base class for expressions.
Base class for binary types.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) ROSE_OVERRIDE
Makes an unknown instruction from an exception.
Disassembler for the x86 architecture.
X86SegmentRegister
Intel x86 segment registers.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va, AddressSet *successors=NULL) ROSE_OVERRIDE
This is the lowest level disassembly function and is implemented in the architecture-specific subclasses.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
rose_addr_t get_address() const
Property: Starting virtual address.
Base class for all ROSE exceptions.
Definition: Rose/Exception.h:9
Virtual base class for instruction disassemblers.
Definition: Disassembler.h:50
X86InstructionSize
Intel x86 instruction size constants.
X86RepeatPrefix
Intel x86 instruction repeat prefix.