ROSE  0.11.96.0
DisassemblerX86.h
1 /* Disassembly specific to the x86 architecture. */
2 #ifndef ROSE_BinaryAnalysis_DisassemblerX86_H
3 #define ROSE_BinaryAnalysis_DisassemblerX86_H
4 #include <featureTests.h>
5 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 #include <Rose/BinaryAnalysis/Disassembler.h>
7 
8 #include <Rose/BinaryAnalysis/InstructionEnumsX86.h>
9 #include "Cxx_GrammarSerialization.h"
10 
11 #include <boost/serialization/access.hpp>
12 #include <boost/serialization/base_object.hpp>
13 #include <boost/serialization/export.hpp>
14 #include <boost/serialization/split_member.hpp>
15 
16 namespace Rose {
17 namespace BinaryAnalysis {
18 
22  /* Per-disassembler settings; see init() */
23  X86InstructionSize insnSize;
24  size_t wordSize;
26  /* Per-instruction settings; see startInstruction() */
27  struct State {
28  uint64_t ip;
29  SgUnsignedCharList insnbuf;
30  size_t insnbufat;
32  /* Temporary flags set by the instruction; initialized by startInstruction() */
33  X86SegmentRegister segOverride;
34  X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
35  bool branchPredictionEnabled;
36  bool rexPresent, rexW, rexR, rexX, rexB;
37  bool sizeMustBe64Bit;
38  bool operandSizeOverride;
39  bool addressSizeOverride;
40  bool lock;
41  X86RepeatPrefix repeatPrefix;
42  bool modregrmByteSet;
43  uint8_t modregrmByte;
44  uint8_t modeField;
45  uint8_t regField;
46  uint8_t rmField;
47  SgAsmExpression *modrm;
48  SgAsmExpression *reg;
49  bool isUnconditionalJump;
51  State()
52  : ip(0), insnbufat(0), segOverride(x86_segreg_none), branchPrediction(x86_branch_prediction_none),
53  branchPredictionEnabled(false), rexPresent(false), rexW(false), rexR(false), rexX(false), rexB(false),
54  sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false), lock(false),
55  repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), regField(0),
56  rmField(0), modrm(NULL), reg(NULL), isUnconditionalJump(false) {}
57  };
58 
60  // Serialization
62 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
63 private:
64  friend class boost::serialization::access;
65 
66  template<class S>
67  void serialize_common(S &s, const unsigned /*version*/) {
68  // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
69  // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
70  // constructed in their initial state by a combination of default constructor and init().
71  s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Disassembler);
72  s & BOOST_SERIALIZATION_NVP(wordSize);
73  }
74 
75  template<class S>
76  void save(S &s, const unsigned version) const {
77  serialize_common(s, version);
78  }
79 
80  template<class S>
81  void load(S &s, const unsigned version) {
82  serialize_common(s, version);
83  init(wordSize);
84  }
85 
86  BOOST_SERIALIZATION_SPLIT_MEMBER();
87 #endif
88 
90  // Constructors
92 
93 protected:
94  // Default constructor for serialization
96  : insnSize(x86_insnsize_none), wordSize(0) {}
97 
98 public:
99  explicit DisassemblerX86(size_t wordsize)
100  : insnSize(x86_insnsize_none), wordSize(0) {
101  init(wordsize);
102  }
103 
104  virtual ~DisassemblerX86() {}
105 
106  virtual DisassemblerX86 *clone() const override { return new DisassemblerX86(*this); }
107 
109  // Public methods
111 public:
112  virtual bool canDisassemble(SgAsmGenericHeader*) const override;
113 
114  virtual Unparser::BasePtr unparser() const override;
115 
116  virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va,
117  AddressSet *successors=NULL) override;
118 
119  virtual SgAsmInstruction *makeUnknownInstruction(const Exception&) override;
120 
121 
122  /*========================================================================================================================
123  * Data types
124  *========================================================================================================================*/
125 private:
126 
130  class ExceptionX86: public Exception {
131  public:
132  ExceptionX86(const std::string &mesg, const State &state)
133  : Exception(mesg, state.ip) {
134  ASSERT_require(state.insnbufat <= state.insnbuf.size());
135  if (state.insnbufat > 0)
136  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
137  bit = 8 * state.insnbufat;
138  }
139 
140  ExceptionX86(const std::string &mesg, const State &state, size_t bit)
141  : Exception(mesg, state.ip) {
142  ASSERT_require(state.insnbufat <= state.insnbuf.size());
143  if (state.insnbufat > 0)
144  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
145  this->bit = bit;
146  }
147  };
148 
// Register-mode selector passed to helpers such as makeRegister(), getModRegRM() and fillInModRM(), and produced by
// sizeToMode(). Enumerator order (and therefore the numeric values) is unchanged.
enum RegisterMode {
    rmLegacyByte,
    rmRexByte,
    rmWord,
    rmDWord,
    rmQWord,
    rmSegment,
    rmST,
    rmMM,
    rmXMM,
    rmControl,
    rmDebug,
    rmReturnNull
};
153 
154  /* MMX registers? See mmPrefix method */
// Result type of mmPrefix(). Enumerator order (and therefore the numeric values) is unchanged.
enum MMPrefix {
    mmNone,
    mmF3,
    mm66,
    mmF2
};
158 
159 
160  /*========================================================================================================================
161  * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
162  *========================================================================================================================*/
163 private:
164 
168  uint8_t getByte(State &state) const;
169 
173  uint16_t getWord(State &state) const;
174 
178  uint32_t getDWord(State &state) const;
179 
183  uint64_t getQWord(State &state) const;
184 
185  /*========================================================================================================================
186  * Miscellaneous helper methods
187  *========================================================================================================================*/
188 private:
191  SgAsmExpression *currentDataSegment(State &state) const;
192 
196  X86InstructionSize effectiveAddressSize(State &state) const;
197 
199  RegisterMode effectiveOperandMode(State &state) const {
200  return sizeToMode(effectiveOperandSize(state));
201  }
202 
206  X86InstructionSize effectiveOperandSize(State &state) const;
207 
209  SgAsmType *effectiveOperandType(State &state) const {
210  return sizeToType(effectiveOperandSize(state));
211  }
212 
214  bool longMode() const {
215  return insnSize == x86_insnsize_64;
216  }
217 
218  /* FIXME: documentation? */
219  MMPrefix mmPrefix(State &state) const;
220 
222  void not64(State &state) const {
223  if (longMode())
224  throw ExceptionX86("not valid for 64-bit code", state);
225  }
226 
229  void setRex(State &state, uint8_t prefix) const;
230 
232  static RegisterMode sizeToMode(X86InstructionSize);
233 
236  static SgAsmType *sizeToType(X86InstructionSize s);
237 
238 
239 
240  /*========================================================================================================================
241  * Methods that construct something. (Their names all start with "make".)
242  *========================================================================================================================*/
243 private:
244 
247  SgAsmExpression *makeAddrSizeValue(State &state, int64_t val, size_t bit_offset, size_t bit_size) const;
248 
253  SgAsmX86Instruction *makeInstruction(State &state, X86InstructionKind kind, const std::string &mnemonic,
254  SgAsmExpression *op1=NULL, SgAsmExpression *op2=NULL,
255  SgAsmExpression *op3=NULL, SgAsmExpression *op4=NULL) const;
256 
258  SgAsmRegisterReferenceExpression *makeIP() const;
259 
260  /* FIXME: documentation? */
261  SgAsmRegisterReferenceExpression *makeOperandRegisterByte(State &state, bool rexExtension, uint8_t registerNumber) const;
262 
263  /* FIXME: documentation? */
264  SgAsmRegisterReferenceExpression *makeOperandRegisterFull(State &state, bool rexExtension, uint8_t registerNumber) const;
265 
268  SgAsmRegisterReferenceExpression *makeRegister(State &state, uint8_t fullRegisterNumber, RegisterMode,
269  SgAsmType *registerType=NULL) const;
270 
271  /* FIXME: documentation? */
272  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, uint8_t fullRegisterNumber) const {
273  return makeRegister(state, fullRegisterNumber, effectiveOperandMode(state));
274  }
275 
276  /* FIXME: documentation? */
277  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, bool rexExtension, uint8_t registerNumber) const {
278  return makeRegister(state, registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode(state));
279  }
280 
282  SgAsmExpression *makeSegmentRegister(State &state, X86SegmentRegister so, bool insn64) const;
283 
284 
285 
286  /*========================================================================================================================
287  * Methods for operating on the ModR/M byte.
288  *========================================================================================================================*/
289 private:
290 
304  void getModRegRM(State &state, RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = NULL) const;
305 
307  SgAsmMemoryReferenceExpression *decodeModrmMemory(State &state) const;
308 
311  void fillInModRM(State &state, RegisterMode rmMode, SgAsmType *t) const;
312 
314  SgAsmExpression *makeModrmNormal(State &state, RegisterMode, SgAsmType *mrType) const;
315 
318  SgAsmRegisterReferenceExpression *makeModrmRegister(State &state, RegisterMode, SgAsmType* mrType=NULL) const;
319 
321  void requireMemory(State &state) const {
322  if (!state.modregrmByteSet)
323  throw ExceptionX86("requires Mod/RM byte", state);
324  if (state.modeField == 3)
325  throw ExceptionX86("requires memory", state);
326  }
327 
328 
329 
330  /*========================================================================================================================
331  * Methods that construct an SgAsmExpression for an immediate operand.
332  *========================================================================================================================*/
333 private:
334 
335  SgAsmExpression *getImmByte(State &state) const;
336  SgAsmExpression *getImmWord(State &state) const;
337  SgAsmExpression* getImmDWord(State &state) const;
338  SgAsmExpression* getImmQWord(State &state) const;
339  SgAsmExpression *getImmForAddr(State &state) const;
340  SgAsmExpression *getImmIv(State &state) const;
341  SgAsmExpression *getImmJz(State &state) const;
342  SgAsmExpression *getImmByteAsIv(State &state) const;
343  SgAsmExpression *getImmIzAsIv(State &state) const;
344  SgAsmExpression *getImmJb(State &state) const;
345 
346 
347 
348 
349  /*========================================================================================================================
350  * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
351  *========================================================================================================================*/
352 private:
353 
356  SgAsmX86Instruction *disassemble(State &state) const;
357 
359  SgAsmX86Instruction *decodeOpcode0F(State &state) const;
360 
362  SgAsmX86Instruction *decodeOpcode0F38(State &state) const;
363 
365  SgAsmX86Instruction *decodeX87InstructionD8(State &state) const;
366 
368  SgAsmX86Instruction *decodeX87InstructionD9(State &state) const;
369 
371  SgAsmX86Instruction *decodeX87InstructionDA(State &state) const;
372 
374  SgAsmX86Instruction *decodeX87InstructionDB(State &state) const;
375 
377  SgAsmX86Instruction *decodeX87InstructionDC(State &state) const;
378 
380  SgAsmX86Instruction *decodeX87InstructionDD(State &state) const;
381 
383  SgAsmX86Instruction *decodeX87InstructionDE(State &state) const;
384 
386  SgAsmX86Instruction *decodeX87InstructionDF(State &state) const;
387 
389  SgAsmX86Instruction *decodeGroup1(State &state, SgAsmExpression *imm) const;
390 
392  SgAsmX86Instruction *decodeGroup1a(State &state) const;
393 
395  SgAsmX86Instruction *decodeGroup2(State &state, SgAsmExpression *count) const;
396 
398  SgAsmX86Instruction *decodeGroup3(State &state, SgAsmExpression *immMaybe) const;
399 
401  SgAsmX86Instruction *decodeGroup4(State &state) const;
402 
404  SgAsmX86Instruction *decodeGroup5(State &state) const;
405 
407  SgAsmX86Instruction *decodeGroup6(State &state) const;
408 
411  SgAsmX86Instruction *decodeGroup7(State &state) const;
412 
414  SgAsmX86Instruction *decodeGroup8(State &state, SgAsmExpression *imm) const;
415 
417  SgAsmX86Instruction *decodeGroup11(State &state, SgAsmExpression *imm) const;
418 
420  SgAsmX86Instruction *decodeGroup15(State &state) const;
421 
423  SgAsmX86Instruction *decodeGroup16(State &state) const;
424 
426  SgAsmX86Instruction *decodeGroupP(State &state) const;
427 
428 
429 
430  /*========================================================================================================================
431  * Supporting functions
432  *========================================================================================================================*/
433 private:
434  // Initialize instances of this class. Called by constructor.
435  void init(size_t wordsize);
436 
437 #if 0 // is this ever used?
438 
439  void startInstruction(State &state, SgAsmX86Instruction *insn) const {
440  startInstruction(insn->get_address(), NULL, 0);
441  insnSize = insn->get_baseSize();
442  state.lock = insn->get_lockPrefix();
443  state.branchPrediction = insn->get_branchPrediction();
444  state.branchPredictionEnabled = state.branchPrediction != x86_branch_prediction_none;
445  state.segOverride = insn->get_segmentOverride();
446  }
447 #endif
448 
449  // Resets disassembler state to beginning of an instruction for disassembly.
450  void startInstruction(State &state, rose_addr_t start_va, const uint8_t *buf, size_t bufsz) const {
451  state.ip = start_va;
452  state.insnbuf = SgUnsignedCharList(buf, buf+bufsz);
453  state.insnbufat = 0;
454 
455  // Prefix flags
456  state.segOverride = x86_segreg_none;
457  state.branchPrediction = x86_branch_prediction_none;
458  state.branchPredictionEnabled = false;
459  state.rexPresent = state.rexW = state.rexR = state.rexX = state.rexB = false;
460  state.sizeMustBe64Bit = false;
461  state.operandSizeOverride = false;
462  state.addressSizeOverride = false;
463  state.lock = false;
464  state.repeatPrefix = x86_repeat_none;
465  state.modregrmByteSet = false;
466  state.modregrmByte = state.modeField = state.regField = state.rmField = 0; /*arbitrary since modregrmByteSet is false*/
467  state.modrm = state.reg = NULL;
468  state.isUnconditionalJump = false;
469  }
470 
471  // Add comments to any IP relative addition expressions. We're not constant folding these because it's sometimes useful to
472  // know that the address is relative to the instruction address, but the comment is useful for understanding the disassembly.
473  void commentIpRelative(SgAsmInstruction*);
474 };
475 
476 } // namespace
477 } // namespace
478 
479 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
480 BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::DisassemblerX86);
481 #endif
482 
483 #endif
484 #endif
virtual DisassemblerX86 * clone() const override
Creates a new copy of a disassembler.
virtual bool canDisassemble(SgAsmGenericHeader *) const override
Predicate determining the suitability of a disassembler for a specific file header.
Instruction is for a 64-bit architecture.
Base class for references to a machine register.
Base class for machine instructions.
X86BranchPrediction
Intel x86 branch prediction types.
Rose::BinaryAnalysis::X86InstructionSize get_baseSize() const
Property: An enum constant describing the base size of an x86 instruction.
Rose::BinaryAnalysis::X86BranchPrediction get_branchPrediction() const
Property: An enum constant describing branch prediction.
Rose::BinaryAnalysis::X86SegmentRegister get_segmentOverride() const
Property: The segment override register.
Main namespace for the ROSE library.
Reference to memory locations.
Base class for container file headers.
bool get_lockPrefix() const
Property: Whether the x86 lock prefix was present.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) override
Makes an unknown instruction from an exception.
Represents one Intel x86 machine instruction.
virtual Unparser::BasePtr unparser() const override
Unparser.
Base class for expressions.
Base class for binary types.
Disassembler for the x86 architecture.
X86SegmentRegister
Intel x86 segment registers.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
rose_addr_t get_address() const
Property: Starting virtual address.
Base class for all ROSE exceptions.
Definition: Rose/Exception.h:9
Virtual base class for instruction disassemblers.
Definition: Disassembler.h:50
X86InstructionSize
Intel x86 instruction size constants.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va, AddressSet *successors=NULL) override
This is the lowest level disassembly function and is implemented in the architecture-specific subclasses.
X86RepeatPrefix
Intel x86 instruction repeat prefix.