ROSE  0.11.22.0
DisassemblerX86.h
1 /* Disassembly specific to the x86 architecture. */
2 
3 #ifndef ROSE_DISASSEMBLER_X86_H
4 #define ROSE_DISASSEMBLER_X86_H
5 
6 #include <featureTests.h>
7 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
8 #include "Disassembler.h"
9 
10 #include "InstructionEnumsX86.h"
11 #include "Cxx_GrammarSerialization.h"
12 
13 #include <boost/serialization/access.hpp>
14 #include <boost/serialization/base_object.hpp>
15 #include <boost/serialization/export.hpp>
16 #include <boost/serialization/split_member.hpp>
17 
18 namespace Rose {
19 namespace BinaryAnalysis {
20 
24  /* Per-disassembler settings; see init() */
/* insnSize: starts as x86_insnsize_none in both constructors; longMode() compares it against x86_insnsize_64. */
25  X86InstructionSize insnSize;
/* wordSize: the only data member of this class that serialize_common() archives; load() passes the restored value to
 * init(). NOTE(review): units (bytes vs. bits) are not visible in this header -- confirm against init(). */
26  size_t wordSize;
28  /* Per-instruction settings; see startInstruction() */
29  struct State {
30  uint64_t ip;
31  SgUnsignedCharList insnbuf;
32  size_t insnbufat;
34  /* Temporary flags set by the instruction; initialized by startInstruction() */
35  X86SegmentRegister segOverride;
36  X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
37  bool branchPredictionEnabled;
38  bool rexPresent, rexW, rexR, rexX, rexB;
39  bool sizeMustBe64Bit;
40  bool operandSizeOverride;
41  bool addressSizeOverride;
42  bool lock;
43  X86RepeatPrefix repeatPrefix;
44  bool modregrmByteSet;
45  uint8_t modregrmByte;
46  uint8_t modeField;
47  uint8_t regField;
48  uint8_t rmField;
49  SgAsmExpression *modrm;
50  SgAsmExpression *reg;
51  bool isUnconditionalJump;
53  State()
54  : ip(0), insnbufat(0), segOverride(x86_segreg_none), branchPrediction(x86_branch_prediction_none),
55  branchPredictionEnabled(false), rexPresent(false), rexW(false), rexR(false), rexX(false), rexB(false),
56  sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false), lock(false),
57  repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), rmField(0),
58  modrm(NULL), reg(NULL), isUnconditionalJump(false) {}
59  };
60 
62  // Serialization
64 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
65 private:
66  friend class boost::serialization::access;
67 
// Archives the Disassembler base-class subobject followed by wordSize. Called by both save() and load(); the version
// argument is ignored.
68  template<class S>
69  void serialize_common(S &s, const unsigned /*version*/) {
70  // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
71  // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
72  // constructed in their initial state by a combination of default constructor and init().
73  s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Disassembler);
74  s & BOOST_SERIALIZATION_NVP(wordSize);
75  }
76 
// Save half of the split save()/load() pair (see BOOST_SERIALIZATION_SPLIT_MEMBER below); forwards to serialize_common().
// NOTE(review): this method is const while serialize_common() is declared non-const -- confirm that this instantiates
// cleanly with the archive types in use.
77  template<class S>
78  void save(S &s, const unsigned version) const {
79  serialize_common(s, version);
80  }
81 
// Load half of the split save()/load() pair: reads the archived members through serialize_common(), then calls
// init(wordSize) so the members that are not archived are set up again from the restored word size.
82  template<class S>
83  void load(S &s, const unsigned version) {
84  serialize_common(s, version);
85  init(wordSize);
86  }
87 
88  BOOST_SERIALIZATION_SPLIT_MEMBER();
89 #endif
90 
92  // Constructors
94 
95 protected:
96  // Default constructor for serialization
98  : insnSize(x86_insnsize_none), wordSize(0) {}
99 
100 public:
101  explicit DisassemblerX86(size_t wordsize)
102  : insnSize(x86_insnsize_none), wordSize(0) {
103  init(wordsize);
104  }
105 
106  virtual ~DisassemblerX86() {}
107 
108  virtual DisassemblerX86 *clone() const ROSE_OVERRIDE { return new DisassemblerX86(*this); }
109 
111  // Public methods
113 public:
// Predicate determining the suitability of this disassembler for a specific file header.
114  virtual bool canDisassemble(SgAsmGenericHeader*) const ROSE_OVERRIDE;
115 
// Returns the unparser (an Unparser::BasePtr) associated with this disassembler.
116  virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE;
117 
// Lowest-level disassembly entry point: disassembles one instruction located at start_va in the given memory map. The
// optional successors set defaults to NULL. NOTE(review): how successors is filled in is not visible in this header.
118  virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va,
119  AddressSet *successors=NULL) ROSE_OVERRIDE;
120 
// Makes an "unknown" instruction from an exception.
121  virtual SgAsmInstruction *makeUnknownInstruction(const Exception&) ROSE_OVERRIDE;
122 
123 
124  /*========================================================================================================================
125  * Data types
126  *========================================================================================================================*/
127 private:
128 
132  class ExceptionX86: public Exception {
133  public:
134  ExceptionX86(const std::string &mesg, const State &state)
135  : Exception(mesg, state.ip) {
136  ASSERT_require(state.insnbufat <= state.insnbuf.size());
137  if (state.insnbufat > 0)
138  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
139  bit = 8 * state.insnbufat;
140  }
141 
142  ExceptionX86(const std::string &mesg, const State &state, size_t bit)
143  : Exception(mesg, state.ip) {
144  ASSERT_require(state.insnbufat <= state.insnbuf.size());
145  if (state.insnbufat > 0)
146  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
147  this->bit = bit;
148  }
149  };
150 
// Register-class selector accepted by makeRegister(), getModRegRM(), fillInModRM(), makeModrmNormal() and
// makeModrmRegister(), and produced by sizeToMode(). Enumerators keep their implicit values (0 through 11).
enum RegisterMode {
    rmLegacyByte,
    rmRexByte,
    rmWord,
    rmDWord,
    rmQWord,
    rmSegment,
    rmST,
    rmMM,
    rmXMM,
    rmControl,
    rmDebug,
    rmReturnNull
};
155 
/* Result type of the mmPrefix() method. NOTE(review): the original comment only asked "MMX registers?"; the enumerator
 * names look like the prefix bytes 0xF3, 0x66 and 0xF2 -- confirm against mmPrefix(). */
enum MMPrefix {
    mmNone,
    mmF3,
    mm66,
    mmF2
};
160 
161 
162  /*========================================================================================================================
163  * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
164  *========================================================================================================================*/
165 private:
166 
// Byte readers for the instruction being decoded; each returns an unsigned value of the width shown in its return type.
// The definitions are not in this header. NOTE(review): per the section banner these keep track of how much has been read,
// presumably through state.insnbufat -- confirm in the implementation file.
170  uint8_t getByte(State &state) const;
171 
175  uint16_t getWord(State &state) const;
176 
180  uint32_t getDWord(State &state) const;
181 
185  uint64_t getQWord(State &state) const;
186 
187  /*========================================================================================================================
188  * Miscellaneous helper methods
189  *========================================================================================================================*/
190 private:
193  SgAsmExpression *currentDataSegment(State &state) const;
194 
198  X86InstructionSize effectiveAddressSize(State &state) const;
199 
201  RegisterMode effectiveOperandMode(State &state) const {
202  return sizeToMode(effectiveOperandSize(state));
203  }
204 
208  X86InstructionSize effectiveOperandSize(State &state) const;
209 
211  SgAsmType *effectiveOperandType(State &state) const {
212  return sizeToType(effectiveOperandSize(state));
213  }
214 
216  bool longMode() const {
217  return insnSize == x86_insnsize_64;
218  }
219 
220  /* FIXME: documentation? */
221  MMPrefix mmPrefix(State &state) const;
222 
224  void not64(State &state) const {
225  if (longMode())
226  throw ExceptionX86("not valid for 64-bit code", state);
227  }
228 
231  void setRex(State &state, uint8_t prefix) const;
232 
234  static RegisterMode sizeToMode(X86InstructionSize);
235 
238  static SgAsmType *sizeToType(X86InstructionSize s);
239 
240 
241 
242  /*========================================================================================================================
243  * Methods that construct something. (Their names all start with "make".)
244  *========================================================================================================================*/
245 private:
246 
249  SgAsmExpression *makeAddrSizeValue(State &state, int64_t val, size_t bit_offset, size_t bit_size) const;
250 
255  SgAsmX86Instruction *makeInstruction(State &state, X86InstructionKind kind, const std::string &mnemonic,
256  SgAsmExpression *op1=NULL, SgAsmExpression *op2=NULL,
257  SgAsmExpression *op3=NULL, SgAsmExpression *op4=NULL) const;
258 
260  SgAsmRegisterReferenceExpression *makeIP() const;
261 
262  /* FIXME: documentation? */
263  SgAsmRegisterReferenceExpression *makeOperandRegisterByte(State &state, bool rexExtension, uint8_t registerNumber) const;
264 
265  /* FIXME: documentation? */
266  SgAsmRegisterReferenceExpression *makeOperandRegisterFull(State &state, bool rexExtension, uint8_t registerNumber) const;
267 
270  SgAsmRegisterReferenceExpression *makeRegister(State &state, uint8_t fullRegisterNumber, RegisterMode,
271  SgAsmType *registerType=NULL) const;
272 
273  /* FIXME: documentation? */
274  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, uint8_t fullRegisterNumber) const {
275  return makeRegister(state, fullRegisterNumber, effectiveOperandMode(state));
276  }
277 
278  /* FIXME: documentation? */
279  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, bool rexExtension, uint8_t registerNumber) const {
280  return makeRegister(state, registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode(state));
281  }
282 
284  SgAsmExpression *makeSegmentRegister(State &state, X86SegmentRegister so, bool insn64) const;
285 
286 
287 
288  /*========================================================================================================================
289  * Methods for operating on the ModR/M byte.
290  *========================================================================================================================*/
291 private:
292 
306  void getModRegRM(State &state, RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = NULL) const;
307 
309  SgAsmMemoryReferenceExpression *decodeModrmMemory(State &state) const;
310 
313  void fillInModRM(State &state, RegisterMode rmMode, SgAsmType *t) const;
314 
316  SgAsmExpression *makeModrmNormal(State &state, RegisterMode, SgAsmType *mrType) const;
317 
320  SgAsmRegisterReferenceExpression *makeModrmRegister(State &state, RegisterMode, SgAsmType* mrType=NULL) const;
321 
323  void requireMemory(State &state) const {
324  if (!state.modregrmByteSet)
325  throw ExceptionX86("requires Mod/RM byte", state);
326  if (state.modeField == 3)
327  throw ExceptionX86("requires memory", state);
328  }
329 
330 
331 
332  /*========================================================================================================================
333  * Methods that construct an SgAsmExpression for an immediate operand.
334  *========================================================================================================================*/
335 private:
336 
337  SgAsmExpression *getImmByte(State &state) const;
338  SgAsmExpression *getImmWord(State &state) const;
339  SgAsmExpression* getImmDWord(State &state) const;
340  SgAsmExpression* getImmQWord(State &state) const;
341  SgAsmExpression *getImmForAddr(State &state) const;
342  SgAsmExpression *getImmIv(State &state) const;
343  SgAsmExpression *getImmJz(State &state) const;
344  SgAsmExpression *getImmByteAsIv(State &state) const;
345  SgAsmExpression *getImmIzAsIv(State &state) const;
346  SgAsmExpression *getImmJb(State &state) const;
347 
348 
349 
350 
351  /*========================================================================================================================
352  * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
353  *========================================================================================================================*/
354 private:
355 
358  SgAsmX86Instruction *disassemble(State &state) const;
359 
361  SgAsmX86Instruction *decodeOpcode0F(State &state) const;
362 
364  SgAsmX86Instruction *decodeOpcode0F38(State &state) const;
365 
367  SgAsmX86Instruction *decodeX87InstructionD8(State &state) const;
368 
370  SgAsmX86Instruction *decodeX87InstructionD9(State &state) const;
371 
373  SgAsmX86Instruction *decodeX87InstructionDA(State &state) const;
374 
376  SgAsmX86Instruction *decodeX87InstructionDB(State &state) const;
377 
379  SgAsmX86Instruction *decodeX87InstructionDC(State &state) const;
380 
382  SgAsmX86Instruction *decodeX87InstructionDD(State &state) const;
383 
385  SgAsmX86Instruction *decodeX87InstructionDE(State &state) const;
386 
388  SgAsmX86Instruction *decodeX87InstructionDF(State &state) const;
389 
391  SgAsmX86Instruction *decodeGroup1(State &state, SgAsmExpression *imm) const;
392 
394  SgAsmX86Instruction *decodeGroup1a(State &state) const;
395 
397  SgAsmX86Instruction *decodeGroup2(State &state, SgAsmExpression *count) const;
398 
400  SgAsmX86Instruction *decodeGroup3(State &state, SgAsmExpression *immMaybe) const;
401 
403  SgAsmX86Instruction *decodeGroup4(State &state) const;
404 
406  SgAsmX86Instruction *decodeGroup5(State &state) const;
407 
409  SgAsmX86Instruction *decodeGroup6(State &state) const;
410 
413  SgAsmX86Instruction *decodeGroup7(State &state) const;
414 
416  SgAsmX86Instruction *decodeGroup8(State &state, SgAsmExpression *imm) const;
417 
419  SgAsmX86Instruction *decodeGroup11(State &state, SgAsmExpression *imm) const;
420 
422  SgAsmX86Instruction *decodeGroup15(State &state) const;
423 
425  SgAsmX86Instruction *decodeGroup16(State &state) const;
426 
428  SgAsmX86Instruction *decodeGroupP(State &state) const;
429 
430 
431 
432  /*========================================================================================================================
433  * Supporting functions
434  *========================================================================================================================*/
435 private:
436  // Initialize instances of this class. Called by constructor.
437  void init(size_t wordsize);
438 
// Disabled overload that would seed the per-instruction state from an existing SgAsmX86Instruction.
// NOTE(review): it cannot simply be re-enabled as written -- it calls startInstruction() with three arguments, whereas the
// overload defined below takes four (the State first), and it assigns to insnSize from a const member function.
439 #if 0 // is this ever used?
440 
441  void startInstruction(State &state, SgAsmX86Instruction *insn) const {
442  startInstruction(insn->get_address(), NULL, 0);
443  insnSize = insn->get_baseSize();
444  state.lock = insn->get_lockPrefix();
445  state.branchPrediction = insn->get_branchPrediction();
446  state.branchPredictionEnabled = state.branchPrediction != x86_branch_prediction_none;
447  state.segOverride = insn->get_segmentOverride();
448  }
449 #endif
450 
451  // Resets disassembler state to beginning of an instruction for disassembly.
452  void startInstruction(State &state, rose_addr_t start_va, const uint8_t *buf, size_t bufsz) const {
453  state.ip = start_va;
454  state.insnbuf = SgUnsignedCharList(buf, buf+bufsz);
455  state.insnbufat = 0;
456 
457  // Prefix flags
458  state.segOverride = x86_segreg_none;
459  state.branchPrediction = x86_branch_prediction_none;
460  state.branchPredictionEnabled = false;
461  state.rexPresent = state.rexW = state.rexR = state.rexX = state.rexB = false;
462  state.sizeMustBe64Bit = false;
463  state.operandSizeOverride = false;
464  state.addressSizeOverride = false;
465  state.lock = false;
466  state.repeatPrefix = x86_repeat_none;
467  state.modregrmByteSet = false;
468  state.modregrmByte = state.modeField = state.regField = state.rmField = 0; /*arbitrary since modregrmByteSet is false*/
469  state.modrm = state.reg = NULL;
470  state.isUnconditionalJump = false;
471  }
472 
473  // Add comments to any IP relative addition expressions. We're not constant folding these because it's sometimes useful to
474  // know that the address is relative to the instruction address, but the comment is useful for understanding the disassembly.
475  void commentIpRelative(SgAsmInstruction*);
476 };
477 
478 } // namespace
479 } // namespace
480 
481 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
482 BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::DisassemblerX86);
483 #endif
484 
485 #endif
486 #endif
Instruction is for a 64-bit architecture.
Base class for references to a machine register.
Base class for machine instructions.
X86BranchPrediction
Intel x86 branch prediction types.
Rose::BinaryAnalysis::X86InstructionSize get_baseSize() const
Property: An enum constant describing the base size of an x86 instruction.
Rose::BinaryAnalysis::X86BranchPrediction get_branchPrediction() const
Property: An enum constant describing branch prediction.
Rose::BinaryAnalysis::X86SegmentRegister get_segmentOverride() const
Property: The segment override register.
Main namespace for the ROSE library.
virtual Unparser::BasePtr unparser() const ROSE_OVERRIDE
Unparser.
virtual bool canDisassemble(SgAsmGenericHeader *) const ROSE_OVERRIDE
Predicate determining the suitability of a disassembler for a specific file header.
Reference to memory locations.
Base class for container file headers.
bool get_lockPrefix() const
Property: Whether the x86 lock prefix was present.
An efficient mapping from an address space to stored data.
Definition: MemoryMap.h:112
virtual DisassemblerX86 * clone() const ROSE_OVERRIDE
Creates a new copy of a disassembler.
Represents one Intel x86 machine instruction.
Base class for expressions.
Base class for binary types.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) ROSE_OVERRIDE
Makes an unknown instruction from an exception.
Disassembler for the x86 architecture.
X86SegmentRegister
Intel x86 segment registers.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va, AddressSet *successors=NULL) ROSE_OVERRIDE
This is the lowest level disassembly function and is implemented in the architecture-specific subclasses.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
rose_addr_t get_address() const
Property: Starting virtual address.
Base class for all ROSE exceptions.
Definition: RoseException.h:9
Virtual base class for instruction disassemblers.
Definition: Disassembler.h:50
X86InstructionSize
Intel x86 instruction size constants.
X86RepeatPrefix
Intel x86 instruction repeat prefix.