ROSE 0.11.145.237
Disassembler/X86.h
1/* Disassembly specific to the x86 architecture. */
2#ifndef ROSE_BinaryAnalysis_Disassembler_X86_H
3#define ROSE_BinaryAnalysis_Disassembler_X86_H
4#include <featureTests.h>
5#ifdef ROSE_ENABLE_BINARY_ANALYSIS
6#include <Rose/BinaryAnalysis/Disassembler/Base.h>
7
8#include <Rose/BinaryAnalysis/Architecture/BasicTypes.h>
9#include <Rose/BinaryAnalysis/InstructionEnumsX86.h>
10#include "Cxx_GrammarSerialization.h"
11
12#ifdef ROSE_ENABLE_BOOST_SERIALIZATION
13#include <boost/serialization/access.hpp>
14#include <boost/serialization/base_object.hpp>
15#include <boost/serialization/export.hpp>
16#include <boost/serialization/split_member.hpp>
17#endif
18
19namespace Rose {
20namespace BinaryAnalysis {
21namespace Disassembler {
22
25class X86: public Base {
26public:
28 using Ptr = X86Ptr;
29
30private:
31 /* Per-disassembler settings; see init() */
32 X86InstructionSize insnSize;
33 size_t wordSize;
35 /* Per-instruction settings; see startInstruction() */
36 struct State {
37 uint64_t ip;
38 SgUnsignedCharList insnbuf;
39 size_t insnbufat;
41 /* Temporary flags set by the instruction; initialized by startInstruction() */
42 X86SegmentRegister segOverride;
43 X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
44 bool branchPredictionEnabled;
45 bool rexPresent, rexW, rexR, rexX, rexB;
46 bool sizeMustBe64Bit;
47 bool operandSizeOverride;
48 bool addressSizeOverride;
49 bool lock;
50 X86RepeatPrefix repeatPrefix;
51 bool modregrmByteSet;
52 uint8_t modregrmByte;
53 uint8_t modeField;
54 uint8_t regField;
55 uint8_t rmField;
56 SgAsmExpression *modrm;
57 SgAsmExpression *reg;
58 bool isUnconditionalJump;
60 State()
61 : ip(0), insnbufat(0), segOverride(x86_segreg_none), branchPrediction(x86_branch_prediction_none),
62 branchPredictionEnabled(false), rexPresent(false), rexW(false), rexR(false), rexX(false), rexB(false),
63 sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false), lock(false),
64 repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), regField(0),
65 rmField(0), modrm(nullptr), reg(nullptr), isUnconditionalJump(false) {}
66 };
67
69 // Serialization
71#ifdef ROSE_ENABLE_BOOST_SERIALIZATION
72private:
73 friend class boost::serialization::access;
74
75 template<class S>
76 void serialize_common(S &s, const unsigned /*version*/) {
77 // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
78 // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
79 // constructed in their initial state by a combination of default constructor and init().
80 s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Base);
81 s & BOOST_SERIALIZATION_NVP(wordSize);
82 }
83
84 template<class S>
85 void save(S &s, const unsigned version) const {
86 serialize_common(s, version);
87 }
88
89 template<class S>
90 void load(S &s, const unsigned version) {
91 serialize_common(s, version);
92 init(wordSize);
93 }
94
95 BOOST_SERIALIZATION_SPLIT_MEMBER();
96#endif
97
99 // Constructors
101
102protected:
103 explicit X86(const Architecture::BaseConstPtr&);
104
105public:
108
109 virtual ~X86() {}
110
111 virtual Base::Ptr clone() const override;
112
114 // Public methods
116public:
117 virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, Address va, AddressSet *successors=nullptr) override;
118
120
121
122 /*========================================================================================================================
123 * Data types
124 *========================================================================================================================*/
125private:
126
130 class ExceptionX86: public Exception {
131 public:
132 ExceptionX86(const std::string &mesg, const State &state)
133 : Exception(mesg, state.ip) {
134 ASSERT_require(state.insnbufat <= state.insnbuf.size());
135 if (state.insnbufat > 0)
136 bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
137 bit = 8 * state.insnbufat;
138 }
139
140 ExceptionX86(const std::string &mesg, const State &state, size_t bit)
141 : Exception(mesg, state.ip) {
142 ASSERT_require(state.insnbufat <= state.insnbuf.size());
143 if (state.insnbufat > 0)
144 bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
145 this->bit = bit;
146 }
147 };
148
/* Register class selector; returned by sizeToMode() and accepted by makeRegister() and the ModR/M helpers. */
enum RegisterMode {
    rmLegacyByte,
    rmRexByte,
    rmWord,
    rmDWord,
    rmQWord,
    rmSegment,
    rmST,
    rmMM,
    rmXMM,
    rmControl,
    rmDebug,
    rmReturnNull
};
153
/* Result type of the mmPrefix() method. MMX registers? NOTE(review): the enumerator names look like the F3/66/F2
 * prefix bytes; confirm against mmPrefix(). */
enum MMPrefix {
    mmNone,
    mmF3,
    mm66,
    mmF2
};
158
159
160 /*========================================================================================================================
161 * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
162 *========================================================================================================================*/
163private:
164
168 uint8_t getByte(State &state) const;
169
173 uint16_t getWord(State &state) const;
174
178 uint32_t getDWord(State &state) const;
179
183 uint64_t getQWord(State &state) const;
184
185 /*========================================================================================================================
186 * Miscellaneous helper methods
187 *========================================================================================================================*/
188private:
191 SgAsmExpression *currentDataSegment(State &state) const;
192
196 X86InstructionSize effectiveAddressSize(State &state) const;
197
199 RegisterMode effectiveOperandMode(State &state) const {
200 return sizeToMode(effectiveOperandSize(state));
201 }
202
206 X86InstructionSize effectiveOperandSize(State &state) const;
207
209 SgAsmType *effectiveOperandType(State &state) const {
210 return sizeToType(effectiveOperandSize(state));
211 }
212
214 bool longMode() const {
215 return insnSize == x86_insnsize_64;
216 }
217
218 /* FIXME: documentation? */
219 MMPrefix mmPrefix(State &state) const;
220
222 void not64(State &state) const {
223 if (longMode())
224 throw ExceptionX86("not valid for 64-bit code", state);
225 }
226
229 void setRex(State &state, uint8_t prefix) const;
230
232 static RegisterMode sizeToMode(X86InstructionSize);
233
236 static SgAsmType *sizeToType(X86InstructionSize s);
237
238
239
240 /*========================================================================================================================
241 * Methods that construct something. (Their names all start with "make".)
242 *========================================================================================================================*/
243private:
244
247 SgAsmExpression *makeAddrSizeValue(State &state, int64_t val, size_t bit_offset, size_t bit_size) const;
248
253 SgAsmX86Instruction *makeInstruction(State &state, X86InstructionKind kind,
254 SgAsmExpression *op1=nullptr, SgAsmExpression *op2=nullptr,
255 SgAsmExpression *op3=nullptr, SgAsmExpression *op4=nullptr) const;
256
258 SgAsmRegisterReferenceExpression *makeIP() const;
259
260 /* FIXME: documentation? */
261 SgAsmRegisterReferenceExpression *makeOperandRegisterByte(State &state, bool rexExtension, uint8_t registerNumber) const;
262
263 /* FIXME: documentation? */
264 SgAsmRegisterReferenceExpression *makeOperandRegisterFull(State &state, bool rexExtension, uint8_t registerNumber) const;
265
268 SgAsmRegisterReferenceExpression *makeRegister(State &state, uint8_t fullRegisterNumber, RegisterMode,
269 SgAsmType *registerType=nullptr) const;
270
271 /* FIXME: documentation? */
272 SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, uint8_t fullRegisterNumber) const {
273 return makeRegister(state, fullRegisterNumber, effectiveOperandMode(state));
274 }
275
276 /* FIXME: documentation? */
277 SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, bool rexExtension, uint8_t registerNumber) const {
278 return makeRegister(state, registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode(state));
279 }
280
282 SgAsmExpression *makeSegmentRegister(State &state, X86SegmentRegister so, bool insn64) const;
283
284
285
286 /*========================================================================================================================
287 * Methods for operating on the ModR/M byte.
288 *========================================================================================================================*/
289private:
290
304 void getModRegRM(State &state, RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = nullptr) const;
305
307 SgAsmMemoryReferenceExpression *decodeModrmMemory(State &state) const;
308
311 void fillInModRM(State &state, RegisterMode rmMode, SgAsmType *t) const;
312
314 SgAsmExpression *makeModrmNormal(State &state, RegisterMode, SgAsmType *mrType) const;
315
318 SgAsmRegisterReferenceExpression *makeModrmRegister(State &state, RegisterMode, SgAsmType* mrType=nullptr) const;
319
321 void requireMemory(State &state) const {
322 if (!state.modregrmByteSet)
323 throw ExceptionX86("requires Mod/RM byte", state);
324 if (state.modeField == 3)
325 throw ExceptionX86("requires memory", state);
326 }
327
328
329
330 /*========================================================================================================================
331 * Methods that construct an SgAsmExpression for an immediate operand.
332 *========================================================================================================================*/
333private:
334
335 SgAsmExpression *getImmByte(State &state) const;
336 SgAsmExpression *getImmWord(State &state) const;
337 SgAsmExpression* getImmDWord(State &state) const;
338 SgAsmExpression* getImmQWord(State &state) const;
339 SgAsmExpression *getImmForAddr(State &state) const;
340 SgAsmExpression *getImmIv(State &state) const;
341 SgAsmExpression *getImmJz(State &state) const;
342 SgAsmExpression *getImmByteAsIv(State &state) const;
343 SgAsmExpression *getImmIzAsIv(State &state) const;
344 SgAsmExpression *getImmJb(State &state) const;
345
346
347
348
349 /*========================================================================================================================
350 * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
351 *========================================================================================================================*/
352private:
353
356 SgAsmX86Instruction *disassemble(State &state) const;
357
359 SgAsmX86Instruction *decodeOpcode0F(State &state) const;
360
362 SgAsmX86Instruction *decodeOpcode0F38(State &state) const;
363
365 SgAsmX86Instruction *decodeX87InstructionD8(State &state) const;
366
368 SgAsmX86Instruction *decodeX87InstructionD9(State &state) const;
369
371 SgAsmX86Instruction *decodeX87InstructionDA(State &state) const;
372
374 SgAsmX86Instruction *decodeX87InstructionDB(State &state) const;
375
377 SgAsmX86Instruction *decodeX87InstructionDC(State &state) const;
378
380 SgAsmX86Instruction *decodeX87InstructionDD(State &state) const;
381
383 SgAsmX86Instruction *decodeX87InstructionDE(State &state) const;
384
386 SgAsmX86Instruction *decodeX87InstructionDF(State &state) const;
387
389 SgAsmX86Instruction *decodeGroup1(State &state, SgAsmExpression *imm) const;
390
392 SgAsmX86Instruction *decodeGroup1a(State &state) const;
393
395 SgAsmX86Instruction *decodeGroup2(State &state, SgAsmExpression *count) const;
396
398 SgAsmX86Instruction *decodeGroup3(State &state, SgAsmExpression *immMaybe) const;
399
401 SgAsmX86Instruction *decodeGroup4(State &state) const;
402
404 SgAsmX86Instruction *decodeGroup5(State &state) const;
405
407 SgAsmX86Instruction *decodeGroup6(State &state) const;
408
411 SgAsmX86Instruction *decodeGroup7(State &state) const;
412
414 SgAsmX86Instruction *decodeGroup8(State &state, SgAsmExpression *imm) const;
415
417 SgAsmX86Instruction *decodeGroup11(State &state, SgAsmExpression *imm) const;
418
420 SgAsmX86Instruction *decodeGroup15(State &state) const;
421
423 SgAsmX86Instruction *decodeGroup16(State &state) const;
424
426 SgAsmX86Instruction *decodeGroupP(State &state) const;
427
428
429
430 /*========================================================================================================================
431 * Supporting functions
432 *========================================================================================================================*/
433private:
434 // Initialize instances of this class. Called by constructor.
435 void init(size_t wordsize);
436
437#if 0 // is this ever used?
// NOTE(review): this disabled overload would not compile if re-enabled as written: it calls startInstruction() with
// three arguments and no State, whereas the live overload takes (State&, Address, const uint8_t*, size_t); and it
// assigns the data member insnSize from within a const member function.
439 void startInstruction(State &state, SgAsmX86Instruction *insn) const {
440 startInstruction(insn->get_address(), nullptr, 0);
441 insnSize = insn->get_baseSize();
442 state.lock = insn->get_lockPrefix();
443 state.branchPrediction = insn->get_branchPrediction();
444 state.branchPredictionEnabled = state.branchPrediction != x86_branch_prediction_none;
445 state.segOverride = insn->get_segmentOverride();
446 }
447#endif
448
449 // Resets disassembler state to beginning of an instruction for disassembly.
450 void startInstruction(State &state, Address start_va, const uint8_t *buf, size_t bufsz) const {
451 state.ip = start_va;
452 state.insnbuf = SgUnsignedCharList(buf, buf+bufsz);
453 state.insnbufat = 0;
454
455 // Prefix flags
456 state.segOverride = x86_segreg_none;
457 state.branchPrediction = x86_branch_prediction_none;
458 state.branchPredictionEnabled = false;
459 state.rexPresent = state.rexW = state.rexR = state.rexX = state.rexB = false;
460 state.sizeMustBe64Bit = false;
461 state.operandSizeOverride = false;
462 state.addressSizeOverride = false;
463 state.lock = false;
464 state.repeatPrefix = x86_repeat_none;
465 state.modregrmByteSet = false;
466 state.modregrmByte = state.modeField = state.regField = state.rmField = 0; /*arbitrary since modregrmByteSet is false*/
467 state.modrm = state.reg = nullptr;
468 state.isUnconditionalJump = false;
469 }
470
471 // Add comments to any IP relative addition expressions. We're not constant folding these because it's sometimes useful to
472 // know that the address is relative to the instruction address, but the comment is useful for understanding the disassembly.
473 void commentIpRelative(SgAsmInstruction*);
474};
475
476} // namespace
477} // namespace
478} // namespace
479
480#ifdef ROSE_ENABLE_BOOST_SERIALIZATION
481BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::Disassembler::X86);
482#endif
483
484#endif
485#endif
Virtual base class for instruction disassemblers.
size_t bit
Bit offset in instruction byte sequence where disassembly failed (bit/8 is the index into the "bytes"...
SgUnsignedCharList bytes
Bytes (partial) of failed disassembly, including byte at failure.
Address ip
Virtual address where failure occurred; zero if no associated instruction.
Disassembler for the x86 architecture.
static Ptr instance(const Architecture::BaseConstPtr &)
Allocating constructor.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) override
Makes an unknown instruction from an exception.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, Address va, AddressSet *successors=nullptr) override
This is the lowest level disassembly function and is implemented in the architecture-specific subclas...
virtual Base::Ptr clone() const override
Creates a new copy of a disassembler.
Reference-counting intrusive smart pointer.
Base class for expressions.
Base class for machine instructions.
Reference to memory locations.
Base class for references to a machine register.
Base class for binary types.
Represents one Intel x86 machine instruction.
std::shared_ptr< const Base > BaseConstPtr
Reference counted pointer for Architecture::Base.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
std::uint64_t Address
Address.
Definition Address.h:11
The ROSE library.