ROSE 0.11.145.147
Disassembler/X86.h
1/* Disassembly specific to the x86 architecture. */
2#ifndef ROSE_BinaryAnalysis_Disassembler_X86_H
3#define ROSE_BinaryAnalysis_Disassembler_X86_H
4#include <featureTests.h>
5#ifdef ROSE_ENABLE_BINARY_ANALYSIS
6#include <Rose/BinaryAnalysis/Disassembler/Base.h>
7
8#include <Rose/BinaryAnalysis/Architecture/BasicTypes.h>
9#include <Rose/BinaryAnalysis/InstructionEnumsX86.h>
10#include "Cxx_GrammarSerialization.h"
11
12#include <boost/serialization/access.hpp>
13#include <boost/serialization/base_object.hpp>
14#include <boost/serialization/export.hpp>
15#include <boost/serialization/split_member.hpp>
16
17namespace Rose {
18namespace BinaryAnalysis {
19namespace Disassembler {
20
23class X86: public Base {
24public:
26 using Ptr = X86Ptr;
27
28private:
29 /* Per-disassembler settings; see init() */
30 X86InstructionSize insnSize;
31 size_t wordSize;
33 /* Per-instruction settings; see startInstruction() */
34 struct State {
35 uint64_t ip;
36 SgUnsignedCharList insnbuf;
37 size_t insnbufat;
39 /* Temporary flags set by the instruction; initialized by startInstruction() */
40 X86SegmentRegister segOverride;
41 X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
42 bool branchPredictionEnabled;
43 bool rexPresent, rexW, rexR, rexX, rexB;
44 bool sizeMustBe64Bit;
45 bool operandSizeOverride;
46 bool addressSizeOverride;
47 bool lock;
48 X86RepeatPrefix repeatPrefix;
49 bool modregrmByteSet;
50 uint8_t modregrmByte;
51 uint8_t modeField;
52 uint8_t regField;
53 uint8_t rmField;
54 SgAsmExpression *modrm;
55 SgAsmExpression *reg;
56 bool isUnconditionalJump;
58 State()
59 : ip(0), insnbufat(0), segOverride(x86_segreg_none), branchPrediction(x86_branch_prediction_none),
60 branchPredictionEnabled(false), rexPresent(false), rexW(false), rexR(false), rexX(false), rexB(false),
61 sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false), lock(false),
62 repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), regField(0),
63 rmField(0), modrm(nullptr), reg(nullptr), isUnconditionalJump(false) {}
64 };
65
67 // Serialization
69#ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
70private:
71 friend class boost::serialization::access;
72
73 template<class S>
74 void serialize_common(S &s, const unsigned /*version*/) {
75 // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
76 // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
77 // constructed in their initial state by a combination of default constructor and init().
78 s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Base);
79 s & BOOST_SERIALIZATION_NVP(wordSize);
80 }
81
82 template<class S>
83 void save(S &s, const unsigned version) const {
84 serialize_common(s, version);
85 }
86
87 template<class S>
88 void load(S &s, const unsigned version) {
89 serialize_common(s, version);
90 init(wordSize);
91 }
92
93 BOOST_SERIALIZATION_SPLIT_MEMBER();
94#endif
95
97 // Constructors
99
100protected:
101 explicit X86(const Architecture::BaseConstPtr&);
102
103public:
106
107 virtual ~X86() {}
108
109 virtual Base::Ptr clone() const override;
110
112 // Public methods
114public:
115 virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, rose_addr_t va,
116 AddressSet *successors=nullptr) override;
117
119
120
121 /*========================================================================================================================
122 * Data types
123 *========================================================================================================================*/
124private:
125
129 class ExceptionX86: public Exception {
130 public:
131 ExceptionX86(const std::string &mesg, const State &state)
132 : Exception(mesg, state.ip) {
133 ASSERT_require(state.insnbufat <= state.insnbuf.size());
134 if (state.insnbufat > 0)
135 bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
136 bit = 8 * state.insnbufat;
137 }
138
139 ExceptionX86(const std::string &mesg, const State &state, size_t bit)
140 : Exception(mesg, state.ip) {
141 ASSERT_require(state.insnbufat <= state.insnbuf.size());
142 if (state.insnbufat > 0)
143 bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
144 this->bit = bit;
145 }
146 };
147
// Register selector passed to makeRegister() and the ModR/M helpers, and produced from an instruction size by
// sizeToMode(). Enumerator values are implicit, counting up from zero in the order listed.
enum RegisterMode {
    rmLegacyByte,
    rmRexByte,
    rmWord,
    rmDWord,
    rmQWord,
    rmSegment,
    rmST,
    rmMM,
    rmXMM,
    rmControl,
    rmDebug,
    rmReturnNull
};
152
153 /* MMX registers? See mmPrefix method */
// Result type of mmPrefix(). Values are implicit, counting up from zero in the order listed.
enum MMPrefix {
    mmNone,
    mmF3,
    mm66,
    mmF2
};
157
158
159 /*========================================================================================================================
160 * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
161 *========================================================================================================================*/
162private:
163
167 uint8_t getByte(State &state) const;
168
172 uint16_t getWord(State &state) const;
173
177 uint32_t getDWord(State &state) const;
178
182 uint64_t getQWord(State &state) const;
183
184 /*========================================================================================================================
185 * Miscellaneous helper methods
186 *========================================================================================================================*/
187private:
190 SgAsmExpression *currentDataSegment(State &state) const;
191
195 X86InstructionSize effectiveAddressSize(State &state) const;
196
198 RegisterMode effectiveOperandMode(State &state) const {
199 return sizeToMode(effectiveOperandSize(state));
200 }
201
205 X86InstructionSize effectiveOperandSize(State &state) const;
206
208 SgAsmType *effectiveOperandType(State &state) const {
209 return sizeToType(effectiveOperandSize(state));
210 }
211
213 bool longMode() const {
214 return insnSize == x86_insnsize_64;
215 }
216
217 /* FIXME: documentation? */
218 MMPrefix mmPrefix(State &state) const;
219
221 void not64(State &state) const {
222 if (longMode())
223 throw ExceptionX86("not valid for 64-bit code", state);
224 }
225
228 void setRex(State &state, uint8_t prefix) const;
229
231 static RegisterMode sizeToMode(X86InstructionSize);
232
235 static SgAsmType *sizeToType(X86InstructionSize s);
236
237
238
239 /*========================================================================================================================
240 * Methods that construct something. (Their names all start with "make".)
241 *========================================================================================================================*/
242private:
243
246 SgAsmExpression *makeAddrSizeValue(State &state, int64_t val, size_t bit_offset, size_t bit_size) const;
247
252 SgAsmX86Instruction *makeInstruction(State &state, X86InstructionKind kind,
253 SgAsmExpression *op1=nullptr, SgAsmExpression *op2=nullptr,
254 SgAsmExpression *op3=nullptr, SgAsmExpression *op4=nullptr) const;
255
257 SgAsmRegisterReferenceExpression *makeIP() const;
258
259 /* FIXME: documentation? */
260 SgAsmRegisterReferenceExpression *makeOperandRegisterByte(State &state, bool rexExtension, uint8_t registerNumber) const;
261
262 /* FIXME: documentation? */
263 SgAsmRegisterReferenceExpression *makeOperandRegisterFull(State &state, bool rexExtension, uint8_t registerNumber) const;
264
267 SgAsmRegisterReferenceExpression *makeRegister(State &state, uint8_t fullRegisterNumber, RegisterMode,
268 SgAsmType *registerType=nullptr) const;
269
270 /* FIXME: documentation? */
271 SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, uint8_t fullRegisterNumber) const {
272 return makeRegister(state, fullRegisterNumber, effectiveOperandMode(state));
273 }
274
275 /* FIXME: documentation? */
276 SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, bool rexExtension, uint8_t registerNumber) const {
277 return makeRegister(state, registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode(state));
278 }
279
281 SgAsmExpression *makeSegmentRegister(State &state, X86SegmentRegister so, bool insn64) const;
282
283
284
285 /*========================================================================================================================
286 * Methods for operating on the ModR/M byte.
287 *========================================================================================================================*/
288private:
289
303 void getModRegRM(State &state, RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = nullptr) const;
304
306 SgAsmMemoryReferenceExpression *decodeModrmMemory(State &state) const;
307
310 void fillInModRM(State &state, RegisterMode rmMode, SgAsmType *t) const;
311
313 SgAsmExpression *makeModrmNormal(State &state, RegisterMode, SgAsmType *mrType) const;
314
317 SgAsmRegisterReferenceExpression *makeModrmRegister(State &state, RegisterMode, SgAsmType* mrType=nullptr) const;
318
320 void requireMemory(State &state) const {
321 if (!state.modregrmByteSet)
322 throw ExceptionX86("requires Mod/RM byte", state);
323 if (state.modeField == 3)
324 throw ExceptionX86("requires memory", state);
325 }
326
327
328
329 /*========================================================================================================================
330 * Methods that construct an SgAsmExpression for an immediate operand.
331 *========================================================================================================================*/
332private:
333
334 SgAsmExpression *getImmByte(State &state) const;
335 SgAsmExpression *getImmWord(State &state) const;
336 SgAsmExpression* getImmDWord(State &state) const;
337 SgAsmExpression* getImmQWord(State &state) const;
338 SgAsmExpression *getImmForAddr(State &state) const;
339 SgAsmExpression *getImmIv(State &state) const;
340 SgAsmExpression *getImmJz(State &state) const;
341 SgAsmExpression *getImmByteAsIv(State &state) const;
342 SgAsmExpression *getImmIzAsIv(State &state) const;
343 SgAsmExpression *getImmJb(State &state) const;
344
345
346
347
348 /*========================================================================================================================
349 * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
350 *========================================================================================================================*/
351private:
352
355 SgAsmX86Instruction *disassemble(State &state) const;
356
358 SgAsmX86Instruction *decodeOpcode0F(State &state) const;
359
361 SgAsmX86Instruction *decodeOpcode0F38(State &state) const;
362
364 SgAsmX86Instruction *decodeX87InstructionD8(State &state) const;
365
367 SgAsmX86Instruction *decodeX87InstructionD9(State &state) const;
368
370 SgAsmX86Instruction *decodeX87InstructionDA(State &state) const;
371
373 SgAsmX86Instruction *decodeX87InstructionDB(State &state) const;
374
376 SgAsmX86Instruction *decodeX87InstructionDC(State &state) const;
377
379 SgAsmX86Instruction *decodeX87InstructionDD(State &state) const;
380
382 SgAsmX86Instruction *decodeX87InstructionDE(State &state) const;
383
385 SgAsmX86Instruction *decodeX87InstructionDF(State &state) const;
386
388 SgAsmX86Instruction *decodeGroup1(State &state, SgAsmExpression *imm) const;
389
391 SgAsmX86Instruction *decodeGroup1a(State &state) const;
392
394 SgAsmX86Instruction *decodeGroup2(State &state, SgAsmExpression *count) const;
395
397 SgAsmX86Instruction *decodeGroup3(State &state, SgAsmExpression *immMaybe) const;
398
400 SgAsmX86Instruction *decodeGroup4(State &state) const;
401
403 SgAsmX86Instruction *decodeGroup5(State &state) const;
404
406 SgAsmX86Instruction *decodeGroup6(State &state) const;
407
410 SgAsmX86Instruction *decodeGroup7(State &state) const;
411
413 SgAsmX86Instruction *decodeGroup8(State &state, SgAsmExpression *imm) const;
414
416 SgAsmX86Instruction *decodeGroup11(State &state, SgAsmExpression *imm) const;
417
419 SgAsmX86Instruction *decodeGroup15(State &state) const;
420
422 SgAsmX86Instruction *decodeGroup16(State &state) const;
423
425 SgAsmX86Instruction *decodeGroupP(State &state) const;
426
427
428
429 /*========================================================================================================================
430 * Supporting functions
431 *========================================================================================================================*/
432private:
433 // Initialize instances of this class. Called by constructor.
434 void init(size_t wordsize);
435
436#if 0 // is this ever used?
438 void startInstruction(State &state, SgAsmX86Instruction *insn) const {
439 startInstruction(insn->get_address(), nullptr, 0);
440 insnSize = insn->get_baseSize();
441 state.lock = insn->get_lockPrefix();
442 state.branchPrediction = insn->get_branchPrediction();
443 state.branchPredictionEnabled = state.branchPrediction != x86_branch_prediction_none;
444 state.segOverride = insn->get_segmentOverride();
445 }
446#endif
447
448 // Resets disassembler state to beginning of an instruction for disassembly.
449 void startInstruction(State &state, rose_addr_t start_va, const uint8_t *buf, size_t bufsz) const {
450 state.ip = start_va;
451 state.insnbuf = SgUnsignedCharList(buf, buf+bufsz);
452 state.insnbufat = 0;
453
454 // Prefix flags
455 state.segOverride = x86_segreg_none;
456 state.branchPrediction = x86_branch_prediction_none;
457 state.branchPredictionEnabled = false;
458 state.rexPresent = state.rexW = state.rexR = state.rexX = state.rexB = false;
459 state.sizeMustBe64Bit = false;
460 state.operandSizeOverride = false;
461 state.addressSizeOverride = false;
462 state.lock = false;
463 state.repeatPrefix = x86_repeat_none;
464 state.modregrmByteSet = false;
465 state.modregrmByte = state.modeField = state.regField = state.rmField = 0; /*arbitrary since modregrmByteSet is false*/
466 state.modrm = state.reg = nullptr;
467 state.isUnconditionalJump = false;
468 }
469
470 // Add comments to any IP relative addition expressions. We're not constant folding these because it's sometimes useful to
471 // know that the address is relative to the instruction address, but the comment is useful for understanding the disassembly.
472 void commentIpRelative(SgAsmInstruction*);
473};
474
475} // namespace
476} // namespace
477} // namespace
478
479#ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
480BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::Disassembler::X86);
481#endif
482
483#endif
484#endif
Virtual base class for instruction disassemblers.
size_t bit
Bit offset in instruction byte sequence where disassembly failed (bit/8 is the index into the "bytes"...
SgUnsignedCharList bytes
Bytes (partial) of failed disassembly, including byte at failure.
rose_addr_t ip
Virtual address where failure occurred; zero if no associated instruction.
Disassembler for the x86 architecture.
static Ptr instance(const Architecture::BaseConstPtr &)
Allocating constructor.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) override
Makes an unknown instruction from an exception.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t va, AddressSet *successors=nullptr) override
This is the lowest level disassembly function and is implemented in the architecture-specific subclas...
virtual Base::Ptr clone() const override
Creates a new copy of a disassembler.
Reference-counting intrusive smart pointer.
Base class for expressions.
Base class for machine instructions.
Reference to memory locations.
Base class for references to a machine register.
Base class for binary types.
Represents one Intel x86 machine instruction.
std::shared_ptr< const Base > BaseConstPtr
Reference counted pointer for Architecture::Base.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
The ROSE library.