ROSE 0.11.145.147
AssemblerX86.h
1/* Assembly specific to the x86 architecture. */
2
3#ifndef ROSE_BinaryAnalysis_AssemblerX86_H
4#define ROSE_BinaryAnalysis_AssemblerX86_H
5
6#include <featureTests.h>
7#ifdef ROSE_ENABLE_BINARY_ANALYSIS
8#include "Assembler.h"
9
10#include <Rose/BinaryAnalysis/InstructionEnumsX86.h>
11
12namespace Rose {
13namespace BinaryAnalysis {
14
31class AssemblerX86: public Assembler {
32public:
34 : honor_operand_types(false) {
35 if (defns.size()==0)
36 initAssemblyRules();
37 }
38
39 virtual ~AssemblerX86() {}
40
42 virtual SgUnsignedCharList assembleOne(SgAsmInstruction*);
43
50 honor_operand_types = b;
51 }
52
56 return honor_operand_types;
57 }
58
60 virtual SgUnsignedCharList assembleProgram(const std::string &source);
61
62 /*========================================================================================================================
63 * Members for defining instructions.
64 *========================================================================================================================*/
65private:
66 /* These bit masks specify how the opcode part of the encoding is generated. The base opcode bytes are specified with
67 * a 64-bit value so that up to eight bytes of opcode can be specified. The bytes generated come from the 64-bit opcode
68 * value in big-endian order but without leading zero bytes. If the 64-bit opcode is zero then a single zero byte is
69 * generated. For instance, the MONITOR instruction has an opcode of 0x0f01c8, generating the encoding 0x0f, 0x01, 0xc8. */
70
/* En opcode modifiers (E0 through E7). The numeric value n occupies bits 4-6 of the modifier word. A separate
 * "present" bit distinguishes E0 from "no En modifier at all", because both have a zero value field. */
static const unsigned od_e_mask = 0x00000070;           /* mask for numeric value (n) part of En field. */
static const unsigned od_e_pres = 0x00000080;           /* bit set if En modification was specified. */
static const unsigned od_e0     = 0x00000000 | od_e_pres;
static const unsigned od_e1     = 0x00000010 | od_e_pres;
static const unsigned od_e2     = 0x00000020 | od_e_pres;
static const unsigned od_e3     = 0x00000030 | od_e_pres;
static const unsigned od_e4     = 0x00000040 | od_e_pres;
static const unsigned od_e5     = 0x00000050 | od_e_pres;
static const unsigned od_e6     = 0x00000060 | od_e_pres;
static const unsigned od_e7     = 0x00000070 | od_e_pres;

/** Returns the numeric value n (0 through 7) stored in the En field of @p opcode_mods.
 *
 *  The presence bit is not examined: the result is 0 both for od_e0 and for a modifier word that carries no En
 *  modification, so callers that care must test od_e_pres themselves. */
static size_t od_e_val(unsigned opcode_mods) {
    return (opcode_mods & od_e_mask) >> 4;
}
/* REX prefix modifiers. Bits 8-11 of the modifier word hold the low nibble of the REX byte; the letters in each
 * name list which nibble bits are set (w = 0x8, r = 0x4, x = 0x2, b = 0x1). Every od_rex* constant also sets
 * od_rex_pres so that plain od_rex (nibble of zero) is distinguishable from "no REX prefix". */
static const unsigned od_rex_pres = 0x00000001;         /* bit set if REX prefix is present. */
static const unsigned od_rex_mask = 0x00000f00;         /* mask for low nyble of REX byte. */
static const unsigned od_rex      = 0x00000000 | od_rex_pres;
static const unsigned od_rexb     = 0x00000100 | od_rex_pres;
static const unsigned od_rexx     = 0x00000200 | od_rex_pres;
static const unsigned od_rexxb    = 0x00000300 | od_rex_pres;
static const unsigned od_rexr     = 0x00000400 | od_rex_pres;
static const unsigned od_rexrb    = 0x00000500 | od_rex_pres;
static const unsigned od_rexrx    = 0x00000600 | od_rex_pres;
static const unsigned od_rexrxb   = 0x00000700 | od_rex_pres;
static const unsigned od_rexw     = 0x00000800 | od_rex_pres;
static const unsigned od_rexwb    = 0x00000900 | od_rex_pres;
static const unsigned od_rexwx    = 0x00000a00 | od_rex_pres;
static const unsigned od_rexwxb   = 0x00000b00 | od_rex_pres;
static const unsigned od_rexwr    = 0x00000c00 | od_rex_pres;
static const unsigned od_rexwrb   = 0x00000d00 | od_rex_pres;
static const unsigned od_rexwrx   = 0x00000e00 | od_rex_pres;
static const unsigned od_rexwrxb  = 0x00000f00 | od_rex_pres;

/** Builds the REX prefix byte described by @p opcode_mods: the fixed high nibble 0x4 combined with the low nibble
 *  stored in the od_rex_mask field.
 *
 *  The od_rex_pres bit is not consulted, so 0x40 is returned even for a modifier word without a REX modifier;
 *  callers must check od_rex_pres before emitting the byte. */
static uint8_t od_rex_byte(unsigned opcode_mods) {
    const unsigned low_nibble = (opcode_mods & od_rex_mask) >> 8;
    return static_cast<uint8_t>(0x40u | low_nibble);
}
108
/* Remaining opcode modifier flags.
 *
 * NOTE(review): the explanatory comments for these constants were not present in this listing. The names appear to
 * follow the opcode-column notation of the Intel instruction set reference (/r, cb..ct, ib..io, +rb..+ro, +i);
 * confirm against the manual before relying on the per-group descriptions below. */

/* Single flag bit; presumably marks encodings that carry a ModR/M byte. */
static const unsigned od_modrm  = 0x00000002;

/* Three-bit field in bits 12-14 selecting one of the c* modifiers; a zero field means none was given. */
static const unsigned od_c_mask = 0x00007000;
static const unsigned od_cb     = 0x00001000;
static const unsigned od_cw     = 0x00002000;
static const unsigned od_cd     = 0x00003000;
static const unsigned od_cp     = 0x00004000;
static const unsigned od_co     = 0x00005000;
static const unsigned od_ct     = 0x00006000;

/* Three-bit field in bits 16-18 selecting one of the i* modifiers; a zero field means none was given. */
static const unsigned od_i_mask = 0x00070000;
static const unsigned od_ib     = 0x00010000;
static const unsigned od_iw     = 0x00020000;
static const unsigned od_id     = 0x00030000;
static const unsigned od_io     = 0x00040000;

/* Three-bit field in bits 20-22 selecting one of the r* modifiers; a zero field means none was given. */
static const unsigned od_r_mask = 0x00700000;
static const unsigned od_rb     = 0x00100000;
static const unsigned od_rw     = 0x00200000;
static const unsigned od_rd     = 0x00300000;
static const unsigned od_ro     = 0x00400000;

/* Single flag bit, unrelated to the od_i_mask field despite the similar name. */
static const unsigned od_i      = 0x00000004;
143
144
/** Kinds of operand that an instruction definition can require.
 *
 *  od_none must remain the first enumerator (value zero): InsnDefn's constructor treats a zero operand slot as
 *  "not supplied". The order of the remaining enumerators is preserved from the original header.
 *
 *  NOTE(review): the per-enumerator descriptions were not present in this listing. The names appear to mirror the
 *  operand notation of the Intel instruction set reference; confirm individual meanings against the manual. */
enum OperandDefn
{
    od_none,

    /* Specific registers. */
    od_AL,
    od_AX,
    od_EAX,
    od_RAX,
    od_DX,
    od_CS,
    od_DS,
    od_ES,
    od_FS,
    od_GS,
    od_SS,

    /* rel* and ptr16:* operands. */
    od_rel8,
    od_rel16,
    od_rel32,
    od_rel64,
    od_ptr16_16,
    od_ptr16_32,
    od_ptr16_64,

    /* r* (register) and imm* (immediate) operands by width. */
    od_r8,
    od_r16,
    od_r32,
    od_r64,
    od_imm8,
    od_imm16,
    od_imm32,
    od_imm64,

    /* r/m* operands by width. */
    od_r_m8,
    od_r_m16,
    od_r_m32,
    od_r_m64,

    /* m* (memory) operands. */
    od_m,
    od_m8,
    od_m16,
    od_m32,
    od_m64,
    od_m128,
    od_m16_16,
    od_m16_32,
    od_m16_64,
    od_m16a16,
    od_m16a32,
    od_m32a32,
    od_m16a64,
    od_moffs8,
    od_moffs16,
    od_moffs32,
    od_moffs64,

    od_sreg,

    /* Floating-point memory operands and ST(i) registers. */
    od_m32fp,
    od_m64fp,
    od_m80fp,
    od_st0,
    od_st1,
    od_st2,
    od_st3,
    od_st4,
    od_st5,
    od_st6,
    od_st7,
    od_sti,

    /* mm* and xmm* operands. */
    od_mm,
    od_mm_m32,
    od_mm_m64,
    od_xmm,
    od_xmm_m16,
    od_xmm_m32,
    od_xmm_m64,
    od_xmm_m128,

    /* The following are not documented in section 3.1.1.2 but are used elsewhere in the manual */
    od_XMM0,
    od_0,
    od_1,
    od_m80,
    od_dec,
    od_m80bcd,
    od_m2byte,
    od_m14_28byte,
    od_m94_108byte,
    od_m512byte,
    od_r16_m16,
    od_r32_m8,
    od_r32_m16,
    od_r64_m16,
    od_CR0,
    od_CR7,
    od_CR8,
    od_CR0CR7,
    od_DR0DR7,
    od_reg,
    od_CL,
};
327
/* Compatibility flags. The two values occupy distinct bits, so they can be OR-ed together.
 * NOTE(review): presumably these are the values stored in InsnDefn::compatibility -- confirm at the use sites. */
static const unsigned COMPAT_LEGACY = 0x01;
static const unsigned COMPAT_64     = 0x02;
/** Packs the three ModR/M fields into one byte: @p mod in bits 7-6, @p reg in bits 5-3 and @p rm in bits 2-0.
 *
 *  Each argument is masked to its field width first, so any higher bits a caller passes are silently discarded. */
static uint8_t build_modrm(unsigned mod, unsigned reg, unsigned rm) {
    const unsigned packed = ((mod & 0x3) << 6) | ((reg & 0x7) << 3) | (rm & 0x7);
    return static_cast<uint8_t>(packed);
}
336
/** Returns the two-bit mod field (bits 7-6) of a ModR/M byte. */
static unsigned modrm_mod(uint8_t modrm) {
    return modrm >> 6;
}
339
/** Returns the three-bit reg field (bits 5-3) of a ModR/M byte. */
static unsigned modrm_reg(uint8_t modrm) {
    return (modrm >> 3) & 0x7;
}
342
/** Returns the three-bit r/m field (bits 2-0) of a ModR/M byte. */
static unsigned modrm_rm(uint8_t modrm) {
    return modrm & 0x7;
}
345
/** Packs the three SIB fields into one byte: @p ss in bits 7-6, @p index in bits 5-3 and @p base in bits 2-0.
 *
 *  Each argument is masked to its field width first, so any higher bits a caller passes are silently discarded. */
static uint8_t build_sib(unsigned ss, unsigned index, unsigned base) {
    const unsigned packed = ((ss & 0x3) << 6) | ((index & 0x7) << 3) | (base & 0x7);
    return static_cast<uint8_t>(packed);
}
350
/** Returns the two-bit ss field (bits 7-6) of a SIB byte. */
static unsigned sib_ss(uint8_t sib) {
    return sib >> 6;
}
353
/** Returns the three-bit index field (bits 5-3) of a SIB byte. */
static unsigned sib_index(uint8_t sib) {
    return (sib >> 3) & 0x7;
}
356
/** Returns the three-bit base field (bits 2-0) of a SIB byte. */
static unsigned sib_base(uint8_t sib) {
    return sib & 0x7;
}
359
363 class InsnDefn {
364 public:
365 InsnDefn(const std::string &mnemonic, X86InstructionKind kind, unsigned compatibility, uint64_t opcode,
366 unsigned opcode_modifiers, OperandDefn op1=od_none, OperandDefn op2=od_none, OperandDefn op3=od_none,
367 OperandDefn op4=od_none)
368 : mnemonic(mnemonic), kind(kind), compatibility(compatibility), opcode(opcode), opcode_modifiers(opcode_modifiers) {
369 if (op1) operands.push_back(op1);
370 if (op2) operands.push_back(op2);
371 if (op3) operands.push_back(op3);
372 if (op4) operands.push_back(op4);
373 }
374 std::string to_str() const;
375 void set_location(const std::string &s) {
376 location = s;
377 }
378 std::string mnemonic;
380 unsigned compatibility;
381 uint64_t opcode;
382 unsigned opcode_modifiers;
383 std::vector<OperandDefn> operands;
384 std::string location; /* location of instruction documentation */
385 };
386
/** Shapes of memory-reference expression, named for which of base register, scaled index register and
 *  displacement are present. This is the return type of parse_memref(). */
enum MemoryReferencePattern
{
    mrp_unknown,
    mrp_disp,                           /* displacement */
    mrp_index,                          /* register*scale */
    mrp_index_disp,                     /* register*scale + displacement */
    mrp_base,                           /* register */
    mrp_base_disp,                      /* register + displacement */
    mrp_base_index,                     /* register + register*scale */
    mrp_base_index_disp                 /* register + register*scale + displacement */
};
398
400 typedef std::vector<const InsnDefn*> DictionaryPage;
401
403 typedef std::map<X86InstructionKind, DictionaryPage> InsnDictionary;
404
/** Initializes the assembly rules. The constructor calls this when the shared dictionary is still empty.
 *  NOTE(review): presumably this fills the dictionary by delegating to the numbered parts below -- the
 *  definitions are not visible here, confirm in the implementation file. */
static void initAssemblyRules();
static void initAssemblyRules_part1();
static void initAssemblyRules_part2();
static void initAssemblyRules_part3();
static void initAssemblyRules_part4();
static void initAssemblyRules_part5();
static void initAssemblyRules_part6();
static void initAssemblyRules_part7();
static void initAssemblyRules_part8();
static void initAssemblyRules_part9();
416
418 static void define(const InsnDefn *d) {
419 defns[d->kind].push_back(d);
420 }
421
425 static std::string to_str(X86InstructionKind);
426
432 SgUnsignedCharList fixup_prefix_bytes(SgAsmX86Instruction *insn, SgUnsignedCharList source);
433
436 SgUnsignedCharList assemble(SgAsmX86Instruction *insn, const InsnDefn *defn);
437
441 void matches(const InsnDefn *defn, SgAsmX86Instruction *insn, int64_t *disp, int64_t *imm) const;
442
445 bool matches(OperandDefn, SgAsmExpression*, SgAsmInstruction*, int64_t *disp, int64_t *imm) const;
446
448 static bool matches_rel(SgAsmInstruction*, int64_t val, size_t nbytes);
449
453 static MemoryReferencePattern parse_memref(SgAsmInstruction *insn, SgAsmMemoryReferenceExpression *expr,
454 SgAsmRegisterReferenceExpression **base_reg/*out*/,
455 SgAsmRegisterReferenceExpression **index_reg/*out*/,
456 SgAsmValueExpression **scale/*out*/, SgAsmValueExpression **displacement/*out*/);
457
459 uint8_t build_modrm(const InsnDefn*, SgAsmX86Instruction*, size_t argno,
460 uint8_t *sib, int64_t *displacement, uint8_t *rex) const;
461
463 void build_modreg(const InsnDefn*, SgAsmX86Instruction*, size_t argno, uint8_t *modrm, uint8_t *rex) const;
464
467 uint8_t segment_override(SgAsmX86Instruction*);
468
469 static InsnDictionary defns;
470 bool honor_operand_types;
471};
472
473} // namespace
474} // namespace
475
476#endif
477#endif
This class contains methods for assembling x86 instructions (SgAsmX86Instruction).
virtual SgUnsignedCharList assembleOne(SgAsmInstruction *)
Assemble an instruction (SgAsmInstruction) into byte code.
bool get_honor_operand_types() const
Returns true if the assembler is honoring operand data types, or false if the assembler is using the ...
void set_honor_operand_types(bool b)
Causes the assembler to honor (if true) or disregard (if false) the data types of operands when assem...
virtual SgUnsignedCharList assembleProgram(const std::string &source)
Assemble an x86 program from assembly source code using the nasm assembler.
Virtual base class for instruction assemblers.
Definition Assembler.h:58
Base class for expressions.
Base class for machine instructions.
Reference to memory locations.
Base class for references to a machine register.
Base class for values.
Represents one Intel x86 machine instruction.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
The ROSE library.