ROSE  0.11.145.0
AssemblerX86.h
1 /* Assembly specific to the x86 architecture. */
2 
3 #ifndef ROSE_BinaryAnalysis_AssemblerX86_H
4 #define ROSE_BinaryAnalysis_AssemblerX86_H
5 
6 #include <featureTests.h>
7 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
8 #include "Assembler.h"
9 
10 namespace Rose {
11 namespace BinaryAnalysis {
12 
29 class AssemblerX86: public Assembler {
30 public:
31  AssemblerX86()
32  : honor_operand_types(false) {
33  if (defns.size()==0)
34  initAssemblyRules();
35  }
36 
37  virtual ~AssemblerX86() {}
38 
40  virtual SgUnsignedCharList assembleOne(SgAsmInstruction*);
41 
47  void set_honor_operand_types(bool b) {
48  honor_operand_types = b;
49  }
50 
53  bool get_honor_operand_types() const {
54  return honor_operand_types;
55  }
56 
58  virtual SgUnsignedCharList assembleProgram(const std::string &source);
59 
60  /*========================================================================================================================
61  * Members for defining instructions.
62  *========================================================================================================================*/
63 private:
64  /* These bit masks specify how the opcode part of the encoding is generated. The base opcode bytes are specified with
65  * a 64-bit value so that up to eight bytes of opcode can be specified. The bytes generated come from the 64-bit opcode
66  * value in big-endian order but without leading zero bytes. If the 64-bit opcode is zero then a single zero byte is
67  * generated. For instance, the MONITOR instruction has an opcode of 0x0f01c8, generating the encoding 0x0f, 0x01, 0xc8. */
68 
72  static const unsigned od_e_mask = 0x00000070; /* mask for numeric value (n) part of En field. */
73  static const unsigned od_e_pres = 0x00000080; /* bit set if En modification was specified. */
74  static const unsigned od_e0 = 0x00000000 | od_e_pres;
75  static const unsigned od_e1 = 0x00000010 | od_e_pres;
76  static const unsigned od_e2 = 0x00000020 | od_e_pres;
77  static const unsigned od_e3 = 0x00000030 | od_e_pres;
78  static const unsigned od_e4 = 0x00000040 | od_e_pres;
79  static const unsigned od_e5 = 0x00000050 | od_e_pres;
80  static const unsigned od_e6 = 0x00000060 | od_e_pres;
81  static const unsigned od_e7 = 0x00000070 | od_e_pres;
82  static size_t od_e_val(unsigned opcode_mods) { return (opcode_mods & od_e_mask)>>4; }
87  static const unsigned od_rex_pres = 0x00000001; /* bit set if REX prefix is present. */
88  static const unsigned od_rex_mask = 0x00000f00; /* mask for low nyble of REX byte. */
89  static const unsigned od_rex = 0x00000000 | od_rex_pres;
90  static const unsigned od_rexb = 0x00000100 | od_rex_pres;
91  static const unsigned od_rexx = 0x00000200 | od_rex_pres;
92  static const unsigned od_rexxb = 0x00000300 | od_rex_pres;
93  static const unsigned od_rexr = 0x00000400 | od_rex_pres;
94  static const unsigned od_rexrb = 0x00000500 | od_rex_pres;
95  static const unsigned od_rexrx = 0x00000600 | od_rex_pres;
96  static const unsigned od_rexrxb = 0x00000700 | od_rex_pres;
97  static const unsigned od_rexw = 0x00000800 | od_rex_pres;
98  static const unsigned od_rexwb = 0x00000900 | od_rex_pres;
99  static const unsigned od_rexwx = 0x00000a00 | od_rex_pres;
100  static const unsigned od_rexwxb = 0x00000b00 | od_rex_pres;
101  static const unsigned od_rexwr = 0x00000c00 | od_rex_pres;
102  static const unsigned od_rexwrb = 0x00000d00 | od_rex_pres;
103  static const unsigned od_rexwrx = 0x00000e00 | od_rex_pres;
104  static const unsigned od_rexwrxb = 0x00000f00 | od_rex_pres;
105  static uint8_t od_rex_byte(unsigned opcode_mods) { return 0x40 | ((opcode_mods & od_rex_mask) >> 8); }
106 
109  static const unsigned od_modrm = 0x00000002;
110 
113  static const unsigned od_c_mask = 0x00007000;
114  static const unsigned od_cb = 0x00001000;
115  static const unsigned od_cw = 0x00002000;
116  static const unsigned od_cd = 0x00003000;
117  static const unsigned od_cp = 0x00004000;
118  static const unsigned od_co = 0x00005000;
119  static const unsigned od_ct = 0x00006000;
120 
123  static const unsigned od_i_mask = 0x00070000;
124  static const unsigned od_ib = 0x00010000;
125  static const unsigned od_iw = 0x00020000;
126  static const unsigned od_id = 0x00030000;
127  static const unsigned od_io = 0x00040000;
128 
131  static const unsigned od_r_mask = 0x00700000;
132  static const unsigned od_rb = 0x00100000;
133  static const unsigned od_rw = 0x00200000;
134  static const unsigned od_rd = 0x00300000;
135  static const unsigned od_ro = 0x00400000;
136 
140  static const unsigned od_i = 0x00000004;
141 
142 
144  enum OperandDefn
145  {
146  od_none,
147  od_AL,
148  od_AX,
149  od_EAX,
150  od_RAX,
151  od_DX,
152  od_CS,
153  od_DS,
154  od_ES,
155  od_FS,
156  od_GS,
157  od_SS,
158  od_rel8,
160  od_rel16,
162  od_rel32,
164  od_rel64,
166  od_ptr16_16,
171  od_ptr16_32,
176  od_ptr16_64,
181  od_r8,
183  od_r16,
185  od_r32,
187  od_r64,
189  od_imm8,
193  od_imm16,
195  od_imm32,
197  od_imm64,
200  od_r_m8,
203  od_r_m16,
208  od_r_m32,
213  od_r_m64,
218  od_m,
219  od_m8,
221  od_m16,
223  od_m32,
225  od_m64,
226  od_m128,
227  od_m16_16,
230  od_m16_32,
233  od_m16_64,
236  od_m16a16,
240  od_m16a32,
245  od_m32a32,
249  od_m16a64,
254  od_moffs8,
257  od_moffs16,
260  od_moffs32,
263  od_moffs64,
266  od_sreg,
268  od_m32fp,
270  od_m64fp,
272  od_m80fp,
274  od_st0,
275  od_st1,
276  od_st2,
277  od_st3,
278  od_st4,
279  od_st5,
280  od_st6,
281  od_st7,
282  od_sti,
283  od_mm,
284  od_mm_m32,
287  od_mm_m64,
290  od_xmm,
292  od_xmm_m16,
293  od_xmm_m32,
296  od_xmm_m64,
299  od_xmm_m128,
302  /* The following are not documented in section 3.1.1.2 but are used elsewhere in the manual */
303  od_XMM0,
304  od_0,
305  od_1,
306  od_m80,
307  od_dec,
308  od_m80bcd,
309  od_m2byte,
310  od_m14_28byte,
311  od_m94_108byte,
312  od_m512byte,
313  od_r16_m16,
314  od_r32_m8,
315  od_r32_m16,
316  od_r64_m16,
317  od_CR0,
318  od_CR7,
319  od_CR8,
320  od_CR0CR7,
321  od_DR0DR7,
322  od_reg,
323  od_CL,
324  };
325 
327  static const unsigned COMPAT_LEGACY = 0x01;
328  static const unsigned COMPAT_64 = 0x02;
331  static uint8_t build_modrm(unsigned mod, unsigned reg, unsigned rm) {
332  return ((mod&0x3)<<6) | ((reg&0x7)<<3) | (rm&0x7);
333  }
334 
336  static unsigned modrm_mod(uint8_t modrm) { return modrm>>6; }
337 
339  static unsigned modrm_reg(uint8_t modrm) { return (modrm>>3) & 0x7; }
340 
342  static unsigned modrm_rm(uint8_t modrm) { return modrm & 0x7; }
343 
345  static uint8_t build_sib(unsigned ss, unsigned index, unsigned base) {
346  return ((ss&0x3)<<6) | ((index&0x7)<<3) | (base&0x7);
347  }
348 
350  static unsigned sib_ss(uint8_t sib) {return sib>>6; }
351 
353  static unsigned sib_index(uint8_t sib) { return (sib>>3) & 0x7; }
354 
356  static unsigned sib_base(uint8_t sib) { return sib & 0x7; }
357 
361  class InsnDefn {
362  public:
363  InsnDefn(const std::string &mnemonic, X86InstructionKind kind, unsigned compatibility, uint64_t opcode,
364  unsigned opcode_modifiers, OperandDefn op1=od_none, OperandDefn op2=od_none, OperandDefn op3=od_none,
365  OperandDefn op4=od_none)
366  : mnemonic(mnemonic), kind(kind), compatibility(compatibility), opcode(opcode), opcode_modifiers(opcode_modifiers) {
367  if (op1) operands.push_back(op1);
368  if (op2) operands.push_back(op2);
369  if (op3) operands.push_back(op3);
370  if (op4) operands.push_back(op4);
371  }
372  std::string to_str() const;
373  void set_location(const std::string &s) {
374  location = s;
375  }
376  std::string mnemonic;
377  X86InstructionKind kind;
378  unsigned compatibility;
379  uint64_t opcode;
380  unsigned opcode_modifiers;
381  std::vector<OperandDefn> operands;
382  std::string location; /* location of instruction documentation */
383  };
384 
385  enum MemoryReferencePattern
386  {
387  mrp_unknown,
388  mrp_disp, /* displacement */
389  mrp_index, /* register*scale */
390  mrp_index_disp, /* register*scale + displacement */
391  mrp_base, /* register */
392  mrp_base_disp, /* register + displacement */
393  mrp_base_index, /* register + register*scale */
394  mrp_base_index_disp /* register + register*scale + displacement */
395  };
396 
398  typedef std::vector<const InsnDefn*> DictionaryPage;
399 
401  typedef std::map<X86InstructionKind, DictionaryPage> InsnDictionary;
402 
404  static void initAssemblyRules();
405  static void initAssemblyRules_part1();
406  static void initAssemblyRules_part2();
407  static void initAssemblyRules_part3();
408  static void initAssemblyRules_part4();
409  static void initAssemblyRules_part5();
410  static void initAssemblyRules_part6();
411  static void initAssemblyRules_part7();
412  static void initAssemblyRules_part8();
413  static void initAssemblyRules_part9();
414 
416  static void define(const InsnDefn *d) {
417  defns[d->kind].push_back(d);
418  }
419 
423  static std::string to_str(X86InstructionKind);
424 
430  SgUnsignedCharList fixup_prefix_bytes(SgAsmX86Instruction *insn, SgUnsignedCharList source);
431 
434  SgUnsignedCharList assemble(SgAsmX86Instruction *insn, const InsnDefn *defn);
435 
439  void matches(const InsnDefn *defn, SgAsmX86Instruction *insn, int64_t *disp, int64_t *imm) const;
440 
443  bool matches(OperandDefn, SgAsmExpression*, SgAsmInstruction*, int64_t *disp, int64_t *imm) const;
444 
446  static bool matches_rel(SgAsmInstruction*, int64_t val, size_t nbytes);
447 
451  static MemoryReferencePattern parse_memref(SgAsmInstruction *insn, SgAsmMemoryReferenceExpression *expr,
452  SgAsmRegisterReferenceExpression **base_reg/*out*/,
453  SgAsmRegisterReferenceExpression **index_reg/*out*/,
454  SgAsmValueExpression **scale/*out*/, SgAsmValueExpression **displacement/*out*/);
455 
457  uint8_t build_modrm(const InsnDefn*, SgAsmX86Instruction*, size_t argno,
458  uint8_t *sib, int64_t *displacement, uint8_t *rex) const;
459 
461  void build_modreg(const InsnDefn*, SgAsmX86Instruction*, size_t argno, uint8_t *modrm, uint8_t *rex) const;
462 
465  uint8_t segment_override(SgAsmX86Instruction*);
466 
467  static InsnDictionary defns;
468  bool honor_operand_types;
469 };
470 
471 } // namespace
472 } // namespace
473 
474 #endif
475 #endif
Base class for references to a machine register.
Base class for machine instructions.
bool get_honor_operand_types() const
Returns true if the assembler is honoring operand data types, or false if the assembler is using the ...
Definition: AssemblerX86.h:53
Main namespace for the ROSE library.
This class contains methods for assembling x86 instructions (SgAsmX86Instruction).
Definition: AssemblerX86.h:29
Reference to memory locations.
virtual SgUnsignedCharList assembleOne(SgAsmInstruction *)
Assemble an instruction (SgAsmInstruction) into byte code.
Represents one Intel x86 machine instruction.
Base class for expressions.
Base class for values.
virtual SgUnsignedCharList assembleProgram(const std::string &source)
Assemble an x86 program from assembly source code using the nasm assembler.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
void set_honor_operand_types(bool b)
Causes the assembler to honor (if true) or disregard (if false) the data types of operands when assembling.
Definition: AssemblerX86.h:47
Virtual base class for instruction assemblers.
Definition: Assembler.h:58