ROSE  0.9.9.139
AssemblerX86.h
1 /* Assembly specific to the x86 architecture. */
2 
3 #ifndef ROSE_ASSEMBLER_X86_H
4 #define ROSE_ASSEMBLER_X86_H
5 
6 #include "Assembler.h"
7 
8 //#include "sage3.h"
9 
26 class AssemblerX86: public Assembler {
27 public:
28  AssemblerX86()
29  : honor_operand_types(false) {
30  if (defns.size()==0)
31  initAssemblyRules();
32  }
33 
/* Virtual destructor with an empty body: nothing is released here. */
34  virtual ~AssemblerX86() {}
35 
/* Assemble an instruction (SgAsmInstruction) into byte code. Only declared in this header. */
37  virtual SgUnsignedCharList assembleOne(SgAsmInstruction*);
38 
44  void set_honor_operand_types(bool b) {
45  honor_operand_types = b;
46  }
47 
50  bool get_honor_operand_types() const {
51  return honor_operand_types;
52  }
53 
/* Assemble an x86 program from assembly source code using the nasm assembler. Only declared in this header. */
55  virtual SgUnsignedCharList assembleProgram(const std::string &source);
56 
57  /*========================================================================================================================
58  * Members for defining instructions.
59  *========================================================================================================================*/
60 private:
61  /* These bit masks specify how the opcode part of the encoding is generated. The base opcode bytes are specified with
62  * a 64-bit value so that up to eight bytes of opcode can be specified. The bytes generated come from the 64-bit opcode
63  * value in big-endian order but without leading zero bytes. If the 64-bit opcode is zero then a single zero byte is
64  * generated. For instance, the MONITOR instruction has an opcode of 0x0f01c8, generating the encoding 0x0f, 0x01, 0xc8. */
65 
/* "En" opcode modifier. The numeric value n (0 through 7) lives in bits 4-6 of the modifier word and bit 7 is a
 * presence flag, which is what distinguishes od_e0 from a modifier word that has no En modification at all. */
static const unsigned od_e_pres = 1u << 7;              /* bit set if En modification was specified. */
static const unsigned od_e_mask = 7u << 4;              /* mask for numeric value (n) part of En field. */
static const unsigned od_e0 = (0u << 4) | od_e_pres;
static const unsigned od_e1 = (1u << 4) | od_e_pres;
static const unsigned od_e2 = (2u << 4) | od_e_pres;
static const unsigned od_e3 = (3u << 4) | od_e_pres;
static const unsigned od_e4 = (4u << 4) | od_e_pres;
static const unsigned od_e5 = (5u << 4) | od_e_pres;
static const unsigned od_e6 = (6u << 4) | od_e_pres;
static const unsigned od_e7 = (7u << 4) | od_e_pres;

/* Extracts n from the En field of a modifier word. The presence bit is not examined, so a word without an En
 * modification yields 0 just like od_e0 does. */
static size_t od_e_val(unsigned opcode_mods) {
    const unsigned field = opcode_mods & od_e_mask;
    return field >> 4;
}
/* REX prefix modifiers. Bit 0 of the modifier word records that a REX prefix is present; bits 8-11 hold the low nibble
 * of the REX byte. The letters after "od_rex" name the nibble bits that are set: w=8, r=4, x=2, b=1. */
static const unsigned od_rex_pres = 1u << 0;            /* bit set if REX prefix is present. */
static const unsigned od_rex_mask = 0xfu << 8;          /* mask for low nibble of REX byte. */
static const unsigned od_rex      = (0x0u << 8) | od_rex_pres;
static const unsigned od_rexb     = (0x1u << 8) | od_rex_pres;
static const unsigned od_rexx     = (0x2u << 8) | od_rex_pres;
static const unsigned od_rexxb    = (0x3u << 8) | od_rex_pres;
static const unsigned od_rexr     = (0x4u << 8) | od_rex_pres;
static const unsigned od_rexrb    = (0x5u << 8) | od_rex_pres;
static const unsigned od_rexrx    = (0x6u << 8) | od_rex_pres;
static const unsigned od_rexrxb   = (0x7u << 8) | od_rex_pres;
static const unsigned od_rexw     = (0x8u << 8) | od_rex_pres;
static const unsigned od_rexwb    = (0x9u << 8) | od_rex_pres;
static const unsigned od_rexwx    = (0xau << 8) | od_rex_pres;
static const unsigned od_rexwxb   = (0xbu << 8) | od_rex_pres;
static const unsigned od_rexwr    = (0xcu << 8) | od_rex_pres;
static const unsigned od_rexwrb   = (0xdu << 8) | od_rex_pres;
static const unsigned od_rexwrx   = (0xeu << 8) | od_rex_pres;
static const unsigned od_rexwrxb  = (0xfu << 8) | od_rex_pres;

/* Builds the REX prefix byte: the fixed high nibble 0x4 combined with the nibble stored in the modifier word. The
 * presence bit is not consulted. */
static uint8_t od_rex_byte(unsigned opcode_mods) {
    const unsigned low_nibble = (opcode_mods & od_rex_mask) >> 8;
    return 0x40 | low_nibble;
}
103 
/* Remaining opcode modifier fields. Each one occupies its own bit range of the modifier word (none of them overlaps
 * the En field in bits 4-7 or the REX bits 0 and 8-11), so values from different groups can be OR'ed together.
 * NOTE(review): the names look like the Intel manual's opcode-column notation (/r, cb..ct, ib..io, +rb..+ro, +i) --
 * confirm the exact meaning against the assembly rules that use them. */

/* Single-bit flag, bit 1. */
static const unsigned od_modrm = 1u << 1;

/* Three-bit "c" field, bits 12-14. */
static const unsigned od_c_mask = 7u << 12;
static const unsigned od_cb = 1u << 12;
static const unsigned od_cw = 2u << 12;
static const unsigned od_cd = 3u << 12;
static const unsigned od_cp = 4u << 12;
static const unsigned od_co = 5u << 12;
static const unsigned od_ct = 6u << 12;

/* Three-bit "i" field, bits 16-18. */
static const unsigned od_i_mask = 7u << 16;
static const unsigned od_ib = 1u << 16;
static const unsigned od_iw = 2u << 16;
static const unsigned od_id = 3u << 16;
static const unsigned od_io = 4u << 16;

/* Three-bit "r" field, bits 20-22. */
static const unsigned od_r_mask = 7u << 20;
static const unsigned od_rb = 1u << 20;
static const unsigned od_rw = 2u << 20;
static const unsigned od_rd = 3u << 20;
static const unsigned od_ro = 4u << 20;

/* Single-bit flag, bit 2. Not related to the od_i_mask field above despite the similar name. */
static const unsigned od_i = 1u << 2;
138 
139 
/* Operand kinds that an instruction definition (InsnDefn) can list. od_none is the first enumerator and therefore has
 * the value zero; the InsnDefn constructor tests its operand arguments for non-zero to decide whether an operand was
 * supplied, so od_none must stay first. No enumerator has an explicit value, so inserting or reordering entries
 * changes the numeric value of everything after the change.
 * NOTE(review): the names appear to follow the operand notation of the Intel instruction set manual (the comment
 * further down cites its section 3.1.1.2) -- confirm there before adding or renaming entries. */
141  enum OperandDefn
142  {
143  od_none,
144  od_AL,
145  od_AX,
146  od_EAX,
147  od_RAX,
148  od_DX,
149  od_CS,
150  od_DS,
151  od_ES,
152  od_FS,
153  od_GS,
154  od_SS,
155  od_rel8,
157  od_rel16,
159  od_rel32,
161  od_rel64,
163  od_ptr16_16,
168  od_ptr16_32,
173  od_ptr16_64,
178  od_r8,
180  od_r16,
182  od_r32,
184  od_r64,
186  od_imm8,
190  od_imm16,
192  od_imm32,
194  od_imm64,
197  od_r_m8,
200  od_r_m16,
205  od_r_m32,
210  od_r_m64,
215  od_m,
216  od_m8,
218  od_m16,
220  od_m32,
222  od_m64,
223  od_m128,
224  od_m16_16,
227  od_m16_32,
230  od_m16_64,
233  od_m16a16,
237  od_m16a32,
242  od_m32a32,
246  od_m16a64,
251  od_moffs8,
254  od_moffs16,
257  od_moffs32,
260  od_moffs64,
263  od_sreg,
265  od_m32fp,
267  od_m64fp,
269  od_m80fp,
271  od_st0,
272  od_st1,
273  od_st2,
274  od_st3,
275  od_st4,
276  od_st5,
277  od_st6,
278  od_st7,
279  od_sti,
280  od_mm,
281  od_mm_m32,
284  od_mm_m64,
287  od_xmm,
289  od_xmm_m16,
290  od_xmm_m32,
293  od_xmm_m64,
296  od_xmm_m128,
299  /* The following are not documented in section 3.1.1.2 but are used elsewhere in the manual */
300  od_XMM0,
301  od_0,
302  od_1,
303  od_m80,
304  od_dec,
305  od_m80bcd,
306  od_m2byte,
307  od_m14_28byte,
308  od_m94_108byte,
309  od_m512byte,
310  od_r16_m16,
311  od_r32_m8,
312  od_r32_m16,
313  od_r64_m16,
314  od_CR0,
315  od_CR7,
316  od_CR8,
317  od_CR0CR7,
318  od_DR0DR7,
319  od_reg,
320  od_CL,
321  };
322 
/* Two independent bit flags (bits 0 and 1). NOTE(review): presumably the values stored in InsnDefn::compatibility,
 * meaning "valid in legacy mode" and "valid in 64-bit mode" -- confirm against the code that tests them. */
static const unsigned COMPAT_LEGACY = 1u << 0;
static const unsigned COMPAT_64 = 1u << 1;
/* Packs the three fields of a ModR/M byte: mod into bits 6-7, reg into bits 3-5 and rm into bits 0-2. Each argument is
 * masked to the width of its field first, so excess high bits are silently dropped. */
static uint8_t build_modrm(unsigned mod, unsigned reg, unsigned rm) {
    const unsigned mod_bits = (mod & 0x3) << 6;
    const unsigned reg_bits = (reg & 0x7) << 3;
    const unsigned rm_bits = rm & 0x7;
    return static_cast<uint8_t>(mod_bits | reg_bits | rm_bits);
}
331 
/* Returns the two-bit mod field (bits 6-7) of a ModR/M byte. */
static unsigned modrm_mod(uint8_t modrm) {
    return (modrm & 0xc0) >> 6;
}
334 
/* Returns the three-bit reg field (bits 3-5) of a ModR/M byte. */
static unsigned modrm_reg(uint8_t modrm) {
    return (modrm & 0x38) >> 3;
}
337 
/* Returns the three-bit rm field (bits 0-2) of a ModR/M byte. */
static unsigned modrm_rm(uint8_t modrm) {
    const unsigned low_bits = modrm & 0x7;
    return low_bits;
}
340 
/* Packs the three fields of a SIB byte: ss into bits 6-7, index into bits 3-5 and base into bits 0-2. Each argument is
 * masked to the width of its field first, so excess high bits are silently dropped. */
static uint8_t build_sib(unsigned ss, unsigned index, unsigned base) {
    const unsigned ss_bits = (ss & 0x3) << 6;
    const unsigned index_bits = (index & 0x7) << 3;
    const unsigned base_bits = base & 0x7;
    return static_cast<uint8_t>(ss_bits | index_bits | base_bits);
}
345 
/* Returns the two-bit ss field (bits 6-7) of a SIB byte. */
static unsigned sib_ss(uint8_t sib) {
    return (sib & 0xc0) >> 6;
}
348 
/* Returns the three-bit index field (bits 3-5) of a SIB byte. */
static unsigned sib_index(uint8_t sib) {
    return (sib & 0x38) >> 3;
}
351 
/* Returns the three-bit base field (bits 0-2) of a SIB byte. */
static unsigned sib_base(uint8_t sib) {
    const unsigned low_bits = sib & 0x7;
    return low_bits;
}
354 
358  class InsnDefn {
359  public:
360  InsnDefn(const std::string &mnemonic, X86InstructionKind kind, unsigned compatibility, uint64_t opcode,
361  unsigned opcode_modifiers, OperandDefn op1=od_none, OperandDefn op2=od_none, OperandDefn op3=od_none,
362  OperandDefn op4=od_none)
363  : mnemonic(mnemonic), kind(kind), compatibility(compatibility), opcode(opcode), opcode_modifiers(opcode_modifiers) {
364  if (op1) operands.push_back(op1);
365  if (op2) operands.push_back(op2);
366  if (op3) operands.push_back(op3);
367  if (op4) operands.push_back(op4);
368  }
369  std::string to_str() const;
370  void set_location(const std::string &s) {
371  location = s;
372  }
373  std::string mnemonic;
374  X86InstructionKind kind;
375  unsigned compatibility;
376  uint64_t opcode;
377  unsigned opcode_modifiers;
378  std::vector<OperandDefn> operands;
379  std::string location; /* location of instruction documentation */
380  };
381 
/* Shapes a memory operand's address expression can take; this is the return type of parse_memref(). mrp_unknown is the
 * first enumerator and therefore has the value zero. */
382  enum MemoryReferencePattern
383  {
384  mrp_unknown,
385  mrp_disp, /* displacement */
386  mrp_index, /* register*scale */
387  mrp_index_disp, /* register*scale + displacement */
388  mrp_base, /* register */
389  mrp_base_disp, /* register + displacement */
390  mrp_base_index, /* register + register*scale */
391  mrp_base_index_disp /* register + register*scale + displacement */
392  };
393 
/* A list of (pointers to) instruction definitions; InsnDictionary keeps one such list per instruction kind. */
395  typedef std::vector<const InsnDefn*> DictionaryPage;
396 
/* Maps an instruction kind to the page holding every definition registered for that kind via define(). */
398  typedef std::map<X86InstructionKind, DictionaryPage> InsnDictionary;
399 
/* Set-up of the static instruction dictionary. The constructor calls initAssemblyRules() whenever `defns` is empty.
 * NOTE(review): the _partN functions are presumably the pieces initAssemblyRules() is split into (none of the
 * definitions are in this header) -- confirm before calling any of them directly. */
401  static void initAssemblyRules();
402  static void initAssemblyRules_part1();
403  static void initAssemblyRules_part2();
404  static void initAssemblyRules_part3();
405  static void initAssemblyRules_part4();
406  static void initAssemblyRules_part5();
407  static void initAssemblyRules_part6();
408  static void initAssemblyRules_part7();
409  static void initAssemblyRules_part8();
410  static void initAssemblyRules_part9();
411 
413  static void define(const InsnDefn *d) {
414  defns[d->kind].push_back(d);
415  }
416 
/* The functions below are only declared in this header; the notes describe what the signatures show and mark anything
 * beyond that as an assumption to be checked against the implementation file. */

/* Returns a string for an instruction kind. */
420  static std::string to_str(X86InstructionKind);
421 
/* Takes an instruction plus a byte sequence (passed by value) and returns a byte sequence. NOTE(review): judging by the
 * name this adjusts the prefix bytes of an encoding -- confirm. */
427  SgUnsignedCharList fixup_prefix_bytes(SgAsmX86Instruction *insn, SgUnsignedCharList source);
428 
/* Returns the bytes for an instruction encoded according to one specific definition. */
431  SgUnsignedCharList assemble(SgAsmX86Instruction *insn, const InsnDefn *defn);
432 
/* Checks a whole instruction against a definition. There is no return value, so NOTE(review): a mismatch is
 * presumably reported some other way (e.g. by throwing) -- confirm. disp and imm look like output parameters. */
436  void matches(const InsnDefn *defn, SgAsmX86Instruction *insn, int64_t *disp, int64_t *imm) const;
437 
/* Checks a single operand expression against one operand definition and reports the result as a bool. */
440  bool matches(OperandDefn, SgAsmExpression*, SgAsmInstruction*, int64_t *disp, int64_t *imm) const;
441 
/* NOTE(review): presumably tests whether `val` is usable as an nbytes-wide relative offset for the instruction --
 * confirm. */
443  static bool matches_rel(SgAsmInstruction*, int64_t val, size_t nbytes);
444 
/* Classifies a memory reference expression as one of the MemoryReferencePattern shapes. The four pointer-to-pointer
 * parameters are outputs, as marked in the declaration. */
448  static MemoryReferencePattern parse_memref(SgAsmInstruction *insn, SgAsmMemoryReferenceExpression *expr,
449  SgAsmRegisterReferenceExpression **base_reg/*out*/,
450  SgAsmRegisterReferenceExpression **index_reg/*out*/,
451  SgAsmValueExpression **scale/*out*/, SgAsmValueExpression **displacement/*out*/);
452 
/* Non-static overload of build_modrm(): works from a definition, an instruction and an operand index instead of raw
 * field values. NOTE(review): sib, displacement and rex are presumably outputs -- confirm. */
454  uint8_t build_modrm(const InsnDefn*, SgAsmX86Instruction*, size_t argno,
455  uint8_t *sib, int64_t *displacement, uint8_t *rex) const;
456 
/* NOTE(review): presumably fills in the reg field of *modrm (and possibly *rex) for operand number argno -- confirm. */
458  void build_modreg(const InsnDefn*, SgAsmX86Instruction*, size_t argno, uint8_t *modrm, uint8_t *rex) const;
459 
/* Returns one byte for the given instruction. NOTE(review): presumably the segment override prefix, if any --
 * confirm, including what is returned when no override is needed. Not const, unlike the builders above. */
462  uint8_t segment_override(SgAsmX86Instruction*);
463 
/* Instruction dictionary shared by every AssemblerX86 (static). define() appends to it, and the constructor calls
 * initAssemblyRules() while it is still empty. */
464  static InsnDictionary defns;
/* Per-assembler flag: false after construction, changed through set_honor_operand_types(). */
465  bool honor_operand_types;
466 };
467 
468 #endif
Base class for references to a machine register.
Base class for machine instructions.
This class contains methods for assembling x86 instructions (SgAsmX86Instruction).
Definition: AssemblerX86.h:26
virtual SgUnsignedCharList assembleProgram(const std::string &source)
Assemble an x86 program from assembly source code using the nasm assembler.
Reference to memory locations.
Represents one Intel x86 machine instruction.
virtual SgUnsignedCharList assembleOne(SgAsmInstruction *)
Assemble an instruction (SgAsmInstruction) into byte code.
Base class for expressions.
Virtual base class for instruction assemblers.
Definition: Assembler.h:49
Base class for values.
bool get_honor_operand_types() const
Returns true if the assembler is honoring operand data types, or false if the assembler is using the ...
Definition: AssemblerX86.h:50
void set_honor_operand_types(bool b)
Causes the assembler to honor (if true) or disregard (if false) the data types of operands when assembling.
Definition: AssemblerX86.h:44