ROSE  0.9.11.42
AssemblerX86.h
1 /* Assembly specific to the x86 architecture. */
2 
3 #ifndef ROSE_BinaryAnalysis_AssemblerX86_H
4 #define ROSE_BinaryAnalysis_AssemblerX86_H
5 
6 #include "Assembler.h"
7 
8 namespace Rose {
9 namespace BinaryAnalysis {
10 
27 class AssemblerX86: public Assembler {
28 public:
29  AssemblerX86()
30  : honor_operand_types(false) {
31  if (defns.size()==0)
32  initAssemblyRules();
33  }
34 
/* Virtual destructor; the body is empty. */
35  virtual ~AssemblerX86() {}
36 
/* Assemble an instruction (SgAsmInstruction) into byte code, returned as an SgUnsignedCharList. Only the declaration
 * appears in this header. */
38  virtual SgUnsignedCharList assembleOne(SgAsmInstruction*);
39 
45  void set_honor_operand_types(bool b) {
46  honor_operand_types = b;
47  }
48 
51  bool get_honor_operand_types() const {
52  return honor_operand_types;
53  }
54 
/* Assemble an x86 program from assembly source code using the nasm assembler. `source` is the assembly text; the
 * result is returned as an SgUnsignedCharList. Only the declaration appears in this header. */
56  virtual SgUnsignedCharList assembleProgram(const std::string &source);
57 
58  /*========================================================================================================================
59  * Members for defining instructions.
60  *========================================================================================================================*/
61 private:
62  /* These bit masks specify how the opcode part of the encoding is generated. The base opcode bytes are specified with
63  * a 64-bit value so that up to eight bytes of opcode can be specified. The bytes generated come from the 64-bit opcode
64  * value in big-endian order but without leading zero bytes. If the 64-bit opcode is zero then a single zero byte is
65  * generated. For instance, the MONITOR instruction has an opcode of 0x0f01c8, generating the encoding 0x0f, 0x01, 0xc8. */
66 
/* "En" opcode modifier. The numeric value n (0 through 7) is stored in bits 4-6 of the opcode modifier word and
 * bit 7 records that an En modification was specified at all, which is what distinguishes od_e0 from "no En". */
static const unsigned od_e_mask = 7u << 4;              /* bits holding the numeric value (n) of the En field */
static const unsigned od_e_pres = 1u << 7;              /* set whenever an En modification was specified */
static const unsigned od_e0 = (0u << 4) | od_e_pres;
static const unsigned od_e1 = (1u << 4) | od_e_pres;
static const unsigned od_e2 = (2u << 4) | od_e_pres;
static const unsigned od_e3 = (3u << 4) | od_e_pres;
static const unsigned od_e4 = (4u << 4) | od_e_pres;
static const unsigned od_e5 = (5u << 4) | od_e_pres;
static const unsigned od_e6 = (6u << 4) | od_e_pres;
static const unsigned od_e7 = (7u << 4) | od_e_pres;

/* Extracts the numeric value n (0-7) of the En field from a set of opcode modifiers. The presence bit is not
 * consulted, so modifiers without an En field yield zero. */
static size_t od_e_val(unsigned opcode_mods) {
    const unsigned field = opcode_mods & od_e_mask;
    return field >> 4;
}
/* REX prefix opcode modifiers. Bit 0 records that a REX prefix is present; bits 8-11 hold the low nibble of the REX
 * byte. Within that nibble the constant names spell out which bits are set: B = 1, X = 2, R = 4, W = 8. */
static const unsigned od_rex_pres = 1u << 0;            /* set if a REX prefix is present */
static const unsigned od_rex_mask = 0xfu << 8;          /* bits holding the low nibble of the REX byte */
static const unsigned od_rex     = (0x0u << 8) | od_rex_pres;
static const unsigned od_rexb    = (0x1u << 8) | od_rex_pres;
static const unsigned od_rexx    = (0x2u << 8) | od_rex_pres;
static const unsigned od_rexxb   = (0x3u << 8) | od_rex_pres;
static const unsigned od_rexr    = (0x4u << 8) | od_rex_pres;
static const unsigned od_rexrb   = (0x5u << 8) | od_rex_pres;
static const unsigned od_rexrx   = (0x6u << 8) | od_rex_pres;
static const unsigned od_rexrxb  = (0x7u << 8) | od_rex_pres;
static const unsigned od_rexw    = (0x8u << 8) | od_rex_pres;
static const unsigned od_rexwb   = (0x9u << 8) | od_rex_pres;
static const unsigned od_rexwx   = (0xau << 8) | od_rex_pres;
static const unsigned od_rexwxb  = (0xbu << 8) | od_rex_pres;
static const unsigned od_rexwr   = (0xcu << 8) | od_rex_pres;
static const unsigned od_rexwrb  = (0xdu << 8) | od_rex_pres;
static const unsigned od_rexwrx  = (0xeu << 8) | od_rex_pres;
static const unsigned od_rexwrxb = (0xfu << 8) | od_rex_pres;

/* Builds the REX byte for a set of opcode modifiers: the fixed high nibble 0x4 combined with the low nibble stored
 * in bits 8-11. The presence bit is not consulted here. */
static uint8_t od_rex_byte(unsigned opcode_mods) {
    const unsigned low_nibble = (opcode_mods & od_rex_mask) >> 8;
    return 0x40 | low_nibble;
}
104 
/* Single-bit opcode modifier (bit 1). NOTE(review): the name suggests it marks definitions whose encoding includes a
 * ModR/M byte -- confirm against the encoder. */
static const unsigned od_modrm = 1u << 1;

/* Three-bit field in bits 12-14 with named values cb, cw, cd, cp, co and ct (1 through 6). NOTE(review): presumably
 * the size class of a code-offset value that follows the opcode -- confirm against the manual's notation. */
static const unsigned od_c_mask = 7u << 12;
static const unsigned od_cb = 1u << 12;
static const unsigned od_cw = 2u << 12;
static const unsigned od_cd = 3u << 12;
static const unsigned od_cp = 4u << 12;
static const unsigned od_co = 5u << 12;
static const unsigned od_ct = 6u << 12;

/* Three-bit field in bits 16-18 with named values ib, iw, id and io (1 through 4). NOTE(review): presumably the size
 * of an immediate operand -- confirm. */
static const unsigned od_i_mask = 7u << 16;
static const unsigned od_ib = 1u << 16;
static const unsigned od_iw = 2u << 16;
static const unsigned od_id = 3u << 16;
static const unsigned od_io = 4u << 16;

/* Three-bit field in bits 20-22 with named values rb, rw, rd and ro (1 through 4). NOTE(review): presumably the
 * size of a register code that is added to the opcode byte -- confirm. */
static const unsigned od_r_mask = 7u << 20;
static const unsigned od_rb = 1u << 20;
static const unsigned od_rw = 2u << 20;
static const unsigned od_rd = 3u << 20;
static const unsigned od_ro = 4u << 20;

/* Single-bit opcode modifier (bit 2). NOTE(review): meaning is not visible in this header -- confirm. */
static const unsigned od_i = 1u << 2;
139 
140 
/* Kinds of operands that an instruction definition (InsnDefn) can list; the InsnDefn constructor accepts up to four
 * of them. od_none is the first enumerator, so it has the value zero, and the InsnDefn constructor relies on that:
 * it skips any operand argument that tests false. Keep od_none first. The comment inside the enum marks where the
 * enumerators stop following section 3.1.1.2 of the manual. NOTE(review): presumably everything before that comment
 * mirrors that section's operand notation -- confirm which manual and revision is meant. */
142  enum OperandDefn
143  {
144  od_none,
145  od_AL,
146  od_AX,
147  od_EAX,
148  od_RAX,
149  od_DX,
150  od_CS,
151  od_DS,
152  od_ES,
153  od_FS,
154  od_GS,
155  od_SS,
156  od_rel8,
158  od_rel16,
160  od_rel32,
162  od_rel64,
164  od_ptr16_16,
169  od_ptr16_32,
174  od_ptr16_64,
179  od_r8,
181  od_r16,
183  od_r32,
185  od_r64,
187  od_imm8,
191  od_imm16,
193  od_imm32,
195  od_imm64,
198  od_r_m8,
201  od_r_m16,
206  od_r_m32,
211  od_r_m64,
216  od_m,
217  od_m8,
219  od_m16,
221  od_m32,
223  od_m64,
224  od_m128,
225  od_m16_16,
228  od_m16_32,
231  od_m16_64,
234  od_m16a16,
238  od_m16a32,
243  od_m32a32,
247  od_m16a64,
252  od_moffs8,
255  od_moffs16,
258  od_moffs32,
261  od_moffs64,
264  od_sreg,
266  od_m32fp,
268  od_m64fp,
270  od_m80fp,
272  od_st0,
273  od_st1,
274  od_st2,
275  od_st3,
276  od_st4,
277  od_st5,
278  od_st6,
279  od_st7,
280  od_sti,
281  od_mm,
282  od_mm_m32,
285  od_mm_m64,
288  od_xmm,
290  od_xmm_m16,
291  od_xmm_m32,
294  od_xmm_m64,
297  od_xmm_m128,
300  /* The following are not documented in section 3.1.1.2 but are used elsewhere in the manual */
301  od_XMM0,
302  od_0,
303  od_1,
304  od_m80,
305  od_dec,
306  od_m80bcd,
307  od_m2byte,
308  od_m14_28byte,
309  od_m94_108byte,
310  od_m512byte,
311  od_r16_m16,
312  od_r32_m8,
313  od_r32_m16,
314  od_r64_m16,
315  od_CR0,
316  od_CR7,
317  od_CR8,
318  od_CR0CR7,
319  od_DR0DR7,
320  od_reg,
321  od_CL,
322  };
323 
/* Two independent flag bits. NOTE(review): presumably these are the values combined into the `compatibility`
 * argument of an instruction definition, naming the processor modes in which the definition is valid -- confirm. */
static const unsigned COMPAT_LEGACY = 1u << 0;
static const unsigned COMPAT_64 = 1u << 1;
/* Packs the three ModR/M fields into one byte: `mod` in bits 6-7, `reg` in bits 3-5 and `rm` in bits 0-2. Each
 * argument is masked to its field width, so excess high bits are silently dropped. */
static uint8_t build_modrm(unsigned mod, unsigned reg, unsigned rm) {
    const unsigned mod_bits = (mod & 0x3) << 6;
    const unsigned reg_bits = (reg & 0x7) << 3;
    const unsigned rm_bits = rm & 0x7;
    return mod_bits | reg_bits | rm_bits;
}
332 
/* Returns the "mod" field (bits 6-7) of a ModR/M byte. */
static unsigned modrm_mod(uint8_t modrm) {
    const unsigned byte = modrm;
    return byte >> 6;
}
335 
/* Returns the "reg" field (bits 3-5) of a ModR/M byte. */
static unsigned modrm_reg(uint8_t modrm) {
    const unsigned reg_field = modrm & 0x38;
    return reg_field >> 3;
}
338 
/* Returns the "r/m" field (bits 0-2) of a ModR/M byte. */
static unsigned modrm_rm(uint8_t modrm) {
    const unsigned low_three_bits = 0x7;
    return modrm & low_three_bits;
}
341 
/* Packs the three SIB fields into one byte: `ss` (scale) in bits 6-7, `index` in bits 3-5 and `base` in bits 0-2.
 * Each argument is masked to its field width, so excess high bits are silently dropped. */
static uint8_t build_sib(unsigned ss, unsigned index, unsigned base) {
    const unsigned ss_bits = (ss & 0x3) << 6;
    const unsigned index_bits = (index & 0x7) << 3;
    const unsigned base_bits = base & 0x7;
    return ss_bits | index_bits | base_bits;
}
346 
/* Returns the "ss" (scale) field (bits 6-7) of a SIB byte. */
static unsigned sib_ss(uint8_t sib) {
    const unsigned byte = sib;
    return byte >> 6;
}
349 
/* Returns the "index" field (bits 3-5) of a SIB byte. */
static unsigned sib_index(uint8_t sib) {
    const unsigned index_field = sib & 0x38;
    return index_field >> 3;
}
352 
/* Returns the "base" field (bits 0-2) of a SIB byte. */
static unsigned sib_base(uint8_t sib) {
    const unsigned low_three_bits = 0x7;
    return sib & low_three_bits;
}
355 
359  class InsnDefn {
360  public:
361  InsnDefn(const std::string &mnemonic, X86InstructionKind kind, unsigned compatibility, uint64_t opcode,
362  unsigned opcode_modifiers, OperandDefn op1=od_none, OperandDefn op2=od_none, OperandDefn op3=od_none,
363  OperandDefn op4=od_none)
364  : mnemonic(mnemonic), kind(kind), compatibility(compatibility), opcode(opcode), opcode_modifiers(opcode_modifiers) {
365  if (op1) operands.push_back(op1);
366  if (op2) operands.push_back(op2);
367  if (op3) operands.push_back(op3);
368  if (op4) operands.push_back(op4);
369  }
370  std::string to_str() const;
371  void set_location(const std::string &s) {
372  location = s;
373  }
374  std::string mnemonic;
375  X86InstructionKind kind;
376  unsigned compatibility;
377  uint64_t opcode;
378  unsigned opcode_modifiers;
379  std::vector<OperandDefn> operands;
380  std::string location; /* location of instruction documentation */
381  };
382 
/* Shapes that a memory reference expression can take, described by which of base register, scaled index register
 * and displacement are present. This is the return type of parse_memref(). mrp_unknown is the first enumerator and
 * therefore has the value zero. */
383  enum MemoryReferencePattern
384  {
385  mrp_unknown,
386  mrp_disp, /* displacement */
387  mrp_index, /* register*scale */
388  mrp_index_disp, /* register*scale + displacement */
389  mrp_base, /* register */
390  mrp_base_disp, /* register + displacement */
391  mrp_base_index, /* register + register*scale */
392  mrp_base_index_disp /* register + register*scale + displacement */
393  };
394 
/* A list of instruction definitions. The pointers are to const definitions; nothing in this header deletes them. */
396  typedef std::vector<const InsnDefn*> DictionaryPage;
397 
/* Maps an instruction kind to the page holding every definition registered for that kind (see define()). */
399  typedef std::map<X86InstructionKind, DictionaryPage> InsnDictionary;
400 
/* Builders for the static instruction dictionary. The constructor calls initAssemblyRules() whenever the dictionary
 * is empty. NOTE(review): the numbered part functions are presumably invoked by initAssemblyRules(); none of the
 * definitions are in this header -- confirm. */
402  static void initAssemblyRules();
403  static void initAssemblyRules_part1();
404  static void initAssemblyRules_part2();
405  static void initAssemblyRules_part3();
406  static void initAssemblyRules_part4();
407  static void initAssemblyRules_part5();
408  static void initAssemblyRules_part6();
409  static void initAssemblyRules_part7();
410  static void initAssemblyRules_part8();
411  static void initAssemblyRules_part9();
412 
414  static void define(const InsnDefn *d) {
415  defns[d->kind].push_back(d);
416  }
417 
/* Returns a string for an instruction kind. Declared here only. */
421  static std::string to_str(X86InstructionKind);
422 
/* Takes an instruction and a byte list and returns a byte list. NOTE(review): from the name, this presumably
 * corrects or reorders the prefix bytes of already-assembled bytes in `source` -- confirm in the implementation. */
428  SgUnsignedCharList fixup_prefix_bytes(SgAsmX86Instruction *insn, SgUnsignedCharList source);
429 
/* Assembles `insn` according to one specific instruction definition and returns the resulting bytes.
 * NOTE(review): how a definition that does not fit the instruction is reported is not visible here -- confirm. */
432  SgUnsignedCharList assemble(SgAsmX86Instruction *insn, const InsnDefn *defn);
433 
/* Checks an instruction against a whole definition. Returns nothing. NOTE(review): a mismatch is presumably
 * signalled by throwing, and `disp` / `imm` are presumably outputs for the displacement and immediate -- confirm. */
437  void matches(const InsnDefn *defn, SgAsmX86Instruction *insn, int64_t *disp, int64_t *imm) const;
438 
/* Checks a single operand expression against one operand kind and returns whether it matches. NOTE(review):
 * `disp` / `imm` are presumably outputs, as in the overload above -- confirm. */
441  bool matches(OperandDefn, SgAsmExpression*, SgAsmInstruction*, int64_t *disp, int64_t *imm) const;
442 
/* NOTE(review): presumably reports whether `val` is usable as a relative operand of `nbytes` bytes for the given
 * instruction -- confirm in the implementation. */
444  static bool matches_rel(SgAsmInstruction*, int64_t val, size_t nbytes);
445 
/* Classifies a memory reference expression as one of the MemoryReferencePattern shapes. The four trailing pointer
 * parameters are marked as outputs in the signature: base register, index register, scale and displacement. */
449  static MemoryReferencePattern parse_memref(SgAsmInstruction *insn, SgAsmMemoryReferenceExpression *expr,
450  SgAsmRegisterReferenceExpression **base_reg/*out*/,
451  SgAsmRegisterReferenceExpression **index_reg/*out*/,
452  SgAsmValueExpression **scale/*out*/, SgAsmValueExpression **displacement/*out*/);
453 
/* Non-static overload of build_modrm: returns a ModR/M byte for operand number `argno` of the instruction.
 * NOTE(review): `sib`, `displacement` and `rex` are pointers and presumably receive or update the SIB byte, the
 * displacement and the REX byte -- confirm. */
455  uint8_t build_modrm(const InsnDefn*, SgAsmX86Instruction*, size_t argno,
456  uint8_t *sib, int64_t *displacement, uint8_t *rex) const;
457 
/* NOTE(review): presumably fills the "reg" field of `*modrm` (and adjusts `*rex`) from operand number `argno` --
 * confirm in the implementation. */
459  void build_modreg(const InsnDefn*, SgAsmX86Instruction*, size_t argno, uint8_t *modrm, uint8_t *rex) const;
460 
/* Returns one byte for the instruction. NOTE(review): presumably the segment override prefix byte, with some value
 * meaning "none" -- confirm. Unlike its neighbours this method is not const. */
463  uint8_t segment_override(SgAsmX86Instruction*);
464 
/* Instruction dictionary shared by all assembler objects (static). Entries are added by define(); the constructor
 * calls initAssemblyRules() when it finds the dictionary empty. */
465  static InsnDictionary defns;
/* Per-object flag, false after construction; read and written through get_/set_honor_operand_types(). */
466  bool honor_operand_types;
467 };
468 
469 } // namespace
470 } // namespace
471 
472 #endif
Base class for references to a machine register.
Base class for machine instructions.
bool get_honor_operand_types() const
Returns true if the assembler is honoring operand data types, or false if the assembler is using the ...
Definition: AssemblerX86.h:51
Main namespace for the ROSE library.
This class contains methods for assembling x86 instructions (SgAsmX86Instruction).
Definition: AssemblerX86.h:27
Reference to memory locations.
virtual SgUnsignedCharList assembleOne(SgAsmInstruction *)
Assemble an instruction (SgAsmInstruction) into byte code.
Represents one Intel x86 machine instruction.
Base class for expressions.
Base class for values.
virtual SgUnsignedCharList assembleProgram(const std::string &source)
Assemble an x86 program from assembly source code using the nasm assembler.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
void set_honor_operand_types(bool b)
Causes the assembler to honor (if true) or disregard (if false) the data types of operands when assem...
Definition: AssemblerX86.h:45
Virtual base class for instruction assemblers.
Definition: Assembler.h:54