ROSE  0.10.7.0
AssemblerX86.h
1 /* Assembly specific to the x86 architecture. */
2 
3 #ifndef ROSE_BinaryAnalysis_AssemblerX86_H
4 #define ROSE_BinaryAnalysis_AssemblerX86_H
5 
6 #include "Assembler.h"
7 #ifdef ROSE_BUILD_BINARY_ANALYSIS_SUPPORT
8 
9 namespace Rose {
10 namespace BinaryAnalysis {
11 
/* This class contains methods for assembling x86 instructions (SgAsmX86Instruction). It derives from Assembler and
 * keeps one static dictionary of instruction definitions (defns) that is shared by every instance. */
28 class AssemblerX86: public Assembler {
29 public:
    /* Constructs an assembler that initially disregards operand data types. The static dictionary is populated by
     * initAssemblyRules() only when it is still empty at construction time. */
30  AssemblerX86()
31  : honor_operand_types(false) {
32  if (defns.size()==0)
33  initAssemblyRules();
34  }
35 
    /* Virtual destructor with an empty body. */
36  virtual ~AssemblerX86() {}
37 
    /* Assemble an instruction (SgAsmInstruction) into byte code. Returns the generated bytes. */
39  virtual SgUnsignedCharList assembleOne(SgAsmInstruction*);
40 
    /* Causes the assembler to honor (if true) or disregard (if false) the data types of operands. */
46  void set_honor_operand_types(bool b) {
47  honor_operand_types = b;
48  }
49 
    /* Returns true if the assembler is honoring operand data types, false otherwise. */
52  bool get_honor_operand_types() const {
53  return honor_operand_types;
54  }
55 
    /* Assemble an x86 program from assembly source code using the nasm assembler. Returns the generated bytes. */
57  virtual SgUnsignedCharList assembleProgram(const std::string &source);
58 
59  /*========================================================================================================================
60  * Members for defining instructions.
61  *========================================================================================================================*/
62 private:
63  /* These bit masks specify how the opcode part of the encoding is generated. The base opcode bytes are specified with
64  * a 64-bit value so that up to eight bytes of opcode can be specified. The bytes generated come from the 64-bit opcode
65  * value in big-endian order but without leading zero bytes. If the 64-bit opcode is zero then a single zero byte is
66  * generated. For instance, the MONITOR instruction has an opcode of 0x0f01c8, generating the encoding 0x0f, 0x01, 0xc8. */
67 
    /* En modifier: bit 7 (od_e_pres) records that the modifier was given and bits 4-6 (od_e_mask) hold n. Each of
     * od_e0..od_e7 is the presence bit combined with n = 0..7. */
71  static const unsigned od_e_mask = 0x00000070; /* mask for numeric value (n) part of En field. */
72  static const unsigned od_e_pres = 0x00000080; /* bit set if En modification was specified. */
73  static const unsigned od_e0 = 0x00000000 | od_e_pres;
74  static const unsigned od_e1 = 0x00000010 | od_e_pres;
75  static const unsigned od_e2 = 0x00000020 | od_e_pres;
76  static const unsigned od_e3 = 0x00000030 | od_e_pres;
77  static const unsigned od_e4 = 0x00000040 | od_e_pres;
78  static const unsigned od_e5 = 0x00000050 | od_e_pres;
79  static const unsigned od_e6 = 0x00000060 | od_e_pres;
80  static const unsigned od_e7 = 0x00000070 | od_e_pres;
    /* Extracts n (0..7) from the En bits of opcode_mods. The presence bit is not examined here. */
81  static size_t od_e_val(unsigned opcode_mods) { return (opcode_mods & od_e_mask)>>4; }
    /* REX modifier: bit 0 (od_rex_pres) records that a REX prefix is present and bits 8-11 (od_rex_mask) hold the low
     * nibble of the REX byte. In the names below the suffix letters b, x, r and w select nibble bits 0x1, 0x2, 0x4 and
     * 0x8 respectively; every constant also carries the presence bit. */
86  static const unsigned od_rex_pres = 0x00000001; /* bit set if REX prefix is present. */
87  static const unsigned od_rex_mask = 0x00000f00; /* mask for low nibble of REX byte. */
88  static const unsigned od_rex = 0x00000000 | od_rex_pres;
89  static const unsigned od_rexb = 0x00000100 | od_rex_pres;
90  static const unsigned od_rexx = 0x00000200 | od_rex_pres;
91  static const unsigned od_rexxb = 0x00000300 | od_rex_pres;
92  static const unsigned od_rexr = 0x00000400 | od_rex_pres;
93  static const unsigned od_rexrb = 0x00000500 | od_rex_pres;
94  static const unsigned od_rexrx = 0x00000600 | od_rex_pres;
95  static const unsigned od_rexrxb = 0x00000700 | od_rex_pres;
96  static const unsigned od_rexw = 0x00000800 | od_rex_pres;
97  static const unsigned od_rexwb = 0x00000900 | od_rex_pres;
98  static const unsigned od_rexwx = 0x00000a00 | od_rex_pres;
99  static const unsigned od_rexwxb = 0x00000b00 | od_rex_pres;
100  static const unsigned od_rexwr = 0x00000c00 | od_rex_pres;
101  static const unsigned od_rexwrb = 0x00000d00 | od_rex_pres;
102  static const unsigned od_rexwrx = 0x00000e00 | od_rex_pres;
103  static const unsigned od_rexwrxb = 0x00000f00 | od_rex_pres;
    /* Builds a REX byte: 0x40 combined with the nibble stored in bits 8-11 of opcode_mods. The presence bit is not
     * examined here. */
104  static uint8_t od_rex_byte(unsigned opcode_mods) { return 0x40 | ((opcode_mods & od_rex_mask) >> 8); }
105 
    /* Single flag in bit 1. NOTE(review): presumably marks that the encoding includes a ModR/M byte -- confirm against
     * the code that consumes opcode_modifiers. */
108  static const unsigned od_modrm = 0x00000002;
109 
    /* Three-bit field in bits 12-14 (od_c_mask); od_cb..od_ct are the values 1..6 within it and zero means the field
     * is unused. NOTE(review): the names resemble the cb/cw/cd/cp/co/ct opcode-column notations of the Intel manual --
     * confirm. */
112  static const unsigned od_c_mask = 0x00007000;
113  static const unsigned od_cb = 0x00001000;
114  static const unsigned od_cw = 0x00002000;
115  static const unsigned od_cd = 0x00003000;
116  static const unsigned od_cp = 0x00004000;
117  static const unsigned od_co = 0x00005000;
118  static const unsigned od_ct = 0x00006000;
119 
    /* Three-bit field in bits 16-18 (od_i_mask); od_ib..od_io are the values 1..4 within it. NOTE(review): presumably
     * the ib/iw/id/io immediate-size notations -- confirm. */
122  static const unsigned od_i_mask = 0x00070000;
123  static const unsigned od_ib = 0x00010000;
124  static const unsigned od_iw = 0x00020000;
125  static const unsigned od_id = 0x00030000;
126  static const unsigned od_io = 0x00040000;
127 
    /* Three-bit field in bits 20-22 (od_r_mask); od_rb..od_ro are the values 1..4 within it. NOTE(review): presumably
     * the +rb/+rw/+rd/+ro register-in-opcode notations -- confirm. */
130  static const unsigned od_r_mask = 0x00700000;
131  static const unsigned od_rb = 0x00100000;
132  static const unsigned od_rw = 0x00200000;
133  static const unsigned od_rd = 0x00300000;
134  static const unsigned od_ro = 0x00400000;
135 
    /* Single flag in bit 2. NOTE(review): presumably the "+i" opcode modifier -- confirm. */
139  static const unsigned od_i = 0x00000004;
140 
141 
    /* Kinds of operand that an instruction definition can name. od_none is the first enumerator and therefore has the
     * value zero; the InsnDefn constructor depends on that when it tests its operand arguments for truth. Do not
     * reorder without checking every user of the numeric values. */
143  enum OperandDefn
144  {
145  od_none,
146  od_AL,
147  od_AX,
148  od_EAX,
149  od_RAX,
150  od_DX,
151  od_CS,
152  od_DS,
153  od_ES,
154  od_FS,
155  od_GS,
156  od_SS,
157  od_rel8,
159  od_rel16,
161  od_rel32,
163  od_rel64,
165  od_ptr16_16,
170  od_ptr16_32,
175  od_ptr16_64,
180  od_r8,
182  od_r16,
184  od_r32,
186  od_r64,
188  od_imm8,
192  od_imm16,
194  od_imm32,
196  od_imm64,
199  od_r_m8,
202  od_r_m16,
207  od_r_m32,
212  od_r_m64,
217  od_m,
218  od_m8,
220  od_m16,
222  od_m32,
224  od_m64,
225  od_m128,
226  od_m16_16,
229  od_m16_32,
232  od_m16_64,
235  od_m16a16,
239  od_m16a32,
244  od_m32a32,
248  od_m16a64,
253  od_moffs8,
256  od_moffs16,
259  od_moffs32,
262  od_moffs64,
265  od_sreg,
267  od_m32fp,
269  od_m64fp,
271  od_m80fp,
273  od_st0,
274  od_st1,
275  od_st2,
276  od_st3,
277  od_st4,
278  od_st5,
279  od_st6,
280  od_st7,
281  od_sti,
282  od_mm,
283  od_mm_m32,
286  od_mm_m64,
289  od_xmm,
291  od_xmm_m16,
292  od_xmm_m32,
295  od_xmm_m64,
298  od_xmm_m128,
301  /* The following are not documented in section 3.1.1.2 but are used elsewhere in the manual */
302  od_XMM0,
303  od_0,
304  od_1,
305  od_m80,
306  od_dec,
307  od_m80bcd,
308  od_m2byte,
309  od_m14_28byte,
310  od_m94_108byte,
311  od_m512byte,
312  od_r16_m16,
313  od_r32_m8,
314  od_r32_m16,
315  od_r64_m16,
316  od_CR0,
317  od_CR7,
318  od_CR8,
319  od_CR0CR7,
320  od_DR0DR7,
321  od_reg,
322  od_CL,
323  };
324 
    /* Two distinct flag bits. NOTE(review): presumably combined into InsnDefn::compatibility to say whether a
     * definition applies in legacy/compatibility mode, 64-bit mode, or both -- confirm against the rule tables. */
326  static const unsigned COMPAT_LEGACY = 0x01;
327  static const unsigned COMPAT_64 = 0x02;
    /* Packs a ModR/M byte as mod (2 bits, highest), reg (3 bits), rm (3 bits, lowest). Each argument is masked to its
     * field width, so excess high bits are silently dropped. */
330  static uint8_t build_modrm(unsigned mod, unsigned reg, unsigned rm) {
331  return ((mod&0x3)<<6) | ((reg&0x7)<<3) | (rm&0x7);
332  }
333 
    /* Returns the mod field (top two bits) of a ModR/M byte. */
335  static unsigned modrm_mod(uint8_t modrm) { return modrm>>6; }
336 
    /* Returns the reg field (bits 3-5) of a ModR/M byte. */
338  static unsigned modrm_reg(uint8_t modrm) { return (modrm>>3) & 0x7; }
339 
    /* Returns the rm field (low three bits) of a ModR/M byte. */
341  static unsigned modrm_rm(uint8_t modrm) { return modrm & 0x7; }
342 
    /* Packs a SIB byte as ss (2 bits, highest), index (3 bits), base (3 bits, lowest). Each argument is masked to its
     * field width, so excess high bits are silently dropped. */
344  static uint8_t build_sib(unsigned ss, unsigned index, unsigned base) {
345  return ((ss&0x3)<<6) | ((index&0x7)<<3) | (base&0x7);
346  }
347 
    /* Returns the ss field (top two bits) of a SIB byte. */
349  static unsigned sib_ss(uint8_t sib) {return sib>>6; }
350 
    /* Returns the index field (bits 3-5) of a SIB byte. */
352  static unsigned sib_index(uint8_t sib) { return (sib>>3) & 0x7; }
353 
    /* Returns the base field (low three bits) of a SIB byte. */
355  static unsigned sib_base(uint8_t sib) { return sib & 0x7; }
356 
    /* One instruction definition: a mnemonic, the instruction kind, compatibility flags, the opcode value, the opcode
     * modifier bits (the od_* constants above) and a list of operand definitions. All members are public. */
360  class InsnDefn {
361  public:
    /* Stores the arguments in the like-named members. Up to four operand definitions may be given; any that equals
     * od_none (zero) is skipped, while later non-zero ones are still appended, so `operands` holds only the
     * non-none operands in argument order. `location` starts out empty. */
362  InsnDefn(const std::string &mnemonic, X86InstructionKind kind, unsigned compatibility, uint64_t opcode,
363  unsigned opcode_modifiers, OperandDefn op1=od_none, OperandDefn op2=od_none, OperandDefn op3=od_none,
364  OperandDefn op4=od_none)
365  : mnemonic(mnemonic), kind(kind), compatibility(compatibility), opcode(opcode), opcode_modifiers(opcode_modifiers) {
366  if (op1) operands.push_back(op1);
367  if (op2) operands.push_back(op2);
368  if (op3) operands.push_back(op3);
369  if (op4) operands.push_back(op4);
370  }
    /* Returns a string form of this definition (defined outside this header). */
371  std::string to_str() const;
    /* Records where the instruction is documented; overwrites any previous value. */
372  void set_location(const std::string &s) {
373  location = s;
374  }
375  std::string mnemonic;
376  X86InstructionKind kind;
377  unsigned compatibility;
378  uint64_t opcode;
379  unsigned opcode_modifiers;
380  std::vector<OperandDefn> operands;
381  std::string location; /* location of instruction documentation */
382  };
383 
    /* Shapes of memory reference; this is the return type of parse_memref(). */
384  enum MemoryReferencePattern
385  {
386  mrp_unknown,
387  mrp_disp, /* displacement */
388  mrp_index, /* register*scale */
389  mrp_index_disp, /* register*scale + displacement */
390  mrp_base, /* register */
391  mrp_base_disp, /* register + displacement */
392  mrp_base_index, /* register + register*scale */
393  mrp_base_index_disp /* register + register*scale + displacement */
394  };
395 
    /* A list of pointers to instruction definitions. */
397  typedef std::vector<const InsnDefn*> DictionaryPage;
398 
    /* Maps an instruction kind to the page of definitions registered for that kind. */
400  typedef std::map<X86InstructionKind, DictionaryPage> InsnDictionary;
401 
    /* Populates the static dictionary; called from the constructor when the dictionary is empty. The numbered parts
     * are declared here and defined elsewhere. NOTE(review): presumably initAssemblyRules() calls each part in turn
     * -- confirm in the implementation file. */
403  static void initAssemblyRules();
404  static void initAssemblyRules_part1();
405  static void initAssemblyRules_part2();
406  static void initAssemblyRules_part3();
407  static void initAssemblyRules_part4();
408  static void initAssemblyRules_part5();
409  static void initAssemblyRules_part6();
410  static void initAssemblyRules_part7();
411  static void initAssemblyRules_part8();
412  static void initAssemblyRules_part9();
413 
    /* Appends d to the dictionary page for d->kind; the map's operator[] creates the page on first use. The pointer is
     * stored as given and d is dereferenced, so it must not be null. */
415  static void define(const InsnDefn *d) {
416  defns[d->kind].push_back(d);
417  }
418 
    /* Returns a string for an instruction kind (defined outside this header). */
422  static std::string to_str(X86InstructionKind);
423 
    /* Takes an instruction and a byte list and returns a byte list. NOTE(review): presumably adjusts the prefix bytes
     * of an already assembled encoding -- see the definition for the exact rules. */
429  SgUnsignedCharList fixup_prefix_bytes(SgAsmX86Instruction *insn, SgUnsignedCharList source);
430 
    /* Assembles insn using one specific definition and returns the resulting bytes. */
433  SgUnsignedCharList assemble(SgAsmX86Instruction *insn, const InsnDefn *defn);
434 
    /* Instruction-level match of insn against defn. disp and imm are pointer parameters, presumably outputs. This
     * overload returns nothing. NOTE(review): presumably a mismatch is reported by throwing -- confirm in the
     * definition. */
438  void matches(const InsnDefn *defn, SgAsmX86Instruction *insn, int64_t *disp, int64_t *imm) const;
439 
    /* Operand-level match of one expression against one operand definition; returns a bool result. disp and imm are
     * pointer parameters, presumably outputs. */
442  bool matches(OperandDefn, SgAsmExpression*, SgAsmInstruction*, int64_t *disp, int64_t *imm) const;
443 
    /* NOTE(review): presumably tests whether val can be encoded as an nbytes-wide relative operand of the given
     * instruction -- confirm in the definition. */
445  static bool matches_rel(SgAsmInstruction*, int64_t val, size_t nbytes);
446 
    /* Classifies a memory reference expression as one of the MemoryReferencePattern values. The base register, index
     * register, scale and displacement are returned through the four pointer-to-pointer parameters marked "out". */
450  static MemoryReferencePattern parse_memref(SgAsmInstruction *insn, SgAsmMemoryReferenceExpression *expr,
451  SgAsmRegisterReferenceExpression **base_reg/*out*/,
452  SgAsmRegisterReferenceExpression **index_reg/*out*/,
453  SgAsmValueExpression **scale/*out*/, SgAsmValueExpression **displacement/*out*/);
454 
    /* Operand-driven overload of build_modrm: returns a ModR/M byte for operand number argno of insn. sib,
     * displacement and rex are pointer parameters, presumably additional outputs -- confirm in the definition. */
456  uint8_t build_modrm(const InsnDefn*, SgAsmX86Instruction*, size_t argno,
457  uint8_t *sib, int64_t *displacement, uint8_t *rex) const;
458 
    /* NOTE(review): presumably fills the reg field of *modrm (and possibly bits of *rex) from operand number argno --
     * confirm in the definition. */
460  void build_modreg(const InsnDefn*, SgAsmX86Instruction*, size_t argno, uint8_t *modrm, uint8_t *rex) const;
461 
    /* Returns one byte for the instruction. NOTE(review): presumably the segment-override prefix, if any -- confirm
     * what value means "none". */
464  uint8_t segment_override(SgAsmX86Instruction*);
465 
    /* Dictionary of instruction definitions shared by all instances; filled through define(). */
466  static InsnDictionary defns;
    /* Per-instance setting read and written by get_honor_operand_types() / set_honor_operand_types(). */
467  bool honor_operand_types;
468 };
469 
470 } // namespace
471 } // namespace
472 
473 #endif
474 #endif
Base class for references to a machine register.
Base class for machine instructions.
bool get_honor_operand_types() const
Returns true if the assembler is honoring operand data types, or false if the assembler is using the ...
Definition: AssemblerX86.h:52
Main namespace for the ROSE library.
This class contains methods for assembling x86 instructions (SgAsmX86Instruction).
Definition: AssemblerX86.h:28
Reference to memory locations.
virtual SgUnsignedCharList assembleOne(SgAsmInstruction *)
Assemble an instruction (SgAsmInstruction) into byte code.
Represents one Intel x86 machine instruction.
Base class for expressions.
Base class for values.
virtual SgUnsignedCharList assembleProgram(const std::string &source)
Assemble an x86 program from assembly source code using the nasm assembler.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
void set_honor_operand_types(bool b)
Causes the assembler to honor (if true) or disregard (if false) the data types of operands when assem...
Definition: AssemblerX86.h:46
Virtual base class for instruction assemblers.
Definition: Assembler.h:57