ROSE  0.11.17.0
DisassemblerA64.h
1 #ifndef ROSE_DISASSEMBLER_ARM_H
2 #define ROSE_DISASSEMBLER_ARM_H
3 #include <Disassembler.h>
4 #ifdef ROSE_ENABLE_ASM_A64
5 
6 #include <capstone/capstone.h>
7 
8 namespace Rose {
9 namespace BinaryAnalysis {
10 
15 class DisassemblerArm: public Disassembler { // ARM disassembler built on the Disassembler base class; decoding is delegated to Capstone (see the note below).
16 public:
17  // <rant>
18  //
19  // ARM naming is a total mess! The term "ARM" is used for the company that licenses all this, and the architecture, and
20  // some of the instruction sets, and some of the implementations. For instance, "ARM10E" is an implementation of
21  // "ARMv5TE" architecture having the "A32" (a.k.a., "AArch32") and "T32" (a.k.a., "Thumb", which also sometimes means
22  // both "Thumb" and "Thumb-2") instruction sets, all of which was designed by "Arm Holdings". Another case in point: the
23  // "Thumb-2" instruction set was extended and named "ThumbEE" (why not just "Thumb-3"?), was erroneously called
24  // "Thumb-2EE" in some ARM documentation, and was marketed as "Jazelle RCT". Then there's the inconsistencies with
25  // dashes and spaces, as in "ARM Cortex-A64" and "ARM Neoverse E1" which are both implementations created by Arm
26  // Holdings; and inconsistencies in capitalization as in "ARM" vs. "Arm" (as in "Arm Limited") vs. "arm" (as on the title
27  // page of Arm Limited's instruction reference guide); "Armv8" vs. "ARMv8", etc. It's no wonder that references like the
28  // Wikipedia page for ARM (if "ARM" is even the correct term) are riddled with mistakes, and disassemblers like Capstone
29  // are confusingly ambiguous, and ROSE itself has poor documentation for all things ARM related. The whole thing is a
30  // rat's nest probably created by some overzealous marketing department.
31  //
32  // Since ROSE is using Capstone for the decoding, instead of spending more effort trying to make heads and tails of this
33  // mess, I'm going to just use the same parameters as Capstone, namely a set of non-orthogonal and ill-documented
34  // enums. Best of luck to you.
35  //
36  // </rant>
37 
39  enum Architecture { // Architecture selector; each constant takes its value directly from the matching Capstone CS_ARCH_* constant.
40  ARCH_ARM = CS_ARCH_ARM,
41  ARCH_ARM64 = CS_ARCH_ARM64
42  };
43 
45  enum Mode { // Mode selector; each constant takes its value directly from the matching Capstone CS_MODE_* constant.
46  MODE_ARM32 = CS_MODE_ARM, // probably zero, not really a bit flag
47  MODE_THUMB = CS_MODE_THUMB,
48  MODE_MCLASS = CS_MODE_MCLASS,
49  MODE_V8 = CS_MODE_V8
50  };
51 
53  using Modes = BitFlags<Mode>; // Collection of Mode constants held in a BitFlags container.
54 
55 private:
56  Architecture arch_; // a subset of Capstone's cs_arch constants
57  Modes modes_; // a subset of Capstone's cs_mode constants (warning: nonorthogonal concepts)
58  csh capstone_; // the capstone handle
59  bool capstoneOpened_; // whether capstone_ is initialized
60 
61 public:
63  DisassemblerArm(Architecture arch, Modes modes = Modes()) // Stores arch and modes (modes defaults to a default-constructed Modes).
64  : arch_(arch), modes_(modes), capstoneOpened_(false) { // capstone_ is not set here; capstoneOpened_ starts out false.
65  init(); // Remaining setup happens in init(), which is defined out of line.
66  }
67 
68  ~DisassemblerArm(); // Defined out of line. NOTE(review): presumably releases capstone_ when capstoneOpened_ is set -- confirm in the implementation file.
69 
70  // overrides
71  bool canDisassemble(SgAsmGenericHeader*) const override;
72  Disassembler* clone() const override;
73  Unparser::BasePtr unparser() const override;
74  SgAsmInstruction* disassembleOne(const MemoryMap::Ptr&, rose_addr_t startVa, AddressSet *successors=NULL) override; // successors is optional and defaults to NULL
75  SgAsmInstruction* makeUnknownInstruction(const Exception&) override;
76 
77 private:
78  void init(); // Called from the constructor; defined out of line.
79 
80  // Returns the opcode as a 32-bit value.
81  uint32_t opcode(const cs_insn&);
82  uint32_t opcode(SgAsmInstruction*);
83 
84  // NOTE(review): every helper below is declared in terms of Capstone's arm64-specific types (cs_arm64_op, arm64_reg,
84  // arm64_vess, arm64_extender, arm64_shifter, arm64_vas, cs_arm64) even though Architecture also offers ARCH_ARM --
84  // confirm how ARCH_ARM is handled in the implementation file.
84  // Make a ROSE instruction operand from a Capstone operand
85  SgAsmExpression* makeOperand(const cs_insn&, const cs_arm64_op&);
86 
87  // Make a ROSE register descriptor from a Capstone register enum constant.
88  RegisterDescriptor makeRegister(arm64_reg);
89 
90  // Restrict a register to just part of a register
91  RegisterDescriptor subRegister(RegisterDescriptor reg, int idx, arm64_vess elmtSize);
92 
93 // // Extract a particular element out of the expression and return a new expression.
94 // SgAsmExpression* extractElement(SgAsmExpression*, arm64_vess elmtSizeSpec, int idx);
95 
96  // Extend an expression. Given an expression of some integer type, and given a destination type and an ARM extender function, generate a new expression (if necessary)
97  // that represents the ARM extender function. This may consist of a truncation and/or a signed or unsigned extend operation.
98  SgAsmExpression* extendOperand(SgAsmExpression*, const cs_insn&, arm64_extender, SgAsmType*, arm64_shifter, unsigned shiftAmount) const;
99 
100  // Return a type for register.
101  SgAsmType* registerType(RegisterDescriptor, arm64_vas);
102 
103  // Capstone doesn't return information about how much memory is read for a memory read operand. Therefore, we need to
104  // partially decode instructions ourselves to get this information.
105  SgAsmType* typeForMemoryRead(const cs_insn&);
106 
107  // Change a memory reference expression's address by wrapping it in a SgAsmPreIncrementExpression or SgAsmPostIncrementExpression if necessary.
108  void wrapPrePostIncrement(SgAsmOperandList*, const cs_arm64&);
109 };
110 
111 } // namespace
112 } // namespace
113 
114 #endif
115 #endif
Base class for machine instructions.
const char * Mode(int64_t)
Convert Rose::BinaryAnalysis::DisassemblerArm::Mode enum constant to a string.
Definition: stringify.C:41567
Main namespace for the ROSE library.
Base class for container file headers.
List of operands for an instruction.
Base class for expressions.
Base class for binary types.
const char * Architecture(int64_t)
Convert Rose::BinaryAnalysis::DisassemblerMips::Mips32::Architecture enum constant to a string...
Definition: stringify.C:40051