ROSE  0.11.96.0
DisassemblerAarch64.h
1 #ifndef ROSE_BinaryAnalysis_DisassemblerAarch64_H
2 #define ROSE_BinaryAnalysis_DisassemblerAarch64_H
3 #include <Rose/BinaryAnalysis/Disassembler.h>
4 #ifdef ROSE_ENABLE_ASM_AARCH64
5 
6 #include <capstone/capstone.h>
7 
8 namespace Rose {
9 namespace BinaryAnalysis {
10 
// Disassembler for AArch64 machine instructions. Derives from Disassembler and holds a Capstone handle (capstone_);
// per the note below, the actual instruction decoding is delegated to Capstone.
18 class DisassemblerAarch64: public Disassembler {
19 public:
20  // <rant>
21  //
22  // ARM naming is a total mess! The term "ARM" is used for the company that licenses all this, and the architecture, and
23  // some of the instruction sets, and some of the implementations. For instance, "ARM10E" is an implementation of
24  // "ARMv5TE" architecture having the "A32" (a.k.a., "AArch32") and "T32" (a.k.a., "Thumb", which also sometimes means
25  // both "Thumb" and "Thumb-2") instruction sets, all of which was designed by "Arm Holdings". Another case in point: the
26  // "Thumb-2" instruction set was extended and named "ThumbEE" (why not just "Thumb-3"?), was erroneously called
27  // "Thumb-2EE" in some ARM documentation, and was marketed as "Jazelle RCT". Then there's the inconsistencies with
28  // dashes and spaces, as in "ARM Cortex-A64" and "ARM Neoverse E1" which are both implementations created by Arm
29  // Holdings; and inconsistencies in capitalization as in "ARM" vs. "Arm" (as in "Arm Limited") vs. "arm" (as on the title
30  // page of Arm Limited's instruction reference guide); "Armv8" vs. "ARMv8", etc. It's no wonder that references like the
31  // Wikipedia page for ARM (if "ARM" is even the correct term) are riddled with mistakes, and disassemblers like Capstone
32  // are confusingly ambiguous, and ROSE itself has poor documentation for all things ARM related. The whole thing is a
33  // rat's nest probably created by some overzealous marketing department.
34  //
35  // Since ROSE is using Capstone for the decoding, instead of spending more effort trying to make heads and tails of this
36  // mess, I'm going to just use the same parameters as Capstone, namely a set of non-orthogonal and ill-documented
37  // enums. Best of luck to you.
38  //
39  // </rant>
40 
 // Decoder mode flags. Each enumerator takes its numeric value directly from the corresponding Capstone cs_mode
 // constant, so the values can be handed to Capstone unchanged.
42  enum Mode {
43  MODE_MCLASS = CS_MODE_MCLASS,
44  };
45 
 // A set of Mode flags.
47  using Modes = BitFlags<Mode>;
48 
49 private:
50  Modes modes_; // a subset of Capstone's cs_mode constants (warning: non-orthogonal concepts)
51  csh capstone_; // the capstone handle
52  bool capstoneOpened_; // whether capstone_ is initialized
53 
54 public:
 // Construct a disassembler using the specified mode flags (an empty set by default). The Capstone handle is recorded
 // as not yet opened and then init() is called to finish the setup.
56  explicit DisassemblerAarch64(Modes modes = Modes())
57  : modes_(modes), capstoneOpened_(false) {
58  init();
59  }
60 
 // Defined out of line. NOTE(review): presumably closes capstone_ when capstoneOpened_ is set -- confirm in the
 // implementation file.
61  ~DisassemblerAarch64();
62 
63  // Overrides of virtual functions inherited from the Disassembler base class.
64  bool canDisassemble(SgAsmGenericHeader*) const override;
65  Disassembler* clone() const override;
66  Unparser::BasePtr unparser() const override;
67  SgAsmInstruction* disassembleOne(const MemoryMap::Ptr&, rose_addr_t startVa, AddressSet *successors=nullptr) override;
68  SgAsmInstruction* makeUnknownInstruction(const Exception&) override;
69 
70 private:
 // Initialization helper invoked by the constructor; defined out of line.
71  void init();
72 
73  // Returns the opcode as a 32-bit value.
74  uint32_t opcode(const cs_insn&);
75  uint32_t opcode(SgAsmInstruction*);
76 
77  // Make a ROSE instruction operand from a Capstone operand
78  SgAsmExpression* makeOperand(const cs_insn&, const cs_arm64_op&);
79 
80  // Make a ROSE register descriptor from a Capstone register enum constant.
81  RegisterDescriptor makeRegister(arm64_reg);
82 
83  // Restrict a register to just part of a register
84  RegisterDescriptor subRegister(RegisterDescriptor reg, int idx, arm64_vess elmtSize);
85 
86 // // Extract a particular element out of the expression and return a new expression.
87 // SgAsmExpression* extractElement(SgAsmExpression*, arm64_vess elmtSizeSpec, int idx);
88 
89  // Extend an expression. Given an expression of some integer type, and given a destination type and an ARM extender
90  // function, generate a new expression (if necessary) that represents the ARM extender function. This may consist of a
91  // truncation and/or a signed or unsigned extend operation.
92  SgAsmExpression* extendOperand(SgAsmExpression*, const cs_insn&, arm64_extender, SgAsmType*, arm64_shifter, unsigned shiftAmount) const;
93 
94  // Return a type for register.
95  SgAsmType* registerType(RegisterDescriptor, arm64_vas);
96 
97  // Capstone doesn't return information about how much memory is read for a memory read operand. Therefore, we need to
98  // partially decode instructions ourselves to get this information.
99  SgAsmType* typeForMemoryRead(const cs_insn&);
100 
101  // Change a memory reference expression's address by wrapping it in a SgAsmPreIncrementExpression or
102  // SgAsmPostIncrementExpression if necessary.
103  void wrapPrePostIncrement(SgAsmOperandList*, const cs_arm64&);
104 };
105 
106 } // namespace
107 } // namespace
108 
109 #endif
110 #endif
Base class for machine instructions.
Main namespace for the ROSE library.
Base class for container file headers.
List of operands for an instruction.
const char * Mode(int64_t)
Convert Rose::AST::cmdline::graphviz_t::Mode enum constant to a string.
Base class for expressions.
Base class for binary types.