ROSE  0.11.98.0
Disassembler/Aarch64.h
1 #ifndef ROSE_BinaryAnalysis_Disassembler_Aarch64_H
2 #define ROSE_BinaryAnalysis_Disassembler_Aarch64_H
3 #include <Rose/BinaryAnalysis/Disassembler/Base.h>
4 #ifdef ROSE_ENABLE_ASM_AARCH64
5 
6 #include <capstone/capstone.h>
7 
8 namespace Rose {
9 namespace BinaryAnalysis {
10 namespace Disassembler {
11 
/** Instruction decoder for the ARM AArch64 architecture.
 *
 *  Decoding is delegated to the Capstone library: this class holds a Capstone handle and declares private helpers
 *  that turn Capstone instructions, operands, and register enums into ROSE AST nodes and register descriptors.
 *
 *  The constructor is protected and the static @ref instance function returns a reference counting @ref Ptr.
 *  NOTE(review): this suggests objects are meant to be created only through @ref instance -- confirm against the
 *  implementation file. */
19 class Aarch64: public Base {
20 public:
21  // <rant>
22  //
23  // ARM naming is a total mess! The term "ARM" is used for the company that licenses all this, and the architecture, and
24  // some of the instruction sets, and some of the implementations. For instance, "ARM10E" is an implementation of
25  // "ARMv5TE" architecture having the "A32" (a.k.a., "AArch32") and "T32" (a.k.a., "Thumb", which also sometimes means
26  // both "Thumb" and "Thumb-2") instruction sets, all of which was designed by "Arm Holdings". Another case in point: the
27  // "Thumb-2" instruction set was extended and named "ThumbEE" (why not just "Thumb-3"?), was erroneously called
28  // "Thumb-2EE" in some ARM documentation, and was marketed as "Jazelle RCT". Then there's the inconsistencies with
29  // dashes and spaces, as in "ARM Cortex-A64" and "ARM Neoverse E1" which are both implementations created by Arm
30  // Holdings; and inconsistencies in capitalization as in "ARM" vs. "Arm" (as in "Arm Limited") vs. "arm" (as on the title
31  // page of Arm Limited's instruction reference guide); "Armv8" vs. "ARMv8", etc. It's no wonder that references like the
32  // Wikipedia page for ARM (if "ARM" is even the correct term) are riddled with mistakes, and disassemblers like Capstone
33  // are confusingly ambiguous, and ROSE itself has poor documentation for all things ARM related. The whole thing is a
34  // rat's nest probably created by some overzealous marketing department.
35  //
36  // Since ROSE is using Capstone for the decoding, instead of spending more effort trying to make heads or tails of this
37  // mess, I'm going to just use the same parameters as Capstone, namely a set of non-orthogonal and ill-documented
38  // enums. Best of luck to you.
39  //
40  // </rant>
41 
    /** Reference counting pointer to an AArch64 decoder. */
43  using Ptr = Aarch64Ptr;
44 
    /** Capstone mode flags understood by this decoder.
     *
     *  Each enumerator takes its numeric value directly from the Capstone @c cs_mode constant of the same name.
     *  NOTE(review): Capstone's header describes CS_MODE_MCLASS as the ARM Cortex-M series mode -- confirm that it
     *  is meaningful for AArch64 decoding. */
46  enum Mode {
47  MODE_MCLASS = CS_MODE_MCLASS,
48  };
49 
    /** Collection of @ref Mode bit flags. */
51  using Modes = BitFlags<Mode>;
52 
53 private:
54  Modes modes_; // a subset of Capstone's cs_mode constants (warning: nonorthogonal concepts)
55  csh capstone_; // the capstone handle
56  bool capstoneOpened_ = false; // whether capstone_ is initialized
57 
58 protected:
59  // Constructor for specific architecture. It is explicit so that a Modes value is never implicitly converted to a
    // decoder, and protected so that only this class and its subclasses can call it directly.
60  explicit Aarch64(Modes);
61 
62 public:
    /** Return a reference counting pointer to a decoder for the specified modes.
     *
     *  When @p modes is omitted, a default-constructed @ref Modes value is used.
     *  NOTE(review): presumably this allocates a new decoder on each call -- the definition is not visible here. */
64  static Ptr instance(Modes modes = Modes());
65 
    // Destructor.
    // NOTE(review): presumably releases the Capstone handle when capstoneOpened_ is set -- confirm in the implementation.
66  ~Aarch64();
67 
68  // overrides (each of the following replaces a virtual function inherited from Base)
69  bool canDisassemble(SgAsmGenericHeader*) const override;
70  Base::Ptr clone() const override;
71  Unparser::BasePtr unparser() const override;
72  SgAsmInstruction* disassembleOne(const MemoryMap::Ptr&, rose_addr_t startVa, AddressSet *successors=nullptr) override;
73  SgAsmInstruction* makeUnknownInstruction(const Exception&) override;
74 
75 private:
    // NOTE(review): presumably initializes capstone_ and sets capstoneOpened_ -- the definition is not visible here.
76  void openCapstone();
77 
78  // Returns the opcode as a 32-bit value.
79  uint32_t opcode(const cs_insn&);
80  uint32_t opcode(SgAsmInstruction*);
81 
82  // Make a ROSE instruction operand from a Capstone operand
83  SgAsmExpression* makeOperand(const cs_insn&, const cs_arm64_op&);
84 
85  // Make a ROSE register descriptor from a Capstone register enum constant.
86  RegisterDescriptor makeRegister(arm64_reg);
87 
88  // Restrict a register to just part of a register
89  RegisterDescriptor subRegister(RegisterDescriptor reg, int idx, arm64_vess elmtSize);
90 
91 // // Extract a particular element out of the expression and return a new expression.
92 // SgAsmExpression* extractElement(SgAsmExpression*, arm64_vess elmtSizeSpec, int idx);
93 
94  // Extend an expression. Given an expression of some integer type, and given a destination type and an ARM extender
95  // function, generate a new expression (if necessary) that represents the ARM extender function. This may consist of a
96  // truncation and/or a signed or unsigned extend operation.
97  SgAsmExpression* extendOperand(SgAsmExpression*, const cs_insn&, arm64_extender, SgAsmType*, arm64_shifter, unsigned shiftAmount) const;
98 
99  // Return a type for register.
100  SgAsmType* registerType(RegisterDescriptor, arm64_vas);
101 
102  // Capstone doesn't return information about how much memory is read for a memory read operand. Therefore, we need to
103  // partially decode instructions ourselves to get this information.
104  SgAsmType* typeForMemoryRead(const cs_insn&);
105 
106  // Change a memory reference expression's address by wrapping it in a SgAsmPreIncrementExpression or
107  // SgAsmPostIncrementExpression if necessary.
108  void wrapPrePostIncrement(SgAsmOperandList*, const cs_arm64&);
109 };
110 
111 } // namespace
112 } // namespace
113 } // namespace
114 
115 #endif
116 #endif
Base class for machine instructions.
Main namespace for the ROSE library.
Sawyer::SharedPointer< Aarch64 > Aarch64Ptr
Reference counted pointer for ARM Aarch64 decoder.
MemoryMapPtr Ptr
Reference counting pointer.
Definition: MemoryMap.h:115
Base class for container file headers.
List of operands for an instruction.
Sawyer::SharedPointer< Node > Ptr
Reference counting pointer.
const char * Mode(int64_t)
Convert Rose::AST::cmdline::graphviz_t::Mode enum constant to a string.
Base class for expressions.
Binary analysis.
Base class for binary types.