ROSE  0.11.101.0
LlvmSemantics.h
1 #ifndef ROSE_BinaryAnalysis_InstructionSemantics_LlvmSemantics_H
2 #define ROSE_BinaryAnalysis_InstructionSemantics_LlvmSemantics_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <Rose/BinaryAnalysis/BasicTypes.h>
7 #include <Rose/BinaryAnalysis/InstructionSemantics/SymbolicSemantics.h>
8 #include <Rose/BinaryAnalysis/InstructionSemantics/DispatcherX86.h>
9 #include <Rose/CommandLine.h>
10 
11 namespace Rose {
12 namespace BinaryAnalysis {
13 namespace InstructionSemantics {
14 
16 namespace LlvmSemantics {
17 
// Convenience aliases used throughout the LlvmSemantics namespace.
// List of register descriptors.
18 typedef std::vector<RegisterDescriptor> RegisterDescriptors;
19 
// Alias for the symbolic-expression node list type.
25 typedef SymbolicExpression::Nodes TreeNodes;
26 
29 
// Register states reuse the generic register state pointer from BaseSemantics.
30 typedef BaseSemantics::RegisterStateGenericPtr RegisterStatePtr;
32 
// Memory states reuse the pointer type from SymbolicSemantics.
33 typedef SymbolicSemantics::MemoryStatePtr MemoryStatePtr;
35 
// Whole-machine states reuse the pointer type from BaseSemantics.
36 typedef BaseSemantics::StatePtr StatePtr;
38 
// Shared-ownership pointer to LLVM RISC operators.
40 typedef boost::shared_ptr<class RiscOperators> RiscOperatorsPtr;
41 
43 public:
46 
49 
50 private:
53 
54  Rewrites rewrites; // maps expressions to LLVM variables
55  Variables variables; // ROSE-to-LLVM variable map; name includes sigil
56  RegisterStatePtr prev_regstate; // most recently emitted register state
57  RegisterDescriptors important_registers; // registers that should be emitted to LLVM
58  TreeNodes mem_writes; // memory write operations (OP_WRITE expressions)
59  int indent_level; // level of indentation (might be negative, but prefix() clips to zero)
60  std::string indent_string; // white space per indentation level
61  int llvmVersion_; // 1000000*major + 1000*minor + patch. e.g., 3005000 = llvm-3.5.0
62 
64  // Real constructors
65 protected:
// Protected constructors: one takes a prototypical value, the other an existing state;
// both also take an SMT solver. Presumably callers are expected to use the static
// instance* factories below instead -- confirm against the implementation.
66  explicit RiscOperators(const BaseSemantics::SValuePtr &protoval, const SmtSolverPtr&);
67 
68  explicit RiscOperators(const BaseSemantics::StatePtr&, const SmtSolverPtr&);
69 
71  // Static allocating constructors
72 public:
73  ~RiscOperators();
74 
// Instantiates a new RiscOperators object from a register dictionary. The solver
// argument defaults to a default-constructed SmtSolverPtr.
77  static RiscOperatorsPtr instanceFromRegisters(const RegisterDictionaryPtr&, const SmtSolverPtr &solver = SmtSolverPtr());
78 
// Instantiates a new RiscOperators object with specified prototypical values.
80  static RiscOperatorsPtr instanceFromProtoval(const BaseSemantics::SValuePtr &protoval,
81  const SmtSolverPtr &solver = SmtSolverPtr());
82 
// Instantiates a new RiscOperators object with specified state.
84  static RiscOperatorsPtr instanceFromState(const BaseSemantics::StatePtr&, const SmtSolverPtr &solver = SmtSolverPtr());
85 
87  // Virtual constructors
88 public:
90  const SmtSolverPtr &solver = SmtSolverPtr()) const override;
91 
93  const SmtSolverPtr &solver = SmtSolverPtr()) const override;
94 
96  // Dynamic pointer casts
97 public:
// Run-time promotion of a base RiscOperators pointer to an LLVM RiscOperators pointer.
// NOTE(review): behavior on a pointer of the wrong dynamic type is not visible here -- confirm.
100  static RiscOperatorsPtr promote(const BaseSemantics::RiscOperatorsPtr&);
101 
103  // Properties
104 public:
// Property: LLVM version number, encoded as 1000000*major + 1000*minor + patch
// (e.g., 3005000 for llvm-3.5.0). The getter returns the stored value; the setter
// stores its argument without any validation.
112  int llvmVersion() const { return llvmVersion_; }
113  void llvmVersion(int v) { llvmVersion_ = v; }
116  // Methods we override from the super class
118 public:
120  const BaseSemantics::SValuePtr &dflt,
121  const BaseSemantics::SValuePtr &cond) override;
122  virtual void writeMemory(RegisterDescriptor segreg, const BaseSemantics::SValuePtr &addr,
123  const BaseSemantics::SValuePtr &data, const BaseSemantics::SValuePtr &cond) override;
124 
126  // Methods to control indentation of LLVM output
127 public:
// Increase indentation by nlevels levels (pass a negative value to decrease it).
// Returns the nlevels argument itself, not the resulting indentation level.
131  int indent(int nlevels=1) { indent_level += nlevels; return nlevels; }
132 
134  std::string prefix() const;
135 
// Scope guard for indentation: each constructor calls ops->indent(nlevels) and the
// destructor calls ops->indent(-nlevels), so the adjustment lasts for the object's lifetime.
// NOTE(review): copy operations are neither declared nor deleted, so a copied guard would
// run indent(-nlevels) a second time on destruction -- confirm instances are never copied.
137  struct Indent {
138  RiscOperators *ops; // operators whose indentation is adjusted
139  RiscOperatorsPtr ops_ptr; // set only by the shared-pointer constructor; keeps a reference while the guard lives
140  int nlevels; // amount added on construction and subtracted on destruction
// Construct from a shared pointer: stores a copy of the pointer, then indents.
// The pointer is dereferenced without a null check.
141  explicit Indent(const RiscOperatorsPtr &ops_ptr, int nlevels=1): ops_ptr(ops_ptr), nlevels(nlevels) {
142  ops = ops_ptr.get();
143  ops->indent(nlevels);
144  }
// Construct from a raw pointer: ops_ptr stays default-constructed, so the caller must
// keep *ops alive until this guard is destroyed. No null check is performed.
145  explicit Indent(RiscOperators *ops, int nlevels=1): ops(ops), nlevels(nlevels) {
146  ops->indent(nlevels);
147  }
// Undo the adjustment made by the constructor.
148  ~Indent() {
149  ops->indent(-nlevels);
150  }
151  };
152 
154  // New methods to control/query the machine state
155 public:
// NOTE(review): what reset() clears is not visible in this header -- see the implementation.
157  virtual void reset();
158 
// Return the list of registers that need to be emitted to LLVM.
161  virtual const RegisterDescriptors& get_important_registers();
162 
// Return the list of important registers that are stored.
165  virtual RegisterDescriptors get_stored_registers();
166 
// Return the list of important registers that have been modified since the last call to make_current().
168  virtual RegisterDescriptors get_modified_registers();
169 
172 
// Return the value of the instruction pointer.
174  virtual SValuePtr get_instruction_pointer();
175 
// Return the list of memory writes that have occurred since the last call to make_current().
// Returns a const reference to the mem_writes member; nothing is copied.
179  virtual const TreeNodes& get_memory_writes() { return mem_writes; }
180 
// Mark the current state as having been emitted.
182  virtual void make_current();
183 
// Register a rewrite from an expression to a leaf.
185  virtual void add_rewrite(const ExpressionPtr &from, const LeafPtr &to);
186 
// Register an LLVM variable. NOTE(review): presumably the returned string is the LLVM name -- confirm.
189  virtual std::string add_variable(const LeafPtr&);
190 
// Returns the LLVM name for a variable, including the sigil.
193  virtual std::string get_variable(const LeafPtr&);
194 
196  // New methods to emit the machine state
197 public:
// Output LLVM global register declarations for the specified registers.
199  virtual void emit_register_declarations(std::ostream&, const RegisterDescriptors&);
200 
// Output LLVM global register definitions for the specified registers.
202  virtual void emit_register_definitions(std::ostream&, const RegisterDescriptors&);
203 
// Output LLVM global variable reads that are needed to define the specified registers.
230  virtual void emit_prerequisites(std::ostream&, const RegisterDescriptors&, const RegisterDictionaryPtr&);
231 
// Output an LLVM branch instruction.
234  virtual void emit_next_eip(std::ostream&, SgAsmInstruction *latest_insn);
235 
// Output changed memory state.
237  virtual void emit_memory_writes(std::ostream&);
238 
// Output LLVM to bring the LLVM state up to date with respect to the ROSE state.
240  virtual void emit_changed_state(std::ostream&);
241 
243  // New methods to return snippets of LLVM as strings or expressions
244 public:
// Obtain the LLVM type name for an integer of nbits bits.
246  virtual std::string llvm_integer_type(size_t nbits);
247 
// Convert a ROSE variable or integer to an LLVM term.
249  virtual std::string llvm_term(const ExpressionPtr&);
250 
// Convert a ROSE variable to an LLVM lvalue.
253  virtual std::string llvm_lvalue(const LeafPtr&);
254 
// Create a temporary variable of nbits bits.
256  virtual LeafPtr next_temporary(size_t nbits);
257 
// Obtain the name for an LLVM label, excluding the "%" sigil.
259  virtual std::string next_label();
260 
// Obtain a label for a virtual address.
262  virtual std::string addr_label(rose_addr_t);
263 
// Obtain a label for a function.
265  virtual std::string function_label(SgAsmFunction*);
266 
268  // New methods to emit LLVM code for an expression.
269 public:
// Emit LLVM statements for an expression. Three overloads accept a semantic value,
// a general expression, or a leaf; each returns a leaf.
287  virtual LeafPtr emit_expression(std::ostream&, const SValuePtr&);
288  virtual LeafPtr emit_expression(std::ostream&, const ExpressionPtr&);
289  virtual LeafPtr emit_expression(std::ostream&, const LeafPtr&);
292 protected:
// Emit an assignment and add a rewrite rule.
296  virtual LeafPtr emit_assignment(std::ostream&, const ExpressionPtr &rhs);
297 
// Each of the following helpers emits one operation as LLVM instructions to the given
// stream. Those returning ExpressionPtr presumably yield an expression for the result
// of the emitted operation -- confirm against the implementation.
304  virtual ExpressionPtr emit_zero_extend(std::ostream&, const ExpressionPtr &value, size_t nbits);
305  virtual ExpressionPtr emit_sign_extend(std::ostream&, const ExpressionPtr &value, size_t nbits);
306  virtual ExpressionPtr emit_truncate(std::ostream&, const ExpressionPtr &value, size_t nbits);
307  virtual ExpressionPtr emit_unsigned_resize(std::ostream&, const ExpressionPtr &value, size_t nbits);
308  virtual ExpressionPtr emit_binary(std::ostream&, const std::string &llvm_op, const ExpressionPtr&, const ExpressionPtr&);
309  virtual ExpressionPtr emit_signed_binary(std::ostream&, const std::string &llvm_op, const ExpressionPtr&, const ExpressionPtr&);
310  virtual ExpressionPtr emit_unsigned_binary(std::ostream&, const std::string &llvm_op, const ExpressionPtr&, const ExpressionPtr&);
311  virtual ExpressionPtr emit_logical_right_shift(std::ostream&, const ExpressionPtr &value, const ExpressionPtr &amount);
312  virtual ExpressionPtr emit_logical_right_shift_ones(std::ostream&, const ExpressionPtr &value, const ExpressionPtr &amount);
313  virtual ExpressionPtr emit_arithmetic_right_shift(std::ostream&, const ExpressionPtr &value, const ExpressionPtr &amount);
314  virtual ExpressionPtr emit_left_shift(std::ostream&, const ExpressionPtr &value, const ExpressionPtr &amount);
315  virtual ExpressionPtr emit_left_shift_ones(std::ostream&, const ExpressionPtr &value, const ExpressionPtr &amount);
316  virtual ExpressionPtr emit_lssb(std::ostream&, const ExpressionPtr&);
317  virtual ExpressionPtr emit_mssb(std::ostream&, const ExpressionPtr&);
318  virtual ExpressionPtr emit_extract(std::ostream&, const ExpressionPtr &value, const ExpressionPtr &from, size_t result_nbits);
319  virtual ExpressionPtr emit_invert(std::ostream&, const ExpressionPtr &value);
320  virtual ExpressionPtr emit_left_associative(std::ostream&, const std::string &llvm_op, const TreeNodes &operands);
// Note: unlike its neighbors, emit_concat takes its operand list by value.
321  virtual ExpressionPtr emit_concat(std::ostream&, TreeNodes operands);
322  virtual ExpressionPtr emit_signed_divide(std::ostream&, const ExpressionPtr &numerator, const ExpressionPtr &denominator);
323  virtual ExpressionPtr emit_unsigned_divide(std::ostream&, const ExpressionPtr &numerator, const ExpressionPtr &denominator);
324  virtual ExpressionPtr emit_signed_modulo(std::ostream&, const ExpressionPtr &numerator, const ExpressionPtr &denominator);
325  virtual ExpressionPtr emit_unsigned_modulo(std::ostream&, const ExpressionPtr &numerator, const ExpressionPtr &denominator);
326  virtual ExpressionPtr emit_signed_multiply(std::ostream&, const TreeNodes &operands);
327  virtual ExpressionPtr emit_unsigned_multiply(std::ostream&, const TreeNodes &operands);
328  virtual ExpressionPtr emit_rotate_left(std::ostream&, const ExpressionPtr &value, const ExpressionPtr &amount);
329  virtual ExpressionPtr emit_rotate_right(std::ostream&, const ExpressionPtr &value, const ExpressionPtr &amount);
330  virtual ExpressionPtr emit_compare(std::ostream&, const std::string &llvm_op, const ExpressionPtr&, const ExpressionPtr&);
331  virtual ExpressionPtr emit_ite(std::ostream&, const ExpressionPtr &cond, const ExpressionPtr&, const ExpressionPtr&);
332  virtual ExpressionPtr emit_memory_read(std::ostream&, const ExpressionPtr &address, size_t nbits);
333  virtual ExpressionPtr emit_global_read(std::ostream&, const std::string &varname, size_t nbits);
334  virtual void emit_memory_write(std::ostream&, const ExpressionPtr &address, const ExpressionPtr &value);
336 };
337 
339 typedef boost::shared_ptr<class Transcoder> TranscoderPtr;
340 
// Translates machine instructions to LLVM output using an instruction dispatcher and
// LLVM RISC operators. Instances are obtained through the static factory methods; the
// constructor is protected.
342 class Transcoder {
343 public:
346 
347 private:
348  RiscOperatorsPtr operators;
349  BaseSemantics::DispatcherPtr dispatcher;
350  bool emit_funcfrags; // emit BBs that aren't part of the CFG?
351  bool quiet_errors; // catch exceptions and emit an LLVM comment instead?
352 
353 protected:
354  explicit Transcoder(const BaseSemantics::DispatcherPtr&);
355 
356 public:
357  ~Transcoder();
358 
// Factory method to create a new transcoder for an arbitrary machine architecture.
361  static TranscoderPtr instance(const BaseSemantics::DispatcherPtr&);
362 
// Factory method to create a new transcoder for 32-bit X86 instructions.
364  static TranscoderPtr instanceX86();
365 
// Property: LLVM version number.
374  int llvmVersion() const;
375  void llvmVersion(int version);
// Property to determine whether function fragments should be emitted.
// NOTE(review): "Fragements" is misspelled, but the name is part of the public interface.
383  bool emitFunctionFragements() const { return emit_funcfrags; }
384  void emitFunctionFragements(bool b) { emit_funcfrags = b; }
// Property to control what happens when a translation exception occurs.
391  bool quietErrors() const { return quiet_errors; }
392  void quietErrors(bool b) { quiet_errors = b; }
// Emit the file prologue to a stream. The overload without a stream presumably
// returns the same text as a string -- confirm.
397  void emitFilePrologue(std::ostream&);
398  std::string emitFilePrologue();
// Emit function declarations for the given AST.
403  void emitFunctionDeclarations(SgNode *ast, std::ostream&);
404  std::string emitFunctionDeclarations(SgNode *ast);
// Translate a single machine instruction to LLVM instructions.
410  void transcodeInstruction(SgAsmInstruction*, std::ostream&);
// Transcode a basic block of machine instructions to LLVM instructions.
// NOTE(review): the meaning of the size_t result is not visible in this header.
417  size_t transcodeBasicBlock(SgAsmBlock*, std::ostream&);
418  std::string transcodeBasicBlock(SgAsmBlock*);
// Transcode an entire function to LLVM instructions.
424  size_t transcodeFunction(SgAsmFunction*, std::ostream&);
425  std::string transcodeFunction(SgAsmFunction*);
// Transcode an entire binary interpretation.
431  void transcodeInterpretation(SgAsmInterpretation*, std::ostream&);
434 };
435 
436 } // namespace
437 } // namespace
438 } // namespace
439 } // namespace
440 
441 #endif
442 #endif
std::string prefix() const
Return indentation string.
static TranscoderPtr instanceX86()
Factory method to create a new transcoder for 32-bit X86 instructions.
virtual std::string llvm_integer_type(size_t nbits)
Obtain the LLVM type name for an integer.
virtual ExpressionPtr emit_unsigned_divide(std::ostream &, const ExpressionPtr &numerator, const ExpressionPtr &denominator)
Emit an operation as LLVM instructions.
static TranscoderPtr instance(const BaseSemantics::DispatcherPtr &)
Factory method to create a new transcoder for an arbitrary machine architecture.
Instruction basic block.
static RiscOperatorsPtr instanceFromState(const BaseSemantics::StatePtr &, const SmtSolverPtr &solver=SmtSolverPtr())
Instantiates a new RiscOperators object with specified state.
size_t transcodeBasicBlock(SgAsmBlock *, std::ostream &)
Transcode a basic block of machine instructions to LLVM instructions.
void emitFunctionFragements(bool b)
Property to determine whether function fragments should be emitted.
virtual void emit_register_definitions(std::ostream &, const RegisterDescriptors &)
Output LLVM global register definitions for the specified registers.
boost::shared_ptr< RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to a RISC operators object.
virtual void writeMemory(RegisterDescriptor segreg, const BaseSemantics::SValuePtr &addr, const BaseSemantics::SValuePtr &data, const BaseSemantics::SValuePtr &cond) override
Writes a value to memory.
static RiscOperatorsPtr instanceFromProtoval(const BaseSemantics::SValuePtr &protoval, const SmtSolverPtr &solver=SmtSolverPtr())
Instantiates a new RiscOperators object with specified prototypical values.
Base class for machine instructions.
virtual RegisterDescriptors get_modified_registers()
Return the list of important registers that have been modified since the last call to make_current()...
static RiscOperatorsPtr promote(const BaseSemantics::RiscOperatorsPtr &)
Run-time promotion of a base RiscOperators pointer to Llvm operators.
virtual ExpressionPtr emit_binary(std::ostream &, const std::string &llvm_op, const ExpressionPtr &, const ExpressionPtr &)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_left_shift(std::ostream &, const ExpressionPtr &value, const ExpressionPtr &amount)
Emit an operation as LLVM instructions.
virtual void emit_register_declarations(std::ostream &, const RegisterDescriptors &)
Output LLVM global register declarations for the specified registers.
virtual ExpressionPtr emit_global_read(std::ostream &, const std::string &varname, size_t nbits)
Emit an operation as LLVM instructions.
Represents a synthesized function.
virtual void emit_next_eip(std::ostream &, SgAsmInstruction *latest_insn)
Output an LLVM branch instruction.
virtual ExpressionPtr emit_logical_right_shift_ones(std::ostream &, const ExpressionPtr &value, const ExpressionPtr &amount)
Emit an operation as LLVM instructions.
virtual std::string get_variable(const LeafPtr &)
Returns the LLVM name for a variable, including the sigil.
virtual std::string addr_label(rose_addr_t)
Obtain a label for a virtual address.
Main namespace for the ROSE library.
virtual ExpressionPtr emit_concat(std::ostream &, TreeNodes operands)
Emit an operation as LLVM instructions.
virtual std::string llvm_term(const ExpressionPtr &)
Convert a ROSE variable or integer to an LLVM term.
bool quietErrors() const
Property to control what happens when a translation exception occurs.
void transcodeInstruction(SgAsmInstruction *, std::ostream &)
Translate a single machine instruction to LLVM instructions.
virtual ExpressionPtr emit_arithmetic_right_shift(std::ostream &, const ExpressionPtr &value, const ExpressionPtr &amount)
Emit an operation as LLVM instructions.
Reference-counting intrusive smart pointer.
Definition: SharedPointer.h:68
boost::shared_ptr< State > StatePtr
Shared-ownership pointer to a semantic state.
virtual ExpressionPtr emit_signed_multiply(std::ostream &, const TreeNodes &operands)
Emit an operation as LLVM instructions.
virtual BaseSemantics::RiscOperatorsPtr create(const BaseSemantics::SValuePtr &protoval, const SmtSolverPtr &solver=SmtSolverPtr()) const override
Virtual allocating constructor.
virtual RegisterDescriptors get_stored_registers()
Return the list of important registers that are stored.
boost::shared_ptr< Dispatcher > DispatcherPtr
Shared-ownership pointer to a semantics instruction dispatcher.
virtual ExpressionPtr emit_left_shift_ones(std::ostream &, const ExpressionPtr &value, const ExpressionPtr &amount)
Emit an operation as LLVM instructions.
virtual void emit_memory_writes(std::ostream &)
Output changed memory state.
void quietErrors(bool b)
Property to control what happens when a translation exception occurs.
virtual ExpressionPtr emit_left_associative(std::ostream &, const std::string &llvm_op, const TreeNodes &operands)
Emit an operation as LLVM instructions.
boost::shared_ptr< class RegisterStateGeneric > RegisterStateGenericPtr
Shared-ownership pointer to generic register states.
int indent(int nlevels=1)
Increase indentation by nlevels levels.
virtual void make_current()
Mark the current state as having been emitted.
virtual const RegisterDescriptors & get_important_registers()
Return the list of registers that needs to be emitted to LLVM.
virtual ExpressionPtr emit_unsigned_modulo(std::ostream &, const ExpressionPtr &numerator, const ExpressionPtr &denominator)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_unsigned_multiply(std::ostream &, const TreeNodes &operands)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_rotate_left(std::ostream &, const ExpressionPtr &value, const ExpressionPtr &amount)
Emit an operation as LLVM instructions.
Describes (part of) a physical CPU register.
virtual LeafPtr emit_expression(std::ostream &, const SValuePtr &)
Emit LLVM statements for an expression.
This class represents the base class for all IR nodes within Sage III.
Definition: Cxx_Grammar.h:9433
virtual std::string add_variable(const LeafPtr &)
Register an LLVM variable.
Type of values manipulated by the SymbolicSemantics domain.
boost::shared_ptr< class RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to LLVM RISC operations.
Definition: LlvmSemantics.h:40
virtual ExpressionPtr emit_invert(std::ostream &, const ExpressionPtr &value)
Emit an operation as LLVM instructions.
void emitFunctionDeclarations(SgNode *ast, std::ostream &)
Emit function declarations.
virtual ExpressionPtr emit_mssb(std::ostream &, const ExpressionPtr &)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_lssb(std::ostream &, const ExpressionPtr &)
Emit an operation as LLVM instructions.
virtual void emit_prerequisites(std::ostream &, const RegisterDescriptors &, const RegisterDictionaryPtr &)
Output LLVM global variable reads that are needed to define the specified registers and pending memor...
bool emitFunctionFragements() const
Property to determine whether function fragments should be emitted.
Binary analysis.
virtual const TreeNodes & get_memory_writes()
Return the list of memory writes that have occurred since the last call to make_current().
Defines RISC operators for the SymbolicSemantics domain.
virtual ExpressionPtr emit_zero_extend(std::ostream &, const ExpressionPtr &value, size_t nbits)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_logical_right_shift(std::ostream &, const ExpressionPtr &value, const ExpressionPtr &amount)
Emit an operation as LLVM instructions.
virtual RegisterDescriptor get_insn_pointer_register()
Return the descriptor for the instruction pointer register.
virtual ExpressionPtr emit_ite(std::ostream &, const ExpressionPtr &cond, const ExpressionPtr &, const ExpressionPtr &)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_extract(std::ostream &, const ExpressionPtr &value, const ExpressionPtr &from, size_t result_nbits)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_rotate_right(std::ostream &, const ExpressionPtr &value, const ExpressionPtr &amount)
Emit an operation as LLVM instructions.
virtual BaseSemantics::SValuePtr readMemory(RegisterDescriptor segreg, const BaseSemantics::SValuePtr &addr, const BaseSemantics::SValuePtr &dflt, const BaseSemantics::SValuePtr &cond) override
Reads a value from memory.
virtual LeafPtr emit_assignment(std::ostream &, const ExpressionPtr &rhs)
Emit an assignment and add a rewrite rule.
size_t transcodeFunction(SgAsmFunction *, std::ostream &)
Transcode an entire function to LLVM instructions.
virtual ExpressionPtr emit_truncate(std::ostream &, const ExpressionPtr &value, size_t nbits)
Emit an operation as LLVM instructions.
virtual SValuePtr get_instruction_pointer()
Return the value of the instruction pointer.
static RiscOperatorsPtr instanceFromRegisters(const RegisterDictionaryPtr &, const SmtSolverPtr &solver=SmtSolverPtr())
Instantiates a new RiscOperators object and configures it to use semantic values and states that are ...
void transcodeInterpretation(SgAsmInterpretation *, std::ostream &)
Transcode an entire binary interpretation.
virtual std::string next_label()
Obtain the name for an LLVM label, excluding the "%" sigil.
virtual SmtSolverPtr solver() const
Property: Satisfiability modulo theories (SMT) solver.
virtual ExpressionPtr emit_signed_binary(std::ostream &, const std::string &llvm_op, const ExpressionPtr &, const ExpressionPtr &)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_memory_read(std::ostream &, const ExpressionPtr &address, size_t nbits)
Emit an operation as LLVM instructions.
Leaf node of an expression tree for instruction semantics.
Interior node of an expression tree for instruction semantics.
virtual SValuePtr protoval() const
Property: Prototypical semantic value.
virtual void add_rewrite(const ExpressionPtr &from, const LeafPtr &to)
Register a rewrite.
virtual ExpressionPtr emit_unsigned_resize(std::ostream &, const ExpressionPtr &value, size_t nbits)
Emit an operation as LLVM instructions.
Base class for semantics machine states.
Definition: State.h:39
virtual ExpressionPtr emit_signed_divide(std::ostream &, const ExpressionPtr &numerator, const ExpressionPtr &denominator)
Emit an operation as LLVM instructions.
virtual std::string function_label(SgAsmFunction *)
Obtain a label for a function.
Represents an interpretation of a binary container.
virtual void emit_changed_state(std::ostream &)
Output LLVM to bring the LLVM state up to date with respect to the ROSE state.
virtual ExpressionPtr emit_sign_extend(std::ostream &, const ExpressionPtr &value, size_t nbits)
Emit an operation as LLVM instructions.
virtual std::string llvm_lvalue(const LeafPtr &)
Convert a ROSE variable to an LLVM lvalue.
virtual ExpressionPtr emit_unsigned_binary(std::ostream &, const std::string &llvm_op, const ExpressionPtr &, const ExpressionPtr &)
Emit an operation as LLVM instructions.
virtual ExpressionPtr emit_signed_modulo(std::ostream &, const ExpressionPtr &numerator, const ExpressionPtr &denominator)
Emit an operation as LLVM instructions.
boost::shared_ptr< class Transcoder > TranscoderPtr
Shared-ownership pointer to an LLVM transcoder.
virtual LeafPtr next_temporary(size_t nbits)
Create a temporary variable.
virtual ExpressionPtr emit_compare(std::ostream &, const std::string &llvm_op, const ExpressionPtr &, const ExpressionPtr &)
Emit an operation as LLVM instructions.
virtual void emit_memory_write(std::ostream &, const ExpressionPtr &address, const ExpressionPtr &value)
Emit an operation as LLVM instructions.
int llvmVersion() const
Property: LLVM version number.