1#ifndef ROSE_BinaryAnalysis_String_H 
    2#define ROSE_BinaryAnalysis_String_H 
    3#include <featureTests.h> 
    4#ifdef ROSE_ENABLE_BINARY_ANALYSIS 
    6#include <Rose/Diagnostics.h> 
    7#include <Rose/BinaryAnalysis/MemoryMap.h> 
    8#include <Rose/Exception.h> 
    9#include <Sawyer/CommandLine.h> 
   10#include <Sawyer/Optional.h> 
   13namespace BinaryAnalysis {
 
  235    virtual std::string 
name() 
const = 0;
 
 
  273    virtual std::string 
name()
 const override { 
return "no-op"; }
 
 
  296    virtual std::string 
name()
 const override { 
return "UTF-8"; }
 
 
  319    virtual std::string 
name()
 const override { 
return "UTF-16"; }
 
 
  348    virtual std::string 
name() 
const = 0;
 
 
  378    size_t octetsPerValue_;
 
  383        : octetsPerValue_(octetsPerValue), sex_(sex), cv_(0) {
 
  384        ASSERT_require(1==octetsPerValue || sex!=ByteOrder::ORDER_UNSPECIFIED);
 
  385        ASSERT_require(octetsPerValue <= 
sizeof(
CodeValue));
 
  394    virtual std::string 
name() 
const override;
 
 
  423    virtual std::string 
name() 
const = 0;
 
 
  452    size_t octetsPerValue_;
 
  457        : octetsPerValue_(octetsPerValue), sex_(sex), length_(0) {
 
  458        ASSERT_require(1==octetsPerValue || sex!=ByteOrder::ORDER_UNSPECIFIED);
 
  459        ASSERT_require(octetsPerValue <= 
sizeof(
size_t));
 
  468    virtual std::string 
name() 
const override;
 
 
  490    virtual std::string 
name() 
const = 0;
 
 
  504    static Ptr instance() {
 
  507    virtual std::string 
name()
 const override { 
return "printable ASCII"; }
 
 
  522    virtual std::string 
name()
 const override { 
return "any code point"; }
 
 
  537    size_t nCodePoints_ = 0;                            
 
  547        : cef_(cef), ces_(ces), cpp_(cpp) {}
 
  556    virtual std::string 
name() 
const = 0;
 
  585    size_t length()
 const { 
return nCodePoints_; }
 
 
  650        inst->state_ = state_;
 
  651        inst->codePoints_ = codePoints_;
 
  652        inst->nCodePoints_ = nCodePoints_;
 
  653        inst->declaredLength_ = declaredLength_;
 
 
  656    virtual std::string 
name() 
const override;
 
 
  718        inst->state_ = state_;
 
  719        inst->codePoints_ = codePoints_;
 
  720        inst->nCodePoints_ = nCodePoints_;
 
  721        inst->terminated_ = terminated_;
 
 
  724    virtual std::string 
name() 
const override;
 
 
  763        : encoder_(encoder), where_(where) {}
 
  778    size_t length()
 const { 
return encoder_->length(); }
 
 
  847        Settings(): minLength(5), maxLength(-1), maxOverlap(8), keepingOnlyLongest(true) {}
 
 
  852    bool discardingCodePoints_;                         
 
  853    std::vector<StringEncodingScheme::Ptr> encoders_;   
 
  854    std::vector<EncodedString> strings_;                
 
  887    const std::vector<StringEncodingScheme::Ptr>& 
encoders()
 const { 
return encoders_; }
 
  888    std::vector<StringEncodingScheme::Ptr>& 
encoders() { 
return encoders_; }
 
  964    const std::vector<EncodedString>& 
strings()
 const { 
return strings_; }
 
  965    std::vector<EncodedString>& 
strings() { 
return strings_; }
 
  971    std::ostream& 
print(std::ostream&) 
const;
 
 
  974std::ostream& operator<<(std::ostream&, 
const StringFinder&);
 
An efficient mapping from an address space to stored data.
 
virtual std::string name() const override
Name of predicate.
 
virtual bool isValid(CodePoint) override
Predicate.
 
Basic character encoding scheme.
 
virtual CodeValue consume() override
Consume a decoded code value.
 
virtual Ptr clone() const override
Create a new copy of this encoder.
 
virtual std::string name() const override
Name of encoder.
 
virtual State decode(Octet) override
Decode one octet.
 
virtual Octets encode(CodeValue) override
Encode a code value into a sequence of octets.
 
virtual void reset() override
Reset the decoder state machine.
 
Basic length encoding scheme.
 
virtual std::string name() const override
Name of encoder.
 
virtual void reset() override
Reset the decoder state machine.
 
virtual Ptr clone() const override
Create a new copy of this encoder.
 
virtual size_t consume() override
Consume a decoded length.
 
virtual Octets encode(size_t) override
Encode a length into a sequence of octets.
 
virtual State decode(Octet) override
Decode one octet.
 
Defines the mapping between code values and octets.
 
virtual State decode(Octet)=0
Decode one octet.
 
State state() const
Decoder state.
 
virtual void reset()=0
Reset the decoder state machine.
 
virtual std::string name() const =0
Name of encoder.
 
Sawyer::SharedPointer< CharacterEncodingScheme > Ptr
Shared ownership pointer to a CharacterEncodingScheme.
 
virtual CodeValue consume()=0
Consume a decoded code value.
 
virtual Octets encode(CodeValue)=0
Encode a code value into a sequence of octets.
 
virtual Ptr clone() const =0
Create a new copy of this encoder.
 
Valid code point predicate.
 
virtual std::string name() const =0
Name of predicate.
 
Sawyer::SharedPointer< CodePointPredicate > Ptr
Shared ownership pointer to a CodePointPredicate.
 
virtual bool isValid(CodePoint)=0
Predicate.
 
An encoder plus interval.
 
const AddressInterval & where() const
Where the string is located in memory.
 
StringEncodingScheme::Ptr encoder() const
Information about the string encoding.
 
size_t length() const
Length of encoded string in code points.
 
const CodePoints & codePoints() const
Code points associated with the string.
 
std::string narrow() const
Return code points as a C++ std::string.
 
size_t size() const
Size of encoded string in bytes.
 
std::wstring wide() const
Return code points as a C++ std::wstring.
 
void decode(const MemoryMap &)
Decodes the string from memory.
 
Address address() const
Starting address of string in memory.
 
Errors for string analysis.
 
Length-prefixed string encoding scheme.
 
void lengthEncodingScheme(const LengthEncodingScheme::Ptr &les)
Property: Length encoding scheme.
 
virtual State decode(Octet) override
Decode one octet.
 
Sawyer::Optional< size_t > declaredLength() const
Returns the declared length, if any.
 
LengthEncodingScheme::Ptr lengthEncodingScheme() const
Property: Length encoding scheme.
 
virtual StringEncodingScheme::Ptr clone() const override
Create a new copy of this encoder.
 
virtual Octets encode(const CodePoints &) override
Encode a string into a sequence of octets.
 
virtual std::string name() const override
Name of encoding.
 
virtual void reset() override
Reset the state machine to an initial state.
 
Sawyer::SharedPointer< LengthEncodedString > Ptr
Shared ownership pointer to a LengthEncodedString.
 
Encoding for the length of a string.
 
State state() const
Decoder state.
 
virtual void reset()=0
Reset the decoder state machine.
 
virtual State decode(Octet)=0
Decode one octet.
 
virtual std::string name() const =0
Name of encoder.
 
Sawyer::SharedPointer< LengthEncodingScheme > Ptr
Shared ownership pointer to a LengthEncodingScheme.
 
virtual size_t consume()=0
Consume a decoded length.
 
virtual Octets encode(size_t)=0
Encode a length into a sequence of octets.
 
virtual Ptr clone() const =0
Create a new copy of this encoder.
 
virtual bool isValid(CodePoint) override
Predicate.
 
virtual std::string name() const override
Name of predicate.
 
virtual Ptr clone() const =0
Create a new copy of this encoder.
 
virtual void reset()
Reset the state machine to an initial state.
 
CharacterEncodingForm::Ptr characterEncodingForm() const
Property: Character encoding form.
 
void characterEncodingScheme(const CharacterEncodingScheme::Ptr &ces)
Property: Character encoding scheme.
 
virtual std::string name() const =0
Name of encoding.
 
virtual Octets encode(const CodePoints &)=0
Encode a string into a sequence of octets.
 
void codePointPredicate(const CodePointPredicate::Ptr &cpp)
Property: Code point predicate.
 
CodePoints consume()
Consume pending decoded code points.
 
CharacterEncodingScheme::Ptr characterEncodingScheme() const
Property: Character encoding scheme.
 
void characterEncodingForm(const CharacterEncodingForm::Ptr &cef)
Property: Character encoding form.
 
size_t length() const
Number of code points decoded since reset.
 
const CodePoints & codePoints() const
Return pending decoded code points without consuming them.
 
virtual State decode(Octet)=0
Decode one octet.
 
State state() const
Decoder state.
 
CodePointPredicate::Ptr codePointPredicate() const
Property: Code point predicate.
 
Sawyer::SharedPointer< StringEncodingScheme > Ptr
Shared ownership pointer to a StringEncodingScheme.
 
Analysis to find encoded strings.
 
Settings & settings()
Property: Analysis settings often set from a command-line.
 
std::vector< EncodedString > & strings()
Obtain strings that were found.
 
const std::vector< EncodedString > & strings() const
Obtain strings that were found.
 
StringFinder & discardingCodePoints(bool b)
Property: Whether to discard code points.
 
StringFinder & insertCommonEncoders(ByteOrder::Endianness)
Inserts common encodings.
 
StringFinder()
Constructor.
 
const std::vector< StringEncodingScheme::Ptr > & encoders() const
Property: List of string encodings.
 
static Sawyer::CommandLine::SwitchGroup commandLineSwitches(Settings &)
Command-line parser for analysis settings.
 
const Settings & settings() const
Property: Analysis settings often set from a command-line.
 
std::ostream & print(std::ostream &) const
Print results.
 
StringFinder & find(const MemoryMap::ConstConstraints &, Sawyer::Container::MatchFlags flags=0)
Finds strings by searching memory.
 
StringFinder & insertUncommonEncoders(ByteOrder::Endianness)
Inserts less common encodings.
 
Sawyer::CommandLine::SwitchGroup commandLineSwitches()
Command-line parser for analysis settings.
 
bool discardingCodePoints() const
Property: Whether to discard code points.
 
std::vector< StringEncodingScheme::Ptr > & encoders()
Property: List of string encodings.
 
StringFinder & reset()
Reset analysis results.
 
Terminated string encoding scheme.
 
Sawyer::Optional< CodePoint > terminated() const
Returns the decoded termination character, if any.
 
virtual Octets encode(const CodePoints &) override
Encode a string into a sequence of octets.
 
virtual std::string name() const override
Name of encoding.
 
const CodePoints & terminators() const
Property: string termination code points.
 
Sawyer::SharedPointer< TerminatedString > Ptr
Shared ownership pointer to a TerminatedString.
 
CodePoints & terminators()
Property: string termination code points.
 
virtual State decode(Octet) override
Decode one octet.
 
virtual void reset() override
Reset the state machine to an initial state.
 
virtual StringEncodingScheme::Ptr clone() const override
Create a new copy of this encoder.
 
Base class for all ROSE exceptions.
 
A collection of related switch declarations.
 
Constraints are used to select addresses from a memory map.
 
Value size() const
Size of interval.
 
T least() const
Returns lower limit.
 
Holds a value or nothing.
 
Base class for reference counted objects.
 
Reference-counting intrusive smart pointer.
 
@ ORDER_UNSPECIFIED
Endianness is unspecified and unknown.
 
PrintableAscii::Ptr printableAscii()
Returns a new printable ASCII predicate.
 
Utf8CharacterEncodingForm::Ptr utf8CharacterEncodingForm()
Returns a new UTF-8 character encoding form.
 
BasicCharacterEncodingScheme::Ptr basicCharacterEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic character encoding scheme.
 
AnyCodePoint::Ptr anyCodePoint()
Returns a new predicate that matches all code points.
 
TerminatedString::Ptr nulTerminatedPrintableAscii()
Returns a new encoder for NUL-terminated printable ASCII strings.
 
Utf16CharacterEncodingForm::Ptr utf16CharacterEncodingForm()
Returns a new UTF-16 character encoding form.
 
std::vector< CodePoint > CodePoints
A sequence of code points, i.e., a string.
 
@ USER_DEFINED_0
First user-defined value.
 
@ COMPLETED_STATE
Completed state, but not a final state.
 
@ USER_DEFINED_MAX
Maximum user-defined value.
 
@ INITIAL_STATE
Initial state just after a reset.
 
@ ERROR_STATE
Decoder is in an error condition.
 
@ FINAL_STATE
Final state where nothing more can be decoded.
 
@ USER_DEFINED_2
Third user-defined value.
 
@ USER_DEFINED_1
Second user-defined value.
 
bool isDone(State st)
Returns true for COMPLETED_STATE or FINAL_STATE.
 
void initDiagnostics()
Initialize the diagnostics facility.
 
LengthEncodedString::Ptr lengthEncodedPrintableAscii(size_t lengthSize, ByteOrder::Endianness order=ByteOrder::ORDER_UNSPECIFIED)
Returns a new encoder for length-encoded printable ASCII strings.
 
Sawyer::Message::Facility mlog
Diagnostics specific to string analysis.
 
uint8_t Octet
One byte in a sequence that encodes a code value.
 
std::vector< Octet > Octets
A sequence of octets.
 
std::vector< CodeValue > CodeValues
A sequence of code values.
 
unsigned CodeValue
One value in a sequence that encodes a code point.
 
LengthEncodedString::Ptr lengthEncodedPrintableAsciiWide(size_t lengthSize, ByteOrder::Endianness order, size_t charSize)
Returns a new encoder for multi-byte length-encoded printable ASCII strings.
 
BasicLengthEncodingScheme::Ptr basicLengthEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic length encoding scheme.
 
unsigned CodePoint
One character in a coded character set.
 
TerminatedString::Ptr nulTerminatedPrintableAsciiWide(size_t charSize, ByteOrder::Endianness order)
Returns a new encoder for multi-byte NUL-terminated printable ASCII strings.
 
NoopCharacterEncodingForm::Ptr noopCharacterEncodingForm()
Returns a new no-op character encoding form.
 
LengthEncodedString::Ptr lengthEncodedString(const LengthEncodingScheme::Ptr &les, const CharacterEncodingForm::Ptr &cef, const CharacterEncodingScheme::Ptr &ces, const CodePointPredicate::Ptr &cpp)
Returns a new length-prefixed string encoder.
 
std::uint64_t Address
Address.
 
unsigned MatchFlags
Flags for matching constraints.
 
size_t maxOverlap
Whether to allow overlapping strings.
 
size_t maxLength
Maximum length of matched strings.
 
bool keepingOnlyLongest
Whether to keep only longest non-overlapping strings.
 
size_t minLength
Minimum length of matched strings.