1#ifndef ROSE_BinaryAnalysis_String_H
2#define ROSE_BinaryAnalysis_String_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
6#include <Rose/Diagnostics.h>
7#include <Rose/BinaryAnalysis/MemoryMap.h>
8#include <Rose/Exception.h>
9#include <Sawyer/CommandLine.h>
10#include <Sawyer/Optional.h>
13namespace BinaryAnalysis {
235 virtual std::string
name()
const = 0;
/** Overrides the base-class name() and returns the constant string "no-op". */
273 virtual std::string
name()
const override {
return "no-op"; }
/** Overrides the base-class name() and returns the constant string "UTF-8". */
296 virtual std::string
name()
const override {
return "UTF-8"; }
/** Overrides the base-class name() and returns the constant string "UTF-16". */
319 virtual std::string
name()
const override {
return "UTF-16"; }
348 virtual std::string
name()
const = 0;
378 size_t octetsPerValue_;
383 : octetsPerValue_(octetsPerValue), sex_(sex), cv_(0) {
384 ASSERT_require(1==octetsPerValue || sex!=ByteOrder::ORDER_UNSPECIFIED);
385 ASSERT_require(octetsPerValue <=
sizeof(
CodeValue));
394 virtual std::string
name()
const override;
423 virtual std::string
name()
const = 0;
452 size_t octetsPerValue_;
457 : octetsPerValue_(octetsPerValue), sex_(sex), length_(0) {
458 ASSERT_require(1==octetsPerValue || sex!=ByteOrder::ORDER_UNSPECIFIED);
459 ASSERT_require(octetsPerValue <=
sizeof(
size_t));
468 virtual std::string
name()
const override;
490 virtual std::string
name()
const = 0;
504 static Ptr instance() {
/** Overrides the base-class name() and returns the constant string "printable ASCII". */
507 virtual std::string
name()
const override {
return "printable ASCII"; }
/** Overrides the base-class name() and returns the constant string "any code point". */
522 virtual std::string
name()
const override {
return "any code point"; }
537 size_t nCodePoints_ = 0;
547 : cef_(cef), ces_(ces), cpp_(cpp) {}
556 virtual std::string
name()
const = 0;
/** Returns the current value of the nCodePoints_ counter without modifying the object.
 *
 *  NOTE(review): presumably this is the count of code points decoded since the last reset -- confirm against the
 *  decode/reset implementations, which are not visible here. */
585 size_t length()
const {
return nCodePoints_; }
650 inst->state_ = state_;
651 inst->codePoints_ = codePoints_;
652 inst->nCodePoints_ = nCodePoints_;
653 inst->declaredLength_ = declaredLength_;
656 virtual std::string
name()
const override;
718 inst->state_ = state_;
719 inst->codePoints_ = codePoints_;
720 inst->nCodePoints_ = nCodePoints_;
721 inst->terminated_ = terminated_;
724 virtual std::string
name()
const override;
763 : encoder_(encoder), where_(where) {}
/** Returns whatever encoder_->length() reports; this object keeps no length of its own here.
 *
 *  encoder_ is dereferenced unconditionally, so it must be non-null when this is called. */
778 size_t length()
const {
return encoder_->length(); }
/** Default constructor: minLength=5, maxLength=-1, maxOverlap=8, keepingOnlyLongest=true.
 *
 *  maxLength is declared as size_t, so the -1 initializer converts to the largest representable size_t value.
 *  NOTE(review): presumably that means "no upper limit" -- confirm with the code that reads maxLength. */
847 Settings(): minLength(5), maxLength(-1), maxOverlap(8), keepingOnlyLongest(true) {}
852 bool discardingCodePoints_;
853 std::vector<StringEncodingScheme::Ptr> encoders_;
854 std::vector<EncodedString> strings_;
/** Read-only access to the encoders_ vector; returns a const reference, no copy is made. */
887 const std::vector<StringEncodingScheme::Ptr>&
encoders()
const {
return encoders_; }
/** Mutable access to the encoders_ vector; the returned reference lets callers modify the list in place. */
888 std::vector<StringEncodingScheme::Ptr>&
encoders() {
return encoders_; }
/** Read-only access to the strings_ vector; returns a const reference, no copy is made. */
964 const std::vector<EncodedString>&
strings()
const {
return strings_; }
/** Mutable access to the strings_ vector; the returned reference lets callers modify the stored strings in place. */
965 std::vector<EncodedString>&
strings() {
return strings_; }
971 std::ostream&
print(std::ostream&)
const;
974std::ostream& operator<<(std::ostream&,
const StringFinder&);
An efficient mapping from an address space to stored data.
virtual std::string name() const override
Name of predicate.
virtual bool isValid(CodePoint) override
Predicate.
Basic character encoding scheme.
virtual CodeValue consume() override
Consume a decoded code value.
virtual Ptr clone() const override
Create a new copy of this encoder.
virtual std::string name() const override
Name of encoder.
virtual State decode(Octet) override
Decode one octet.
virtual Octets encode(CodeValue) override
Encode a code value into a sequence of octets.
virtual void reset() override
Reset the decoder state machine.
Basic length encoding scheme.
virtual std::string name() const override
Name of encoder.
virtual void reset() override
Reset the decoder state machine.
virtual Ptr clone() const override
Create a new copy of this encoder.
virtual size_t consume() override
Consume a decoded length.
virtual Octets encode(size_t) override
Encode a length into a sequence of octets.
virtual State decode(Octet) override
Decode one octet.
Defines the mapping between code values and octets.
virtual State decode(Octet)=0
Decode one octet.
State state() const
Decoder state.
virtual void reset()=0
Reset the decoder state machine.
virtual std::string name() const =0
Name of encoder.
Sawyer::SharedPointer< CharacterEncodingScheme > Ptr
Shared ownership pointer to a CharacterEncodingScheme.
virtual CodeValue consume()=0
Consume a decoded code value.
virtual Octets encode(CodeValue)=0
Encode a code value into a sequence of octets.
virtual Ptr clone() const =0
Create a new copy of this encoder.
Valid code point predicate.
virtual std::string name() const =0
Name of predicate.
Sawyer::SharedPointer< CodePointPredicate > Ptr
Shared ownership pointer to a CodePointPredicate.
virtual bool isValid(CodePoint)=0
Predicate.
An encoder plus interval.
const AddressInterval & where() const
Where the string is located in memory.
StringEncodingScheme::Ptr encoder() const
Information about the string encoding.
size_t length() const
Length of encoded string in code points.
const CodePoints & codePoints() const
Code points associated with the string.
std::string narrow() const
Return code points as a C++ std::string.
size_t size() const
Size of encoded string in bytes.
std::wstring wide() const
Return code points as a C++ std::wstring.
rose_addr_t address() const
Starting address of string in memory.
void decode(const MemoryMap &)
Decodes the string from memory.
Errors for string analysis.
Length-prefixed string encoding scheme.
void lengthEncodingScheme(const LengthEncodingScheme::Ptr &les)
Property: Length encoding scheme.
virtual State decode(Octet) override
Decode one octet.
Sawyer::Optional< size_t > declaredLength() const
Returns the declared length, if any.
LengthEncodingScheme::Ptr lengthEncodingScheme() const
Property: Length encoding scheme.
virtual StringEncodingScheme::Ptr clone() const override
Create a new copy of this encoder.
virtual Octets encode(const CodePoints &) override
Encode a string into a sequence of octets.
virtual std::string name() const override
Name of encoding.
virtual void reset() override
Reset the state machine to an initial state.
Sawyer::SharedPointer< LengthEncodedString > Ptr
Shared ownership pointer to a LengthEncodedString.
Encoding for the length of a string.
State state() const
Decoder state.
virtual void reset()=0
Reset the decoder state machine.
virtual State decode(Octet)=0
Decode one octet.
virtual std::string name() const =0
Name of encoder.
Sawyer::SharedPointer< LengthEncodingScheme > Ptr
Shared ownership pointer to a LengthEncodingScheme.
virtual size_t consume()=0
Consume a decoded length.
virtual Octets encode(size_t)=0
Encode a length into a sequence of octets.
virtual Ptr clone() const =0
Create a new copy of this encoder.
virtual bool isValid(CodePoint) override
Predicate.
virtual std::string name() const override
Name of predicate.
virtual Ptr clone() const =0
Create a new copy of this encoder.
virtual void reset()
Reset the state machine to an initial state.
CharacterEncodingForm::Ptr characterEncodingForm() const
Property: Character encoding format.
void characterEncodingScheme(const CharacterEncodingScheme::Ptr &ces)
Property: Character encoding scheme.
virtual std::string name() const =0
Name of encoding.
virtual Octets encode(const CodePoints &)=0
Encode a string into a sequence of octets.
void codePointPredicate(const CodePointPredicate::Ptr &cpp)
Property: Code point predicate.
CodePoints consume()
Consume pending decoded code points.
CharacterEncodingScheme::Ptr characterEncodingScheme() const
Property: Character encoding scheme.
void characterEncodingForm(const CharacterEncodingForm::Ptr &cef)
Property: Character encoding format.
size_t length() const
Number of code points decoded since reset.
const CodePoints & codePoints() const
Return pending decoded code points without consuming them.
virtual State decode(Octet)=0
Decode one octet.
State state() const
Decoder state.
CodePointPredicate::Ptr codePointPredicate() const
Property: Code point predicate.
Sawyer::SharedPointer< StringEncodingScheme > Ptr
Shared ownership pointer to a StringEncodingScheme.
Analysis to find encoded strings.
Settings & settings()
Property: Analysis settings often set from a command-line.
std::vector< EncodedString > & strings()
Obtain strings that were found.
const std::vector< EncodedString > & strings() const
Obtain strings that were found.
StringFinder & discardingCodePoints(bool b)
Property: Whether to discard code points.
StringFinder & insertCommonEncoders(ByteOrder::Endianness)
Inserts common encodings.
StringFinder()
Constructor.
const std::vector< StringEncodingScheme::Ptr > & encoders() const
Property: List of string encodings.
static Sawyer::CommandLine::SwitchGroup commandLineSwitches(Settings &)
Command-line parser for analysis settings.
const Settings & settings() const
Property: Analysis settings often set from a command-line.
std::ostream & print(std::ostream &) const
Print results.
StringFinder & find(const MemoryMap::ConstConstraints &, Sawyer::Container::MatchFlags flags=0)
Finds strings by searching memory.
StringFinder & insertUncommonEncoders(ByteOrder::Endianness)
Inserts less common encodings.
Sawyer::CommandLine::SwitchGroup commandLineSwitches()
Command-line parser for analysis settings.
bool discardingCodePoints() const
Property: Whether to discard code points.
std::vector< StringEncodingScheme::Ptr > & encoders()
Property: List of string encodings.
StringFinder & reset()
Reset analysis results.
Terminated string encoding scheme.
Sawyer::Optional< CodePoint > terminated() const
Returns the decoded termination character, if any.
virtual Octets encode(const CodePoints &) override
Encode a string into a sequence of octets.
virtual std::string name() const override
Name of encoding.
const CodePoints & terminators() const
Property: string termination code points.
Sawyer::SharedPointer< TerminatedString > Ptr
Shared ownership pointer to a TerminatedString.
CodePoints & terminators()
Property: string termination code points.
virtual State decode(Octet) override
Decode one octet.
virtual void reset() override
Reset the state machine to an initial state.
virtual StringEncodingScheme::Ptr clone() const override
Create a new copy of this encoder.
Base class for all ROSE exceptions.
A collection of related switch declarations.
Constraints are used to select addresses from a memory map.
Value size() const
Size of interval.
T least() const
Returns lower limit.
Holds a value or nothing.
Base class for reference counted objects.
Reference-counting intrusive smart pointer.
@ ORDER_UNSPECIFIED
Endianness is unspecified and unknown.
PrintableAscii::Ptr printableAscii()
Returns a new printable ASCII predicate.
Utf8CharacterEncodingForm::Ptr utf8CharacterEncodingForm()
Returns a new UTF-8 character encoding form.
BasicCharacterEncodingScheme::Ptr basicCharacterEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic character encoding scheme.
AnyCodePoint::Ptr anyCodePoint()
Returns a new predicate that matches all code points.
TerminatedString::Ptr nulTerminatedPrintableAscii()
Returns a new encoder for NUL-terminated printable ASCII strings.
Utf16CharacterEncodingForm::Ptr utf16CharacterEncodingForm()
Returns a new UTF-16 character encoding form.
std::vector< CodePoint > CodePoints
A sequence of code points, i.e., a string.
@ USER_DEFINED_0
First user-defined value.
@ COMPLETED_STATE
Completed state, but not a final state.
@ USER_DEFINED_MAX
Maximum user-defined value.
@ INITIAL_STATE
Initial state just after a reset.
@ ERROR_STATE
Decoder is in an error condition.
@ FINAL_STATE
Final state where nothing more can be decoded.
@ USER_DEFINED_2
Third user-defined value.
@ USER_DEFINED_1
Second user-defined value.
bool isDone(State st)
Returns true for COMPLETED_STATE or FINAL_STATE.
void initDiagnostics()
Initialize the diagnostics facility.
LengthEncodedString::Ptr lengthEncodedPrintableAscii(size_t lengthSize, ByteOrder::Endianness order=ByteOrder::ORDER_UNSPECIFIED)
Returns a new encoder for length-encoded printable ASCII strings.
Sawyer::Message::Facility mlog
Diagnostics specific to string analysis.
uint8_t Octet
One byte in a sequence that encodes a code value.
std::vector< Octet > Octets
A sequence of octets.
std::vector< CodeValue > CodeValues
A sequence of code values.
unsigned CodeValue
One value in a sequence that encodes a code point.
LengthEncodedString::Ptr lengthEncodedPrintableAsciiWide(size_t lengthSize, ByteOrder::Endianness order, size_t charSize)
Returns a new encoder for multi-byte length-encoded printable ASCII strings.
BasicLengthEncodingScheme::Ptr basicLengthEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic length encoding scheme.
unsigned CodePoint
One character in a coded character set.
TerminatedString::Ptr nulTerminatedPrintableAsciiWide(size_t charSize, ByteOrder::Endianness order)
Returns a new encoder for multi-byte NUL-terminated printable ASCII strings.
NoopCharacterEncodingForm::Ptr noopCharacterEncodingForm()
Returns a new no-op character encoding form.
LengthEncodedString::Ptr lengthEncodedString(const LengthEncodingScheme::Ptr &les, const CharacterEncodingForm::Ptr &cef, const CharacterEncodingScheme::Ptr &ces, const CodePointPredicate::Ptr &cpp)
Returns a new length-prefixed string encoder.
unsigned MatchFlags
Flags for matching constraints.
size_t maxOverlap
Whether to allow overlapping strings.
size_t maxLength
Maximum length of matched strings.
bool keepingOnlyLongest
Whether to keep only longest non-overlapping strings.
size_t minLength
Minimum length of matched strings.