1 #ifndef ROSE_BinaryAnalysis_String_H
2 #define ROSE_BinaryAnalysis_String_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 #include <Diagnostics.h>
8 #include <RoseException.h>
9 #include <Sawyer/CommandLine.h>
10 #include <Sawyer/Optional.h>
// Abstract interface: every member below is pure virtual and converts between code points and
// code values.
// NOTE(review): the enclosing class header is outside this excerpt; presumably the character
// encoding form base class -- confirm.

// Create a new copy of this object.
232 virtual Ptr clone()
const = 0;
// Name of this object as a string.
235 virtual std::string name()
const = 0;
// Encode one code point as a sequence of code values.
240 virtual CodeValues encode(CodePoint) = 0;
// Feed one code value to the decoder; the return value is a decoder State.
249 virtual State decode(CodeValue) = 0;
// Returns a CodePoint. Presumably hands back (and removes) the decoded code point -- confirm.
254 virtual CodePoint consume() = 0;
// Reset the decoder. Presumably returns it to its initial state -- confirm.
257 virtual void reset() = 0;
// Overrides for the implementation whose name is "no-op". Only name() is defined inline; the
// remaining members are declared here without bodies.

// Returns the fixed name "no-op".
273 virtual std::string
name() const ROSE_OVERRIDE {
return "no-op"; }
// Encode one code point as a sequence of code values.
274 virtual CodeValues encode(CodePoint cp) ROSE_OVERRIDE;
// Feed one code value to the decoder; returns a decoder State.
275 virtual State decode(CodeValue) ROSE_OVERRIDE;
// Returns a CodePoint. Presumably the decoded code point -- confirm against the definition.
276 virtual CodePoint consume() ROSE_OVERRIDE;
// Reset the decoder.
277 virtual
void reset() ROSE_OVERRIDE;
// Overrides for the implementation whose name is "UTF-8". Only name() is defined inline; the
// remaining members are declared here without bodies.

// Returns the fixed name "UTF-8".
296 virtual std::string
name() const ROSE_OVERRIDE {
return "UTF-8"; }
// Encode one code point as a sequence of code values.
297 virtual CodeValues encode(CodePoint cp) ROSE_OVERRIDE;
// Feed one code value to the decoder; returns a decoder State.
298 virtual State decode(CodeValue) ROSE_OVERRIDE;
// Returns a CodePoint. Presumably the decoded code point -- confirm against the definition.
299 virtual CodePoint consume() ROSE_OVERRIDE;
// Reset the decoder.
300 virtual
void reset() ROSE_OVERRIDE;
// Overrides for the implementation whose name is "UTF-16". Only name() is defined inline; the
// remaining members are declared here without bodies.

// Returns the fixed name "UTF-16".
319 virtual std::string
name() const ROSE_OVERRIDE {
return "UTF-16"; }
// Encode one code point as a sequence of code values.
320 virtual CodeValues encode(CodePoint cp) ROSE_OVERRIDE;
// Feed one code value to the decoder; returns a decoder State.
321 virtual State decode(CodeValue) ROSE_OVERRIDE;
// Returns a CodePoint. Presumably the decoded code point -- confirm against the definition.
322 virtual CodePoint consume() ROSE_OVERRIDE;
// Reset the decoder.
323 virtual
void reset() ROSE_OVERRIDE;
// Abstract interface: every member below is pure virtual and converts between code values and
// octets.
// NOTE(review): the enclosing class header is outside this excerpt; presumably the character
// encoding scheme base class -- confirm.

// Create a new copy of this object.
345 virtual Ptr clone()
const = 0;
// Name of this object as a string.
348 virtual std::string name()
const = 0;
// Encode one code value as a sequence of octets.
352 virtual Octets encode(CodeValue) = 0;
// Feed one octet to the decoder; the return value is a decoder State.
361 virtual State decode(Octet) = 0;
// Returns a CodeValue. Presumably hands back (and removes) the decoded value -- confirm.
366 virtual CodeValue consume() = 0;
// Reset the decoder. Presumably returns it to its initial state -- confirm.
369 virtual void reset() = 0;
// Number of octets per value; the constructor that follows asserts it is at most sizeof(CodeValue).
378 size_t octetsPerValue_;
// Byte order; the constructor that follows asserts it is specified unless octetsPerValue is 1.
379 ByteOrder::Endianness sex_;
383 : octetsPerValue_(octetsPerValue), sex_(sex), cv_(0) {
384 ASSERT_require(1==octetsPerValue || sex!=ByteOrder::ORDER_UNSPECIFIED);
385 ASSERT_require(octetsPerValue <=
sizeof(CodeValue));
388 static Ptr instance(
size_t octetsPerValue, ByteOrder::Endianness sex = ByteOrder::ORDER_UNSPECIFIED) {
391 virtual Ptr
clone() const ROSE_OVERRIDE {
// Overrides of the code value/octet interface, declared here without bodies.
// NOTE(review): presumably members of the basic character encoding scheme -- confirm.

// Name of this object as a string.
394 virtual std::string name() const ROSE_OVERRIDE;
// Encode one code value as a sequence of octets.
395 virtual Octets encode(CodeValue) ROSE_OVERRIDE;
// Feed one octet to the decoder; returns a decoder State.
396 virtual
State decode(Octet) ROSE_OVERRIDE;
// Returns a CodeValue. Presumably the decoded value -- confirm against the definition.
397 virtual CodeValue consume() ROSE_OVERRIDE;
// Reset the decoder.
398 virtual
void reset() ROSE_OVERRIDE;
// Abstract interface: every member below is pure virtual and converts between a size_t value and
// octets.
// NOTE(review): the enclosing class header is outside this excerpt; presumably the length
// encoding scheme base class (encoding for the length of a string) -- confirm.

// Create a new copy of this object.
420 virtual Ptr clone()
const = 0;
// Name of this object as a string.
423 virtual std::string name()
const = 0;
// Encode a size_t value as a sequence of octets.
426 virtual Octets encode(
size_t) = 0;
// Feed one octet to the decoder; the return value is a decoder State.
435 virtual State decode(Octet) = 0;
// Returns a size_t. Presumably hands back (and removes) the decoded length -- confirm.
440 virtual size_t consume() = 0;
// Reset the decoder. Presumably returns it to its initial state -- confirm.
443 virtual void reset() = 0;
// Number of octets per value; the constructor that follows asserts it is at most sizeof(size_t).
452 size_t octetsPerValue_;
// Byte order; the constructor that follows asserts it is specified unless octetsPerValue is 1.
453 ByteOrder::Endianness sex_;
457 : octetsPerValue_(octetsPerValue), sex_(sex), length_(0) {
458 ASSERT_require(1==octetsPerValue || sex!=ByteOrder::ORDER_UNSPECIFIED);
459 ASSERT_require(octetsPerValue <=
sizeof(
size_t));
462 static Ptr instance(
size_t octetsPerValue, ByteOrder::Endianness sex = ByteOrder::ORDER_UNSPECIFIED) {
465 virtual Ptr
clone() const ROSE_OVERRIDE {
// Overrides of the size_t/octet interface, declared here without bodies.
// NOTE(review): presumably members of the basic length encoding scheme -- confirm.

// Name of this object as a string.
468 virtual std::string name() const ROSE_OVERRIDE;
// Encode a size_t value as a sequence of octets.
469 virtual Octets encode(
size_t) ROSE_OVERRIDE;
// Feed one octet to the decoder; returns a decoder State.
470 virtual
State decode(Octet) ROSE_OVERRIDE;
// Returns a size_t. Presumably the decoded length -- confirm against the definition.
471 virtual
size_t consume() ROSE_OVERRIDE;
// Reset the decoder.
472 virtual
void reset() ROSE_OVERRIDE;
// Pure virtual predicate interface over code points.

// Name of predicate.
490 virtual std::string name()
const = 0;
// Predicate: reports whether the given code point is valid.
493 virtual bool isValid(CodePoint) = 0;
504 static Ptr instance() {
// Name of predicate: always "printable ASCII".
507 virtual std::string
name() const ROSE_OVERRIDE {
return "printable ASCII"; }
// Predicate, declared here without a body.
// NOTE(review): presumably accepts only printable ASCII code points -- confirm against the definition.
508 virtual bool isValid(CodePoint) ROSE_OVERRIDE;
// Name of predicate: always "any code point".
522 virtual std::string
name() const ROSE_OVERRIDE {
return "any code point"; }
// Predicate: accepts every code point (unconditionally returns true).
523 virtual bool isValid(CodePoint) ROSE_OVERRIDE {
return true; }
// Sequence of code points held by this object.
// NOTE(review): presumably the pending decoded code points exposed by codePoints() -- confirm.
536 CodePoints codePoints_;
547 : cef_(cef), ces_(ces), cpp_(cpp) {}
// Interface for encoding and decoding whole strings (sequences of code points) to and from octets.
// NOTE(review): the enclosing class header is outside this excerpt; presumably the string
// encoding scheme base class -- confirm.

// Name of this object as a string.
556 virtual std::string name()
const = 0;
// Create a new copy of this encoder.
559 virtual Ptr clone()
const = 0;
// Encode a sequence of code points as a sequence of octets.
562 virtual Octets encode(
const CodePoints&) = 0;
// Feed one octet to the decoder; the return value is a decoder State.
571 virtual State decode(Octet) = 0;
// Non-virtual; returns a CodePoints sequence by value.
// NOTE(review): presumably returns the pending decoded code points and clears them -- confirm.
579 CodePoints consume();
// Number of code points decoded since reset (the value of nCodePoints_).
585 size_t length()
const {
return nCodePoints_; }
// Reset the decoder; virtual but not pure, so a base implementation exists outside this excerpt.
588 virtual void reset();
650 inst->state_ = state_;
651 inst->codePoints_ = codePoints_;
652 inst->nCodePoints_ = nCodePoints_;
653 inst->declaredLength_ = declaredLength_;
// Overrides of the string encoding interface, plus the declaredLength() accessor.
// NOTE(review): presumably members of the length-prefixed string encoding scheme -- confirm.

// Name of this object as a string.
656 virtual std::string name() const ROSE_OVERRIDE;
// Encode a sequence of code points as a sequence of octets.
657 virtual Octets encode(const CodePoints&) ROSE_OVERRIDE;
// Feed one octet to the decoder; returns a decoder State.
658 virtual
State decode(Octet) ROSE_OVERRIDE;
// Reset the decoder.
659 virtual
void reset() ROSE_OVERRIDE;
// Returns declaredLength_, an optional size_t that may hold no value.
// NOTE(review): presumably empty until the length prefix has been decoded -- confirm.
666 Sawyer::Optional<
size_t> declaredLength()
const {
return declaredLength_; }
685 ByteOrder::Endianness order = ByteOrder::ORDER_UNSPECIFIED);
// String termination code points (backing store for the terminators() property).
695 CodePoints terminators_;
718 inst->state_ = state_;
719 inst->codePoints_ = codePoints_;
720 inst->nCodePoints_ = nCodePoints_;
721 inst->terminated_ = terminated_;
// Overrides of the string encoding interface, plus the terminated() accessor.
// NOTE(review): presumably members of the terminated string encoding scheme -- confirm.

// Name of this object as a string.
724 virtual std::string name() const ROSE_OVERRIDE;
// Encode a sequence of code points as a sequence of octets.
725 virtual Octets encode(const CodePoints&) ROSE_OVERRIDE;
// Feed one octet to the decoder; returns a decoder State.
726 virtual
State decode(Octet) ROSE_OVERRIDE;
// Reset the decoder.
727 virtual
void reset() ROSE_OVERRIDE;
// Returns terminated_, an optional code point that may hold no value.
// NOTE(review): presumably the terminator that ended the string, empty if none seen yet -- confirm.
733 Sawyer::Optional<CodePoint> terminated()
const {
return terminated_; }
763 : encoder_(encoder), where_(where) {}
// Length of encoded string in code points; forwarded to the encoder.
778 size_t length()
const {
return encoder_->length(); }
// Code points associated with the string; forwarded to the encoder and returned by const reference.
783 const CodePoints&
codePoints()
const {
return encoder_->codePoints(); }
// Returns the string as a std::string; declared here without a body.
// NOTE(review): how code points that do not fit in a char are handled is not visible here -- confirm.
788 std::string narrow()
const;
// Returns the string as a std::wstring; declared here without a body.
791 std::wstring wide()
const;
// Default settings: minLength 5, maxOverlap 8, keepingOnlyLongest true. maxLength is a size_t,
// so initializing it with -1 converts to the largest size_t value.
// NOTE(review): presumably that means "no upper bound" -- confirm.
847 Settings(): minLength(5), maxLength(-1), maxOverlap(8), keepingOnlyLongest(true) {}
// Whether to discard code points (backing store for the discardingCodePoints property).
852 bool discardingCodePoints_;
// List of string encodings (backing store for encoders()).
853 std::vector<StringEncodingScheme::Ptr> encoders_;
// Strings that were found (backing store for strings()).
854 std::vector<EncodedString> strings_;
// Property: list of string encodings. The const overload gives read-only access.
887 const std::vector<StringEncodingScheme::Ptr>&
encoders()
const {
return encoders_; }
// Mutable overload: returns a reference to the same list so callers can modify it in place.
888 std::vector<StringEncodingScheme::Ptr>&
encoders() {
return encoders_; }
// Takes a byte order and returns a StringFinder reference; declared here without a body.
// NOTE(review): presumably appends a set of commonly used encoders to encoders() and returns
// *this for chaining -- confirm against the definition.
917 StringFinder& insertCommonEncoders(ByteOrder::Endianness);
// Same signature as insertCommonEncoders.
// NOTE(review): presumably appends less commonly used encoders instead -- confirm.
924 StringFinder& insertUncommonEncoders(ByteOrder::Endianness);
// Obtain strings that were found. The const overload gives read-only access.
964 const std::vector<EncodedString>&
strings()
const {
return strings_; }
// Mutable overload: returns a reference to the same vector so callers can modify it in place.
965 std::vector<EncodedString>&
strings() {
return strings_; }
// Writes to the given stream and returns a stream reference; declared here without a body.
// NOTE(review): the output format is not visible in this excerpt.
971 std::ostream& print(std::ostream&)
const;
// Stream insertion operator for StringFinder; declared here without a body.
// NOTE(review): presumably forwards to StringFinder::print -- confirm.
974 std::ostream& operator<<(std::ostream&,
const StringFinder&);
PrintableAscii::Ptr printableAscii()
Returns a new printable ASCII predicate.
unsigned MatchFlags
Flags for matching constraints.
CodePointPredicate::Ptr codePointPredicate() const
Property: Code point predicate.
size_t length() const
Number of code points decoded since reset.
void characterEncodingScheme(const CharacterEncodingScheme::Ptr &ces)
Property: Character encoding scheme.
virtual Ptr clone() const ROSE_OVERRIDE
Create a new copy of this encoder.
Value size() const
Size of interval.
std::vector< CodePoint > CodePoints
A sequence of code points, i.e., a string.
LengthEncodedString::Ptr lengthEncodedPrintableAsciiWide(size_t lengthSize, ByteOrder::Endianness order, size_t charSize)
Returns a new encoder for multi-byte length-encoded printable ASCII strings.
size_t maxOverlap
Whether to allow overlapping strings.
Settings & settings()
Property: Analysis settings often set from a command-line.
Defines the mapping between code values and octets.
size_t minLength
Minimum length of matched strings.
BasicLengthEncodingScheme::Ptr basicLengthEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic length encoding scheme.
StringFinder()
Constructor.
void codePointPredicate(const CodePointPredicate::Ptr &cpp)
Property: Code point predicate.
void initDiagnostics()
Initialize the diagnostics facility.
const AddressInterval & where() const
Where the string is located in memory.
const Settings & settings() const
Property: Analysis settings often set from a command-line.
LengthEncodingScheme::Ptr lengthEncodingScheme() const
Property: Length encoding scheme.
Initial state just after a reset.
Terminated string encoding scheme.
Sawyer::SharedPointer< TerminatedString > Ptr
Shared ownership pointer to a TerminatedString.
Maximum user-defined value.
const CodePoints & terminators() const
Property: string termination code points.
A collection of related switch declarations.
const CodePoints & codePoints() const
Return pending decoded code points without consuming them.
Analysis to find encoded strings.
unsigned CodePoint
One character in a coded character set.
bool keepingOnlyLongest
Whether to keep only longest non-overlapping strings.
Main namespace for the ROSE library.
BasicCharacterEncodingScheme::Ptr basicCharacterEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic character encoding scheme.
State state() const
Decoder state.
Sawyer::SharedPointer< CharacterEncodingScheme > Ptr
Shared ownership pointer to a CharacterEncodingScheme.
const std::vector< StringEncodingScheme::Ptr > & encoders() const
Property: List of string encodings.
StringFinder & reset()
Reset analysis results.
State state() const
Decoder state.
unsigned CodeValue
One value in a sequence that encodes a code point.
Name space for the entire library.
Sawyer::Message::Facility mlog
Diagnostics specific to string analysis.
std::vector< Octet > Octets
A sequence of octets.
Valid code point predicate.
State state() const
Decoder state.
size_t size() const
Size of encoded string in bytes.
virtual bool isValid(CodePoint) ROSE_OVERRIDE
Predicate.
StringFinder & discardingCodePoints(bool b)
Property: Whether to discard code points.
uint8_t Octet
One byte in a sequence that encodes a code value.
std::vector< CodeValue > CodeValues
A sequence of code values.
T least() const
Returns lower limit.
bool discardingCodePoints() const
Property: Whether to discard code points.
std::vector< EncodedString > & strings()
Obtain strings that were found.
size_t maxLength
Maximum length of matched strings.
Final state where nothing more can be decoded.
An encoder plus interval.
const std::vector< EncodedString > & strings() const
Obtain strings that were found.
First user-defined value.
Encoding for the length of a string.
void characterEncodingForm(const CharacterEncodingForm::Ptr &cef)
Property: Character encoding form.
An efficient mapping from an address space to stored data.
virtual StringEncodingScheme::Ptr clone() const ROSE_OVERRIDE
Create a new copy of this encoder.
const CodePoints & codePoints() const
Code points associated with the string.
AnyCodePoint::Ptr anyCodePoint()
Returns a new predicate that matches all code points.
Completed state, but not a final state.
virtual StringEncodingScheme::Ptr clone() const ROSE_OVERRIDE
Create a new copy of this encoder.
Sawyer::SharedPointer< StringEncodingScheme > Ptr
Shared ownership pointer to a StringEncodingScheme.
TerminatedString::Ptr nulTerminatedPrintableAscii()
Returns a new encoder for NUL-terminated printable ASCII strings.
Constraints are used to select addresses from a memory map.
LengthEncodedString::Ptr lengthEncodedString(const LengthEncodingScheme::Ptr &les, const CharacterEncodingForm::Ptr &cef, const CharacterEncodingScheme::Ptr &ces, const CodePointPredicate::Ptr &cpp)
Returns a new length-prefixed string encoder.
CharacterEncodingScheme::Ptr characterEncodingScheme() const
Property: Character encoding scheme.
NoopCharacterEncodingForm::Ptr noopCharacterEncodingForm()
Returns a new no-op character encoding form.
size_t length() const
Length of encoded string in code points.
LengthEncodedString::Ptr lengthEncodedPrintableAscii(size_t lengthSize, ByteOrder::Endianness order=ByteOrder::ORDER_UNSPECIFIED)
Returns a new encoder for length-encoded printable ASCII strings.
Sawyer::SharedPointer< LengthEncodingScheme > Ptr
Shared ownership pointer to a LengthEncodingScheme.
Sawyer::SharedPointer< CodePointPredicate > Ptr
Shared ownership pointer to a CodePointPredicate.
Base class for reference counted objects.
TerminatedString::Ptr nulTerminatedPrintableAsciiWide(size_t charSize, ByteOrder::Endianness order)
Returns a new encoder for multi-byte NUL-terminated printable ASCII strings.
rose_addr_t address() const
Starting address of string in memory.
CharacterEncodingForm::Ptr characterEncodingForm() const
Property: Character encoding form.
Third user-defined value.
Errors for string analysis.
Basic length encoding scheme.
Second user-defined value.
Sawyer::SharedPointer< LengthEncodedString > Ptr
Shared ownership pointer to a LengthEncodedString.
CodePoints & terminators()
Property: string termination code points.
void lengthEncodingScheme(const LengthEncodingScheme::Ptr &les)
Property: Length encoding scheme.
Utf8CharacterEncodingForm::Ptr utf8CharacterEncodingForm()
Returns a new UTF-8 character encoding form.
Decoder is in an error condition.
Length-prefixed string encoding scheme.
std::vector< StringEncodingScheme::Ptr > & encoders()
Property: List of string encodings.
virtual std::string name() const ROSE_OVERRIDE
Name of predicate.
virtual std::string name() const ROSE_OVERRIDE
Name of predicate.
Base class for all ROSE exceptions.
virtual Ptr clone() const ROSE_OVERRIDE
Create a new copy of this encoder.
bool isDone(State st)
Returns true for COMPLETED_STATE or FINAL_STATE.
Basic character encoding scheme.
Utf16CharacterEncodingForm::Ptr utf16CharacterEncodingForm()
Returns a new UTF-16 character encoding form.
StringEncodingScheme::Ptr encoder() const
Information about the string encoding.