ROSE 0.11.145.192
String.h
1#ifndef ROSE_BinaryAnalysis_String_H
2#define ROSE_BinaryAnalysis_String_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/Diagnostics.h>
7#include <Rose/BinaryAnalysis/MemoryMap.h>
8#include <Rose/Exception.h>
9#include <Sawyer/CommandLine.h>
10#include <Sawyer/Optional.h>
11
12namespace Rose {
13namespace BinaryAnalysis {
14
167namespace Strings {
168
171
// One byte in a sequence that encodes a code value.
172typedef uint8_t Octet;
// A sequence of octets.
173typedef std::vector<Octet> Octets;
// One value in a sequence that encodes a code point.
174typedef unsigned CodeValue;
// A sequence of code values.
175typedef std::vector<CodeValue> CodeValues;
// One character in a coded character set.
176typedef unsigned CodePoint;
// A sequence of code points, i.e., a string.
177typedef std::vector<CodePoint> CodePoints;
181public:
182 Exception(const std::string &s): Rose::Exception(s) {}
183};
184
208
// Returns true for COMPLETED_STATE or FINAL_STATE.
210bool isDone(State st);
211
214
// Defines mapping between code points and code values.
// NOTE(review): the embedded line counters jump (e.g. 227 -> 230 -> 232), so some original lines of this
// class, such as the declaration of the Ptr type used below, are not visible in this listing.
221class ROSE_DLL_API CharacterEncodingForm: public Sawyer::SharedObject {
222protected:
// Decoder state; initialized to INITIAL_STATE.
223 State state_ = INITIAL_STATE;
224public:
226 virtual ~CharacterEncodingForm() {}
227
230
// Create a new encoder from this one.
232 virtual Ptr clone() const = 0;
233
// Name of encoder.
235 virtual std::string name() const = 0;
236
241
// Returns the current decoder state (the value of state_).
243 State state() const { return state_; }
244
// Decode one code value.
249 virtual State decode(CodeValue) = 0;
250
// Consume a decoded code point.
254 virtual CodePoint consume() = 0;
255
// Reset the decoder state machine.
257 virtual void reset() = 0;
258};
259
264 CodePoint cp_;
265protected:
266 NoopCharacterEncodingForm(): cp_(0) {}
267public:
270
271 static Ptr instance() { return Ptr(new NoopCharacterEncodingForm); }
272 virtual CharacterEncodingForm::Ptr clone() const override { return Ptr(new NoopCharacterEncodingForm(*this)); }
273 virtual std::string name() const override { return "no-op"; }
274 virtual CodeValues encode(CodePoint cp) override;
275 virtual State decode(CodeValue) override;
276 virtual CodePoint consume() override;
277 virtual void reset() override;
278};
279
282
287 CodePoint cp_;
288protected:
289 Utf8CharacterEncodingForm(): cp_(0) {}
290public:
293
294 static Ptr instance() { return Ptr(new Utf8CharacterEncodingForm); }
295 virtual CharacterEncodingForm::Ptr clone() const override { return Ptr(new Utf8CharacterEncodingForm(*this)); }
296 virtual std::string name() const override { return "UTF-8"; }
297 virtual CodeValues encode(CodePoint cp) override;
298 virtual State decode(CodeValue) override;
299 virtual CodePoint consume() override;
300 virtual void reset() override;
301};
302
305
310 CodePoint cp_;
311protected:
312 Utf16CharacterEncodingForm(): cp_(0) {}
313public:
316
317 static Ptr instance() { return Ptr(new Utf16CharacterEncodingForm); }
318 virtual CharacterEncodingForm::Ptr clone() const override { return Ptr(new Utf16CharacterEncodingForm(*this)); }
319 virtual std::string name() const override { return "UTF-16"; }
320 virtual CodeValues encode(CodePoint cp) override;
321 virtual State decode(CodeValue) override;
322 virtual CodePoint consume() override;
323 virtual void reset() override;
324};
325
328
335protected:
336 State state_ = INITIAL_STATE;
337public:
339 virtual ~CharacterEncodingScheme() {}
340
343
345 virtual Ptr clone() const = 0;
346
348 virtual std::string name() const = 0;
349
352 virtual Octets encode(CodeValue) = 0;
353
355 State state() const { return state_; }
356
361 virtual State decode(Octet) = 0;
362
366 virtual CodeValue consume() = 0;
367
369 virtual void reset() = 0;
370};
371
378 size_t octetsPerValue_;
380 CodeValue cv_;
381protected:
// Protected constructor: stores the octet count and byte order and zeroes the pending code value (cv_).
// Asserts that a byte order is specified unless each value is a single octet, and that a value is no
// wider than CodeValue.
382 BasicCharacterEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex)
383 : octetsPerValue_(octetsPerValue), sex_(sex), cv_(0) {
// Only single-octet values may leave the byte order unspecified.
384 ASSERT_require(1==octetsPerValue || sex!=ByteOrder::ORDER_UNSPECIFIED);
// A decoded value must fit in a CodeValue.
385 ASSERT_require(octetsPerValue <= sizeof(CodeValue));
386 }
387public:
// Allocating constructor: returns a new BasicCharacterEncodingScheme wrapped in Ptr.
// The byte order defaults to ORDER_UNSPECIFIED, which the constructor's assertion accepts only when
// octetsPerValue is 1.
388 static Ptr instance(size_t octetsPerValue, ByteOrder::Endianness sex = ByteOrder::ORDER_UNSPECIFIED) {
389 return Ptr(new BasicCharacterEncodingScheme(octetsPerValue, sex));
390 }
// Create a new copy of this encoder (copy-constructs from *this).
391 virtual Ptr clone() const override {
392 return Ptr(new BasicCharacterEncodingScheme(*this));
393 }
394 virtual std::string name() const override;
395 virtual Octets encode(CodeValue) override;
396 virtual State decode(Octet) override;
397 virtual CodeValue consume() override;
398 virtual void reset() override;
399};
400
404
// Encoding for the length of a string.
// NOTE(review): the embedded line counters jump (e.g. 415 -> 418 -> 420), so some original lines of this
// class, such as the declaration of the Ptr type used below, are not visible in this listing.
409class ROSE_DLL_API LengthEncodingScheme: public Sawyer::SharedObject {
410protected:
// Decoder state; initialized to INITIAL_STATE.
411 State state_ = INITIAL_STATE;
412public:
414 virtual ~LengthEncodingScheme() {}
415
418
// Create a new copy of this encoder.
420 virtual Ptr clone() const = 0;
421
// Name of encoder.
423 virtual std::string name() const = 0;
424
// Encode a length into a sequence of octets.
426 virtual Octets encode(size_t) = 0;
427
// Returns the current decoder state (the value of state_).
429 State state() const { return state_; }
430
// Decode one octet.
435 virtual State decode(Octet) = 0;
436
// Consume a decoded length.
440 virtual size_t consume() = 0;
441
// Reset the decoder state machine.
443 virtual void reset() = 0;
444};
445
452 size_t octetsPerValue_;
454 size_t length_;
455protected:
// Protected constructor: stores the octet count and byte order and zeroes the pending length (length_).
// Asserts that a byte order is specified unless the length is a single octet, and that the encoded
// length is no wider than size_t.
456 BasicLengthEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex)
457 : octetsPerValue_(octetsPerValue), sex_(sex), length_(0) {
// Only single-octet lengths may leave the byte order unspecified.
458 ASSERT_require(1==octetsPerValue || sex!=ByteOrder::ORDER_UNSPECIFIED);
// A decoded length must fit in a size_t.
459 ASSERT_require(octetsPerValue <= sizeof(size_t));
460 }
461public:
// Allocating constructor: returns a new BasicLengthEncodingScheme wrapped in Ptr.
// The byte order defaults to ORDER_UNSPECIFIED, which the constructor's assertion accepts only when
// octetsPerValue is 1.
462 static Ptr instance(size_t octetsPerValue, ByteOrder::Endianness sex = ByteOrder::ORDER_UNSPECIFIED) {
463 return Ptr(new BasicLengthEncodingScheme(octetsPerValue, sex));
464 }
// Create a new copy of this encoder (copy-constructs from *this).
465 virtual Ptr clone() const override {
466 return Ptr(new BasicLengthEncodingScheme(*this));
467 }
468 virtual std::string name() const override;
469 virtual Octets encode(size_t) override;
470 virtual State decode(Octet) override;
471 virtual size_t consume() override;
472 virtual void reset() override;
473};
474
478
// Abstract interface for a predicate over code points: subclasses supply name() and isValid().
// NOTE(review): the embedded line counters jump (485 -> 488 -> 490); lines of the original class are
// missing from this listing.
482class ROSE_DLL_API CodePointPredicate: public Sawyer::SharedObject {
483public:
484 virtual ~CodePointPredicate() {}
485
488
// Name of predicate.
490 virtual std::string name() const = 0;
491
// Predicate.
493 virtual bool isValid(CodePoint) = 0;
494};
495
// Code point predicate whose name is "printable ASCII". isValid() is only declared here; its
// definition is not part of this listing.
// NOTE(review): Ptr is not declared in this class; presumably it is inherited from CodePointPredicate — confirm.
500class ROSE_DLL_API PrintableAscii: public CodePointPredicate {
501protected:
// Protected so that objects are created through instance().
502 PrintableAscii() {}
503public:
// Allocating constructor: returns a new PrintableAscii wrapped in Ptr.
504 static Ptr instance() {
505 return Ptr(new PrintableAscii);
506 }
// Name of predicate.
507 virtual std::string name() const override { return "printable ASCII"; }
// Predicate.
508 virtual bool isValid(CodePoint) override;
509};
510
513
// Code point predicate that matches all code points: isValid() returns true for every argument.
// NOTE(review): Ptr is not declared in this class; presumably it is inherited from CodePointPredicate — confirm.
517class ROSE_DLL_API AnyCodePoint: public CodePointPredicate {
518protected:
// Protected so that objects are created through instance().
519 AnyCodePoint() {}
520public:
// Allocating constructor: returns a new AnyCodePoint wrapped in Ptr.
521 static Ptr instance() { return Ptr(new AnyCodePoint); }
// Name of predicate.
522 virtual std::string name() const override { return "any code point"; }
// Predicate; unconditionally true.
523 virtual bool isValid(CodePoint) override { return true; }
524};
525
528
// Abstract base for string encoders/decoders. It tracks a decoder state, the pending decoded code
// points and a count of code points decoded since the last reset.
// NOTE(review): this listing is missing lines of the original class (the embedded counters jump, e.g.
// 537 -> 541 and 544 -> 546). The declarations of cef_, ces_, cpp_ and Ptr and the first line of the
// constructor are not visible here.
533class ROSE_DLL_API StringEncodingScheme: public Sawyer::SharedObject {
534protected:
535 State state_ = INITIAL_STATE; // decoding state
536 CodePoints codePoints_; // unconsumed code points
537 size_t nCodePoints_ = 0; // number of code points decoded since reset
541
542protected:
544
// Tail of the protected constructor (its first line is missing from this listing): stores the three
// arguments in cef_, ces_ and cpp_.
546 const CodePointPredicate::Ptr &cpp)
547 : cef_(cef), ces_(ces), cpp_(cpp) {}
548
549public:
550 virtual ~StringEncodingScheme() {}
551
554
// Name of encoding.
556 virtual std::string name() const = 0;
557
// Create a new copy of this encoder.
559 virtual Ptr clone() const = 0;
560
// Encode a string into a sequence of octets.
562 virtual Octets encode(const CodePoints&) = 0;
563
// Returns the current decoder state (the value of state_).
565 State state() const { return state_; }
566
// Decode one octet.
571 virtual State decode(Octet) = 0;
572
580
// Return pending decoded code points without consuming them.
582 const CodePoints& codePoints() const { return codePoints_; }
583
// Number of code points decoded since reset.
585 size_t length() const { return nCodePoints_; }
586
// Reset the state machine to an initial state.
588 virtual void reset();
589
// Property: Character encoding format (setter; replaces cef_).
599 void characterEncodingForm(const CharacterEncodingForm::Ptr &cef) { cef_ = cef; }
// Property: Code point predicate (setter; replaces cpp_).
622 void codePointPredicate(const CodePointPredicate::Ptr &cpp) { cpp_ = cpp; }
624};
625
// Length-prefixed string encoding scheme.
// NOTE(review): this listing is missing lines of the original class (embedded counters jump, e.g.
// 629 -> 631, 632 -> 635, 640 -> 642). The les_ member, the Ptr type, the constructor's parameter list
// and the second line of instance()'s signature are not visible here.
629class ROSE_DLL_API LengthEncodedString: public StringEncodingScheme {
631 Sawyer::Optional<size_t> declaredLength_; // decoded length
632protected:
// Tail of the protected constructor: forwards cef, ces and cpp to the base class and stores les in les_.
635 : StringEncodingScheme(cef, ces, cpp), les_(les) {}
636public:
639

// Allocating constructor: returns a new LengthEncodedString built from the four arguments.
640 static Ptr instance(const LengthEncodingScheme::Ptr &les, const CharacterEncodingForm::Ptr &cef,
642 return Ptr(new LengthEncodedString(les, cef, ces, cpp));
643 }
// Create a new copy of this encoder. The length encoder, character encoding form and character
// encoding scheme are cloned, the predicate pointer is shared, and the decoding progress (state_,
// codePoints_, nCodePoints_, declaredLength_) is copied into the new object.
644 virtual StringEncodingScheme::Ptr clone() const override {
645 LengthEncodingScheme::Ptr les = les_->clone();
646 CharacterEncodingForm::Ptr cef = cef_->clone();
647 CharacterEncodingScheme::Ptr ces = ces_->clone();
648 CodePointPredicate::Ptr cpp = cpp_; // not cloned since they have no state
649 LengthEncodedString *inst = new LengthEncodedString(les, cef, ces, cpp);
650 inst->state_ = state_;
651 inst->codePoints_ = codePoints_;
652 inst->nCodePoints_ = nCodePoints_;
653 inst->declaredLength_ = declaredLength_;
654 return Ptr(inst);
655 }
// Name of encoding.
656 virtual std::string name() const override;
// Encode a string into a sequence of octets.
657 virtual Octets encode(const CodePoints&) override;
// Decode one octet.
658 virtual State decode(Octet) override;
// Reset the state machine to an initial state.
659 virtual void reset() override;
660
// Returns the declared length, if any.
666 Sawyer::Optional<size_t> declaredLength() const { return declaredLength_; }
667
// Property: Length encoding scheme (setter; replaces les_).
674 void lengthEncodingScheme(const LengthEncodingScheme::Ptr &les) { les_ = les; }
676};
677
681
686
689
// Terminated string encoding scheme.
// NOTE(review): this listing is missing lines of the original class (embedded counters jump, e.g.
// 697 -> 699 and 701 -> 704). The first line of the constructor and the Ptr type are not visible here.
694class ROSE_DLL_API TerminatedString: public StringEncodingScheme {
// Property: string termination code points.
695 CodePoints terminators_;
696 Sawyer::Optional<CodePoint> terminated_; // decoded termination
697protected:
// Tail of the protected constructor: forwards cef, ces and cpp to the base class and copies the
// terminator list into terminators_.
699 const CodePointPredicate::Ptr &cpp, const CodePoints &terminators)
700 : StringEncodingScheme(cef, ces, cpp), terminators_(terminators) {}
701public:
704

// Allocating constructor taking an explicit list of terminator code points.
705 static Ptr instance(const CharacterEncodingForm::Ptr &cef, const CharacterEncodingScheme::Ptr &ces,
706 const CodePointPredicate::Ptr &cpp, const CodePoints &terminators) {
707 return Ptr(new TerminatedString(cef, ces, cpp, terminators));
708 }
// Allocating constructor taking a single terminator code point, which defaults to 0. The terminator
// is wrapped in a one-element CodePoints vector.
709 static Ptr instance(const CharacterEncodingForm::Ptr &cef, const CharacterEncodingScheme::Ptr &ces,
710 const CodePointPredicate::Ptr &cpp, CodePoint terminator = 0) {
711 return Ptr(new TerminatedString(cef, ces, cpp, CodePoints(1, terminator)));
712 }
// Create a new copy of this encoder. The character encoding form and scheme are cloned, the predicate
// pointer is shared, the terminator list is passed to the new object, and the decoding progress
// (state_, codePoints_, nCodePoints_, terminated_) is copied.
713 virtual StringEncodingScheme::Ptr clone() const override {
714 CharacterEncodingForm::Ptr cef = cef_->clone();
715 CharacterEncodingScheme::Ptr ces = ces_->clone();
716 CodePointPredicate::Ptr cpp = cpp_; // not cloned since they have no state
717 TerminatedString *inst = new TerminatedString(cef, ces, cpp, terminators_);
718 inst->state_ = state_;
719 inst->codePoints_ = codePoints_;
720 inst->nCodePoints_ = nCodePoints_;
721 inst->terminated_ = terminated_;
722 return Ptr(inst);
723 }
// Name of encoding.
724 virtual std::string name() const override;
// Encode a string into a sequence of octets.
725 virtual Octets encode(const CodePoints&) override;
// Decode one octet.
726 virtual State decode(Octet) override;
// Reset the state machine to an initial state.
727 virtual void reset() override;
728
// Returns the decoded termination character, if any.
733 Sawyer::Optional<CodePoint> terminated() const { return terminated_; }
734
// Property: string termination code points (read-only and mutable accessors to terminators_).
742 const CodePoints& terminators() const { return terminators_; }
743 CodePoints& terminators() { return terminators_; }
745};
746
749
752
// Pairs a string encoder with the address interval at which the encoded string is located.
// NOTE(review): length() and codePoints() dereference encoder_ without a check. The default constructor
// leaves encoder_ default-initialized; if that is a null pointer these accessors must not be called on a
// default-constructed object — confirm against Sawyer::SharedPointer.
757class ROSE_DLL_API EncodedString {
758 StringEncodingScheme::Ptr encoder_; // how string is encoded
759 AddressInterval where_; // where encoded string is located
760public:
// Default constructor: both members are default-initialized.
761 EncodedString() {}
// Constructs from an encoder and the interval where the encoded string is located.
762 EncodedString(const StringEncodingScheme::Ptr &encoder, const AddressInterval &where)
763 : encoder_(encoder), where_(where) {}
764
// Information about the string encoding.
766 StringEncodingScheme::Ptr encoder() const { return encoder_; }
767
// Where the string is located in memory.
769 const AddressInterval& where() const { return where_; }
770
// Starting address of string in memory (lower limit of where_).
772 rose_addr_t address() const { return where_.least(); }
773
// Size of encoded string in bytes (size of where_).
775 size_t size() const { return where_.size(); }
776
// Length of encoded string in code points (forwarded to the encoder).
778 size_t length() const { return encoder_->length(); }
779
// Code points associated with the string (forwarded to the encoder).
783 const CodePoints& codePoints() const { return encoder_->codePoints(); }
784
// Return code points as a C++ std::string.
788 std::string narrow() const;
789
// Return code points as a C++ std::wstring.
791 std::wstring wide() const;
792
// Decodes the string from memory.
797 void decode(const MemoryMap&);
798};
799
// Analysis to find encoded strings.
// NOTE(review): this listing is missing lines of the original class (embedded counters jump, e.g.
// 823 -> 840 -> 846 and 888 -> 918 -> 925). The Settings members maxOverlap and keepingOnlyLongest, which
// the Settings constructor initializes, and several member functions are not visible here.
806class ROSE_DLL_API StringFinder {
807public:
// Settings that control the analysis.
811 struct Settings {
// Minimum length of matched strings.
816 size_t minLength;
817
// Maximum length of matched strings.
822 size_t maxLength;
823
840
846
// Defaults: minLength 5, maxOverlap 8, keepingOnlyLongest true. maxLength is initialized from -1,
// which converts to the largest size_t value.
847 Settings(): minLength(5), maxLength(-1), maxOverlap(8), keepingOnlyLongest(true) {}
848 };
849
850private:
851 Settings settings_; // command-line settings for this analysis
852 bool discardingCodePoints_; // whether to store decoded code points
853 std::vector<StringEncodingScheme::Ptr> encoders_; // encodings to use when searching
854 std::vector<EncodedString> strings_; // strings that have been found
855
856public:
// Default constructor: discardingCodePoints_ starts out false; other members are default-initialized.
861 StringFinder(): discardingCodePoints_(false) {}
862
// Property: Analysis settings often set from a command-line.
866 const Settings& settings() const { return settings_; }
867 Settings& settings() { return settings_; }
// Property: Whether to discard code points. The setter returns *this so calls can be chained.
877 bool discardingCodePoints() const { return discardingCodePoints_; }
878 StringFinder& discardingCodePoints(bool b) { discardingCodePoints_=b; return *this; }
// Property: List of string encodings.
887 const std::vector<StringEncodingScheme::Ptr>& encoders() const { return encoders_; }
888 std::vector<StringEncodingScheme::Ptr>& encoders() { return encoders_; }
918
925
// Reset analysis results: clears the list of found strings and returns *this.
929 StringFinder& reset() { strings_.clear(); return *this; }
930
960
// Obtain strings that were found.
964 const std::vector<EncodedString>& strings() const { return strings_; }
965 std::vector<EncodedString>& strings() { return strings_; }
// Print results.
971 std::ostream& print(std::ostream&) const;
972};
973
// Stream-insertion overload for StringFinder. Only the declaration is visible here; presumably it
// forwards to StringFinder::print — confirm in the implementation file.
974std::ostream& operator<<(std::ostream&, const StringFinder&);
975
976} // namespace
977} // namespace
978} // namespace
979
980#endif
981#endif
An efficient mapping from an address space to stored data.
Definition MemoryMap.h:115
virtual std::string name() const override
Name of predicate.
Definition String.h:522
virtual bool isValid(CodePoint) override
Predicate.
Definition String.h:523
virtual CodeValue consume() override
Consume a decoded code value.
virtual Ptr clone() const override
Create a new copy of this encoder.
Definition String.h:391
virtual std::string name() const override
Name of encoder.
virtual State decode(Octet) override
Decode one octet.
virtual Octets encode(CodeValue) override
Encode a code value into a sequence of octets.
virtual void reset() override
Reset the decoder state machine.
virtual std::string name() const override
Name of encoder.
virtual void reset() override
Reset the decoder state machine.
virtual Ptr clone() const override
Create a new copy of this encoder.
Definition String.h:465
virtual size_t consume() override
Consume a decoded length.
virtual Octets encode(size_t) override
Encode a length into a sequence of octets.
virtual State decode(Octet) override
Decode one octet.
Defines mapping between code points and code values.
Definition String.h:221
virtual Ptr clone() const =0
Create a new encoder from this one.
virtual void reset()=0
Reset the decoder state machine.
virtual State decode(CodeValue)=0
Decode one code value.
Sawyer::SharedPointer< CharacterEncodingForm > Ptr
Shared ownership pointer to a CharacterEncodingForm.
Definition String.h:229
virtual std::string name() const =0
Name of encoder.
virtual CodeValues encode(CodePoint)=0
Encode a code point into a sequence of one or more code values.
virtual CodePoint consume()=0
Consume a decoded code point.
Defines the mapping between code values and octets.
Definition String.h:334
virtual State decode(Octet)=0
Decode one octet.
virtual void reset()=0
Reset the decoder state machine.
virtual std::string name() const =0
Name of encoder.
Sawyer::SharedPointer< CharacterEncodingScheme > Ptr
Shared ownership pointer to a CharacterEncodingScheme.
Definition String.h:342
virtual CodeValue consume()=0
Consume a decoded code value.
virtual Octets encode(CodeValue)=0
Encode a code value into a sequence of octets.
virtual Ptr clone() const =0
Create a new copy of this encoder.
virtual std::string name() const =0
Name of predicate.
Sawyer::SharedPointer< CodePointPredicate > Ptr
Shared ownership pointer to a CodePointPredicate.
Definition String.h:487
virtual bool isValid(CodePoint)=0
Predicate.
const AddressInterval & where() const
Where the string is located in memory.
Definition String.h:769
StringEncodingScheme::Ptr encoder() const
Information about the string encoding.
Definition String.h:766
size_t length() const
Length of encoded string in code points.
Definition String.h:778
const CodePoints & codePoints() const
Code points associated with the string.
Definition String.h:783
std::string narrow() const
Return code points as a C++ std::string.
size_t size() const
Size of encoded string in bytes.
Definition String.h:775
std::wstring wide() const
Return code points as a C++ std::wstring.
rose_addr_t address() const
Starting address of string in memory.
Definition String.h:772
void decode(const MemoryMap &)
Decodes the string from memory.
Errors for string analysis.
Definition String.h:180
Length-prefixed string encoding scheme.
Definition String.h:629
void lengthEncodingScheme(const LengthEncodingScheme::Ptr &les)
Property: Length encoding scheme.
Definition String.h:674
virtual State decode(Octet) override
Decode one octet.
Sawyer::Optional< size_t > declaredLength() const
Returns the declared length, if any.
Definition String.h:666
LengthEncodingScheme::Ptr lengthEncodingScheme() const
Property: Length encoding scheme.
Definition String.h:673
virtual StringEncodingScheme::Ptr clone() const override
Create a new copy of this encoder.
Definition String.h:644
virtual Octets encode(const CodePoints &) override
Encode a string into a sequence of octets.
virtual std::string name() const override
Name of encoding.
virtual void reset() override
Reset the state machine to an initial state.
Sawyer::SharedPointer< LengthEncodedString > Ptr
Shared ownership pointer to a LengthEncodedString.
Definition String.h:638
Encoding for the length of a string.
Definition String.h:409
virtual void reset()=0
Reset the decoder state machine.
virtual State decode(Octet)=0
Decode one octet.
virtual std::string name() const =0
Name of encoder.
Sawyer::SharedPointer< LengthEncodingScheme > Ptr
Shared ownership pointer to a LengthEncodingScheme.
Definition String.h:417
virtual size_t consume()=0
Consume a decoded length.
virtual Octets encode(size_t)=0
Encode a length into a sequence of octets.
virtual Ptr clone() const =0
Create a new copy of this encoder.
virtual void reset() override
Reset the decoder state machine.
virtual std::string name() const override
Name of encoder.
Definition String.h:273
virtual CodePoint consume() override
Consume a decoded code point.
virtual CharacterEncodingForm::Ptr clone() const override
Create a new encoder from this one.
Definition String.h:272
Sawyer::SharedPointer< NoopCharacterEncodingForm > Ptr
Shared-ownership pointer to a NoopCharacterEncodingForm.
Definition String.h:269
virtual State decode(CodeValue) override
Decode one code value.
virtual CodeValues encode(CodePoint cp) override
Encode a code point into a sequence of one or more code values.
virtual bool isValid(CodePoint) override
Predicate.
virtual std::string name() const override
Name of predicate.
Definition String.h:507
virtual Ptr clone() const =0
Create a new copy of this encoder.
virtual void reset()
Reset the state machine to an initial state.
CharacterEncodingForm::Ptr characterEncodingForm() const
Property: Character encoding format.
Definition String.h:598
void characterEncodingScheme(const CharacterEncodingScheme::Ptr &ces)
Property: Character encoding scheme.
Definition String.h:611
virtual std::string name() const =0
Name of encoding.
virtual Octets encode(const CodePoints &)=0
Encode a string into a sequence of octets.
void codePointPredicate(const CodePointPredicate::Ptr &cpp)
Property: Code point predicate.
Definition String.h:622
CodePoints consume()
Consume pending decoded code points.
CharacterEncodingScheme::Ptr characterEncodingScheme() const
Property: Character encoding scheme.
Definition String.h:610
void characterEncodingForm(const CharacterEncodingForm::Ptr &cef)
Property: Character encoding format.
Definition String.h:599
size_t length() const
Number of code points decoded since reset.
Definition String.h:585
const CodePoints & codePoints() const
Return pending decoded code points without consuming them.
Definition String.h:582
virtual State decode(Octet)=0
Decode one octet.
CodePointPredicate::Ptr codePointPredicate() const
Property: Code point predicate.
Definition String.h:621
Sawyer::SharedPointer< StringEncodingScheme > Ptr
Shared ownership pointer to a StringEncodingScheme.
Definition String.h:553
Analysis to find encoded strings.
Definition String.h:806
Settings & settings()
Property: Analysis settings often set from a command-line.
Definition String.h:867
std::vector< EncodedString > & strings()
Obtain strings that were found.
Definition String.h:965
const std::vector< EncodedString > & strings() const
Obtain strings that were found.
Definition String.h:964
StringFinder & discardingCodePoints(bool b)
Property: Whether to discard code points.
Definition String.h:878
StringFinder & insertCommonEncoders(ByteOrder::Endianness)
Inserts common encodings.
const std::vector< StringEncodingScheme::Ptr > & encoders() const
Property: List of string encodings.
Definition String.h:887
static Sawyer::CommandLine::SwitchGroup commandLineSwitches(Settings &)
Command-line parser for analysis settings.
const Settings & settings() const
Property: Analysis settings often set from a command-line.
Definition String.h:866
std::ostream & print(std::ostream &) const
Print results.
StringFinder & find(const MemoryMap::ConstConstraints &, Sawyer::Container::MatchFlags flags=0)
Finds strings by searching memory.
StringFinder & insertUncommonEncoders(ByteOrder::Endianness)
Inserts less common encodings.
Sawyer::CommandLine::SwitchGroup commandLineSwitches()
Command-line parser for analysis settings.
bool discardingCodePoints() const
Property: Whether to discard code points.
Definition String.h:877
std::vector< StringEncodingScheme::Ptr > & encoders()
Property: List of string encodings.
Definition String.h:888
StringFinder & reset()
Reset analysis results.
Definition String.h:929
Terminated string encoding scheme.
Definition String.h:694
Sawyer::Optional< CodePoint > terminated() const
Returns the decoded termination character, if any.
Definition String.h:733
virtual Octets encode(const CodePoints &) override
Encode a string into a sequence of octets.
virtual std::string name() const override
Name of encoding.
const CodePoints & terminators() const
Property: string termination code points.
Definition String.h:742
Sawyer::SharedPointer< TerminatedString > Ptr
Shared ownership pointer to a TerminatedString.
Definition String.h:703
CodePoints & terminators()
Property: string termination code points.
Definition String.h:743
virtual State decode(Octet) override
Decode one octet.
virtual void reset() override
Reset the state machine to an initial state.
virtual StringEncodingScheme::Ptr clone() const override
Create a new copy of this encoder.
Definition String.h:713
virtual std::string name() const override
Name of encoder.
Definition String.h:319
virtual CodeValues encode(CodePoint cp) override
Encode a code point into a sequence of one or more code values.
Sawyer::SharedPointer< Utf16CharacterEncodingForm > Ptr
Shared-ownership pointer to a Utf16CharacterEncodingForm.
Definition String.h:315
virtual State decode(CodeValue) override
Decode one code value.
virtual CodePoint consume() override
Consume a decoded code point.
virtual void reset() override
Reset the decoder state machine.
virtual CharacterEncodingForm::Ptr clone() const override
Create a new encoder from this one.
Definition String.h:318
virtual CodePoint consume() override
Consume a decoded code point.
Sawyer::SharedPointer< Utf8CharacterEncodingForm > Ptr
Shared-ownership pointer to a Utf8CharacterEncodingForm.
Definition String.h:292
virtual CharacterEncodingForm::Ptr clone() const override
Create a new encoder from this one.
Definition String.h:295
virtual CodeValues encode(CodePoint cp) override
Encode a code point into a sequence of one or more code values.
virtual std::string name() const override
Name of encoder.
Definition String.h:296
virtual void reset() override
Reset the decoder state machine.
virtual State decode(CodeValue) override
Decode one code value.
Base class for all ROSE exceptions.
A collection of related switch declarations.
Constraints are used to select addresses from a memory map.
Definition AddressMap.h:76
Value size() const
Size of interval.
Definition Interval.h:302
T least() const
Returns lower limit.
Definition Interval.h:218
Collection of streams.
Definition Message.h:1606
Holds a value or nothing.
Definition Optional.h:56
Base class for reference counted objects.
Reference-counting intrusive smart pointer.
@ ORDER_UNSPECIFIED
Endianness is unspecified and unknown.
Definition ByteOrder.h:21
PrintableAscii::Ptr printableAscii()
Returns a new printable ASCII predicate.
Utf8CharacterEncodingForm::Ptr utf8CharacterEncodingForm()
Returns a new UTF-8 character encoding form.
BasicCharacterEncodingScheme::Ptr basicCharacterEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic character encoding scheme.
AnyCodePoint::Ptr anyCodePoint()
Returns a new predicate that matches all code points.
TerminatedString::Ptr nulTerminatedPrintableAscii()
Returns a new encoder for NUL-terminated printable ASCII strings.
Utf16CharacterEncodingForm::Ptr utf16CharacterEncodingForm()
Returns a new UTF-16 character encoding form.
std::vector< CodePoint > CodePoints
A sequence of code points, i.e., a string.
Definition String.h:177
@ USER_DEFINED_0
First user-defined value.
Definition String.h:203
@ COMPLETED_STATE
Completed state, but not a final state.
Definition String.h:200
@ USER_DEFINED_MAX
Maximum user-defined value.
Definition String.h:206
@ INITIAL_STATE
Initial state just after a reset.
Definition String.h:201
@ ERROR_STATE
Decoder is in an error condition.
Definition String.h:202
@ FINAL_STATE
Final state where nothing more can be decoded.
Definition String.h:199
@ USER_DEFINED_2
Third user-defined value.
Definition String.h:205
@ USER_DEFINED_1
Second user-defined value.
Definition String.h:204
bool isDone(State st)
Returns true for COMPLETED_STATE or FINAL_STATE.
void initDiagnostics()
Initialize the diagnostics facility.
LengthEncodedString::Ptr lengthEncodedPrintableAscii(size_t lengthSize, ByteOrder::Endianness order=ByteOrder::ORDER_UNSPECIFIED)
Returns a new encoder for length-encoded printable ASCII strings.
Sawyer::Message::Facility mlog
Diagnostics specific to string analysis.
uint8_t Octet
One byte in a sequence that encodes a code value.
Definition String.h:172
std::vector< Octet > Octets
A sequence of octets.
Definition String.h:173
std::vector< CodeValue > CodeValues
A sequence of code values.
Definition String.h:175
unsigned CodeValue
One value in a sequence that encodes a code point.
Definition String.h:174
LengthEncodedString::Ptr lengthEncodedPrintableAsciiWide(size_t lengthSize, ByteOrder::Endianness order, size_t charSize)
Returns a new encoder for multi-byte length-encoded printable ASCII strings.
BasicLengthEncodingScheme::Ptr basicLengthEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic length encoding scheme.
unsigned CodePoint
One character in a coded character set.
Definition String.h:176
TerminatedString::Ptr nulTerminatedPrintableAsciiWide(size_t charSize, ByteOrder::Endianness order)
Returns a new encoder for multi-byte NUL-terminated printable ASCII strings.
NoopCharacterEncodingForm::Ptr noopCharacterEncodingForm()
Returns a new no-op character encoding form.
LengthEncodedString::Ptr lengthEncodedString(const LengthEncodingScheme::Ptr &les, const CharacterEncodingForm::Ptr &cef, const CharacterEncodingScheme::Ptr &ces, const CodePointPredicate::Ptr &cpp)
Returns a new length-prefixed string encoder.
The ROSE library.
unsigned MatchFlags
Flags for matching constraints.
Definition AddressMap.h:46
size_t maxOverlap
Whether to allow overlapping strings.
Definition String.h:839
size_t maxLength
Maximum length of matched strings.
Definition String.h:822
bool keepingOnlyLongest
Whether to keep only longest non-overlapping strings.
Definition String.h:845
size_t minLength
Minimum length of matched strings.
Definition String.h:816