ROSE 0.11.145.192
Lexer.h
// WARNING: Changes to this file must be contributed back to Sawyer or else they will
// be clobbered by the next update from Sawyer. The Sawyer repository is at
// https://gitlab.com/charger7534/sawyer.git.

#ifndef Sawyer_Lexer_H
#define Sawyer_Lexer_H

#include <Sawyer/AllocatingBuffer.h>
#include <Sawyer/LineVector.h>
#include <Sawyer/Optional.h>
#include <Sawyer/Sawyer.h>

#include <boost/filesystem.hpp>

#include <cstring>
#include <string>
#include <utility>
#include <vector>

namespace Sawyer {

namespace Lexer {

/** Represents one token of input. */
template<typename T>
class SAWYER_EXPORT Token {
public:
    typedef T TokenEnum;

private:
    Optional<TokenEnum> type_;                          // empty means EOF
    size_t begin_;                                      // beginning character position in the input
    size_t end_;                                        // ending (exclusive) character position in the input

public:
    /** Construct an EOF token. */
    Token(): begin_(0), end_(0) {}

    /** Construct a token. */
    Token(TokenEnum type, size_t begin, size_t end)
        : type_(type), begin_(begin), end_(end) {
        ASSERT_require(end >= begin);
    }

    /** Whether this is an EOF token. */
    bool isEof() const {
        if (type_)
            return false;
        return true;
    }

    /** Returns the token type. */
    TokenEnum type() const {
        return *type_;
    }

    /** Token lexeme starting position. */
    size_t begin() const {
        return begin_;
    }

    /** Token lexeme ending position. */
    size_t end() const {
        return end_;
    }
};

/** An ordered list of tokens scanned from input. */
template<class T>
class SAWYER_EXPORT TokenStream {
public:
    typedef T Token;

private:
    std::string name_;                                  // name of stream (e.g., a file name)
    Container::LineVector content_;                     // line-oriented character contents of the stream
    size_t at_;                                         // cursor position in stream
    std::vector<Token> tokens_;                         // current token at [0] plus lookahead

public:
    virtual ~TokenStream() {}

    /** Create a token stream from the contents of a file. */
    explicit TokenStream(const boost::filesystem::path &fileName)
        : name_(fileName.string()), content_(fileName.string()), at_(0) {}

    /** Create a token stream from a string. */
    explicit TokenStream(const std::string &inputString)
        : name_("string"), content_(Container::AllocatingBuffer<size_t, char>::instance(inputString)), at_(0) {}

    /** Create a token stream from a buffer. */
    explicit TokenStream(const Container::Buffer<size_t, char>::Ptr &buffer)
        : name_("string"), content_(buffer), at_(0) {}

    /** Property: Name of stream. */
    const std::string& name() const {
        return name_;
    }

    /** Return the current token. */
    const Token& current() {
        return (*this)[0];
    }

    /** Returns true if the stream is at the end. */
    bool atEof() {
        return current().isEof();
    }

    /** Return the current or future token. */
    const Token& operator[](size_t lookahead) {
        static const Token eof_;
        while (lookahead >= tokens_.size()) {
            if (!tokens_.empty() && tokens_.back().isEof())
                return eof_;
            tokens_.push_back(scanNextToken(content_, at_/*in,out*/));
        }
        return tokens_[lookahead];
    }

    /** Consume some tokens. */
    void consume(size_t n = 1) {
        const Token &t = current();
        if (t.isEof()) {
            // void
        } else if (n >= tokens_.size()) {
            tokens_.clear();
        } else {
            tokens_.erase(tokens_.begin(), tokens_.begin() + n);
        }
    }

    /** Return the lexeme for a token.
     *
     *  The overload taking no arguments returns the lexeme for the current token. */
    std::string lexeme(const Token &t) {
        if (const char *s = content_.characters(t.begin())) {
            return std::string(s, t.end() - t.begin());
        } else {
            return "";
        }
    }
    std::string lexeme() {
        return lexeme(current());
    }

    /** Determine whether token is a specific type. */
    bool isa(const Token &t, typename Token::TokenEnum type) {
        return !t.isEof() && t.type() == type;
    }

    bool isa(typename Token::TokenEnum type) {
        return isa(current(), type);
    }

    /** Determine whether a token matches a string. */
    bool match(const Token &t, const char *s) {
        ASSERT_not_null(s);
        size_t n1 = t.end() - t.begin();
        size_t n2 = strlen(s);
        if (n1 != n2)
            return false;
        const char *lexeme = content_.characters(t.begin());
        return 0 == strncmp(lexeme, s, n1);
    }
    bool match(const char *s) {
        return match(current(), s);
    }

    /** Return the line number and offset for an input position. */
    std::pair<size_t, size_t> location(size_t position) {
        return content_.location(position);
    }

    /** Returns the last line index and character offset. */
    std::pair<size_t, size_t> locationEof() {
        size_t nChars = content_.nCharacters();
        return nChars > 0 ? content_.location(nChars-1) : content_.location(0);
    }

    /** Return the entire string for some line index. */
    std::string lineString(size_t lineIdx) {
        return content_.lineString(lineIdx);
    }

    /** Function that obtains the next token.
     *
     *  Subclasses implement this to scan the next token from @p content starting at character position @p at, advancing
     *  @p at past the characters that were consumed. */
    virtual Token scanNextToken(const Container::LineVector &content, size_t &at /*in,out*/) = 0;
};

} // namespace
} // namespace

#endif
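
The stream is driven by scanNextToken(), which scans one token starting at the in/out cursor position and returns a default-constructed (EOF) token when the input is exhausted. The sketch below is not part of the header; it shows one hypothetical way to subclass TokenStream. The names WordLexer and WordTokenType, the token-splitting rules, and the <Sawyer/Lexer.h> include path are assumptions made for illustration, and the only LineVector calls used are nCharacters() and characters(), which the header itself relies on.

#include <Sawyer/Lexer.h>

#include <cctype>
#include <iostream>
#include <string>

// Hypothetical token types for this example.
enum WordTokenType { TOK_WORD, TOK_NUMBER, TOK_OTHER };

// Hypothetical lexer that splits its input into words, numbers, and single
// "other" characters.
class WordLexer: public Sawyer::Lexer::TokenStream<Sawyer::Lexer::Token<WordTokenType> > {
public:
    explicit WordLexer(const std::string &input)
        : Sawyer::Lexer::TokenStream<Sawyer::Lexer::Token<WordTokenType> >(input) {}

    Token scanNextToken(const Sawyer::Container::LineVector &content, size_t &at /*in,out*/) override {
        const size_t n = content.nCharacters();

        // Skip white space between tokens; assumes characters() returns a
        // valid pointer for any in-range offset.
        while (at < n && std::isspace((unsigned char)*content.characters(at)))
            ++at;
        if (at >= n)
            return Token();                             // default-constructed token means EOF

        const size_t begin = at;
        const char first = *content.characters(at);
        if (std::isalpha((unsigned char)first)) {
            while (at < n && std::isalnum((unsigned char)*content.characters(at)))
                ++at;
            return Token(TOK_WORD, begin, at);
        } else if (std::isdigit((unsigned char)first)) {
            while (at < n && std::isdigit((unsigned char)*content.characters(at)))
                ++at;
            return Token(TOK_NUMBER, begin, at);
        } else {
            ++at;                                       // single non-word, non-number character
            return Token(TOK_OTHER, begin, at);
        }
    }
};

int main() {
    WordLexer lexer("count = 42");
    while (!lexer.atEof()) {
        if (lexer.isa(TOK_WORD)) {
            std::cout << "word:   " << lexer.lexeme() << "\n";
        } else if (lexer.isa(TOK_NUMBER)) {
            std::cout << "number: " << lexer.lexeme() << "\n";
        } else {
            std::cout << "other:  " << lexer.lexeme() << "\n";
        }
        lexer.consume();
    }
}

Because a default-constructed Token serves as the EOF marker, the driver loop only needs atEof() and consume() to walk the stream; operator[](k) could be used in the same loop for k-token lookahead.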