ROSE  0.9.10.91
Clexer.h
1 // WARNING: Changes to this file must be contributed back to Sawyer or else they will
2 // be clobbered by the next update from Sawyer. The Sawyer repository is at
3 // https://github.com/matzke1/sawyer.
4 
5 
6 
7 
8 // Lexical analyzer for C-like languages
9 #ifndef Sawyer_Clexer_H
10 #define Sawyer_Clexer_H
11 
12 #include <Sawyer/Sawyer.h>
13 
14 #include <Sawyer/Assert.h>
15 #include <Sawyer/Buffer.h>
16 #include <Sawyer/LineVector.h>
17 
18 #include <string>
19 #include <vector>
20 
21 namespace Sawyer {
22 namespace Language {
23 namespace Clexer {
24 
// Classification assigned to each token produced by the lexer.
enum TokenType {
    // end of file
    TOK_EOF,

    // opening grouping character: '(', '[', or '{'
    TOK_LEFT,

    // closing grouping character: ')', ']', or '}'
    TOK_RIGHT,

    // character literal
    TOK_CHAR,

    // string literal
    TOK_STRING,

    // numeric constant; an optional leading sign is part of the token
    TOK_NUMBER,

    // word or symbol name
    TOK_WORD,

    // preprocessor statement, i.e. one that starts with '#'
    TOK_CPP,

    // anything not covered by the categories above
    TOK_OTHER
};
36 
// Convert a token type to a string. Only the declaration is visible in this header; presumably the
// result is a human-readable name for the enumerator -- confirm against the definition.
std::string toString(TokenType);
38 
39 class Token {
40  friend class TokenStream;
41 
42  TokenType type_;
43  size_t begin_;
44  size_t end_;
45 
46 public:
47  Token(): type_(TOK_EOF), begin_(0), end_(0) {} // for std::vector, otherwise not used
48 
49  Token(TokenType type, size_t begin, size_t end)
50  : type_(type), begin_(begin), end_(end) {
51  ASSERT_require(end >= begin);
52  }
53 
54  TokenType type() const {
55  return type_;
56  }
57 };
58 
59 class TokenStream {
60  std::string fileName_; // name of source file
61  Sawyer::Container::LineVector content_; // contents of source file
62  size_t at_; // cursor position in buffer
63  std::vector<Token> tokens_; // token stream filled on demand
64  bool skipPreprocessorTokens_; // skip over '#' preprocessor directives
65 public:
66  explicit TokenStream(const std::string &fileName)
67  : fileName_(fileName), content_(fileName), at_(0), skipPreprocessorTokens_(true) {}
68 
69  explicit TokenStream(const std::string &fileName, const Sawyer::Container::Buffer<size_t, char>::Ptr &buffer)
70  : fileName_(fileName), content_(buffer), at_(0), skipPreprocessorTokens_(true) {}
71 
72  const std::string fileName() const { return fileName_; }
73 
74  bool skipPreprocessorTokens() const { return skipPreprocessorTokens_; }
75  void skipPreprocessorTokens(bool b) { skipPreprocessorTokens_ = b; }
76 
77  const Token& operator[](size_t lookahead);
78 
79  void consume(size_t n = 1);
80 
81  std::string lexeme(const Token &t) const;
82 
83  std::string toString(const Token &t) const;
84 
85  // Return the line of source in which this token appears, including line termination if present.
86  std::string line(const Token &t) const;
87 
88  bool matches(const Token &token, const char *s2) const;
89 
90  void emit(std::ostream &out, const std::string &fileName, const Token &token, const std::string &message) const;
91 
92  void emit(std::ostream &out, const std::string &fileName, const Token &begin, const Token &locus, const Token &end,
93  const std::string &message) const;
94 
95  std::pair<size_t, size_t> location(const Token &token) const;
96 
97 private:
98  void scanString();
99  void makeNextToken();
100 };
101 
102 
103 } // namespace
104 } // namespace
105 } // namespace
106 
107 #endif
A buffer of characters indexed by line number.
Definition: LineVector.h:24
ROSE_DLL_API std::string toString(const Attributes &)
Convert attributes to GraphViz language string.
const char * Language(long)
Convert ClangToSageTranslator::Language enum constant to a string.
Definition: stringify.C:19023
Reference-counting smart pointer.
Definition: SharedPointer.h:34
Name space for the entire library.
Definition: Access.h:13