ROSE  0.11.51.0
Clexer.h
1 // WARNING: Changes to this file must be contributed back to Sawyer or else they will
2 // be clobbered by the next update from Sawyer. The Sawyer repository is at
3 // https://github.com/matzke1/sawyer.
4 
5 
6 
7 
8 // Lexical analyzer for C-like languages
9 #ifndef Sawyer_Clexer_H
10 #define Sawyer_Clexer_H
11 
12 #include <Sawyer/Sawyer.h>
13 
14 #include <Sawyer/Assert.h>
15 #include <Sawyer/Buffer.h>
16 #include <Sawyer/LineVector.h>
17 
18 #include <string>
19 #include <vector>
20 
21 namespace Sawyer {
22 namespace Language {
23 namespace Clexer {
24 
// Classification assigned to each token recognized by the lexer. The numeric values are the
// same ones the compiler would assign implicitly; they are spelled out only for readability.
enum TokenType {
    TOK_EOF    = 0,                                     // end of file
    TOK_LEFT   = 1,                                     // opening grouping character: '(', '[', or '{'
    TOK_RIGHT  = 2,                                     // closing grouping character: ')', ']', or '}'
    TOK_CHAR   = 3,                                     // character literal
    TOK_STRING = 4,                                     // string literal
    TOK_NUMBER = 5,                                     // numeric constant, including optional leading sign
    TOK_WORD   = 6,                                     // word or symbol name
    TOK_CPP    = 7,                                     // preprocessor statement starting with '#'
    TOK_OTHER  = 8                                      // anything else
};

// Produce a string for a token type. Defined out of line; presumably the enumerator's name.
std::string toString(TokenType);
38 
39 class Token {
40  friend class TokenStream;
41 
42  TokenType type_;
43  size_t begin_;
44  size_t end_;
45 
46 public:
47  Token(): type_(TOK_EOF), begin_(0), end_(0) {} // for std::vector, otherwise not used
48 
49  Token(TokenType type, size_t begin, size_t end)
50  : type_(type), begin_(begin), end_(end) {
51  ASSERT_require(end >= begin);
52  }
53 
54  TokenType type() const {
55  return type_;
56  }
57 
58  size_t begin() const {
59  return begin_;
60  }
61 
62  size_t end() const {
63  return end_;
64  }
65 };
66 
67 class TokenStream {
68  std::string fileName_; // name of source file
69  Sawyer::Container::LineVector content_; // contents of source file
70  size_t at_; // cursor position in buffer
71  std::vector<Token> tokens_; // token stream filled on demand
72  bool skipPreprocessorTokens_; // skip over '#' preprocessor directives
73 public:
74  explicit TokenStream(const std::string &fileName)
75  : fileName_(fileName), content_(fileName), at_(0), skipPreprocessorTokens_(true) {}
76 
77  explicit TokenStream(const std::string &fileName, const Sawyer::Container::Buffer<size_t, char>::Ptr &buffer)
78  : fileName_(fileName), content_(buffer), at_(0), skipPreprocessorTokens_(true) {}
79 
80  const std::string fileName() const { return fileName_; }
81 
82  bool skipPreprocessorTokens() const { return skipPreprocessorTokens_; }
83  void skipPreprocessorTokens(bool b) { skipPreprocessorTokens_ = b; }
84 
85  const Token& operator[](size_t lookahead);
86 
87  void consume(size_t n = 1);
88 
89  std::string lexeme(const Token &t) const;
90 
91  std::string toString(const Token &t) const;
92 
93  // Return the line of source in which this token appears, including line termination if present.
94  std::string line(const Token &t) const;
95 
96  bool matches(const Token &token, const char *s2) const;
97 
98  void emit(std::ostream &out, const std::string &fileName, const Token &token, const std::string &message) const;
99 
100  void emit(std::ostream &out, const std::string &fileName, const Token &begin, const Token &locus, const Token &end,
101  const std::string &message) const;
102 
103  std::pair<size_t, size_t> location(const Token &token) const;
104 
105  const Sawyer::Container::LineVector& content() const {
106  return content_;
107  }
108 
109 private:
110  void scanString();
111  void makeNextToken();
112 };
113 
114 
115 } // namespace
116 } // namespace
117 } // namespace
118 
119 #endif
A buffer of characters indexed by line number.
Definition: LineVector.h:24
const char * Language(int64_t)
Convert ClangToDotTranslator::Language enum constant to a string.
Reference-counting intrusive smart pointer.
Definition: SharedPointer.h:68
Name space for the entire library.
Definition: FeasiblePath.h:787
ROSE_UTIL_API std::string toString(const Path &)
Convert a path to a string.