ROSE  0.9.9.109
DocumentMarkup.h
1 // WARNING: Changes to this file must be contributed back to Sawyer or else they will
2 // be clobbered by the next update from Sawyer. The Sawyer repository is at
3 // https://github.com/matzke1/sawyer.
4 
5 
6 
7 
8 #ifndef Sawyer_Document_Markup_H
9 #define Sawyer_Document_Markup_H
10 
11 #include <Sawyer/Exception.h>
12 #include <Sawyer/Lexer.h>
13 #include <Sawyer/LineVector.h>
14 #include <Sawyer/Map.h>
15 #include <Sawyer/Sawyer.h>
16 
17 #include <boost/algorithm/string/join.hpp>
18 #include <boost/regex.hpp>
19 
20 namespace Sawyer {
21 
26 namespace Document {
27 
34 namespace Markup {
35 
36 class Grammar;
37 
39 // Token
41 
42 enum TokenType {
43  TOK_DATA, // anything that's not markup
44  TOK_FUNCTION, // function name, like "code" scanned from "@code"
45  TOK_LEFT, // unescaped left paren (escaped are part of TOK_DATA)
46  TOK_RIGHT, // unescaped right paren
47  TOK_BLANK_LINE // one or more lines with no non-white-space
48 };
49 
51 
53 // TokenStream
55 
56 class SAWYER_EXPORT TokenStream: public Lexer::TokenStream<Token> {
57  static const char CHAR_LEFT = '{';
58  static const char CHAR_RIGHT = '}';
59  static const char CHAR_AT = '@';
60 
61 public:
62  explicit TokenStream(const std::string &s): Lexer::TokenStream<Token>(s) {}
63  Token scanNextToken(const Container::LineVector &content, size_t &at /*in,out*/);
64 };
65 
67 // Function
69 
71 class SAWYER_EXPORT Function: public SharedObject, public SharedFromThis<Function> {
72 public:
75 
76 private:
77  // Declaration for a formal argument
78  struct FormalArg {
79  std::string name; // name of argument for diagnostics and debugging
80  Optional<std::string> dflt; // default value
81 
82  explicit FormalArg(const std::string &name, const Optional<std::string> &dflt = Sawyer::Nothing())
83  : name(name), dflt(dflt) {}
84 
85  bool isRequired() const { if (dflt) return false; else return true; }
86  bool isOptional() const { return !isRequired(); }
87  };
88 
89  std::string name_; // name of function for diagnostics and debugging
90  bool isMacro_; // if true, then don't evaluate args before calling
91  std::vector<FormalArg> formals_; // declarations of formal arguments
92  size_t ellipsis_; // max number of additional args
93 
94 protected:
95  explicit Function(const std::string &name, bool evalArgs = true)
96  : name_(name), isMacro_(!evalArgs), ellipsis_(0) {}
97 
98 public:
102  const std::string& name() const;
103 
107  bool isMacro() const;
108 
112  Ptr arg(const std::string &name);
113 
121  Ptr arg(const std::string &name, const std::string &dflt);
122 
127  Ptr ellipsis(size_t n = (size_t)(-1));
128 
130  size_t nRequiredArgs() const;
131 
136  size_t nOptionalArgs() const;
137 
142  size_t nAdditionalArgs() const;
143 
145  size_t maxArgs() const;
146 
152  void validateArgs(std::vector<std::string> &actuals /*in,out*/, TokenStream&) const;
153 
155  virtual std::string eval(const Grammar&, const std::vector<std::string> &actuals) = 0;
156 };
157 
159 // Errors and exceptions
161 
163 class SAWYER_EXPORT ErrorLocation {
164  friend class Grammar;
165 
166  // Stack frame where error occurred
167  struct Frame {
168  std::string name; // like "in @foo at name_of_input"
169  size_t lineIdx; // zero-origin line number
170  size_t offset; // beginning position within the line
171  std::string input; // line of input causing error
172  Frame(TokenStream &where, const std::string &mesg);
173  };
174 
175  // Use the destructor to add a record to the stack frame. We could have used try/catch but that would interfere with
176  // debugging the exception. The alternative is that each function that could add a stack frame does so like this:
177  // void some_function(ErrorLocation &eloc, TokenStream &tokens, ...) {
178  // Trap trap(eloc, tokens);
179  // do_something_that_might_throw();
180  // trap.passed();
181  // }
182  //
183  // Any failure to reach trap.passed() will add a stack frame to the location.
184  class Trap {
185  ErrorLocation &eloc_;
186  TokenStream &tokens_;
187  std::string mesg_;
188  bool passed_;
189 
190  public:
191  Trap(ErrorLocation &eloc, TokenStream &tokens, const std::string &mesg)
192  : eloc_(eloc), tokens_(tokens), mesg_(mesg), passed_(false) {}
193  ~Trap() { if (!passed_) eloc_.push(Frame(tokens_, mesg_)); }
194  void passed() { passed_ = true; }
195  };
196 
197 private:
198  std::vector<Frame> frames_;
199 
200  void push(const Frame &frame) {
201  frames_.push_back(frame);
202  }
203 
204 public:
206  std::string toString() const;
207 };
208 
210 class SAWYER_EXPORT SyntaxError: public Sawyer::Exception::SyntaxError {
211  ErrorLocation eloc_;
212 public:
213  ~SyntaxError() throw () {}
214 
216  SyntaxError(const std::string &mesg)
217  : Sawyer::Exception::SyntaxError(mesg) {}
218 
220  void errorLocation(const ErrorLocation &eloc) {
221  eloc_ = eloc;
222  }
223 };
224 
226 // Predefined functions
228 
232 class SAWYER_EXPORT StaticContent: public Function {
233  std::string resultString_;
234 protected:
235  StaticContent(const std::string &name, const std::string &resultString)
236  : Function(name), resultString_(resultString) {}
237 public:
239  static Ptr instance(const std::string &name, const std::string str) {
240  return Ptr(new StaticContent(name, str));
241  }
242  std::string eval(const Grammar&, const std::vector<std::string> &args);
243 };
244 
249 class SAWYER_EXPORT Error: public Function {
250 protected:
251  Error(const std::string &name): Function(name) {}
252 public:
254  static Ptr instance(const std::string &name, const std::string dfltMesg = "error") {
255  return Ptr(new Error(name))->arg("message", dfltMesg);
256  }
257  std::string eval(const Grammar&, const std::vector<std::string> &args);
258 };
259 
264 class SAWYER_EXPORT Quote: public Function {
265 protected:
266  Quote(const std::string &name): Function(name, false) {}
267 public:
269  static Ptr instance(const std::string &name) {
270  return Ptr(new Quote(name))->ellipsis();
271  }
272  std::string eval(const Grammar&, const std::vector<std::string> &args);
273 };
274 
280 class SAWYER_EXPORT Eval: public Function {
281 protected:
282  Eval(const std::string &name): Function(name) {}
283 public:
285  static Ptr instance(const std::string &name) {
286  return Ptr(new Eval(name))->ellipsis();
287  }
288  std::string eval(const Grammar &grammar, const std::vector<std::string> &args);
289 };
290 
295 class SAWYER_EXPORT IfEq: public Function {
296 protected:
297  IfEq(const std::string &name): Function(name, false) {}
298 public:
299  static Ptr instance(const std::string &name) {
300  return Ptr(new IfEq(name))->arg("val1")->arg("val2")->arg("if_part")->arg("else_part", "");
301  }
302  std::string eval(const Grammar &grammar, const std::vector<std::string> &args);
303 };
304 
308 class SAWYER_EXPORT Concat: public Function {
309 protected:
310  Concat(const std::string &name): Function(name) {}
311 public:
312  static Ptr instance(const std::string &name) {
313  return Ptr(new Concat(name))->ellipsis();
314  }
315  std::string eval(const Grammar &grammar, const std::vector<std::string> &args);
316 };
317 
319 // Reflow
321 
/** A class that can reflow and indent paragraphs.
 *
 *  Text is accumulated by the function-call operator and the result is obtained from @ref toString. The page width
 *  is never smaller than 20, whether it is given to the constructor or to the @ref pageWidth property. */
class SAWYER_EXPORT Reflow {
    size_t indentLevel_;
    std::string indentation_;                           // string for one level of indentation
    std::ostringstream out_;
    size_t column_;
    std::string spaces_;                                // accumulated white space
    std::string nonspaces_;                             // non-spaces following accumulated space
    size_t pageWidth_;
    size_t nLineFeeds_;                                 // number of consecutive linefeeds

    // Raise a requested page width to the minimum of 20. Shared by the constructor and the property setter so the
    // two cannot disagree about the lower bound.
    static size_t clampPageWidth(size_t n) { return std::max(n, static_cast<size_t>(20)); }

public:
    /** Construct a reflow filter.
     *
     *  @p pageWidth is subject to the same minimum as the @ref pageWidth property. */
    // NOTE(review): white space in this listing looks collapsed, so the indentation literal below may originally have
    // been wider than one space -- confirm against the upstream header.
    explicit Reflow(size_t pageWidth = 80)
        : indentLevel_(0), indentation_(" "), column_(0), pageWidth_(clampPageWidth(pageWidth)), nLineFeeds_(0) {}

    /** Property: Page width.
     *
     *  Values smaller than 20 are raised to 20.
     *
     * @{ */
    size_t pageWidth() const { return pageWidth_; }
    Reflow& pageWidth(size_t n) { pageWidth_ = clampPageWidth(n); return *this; }
    /** @} */

    /** Property: Indentation string.
     *
     *  The string used for one level of indentation.
     *
     * @{ */
    const std::string& indentationString() const { return indentation_; }
    Reflow& indentationString(const std::string &s) { indentation_ = s; return *this; }
    /** @} */

    /** Change the indentation level. Defined out of line; presumably increments and decrements it by one.
     *
     * @{ */
    Reflow& operator++();
    Reflow& operator--();
    /** @} */

    /** Insert a line break. Defined out of line. */
    Reflow& lineBreak();

    /** Insert text. Defined out of line. */
    Reflow& operator()(const std::string &s);

    /** Return the reflowed text. Defined out of line; not const, so it may change the state of this object. */
    std::string toString();

private:
    void emitIndentation();                             // indent if we're at the beginning of a line. Also discards accumulated white space.
    void emitAccumulated();                             // optionally indent and emit accumulated space and non-space
    void emitNewLine();                                 // advance to the next line without emitting accumulated text or indentation.
};
397 
399 // Grammar
401 
403 class SAWYER_EXPORT Grammar {
404  Container::Map<std::string, Function::Ptr> functions_; // functions indexed by their names
405  static const bool CONSUME = true;
406  static const bool LEAVE = false;
407 
408 public:
409  virtual ~Grammar() {}
410 
414  Grammar& with(const Function::Ptr&);
415 
417  virtual std::string operator()(const std::string &s) const;
418 
420  static std::string unescape(const std::string &s);
421 
423  static std::string escape(const std::string &s);
424 
425 private:
426  // Evaluate an entire token stream. Throws an exception if an error occurs.
427  std::string eval(TokenStream &tokens, ErrorLocation&) const;
428 
429  // Returns a string up to (and possibly including) the next CHAR_RIGHT that is not balanced by a CHAR_LEFT encountered
430  // during the scanning. The TOK_RIGHT is consumed if requireRight is set, and an error is thrown if that token is not
431  // found.
432  std::string readArgument(TokenStream&, ErrorLocation&, bool requireRight) const;
433 
434  // Parse one argument by parsing up to (and possibly including) the next unbalanced TOK_RIGHT. The TOK_RIGHT is consumed
435  // if requireRight is set, and an error is thrown if that token is not found.
436  std::string evalArgument(TokenStream&, ErrorLocation&, bool requireRight) const;
437 
438  // Parse one function. The current token should be a TOK_FUNCTION.
439  std::string evalFunction(TokenStream&, ErrorLocation&) const;
440 };
441 
442 } // namespace
443 } // namespace
444 } // namespace
445 
446 #endif
Reflow(size_t pageWidth=80)
Construct a reflow filter.
SyntaxError(const std::string &mesg)
Syntax error.
Reflow & pageWidth(size_t n)
Property: Page width.
A buffer of characters indexed by line number.
Definition: LineVector.h:23
Error in parsing something.
Syntax error when parsing markup.
static Ptr instance(const std::string &name, const std::string str)
Create a new instance.
Function that quotes its arguments.
A function that inserts a string.
A class that can reflow and indent paragraphs.
Represents one token of input.
Definition: Lexer.h:28
size_t pageWidth() const
Property: Page width.
Name space for the entire library.
Definition: Access.h:11
void errorLocation(const ErrorLocation &eloc)
Set an error location.
const std::string & indentationString() const
Property: Indentation string.
Base class for markup functions.
Creates SharedPointer from this.
Ptr ellipsis(size_t n=(size_t)(-1))
Declare additional arguments.
static Ptr instance(const std::string &name, const std::string dfltMesg="error")
Create a new instance.
static Ptr instance(const std::string &name)
Create a new instance.
Ptr arg(const std::string &name)
Declare a required argument.
static Ptr instance(const std::string &name)
Create a new instance.
Base class for reference counted objects.
Definition: SharedObject.h:22
Function that generates an error message.
Reflow & indentationString(const std::string &s)
Property: Indentation string.
Evaluate arguments a second time.
Represents no value.
Definition: Optional.h:32
Information about the location of an exception.
SharedPointer< Function > Ptr
Reference-counting pointer to markup function.
An ordered list of tokens scanned from input.
Definition: Lexer.h:91
Container associating values with keys.
Definition: Sawyer/Map.h:64