Free Electron
scanner.h
1 #ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
2 #define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
3 
4 #if defined(_MSC_VER) || \
5  (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
6  (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
7 #pragma once
8 #endif
9 
10 #include <cstddef>
11 #include <ios>
12 #include <queue>
13 #include <stack>
14 #include <string>
15 
16 #include "ptr_vector.h"
17 #include "stream.h"
18 #include "token.h"
19 #include "yaml-cpp/mark.h"
20 
21 namespace YAML {
22 class Node;
23 class RegEx;
24 
25 /**
26  * A scanner transforms a stream of characters into a stream of tokens.
27  */
28 class Scanner {
29  public:
30  explicit Scanner(std::istream &in);  // `in` is the character source; presumably wrapped by INPUT - confirm in the .cpp
31  ~Scanner();
32 
33  /** Returns true if there are no more tokens to be read. */
34  bool empty();  // non-const; NOTE(review): presumably scans ahead before answering - confirm in the .cpp
35 
36  /** Removes the next token in the queue. */
37  void pop();
38 
39  /** Returns, but does not remove, the next token in the queue. */
40  Token &peek();  // handed out by non-const reference
41 
42  /** Returns the current mark in the input stream. */
43  Mark mark() const;
44 
45  private:
46  struct IndentMarker {  // element type pointed to by the m_indents stack and owned via m_indentRefs
47  enum INDENT_TYPE { MAP, SEQ, NONE };
48  enum STATUS { VALID, INVALID, UNKNOWN };
49  IndentMarker(int column_, INDENT_TYPE type_)
50  : column(column_), type(type_), status(VALID), pStartToken(nullptr) {}  // every marker starts VALID with no start token
51 
52  int column;
53  INDENT_TYPE type;
54  STATUS status;
55  Token *pStartToken;  // null after construction; NOTE(review): presumably set to the start token enqueued by PushIndentTo - confirm
56  };
57 
58  enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };  // element type of the m_flows stack
59 
60  private:
61  // scanning
62 
63  /**
64  * Scans until there's a valid token at the front of the queue, or the queue
65  * is empty. The state can be checked by {@link #empty}, and the next token
66  * retrieved by {@link #peek}.
67  */
68  void EnsureTokensInQueue();
69 
70  /**
71  * The main scanning function; this method branches out to scan whatever the
72  * next token should be.
73  */
74  void ScanNextToken();
75 
76  /** Eats the input stream until it reaches the next token-like thing. */
77  void ScanToNextToken();
78 
79  /** Sets the initial conditions for starting a stream. */
80  void StartStream();
81 
82  /** Closes out the stream, finish up, etc. */
83  void EndStream();
84 
85  Token *PushToken(Token::TYPE type);  // NOTE(review): presumably enqueues a token of `type` and returns a pointer to it - confirm
86 
87  bool InFlowContext() const { return !m_flows.empty(); }  // true when at least one marker is on m_flows
88  bool InBlockContext() const { return m_flows.empty(); }  // exact negation of InFlowContext()
89  std::size_t GetFlowLevel() const { return m_flows.size(); }  // current depth of the m_flows stack
90 
91  Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;  // presumably maps an indent type to its start token type - confirm
92 
93  /**
94  * Pushes an indentation onto the stack, and enqueues the proper token
95  * (sequence start or mapping start).
96  *
97  * @return the indent marker it generates (if any).
98  */
99  IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
100 
101  /**
102  * Pops indentations off the stack until it reaches the current indentation
103  * level, and enqueues the proper token each time. Then pops all invalid
104  * indentations off.
105  */
106  void PopIndentToHere();
107 
108  /**
109  * Pops all indentations (except for the base empty one) off the stack, and
110  * enqueues the proper token each time.
111  */
112  void PopAllIndents();
113 
114  /** Pops a single indent, pushing the proper token. */
115  void PopIndent();
116  int GetTopIndent() const;  // NOTE(review): presumably the column of the marker on top of m_indents - confirm
117 
118  // checking input
119  bool CanInsertPotentialSimpleKey() const;
120  bool ExistsActiveSimpleKey() const;
121  void InsertPotentialSimpleKey();
122  void InvalidateSimpleKey();
123  bool VerifySimpleKey();
124  void PopAllSimpleKeys();
125 
126  /**
127  * Throws a ParserException with the current token location (if available),
128  * and does not parse any more tokens.
129  */
130  void ThrowParserException(const std::string &msg) const;
131 
132  bool IsWhitespaceToBeEaten(char ch);  // non-const although it only takes a character; NOTE(review): check whether it mutates state
133 
134  /**
135  * Returns the appropriate regex to check if the next token is a value token.
136  */
137  const RegEx &GetValueRegex() const;
138 
139  struct SimpleKey {  // element type of the m_simpleKeys stack
140  SimpleKey(const Mark &mark_, std::size_t flowLevel_);  // defined out of line; pointer members' initial values are not visible here
141 
142  void Validate();
143  void Invalidate();
144 
145  Mark mark;
146  std::size_t flowLevel;  // same type as GetFlowLevel(); presumably the flow depth at which the key was recorded - confirm
147  IndentMarker *pIndent;
148  Token *pMapStart, *pKey;
149  };
150 
151  // and the tokens
152  void ScanDirective();
153  void ScanDocStart();
154  void ScanDocEnd();
155  void ScanBlockSeqStart();
156  void ScanBlockMapSTart();  // NOTE(review): "STart" capitalisation looks like a typo; check the definition and callers before renaming
157  void ScanBlockEnd();
158  void ScanBlockEntry();
159  void ScanFlowStart();
160  void ScanFlowEnd();
161  void ScanFlowEntry();
162  void ScanKey();
163  void ScanValue();
164  void ScanAnchorOrAlias();
165  void ScanTag();
166  void ScanPlainScalar();
167  void ScanQuotedScalar();
168  void ScanBlockScalar();
169 
170  private:
171  // the stream
172  Stream INPUT;
173 
174  // the output (tokens)
175  std::queue<Token> m_tokens;  // the queue served by empty(), peek() and pop()
176 
177  // state info
178  bool m_startedStream, m_endedStream;
179  bool m_simpleKeyAllowed;
180  bool m_canBeJSONFlow;
181  std::stack<SimpleKey> m_simpleKeys;
182  std::stack<IndentMarker *> m_indents;  // non-owning pointers; the markers themselves are held by m_indentRefs
183  ptr_vector<IndentMarker> m_indentRefs; // for "garbage collection"
184  std::stack<FLOW_MARKER> m_flows;  // empty means block context (see InBlockContext)
185 };
186 }
187 
188 #endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
void ThrowParserException(const std::string &msg) const
Throws a ParserException with the current token location (if available), and does not parse any more tokens.
void PopIndentToHere()
Pops indentations off the stack until it reaches the current indentation level, and enqueues the proper token each time. Then pops all invalid indentations off.
void ScanToNextToken()
Eats the input stream until it reaches the next token-like thing.
void ScanNextToken()
The main scanning function; this method branches out to scan whatever the next token should be.
Token & peek()
Returns, but does not remove, the next token in the queue.
void EndStream()
Closes out the stream, finish up, etc.
void StartStream()
Sets the initial conditions for starting a stream.
bool empty()
Returns true if there are no more tokens to be read.
const RegEx & GetValueRegex() const
Returns the appropriate regex to check if the next token is a value token.
void EnsureTokensInQueue()
Scans until there's a valid token at the front of the queue, or the queue is empty.
void PopIndent()
Pops a single indent, pushing the proper token.
Definition: anchor.h:12
void pop()
Removes the next token in the queue.
void PopAllIndents()
Pops all indentations (except for the base empty one) off the stack, and enqueues the proper token each time.
Mark mark() const
Returns the current mark in the input stream.
IndentMarker * PushIndentTo(int column, IndentMarker::INDENT_TYPE type)
Pushes an indentation onto the stack, and enqueues the proper token (sequence start or mapping start).
A scanner transforms a stream of characters into a stream of tokens.
Definition: scanner.h:28