1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2 * Use of this file is governed by the BSD 3-clause license that
3 * can be found in the LICENSE.txt file in the project root.
8 #include "Recognizer.h"
9 #include "tree/ParseTreeListener.h"
10 #include "tree/ParseTree.h"
11 #include "TokenStream.h"
12 #include "TokenSource.h"
13 #include "misc/Interval.h"
17 /// This is all the parsing support code essentially; most of it is error recovery stuff.
18 class ANTLR4CPP_PUBLIC Parser : public Recognizer {
/// Parse tree listener that keeps a pointer back to the Parser it was created for.
/// The documentation of setTrace()/isTrace() and of the _tracer field in this class
/// describes it as the listener that is registered as a parse listener while tracing is enabled.
/// NOTE(review): the callback bodies are defined outside this header, so what they
/// actually do is not visible here.
21 class TraceListener : public tree::ParseTreeListener {
/// <param name="outerInstance"> the parser this listener is associated with; presumably
/// stored in the member of the same name (confirm in the implementation) </param>
23 TraceListener(Parser *outerInstance);
24 virtual ~TraceListener();
/// ParseTreeListener callbacks overridden by this listener.
26 virtual void enterEveryRule(ParserRuleContext *ctx) override;
27 virtual void visitTerminal(tree::TerminalNode *node) override;
28 virtual void visitErrorNode(tree::ErrorNode *node) override;
29 virtual void exitEveryRule(ParserRuleContext *ctx) override;
/// The associated parser. The pointer itself is const, so it cannot be reseated after construction.
32 Parser *const outerInstance;
/// Parse tree listener that exposes a static instance, INSTANCE.
/// The getTrimParseTree() documentation in this class refers to it as the default listener
/// used to trim the ParserRuleContext#children list during the parse process.
/// NOTE(review): the callback bodies are defined outside this header and are not visible here.
35 class TrimToSizeListener : public tree::ParseTreeListener {
/// Static instance of this listener (defined outside this header).
37 static TrimToSizeListener INSTANCE;
39 virtual ~TrimToSizeListener();
/// ParseTreeListener callbacks overridden by this listener.
41 virtual void enterEveryRule(ParserRuleContext *ctx) override;
42 virtual void visitTerminal(tree::TerminalNode *node) override;
43 virtual void visitErrorNode(tree::ErrorNode *node) override;
44 virtual void exitEveryRule(ParserRuleContext *ctx) override;
/// Constructs a parser for the given token stream.
/// NOTE(review): the raw pointer does not express ownership or whether {@code input}
/// may be null — confirm against the implementation.
/// <param name="input"> the token stream to parse </param>
47 Parser(TokenStream *input);
50 /// reset the parser's state
54 /// Match current input symbol against {@code ttype}. If the symbol type
55 /// matches, <seealso cref="ANTLRErrorStrategy#reportMatch"/> and <seealso cref="#consume"/> are
56 /// called to complete the match process.
58 /// If the symbol type does not match,
59 /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is called on the current error
60 /// strategy to attempt recovery. If <seealso cref="#getBuildParseTree"/> is
61 /// {@code true} and the token index of the symbol returned by
62 /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is -1, the symbol is added to
63 /// the parse tree by calling {@link #createErrorNode(ParserRuleContext, Token)} then
64 /// {@link ParserRuleContext#addErrorNode(ErrorNode)}.
66 /// <param name="ttype"> the token type to match </param>
67 /// <returns> the matched symbol </returns>
68 /// <exception cref="RecognitionException"> if the current input symbol did not match
69 /// {@code ttype} and the error strategy could not recover from the
70 /// mismatched symbol </exception>
71 virtual Token* match(size_t ttype);
74 /// Match current input symbol as a wildcard. If the symbol type matches
75 /// (i.e. has a value greater than 0), <seealso cref="ANTLRErrorStrategy#reportMatch"/>
76 /// and <seealso cref="#consume"/> are called to complete the match process.
78 /// If the symbol type does not match,
79 /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is called on the current error
80 /// strategy to attempt recovery. If <seealso cref="#getBuildParseTree"/> is
81 /// {@code true} and the token index of the symbol returned by
82 /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is -1, the symbol is added to
83 /// the parse tree by calling <seealso cref="ParserRuleContext#addErrorNode"/>.
85 /// <returns> the matched symbol </returns>
86 /// <exception cref="RecognitionException"> if the current input symbol did not match
87 /// a wildcard and the error strategy could not recover from the mismatched
88 /// symbol </exception>
89 virtual Token* matchWildcard();
92 /// Track the <seealso cref="ParserRuleContext"/> objects during the parse and hook
93 /// them up using the <seealso cref="ParserRuleContext#children"/> list so that it
94 /// forms a parse tree. The <seealso cref="ParserRuleContext"/> returned from the start
95 /// rule represents the root of the parse tree.
97 /// Note that if we are not building parse trees, rule contexts only point
98 /// upwards. When a rule exits, it returns the context but that gets garbage
99 /// collected if nobody holds a reference. It points upwards but nobody
102 /// When we build parse trees, we are adding all of these contexts to
103 /// <seealso cref="ParserRuleContext#children"/> list. Contexts are then not candidates
104 /// for garbage collection.
106 virtual void setBuildParseTree(bool buildParseTrees);
109 /// Gets whether or not a complete parse tree will be constructed while
110 /// parsing. This property is {@code true} for a newly constructed parser.
112 /// <returns> {@code true} if a complete parse tree will be constructed while
113 /// parsing, otherwise {@code false} </returns>
114 virtual bool getBuildParseTree();
117 /// Trim the internal lists of the parse tree during parsing to conserve memory.
118 /// This property is set to {@code false} by default for a newly constructed parser.
120 /// <param name="trimParseTrees"> {@code true} to trim the capacity of the <seealso cref="ParserRuleContext#children"/>
121 /// list to its size after a rule is parsed. </param>
122 virtual void setTrimParseTree(bool trimParseTrees);
124 /// <returns> {@code true} if the <seealso cref="ParserRuleContext#children"/> list is trimmed
125 /// using the default <seealso cref="Parser.TrimToSizeListener"/> during the parse process. </returns>
126 virtual bool getTrimParseTree();
/// Returns parse tree listeners as a vector of raw pointers, by value.
/// NOTE(review): presumably a copy of the listeners registered through addParseListener
/// (see the _parseListeners member) — confirm against the implementation.
128 virtual std::vector<tree::ParseTreeListener *> getParseListeners();
131 /// Registers {@code listener} to receive events during the parsing process.
133 /// To support output-preserving grammar transformations (including but not
134 /// limited to left-recursion removal, automated left-factoring, and
135 /// optimized code generation), calls to listener methods during the parse
136 /// may differ substantially from calls made by
137 /// <seealso cref="ParseTreeWalker#DEFAULT"/> used after the parse is complete. In
138 /// particular, rule entry and exit events may occur in a different order
139 /// during the parse than after the parse. In addition, calls to certain
140 /// rule entry methods may be omitted.
142 /// With the following specific exceptions, calls to listener events are
143 /// <em>deterministic</em>, i.e. for identical input the calls to listener
144 /// methods will be the same.
147 /// <li>Alterations to the grammar used to generate code may change the
148 /// behavior of the listener calls.</li>
149 /// <li>Alterations to the command line options passed to ANTLR 4 when
150 /// generating the parser may change the behavior of the listener calls.</li>
151 /// <li>Changing the version of the ANTLR Tool used to generate the parser
152 /// may change the behavior of the listener calls.</li>
155 /// <param name="listener"> the listener to add
157 /// <exception cref="NullPointerException"> if {@code listener} is {@code null} </exception>
158 virtual void addParseListener(tree::ParseTreeListener *listener);
161 /// Remove {@code listener} from the list of parse listeners.
163 /// If {@code listener} is {@code null} or has not been added as a parse
164 /// listener, this method does nothing.
166 /// <seealso cref= #addParseListener
168 /// <param name="listener"> the listener to remove </param>
169 virtual void removeParseListener(tree::ParseTreeListener *listener);
172 /// Remove all parse listeners.
174 /// <seealso cref= #addParseListener </seealso>
175 virtual void removeParseListeners();
178 /// Notify any parse listeners of an enter rule event.
180 /// <seealso cref= #addParseListener </seealso>
181 virtual void triggerEnterRuleEvent();
184 /// Notify any parse listeners of an exit rule event.
186 /// <seealso cref= #addParseListener </seealso>
187 virtual void triggerExitRuleEvent();
190 /// Gets the number of syntax errors reported during parsing. This value is
191 /// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
193 /// <seealso cref= #notifyErrorListeners </seealso>
194 virtual size_t getNumberOfSyntaxErrors();
/// Returns a pointer to the TokenFactory<CommonToken> in use; overrides the base-class declaration.
/// NOTE(review): presumably obtained from the token source of the current input stream,
/// mirroring what setTokenFactory does — confirm against the implementation.
196 virtual TokenFactory<CommonToken>* getTokenFactory() override;
199 /// Tell our token source and error strategy about a new way to create tokens. </summary>
200 template<typename T1>
201 void setTokenFactory(TokenFactory<T1> *factory) {
202 _input->getTokenSource()->setTokenFactory(factory);
205 /// The ATN with bypass alternatives is expensive to create so we create it
206 /// lazily. The ATN is owned by us.
207 virtual const atn::ATN& getATNWithBypassAlts();
210 /// The preferred method of getting a tree pattern. For example, here's a
214 /// ParseTree t = parser.expr();
215 /// ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
216 /// ParseTreeMatch m = p.match(t);
217 /// String id = m.get("ID");
220 virtual tree::pattern::ParseTreePattern compileParseTreePattern(const std::string &pattern, int patternRuleIndex);
223 /// The same as <seealso cref="#compileParseTreePattern(String, int)"/> but specify a
224 /// <seealso cref="Lexer"/> rather than trying to deduce it from this parser.
226 virtual tree::pattern::ParseTreePattern compileParseTreePattern(const std::string &pattern, int patternRuleIndex,
/// Returns the error handling strategy of this parser (see the _errHandler member,
/// whose documentation names DefaultErrorStrategy as the default).
229 virtual Ref<ANTLRErrorStrategy> getErrorHandler();
/// Sets the error handling strategy.
/// NOTE(review): presumably replaces _errHandler with {@code handler} — confirm against the implementation.
230 virtual void setErrorHandler(Ref<ANTLRErrorStrategy> const& handler);
/// Returns the input stream as an IntStream pointer; overrides the base-class declaration.
/// NOTE(review): presumably the same object that getTokenStream() returns — confirm.
232 virtual IntStream* getInputStream() override;
/// Sets the input stream; overrides the base-class declaration.
/// NOTE(review): the parameter is typed IntStream*, while the parser otherwise works with a
/// TokenStream; presumably {@code input} must actually be a TokenStream — confirm.
233 void setInputStream(IntStream *input) override;
/// Returns the input as a TokenStream pointer.
235 virtual TokenStream* getTokenStream();
237 /// Set the token stream and reset the parser.
238 virtual void setTokenStream(TokenStream *input);
241 /// Match needs to return the current input symbol, which gets put
242 /// into the label for the associated token ref; e.g., x=ID.
244 virtual Token* getCurrentToken();
/// Reports {@code msg} to the error listeners. According to the getNumberOfSyntaxErrors()
/// documentation, the syntax error count is incremented each time notifyErrorListeners is called.
/// NOTE(review): presumably forwards to the three-argument overload using the current token
/// and no exception — confirm against the implementation.
246 void notifyErrorListeners(const std::string &msg);
/// Reports {@code msg} for {@code offendingToken} to the error listeners.
/// <param name="offendingToken"> the token the error is reported for </param>
/// <param name="msg"> the error message </param>
/// <param name="e"> exception associated with the error, passed as std::exception_ptr;
/// presumably may be empty when there is none (confirm) </param>
248 virtual void notifyErrorListeners(Token *offendingToken, const std::string &msg, std::exception_ptr e);
250 /// Consume and return the <seealso cref="#getCurrentToken current symbol"/>.
252 /// E.g., given the following input with {@code A} being the current
253 /// lookahead symbol, this function moves the cursor to {@code B} and returns
261 /// If the parser is not in error recovery mode, the consumed symbol is added
262 /// to the parse tree using <seealso cref="ParserRuleContext#addChild(TerminalNode)"/>, and
263 /// <seealso cref="ParseTreeListener#visitTerminal"/> is called on any parse listeners.
264 /// If the parser <em>is</em> in error recovery mode, the consumed symbol is
265 /// added to the parse tree using {@link #createErrorNode(ParserRuleContext, Token)} then
266 /// {@link ParserRuleContext#addErrorNode(ErrorNode)} and
267 /// <seealso cref="ParseTreeListener#visitErrorNode"/> is called on any parse
269 virtual Token* consume();
271 /// Always called by generated parsers upon entry to a rule. Access field
272 /// <seealso cref="#_ctx"/> to get the current context.
273 virtual void enterRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex);
277 virtual void enterOuterAlt(ParserRuleContext *localctx, size_t altNum);
280 * Get the precedence level for the top-most precedence rule.
282 * @return The precedence level for the top-most precedence rule, or -1 if
283 * the parser context is not nested within a precedence rule.
285 int getPrecedence() const;
288 /// <seealso cref="#enterRecursionRule(ParserRuleContext, int, int, int)"/> instead.
289 virtual void enterRecursionRule(ParserRuleContext *localctx, size_t ruleIndex);
290 virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence);
292 /** Like {@link #enterRule} but for recursive rules.
293 * Make the current context the child of the incoming localctx.
295 virtual void pushNewRecursionContext(ParserRuleContext *localctx, size_t state, size_t ruleIndex);
296 virtual void unrollRecursionContexts(ParserRuleContext *parentctx);
297 virtual ParserRuleContext* getInvokingContext(size_t ruleIndex);
298 virtual ParserRuleContext* getContext();
299 virtual void setContext(ParserRuleContext *ctx);
300 virtual bool precpred(RuleContext *localctx, int precedence) override;
301 virtual bool inContext(const std::string &context);
304 /// Checks whether or not {@code symbol} can follow the current state in the
305 /// ATN. The behavior of this method is equivalent to the following, but is
306 /// implemented such that the complete context-sensitive follow set does not
307 /// need to be explicitly constructed.
310 /// return getExpectedTokens().contains(symbol);
313 /// <param name="symbol"> the symbol type to check </param>
314 /// <returns> {@code true} if {@code symbol} can follow the current state in
315 /// the ATN, otherwise {@code false}. </returns>
316 virtual bool isExpectedToken(size_t symbol);
/// Const query with a boolean result.
/// NOTE(review): presumably returns the flag that a member comment further down describes as
/// "Indicates parser has match()ed EOF token"; the backing field is not visible in this
/// excerpt — confirm against the implementation.
318 bool isMatchedEOF() const;
321 /// Computes the set of input symbols which could follow the current parser
322 /// state and context, as given by <seealso cref="#getState"/> and <seealso cref="#getContext"/>,
325 /// <seealso cref= ATN#getExpectedTokens(int, RuleContext) </seealso>
326 virtual misc::IntervalSet getExpectedTokens();
328 virtual misc::IntervalSet getExpectedTokensWithinCurrentRule();
330 /// Get a rule's index (i.e., {@code RULE_ruleName} field) or INVALID_INDEX if not found.
331 virtual size_t getRuleIndex(const std::string &ruleName);
333 virtual ParserRuleContext* getRuleContext();
336 /// Return the list of rule names (as a std::vector<std::string>) in your parser instance
337 /// leading up to a call to the current rule. You could override if
338 /// you want more details such as the file/line info of where
339 /// in the ATN a rule is invoked.
341 /// This is very useful for error messages.
343 virtual std::vector<std::string> getRuleInvocationStack();
/// Overload that takes an explicit rule context {@code p}.
/// NOTE(review): presumably reports the invocation stack starting at {@code p} rather than
/// at the current context — confirm against the implementation.
345 virtual std::vector<std::string> getRuleInvocationStack(RuleContext *p);
348 /// For debugging and other purposes. </summary>
349 virtual std::vector<std::string> getDFAStrings();
352 /// For debugging and other purposes. </summary>
353 virtual void dumpDFA();
355 virtual std::string getSourceName();
357 atn::ParseInfo getParseInfo() const;
362 void setProfile(bool profile);
365 /// During a parse it is sometimes useful to listen in on the rule entry and exit
366 /// events as well as token matches. This is for quick and dirty debugging.
/// <param name="trace"> {@code true} to enable tracing, {@code false} to disable it; per the
/// documentation of the _tracer member, setTrace(true) stores a reference to the TraceListener
/// so that a later setTrace(false) can remove it </param>
368 virtual void setTrace(bool trace);
371 * Gets whether a {@link TraceListener} is registered as a parse listener
374 * @see #setTrace(boolean)
376 bool isTrace() const;
/// Returns a mutable reference to this parser's tree::ParseTreeTracker (the _tracker member).
/// The comment on _tracker describes it as covering all rule contexts created during a
/// parse run, cleared when reset() is called.
378 tree::ParseTreeTracker& getTreeTracker() { return _tracker; }
380 /** How to create a token leaf node associated with a parent.
381 * Typically, the terminal node to create is not a function of the parent
382 * but this method must still set the parent pointer of the terminal node
383 * returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)}
384 * set the parent pointer, but the parent pointer is implementation dependent
385 * and currently there is no setParent() in {@link TerminalNode} (and can't
386 * add method in Java 1.7 without breaking backward compatibility).
390 tree::TerminalNode *createTerminalNode(Token *t);
392 /** How to create an error node, given a token, associated with a parent.
393 * Typically, the error node to create is not a function of the parent
394 * but this method must still set the parent pointer of the terminal node
395 * returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)}
396 * set the parent pointer, but the parent pointer is implementation dependent
397 * and currently there is no setParent() in {@link ErrorNode} (and can't
398 * add method in Java 1.7 without breaking backward compatibility).
402 tree::ErrorNode *createErrorNode(Token *t);
405 /// The ParserRuleContext object for the currently executing rule.
406 /// This is always non-null during the parsing process.
407 // ml: this is one of the contexts tracked in _allocatedContexts.
408 ParserRuleContext *_ctx;
410 /// The error handling strategy for the parser. The default is DefaultErrorStrategy.
411 /// See also getErrorHandler.
412 Ref<ANTLRErrorStrategy> _errHandler;
415 /// The input stream.
417 /// <seealso cref= #getInputStream </seealso>
418 /// <seealso cref= #setInputStream </seealso>
421 std::vector<int> _precedenceStack;
424 /// Specifies whether or not the parser should construct a parse tree during
425 /// the parsing process. The default value is {@code true}.
427 /// <seealso cref= #getBuildParseTree </seealso>
428 /// <seealso cref= #setBuildParseTree </seealso>
429 bool _buildParseTrees;
431 /// The list of <seealso cref="ParseTreeListener"/> listeners registered to receive
432 /// events during the parse.
433 /// <seealso cref= #addParseListener </seealso>
434 std::vector<tree::ParseTreeListener *> _parseListeners;
437 /// The number of syntax errors reported during parsing. This value is
438 /// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
440 size_t _syntaxErrors;
442 /** Indicates parser has match()ed EOF token. See {@link #exitRule()}. */
445 virtual void addContextToParseTree();
447 // All rule contexts created during a parse run. This is cleared when calling reset().
448 tree::ParseTreeTracker _tracker;
451 /// This field maps from the serialized ATN string to the deserialized <seealso cref="ATN"/> with
452 /// bypass alternatives.
454 /// <seealso cref= ATNDeserializationOptions#isGenerateRuleBypassTransitions() </seealso>
455 static std::map<std::vector<uint16_t>, atn::ATN> bypassAltsAtnCache;
457 /// When setTrace(true) is called, a reference to the
458 /// TraceListener is stored here so it can be easily removed in a
459 /// later call to setTrace(false). The listener itself is
460 /// implemented as a parser listener so this field is not directly used by
461 /// other parser methods.
462 TraceListener *_tracer;
464 void InitializeInstanceFields();
467 } // namespace antlr4