1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2 * Use of this file is governed by the BSD 3-clause license that
3 * can be found in the LICENSE.txt file in the project root.
8 #include "ANTLRErrorStrategy.h"
9 #include "misc/IntervalSet.h"
14 * This is the default implementation of {@link ANTLRErrorStrategy} used for
15 * error reporting and recovery in ANTLR parsers.
17 class ANTLR4CPP_PUBLIC DefaultErrorStrategy : public ANTLRErrorStrategy {
19 DefaultErrorStrategy();
20 DefaultErrorStrategy(DefaultErrorStrategy const& other) = delete;
21 virtual ~DefaultErrorStrategy();
23 DefaultErrorStrategy& operator = (DefaultErrorStrategy const& other) = delete;
27 * Indicates whether the error strategy is currently "recovering from an
28 * error". This is used to suppress reporting multiple error messages while
29 * attempting to recover from a detected syntax error.
31 * @see #inErrorRecoveryMode
33 bool errorRecoveryMode;
35 /** The index into the input stream where the last error occurred.
36 * This is used to prevent infinite loops where an error is found
37 * but no token is consumed during recovery...another error is found,
38 * ad nauseum. This is a failsafe mechanism to guarantee that at least
39 * one token/tree node is consumed for two errors.
43 misc::IntervalSet lastErrorStates;
48 /// The default implementation simply calls <seealso cref="#endErrorCondition"/> to
49 /// ensure that the handler is not in error recovery mode.
52 virtual void reset(Parser *recognizer) override;
55 /// This method is called to enter error recovery mode when a recognition
56 /// exception is reported.
58 /// <param name="recognizer"> the parser instance </param>
60 virtual void beginErrorCondition(Parser *recognizer);
66 virtual bool inErrorRecoveryMode(Parser *recognizer) override;
69 /// This method is called to leave error recovery mode after recovering from
70 /// a recognition exception.
72 /// <param name="recognizer"> </param>
74 virtual void endErrorCondition(Parser *recognizer);
79 /// The default implementation simply calls <seealso cref="#endErrorCondition"/>.
82 virtual void reportMatch(Parser *recognizer) override;
86 /// The default implementation returns immediately if the handler is already
87 /// in error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/>
88 /// and dispatches the reporting task based on the runtime type of {@code e}
89 /// according to the following table.
92 /// <li><seealso cref="NoViableAltException"/>: Dispatches the call to
93 /// <seealso cref="#reportNoViableAlternative"/></li>
94 /// <li><seealso cref="InputMismatchException"/>: Dispatches the call to
95 /// <seealso cref="#reportInputMismatch"/></li>
96 /// <li><seealso cref="FailedPredicateException"/>: Dispatches the call to
97 /// <seealso cref="#reportFailedPredicate"/></li>
98 /// <li>All other types: calls <seealso cref="Parser#notifyErrorListeners"/> to report
99 /// the exception</li>
101 virtual void reportError(Parser *recognizer, const RecognitionException &e) override;
106 /// The default implementation resynchronizes the parser by consuming tokens
107 /// until we find one in the resynchronization set--loosely the set of tokens
108 /// that can follow the current rule.
110 virtual void recover(Parser *recognizer, std::exception_ptr e) override;
113 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
114 * that the current lookahead symbol is consistent with what were expecting
115 * at this point in the ATN. You can call this anytime but ANTLR only
116 * generates code to check before subrules/loops and each iteration.
118 * <p>Implements Jim Idle's magic sync mechanism in closures and optional
119 * subrules. E.g.,</p>
122 * a : sync ( stuff sync )* ;
123 * sync : {consume to what can follow sync} ;
126 * At the start of a sub rule upon error, {@link #sync} performs single
127 * token deletion, if possible. If it can't do that, it bails on the current
128 * rule and uses the default error recovery, which consumes until the
129 * resynchronization set of the current rule.
131 * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
132 * with an empty alternative), then the expected set includes what follows
135 * <p>During loop iteration, it consumes until it sees a token that can start a
136 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
137 * stay in the loop as long as possible.</p>
139 * <p><strong>ORIGINS</strong></p>
141 * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
142 * A single mismatch token or missing token would force the parser to bail
143 * out of the entire rules surrounding the loop. So, for rule</p>
146 * classDef : 'class' ID '{' member* '}'
149 * input with an extra token between members would force the parser to
150 * consume until it found the next class definition rather than the next
151 * member definition of the current class.
153 * <p>This functionality cost a little bit of effort because the parser has to
154 * compare token set at the start of the loop and at each iteration. If for
155 * some reason speed is suffering for you, you can turn off this
156 * functionality by simply overriding this method as a blank { }.</p>
158 virtual void sync(Parser *recognizer) override;
161 /// This is called by <seealso cref="#reportError"/> when the exception is a
162 /// <seealso cref="NoViableAltException"/>.
164 /// <seealso cref= #reportError
166 /// <param name="recognizer"> the parser instance </param>
167 /// <param name="e"> the recognition exception </param>
169 virtual void reportNoViableAlternative(Parser *recognizer, const NoViableAltException &e);
172 /// This is called by <seealso cref="#reportError"/> when the exception is an
173 /// <seealso cref="InputMismatchException"/>.
175 /// <seealso cref= #reportError
177 /// <param name="recognizer"> the parser instance </param>
178 /// <param name="e"> the recognition exception </param>
179 virtual void reportInputMismatch(Parser *recognizer, const InputMismatchException &e);
182 /// This is called by <seealso cref="#reportError"/> when the exception is a
183 /// <seealso cref="FailedPredicateException"/>.
185 /// <seealso cref= #reportError
187 /// <param name="recognizer"> the parser instance </param>
188 /// <param name="e"> the recognition exception </param>
189 virtual void reportFailedPredicate(Parser *recognizer, const FailedPredicateException &e);
192 * This method is called to report a syntax error which requires the removal
193 * of a token from the input stream. At the time this method is called, the
194 * erroneous symbol is current {@code LT(1)} symbol and has not yet been
195 * removed from the input stream. When this method returns,
196 * {@code recognizer} is in error recovery mode.
198 * <p>This method is called when {@link #singleTokenDeletion} identifies
199 * single-token deletion as a viable recovery strategy for a mismatched
202 * <p>The default implementation simply returns if the handler is already in
203 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
204 * enter error recovery mode, followed by calling
205 * {@link Parser#notifyErrorListeners}.</p>
207 * @param recognizer the parser instance
209 virtual void reportUnwantedToken(Parser *recognizer);
212 * This method is called to report a syntax error which requires the
213 * insertion of a missing token into the input stream. At the time this
214 * method is called, the missing token has not yet been inserted. When this
215 * method returns, {@code recognizer} is in error recovery mode.
217 * <p>This method is called when {@link #singleTokenInsertion} identifies
218 * single-token insertion as a viable recovery strategy for a mismatched
221 * <p>The default implementation simply returns if the handler is already in
222 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
223 * enter error recovery mode, followed by calling
224 * {@link Parser#notifyErrorListeners}.</p>
226 * @param recognizer the parser instance
228 virtual void reportMissingToken(Parser *recognizer);
234 * <p>The default implementation attempts to recover from the mismatched input
235 * by using single token insertion and deletion as described below. If the
236 * recovery attempt fails, this method throws an
237 * {@link InputMismatchException}.</p>
239 * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
241 * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
242 * right token, however, then assume {@code LA(1)} is some extra spurious
243 * token and delete it. Then consume and return the next token (which was
244 * the {@code LA(2)} token) as the successful result of the match operation.</p>
246 * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
248 * <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
250 * <p>If current token (at {@code LA(1)}) is consistent with what could come
251 * after the expected {@code LA(1)} token, then assume the token is missing
252 * and use the parser's {@link TokenFactory} to create it on the fly. The
253 * "insertion" is performed by returning the created token as the successful
254 * result of the match operation.</p>
256 * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
258 * <p><strong>EXAMPLE</strong></p>
260 * <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
261 * the parser returns from the nested call to {@code expr}, it will have
265 * stat → expr → atom
268 * and it will be trying to match the {@code ')'} at this point in the
272 * => ID '=' '(' INT ')' ('+' atom)* ';'
276 * The attempt to match {@code ')'} will fail when it sees {@code ';'} and
277 * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
278 * is in the set of tokens that can follow the {@code ')'} token reference
279 * in rule {@code atom}. It can assume that you forgot the {@code ')'}.
281 virtual Token* recoverInline(Parser *recognizer) override;
284 /// This method implements the single-token insertion inline error recovery
285 /// strategy. It is called by <seealso cref="#recoverInline"/> if the single-token
286 /// deletion strategy fails to recover from the mismatched input. If this
287 /// method returns {@code true}, {@code recognizer} will be in error recovery
290 /// This method determines whether or not single-token insertion is viable by
291 /// checking if the {@code LA(1)} input symbol could be successfully matched
292 /// if it were instead the {@code LA(2)} symbol. If this method returns
293 /// {@code true}, the caller is responsible for creating and inserting a
294 /// token with the correct type to produce this behavior.
296 /// <param name="recognizer"> the parser instance </param>
297 /// <returns> {@code true} if single-token insertion is a viable recovery
298 /// strategy for the current mismatched input, otherwise {@code false} </returns>
300 virtual bool singleTokenInsertion(Parser *recognizer);
303 /// This method implements the single-token deletion inline error recovery
304 /// strategy. It is called by <seealso cref="#recoverInline"/> to attempt to recover
305 /// from mismatched input. If this method returns null, the parser and error
306 /// handler state will not have changed. If this method returns non-null,
307 /// {@code recognizer} will <em>not</em> be in error recovery mode since the
308 /// returned token was a successful match.
310 /// If the single-token deletion is successful, this method calls
311 /// <seealso cref="#reportUnwantedToken"/> to report the error, followed by
312 /// <seealso cref="Parser#consume"/> to actually "delete" the extraneous token. Then,
313 /// before returning <seealso cref="#reportMatch"/> is called to signal a successful
316 /// <param name="recognizer"> the parser instance </param>
317 /// <returns> the successfully matched <seealso cref="Token"/> instance if single-token
318 /// deletion successfully recovers from the mismatched input, otherwise
319 /// {@code null} </returns>
320 virtual Token* singleTokenDeletion(Parser *recognizer);
323 /// Conjure up a missing token during error recovery.
325 /// The recognizer attempts to recover from single missing
326 /// symbols. But, actions might refer to that missing symbol.
327 /// For example, x=ID {f($x);}. The action clearly assumes
328 /// that there has been an identifier matched previously and that
329 /// $x points at that token. If that token is missing, but
330 /// the next token in the stream is what we want we assume that
331 /// this token is missing and we keep going. Because we
332 /// have to return some token to replace the missing token,
333 /// we have to conjure one up. This method gives the user control
334 /// over the tokens returned for missing tokens. Mostly,
335 /// you will want to create something special for identifier
336 /// tokens. For literals such as '{' and ',', the default
337 /// action in the parser or tree parser works. It simply creates
338 /// a CommonToken of the appropriate type. The text will be the token.
339 /// If you change what tokens must be created by the lexer,
340 /// override this method to create the appropriate tokens.
342 virtual Token* getMissingSymbol(Parser *recognizer);
344 virtual misc::IntervalSet getExpectedTokens(Parser *recognizer);
347 /// How should a token be displayed in an error message? The default
348 /// is to display just the text, but during development you might
349 /// want to have a lot of information spit out. Override in that case
350 /// to use t.toString() (which, for CommonToken, dumps everything about
351 /// the token). This is better than forcing you to override a method in
352 /// your token objects because you don't have to go modify your lexer
353 /// so that it creates a new class.
355 virtual std::string getTokenErrorDisplay(Token *t);
357 virtual std::string getSymbolText(Token *symbol);
359 virtual size_t getSymbolType(Token *symbol);
361 virtual std::string escapeWSAndQuote(const std::string &s) const;
363 /* Compute the error recovery set for the current rule. During
364 * rule invocation, the parser pushes the set of tokens that can
365 * follow that rule reference on the stack; this amounts to
366 * computing FIRST of what follows the rule reference in the
367 * enclosing rule. See LinearApproximator.FIRST().
368 * This local follow set only includes tokens
369 * from within the rule; i.e., the FIRST computation done by
370 * ANTLR stops at the end of a rule.
374 * When you find a "no viable alt exception", the input is not
375 * consistent with any of the alternatives for rule r. The best
376 * thing to do is to consume tokens until you see something that
377 * can legally follow a call to r *or* any rule that called r.
378 * You don't want the exact set of viable next tokens because the
379 * input might just be missing a token--you might consume the
380 * rest of the input looking for one of the missing tokens.
392 * At each rule invocation, the set of tokens that could follow
393 * that rule is pushed on a stack. Here are the various
394 * context-sensitive follow sets:
396 * FOLLOW(b1_in_a) = FIRST(']') = ']'
397 * FOLLOW(b2_in_a) = FIRST(')') = ')'
398 * FOLLOW(c_in_b) = FIRST('^') = '^'
400 * Upon erroneous input "[]", the call chain is
404 * and, hence, the follow context stack is:
406 * depth follow set start of rule execution
407 * 0 <EOF> a (from main())
411 * Notice that ')' is not included, because b would have to have
412 * been called from a different context in rule a for ')' to be
415 * For error recovery, we cannot consider FOLLOW(c)
416 * (context-sensitive or otherwise). We need the combined set of
417 * all context-sensitive FOLLOW sets--the set of all tokens that
418 * could follow any reference in the call chain. We need to
419 * resync to one of those tokens. Note that FOLLOW(c)='^' and if
420 * we resync'd to that token, we'd consume until EOF. We need to
421 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
422 * In this case, for input "[]", LA(1) is ']' and in the set, so we would
423 * not consume anything. After printing an error, rule c would
424 * return normally. Rule b would not find the required '^' though.
425 * At this point, it gets a mismatched token error and throws an
426 * exception (since LA(1) is not in the viable following token
427 * set). The rule exception handler tries to recover, but finds
428 * the same recovery set and doesn't consume anything. Rule b
429 * exits normally returning to rule a. Now it finds the ']' (and
430 * with the successful match exits errorRecovery mode).
432 * So, you can see that the parser walks up the call chain looking
433 * for the token that was a member of the recovery set.
435 * Errors are not generated in errorRecovery mode.
437 * ANTLR's error recovery mechanism is based upon original ideas:
439 * "Algorithms + Data Structures = Programs" by Niklaus Wirth
443 * "A note on error recovery in recursive descent parsers":
444 * http://portal.acm.org/citation.cfm?id=947902.947905
446 * Later, Josef Grosch had some good ideas:
448 * "Efficient and Comfortable Error Recovery in Recursive Descent
450 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
452 * Like Grosch I implement context-sensitive FOLLOW sets that are combined
453 * at run-time upon error to avoid overhead during parsing.
455 virtual misc::IntervalSet getErrorRecoverySet(Parser *recognizer);
458 /// Consume tokens until one matches the given token set. </summary>
459 virtual void consumeUntil(Parser *recognizer, const misc::IntervalSet &set);
462 std::vector<std::unique_ptr<Token>> _errorSymbols; // Temporarily created token.
463 void InitializeInstanceFields();
466 } // namespace antlr4