gitweb.ps.run Git - toc/blob - antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/PredictionMode.h

   1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
   2  * Use of this file is governed by the BSD 3-clause license that
   3  * can be found in the LICENSE.txt file in the project root.
   4  */
   5
   6 #pragma once
   7
   8 #include "support/BitSet.h"
   9
  10 namespace antlr4 {
  11 namespace atn {
  12
  13   /**
  14    * This enumeration defines the prediction modes available in ANTLR 4 along with
  15    * utility methods for analyzing configuration sets for conflicts and/or
  16    * ambiguities.
  17    */
  18   enum class PredictionMode {
  19     /**
  20      * The SLL(*) prediction mode. This prediction mode ignores the current
  21      * parser context when making predictions. This is the fastest prediction
  22      * mode, and provides correct results for many grammars. This prediction
  23      * mode is more powerful than the prediction mode provided by ANTLR 3, but
  24      * may result in syntax errors for grammar and input combinations which are
  25      * not SLL.
  26      *
  27      * <p>
  28      * When using this prediction mode, the parser will either return a correct
  29      * parse tree (i.e. the same parse tree that would be returned with the
  30      * {@link #LL} prediction mode), or it will report a syntax error. If a
  31      * syntax error is encountered when using the {@link #SLL} prediction mode,
  32      * it may be due to either an actual syntax error in the input or indicate
  33      * that the particular combination of grammar and input requires the more
  34      * powerful {@link #LL} prediction abilities to complete successfully.</p>
  35      *
  36      * <p>
  37      * This prediction mode does not provide any guarantees for prediction
  38      * behavior for syntactically-incorrect inputs.</p>
  39      */
  40     SLL,
  41
  42     /**
  43      * The LL(*) prediction mode. This prediction mode allows the current parser
  44      * context to be used for resolving SLL conflicts that occur during
  45      * prediction. This is the fastest prediction mode that guarantees correct
  46      * parse results for all combinations of grammars with syntactically correct
  47      * inputs.
  48      *
  49      * <p>
  50      * When using this prediction mode, the parser will make correct decisions
  51      * for all syntactically-correct grammar and input combinations. However, in
  52      * cases where the grammar is truly ambiguous this prediction mode might not
  53      * report a precise answer for <em>exactly which</em> alternatives are
  54      * ambiguous.</p>
  55      *
  56      * <p>
  57      * This prediction mode does not provide any guarantees for prediction
  58      * behavior for syntactically-incorrect inputs.</p>
  59      */
  60     LL,
  61
  62     /**
  63      * The LL(*) prediction mode with exact ambiguity detection. In addition to
  64      * the correctness guarantees provided by the {@link #LL} prediction mode,
  65      * this prediction mode instructs the prediction algorithm to determine the
  66      * complete and exact set of ambiguous alternatives for every ambiguous
  67      * decision encountered while parsing.
  68      *
  69      * <p>
  70      * This prediction mode may be used for diagnosing ambiguities during
  71      * grammar development. Due to the performance overhead of calculating sets
  72      * of ambiguous alternatives, this prediction mode should be avoided when
  73      * the exact results are not necessary.</p>
  74      *
  75      * <p>
  76      * This prediction mode does not provide any guarantees for prediction
  77      * behavior for syntactically-incorrect inputs.</p>
  78      */
  79     LL_EXACT_AMBIG_DETECTION
  80   };
  81
  82   class ANTLR4CPP_PUBLIC PredictionModeClass {
  83   public:
  84     /**
  85      * Computes the SLL prediction termination condition.
  86      *
  87      * <p>
  88      * This method computes the SLL prediction termination condition for both of
  89      * the following cases.</p>
  90      *
  91      * <ul>
  92      * <li>The usual SLL+LL fallback upon SLL conflict</li>
  93      * <li>Pure SLL without LL fallback</li>
  94      * </ul>
  95      *
  96      * <p><strong>COMBINED SLL+LL PARSING</strong></p>
  97      *
  98      * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
  99      * ensured regardless of how the termination condition is computed by this
 100      * method. Due to the substantially higher cost of LL prediction, the
 101      * prediction should only fall back to LL when the additional lookahead
 102      * cannot lead to a unique SLL prediction.</p>
 103      *
 104      * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
 105      * conflicting subsets should fall back to full LL, even if the
 106      * configuration sets don't resolve to the same alternative (e.g.
 107      * {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting
 108      * configuration, SLL could continue with the hopes that more lookahead will
 109      * resolve via one of those non-conflicting configurations.</p>
 110      *
 111      * <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing)
 112      * stops when it sees only conflicting configuration subsets. In contrast,
 113      * full LL keeps going when there is uncertainty.</p>
 114      *
 115      * <p><strong>HEURISTIC</strong></p>
 116      *
 117      * <p>As a heuristic, we stop prediction when we see any conflicting subset
 118      * unless we see a state that only has one alternative associated with it.
 119      * The single-alt-state thing lets prediction continue upon rules like
 120      * (otherwise, it would admit defeat too soon):</p>
 121      *
 122      * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
 123      *
 124      * <p>When the ATN simulation reaches the state before {@code ';'}, it has a
 125      * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
 126      * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
 127      * processing this node because alternative to has another way to continue,
 128      * via {@code [6|2|[]]}.</p>
 129      *
 130      * <p>It also let's us continue for this rule:</p>
 131      *
 132      * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
 133      *
 134      * <p>After matching input A, we reach the stop state for rule A, state 1.
 135      * State 8 is the state right before B. Clearly alternatives 1 and 2
 136      * conflict and no amount of further lookahead will separate the two.
 137      * However, alternative 3 will be able to continue and so we do not stop
 138      * working on this state. In the previous example, we're concerned with
 139      * states associated with the conflicting alternatives. Here alt 3 is not
 140      * associated with the conflicting configs, but since we can continue
 141      * looking for input reasonably, don't declare the state done.</p>
 142      *
 143      * <p><strong>PURE SLL PARSING</strong></p>
 144      *
 145      * <p>To handle pure SLL parsing, all we have to do is make sure that we
 146      * combine stack contexts for configurations that differ only by semantic
 147      * predicate. From there, we can do the usual SLL termination heuristic.</p>
 148      *
 149      * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
 150      *
 151      * <p>SLL decisions don't evaluate predicates until after they reach DFA stop
 152      * states because they need to create the DFA cache that works in all
 153      * semantic situations. In contrast, full LL evaluates predicates collected
 154      * during start state computation so it can ignore predicates thereafter.
 155      * This means that SLL termination detection can totally ignore semantic
 156      * predicates.</p>
 157      *
 158      * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
 159      * semantic predicate contexts so we might see two configurations like the
 160      * following.</p>
 161      *
 162      * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
 163      *
 164      * <p>Before testing these configurations against others, we have to merge
 165      * {@code x} and {@code x'} (without modifying the existing configurations).
 166      * For example, we test {@code (x+x')==x''} when looking for conflicts in
 167      * the following configurations.</p>
 168      *
 169      * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
 170      *
 171      * <p>If the configuration set has predicates (as indicated by
 172      * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
 173      * the configurations to strip out all of the predicates so that a standard
 174      * {@link ATNConfigSet} will merge everything ignoring predicates.</p>
 175      */
 176     static bool hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs);
 177
 178     /// <summary>
 179     /// Checks if any configuration in {@code configs} is in a
 180     /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have
 181     /// reached
 182     /// the end of the decision rule (local context) or end of start rule (full
 183     /// context).
 184     /// </summary>
 185     /// <param name="configs"> the configuration set to test </param>
 186     /// <returns> {@code true} if any configuration in {@code configs} is in a
 187     /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns>
 188     static bool hasConfigInRuleStopState(ATNConfigSet *configs);
 189
 190     /// <summary>
 191     /// Checks if all configurations in {@code configs} are in a
 192     /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have
 193     /// reached
 194     /// the end of the decision rule (local context) or end of start rule (full
 195     /// context).
 196     /// </summary>
 197     /// <param name="configs"> the configuration set to test </param>
 198     /// <returns> {@code true} if all configurations in {@code configs} are in a
 199     /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns>
 200     static bool allConfigsInRuleStopStates(ATNConfigSet *configs);
 201
 202     /**
 203      * Full LL prediction termination.
 204      *
 205      * <p>Can we stop looking ahead during ATN simulation or is there some
 206      * uncertainty as to which alternative we will ultimately pick, after
 207      * consuming more input? Even if there are partial conflicts, we might know
 208      * that everything is going to resolve to the same minimum alternative. That
 209      * means we can stop since no more lookahead will change that fact. On the
 210      * other hand, there might be multiple conflicts that resolve to different
 211      * minimums. That means we need more look ahead to decide which of those
 212      * alternatives we should predict.</p>
 213      *
 214      * <p>The basic idea is to split the set of configurations {@code C}, into
 215      * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
 216      * non-conflicting configurations. Two configurations conflict if they have
 217      * identical {@link ATNConfig#state} and {@link ATNConfig#context} values
 218      * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
 219      * and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
 220      *
 221      * <p>Reduce these configuration subsets to the set of possible alternatives.
 222      * You can compute the alternative subsets in one pass as follows:</p>
 223      *
 224      * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
 225      * {@code C} holding {@code s} and {@code ctx} fixed.</p>
 226      *
 227      * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
 228      *
 229      * <pre>
 230      * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
 231      * alt and not pred
 232      * </pre>
 233      *
 234      * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
 235      *
 236      * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
 237      * {@code s} and {@code ctx}.</p>
 238      *
 239      * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
 240      * the union of these alternative subsets is a singleton, then no amount of
 241      * more lookahead will help us. We will always pick that alternative. If,
 242      * however, there is more than one alternative, then we are uncertain which
 243      * alternative to predict and must continue looking for resolution. We may
 244      * or may not discover an ambiguity in the future, even if there are no
 245      * conflicting subsets this round.</p>
 246      *
 247      * <p>The biggest sin is to terminate early because it means we've made a
 248      * decision but were uncertain as to the eventual outcome. We haven't used
 249      * enough lookahead. On the other hand, announcing a conflict too late is no
 250      * big deal; you will still have the conflict. It's just inefficient. It
 251      * might even look until the end of file.</p>
 252      *
 253      * <p>No special consideration for semantic predicates is required because
 254      * predicates are evaluated on-the-fly for full LL prediction, ensuring that
 255      * no configuration contains a semantic context during the termination
 256      * check.</p>
 257      *
 258      * <p><strong>CONFLICTING CONFIGS</strong></p>
 259      *
 260      * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
 261      * when {@code i!=j} but {@code x=x'}. Because we merge all
 262      * {@code (s, i, _)} configurations together, that means that there are at
 263      * most {@code n} configurations associated with state {@code s} for
 264      * {@code n} possible alternatives in the decision. The merged stacks
 265      * complicate the comparison of configuration contexts {@code x} and
 266      * {@code x'}. Sam checks to see if one is a subset of the other by calling
 267      * merge and checking to see if the merged result is either {@code x} or
 268      * {@code x'}. If the {@code x} associated with lowest alternative {@code i}
 269      * is the superset, then {@code i} is the only possible prediction since the
 270      * others resolve to {@code min(i)} as well. However, if {@code x} is
 271      * associated with {@code j>i} then at least one stack configuration for
 272      * {@code j} is not in conflict with alternative {@code i}. The algorithm
 273      * should keep going, looking for more lookahead due to the uncertainty.</p>
 274      *
 275      * <p>For simplicity, I'm doing a equality check between {@code x} and
 276      * {@code x'} that lets the algorithm continue to consume lookahead longer
 277      * than necessary. The reason I like the equality is of course the
 278      * simplicity but also because that is the test you need to detect the
 279      * alternatives that are actually in conflict.</p>
 280      *
 281      * <p><strong>CONTINUE/STOP RULE</strong></p>
 282      *
 283      * <p>Continue if union of resolved alternative sets from non-conflicting and
 284      * conflicting alternative subsets has more than one alternative. We are
 285      * uncertain about which alternative to predict.</p>
 286      *
 287      * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
 288      * alternatives are still in the running for the amount of input we've
 289      * consumed at this point. The conflicting sets let us to strip away
 290      * configurations that won't lead to more states because we resolve
 291      * conflicts to the configuration with a minimum alternate for the
 292      * conflicting set.</p>
 293      *
 294      * <p><strong>CASES</strong></p>
 295      *
 296      * <ul>
 297      *
 298      * <li>no conflicts and more than 1 alternative in set =&gt; continue</li>
 299      *
 300      * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
 301      * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
 302      * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
 303      * {@code {1,3}} =&gt; continue
 304      * </li>
 305      *
 306      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
 307      * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
 308      * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
 309      * {@code {1}} =&gt; stop and predict 1</li>
 310      *
 311      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
 312      * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
 313      * {@code {1}} = {@code {1}} =&gt; stop and predict 1, can announce
 314      * ambiguity {@code {1,2}}</li>
 315      *
 316      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
 317      * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
 318      * {@code {2}} = {@code {1,2}} =&gt; continue</li>
 319      *
 320      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
 321      * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
 322      * {@code {3}} = {@code {1,3}} =&gt; continue</li>
 323      *
 324      * </ul>
 325      *
 326      * <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
 327      *
 328      * <p>If all states report the same conflicting set of alternatives, then we
 329      * know we have the exact ambiguity set.</p>
 330      *
 331      * <p><code>|A_<em>i</em>|&gt;1</code> and
 332      * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
 333      *
 334      * <p>In other words, we continue examining lookahead until all {@code A_i}
 335      * have more than one alternative and all {@code A_i} are the same. If
 336      * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
 337      * because the resolved set is {@code {1}}. To determine what the real
 338      * ambiguity is, we have to know whether the ambiguity is between one and
 339      * two or one and three so we keep going. We can only stop prediction when
 340      * we need exact ambiguity detection when the sets look like
 341      * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
 342      */
 343     static size_t resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet> &altsets);
 344
 345     /// <summary>
 346     /// Determines if every alternative subset in {@code altsets} contains more
 347     /// than one alternative.
 348     /// </summary>
 349     /// <param name="altsets"> a collection of alternative subsets </param>
 350     /// <returns> {@code true} if every <seealso cref="BitSet"/> in {@code altsets}
 351     /// has
 352     /// <seealso cref="BitSet#cardinality cardinality"/> &gt; 1, otherwise {@code
 353     /// false} </returns>
 354     static bool allSubsetsConflict(const std::vector<antlrcpp::BitSet> &altsets);
 355
 356     /// <summary>
 357     /// Determines if any single alternative subset in {@code altsets} contains
 358     /// exactly one alternative.
 359     /// </summary>
 360     /// <param name="altsets"> a collection of alternative subsets </param>
 361     /// <returns> {@code true} if {@code altsets} contains a <seealso
 362     /// cref="BitSet"/> with
 363     /// <seealso cref="BitSet#cardinality cardinality"/> 1, otherwise {@code false}
 364     /// </returns>
 365     static bool hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets);
 366
 367     /// <summary>
 368     /// Determines if any single alternative subset in {@code altsets} contains
 369     /// more than one alternative.
 370     /// </summary>
 371     /// <param name="altsets"> a collection of alternative subsets </param>
 372     /// <returns> {@code true} if {@code altsets} contains a <seealso
 373     /// cref="BitSet"/> with
 374     /// <seealso cref="BitSet#cardinality cardinality"/> &gt; 1, otherwise {@code
 375     /// false} </returns>
 376     static bool hasConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets);
 377
 378     /// <summary>
 379     /// Determines if every alternative subset in {@code altsets} is equivalent.
 380     /// </summary>
 381     /// <param name="altsets"> a collection of alternative subsets </param>
 382     /// <returns> {@code true} if every member of {@code altsets} is equal to the
 383     /// others, otherwise {@code false} </returns>
 384     static bool allSubsetsEqual(const std::vector<antlrcpp::BitSet> &altsets);
 385
 386     /// <summary>
 387     /// Returns the unique alternative predicted by all alternative subsets in
 388     /// {@code altsets}. If no such alternative exists, this method returns
 389     /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>.
 390     /// </summary>
 391     /// <param name="altsets"> a collection of alternative subsets </param>
 392     static size_t getUniqueAlt(const std::vector<antlrcpp::BitSet> &altsets);
 393
 394     /// <summary>
 395     /// Gets the complete set of represented alternatives for a collection of
 396     /// alternative subsets. This method returns the union of each <seealso
 397     /// cref="BitSet"/>
 398     /// in {@code altsets}.
 399     /// </summary>
 400     /// <param name="altsets"> a collection of alternative subsets </param>
 401     /// <returns> the set of represented alternatives in {@code altsets} </returns>
 402     static antlrcpp::BitSet getAlts(const std::vector<antlrcpp::BitSet> &altsets);
 403
 404     /** Get union of all alts from configs. @since 4.5.1 */
 405     static antlrcpp::BitSet getAlts(ATNConfigSet *configs);
 406
 407     /// <summary>
 408     /// This function gets the conflicting alt subsets from a configuration set.
 409     /// For each configuration {@code c} in {@code configs}:
 410     ///
 411     /// <pre>
 412     /// map[c] U= c.<seealso cref="ATNConfig#alt alt"/> # map hash/equals uses s and
 413     /// x, not
 414     /// alt and not pred
 415     /// </pre>
 416     /// </summary>
 417     static std::vector<antlrcpp::BitSet> getConflictingAltSubsets(ATNConfigSet *configs);
 418
 419     /// <summary>
 420     /// Get a map from state to alt subset from a configuration set. For each
 421     /// configuration {@code c} in {@code configs}:
 422     ///
 423     /// <pre>
 424     /// map[c.<seealso cref="ATNConfig#state state"/>] U= c.<seealso
 425     /// cref="ATNConfig#alt alt"/>
 426     /// </pre>
 427     /// </summary>
 428     static std::map<ATNState*, antlrcpp::BitSet> getStateToAltMap(ATNConfigSet *configs);
 429
 430     static bool hasStateAssociatedWithOneAlt(ATNConfigSet *configs);
 431
 432     static size_t getSingleViableAlt(const std::vector<antlrcpp::BitSet> &altsets);
 433   };
 434
 435 } // namespace atn
 436 } // namespace antlr4