gitweb.ps.run Git - toc/blob - antlr4-cpp-runtime-4.9.2-source/runtime/src/atn/LexerATNSimulator.h

   1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
   2  * Use of this file is governed by the BSD 3-clause license that
   3  * can be found in the LICENSE.txt file in the project root.
   4  */
   5
   6 #pragma once
   7
   8 #include "atn/ATNSimulator.h"
   9 #include "atn/LexerATNConfig.h"
  10 #include "atn/ATNConfigSet.h"
  11
  12 namespace antlr4 {
  13 namespace atn {
  14
  15   /// "dup" of ParserInterpreter
  16   class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
  17   protected:
  18     class SimState {
  19     public:
  20       virtual ~SimState();
  21
  22     protected:
  23       size_t index;
  24       size_t line;
  25       size_t charPos;
  26       dfa::DFAState *dfaState;
  27       virtual void reset();
  28       friend class LexerATNSimulator;
  29
  30     private:
  31       void InitializeInstanceFields();
  32
  33     public:
  34       SimState() {
  35         InitializeInstanceFields();
  36       }
  37     };
  38
  39
  40   public:
  41 #if __cplusplus >= 201703L
  42     static constexpr size_t MIN_DFA_EDGE = 0;
  43     static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN
  44 #else
  45     enum : size_t {
  46       MIN_DFA_EDGE = 0,
  47       MAX_DFA_EDGE = 127, // forces unicode to stay in ATN
  48     };
  49 #endif
  50
  51   protected:
  52     /// <summary>
  53     /// When we hit an accept state in either the DFA or the ATN, we
  54     ///  have to notify the character stream to start buffering characters
  55     ///  via <seealso cref="IntStream#mark"/> and record the current state. The current sim state
  56     ///  includes the current index into the input, the current line,
  57     ///  and current character position in that line. Note that the Lexer is
  58     ///  tracking the starting line and characterization of the token. These
  59     ///  variables track the "state" of the simulator when it hits an accept state.
  60     /// <p/>
  61     ///  We track these variables separately for the DFA and ATN simulation
  62     ///  because the DFA simulation often has to fail over to the ATN
  63     ///  simulation. If the ATN simulation fails, we need the DFA to fall
  64     ///  back to its previously accepted state, if any. If the ATN succeeds,
  65     ///  then the ATN does the accept and the DFA simulator that invoked it
  66     ///  can simply return the predicted token type.
  67     /// </summary>
  68     Lexer *const _recog;
  69
  70     /// The current token's starting index into the character stream.
  71     ///  Shared across DFA to ATN simulation in case the ATN fails and the
  72     ///  DFA did not have a previous accept state. In this case, we use the
  73     ///  ATN-generated exception object.
  74     size_t _startIndex;
  75
  76     /// line number 1..n within the input.
  77     size_t _line;
  78
  79     /// The index of the character relative to the beginning of the line 0..n-1.
  80     size_t _charPositionInLine;
  81
  82   public:
  83     std::vector<dfa::DFA> &_decisionToDFA;
  84
  85   protected:
  86     size_t _mode;
  87
  88     /// Used during DFA/ATN exec to record the most recent accept configuration info.
  89     SimState _prevAccept;
  90
  91   public:
  92     static int match_calls;
  93
  94     LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
  95     LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
  96     virtual ~LexerATNSimulator () {}
  97
  98     virtual void copyState(LexerATNSimulator *simulator);
  99     virtual size_t match(CharStream *input, size_t mode);
 100     virtual void reset() override;
 101
 102     virtual void clearDFA() override;
 103
 104   protected:
 105     virtual size_t matchATN(CharStream *input);
 106     virtual size_t execATN(CharStream *input, dfa::DFAState *ds0);
 107
 108     /// <summary>
 109     /// Get an existing target state for an edge in the DFA. If the target state
 110     /// for the edge has not yet been computed or is otherwise not available,
 111     /// this method returns {@code null}.
 112     /// </summary>
 113     /// <param name="s"> The current DFA state </param>
 114     /// <param name="t"> The next input symbol </param>
 115     /// <returns> The existing target DFA state for the given input symbol
 116     /// {@code t}, or {@code null} if the target state for this edge is not
 117     /// already cached </returns>
 118     virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t);
 119
 120     /// <summary>
 121     /// Compute a target state for an edge in the DFA, and attempt to add the
 122     /// computed state and corresponding edge to the DFA.
 123     /// </summary>
 124     /// <param name="input"> The input stream </param>
 125     /// <param name="s"> The current DFA state </param>
 126     /// <param name="t"> The next input symbol
 127     /// </param>
 128     /// <returns> The computed target DFA state for the given input symbol
 129     /// {@code t}. If {@code t} does not lead to a valid DFA state, this method
 130     /// returns <seealso cref="#ERROR"/>. </returns>
 131     virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t);
 132
 133     virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t);
 134
 135     /// <summary>
 136     /// Given a starting configuration set, figure out all ATN configurations
 137     ///  we can reach upon input {@code t}. Parameter {@code reach} is a return
 138     ///  parameter.
 139     /// </summary>
 140     void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already
 141                                ATNConfigSet *reach, size_t t);
 142
 143     virtual void accept(CharStream *input, const Ref<LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index,
 144                         size_t line, size_t charPos);
 145
 146     virtual ATNState *getReachableTarget(Transition *trans, size_t t);
 147
 148     virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p);
 149
 150     /// <summary>
 151     /// Since the alternatives within any lexer decision are ordered by
 152     /// preference, this method stops pursuing the closure as soon as an accept
 153     /// state is reached. After the first accept state is reached by depth-first
 154     /// search from {@code config}, all other (potentially reachable) states for
 155     /// this rule would have a lower priority.
 156     /// </summary>
 157     /// <returns> {@code true} if an accept state is reached, otherwise
 158     /// {@code false}. </returns>
 159     virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
 160                          bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon);
 161
 162     // side-effect: can alter configs.hasSemanticContext
 163     virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, Transition *t,
 164       ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon);
 165
 166     /// <summary>
 167     /// Evaluate a predicate specified in the lexer.
 168     /// <p/>
 169     /// If {@code speculative} is {@code true}, this method was called before
 170     /// <seealso cref="#consume"/> for the matched character. This method should call
 171     /// <seealso cref="#consume"/> before evaluating the predicate to ensure position
 172     /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>,
 173     /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current
 174     /// lexer state. This method should restore {@code input} and the simulator
 175     /// to the original state before returning (i.e. undo the actions made by the
 176     /// call to <seealso cref="#consume"/>.
 177     /// </summary>
 178     /// <param name="input"> The input stream. </param>
 179     /// <param name="ruleIndex"> The rule containing the predicate. </param>
 180     /// <param name="predIndex"> The index of the predicate within the rule. </param>
 181     /// <param name="speculative"> {@code true} if the current index in {@code input} is
 182     /// one character before the predicate's location.
 183     /// </param>
 184     /// <returns> {@code true} if the specified predicate evaluates to
 185     /// {@code true}. </returns>
 186     virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative);
 187
 188     virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState);
 189     virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q);
 190     virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q);
 191
 192     /// <summary>
 193     /// Add a new DFA state if there isn't one with this set of
 194     /// configurations already. This method also detects the first
 195     /// configuration containing an ATN rule stop state. Later, when
 196     /// traversing the DFA, we will know which rule to accept.
 197     /// </summary>
 198     virtual dfa::DFAState *addDFAState(ATNConfigSet *configs);
 199
 200   public:
 201     dfa::DFA& getDFA(size_t mode);
 202
 203     /// Get the text matched so far for the current token.
 204     virtual std::string getText(CharStream *input);
 205     virtual size_t getLine() const;
 206     virtual void setLine(size_t line);
 207     virtual size_t getCharPositionInLine();
 208     virtual void setCharPositionInLine(size_t charPositionInLine);
 209     virtual void consume(CharStream *input);
 210     virtual std::string getTokenName(size_t t);
 211
 212   private:
 213     void InitializeInstanceFields();
 214   };
 215
 216 } // namespace atn
 217 } // namespace antlr4