add antlr source code and ReadMe

[toc] / antlr4-cpp-runtime-4.9.2-source / runtime / src / Lexer.h
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/Lexer.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/Lexer.h

new file mode 100644 (file)

index 0000000..7561955
--- /dev/null
+++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/Lexer.h
@@ -0,0 +1,209 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Recognizer.h"
+#include "TokenSource.h"
+#include "CharStream.h"
+#include "Token.h"
+
+namespace antlr4 {
+
+  /// A lexer is a recognizer that draws input symbols from a character stream.
+  /// Lexer grammars result in a subclass of this class. A Lexer object
+  /// uses simplified match() and error recovery mechanisms in the interest
+  /// of speed.
+  class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource {
+  public:
+#if __cplusplus >= 201703L
+    static constexpr size_t DEFAULT_MODE = 0;
+    static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1;
+    static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2;
+
+    static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
+    static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL;
+    static constexpr size_t MIN_CHAR_VALUE = 0;
+    static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF;
+#else
+    enum : size_t {
+      DEFAULT_MODE = 0,
+      MORE = static_cast<size_t>(-2), // std::numeric_limits<size_t>::max() - 1; doesn't work in VS 2013
+      SKIP = static_cast<size_t>(-3), // std::numeric_limits<size_t>::max() - 2; doesn't work in VS 2013
+
+      DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL,
+      HIDDEN = Token::HIDDEN_CHANNEL,
+      MIN_CHAR_VALUE = 0,
+      MAX_CHAR_VALUE = 0x10FFFF,
+    };
+#endif
+
+    CharStream *_input; // Pure (non-owning) reference, usually to a statically allocated instance.
+
+  protected:
+    /// How to create token objects.
+    TokenFactory<CommonToken> *_factory;
+
+  public:
+    /// The goal of all lexer rules/methods is to create a token object.
+    ///  This is an instance variable as multiple rules may collaborate to
+    ///  create a single token.  nextToken will return this object after
+    ///  matching lexer rule(s).  If you subclass to allow multiple token
+    ///  emissions, then set this to the last token to be matched or
+    ///  something non-null so that the auto token emit mechanism will not
+    ///  emit another token.
+
+    // Life cycle of a token is this:
+    // Created by emit() (via the token factory) or by action code, holding ownership of it.
+    // Ownership is handed over to the token stream when calling nextToken().
+    std::unique_ptr<Token> token;
+
+    /// <summary>
+    /// What character index in the stream did the current token start at?
+    ///  Needed, for example, to get the text for current token.  Set at
+    ///  the start of nextToken.
+    /// </summary>
+    size_t tokenStartCharIndex;
+
+    /// <summary>
+    /// The line on which the first character of the token resides. </summary>
+    size_t tokenStartLine;
+
+    /// The character position of first character within the line.
+    size_t tokenStartCharPositionInLine;
+
+    /// Once we see EOF on char stream, next token will be EOF.
+    /// If you have DONE : EOF ; then you see DONE EOF.
+    bool hitEOF;
+
+    /// The channel number for the current token.
+    size_t channel;
+
+    /// The token type for the current token.
+    size_t type;
+
+    // Use the vector as a stack.
+    std::vector<size_t> modeStack;
+    size_t mode; // Current mode. NOTE(review): presumably maintained by setMode()/pushMode()/popMode() - confirm in Lexer.cpp.
+
+    Lexer();
+    Lexer(CharStream *input);
+    virtual ~Lexer() {}
+
+    virtual void reset();
+
+    /// Return a token from this source; i.e., match a token on the char stream.
+    virtual std::unique_ptr<Token> nextToken() override;
+
+    /// Instruct the lexer to skip creating a token for current lexer rule
+    /// and look for another token.  nextToken() knows to keep looking when
+    /// a lexer rule finishes with token set to SKIP_TOKEN (NOTE(review): no
+    /// SKIP_TOKEN is declared in this header; presumably SKIP is meant).  If
+    /// token == null at end of any token rule, it creates one for you and emits it.
+    virtual void skip();
+    virtual void more();
+    virtual void setMode(size_t m);
+    virtual void pushMode(size_t m);
+    virtual size_t popMode();
+
+    template<typename T1>
+    void setTokenFactory(TokenFactory<T1> *factory)  {
+      this->_factory = factory; // Stores the raw pointer; TokenFactory<T1>* must convert to TokenFactory<CommonToken>*.
+    }
+
+    virtual TokenFactory<CommonToken>* getTokenFactory() override;
+
+    /// Set the char stream and reset the lexer
+    virtual void setInputStream(IntStream *input) override;
+
+    virtual std::string getSourceName() override;
+
+    virtual CharStream* getInputStream() override;
+
+    /// By default does not support multiple emits per nextToken invocation
+    /// for efficiency reasons. Subclasses can override this method, nextToken,
+    /// and getToken (to push tokens into a list and pull from that list
+    /// rather than a single variable as this implementation does).
+    virtual void emit(std::unique_ptr<Token> newToken);
+
+    /// The standard method called to automatically emit a token at the
+    /// outermost lexical rule.  The token object should point into the
+    /// char buffer start..stop.  If there is a text override in '_text',
+    /// use that to set the token's text.  Override this method to emit
+    /// custom Token objects or provide a new factory.
+    virtual Token* emit();
+
+    virtual Token* emitEOF();
+
+    virtual size_t getLine() const override;
+
+    virtual size_t getCharPositionInLine() override;
+
+    virtual void setLine(size_t line);
+
+    virtual void setCharPositionInLine(size_t charPositionInLine);
+
+    /// What is the index of the current character of lookahead?
+    virtual size_t getCharIndex();
+
+    /// Return the text matched so far for the current token or any
+    /// text override.
+    virtual std::string getText();
+
+    /// Set the complete text of this token; it wipes any previous
+    /// changes to the text.
+    virtual void setText(const std::string &text);
+
+    /// Override if emitting multiple tokens.
+    virtual std::unique_ptr<Token> getToken();
+
+    virtual void setToken(std::unique_ptr<Token> newToken);
+
+    virtual void setType(size_t ttype);
+
+    virtual size_t getType();
+
+    virtual void setChannel(size_t newChannel);
+
+    virtual size_t getChannel();
+
+    virtual const std::vector<std::string>& getChannelNames() const = 0; // Pure virtual: concrete lexers must supply this.
+
+    virtual const std::vector<std::string>& getModeNames() const = 0; // Pure virtual: concrete lexers must supply this.
+
+    /// Return a list of all Token objects in input char stream.
+    /// Forces load of all tokens. Does not include EOF token.
+    virtual std::vector<std::unique_ptr<Token>> getAllTokens();
+
+    virtual void recover(const LexerNoViableAltException &e);
+
+    virtual void notifyListeners(const LexerNoViableAltException &e);
+
+    virtual std::string getErrorDisplay(const std::string &s);
+
+    /// Lexers can normally match any char in its vocabulary after matching
+    /// a token, so do the easy thing and just kill a character and hope
+    /// it all works out.  You can instead use the rule invocation stack
+    /// to do sophisticated error recovery if you are in a fragment rule.
+    virtual void recover(RecognitionException *re);
+
+    /// <summary>
+    /// Gets the number of syntax errors reported during parsing. This value is
+    /// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
+    /// </summary>
+    /// <seealso cref="#notifyListeners"/> NOTE(review): only notifyListeners is declared in this class - confirm which call increments the count.
+    virtual size_t getNumberOfSyntaxErrors();
+
+  protected:
+    /// You can set the text for the current token to override what is in
+    /// the input char buffer (via setText()).
+    std::string _text;
+
+  private:
+    size_t _syntaxErrors; // NOTE(review): presumably the counter behind getNumberOfSyntaxErrors() - confirm in Lexer.cpp.
+    void InitializeInstanceFields(); // Defined outside this header; presumably gives the members above their initial values.
+  };
+
+} // namespace antlr4