1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2 * Use of this file is governed by the BSD 3-clause license that
3 * can be found in the LICENSE.txt file in the project root.
8 #include "Recognizer.h"
9 #include "TokenSource.h"
10 #include "CharStream.h"
15 /// A lexer is a recognizer that draws input symbols from a character stream.
16 /// Lexer grammars result in a subclass of this object. A Lexer object
17 /// uses simplified match() and error recovery mechanisms in the interest
19 class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource {
// Lexer-wide constants. Under C++17 and later (the #if below) they are
// declared as static constexpr size_t members.
21 #if __cplusplus >= 201703L
22 static constexpr size_t DEFAULT_MODE = 0;
// MORE and SKIP take the two values just below the maximum size_t.
// NOTE(review): presumably reserved token-type sentinels -- confirm against the lexer implementation.
23 static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1;
24 static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2;
// The channel constants simply alias the corresponding Token channel values.
26 static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
27 static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL;
// Character value bounds: 0 through 0x10FFFF.
28 static constexpr size_t MIN_CHAR_VALUE = 0;
29 static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF;
// The same names again with equal values: static_cast<size_t>(-2) and (-3)
// wrap around to max() - 1 and max() - 2 because size_t is unsigned.
// NOTE(review): presumably the pre-C++17 enum fallback; its #else/enum header
// is not visible in this excerpt -- confirm.
33 MORE = static_cast<size_t>(-2), // std::numeric_limits<size_t>::max() - 1; doesn't work in VS 2013
34 SKIP = static_cast<size_t>(-3), // std::numeric_limits<size_t>::max() - 2; doesn't work in VS 2013
36 DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL,
37 HIDDEN = Token::HIDDEN_CHANNEL,
39 MAX_CHAR_VALUE = 0x10FFFF,
43 CharStream *_input; // Pure reference, usually from statically allocated instance.
46 /// How to create token objects.
47 TokenFactory<CommonToken> *_factory;
50 /// The goal of all lexer rules/methods is to create a token object.
51 /// This is an instance variable as multiple rules may collaborate to
52 /// create a single token. nextToken will return this object after
53 /// matching lexer rule(s). If you subclass to allow multiple token
54 /// emissions, then set this to the last token to be matched or
55 /// something nonnull so that the auto token emit mechanism will not
56 /// emit another token.
58 // Life cycle of a token is this:
59 // Created by emit() (via the token factory) or by action code, holding ownership of it.
60 // Ownership is handed over to the token stream when calling nextToken().
61 std::unique_ptr<Token> token;
64 /// What character index in the stream did the current token start at?
65 /// Needed, for example, to get the text for current token. Set at
66 /// the start of nextToken.
68 size_t tokenStartCharIndex;
71 /// The line on which the first character of the token resides.
72 size_t tokenStartLine;
74 /// The character position of first character within the line.
75 size_t tokenStartCharPositionInLine;
77 /// Once we see EOF on char stream, next token will be EOF.
78 /// If you have DONE : EOF ; then you see DONE EOF.
81 /// The channel number for the current token.
84 /// The token type for the current token.
87 // Use the vector as a stack.
88 std::vector<size_t> modeStack;
92 Lexer(CharStream *input);
97 /// Return a token from this source; i.e., match a token on the char stream.
98 virtual std::unique_ptr<Token> nextToken() override;
100 /// Instruct the lexer to skip creating a token for current lexer rule
101 /// and look for another token. nextToken() knows to keep looking when
102 /// a lexer rule finishes with token set to SKIP_TOKEN. Recall that
103 /// if token == null at end of any token rule, it creates one for you
// Lexer mode control; each takes or returns a mode as a size_t value.
// NOTE(review): presumably setMode() selects the active mode while
// pushMode()/popMode() save and restore modes through modeStack -- the
// definitions are not visible here, confirm against the implementation.
107 virtual void setMode(size_t m);
108 virtual void pushMode(size_t m);
109 virtual size_t popMode();
/// Installs the factory used to create token objects.
/// Stores the given pointer in _factory. The assignment only compiles when
/// TokenFactory<T1>* is convertible to TokenFactory<CommonToken>*.
111 template<typename T1>
112 void setTokenFactory(TokenFactory<T1> *factory) {
113 this->_factory = factory;
116 virtual TokenFactory<CommonToken>* getTokenFactory() override;
118 /// Set the char stream and reset the lexer
119 virtual void setInputStream(IntStream *input) override;
121 virtual std::string getSourceName() override;
123 virtual CharStream* getInputStream() override;
125 /// By default does not support multiple emits per nextToken invocation
126 /// for efficiency reasons. Subclasses can override this method, nextToken,
127 /// and getToken (to push tokens into a list and pull from that list
128 /// rather than a single variable as this implementation does).
129 virtual void emit(std::unique_ptr<Token> newToken);
131 /// The standard method called to automatically emit a token at the
132 /// outermost lexical rule. The token object should point into the
133 /// char buffer start..stop. If there is a text override in 'text',
134 /// use that to set the token's text. Override this method to emit
135 /// custom Token objects or provide a new factory.
136 virtual Token* emit();
138 virtual Token* emitEOF();
140 virtual size_t getLine() const override;
142 virtual size_t getCharPositionInLine() override;
144 virtual void setLine(size_t line);
146 virtual void setCharPositionInLine(size_t charPositionInLine);
148 /// What is the index of the current character of lookahead?
149 virtual size_t getCharIndex();
151 /// Return the text matched so far for the current token or any
153 virtual std::string getText();
155 /// Set the complete text of this token; it wipes any previous
156 /// changes to the text.
157 virtual void setText(const std::string &text);
159 /// Override if emitting multiple tokens.
160 virtual std::unique_ptr<Token> getToken();
162 virtual void setToken(std::unique_ptr<Token> newToken);
164 virtual void setType(size_t ttype);
166 virtual size_t getType();
168 virtual void setChannel(size_t newChannel);
170 virtual size_t getChannel();
172 virtual const std::vector<std::string>& getChannelNames() const = 0;
174 virtual const std::vector<std::string>& getModeNames() const = 0;
176 /// Return a list of all Token objects in input char stream.
177 /// Forces load of all tokens. Does not include EOF token.
178 virtual std::vector<std::unique_ptr<Token>> getAllTokens();
180 virtual void recover(const LexerNoViableAltException &e);
182 virtual void notifyListeners(const LexerNoViableAltException &e);
184 virtual std::string getErrorDisplay(const std::string &s);
186 /// Lexers can normally match any char in its vocabulary after matching
187 /// a token, so do the easy thing and just kill a character and hope
188 /// it all works out. You can instead use the rule invocation stack
189 /// to do sophisticated error recovery if you are in a fragment rule.
190 virtual void recover(RecognitionException *re);
193 /// Gets the number of syntax errors reported during parsing. This value is
194 /// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
196 /// <seealso cref="#notifyListeners"/>
197 virtual size_t getNumberOfSyntaxErrors();
200 /// You can set the text for the current token to override what is in
201 /// the input char buffer (via setText()).
205 size_t _syntaxErrors;
206 void InitializeInstanceFields();
209 } // namespace antlr4