1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2 * Use of this file is governed by the BSD 3-clause license that
3 * can be found in the LICENSE.txt file in the project root.
 */
6 #include "atn/LexerATNSimulator.h"
7 #include "Exceptions.h"
8 #include "misc/Interval.h"
9 #include "CommonTokenFactory.h"
10 #include "LexerNoViableAltException.h"
11 #include "ANTLRErrorListener.h"
12 #include "support/CPPUtils.h"
13 #include "CommonToken.h"
14 #include "support/StringUtils.h"
20 using namespace antlrcpp;
21 using namespace antlr4;
// Default constructor: runs the Recognizer base constructor, then calls
// InitializeInstanceFields() to give the lexer's members their starting values.
// _input is not assigned by this constructor's initializer list.
23 Lexer::Lexer() : Recognizer() {
24 InitializeInstanceFields();
// Constructs a lexer reading from `input`: the raw pointer is stored in _input
// and InitializeInstanceFields() then sets the remaining members.
28 Lexer::Lexer(CharStream *input) : Recognizer(), _input(input) {
29 InitializeInstanceFields();
// NOTE(review): the function header for the statements below is not visible in
// this excerpt; they read like the body of a state-reset routine — confirm
// against the full file.
33 // wack Lexer state variables
34 _input->seek(0); // rewind the input
// Put the per-token bookkeeping back to its invalid/default values.
38 type = Token::INVALID_TYPE;
39 channel = Token::DEFAULT_CHANNEL;
40 tokenStartCharIndex = INVALID_INDEX;
41 tokenStartCharPositionInLine = 0;
// Return to the default lexer mode.
47 mode = Lexer::DEFAULT_MODE;
// Reset the LexerATNSimulator interpreter as well.
50 getInterpreter<atn::LexerATNSimulator>()->reset();
// Returns the next token as a std::unique_ptr<Token>; the `token` member is
// moved out, so ownership passes to the caller.
// NOTE(review): several statements of this function (loop/try headers, branch
// bodies, closing braces) are not visible in this excerpt; the comments below
// describe only the statements that are.
53 std::unique_ptr<Token> Lexer::nextToken() {
54 // Mark start location in char stream so unbuffered streams are
55 // guaranteed at least have text of current token
56 ssize_t tokenStartMarker = _input->mark();
// Register a callback through finally() that releases the marker taken above
// (presumably invoked when `onExit` leaves scope — TODO confirm in CPPUtils).
58 auto onExit = finally([this, tokenStartMarker]{
59 // make sure we release marker after match or
60 // unbuffered char stream will keep buffering
61 _input->release(tokenStartMarker);
68 return std::move(token);
// Capture the state at the start of the upcoming token: default channel,
// current input index, and the interpreter's current column and line.
72 channel = Token::DEFAULT_CHANNEL;
73 tokenStartCharIndex = _input->index();
74 tokenStartCharPositionInLine = getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine();
75 tokenStartLine = getInterpreter<atn::LexerATNSimulator>()->getLine();
// Start with no token type.
78 type = Token::INVALID_TYPE;
// Ask the ATN simulator to match from _input in the current mode.
81 ttype = getInterpreter<atn::LexerATNSimulator>()->match(_input, mode);
// A LexerNoViableAltException caught here is handed to notifyListeners().
82 } catch (LexerNoViableAltException &e) {
83 notifyListeners(e); // report error
87 if (_input->LA(1) == EOF) {
90 if (type == Token::INVALID_TYPE) {
// The enclosing do-loop repeats for as long as `type` is MORE.
96 } while (type == MORE);
97 if (token == nullptr) {
100 return std::move(token);
// Mode setter taking the new mode as `m`; popMode() calls it with the mode it
// takes from modeStack. NOTE(review): the body is not visible in this excerpt.
112 void Lexer::setMode(size_t m) {
// Saves the current `mode` on modeStack so that popMode() can restore it.
// `m` is the mode being pushed. NOTE(review): the statement that switches to
// `m` is not visible in this excerpt.
116 void Lexer::pushMode(size_t m) {
// Trace output of the pushed mode on stdout. NOTE(review): any
// conditional-compilation guard around this line is not visible here — confirm.
118 std::cout << "pushMode " << m << std::endl;
121 modeStack.push_back(mode);
// Restores the mode most recently saved on modeStack.
// Throws EmptyStackException when modeStack is empty.
// Declared to return size_t; the return statement is not visible in this excerpt.
125 size_t Lexer::popMode() {
126 if (modeStack.empty()) {
127 throw EmptyStackException();
// Trace output of the mode being restored. NOTE(review): any
// conditional-compilation guard around this line is not visible here — confirm.
130 std::cout << std::string("popMode back to ") << modeStack.back() << std::endl;
// Switch to the saved mode first, then remove it from the stack.
133 setMode(modeStack.back());
134 modeStack.pop_back();
// Accessor returning a TokenFactory<CommonToken> pointer (presumably _factory —
// TODO confirm; the body is not visible in this excerpt).
139 TokenFactory<CommonToken>* Lexer::getTokenFactory() {
// Replaces the lexer's input stream with `input`.
// The argument arrives as an IntStream* and is converted with dynamic_cast, so
// _input becomes nullptr when `input` does not point to a CharStream.
143 void Lexer::setInputStream(IntStream *input) {
145 _input = dynamic_cast<CharStream*>(input);
// Returns the source name reported by the current input stream.
// _input is dereferenced without a null check.
148 std::string Lexer::getSourceName() {
149 return _input->getSourceName();
// Accessor returning a CharStream pointer (presumably _input — TODO confirm;
// the body is not visible in this excerpt).
152 CharStream* Lexer::getInputStream() {
// Takes ownership of `newToken` and stores it in `token`; whatever `token`
// owned before the move-assignment is released.
156 void Lexer::emit(std::unique_ptr<Token> newToken) {
157 token = std::move(newToken);
// Creates a token through _factory from the lexer's current state (type, _text,
// channel, token start index/line/column) and passes it to
// emit(std::unique_ptr<Token>). The stop index passed is getCharIndex() - 1.
// Declared to return Token*; the return statement is not visible in this excerpt.
160 Token* Lexer::emit() {
161 emit(_factory->create({ this, _input }, type, _text, channel,
162 tokenStartCharIndex, getCharIndex() - 1, tokenStartLine, tokenStartCharPositionInLine));
// Emits an EOF token with empty text on Token::DEFAULT_CHANNEL, positioned at
// the current line and column.
// Declared to return Token*; the return statement is not visible in this excerpt.
166 Token* Lexer::emitEOF() {
167 size_t cpos = getCharPositionInLine();
168 size_t line = getLine();
// The two index arguments are index() and index() - 1.
// NOTE(review): if index() is unsigned and returns 0, the `- 1` wraps around —
// confirm how the factory treats that value.
169 emit(_factory->create({ this, _input }, EOF, "", Token::DEFAULT_CHANNEL, _input->index(), _input->index() - 1, line, cpos));
// Returns the line value held by the LexerATNSimulator interpreter.
173 size_t Lexer::getLine() const {
174 return getInterpreter<atn::LexerATNSimulator>()->getLine();
// Returns the character-position-in-line value held by the LexerATNSimulator
// interpreter.
177 size_t Lexer::getCharPositionInLine() {
178 return getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine();
// Forwards `line` to the LexerATNSimulator interpreter's setLine().
181 void Lexer::setLine(size_t line) {
182 getInterpreter<atn::LexerATNSimulator>()->setLine(line);
// Forwards `charPositionInLine` to the LexerATNSimulator interpreter's
// setCharPositionInLine().
185 void Lexer::setCharPositionInLine(size_t charPositionInLine) {
186 getInterpreter<atn::LexerATNSimulator>()->setCharPositionInLine(charPositionInLine);
// Returns the current index of the input stream (_input->index()).
189 size_t Lexer::getCharIndex() {
190 return _input->index();
// Returns the text associated with the current token.
// A non-empty _text is handled by the first branch (its statement is not
// visible in this excerpt — presumably it returns _text; TODO confirm). The
// final return asks the interpreter for the text it matched from _input.
193 std::string Lexer::getText() {
194 if (!_text.empty()) {
197 return getInterpreter<atn::LexerATNSimulator>()->getText(_input);
// Takes the replacement text by const reference (presumably stored in _text —
// TODO confirm; the body is not visible in this excerpt).
200 void Lexer::setText(const std::string &text) {
// Hands the stored token to the caller. Because `token` is moved from, it is
// left null after this call.
204 std::unique_ptr<Token> Lexer::getToken() {
205 return std::move(token);
// Takes ownership of `newToken` and stores it in `token`; whatever `token`
// owned before the move-assignment is released.
208 void Lexer::setToken(std::unique_ptr<Token> newToken) {
209 token = std::move(newToken);
// Takes a token type value as `ttype` (presumably stored in `type` — TODO
// confirm; the body is not visible in this excerpt).
212 void Lexer::setType(size_t ttype) {
// Accessor returning a size_t (presumably `type` — TODO confirm; the body is
// not visible in this excerpt).
216 size_t Lexer::getType() {
// Stores `newChannel` in `channel`, the value emit() passes to the token factory.
220 void Lexer::setChannel(size_t newChannel) {
221 channel = newChannel;
// Accessor returning a size_t (presumably `channel` — TODO confirm; the body is
// not visible in this excerpt).
224 size_t Lexer::getChannel() {
// Collects tokens from nextToken() into a vector that owns them.
// The loop runs only while the current token's type is not EOF, so the EOF
// token is never appended by the loop body.
// NOTE(review): the statement that fetches the following token and the return
// statement are not visible in this excerpt.
228 std::vector<std::unique_ptr<Token>> Lexer::getAllTokens() {
229 std::vector<std::unique_ptr<Token>> tokens;
230 std::unique_ptr<Token> t = nextToken();
231 while (t->getType() != EOF) {
// Ownership of the token moves into the vector.
232 tokens.push_back(std::move(t));
// Recovery step for a failed match: unless the next input symbol is EOF, one
// character is consumed through the interpreter.
// The exception argument is unused (its name is commented out).
238 void Lexer::recover(const LexerNoViableAltException &/*e*/) {
239 if (_input->LA(1) != EOF) {
240 // skip a char and try again
241 getInterpreter<atn::LexerATNSimulator>()->consume(_input);
// Reports a token recognition error to the registered error listeners.
// The exception argument is unused (its name is commented out); the listener
// receives std::current_exception() instead.
245 void Lexer::notifyListeners(const LexerNoViableAltException & /*e*/) {
// Input text between the token start index and the current index.
247 std::string text = _input->getText(misc::Interval(tokenStartCharIndex, _input->index()));
// The text is passed through getErrorDisplay() before being quoted in the message.
248 std::string msg = std::string("token recognition error at: '") + getErrorDisplay(text) + std::string("'");
250 ProxyErrorListener &listener = getErrorListenerDispatch();
// No offending token is supplied (nullptr); the position reported is the token
// start line and column.
251 listener.syntaxError(this, nullptr, tokenStartLine, tokenStartCharPositionInLine, msg, std::current_exception());
// Returns a display string derived from `s`; notifyListeners() embeds the
// result in its error message.
// NOTE(review): only the stringstream declaration is visible in this excerpt;
// how `s` is transformed is not shown.
254 std::string Lexer::getErrorDisplay(const std::string &s) {
255 std::stringstream ss;
// Recovery overload taking a RecognitionException pointer; the argument is
// unused (its name is commented out).
// NOTE(review): the statements following the TODO are not visible in this excerpt.
275 void Lexer::recover(RecognitionException * /*re*/) {
276 // TODO: Do we lose character or line position information?
// Returns the current value of the _syntaxErrors counter.
280 size_t Lexer::getNumberOfSyntaxErrors() {
281 return _syntaxErrors;
// Gives the lexer's members their starting values; both constructors call this.
// NOTE(review): further assignments may exist on lines not visible in this excerpt.
284 void Lexer::InitializeInstanceFields() {
// Use the raw pointer obtained from CommonTokenFactory::DEFAULT as the token factory.
287 _factory = CommonTokenFactory::DEFAULT.get();
// No token has been started yet.
288 tokenStartCharIndex = INVALID_INDEX;
290 tokenStartCharPositionInLine = 0;
294 mode = Lexer::DEFAULT_MODE;