/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include "WritableToken.h"
#include "Lexer.h"
#include "RuleContext.h"
#include "misc/Interval.h"
#include "Exceptions.h"
#include "support/CPPUtils.h"

#include "BufferedTokenStream.h"

using namespace antlr4;
using namespace antlrcpp;

BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource) : _tokenSource(tokenSource) {
  InitializeInstanceFields();
}

TokenSource* BufferedTokenStream::getTokenSource() const {
  return _tokenSource;
}

size_t BufferedTokenStream::index() {
  return _p;
}

ssize_t BufferedTokenStream::mark() {
  // The entire token stream is buffered, so no real marker is needed.
  return 0;
}

void BufferedTokenStream::release(ssize_t /*marker*/) {
  // no resources to release
}

void BufferedTokenStream::reset() {
  seek(0);
}

void BufferedTokenStream::seek(size_t index) {
  lazyInit();
  _p = adjustSeekIndex(index);
}

size_t BufferedTokenStream::size() {
  return _tokens.size();
}

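/**
 * Advance the input pointer by one token. Throws an IllegalStateException if
 * the stream is already positioned at EOF.
 */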
void BufferedTokenStream::consume() {
  bool skipEofCheck = false;
  if (!_needSetup) {
    if (_fetchedEOF) {
      // The last token in the buffer is EOF. Skip the check if p indexes any
      // fetched token except the last.
      skipEofCheck = _p < _tokens.size() - 1;
    } else {
      // No EOF token in the buffer. Skip the check if p indexes a fetched token.
      skipEofCheck = _p < _tokens.size();
    }
  } else {
    // Not yet initialized.
    skipEofCheck = false;
  }

  if (!skipEofCheck && LA(1) == Token::EOF) {
    throw IllegalStateException("cannot consume EOF");
  }

  if (sync(_p + 1)) {
    _p = adjustSeekIndex(_p + 1);
  }
}

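/**
 * Make sure index i in the buffer has a token loaded.
 *
 * Returns true if a token is located at index i, false if the token source
 * ran out of tokens before index i could be reached.
 */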
bool BufferedTokenStream::sync(size_t i) {
  if (i + 1 < _tokens.size())
    return true;
  size_t n = i - _tokens.size() + 1; // how many more elements do we need?
  size_t fetched = fetch(n);
  return fetched >= n;
}

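/**
 * Add up to n tokens from the token source to the buffer, stopping early when
 * EOF is reached. Returns the number of tokens actually fetched.
 */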
size_t BufferedTokenStream::fetch(size_t n) {
  if (_fetchedEOF) {
    return 0;
  }

  size_t i = 0;
  while (i < n) {
    std::unique_ptr<Token> t(_tokenSource->nextToken());

    if (is<WritableToken *>(t.get())) {
      (static_cast<WritableToken *>(t.get()))->setTokenIndex(_tokens.size());
    }

    _tokens.push_back(std::move(t));
    ++i; // Count the freshly created token.

    if (_tokens.back()->getType() == Token::EOF) {
      _fetchedEOF = true;
      break;
    }
  }

  return i;
}

Token* BufferedTokenStream::get(size_t i) const {
  if (i >= _tokens.size()) {
    throw IndexOutOfBoundsException(std::string("token index ") +
                                    std::to_string(i) +
                                    std::string(" out of range 0..") +
                                    std::to_string(_tokens.size() - 1));
  }
  return _tokens[i].get();
}

std::vector<Token *> BufferedTokenStream::get(size_t start, size_t stop) {
  std::vector<Token *> subset;

  lazyInit();

  if (_tokens.empty()) {
    return subset;
  }

  if (stop >= _tokens.size()) {
    stop = _tokens.size() - 1;
  }
  for (size_t i = start; i <= stop; i++) {
    Token *t = _tokens[i].get();
    if (t->getType() == Token::EOF) {
      break;
    }
    subset.push_back(t);
  }
  return subset;
}

size_t BufferedTokenStream::LA(ssize_t i) {
  return LT(i)->getType();
}

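/**
 * Look backwards k tokens from the current position; LB(1) is the previously
 * consumed token. Returns nullptr if there are fewer than k tokens behind.
 */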
Token* BufferedTokenStream::LB(size_t k) {
  if (k > _p) {
    return nullptr;
  }
  return _tokens[_p - k].get();
}

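/**
 * Look ahead (k > 0) or behind (k < 0); LT(1) is the token that will be
 * consumed next. Looking past the end of the stream returns the EOF token.
 */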
Token* BufferedTokenStream::LT(ssize_t k) {
  lazyInit();
  if (k == 0) {
    return nullptr;
  }
  if (k < 0) {
    return LB(static_cast<size_t>(-k));
  }

  size_t i = _p + static_cast<size_t>(k) - 1;
  sync(i);
  if (i >= _tokens.size()) { // return EOF token
    // EOF must be the last token.
    return _tokens.back().get();
  }

  return _tokens[i].get();
}

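/**
 * Hook that lets derived streams redirect a seek target (for example to skip
 * off-channel tokens); the base implementation returns the index unchanged.
 */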
ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) {
  return static_cast<ssize_t>(i);
}

void BufferedTokenStream::lazyInit() {
  if (_needSetup) {
    setup();
  }
}

void BufferedTokenStream::setup() {
  _needSetup = false;
  sync(0);
  _p = adjustSeekIndex(0);
}

void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) {
  _tokenSource = tokenSource;
  _tokens.clear();
  _fetchedEOF = false;
  _needSetup = true;
}

std::vector<Token *> BufferedTokenStream::getTokens() {
  std::vector<Token *> result;
  for (auto &t : _tokens)
    result.push_back(t.get());
  return result;
}

std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop) {
  return getTokens(start, stop, std::vector<size_t>());
}

std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector<size_t> &types) {
  lazyInit();
  if (stop >= _tokens.size() || start >= _tokens.size()) {
    throw IndexOutOfBoundsException(std::string("start ") +
                                    std::to_string(start) +
                                    std::string(" or stop ") +
                                    std::to_string(stop) +
                                    std::string(" not in 0..") +
                                    std::to_string(_tokens.size() - 1));
  }

  std::vector<Token *> filteredTokens;

  if (start > stop) {
    return filteredTokens;
  }

  for (size_t i = start; i <= stop; i++) {
    Token *tok = _tokens[i].get();

    if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) {
      filteredTokens.push_back(tok);
    }
  }
  return filteredTokens;
}

std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) {
  std::vector<size_t> s;
  s.push_back(ttype);
  return getTokens(start, stop, s);
}

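/**
 * Starting at index i, return the index of the first token on the requested
 * channel. Stops at (and returns the index of) the EOF token if no later
 * token is on that channel.
 */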
ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) {
  sync(i);
  if (i >= size()) {
    return static_cast<ssize_t>(size() - 1);
  }

  Token *token = _tokens[i].get();
  while (token->getChannel() != channel) {
    if (token->getType() == Token::EOF) {
      return static_cast<ssize_t>(i);
    }
    i++;
    sync(i);
    token = _tokens[i].get();
  }

  return static_cast<ssize_t>(i);
}

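/**
 * Starting at index i, walk backwards and return the index of the first token
 * on the requested channel, or -1 if no such token exists before i.
 */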
ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) {
  sync(i);
  if (i >= size()) {
    // The EOF token is on every channel.
    return static_cast<ssize_t>(size() - 1);
  }

  while (true) {
    Token *token = _tokens[i].get();
    if (token->getType() == Token::EOF || token->getChannel() == channel) {
      return static_cast<ssize_t>(i);
    }
    if (i == 0) {
      return -1;
    }
    i--;
  }
}

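/**
 * Collect the off-channel tokens (e.g. whitespace or comments sent to a hidden
 * channel) that directly follow the token at tokenIndex, up to the next
 * on-channel token. A channel value of -1 matches any non-default channel.
 */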
std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) {
  lazyInit();
  if (tokenIndex >= _tokens.size()) {
    throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
  }

  ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL);
  size_t to;
  size_t from = tokenIndex + 1;
  // If there is no on-channel token to the right, nextOnChannel == -1, so 'to' becomes the last token.
  if (nextOnChannel == -1) {
    to = size() - 1;
  } else {
    to = static_cast<size_t>(nextOnChannel);
  }

  return filterForChannel(from, to, channel);
}

std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) {
  return getHiddenTokensToRight(tokenIndex, -1);
}

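/**
 * Collect the off-channel tokens that directly precede the token at
 * tokenIndex, back to the previous on-channel token. A channel value of -1
 * matches any non-default channel.
 */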
std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) {
  lazyInit();
  if (tokenIndex >= _tokens.size()) {
    throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
  }

  if (tokenIndex == 0) {
    // Obviously no tokens can appear before the first token.
    return { };
  }

  ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
  if (prevOnChannel == static_cast<ssize_t>(tokenIndex - 1)) {
    return { };
  }

  // If there is no on-channel token to the left, prevOnChannel == -1, so from becomes 0.
  size_t from = static_cast<size_t>(prevOnChannel + 1);
  size_t to = tokenIndex - 1;

  return filterForChannel(from, to, channel);
}

std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) {
  return getHiddenTokensToLeft(tokenIndex, -1);
}

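/**
 * Return the tokens in the index range [from, to] that belong to the requested
 * channel; -1 selects every token that is not on the default channel.
 */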
std::vector<Token *> BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) {
  std::vector<Token *> hidden;
  for (size_t i = from; i <= to; i++) {
    Token *t = _tokens[i].get();
    if (channel == -1) {
      if (t->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL) {
        hidden.push_back(t);
      }
    } else {
      if (t->getChannel() == static_cast<size_t>(channel)) {
        hidden.push_back(t);
      }
    }
  }

  return hidden;
}

bool BufferedTokenStream::isInitialized() const {
  return !_needSetup;
}

std::string BufferedTokenStream::getSourceName() const {
  return _tokenSource->getSourceName();
}

/**
 * Get the text of all tokens in this buffer.
 */
std::string BufferedTokenStream::getText() {
  fill();
  return getText(misc::Interval(0U, size() - 1));
}

std::string BufferedTokenStream::getText(const misc::Interval &interval) {
  lazyInit();
  size_t start = interval.a;
  size_t stop = interval.b;
  if (start == INVALID_INDEX || stop == INVALID_INDEX) {
    return "";
  }

  sync(stop); // Make sure the requested range is loaded before clamping.
  if (stop >= _tokens.size()) {
    stop = _tokens.size() - 1;
  }

  std::stringstream ss;
  for (size_t i = start; i <= stop; i++) {
    Token *t = _tokens[i].get();
    if (t->getType() == Token::EOF) {
      break;
    }
    ss << t->getText();
  }
  return ss.str();
}

std::string BufferedTokenStream::getText(RuleContext *ctx) {
  return getText(ctx->getSourceInterval());
}

std::string BufferedTokenStream::getText(Token *start, Token *stop) {
  if (start != nullptr && stop != nullptr) {
    return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex()));
  }

  return "";
}

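/**
 * Eagerly fetch all remaining tokens from the token source, in blocks of 1000,
 * until EOF has been buffered.
 */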
void BufferedTokenStream::fill() {
  lazyInit();
  const size_t blockSize = 1000;
  while (true) {
    size_t fetched = fetch(blockSize);
    if (fetched < blockSize) {
      return;
    }
  }
}

void BufferedTokenStream::InitializeInstanceFields() {
  _p = 0;
  _fetchedEOF = false;
  _needSetup = true;
}