1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2 * Use of this file is governed by the BSD 3-clause license that
3 * can be found in the LICENSE.txt file in the project root.
6 #include "Exceptions.h"
7 #include "misc/Interval.h"
9 #include "TokenStream.h"
11 #include "TokenStreamRewriter.h"
13 using namespace antlr4;
15 using antlr4::misc::Interval;
// Constructs a rewrite operation from its owning rewriter and a token index.
// The owning rewriter is stored in `outerInstance`, then InitializeInstanceFields() is run.
// NOTE(review): `index_` is not consumed on the lines shown here — confirm it is stored in the body.
17 TokenStreamRewriter::RewriteOperation::RewriteOperation(TokenStreamRewriter *outerInstance_, size_t index_)
18 : outerInstance(outerInstance_) {
20 InitializeInstanceFields();
// Constructs a rewrite operation from its owning rewriter, a token index and the operation text.
// The owning rewriter is stored in `outerInstance`, then InitializeInstanceFields() is run.
// NOTE(review): `index_` and `text_` are not consumed on the lines shown here — confirm they are stored in the body.
24 TokenStreamRewriter::RewriteOperation::RewriteOperation(TokenStreamRewriter *outerInstance_, size_t index_,
25 const std::string& text_) : outerInstance(outerInstance_) {
27 InitializeInstanceFields();
// Out-of-line destructor for RewriteOperation.
32 TokenStreamRewriter::RewriteOperation::~RewriteOperation()
// Base implementation of execute(). The buffer parameter is left unnamed (its name is commented out),
// so this version cannot write to it; it only produces a size_t result.
36 size_t TokenStreamRewriter::RewriteOperation::execute(std::string * /*buf*/) {
40 std::string TokenStreamRewriter::RewriteOperation::toString() {
41 std::string opName = "TokenStreamRewriter";
42 size_t dollarIndex = opName.find('$');
43 opName = opName.substr(dollarIndex + 1, opName.length() - (dollarIndex + 1));
44 return "<" + opName + "@" + outerInstance->tokens->get(dollarIndex)->getText() + ":\"" + text + "\">";
// Field-initialization helper; both RewriteOperation constructors call it.
47 void TokenStreamRewriter::RewriteOperation::InitializeInstanceFields() {
52 TokenStreamRewriter::InsertBeforeOp::InsertBeforeOp(TokenStreamRewriter *outerInstance_, size_t index_, const std::string& text_)
53 : RewriteOperation(outerInstance_, index_, text_), outerInstance(outerInstance_) {
// Executes this insert against the output buffer `buf` and returns a size_t to the caller.
56 size_t TokenStreamRewriter::InsertBeforeOp::execute(std::string *buf) {
// Append the text of the token at `index`, unless that token is EOF.
58 if (outerInstance->tokens->get(index)->getType() != Token::EOF) {
59 buf->append(outerInstance->tokens->get(index)->getText());
// Builds a replace operation covering the token range from..to.
// `from` and `text` go to the RewriteOperation base; the rewriter is also kept in this class's
// own `outerInstance` member before InitializeInstanceFields() is run.
// NOTE(review): `to` is not consumed on the lines shown here — confirm it is stored in the body.
64 TokenStreamRewriter::ReplaceOp::ReplaceOp(TokenStreamRewriter *outerInstance_, size_t from, size_t to, const std::string& text)
65 : RewriteOperation(outerInstance_, from, text), outerInstance(outerInstance_) {
67 InitializeInstanceFields();
// Executes this replace against the output buffer `buf` and returns a size_t to the caller.
71 size_t TokenStreamRewriter::ReplaceOp::execute(std::string *buf) {
// Renders this operation for diagnostics, naming the first and last token of the replaced range.
76 std::string TokenStreamRewriter::ReplaceOp::toString() {
// Delete form: only the token range is shown.
78 return "<DeleteOp@" + outerInstance->tokens->get(index)->getText() + ".." + outerInstance->tokens->get(lastIndex)->getText() + ">";
// Replace form: the token range followed by the quoted replacement text.
80 return "<ReplaceOp@" + outerInstance->tokens->get(index)->getText() + ".." + outerInstance->tokens->get(lastIndex)->getText() + ":\"" + text + "\">";
// Field-initialization helper; the ReplaceOp constructor calls it.
83 void TokenStreamRewriter::ReplaceOp::InitializeInstanceFields() {
87 //------------------ TokenStreamRewriter -------------------------------------------------------------------------------
// Program name used by every overload that does not take an explicit program name.
89 const std::string TokenStreamRewriter::DEFAULT_PROGRAM_NAME = "default";
91 TokenStreamRewriter::TokenStreamRewriter(TokenStream *tokens_) : tokens(tokens_) {
92 _programs[DEFAULT_PROGRAM_NAME].reserve(PROGRAM_INIT_SIZE);
// Destructor: walks every program and every operation pointer stored in it.
95 TokenStreamRewriter::~TokenStreamRewriter() {
// NOTE(review): `auto program` takes each map entry by value, copying its operation vector on
// every iteration; `auto &program` would iterate without the copy.
96 for (auto program : _programs) {
97 for (auto *operation : program.second) {
// Accessor returning a TokenStream pointer — presumably the `tokens` member supplied at
// construction; confirm against the body.
103 TokenStream *TokenStreamRewriter::getTokenStream() {
107 void TokenStreamRewriter::rollback(size_t instructionIndex) {
108 rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
// Rolls the program `programName` back so that only the instructions in
// [MIN_TOKEN_INDEX, instructionIndex) remain.
111 void TokenStreamRewriter::rollback(const std::string &programName, size_t instructionIndex) {
// Takes a copy of the program's instruction list; operator[] also creates the entry if absent.
112 std::vector<RewriteOperation*> is = _programs[programName];
// NOTE(review): the entry for `programName` already exists at this point (see operator[] above).
// If `_programs` is a standard map type, insert() leaves an existing entry untouched, so the
// truncated list would never be stored — confirm, and check who frees the dropped operations.
114 _programs.insert({ programName, std::vector<RewriteOperation*>(is.begin() + MIN_TOKEN_INDEX, is.begin() + instructionIndex) });
118 void TokenStreamRewriter::deleteProgram() {
119 deleteProgram(DEFAULT_PROGRAM_NAME);
122 void TokenStreamRewriter::deleteProgram(const std::string &programName) {
123 rollback(programName, MIN_TOKEN_INDEX);
126 void TokenStreamRewriter::insertAfter(Token *t, const std::string& text) {
127 insertAfter(DEFAULT_PROGRAM_NAME, t, text);
130 void TokenStreamRewriter::insertAfter(size_t index, const std::string& text) {
131 insertAfter(DEFAULT_PROGRAM_NAME, index, text);
134 void TokenStreamRewriter::insertAfter(const std::string &programName, Token *t, const std::string& text) {
135 insertAfter(programName, t->getTokenIndex(), text);
138 void TokenStreamRewriter::insertAfter(const std::string &programName, size_t index, const std::string& text) {
139 // to insert after, just insert before next index (even if past end)
140 insertBefore(programName, index + 1, text);
143 void TokenStreamRewriter::insertBefore(Token *t, const std::string& text) {
144 insertBefore(DEFAULT_PROGRAM_NAME, t, text);
147 void TokenStreamRewriter::insertBefore(size_t index, const std::string& text) {
148 insertBefore(DEFAULT_PROGRAM_NAME, index, text);
151 void TokenStreamRewriter::insertBefore(const std::string &programName, Token *t, const std::string& text) {
152 insertBefore(programName, t->getTokenIndex(), text);
155 void TokenStreamRewriter::insertBefore(const std::string &programName, size_t index, const std::string& text) {
156 RewriteOperation *op = new InsertBeforeOp(this, index, text); /* mem-check: deleted in d-tor */
157 std::vector<RewriteOperation*> &rewrites = getProgram(programName);
158 op->instructionIndex = rewrites.size();
159 rewrites.push_back(op);
162 void TokenStreamRewriter::replace(size_t index, const std::string& text) {
163 replace(DEFAULT_PROGRAM_NAME, index, index, text);
166 void TokenStreamRewriter::replace(size_t from, size_t to, const std::string& text) {
167 replace(DEFAULT_PROGRAM_NAME, from, to, text);
170 void TokenStreamRewriter::replace(Token *indexT, const std::string& text) {
171 replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
174 void TokenStreamRewriter::replace(Token *from, Token *to, const std::string& text) {
175 replace(DEFAULT_PROGRAM_NAME, from, to, text);
178 void TokenStreamRewriter::replace(const std::string &programName, size_t from, size_t to, const std::string& text) {
179 if (from > to || to >= tokens->size()) {
180 throw IllegalArgumentException("replace: range invalid: " + std::to_string(from) + ".." + std::to_string(to) +
181 "(size = " + std::to_string(tokens->size()) + ")");
183 RewriteOperation *op = new ReplaceOp(this, from, to, text); /* mem-check: deleted in d-tor */
184 std::vector<RewriteOperation*> &rewrites = getProgram(programName);
185 op->instructionIndex = rewrites.size();
186 rewrites.push_back(op);
189 void TokenStreamRewriter::replace(const std::string &programName, Token *from, Token *to, const std::string& text) {
190 replace(programName, from->getTokenIndex(), to->getTokenIndex(), text);
193 void TokenStreamRewriter::Delete(size_t index) {
194 Delete(DEFAULT_PROGRAM_NAME, index, index);
197 void TokenStreamRewriter::Delete(size_t from, size_t to) {
198 Delete(DEFAULT_PROGRAM_NAME, from, to);
201 void TokenStreamRewriter::Delete(Token *indexT) {
202 Delete(DEFAULT_PROGRAM_NAME, indexT, indexT);
205 void TokenStreamRewriter::Delete(Token *from, Token *to) {
206 Delete(DEFAULT_PROGRAM_NAME, from, to);
209 void TokenStreamRewriter::Delete(const std::string &programName, size_t from, size_t to) {
210 std::string nullString;
211 replace(programName, from, to, nullString);
214 void TokenStreamRewriter::Delete(const std::string &programName, Token *from, Token *to) {
215 std::string nullString;
216 replace(programName, from, to, nullString);
219 size_t TokenStreamRewriter::getLastRewriteTokenIndex() {
220 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
223 size_t TokenStreamRewriter::getLastRewriteTokenIndex(const std::string &programName) {
224 if (_lastRewriteTokenIndexes.find(programName) == _lastRewriteTokenIndexes.end()) {
225 return INVALID_INDEX;
227 return _lastRewriteTokenIndexes[programName];
230 void TokenStreamRewriter::setLastRewriteTokenIndex(const std::string &programName, size_t i) {
231 _lastRewriteTokenIndexes.insert({ programName, i });
234 std::vector<TokenStreamRewriter::RewriteOperation*>& TokenStreamRewriter::getProgram(const std::string &name) {
235 auto iterator = _programs.find(name);
236 if (iterator == _programs.end()) {
237 return initializeProgram(name);
239 return iterator->second;
242 std::vector<TokenStreamRewriter::RewriteOperation*>& TokenStreamRewriter::initializeProgram(const std::string &name) {
243 _programs[name].reserve(PROGRAM_INIT_SIZE);
244 return _programs[name];
247 std::string TokenStreamRewriter::getText() {
248 return getText(DEFAULT_PROGRAM_NAME, Interval(0UL, tokens->size() - 1));
251 std::string TokenStreamRewriter::getText(std::string programName) {
252 return getText(programName, Interval(0UL, tokens->size() - 1));
255 std::string TokenStreamRewriter::getText(const Interval &interval) {
256 return getText(DEFAULT_PROGRAM_NAME, interval);
// Renders the tokens of `interval` as text, applying the rewrite operations of program
// `programName`. The stop bound is clamped to the last token of the stream. Without any
// operations the stream's own text is returned; otherwise the operations are first reduced
// to one per token index and then executed while walking the tokens.
259 std::string TokenStreamRewriter::getText(const std::string &programName, const Interval &interval) {
// NOTE(review): operator[] appears to create an empty entry when `programName` is unknown —
// confirm that this side effect on `_programs` is intended.
260 std::vector<TokenStreamRewriter::RewriteOperation*> &rewrites = _programs[programName];
261 size_t start = interval.a;
262 size_t stop = interval.b;
264 // ensure start/end are in range
265 if (stop > tokens->size() - 1) {
266 stop = tokens->size() - 1;
268 if (start == INVALID_INDEX) {
// NOTE(review): both operands of `||` are the same test; one of them is redundant.
272 if (rewrites.empty() || rewrites.empty()) {
// This path passes the caller's original `interval`, not the adjusted start/stop computed above.
273 return tokens->getText(interval); // no instructions to execute
277 // First, optimize instruction stream
278 std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> indexToOp = reduceToSingleOperationPerIndex(rewrites);
280 // Walk buffer, executing instructions and emitting tokens
282 while (i <= stop && i < tokens->size()) {
// operator[] yields a null pointer (and inserts a placeholder entry) when no operation is
// registered for `i`; the erase on the next line removes the entry in either case.
283 RewriteOperation *op = indexToOp[i];
284 indexToOp.erase(i); // remove so any left have index size-1
285 Token *t = tokens->get(i);
287 // no operation at that index, just dump token
288 if (t->getType() != Token::EOF) {
289 buf.append(t->getText());
291 i++; // move to next token
// The operation's return value becomes the next token index to visit.
294 i = op->execute(&buf); // execute operation and skip
298 // include stuff after end if it's last index in buffer
299 // So, if they did an insertAfter(lastValidIndex, "foo"), include
300 // foo if end==lastValidIndex.
301 if (stop == tokens->size() - 1) {
302 // Scan any remaining operations after last token
303 // should be included (they will be inserts).
304 for (auto op : indexToOp) {
305 if (op.second->index >= tokens->size() - 1) {
306 buf.append(op.second->text);
// Reduces the instruction list `rewrites` so that at most one operation remains per token index
// and returns those operations keyed by their token index.
//
// Entries of `rewrites` that are merged into another operation or made redundant are deleted
// and set to nullptr in place, so `rewrites` is modified by this call.
// Throws IllegalArgumentException when a replace overlaps an earlier replace in a way that cannot
// be merged, or when an insert falls inside an earlier replace; throws RuntimeException if two
// surviving operations share a token index.
313 std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> TokenStreamRewriter::reduceToSingleOperationPerIndex(
314 std::vector<TokenStreamRewriter::RewriteOperation*> &rewrites) {
// Pass 1: visit the replace operations and reconcile each with the operations before it.
318 for (size_t i = 0; i < rewrites.size(); ++i) {
319 TokenStreamRewriter::RewriteOperation *op = rewrites[i];
320 ReplaceOp *rop = dynamic_cast<ReplaceOp *>(op);
324 // Wipe prior inserts within range
325 std::vector<InsertBeforeOp *> inserts = getKindOfOps<InsertBeforeOp>(rewrites, i);
326 for (auto *iop : inserts) {
327 if (iop->index == rop->index) {
328 // E.g., insert before 2, delete 2..2; update replace
329 // text to include insert before, kill insert
// `iop` refers to the object deleted on the next line and is still read two lines below.
// NOTE(review): confirm the ordering of the delete and the read of iop->text.
330 delete rewrites[iop->instructionIndex];
331 rewrites[iop->instructionIndex] = nullptr;
332 rop->text = iop->text + (!rop->text.empty() ? rop->text : "");
334 else if (iop->index > rop->index && iop->index <= rop->lastIndex) {
335 // delete insert as it's a no-op.
336 delete rewrites[iop->instructionIndex];
337 rewrites[iop->instructionIndex] = nullptr;
340 // Drop any prior replaces contained within
341 std::vector<ReplaceOp*> prevReplaces = getKindOfOps<ReplaceOp>(rewrites, i);
342 for (auto *prevRop : prevReplaces) {
343 if (prevRop->index >= rop->index && prevRop->lastIndex <= rop->lastIndex) {
344 // delete replace as it's a no-op.
345 delete rewrites[prevRop->instructionIndex];
346 rewrites[prevRop->instructionIndex] = nullptr;
349 // throw exception unless disjoint or identical
350 bool disjoint = prevRop->lastIndex < rop->index || prevRop->index > rop->lastIndex;
351 // Delete special case of replace (text==null):
352 // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right)
353 if (prevRop->text.empty() && rop->text.empty() && !disjoint) {
// As above, `prevRop` refers to the object deleted here and is read on the following lines.
// NOTE(review): confirm the ordering of the delete and the reads of prevRop.
354 delete rewrites[prevRop->instructionIndex];
355 rewrites[prevRop->instructionIndex] = nullptr; // kill first delete
356 rop->index = std::min(prevRop->index, rop->index);
357 rop->lastIndex = std::max(prevRop->lastIndex, rop->lastIndex);
// NOTE(review): this writes to stdout on every merge and streams `rop`, a pointer, so it
// presumably prints an address; it looks like leftover debug output — confirm.
358 std::cout << "new rop " << rop << std::endl;
360 else if (!disjoint) {
361 throw IllegalArgumentException("replace op boundaries of " + rop->toString() +
362 " overlap with previous " + prevRop->toString());
// Pass 2: visit the insert operations and reconcile each with the operations before it.
368 for (size_t i = 0; i < rewrites.size(); i++) {
369 InsertBeforeOp *iop = dynamic_cast<InsertBeforeOp *>(rewrites[i]);
373 // combine current insert with prior if any at same index
375 std::vector<InsertBeforeOp *> prevInserts = getKindOfOps<InsertBeforeOp>(rewrites, i);
376 for (auto *prevIop : prevInserts) {
377 if (prevIop->index == iop->index) { // combine objects
378 // convert to strings...we're in process of toString'ing
379 // whole token buffer so no lazy eval issue with any templates
380 iop->text = catOpText(&iop->text, &prevIop->text);
381 // delete redundant prior insert
382 delete rewrites[prevIop->instructionIndex];
383 rewrites[prevIop->instructionIndex] = nullptr;
386 // look for replaces where iop.index is in range; error
387 std::vector<ReplaceOp*> prevReplaces = getKindOfOps<ReplaceOp>(rewrites, i);
388 for (auto *rop : prevReplaces) {
389 if (iop->index == rop->index) {
// The insert's text is merged into the replace that starts at the same index.
390 rop->text = catOpText(&iop->text, &rop->text);
392 rewrites[i] = nullptr; // delete current insert
395 if (iop->index >= rop->index && iop->index <= rop->lastIndex) {
396 throw IllegalArgumentException("insert op " + iop->toString() + " within boundaries of previous " + rop->toString());
// Final step: collect the surviving (non-null) operations into the result map by token index.
401 std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> m;
402 for (TokenStreamRewriter::RewriteOperation *op : rewrites) {
403 if (op == nullptr) { // ignore deleted ops
406 if (m.count(op->index) > 0) {
407 throw RuntimeException("should only be one op per index");
415 std::string TokenStreamRewriter::catOpText(std::string *a, std::string *b) {