1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2 * Use of this file is governed by the BSD 3-clause license that
3 * can be found in the LICENSE.txt file in the project root.
11 * Useful for rewriting out a buffered input token stream after doing some
12 * augmentation or other manipulations on it.
15 * You can insert stuff, replace, and delete chunks. Note that the operations
16 * are done lazily--only if you convert the buffer to a {@link String} with
17 * {@link TokenStream#getText()}. This is very efficient because you are not
18 * moving data around all the time. As the buffer of tokens is converted to
19 * strings, the {@link #getText()} method(s) scan the input token stream and
20 * check to see if there is an operation at the current index. If so, the
21 * operation is done and then normal {@link String} rendering continues on the
22 * buffer. This is like having multiple Turing machine instruction streams
23 * (programs) operating on a single input tape. :)</p>
26 * This rewriter makes no modifications to the token stream. It does not ask the
27 * stream to fill itself up nor does it advance the input cursor. The token
28 * stream {@link TokenStream#index()} will return the same value before and
29 * after any {@link #getText()} call.</p>
32 * The rewriter only works on tokens that you have in the buffer and ignores the
33 * current input cursor. If you are buffering tokens on-demand, calling
34 * {@link #getText()} halfway through the input will only do rewrites for those
35 * tokens in the first half of the file.</p>
38 * Since the operations are done lazily at {@link #getText}-time, operations do
39 * not disturb the token index values. That is, an insert operation at token
40 * index {@code i} does not change the index values for tokens
41 * {@code i}+1..n-1.</p>
44 * Because operations never actually alter the buffer, you may always get the
45 * original token stream back without undoing anything. Since the instructions
46 * are queued up, you can easily simulate transactions and roll back any changes
47 * if there is an error just by removing instructions. For example,</p>
50 * CharStream input = new ANTLRFileStream("input");
51 * TLexer lex = new TLexer(input);
52 * CommonTokenStream tokens = new CommonTokenStream(lex);
53 * T parser = new T(tokens);
54 * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
59 * Then in the rules, you can execute (assuming rewriter is visible):</p>
64 * rewriter.insertAfter(t, "text to put after t");}
65 * rewriter.insertAfter(u, "text after u");}
66 * System.out.println(rewriter.getText());
70 * You can also have multiple "instruction streams" and get multiple rewrites
71 * from a single pass over the input. Just name the instruction streams and use
72 * that name again when printing the buffer. This could be useful for generating
73 * a C file and also its header file--all from the same buffer:</p>
76 * rewriter.insertAfter("pass1", t, "text to put after t");}
77 * rewriter.insertAfter("pass2", u, "text after u");}
78 * System.out.println(rewriter.getText("pass1"));
79 * System.out.println(rewriter.getText("pass2"));
83 * If you don't use named rewrite streams, a "default" stream is used as the
84 * first example shows.</p>
86 class ANTLR4CPP_PUBLIC TokenStreamRewriter {
/// Name of the program that the overloads without a programName argument
/// presumably operate on (the value is defined outside this header -- confirm).
88 static const std::string DEFAULT_PROGRAM_NAME;
/// With C++17 or later the constants below are static constexpr members.
89 #if __cplusplus >= 201703L
90 static constexpr size_t PROGRAM_INIT_SIZE = 100;
91 static constexpr size_t MIN_TOKEN_INDEX = 0;
/// NOTE(review): the enumerator-style entry below looks like the pre-C++17
/// fallback for the same constant; its surrounding lines are not visible here.
94 PROGRAM_INIT_SIZE = 100,
/// Construct a rewriter for the given token stream.
99 TokenStreamRewriter(TokenStream *tokens);
100 virtual ~TokenStreamRewriter();
/// Accessor for the token stream this rewriter works on.
102 TokenStream *getTokenStream();
/// Roll back by instruction index only; presumably forwards to the named
/// overload below using the default program -- confirm in the implementation.
104 virtual void rollback(size_t instructionIndex);
106 /// Rollback the instruction stream for a program so that
107 /// the indicated instruction (via instructionIndex) is no
108 /// longer in the stream. UNTESTED!
109 virtual void rollback(const std::string &programName, size_t instructionIndex);
/// Reset a program without naming it; presumably the default program -- confirm.
111 virtual void deleteProgram();
113 /// Reset the program so that no instructions exist.
114 virtual void deleteProgram(const std::string &programName);
/// insertAfter overloads: queue `text` to be emitted after a token, identified
/// either by Token pointer or by token index, optionally in a named program.
115 virtual void insertAfter(Token *t, const std::string& text);
116 virtual void insertAfter(size_t index, const std::string& text);
117 virtual void insertAfter(const std::string &programName, Token *t, const std::string& text);
118 virtual void insertAfter(const std::string &programName, size_t index, const std::string& text);
/// insertBefore overloads: same shapes as insertAfter, but the text goes
/// before the token.
120 virtual void insertBefore(Token *t, const std::string& text);
121 virtual void insertBefore(size_t index, const std::string& text);
122 virtual void insertBefore(const std::string &programName, Token *t, const std::string& text);
123 virtual void insertBefore(const std::string &programName, size_t index, const std::string& text);
/// replace overloads: queue a replacement of a single token (index / indexT)
/// or of a token range (from..to) with `text`, optionally in a named program.
125 virtual void replace(size_t index, const std::string& text);
126 virtual void replace(size_t from, size_t to, const std::string& text);
127 virtual void replace(Token *indexT, const std::string& text);
128 virtual void replace(Token *from, Token *to, const std::string& text);
129 virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text);
130 virtual void replace(const std::string &programName, Token *from, Token *to, const std::string& text);
/// Delete overloads: mirror the replace overloads but take no replacement text.
/// Spelled with a capital D, presumably because `delete` is a C++ keyword.
132 virtual void Delete(size_t index);
133 virtual void Delete(size_t from, size_t to);
134 virtual void Delete(Token *indexT);
135 virtual void Delete(Token *from, Token *to);
136 virtual void Delete(const std::string &programName, size_t from, size_t to);
137 virtual void Delete(const std::string &programName, Token *from, Token *to);
/// Last rewrite token index without a program name; presumably refers to the
/// default program's entry in _lastRewriteTokenIndexes -- confirm.
139 virtual size_t getLastRewriteTokenIndex();
141 /// Return the text from the original tokens altered per the
142 /// instructions given to this rewriter.
143 virtual std::string getText();
145 /** Return the text from the original tokens altered per the
146 * instructions given to this rewriter in programName.
148 std::string getText(std::string programName);
150 /// Return the text associated with the tokens in the interval from the
151 /// original token stream but with the alterations given to this rewriter.
152 /// The interval refers to the indexes in the original token stream.
153 /// We do not alter the token stream in any way, so the indexes
154 /// and intervals are still consistent. Includes any operations done
155 /// to the first and last token in the interval. So, if you did an
156 /// insertBefore on the first token, you would get that insertion.
157 /// The same is true if you do an insertAfter the stop token.
158 virtual std::string getText(const misc::Interval &interval);
/// Interval form of getText that additionally takes the program name.
160 virtual std::string getText(const std::string &programName, const misc::Interval &interval);
/// Base class of the rewrite instructions stored in a program's operation list.
163 class RewriteOperation {
165 /// What index into rewrites List are we?
169 /// Token buffer index.
/// NOTE(review): the field name suggests this is the operation's position in
/// its program's instruction list rather than a token index; the comment
/// directly above may belong to a field not visible here -- confirm.
170 size_t instructionIndex;
/// Constructors take the enclosing rewriter and a token index, optionally
/// with the text carried by the operation.
172 RewriteOperation(TokenStreamRewriter *outerInstance, size_t index);
173 RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text);
174 virtual ~RewriteOperation();
176 /// Execute the rewrite operation by possibly adding to the buffer.
177 /// Return the index of the next token to operate on.
179 virtual size_t execute(std::string *buf);
180 virtual std::string toString();
/// Pointer to the enclosing rewriter; the pointer itself is const.
183 TokenStreamRewriter *const outerInstance;
184 void InitializeInstanceFields();
/// "I.i.u" in the notation used further down: insert text u before the token
/// at index i.
187 class InsertBeforeOp : public RewriteOperation {
189 TokenStreamRewriter *const outerInstance;
192 InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text);
194 virtual size_t execute(std::string *buf) override;
/// "R.x-y.u" in the notation used further down: replace the tokens indexed
/// x-y with text u.
197 class ReplaceOp : public RewriteOperation {
199 TokenStreamRewriter *const outerInstance;
204 ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text);
205 virtual size_t execute(std::string *buf) override;
206 virtual std::string toString() override;
209 void InitializeInstanceFields();
212 /// Our source stream
213 TokenStream *const tokens;
215 /// You may have multiple, named streams of rewrite operations.
216 /// I'm calling these things "programs."
217 /// Maps String (name) -> rewrite (List)
/// NOTE(review): operations are held as raw pointers; who frees them is not
/// visible in this header.
218 std::map<std::string, std::vector<RewriteOperation*>> _programs;
221 /// Map String (program name) -> Integer index
222 std::map<std::string, size_t> _lastRewriteTokenIndexes;
/// Per-program accessors for the last rewrite token index.
223 virtual size_t getLastRewriteTokenIndex(const std::string &programName);
224 virtual void setLastRewriteTokenIndex(const std::string &programName, size_t i);
/// Returns a reference to the operation list of the named program.
225 virtual std::vector<RewriteOperation*>& getProgram(const std::string &name);
228 /// We need to combine operations and report invalid operations (like
229 /// overlapping replaces that are not completely nested). Inserts to
230 /// same index need to be combined etc... Here are the cases:
232 /// I.i.u I.j.v leave alone, nonoverlapping
233 /// I.i.u I.i.v combine: Iivu
235 /// R.i-j.u R.x-y.v | i-j in x-y delete first R
236 /// R.i-j.u R.i-j.v delete first R
237 /// R.i-j.u R.x-y.v | x-y in i-j ERROR
238 /// R.i-j.u R.x-y.v | boundaries overlap ERROR
240 /// Delete special case of replace (text==null):
241 /// D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right)
243 /// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before
244 /// we're not deleting i)
245 /// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping
246 /// R.x-y.v I.i.u | i in x-y ERROR
247 /// R.x-y.v I.x.u R.x-y.uv (combine, delete I)
248 /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping
250 /// I.i.u = insert u before op @ index i
251 /// R.x-y.u = replace x-y indexed tokens with u
253 /// First we need to examine replaces. For any replace op:
255 /// 1. wipe out any insertions before op within that range.
256 /// 2. Drop any replace op before that is contained completely within
258 /// 3. Throw exception upon boundary overlap with any previous replace.
260 /// Then we can deal with inserts:
262 /// 1. for any inserts to same index, combine even if not adjacent.
263 /// 2. for any prior replace with same left boundary, combine this
264 /// insert with replace and delete this replace.
265 /// 3. throw exception if index in same range as previous replace
267 /// Don't actually delete; make op null in list. Easier to walk list.
268 /// Later we can throw as we add to index -> op map.
270 /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
271 /// inserted stuff would be before the replace range. But, if you
272 /// add tokens in front of a method body '{' and then delete the method
273 /// body, I think the stuff before the '{' you added should disappear too.
275 /// Return a map from token index to operation.
277 virtual std::unordered_map<size_t, RewriteOperation*> reduceToSingleOperationPerIndex(std::vector<RewriteOperation*> &rewrites);
/// Combines the text of two operations; the arguments are pointers, presumably
/// so that either may be null -- confirm in the implementation.
279 virtual std::string catOpText(std::string *a, std::string *b);
281 /// Get all operations before an index of a particular kind.
/// NOTE(review): `rewrites` is taken by value, so every call copies the vector.
282 template <typename T>
283 std::vector<T *> getKindOfOps(std::vector<RewriteOperation *> rewrites, size_t before) {
284 std::vector<T *> ops;
285 for (size_t i = 0; i < before && i < rewrites.size(); i++) {
286 T *op = dynamic_cast<T *>(rewrites[i]);
287 if (op == nullptr) { // ignore deleted or non matching entries
/// Returns a reference to the operation list set up for `name`; presumably
/// creates the entry in _programs -- confirm in the implementation.
296 std::vector<RewriteOperation *>& initializeProgram(const std::string &name);
300 } // namespace antlr4