X-Git-Url: https://gitweb.ps.run/toc/blobdiff_plain/9f94b672a5dc32da5ad01742bd4e976315a30d9c..c6ad2948bb98d42f8e0883ef82cd14cd2d5eda60:/antlr4-cpp-runtime-4.9.2-source/runtime/src/TokenStreamRewriter.h?ds=sidebyside diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/TokenStreamRewriter.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/TokenStreamRewriter.h new file mode 100644 index 0000000..561607a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/TokenStreamRewriter.h @@ -0,0 +1,300 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +namespace antlr4 { + + /** + * Useful for rewriting out a buffered input token stream after doing some + * augmentation or other manipulations on it. + * + *

+ * You can insert stuff, replace, and delete chunks. Note that the operations + * are done lazily--only if you convert the buffer to a {@link String} with + * {@link TokenStream#getText()}. This is very efficient because you are not + * moving data around all the time. As the buffer of tokens is converted to + * strings, the {@link #getText()} method(s) scan the input token stream and + * check to see if there is an operation at the current index. If so, the + * operation is done and then normal {@link String} rendering continues on the + * buffer. This is like having multiple Turing machine instruction streams + * (programs) operating on a single input tape. :)

+ * + *

+ * This rewriter makes no modifications to the token stream. It does not ask the + * stream to fill itself up nor does it advance the input cursor. The token + * stream {@link TokenStream#index()} will return the same value before and + * after any {@link #getText()} call.

+ * + *

+ * The rewriter only works on tokens that you have in the buffer and ignores the + * current input cursor. If you are buffering tokens on-demand, calling + * {@link #getText()} halfway through the input will only do rewrites for those + * tokens in the first half of the file.

+ * + *

+ * Since the operations are done lazily at {@link #getText}-time, operations do + * not screw up the token index values. That is, an insert operation at token + * index {@code i} does not change the index values for tokens + * {@code i}+1..n-1.

+ * + *

+ * Because operations never actually alter the buffer, you may always get the + * original token stream back without undoing anything. Since the instructions + * are queued up, you can easily simulate transactions and roll back any changes + * if there is an error just by removing instructions. For example,

+ * + *
+   * CharStream input = new ANTLRFileStream("input");
+   * TLexer lex = new TLexer(input);
+   * CommonTokenStream tokens = new CommonTokenStream(lex);
+   * T parser = new T(tokens);
+   * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
+   * parser.startRule();
+   * 
+ * + *

+ * Then in the rules, you can execute (assuming rewriter is visible):

+ * + *
+   * Token t,u;
+   * ...
+   * rewriter.insertAfter(t, "text to put after t");}
+   * rewriter.insertAfter(u, "text after u");}
+   * System.out.println(rewriter.getText());
+   * 
+ * + *

+ * You can also have multiple "instruction streams" and get multiple rewrites + * from a single pass over the input. Just name the instruction streams and use + * that name again when printing the buffer. This could be useful for generating + * a C file and also its header file--all from the same buffer:

+ * + *
+   * rewriter.insertAfter("pass1", t, "text to put after t");}
+   * rewriter.insertAfter("pass2", u, "text after u");}
+   * System.out.println(rewriter.getText("pass1"));
+   * System.out.println(rewriter.getText("pass2"));
+   * 
+ * + *

+ * If you don't use named rewrite streams, a "default" stream is used as the + * first example shows.

+ */ + class ANTLR4CPP_PUBLIC TokenStreamRewriter { + public: + static const std::string DEFAULT_PROGRAM_NAME; +#if __cplusplus >= 201703L + static constexpr size_t PROGRAM_INIT_SIZE = 100; + static constexpr size_t MIN_TOKEN_INDEX = 0; +#else + enum : size_t { + PROGRAM_INIT_SIZE = 100, + MIN_TOKEN_INDEX = 0, + }; +#endif + + TokenStreamRewriter(TokenStream *tokens); + virtual ~TokenStreamRewriter(); + + TokenStream *getTokenStream(); + + virtual void rollback(size_t instructionIndex); + + /// Rollback the instruction stream for a program so that + /// the indicated instruction (via instructionIndex) is no + /// longer in the stream. UNTESTED! + virtual void rollback(const std::string &programName, size_t instructionIndex); + + virtual void deleteProgram(); + + /// Reset the program so that no instructions exist. + virtual void deleteProgram(const std::string &programName); + virtual void insertAfter(Token *t, const std::string& text); + virtual void insertAfter(size_t index, const std::string& text); + virtual void insertAfter(const std::string &programName, Token *t, const std::string& text); + virtual void insertAfter(const std::string &programName, size_t index, const std::string& text); + + virtual void insertBefore(Token *t, const std::string& text); + virtual void insertBefore(size_t index, const std::string& text); + virtual void insertBefore(const std::string &programName, Token *t, const std::string& text); + virtual void insertBefore(const std::string &programName, size_t index, const std::string& text); + + virtual void replace(size_t index, const std::string& text); + virtual void replace(size_t from, size_t to, const std::string& text); + virtual void replace(Token *indexT, const std::string& text); + virtual void replace(Token *from, Token *to, const std::string& text); + virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text); + virtual void replace(const std::string &programName, Token *from, Token *to, const std::string& text); + + virtual void Delete(size_t index); + virtual void Delete(size_t from, size_t to); + virtual void Delete(Token *indexT); + virtual void Delete(Token *from, Token *to); + virtual void Delete(const std::string &programName, size_t from, size_t to); + virtual void Delete(const std::string &programName, Token *from, Token *to); + + virtual size_t getLastRewriteTokenIndex(); + + /// Return the text from the original tokens altered per the + /// instructions given to this rewriter. + virtual std::string getText(); + + /** Return the text from the original tokens altered per the + * instructions given to this rewriter in programName. + */ + std::string getText(std::string programName); + + /// Return the text associated with the tokens in the interval from the + /// original token stream but with the alterations given to this rewriter. + /// The interval refers to the indexes in the original token stream. + /// We do not alter the token stream in any way, so the indexes + /// and intervals are still consistent. Includes any operations done + /// to the first and last token in the interval. So, if you did an + /// insertBefore on the first token, you would get that insertion. + /// The same is true if you do an insertAfter the stop token. + virtual std::string getText(const misc::Interval &interval); + + virtual std::string getText(const std::string &programName, const misc::Interval &interval); + + protected: + class RewriteOperation { + public: + /// What index into rewrites List are we? + size_t index; + std::string text; + + /// Token buffer index. + size_t instructionIndex; + + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index); + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + virtual ~RewriteOperation(); + + /// Execute the rewrite operation by possibly adding to the buffer. + /// Return the index of the next token to operate on. + + virtual size_t execute(std::string *buf); + virtual std::string toString(); + + private: + TokenStreamRewriter *const outerInstance; + void InitializeInstanceFields(); + }; + + class InsertBeforeOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + + virtual size_t execute(std::string *buf) override; + }; + + class ReplaceOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + size_t lastIndex; + + ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text); + virtual size_t execute(std::string *buf) override; + virtual std::string toString() override; + + private: + void InitializeInstanceFields(); + }; + + /// Our source stream + TokenStream *const tokens; + + /// You may have multiple, named streams of rewrite operations. + /// I'm calling these things "programs." + /// Maps String (name) -> rewrite (List) + std::map> _programs; + + /// + /// Map String (program name) -> Integer index + std::map _lastRewriteTokenIndexes; + virtual size_t getLastRewriteTokenIndex(const std::string &programName); + virtual void setLastRewriteTokenIndex(const std::string &programName, size_t i); + virtual std::vector& getProgram(const std::string &name); + + /// + /// We need to combine operations and report invalid operations (like + /// overlapping replaces that are not completed nested). Inserts to + /// same index need to be combined etc... Here are the cases: + /// + /// I.i.u I.j.v leave alone, nonoverlapping + /// I.i.u I.i.v combine: Iivu + /// + /// R.i-j.u R.x-y.v | i-j in x-y delete first R + /// R.i-j.u R.i-j.v delete first R + /// R.i-j.u R.x-y.v | x-y in i-j ERROR + /// R.i-j.u R.x-y.v | boundaries overlap ERROR + /// + /// Delete special case of replace (text==null): + /// D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + /// + /// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before + /// we're not deleting i) + /// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping + /// R.x-y.v I.i.u | i in x-y ERROR + /// R.x-y.v I.x.u R.x-y.uv (combine, delete I) + /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping + /// + /// I.i.u = insert u before op @ index i + /// R.x-y.u = replace x-y indexed tokens with u + /// + /// First we need to examine replaces. For any replace op: + /// + /// 1. wipe out any insertions before op within that range. + /// 2. Drop any replace op before that is contained completely within + /// that range. + /// 3. Throw exception upon boundary overlap with any previous replace. + /// + /// Then we can deal with inserts: + /// + /// 1. for any inserts to same index, combine even if not adjacent. + /// 2. for any prior replace with same left boundary, combine this + /// insert with replace and delete this replace. + /// 3. throw exception if index in same range as previous replace + /// + /// Don't actually delete; make op null in list. Easier to walk list. + /// Later we can throw as we add to index -> op map. + /// + /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the + /// inserted stuff would be before the replace range. But, if you + /// add tokens in front of a method body '{' and then delete the method + /// body, I think the stuff before the '{' you added should disappear too. + /// + /// Return a map from token index to operation. + /// + virtual std::unordered_map reduceToSingleOperationPerIndex(std::vector &rewrites); + + virtual std::string catOpText(std::string *a, std::string *b); + + /// Get all operations before an index of a particular kind. + template + std::vector getKindOfOps(std::vector rewrites, size_t before) { + std::vector ops; + for (size_t i = 0; i < before && i < rewrites.size(); i++) { + T *op = dynamic_cast(rewrites[i]); + if (op == nullptr) { // ignore deleted or non matching entries + continue; + } + ops.push_back(op); + } + return ops; + } + + private: + std::vector& initializeProgram(const std::string &name); + + }; + +} // namespace antlr4