X-Git-Url: https://gitweb.ps.run/toc/blobdiff_plain/9f94b672a5dc32da5ad01742bd4e976315a30d9c..c6ad2948bb98d42f8e0883ef82cd14cd2d5eda60:/antlr4-cpp-runtime-4.9.2-source/runtime/src/TokenStreamRewriter.h?ds=inline diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/TokenStreamRewriter.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/TokenStreamRewriter.h new file mode 100644 index 0000000..561607a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/TokenStreamRewriter.h @@ -0,0 +1,300 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +namespace antlr4 { + + /** + * Useful for rewriting out a buffered input token stream after doing some + * augmentation or other manipulations on it. + * + *
+ * You can insert stuff, replace, and delete chunks. Note that the operations + * are done lazily--only if you convert the buffer to a {@link String} with + * {@link TokenStream#getText()}. This is very efficient because you are not + * moving data around all the time. As the buffer of tokens is converted to + * strings, the {@link #getText()} method(s) scan the input token stream and + * check to see if there is an operation at the current index. If so, the + * operation is done and then normal {@link String} rendering continues on the + * buffer. This is like having multiple Turing machine instruction streams + * (programs) operating on a single input tape. :)
+ * + *+ * This rewriter makes no modifications to the token stream. It does not ask the + * stream to fill itself up nor does it advance the input cursor. The token + * stream {@link TokenStream#index()} will return the same value before and + * after any {@link #getText()} call.
+ * + *+ * The rewriter only works on tokens that you have in the buffer and ignores the + * current input cursor. If you are buffering tokens on-demand, calling + * {@link #getText()} halfway through the input will only do rewrites for those + * tokens in the first half of the file.
+ * + *+ * Since the operations are done lazily at {@link #getText}-time, operations do + * not screw up the token index values. That is, an insert operation at token + * index {@code i} does not change the index values for tokens + * {@code i}+1..n-1.
+ * + *+ * Because operations never actually alter the buffer, you may always get the + * original token stream back without undoing anything. Since the instructions + * are queued up, you can easily simulate transactions and roll back any changes + * if there is an error just by removing instructions. For example,
+ * + *
+ * CharStream input = new ANTLRFileStream("input");
+ * TLexer lex = new TLexer(input);
+ * CommonTokenStream tokens = new CommonTokenStream(lex);
+ * T parser = new T(tokens);
+ * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
+ * parser.startRule();
+ *
+ *
+ * + * Then in the rules, you can execute (assuming rewriter is visible):
+ * + *+ * Token t,u; + * ... + * rewriter.insertAfter(t, "text to put after t"); + * rewriter.insertAfter(u, "text after u"); + * System.out.println(rewriter.getText()); + *+ * + *
+ * You can also have multiple "instruction streams" and get multiple rewrites + * from a single pass over the input. Just name the instruction streams and use + * that name again when printing the buffer. This could be useful for generating + * a C file and also its header file--all from the same buffer:
+ * + *
+ * rewriter.insertAfter("pass1", t, "text to put after t");
+ * rewriter.insertAfter("pass2", u, "text after u");
+ * System.out.println(rewriter.getText("pass1"));
+ * System.out.println(rewriter.getText("pass2"));
+ *
+ *
+ * + * If you don't use named rewrite streams, a "default" stream is used as the + * first example shows.
+ */ + class ANTLR4CPP_PUBLIC TokenStreamRewriter { + public: + static const std::string DEFAULT_PROGRAM_NAME; +#if __cplusplus >= 201703L + static constexpr size_t PROGRAM_INIT_SIZE = 100; + static constexpr size_t MIN_TOKEN_INDEX = 0; +#else + enum : size_t { + PROGRAM_INIT_SIZE = 100, + MIN_TOKEN_INDEX = 0, + }; +#endif + + TokenStreamRewriter(TokenStream *tokens); + virtual ~TokenStreamRewriter(); + + TokenStream *getTokenStream(); + + virtual void rollback(size_t instructionIndex); + + /// Rollback the instruction stream for a program so that + /// the indicated instruction (via instructionIndex) is no + /// longer in the stream. UNTESTED! + virtual void rollback(const std::string &programName, size_t instructionIndex); + + virtual void deleteProgram(); + + /// Reset the program so that no instructions exist. + virtual void deleteProgram(const std::string &programName); + virtual void insertAfter(Token *t, const std::string& text); + virtual void insertAfter(size_t index, const std::string& text); + virtual void insertAfter(const std::string &programName, Token *t, const std::string& text); + virtual void insertAfter(const std::string &programName, size_t index, const std::string& text); + + virtual void insertBefore(Token *t, const std::string& text); + virtual void insertBefore(size_t index, const std::string& text); + virtual void insertBefore(const std::string &programName, Token *t, const std::string& text); + virtual void insertBefore(const std::string &programName, size_t index, const std::string& text); + + virtual void replace(size_t index, const std::string& text); + virtual void replace(size_t from, size_t to, const std::string& text); + virtual void replace(Token *indexT, const std::string& text); + virtual void replace(Token *from, Token *to, const std::string& text); + virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text); + virtual void replace(const std::string &programName, Token *from, Token 
*to, const std::string& text); + + virtual void Delete(size_t index); + virtual void Delete(size_t from, size_t to); + virtual void Delete(Token *indexT); + virtual void Delete(Token *from, Token *to); + virtual void Delete(const std::string &programName, size_t from, size_t to); + virtual void Delete(const std::string &programName, Token *from, Token *to); + + virtual size_t getLastRewriteTokenIndex(); + + /// Return the text from the original tokens altered per the + /// instructions given to this rewriter. + virtual std::string getText(); + + /** Return the text from the original tokens altered per the + * instructions given to this rewriter in programName. + */ + std::string getText(std::string programName); + + /// Return the text associated with the tokens in the interval from the + /// original token stream but with the alterations given to this rewriter. + /// The interval refers to the indexes in the original token stream. + /// We do not alter the token stream in any way, so the indexes + /// and intervals are still consistent. Includes any operations done + /// to the first and last token in the interval. So, if you did an + /// insertBefore on the first token, you would get that insertion. + /// The same is true if you do an insertAfter on the stop token. + virtual std::string getText(const misc::Interval &interval); + + virtual std::string getText(const std::string &programName, const misc::Interval &interval); + + protected: + class RewriteOperation { + public: + /// Token buffer index. + size_t index; + std::string text; + + /// What index into rewrites List are we? + size_t instructionIndex; + + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index); + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + virtual ~RewriteOperation(); + + /// Execute the rewrite operation by possibly adding to the buffer. + /// Return the index of the next token to operate on. 
+ + virtual size_t execute(std::string *buf); + virtual std::string toString(); + + private: + TokenStreamRewriter *const outerInstance; + void InitializeInstanceFields(); + }; + + class InsertBeforeOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + + virtual size_t execute(std::string *buf) override; + }; + + class ReplaceOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + size_t lastIndex; + + ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text); + virtual size_t execute(std::string *buf) override; + virtual std::string toString() override; + + private: + void InitializeInstanceFields(); + }; + + /// Our source stream + TokenStream *const tokens; + + /// You may have multiple, named streams of rewrite operations. + /// I'm calling these things "programs." + /// Maps String (name) -> rewrite (List) + std::map