From 66a27d2fc7c1ad4e97de76d4982168a0fed9920a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Patrick=20Sch=C3=B6nberger?= Date: Thu, 12 Aug 2021 10:32:51 +0200 Subject: [PATCH 1/1] comments --- src/check.h | 47 ----------------------------------------------- src/find.h | 32 ++++++++++++++++++++++---------- src/generic.h | 8 +++++++- src/main.cpp | 11 ++++++++++- src/repr.h | 7 +++++++ src/repr_get.h | 17 +++++++++++++++-- src/toc.h | 49 ++++++++++++++++++++++++++++++++----------------- src/typeInfo.h | 9 +++++++++ src/visit.h | 4 ++++ 9 files changed, 106 insertions(+), 78 deletions(-) delete mode 100644 src/check.h diff --git a/src/check.h b/src/check.h deleted file mode 100644 index 091b646..0000000 --- a/src/check.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include "repr.h" - -bool checkStmt( - const Stmt & s, - std::vector namespaces, - std::vector vars) -{ - return true; -} - -bool checkFunction( - const Function & f, - std::vector namespaces, - std::vector vars) -{ - vars.insert(vars.end(), f.parameters.begin(), f.parameters.end()); - vars.insert(vars.end(), f.body.variables.begin(), f.body.variables.end()); - for (auto s : f.body.statements) - { - if (!checkStmt(s, namespaces, vars)) - return false; - } - return true; -} - -bool checkProgram(const Program & p) -{ - for (auto f : p.functions) - { - if (!checkFunction(f, p.namespaces, p.variables)) - return false; - } - for (auto s : p.structs) - { - std::vector vars = p.variables; - for (auto v : s.members) - vars.push_back(v); - for (auto f : s.methods) - { - if (!checkFunction(f, p.namespaces, vars)) - return false; - } - } - return true; -} \ No newline at end of file diff --git a/src/find.h b/src/find.h index 3342536..cd7f577 100644 --- a/src/find.h +++ b/src/find.h @@ -8,7 +8,10 @@ template using opt = std::optional; +template +using tup = std::tuple; +// find an item in a vector by predicate template opt find(const std::vector & ts, std::function f) { @@ -18,6 +21,7 @@ opt find(const std::vector & ts, 
std::function f) return nullopt; } +// same as above but return pointer into raw array held by vector template opt findPtr(const std::vector & ts, std::function f) { @@ -27,12 +31,12 @@ opt findPtr(const std::vector & ts, std::function f) return nullopt; } -std::optional< - std::tuple< - std::shared_ptr, - std::vector>> +opt, + std::vector>> getContext(std::shared_ptr ctx, const std::vector & namespacePrefix) { + // try finding a continuous series of namespaces in a given context auto result = ctx; for (auto name : namespacePrefix) @@ -48,6 +52,8 @@ getContext(std::shared_ptr ctx, const std::vector & namesp } } + // if the found context is the end of a series of namespaces, also return + // a vector of namespace names std::vector namespaces; for (auto it = result; it != nullptr; it = it->parent) { @@ -65,9 +71,13 @@ getContext(std::shared_ptr ctx, const std::vector & namesp return std::make_tuple(result, namespaces); } +// all of the following functions work the same way, +// walking up the context hierarchy until the global context. 
+// return the first found instance that matches provided criteria +// there's also a variant to get a pointer instead for functions and +// structs used for generic instantiation - -opt>> findFunction( +opt>> findFunction( const std::string & name, const std::vector & namespacePrefix, std::shared_ptr ctx) @@ -85,7 +95,7 @@ opt>> findFunction( return nullopt; } -opt>> findFunctionPtr( +opt>> findFunctionPtr( const std::string & name, const std::vector & namespacePrefix, std::shared_ptr ctx) @@ -105,7 +115,7 @@ opt>> findFunctionPtr( -opt>> findStruct( +opt>> findStruct( const std::string & name, const std::vector & namespacePrefix, std::shared_ptr ctx) @@ -123,7 +133,7 @@ opt>> findStruct( return nullopt; } -opt>> findStructPtr( +opt>> findStructPtr( const std::string & name, const std::vector & namespacePrefix, std::shared_ptr ctx) @@ -143,7 +153,7 @@ opt>> findStructPtr( -opt>> findVariable( +opt>> findVariable( const std::string & name, const std::vector & namespacePrefix, std::shared_ptr ctx) @@ -163,6 +173,8 @@ opt>> findVariable( +// find struct members and pointer variants + opt> findStructMethod( const std::string & name, const Struct & s) diff --git a/src/generic.h b/src/generic.h index df203b3..33323e4 100644 --- a/src/generic.h +++ b/src/generic.h @@ -4,6 +4,7 @@ #include "typeInfo.h" #include "visit.h" +// add a generic instantiation if it's not in the vector already void addGenericInstantiation( std::vector> & insts, const std::vector & newInst) @@ -32,6 +33,8 @@ Program instantiateGenerics(const Program & p) // Find generic instantiations + // visit expressions (only function calls are considered) and types, + // find the function/struct by pointer and add an instantiation Visitor findGenericInstantiations; findGenericInstantiations.onExpr = [&](const Expr & e, const std::shared_ptr ctx) @@ -48,7 +51,6 @@ Program instantiateGenerics(const Program & p) addGenericInstantiation(std::get<0>(*f)->genericInstantiations, 
e._func.genericInstantiation); } } - // 
TODO: generic methods }; findGenericInstantiations.onType = [&](const Type & t, const std::shared_ptr ctx) @@ -72,6 +74,10 @@ Program instantiateGenerics(const Program & p) return result; } +// generate the appendix for C struct/function names +// including array/pointer indicators because +// there might be distinct instantiations +// for int and int* for example std::string genericAppendix(const std::vector & ts) { std::stringstream sstr; diff --git a/src/main.cpp b/src/main.cpp index 5b53b10..bd9a2b2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -17,27 +17,36 @@ int main(int argc, const char * argv[]) { std::ifstream ifs("test/test.toc"); + // create ANTLR input from filestream ANTLRInputStream input(ifs); + // lex input TocLexer lexer(&input); CommonTokenStream tokens(&lexer); + // parse TocParser parser(&tokens); + + // get Prog (root node) TocParser::ProgContext * prog = parser.prog(); - tree::ParseTree * tree = prog; + // don't continue on parse error if (parser.getNumberOfSyntaxErrors() > 0) { std::cerr << "Parsing error" << std::endl; return 1; } + // print raw parse tree + //tree::ParseTree * tree = prog; //std::string s = tree->toStringTree(&parser) + "\n"; //std::cout << "Parse Tree: " << s << std::endl; + // generate IR from tree and instantiate generics Program prg = getProgram(prog, nullptr); instantiateGenerics(prg); + // print to cout and file try { tocProgram(std::cout, prg); diff --git a/src/repr.h b/src/repr.h index 959e74d..569411c 100644 --- a/src/repr.h +++ b/src/repr.h @@ -8,6 +8,8 @@ using namespace std; +// This contains a 1 to 1 representation of the defined language + struct Type; struct Variable; struct Body; @@ -37,6 +39,9 @@ struct AssignStmt; struct ReturnStmt; struct Stmt; +// Context is a collection of everything that can be defined in a namespace +// that is reused for bodies so that the hierarchy can be walked uniformly +// both up and down using the parent variable struct Context { std::optional name; @@ -184,6 +189,8 @@ struct 
DotExpr std::string identifier; }; +// OperatorType enum with corresponding string array to lookup +// enum from string and the other way round enum class PrefixOperatorType { Plus, Minus, Increment, Decrement, diff --git a/src/repr_get.h b/src/repr_get.h index a1f496a..373b366 100644 --- a/src/repr_get.h +++ b/src/repr_get.h @@ -2,6 +2,8 @@ #include "repr.h" +// Transform ANTLR-generated types to corresponding IR types recursively + Type getType(TocParser::TypeContext * ctx); Variable getVariable(TocParser::VarContext * ctx); Body getBody(TocParser::BodyContext * ctx, std::shared_ptr parent); @@ -26,6 +28,11 @@ Expr getExpr(TocParser::ExprContext * ctx); Stmt getStmt(TocParser::StmtContext * ctx, std::shared_ptr parent); +// all of these functions get the relevant information +// from the parse tree and call each other for sub expressions +// the getVariable is called for variable declarations and parameter definitions +// for example, because they have the same rule in the grammar file + Type getType(TocParser::TypeContext * ctx) { Type result; @@ -216,7 +223,11 @@ OpType getOperatorType(const std::string & s, std::string typeStrings[]) - +// Expressions are somewhat of an exception, because some of their +// grammar rules are recursive, so they have to be defined +// in a single rule using Labels (https://github.com/antlr/antlr4/blob/master/doc/parser-rules.md#alternative-labels) +// Because this results in a polymorphic type, getExpr for the base expression type +// is always called and from there the polymorphic type is determined at runtime Expr getExpr(TocParser::FuncExprContext * ctx) { Expr result; @@ -361,7 +372,9 @@ Expr getExpr(TocParser::IdentifierExprContext * ctx) - +// this is always called for Expression rules +// attempt dynamic_cast at runtime and call corresponding +// function Expr getExpr(TocParser::ExprContext * ctx) { Expr result; diff --git a/src/toc.h b/src/toc.h index e95ab3e..095a72c 100644 --- a/src/toc.h +++ b/src/toc.h @@ -7,6 +7,7 @@ 
#include "generic.h" #include "typeInfo.h" +// print a generic vector with specified separator, optionally printing the separator at the end as well template std::string vectorStr (const std::vector & v, const std::string & separator, bool end = false) { @@ -54,28 +55,18 @@ static std::string namespacePrefix() { return sstr.str(); } +// mapping from generic typenames (which are just names) +// to actual instantiated types static std::map currentInstantiation; -static Program globalPrg; +// set current context so that lookups can be made correctly static std::shared_ptr globalCtx; - -// std::string getPrefix(std::shared_ptr ctx) -// { -// std::string result; -// for (auto it = ctx; it != nullptr; it = it->parent) -// { -// if (it->name.has_value()) -// { -// result = it->name.value() + "_" + result; -// } -// } -// return result; -// } - std::ostream & operator<< (std::ostream & out, const Type & t) { + // if the typename equals one of the current generic instantiations + // print instantiated type instead for (auto kv : currentInstantiation) { if (t.name == kv.first) @@ -87,11 +78,16 @@ std::ostream & operator<< (std::ostream & out, const Type & t) TypeInfo ti = typeType(globalCtx, t); if (ti.isStruct) out << "struct "; + // try finding type in current context auto s = findStruct(t.name, t.namespacePrefixes, globalCtx); + // print prefix for either found type or the specified + // prefix if type is not found (shouldn't happen) if (s.has_value()) out << vectorStr(std::get<1>(*s), "_", true) << t.name; else out << vectorStr(t.namespacePrefixes, "_", true) << t.name; + + // print generic appendix if (!t.genericInstantiation.empty()) out << genericAppendix(t.genericInstantiation); @@ -104,10 +100,13 @@ std::ostream & operator<< (std::ostream & out, const Variable & v) std::stringstream sstr; std::string s = v.name; + // lookup variable and change name to reflect containing namespace auto var = findVariable(v.name, namespaces, globalCtx); if (var.has_value()) 
s = 
vectorStr(std::get<1>(*var), "_", true) + s; + // nest modifiers, inverted because C defines them + // the opposite direction for (auto m = v.type.modifiers.rbegin(); m != v.type.modifiers.rend(); m++) { if (m->type == TypeModifierType::Pointer) @@ -166,6 +165,7 @@ std::ostream & operator<< (std::ostream & out, const Expr & e) { case ExprType::Func: { + // print function call auto f = findFunction(e._func.functionName, e._func.namespacePrefixes, globalCtx); if (std::get<0>(*f).defined) @@ -178,6 +178,8 @@ std::ostream & operator<< (std::ostream & out, const Expr & e) } case ExprType::Method: { + // get TypeInfo on the Expression that the method is called on + // then print method call TypeInfo ti = typeExpr(globalCtx, *e._method.expr); out << vectorStr(ti.type.namespacePrefixes, "_", true) << @@ -215,6 +217,7 @@ std::ostream & operator<< (std::ostream & out, const Expr & e) case ExprType::Bracket: out << *e._brackets.lexpr << "[" << *e._brackets.rexpr << "]"; break; case ExprType::Identifier: + // try variable lookup auto v = findVariable(e._identifier.identifier, e._identifier.namespacePrefixes, globalCtx); if (v.has_value()) out << vectorStr(std::get<1>(*v), "_", true); @@ -264,8 +267,10 @@ std::ostream & operator<< (std::ostream & out, const Stmt & s) void tocFunction (std::ostream & out, const Function & f, bool stub) { - if (!stub && !f.defined) return; + // for a function that is not defined, only the stub can be printed + if (!f.defined && !stub) return; + // regular function if (f.genericTypeNames.empty()) { out << f.returnType << " " << namespacePrefix() << f.name << " (" << vectorStr(f.parameters, ", ") << ")"; @@ -279,10 +284,13 @@ void tocFunction (std::ostream & out, const Function & f, bool stub) out << "\n" << f.body; } } + // generic function else { + // print one instance per instantiation for (auto instantiation : f.genericInstantiations) { + // set global type mapping for (int i = 0; i < f.genericTypeNames.size(); i++) { 
currentInstantiation[f.genericTypeNames[i]] = instantiation[i]; @@ -305,6 +313,7 @@ void tocFunction (std::ostream & out, const Function & f, bool stub) } void tocStruct (std::ostream & out, const Struct & s, bool stub) { + // regular struct if (s.genericTypeNames.empty()) { out << "struct " << namespacePrefix() << s.name; @@ -315,6 +324,7 @@ void tocStruct (std::ostream & out, const Struct & s, bool stub) { Function f = m; + // add implicit this parameter f.parameters.insert(f.parameters.begin(), {"this", { @@ -346,6 +356,8 @@ void tocStruct (std::ostream & out, const Struct & s, bool stub) for (auto m : s.methods) { Function f = m; + + // add implicit this parameter f.parameters.insert(f.parameters.begin(), {"this", { @@ -361,6 +373,7 @@ void tocStruct (std::ostream & out, const Struct & s, bool stub) " (" << vectorStr(f.parameters, ", ") << ")\n" << f.body; } } + // generic struct else { for (auto instantiation : s.genericInstantiations) @@ -378,6 +391,7 @@ void tocStruct (std::ostream & out, const Struct & s, bool stub) { Function f = m; + // add implicit this parameter f.parameters.insert(f.parameters.begin(), {"this", { @@ -409,6 +423,8 @@ void tocStruct (std::ostream & out, const Struct & s, bool stub) for (auto m : s.methods) { Function f = m; + + // add implicit this parameter f.parameters.insert(f.parameters.begin(), {"this", { @@ -432,7 +448,6 @@ void tocProgram (std::ostream & out, const Program & p) { globalCtx = p.ctx; - globalPrg = p; for (auto n : p.ctx->namespaces) { tocNamespace(out, n, true); diff --git a/src/typeInfo.h b/src/typeInfo.h index e813612..7862ef5 100644 --- a/src/typeInfo.h +++ b/src/typeInfo.h @@ -12,6 +12,7 @@ struct TypeInfo TypeInfo typeType(std::shared_ptr globalCtx, Type t) { + // used to differentiate basic types from user defined types TypeInfo result; result.isStruct = true; if (t.name == "int" || t.name == "float" || t.name == "double" || @@ -33,6 +34,7 @@ TypeInfo typeExpr(std::shared_ptr globalCtx, Expr e) { case 
ExprType::Func: { + // get type info from return type auto f = findFunction(e._func.functionName, e._func.namespacePrefixes, globalCtx); if (!f.has_value()) throw "Unknown function"; @@ -41,6 +43,7 @@ TypeInfo typeExpr(std::shared_ptr globalCtx, Expr e) } case ExprType::Method: { + // get type info from return type TypeInfo tiCaller = typeExpr(globalCtx, *e._method.expr); if (!tiCaller.isStruct) throw "Calling method on non-struct"; @@ -54,6 +57,7 @@ TypeInfo typeExpr(std::shared_ptr globalCtx, Expr e) break; } case ExprType::Lit: + // literal types are defined result.isStruct = false; switch (e._lit.type) { @@ -68,6 +72,8 @@ TypeInfo typeExpr(std::shared_ptr globalCtx, Expr e) break; case ExprType::Dot: { + // assume dot access is always member access + // and lookup struct variable auto tiCaller = typeExpr(globalCtx, *e._dot.expr); if (!tiCaller.isStruct) throw "Accessing member of non-struct"; @@ -94,6 +100,8 @@ TypeInfo typeExpr(std::shared_ptr globalCtx, Expr e) break; case ExprType::Bracket: { + // get type of expr and remove array/ptr modifier to get + // type of [] access TypeInfo ti = typeExpr(globalCtx, *e._brackets.lexpr); if (!ti.type.modifiers.empty()) { @@ -107,6 +115,7 @@ TypeInfo typeExpr(std::shared_ptr globalCtx, Expr e) } case ExprType::Identifier: { + // var lookup and return var type auto v = findVariable(e._identifier.identifier, e._identifier.namespacePrefixes, globalCtx); if (!v.has_value()) throw "Unknown variable"; diff --git a/src/visit.h b/src/visit.h index 1175a86..646ac61 100644 --- a/src/visit.h +++ b/src/visit.h @@ -4,6 +4,9 @@ #include +// struct with callback functions for all relevant types +// tree can be walked selectively by providing only +// needed callbacks struct Visitor { std::function ctx)> onType = [](auto, auto){}; std::function ctx)> onExpr = [](auto, auto){}; @@ -20,6 +23,7 @@ struct Visitor { #define VISIT(XS) for (auto x : XS) visit(x); +// simply walk IR by recursively calling functions for all children struct 
Visit { private: Visitor v; -- 2.50.1