├── .gitignore ├── ControlFlowGraph ├── BasicBlock.cpp ├── BasicBlock.hpp ├── ControlFlowGraph.cpp ├── ControlFlowGraph.hpp ├── IRStatements.cpp └── IRStatements.hpp ├── IRGeneration ├── AbstractVisitor.hpp ├── GraphVizPrinter.cpp ├── GraphVizPrinter.hpp ├── IRGenerator.cpp ├── IRGenerator.hpp ├── LLVMIRGenerator.cpp └── LLVMIRGenerator.hpp ├── Lexer ├── Lexer.cpp ├── Lexer.hpp ├── Token.cpp └── Token.hpp ├── Parser ├── Expressions.cpp ├── Expressions.hpp ├── Parser.cpp └── Parser.hpp ├── README.md ├── build.sh ├── main.cpp └── program.txt /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Xcode files 3 | *.plist 4 | -------------------------------------------------------------------------------- /ControlFlowGraph/BasicBlock.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // BaseBlock.cpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 02.06.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #include "BasicBlock.hpp" 10 | using namespace std; 11 | 12 | void BasicBlock::AddStatement(AbstractStatement *statement) { 13 | statements.push_back(statement); 14 | } 15 | 16 | void BasicBlock::AddLink(BasicBlock *pred, BasicBlock *succ) { 17 | if(std::find(succ->preds.begin(), succ->preds.end(), pred) == succ->preds.end()) { 18 | succ->preds.push_back(pred); 19 | } 20 | 21 | if(std::find(pred->succs.begin(), pred->succs.end(), succ) == pred->succs.end()) { 22 | pred->succs.push_back(succ); 23 | } 24 | } 25 | 26 | std::string BasicBlock::stringValue() { 27 | return "bb #" + std::to_string(_index) + " " + _label; 28 | } 29 | -------------------------------------------------------------------------------- /ControlFlowGraph/BasicBlock.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // BaseBlock.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 02.06.17. 
6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #ifndef BasicBlock_hpp 10 | #define BasicBlock_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | class AbstractStatement; 17 | class BasicBlock; 18 | 19 | /* In syntax analysis, a basic block is a straight-line code sequence with no branches in except to the entry and no branches out except at the exit. 20 | * Basic blocks form the vertices or nodes in a control flow graph. 21 | */ 22 | 23 | class BasicBlock { 24 | private: 25 | int _index; 26 | std::string _label; 27 | public: 28 | std::string stringValue(); 29 | std::vector statements; 30 | std::vector succs; 31 | std::vector preds; 32 | 33 | // Dominator Tree 34 | BasicBlock *dominator = nullptr; // null until a dominator is assigned; readers test this pointer before using it 35 | std::vector domimatingBlocks; 36 | 37 | void AddStatement(AbstractStatement *statement); 38 | static void AddLink(BasicBlock *pred, BasicBlock *succ); 39 | 40 | BasicBlock(int index, std::string label): _index(index), _label(label) {}; 41 | }; 42 | 43 | #endif /* BasicBlock_hpp */ 44 | -------------------------------------------------------------------------------- /ControlFlowGraph/ControlFlowGraph.cpp: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // ControlFlowGraph.cpp 4 | // Compiler 5 | // 6 | // Created by Филипп Федяков on 02.06.17. 7 | // Copyright © 2017 filletofish. All rights reserved.
8 | // 9 | 10 | #include "ControlFlowGraph.hpp" 11 | #include "BasicBlock.hpp" 12 | #include 13 | 14 | 15 | using namespace std; 16 | 17 | void ControlFlowGraph::AddBasicBlock(BasicBlock *bb) { 18 | basicBlocks.push_back(bb); 19 | } 20 | 21 | void ControlFlowGraph::ComputePredOrder() { 22 | if (!_bbInPredOrder) { 23 | _bbVisitedMap.clear(); 24 | _bbInPredOrder = new vector; 25 | PredOrderDFS(basicBlocks.front()); 26 | } 27 | } 28 | 29 | void ControlFlowGraph::PredOrderDFS(BasicBlock *bb) { 30 | _bbVisitedMap[bb] = 1; 31 | _bbInPredOrder->push_back(bb); 32 | for (auto next : bb->succs) { if (!_bbVisitedMap[next]) PredOrderDFS(next); } 33 | } 34 | 35 | void ControlFlowGraph::ComputePostOrder() { 36 | if (!_bbInPostOrder) { 37 | _bbVisitedMap.clear(); 38 | _bbInPostOrder = new vector; 39 | PostOrderDFS(basicBlocks.front()); 40 | } 41 | } 42 | 43 | void ControlFlowGraph::PostOrderDFS(BasicBlock *bb) { 44 | _bbVisitedMap[bb] = 1; 45 | for (auto next : bb->succs) { if (!_bbVisitedMap[next]) PostOrderDFS(next); } 46 | _bbInPostOrder->push_back(bb); 47 | } 48 | 49 | void ControlFlowGraph::ComputeDominatorTree() { 50 | if (!_bbInPredOrder) ComputePredOrder(); 51 | 52 | // compute dominators 53 | for (auto it : *_bbInPredOrder) { 54 | _bbVisitedMap.clear(); 55 | _bbVisitedMap[it] = 1; 56 | DomDFS(_bbInPredOrder->front()); 57 | for (auto jt : * _bbInPredOrder) { 58 | if (!_bbVisitedMap[jt]) { 59 | jt->dominator = it; 60 | } 61 | } 62 | } 63 | 64 | // compute chilrenMap 65 | for (auto v : basicBlocks) { 66 | BasicBlock *dominator = v->dominator; 67 | if (dominator) { 68 | dominator->domimatingBlocks.push_back(v); 69 | } 70 | } 71 | } 72 | 73 | void ControlFlowGraph::DomDFS(BasicBlock *bb) { 74 | if (!_bbVisitedMap[bb]) { 75 | _bbVisitedMap[bb] = 1; 76 | for (auto next : bb->succs) { if (!_bbVisitedMap[next]) DomDFS(next);} 77 | } 78 | } 79 | 80 | void ControlFlowGraph::ComputeBaseDominanceFrontier() { 81 | if (_dominanceFrontier) { return;} 82 | _dominanceFrontier = new map>; 
83 | for (auto v : basicBlocks) { 84 | _dominanceFrontier->insert(pair>(v, set())); 85 | } 86 | for (auto x : *_bbInPostOrder) { 87 | for (auto y : x->succs) { 88 | if (y->dominator != x) 89 | _dominanceFrontier->at(x).insert(y); 90 | } 91 | 92 | for (auto z : x->domimatingBlocks) { 93 | for (auto y : _dominanceFrontier->at(z)) { 94 | if (y->dominator != x) 95 | _dominanceFrontier->at(x).insert(y); 96 | } 97 | } 98 | } 99 | } 100 | 101 | set ControlFlowGraph::GetMergedDominanceFrontierFromSubSet(set subSet) { 102 | if (!_dominanceFrontier) ComputeBaseDominanceFrontier(); 103 | 104 | set mergedDF; 105 | for (auto v : subSet) { 106 | set df = _dominanceFrontier->at(v); 107 | mergedDF.insert(df.begin(), df.end()); 108 | } 109 | return mergedDF; 110 | } 111 | 112 | set ControlFlowGraph::GetDominanceFrontierForSubSet(std::set subSet) { 113 | set result; 114 | set dfp; 115 | bool hasChanged = true; 116 | dfp = GetMergedDominanceFrontierFromSubSet(subSet); 117 | while (hasChanged) { 118 | hasChanged = false; 119 | dfp.insert(subSet.begin(), subSet.end()); 120 | dfp = GetMergedDominanceFrontierFromSubSet(dfp); 121 | if (result != dfp) { 122 | result = dfp; 123 | hasChanged = true; 124 | } 125 | } 126 | 127 | return result; 128 | } 129 | 130 | void ControlFlowGraph::CommitAllChanges() { 131 | ComputePredOrder(); 132 | ComputePostOrder(); 133 | ComputeDominatorTree(); 134 | ComputeBaseDominanceFrontier(); 135 | } 136 | 137 | -------------------------------------------------------------------------------- /ControlFlowGraph/ControlFlowGraph.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // ControlFlowGraph.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 02.06.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 
7 | // 8 | 9 | #ifndef ControlFlowGraph_hpp 10 | #define ControlFlowGraph_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | class BasicBlock; 18 | 19 | class ControlFlowGraph { 20 | private: 21 | std::vector* _bbInPredOrder = nullptr; // lazily built caches: the Compute* methods treat a null pointer as "not computed yet" 22 | std::vector* _bbInPostOrder = nullptr; 23 | std::map _bbVisitedMap; 24 | std::map>* _dominanceFrontier = nullptr; 25 | 26 | void PredOrderDFS(BasicBlock *bb); 27 | void PostOrderDFS(BasicBlock *bb); 28 | void DomDFS(BasicBlock *bb); 29 | 30 | void ComputePredOrder(); 31 | void ComputePostOrder(); 32 | void ComputeDominatorTree(); 33 | void ComputeBaseDominanceFrontier(); 34 | std::set GetMergedDominanceFrontierFromSubSet(std::set subSet); 35 | public: 36 | 37 | std::vector basicBlocks; 38 | std::set GetDominanceFrontierForSubSet(std::set subSet); 39 | void CommitAllChanges(); 40 | void AddBasicBlock(BasicBlock *bb); 41 | }; 42 | #endif /* ControlFlowGraph_hpp */ 43 | -------------------------------------------------------------------------------- /ControlFlowGraph/IRStatements.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // IRStatements.cpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 02.06.17. 6 | // Copyright © 2017 filletofish. All rights reserved.
7 | // 8 | 9 | #include "IRStatements.hpp" 10 | #include "../Parser/Expressions.hpp" 11 | #include "BasicBlock.hpp" 12 | 13 | // TODO: Classes for variables and values, not to use Expressions 14 | 15 | std::string AssignStatement::Dump() { 16 | return var->stringValue() + " = " + rhs->stringValue(); 17 | } 18 | 19 | std::string BranchStatement::Dump() { 20 | if (isConditional) { 21 | return "branch on: " + condition->stringValue() + " to: " + firstBranchBB->stringValue() + " or: " + secondBranchBB->stringValue(); 22 | } else { 23 | return "branch to: " + firstBranchBB->stringValue(); 24 | } 25 | } 26 | 27 | std::string PhiNodeStatement::Dump() { 28 | std::string argEnumeration; 29 | for (auto arg : bbToVarMap) { 30 | argEnumeration += arg.second->stringValue() + " " + arg.first->stringValue() + "; "; 31 | } 32 | return var->stringValue() + " = [" + argEnumeration + "]"; 33 | } 34 | -------------------------------------------------------------------------------- /ControlFlowGraph/IRStatements.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // IRStatements.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 02.06.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 
7 | // 8 | 9 | #ifndef IRInstructions_hpp 10 | #define IRInstructions_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | #include "../IRGeneration/AbstractVisitor.hpp" 16 | 17 | class AbstractExpression; 18 | class NumberExpression; 19 | class VariableExpession; 20 | class AssignExpression; 21 | class IfExpression; 22 | class ForExpression; 23 | class BinaryExpression; 24 | class ControlFlowGraph; 25 | class BasicBlock; 26 | 27 | enum StatementType { 28 | ASSIGN = 0, 29 | BRANCH = 1, 30 | PHI = 2 31 | }; 32 | 33 | class AbstractStatement { 34 | public: 35 | StatementType type; 36 | virtual std::string Dump() = 0; 37 | AbstractStatement(StatementType type) : type(type) {}; 38 | virtual void Accept(AbstractVisitor * visitor) = 0; 39 | }; 40 | 41 | 42 | class AssignStatement: public AbstractStatement { 43 | public: 44 | VariableExpession *var; 45 | AbstractExpression *rhs; 46 | 47 | std::string Dump() override; 48 | AssignStatement(VariableExpession *var, AbstractExpression *rhs) : AbstractStatement(ASSIGN), var(var), rhs(rhs) {}; 49 | void Accept(AbstractVisitor *visitor) override { visitor->Visit(this);} 50 | }; 51 | 52 | class BranchStatement: public AbstractStatement { 53 | 54 | private: 55 | BranchStatement (AbstractExpression *condition, BasicBlock *firstBranchBB, BasicBlock *secondBranchBB, bool isCond) : AbstractStatement(BRANCH), condition(condition), firstBranchBB(firstBranchBB), secondBranchBB(secondBranchBB), isConditional(isCond) {}; 56 | public: 57 | bool isConditional; 58 | AbstractExpression *condition; 59 | BasicBlock *firstBranchBB; 60 | BasicBlock *secondBranchBB; 61 | std::string Dump() override; 62 | BranchStatement (AbstractExpression *condition, BasicBlock *trueBranch, BasicBlock *falseBranch) : BranchStatement(condition, trueBranch, falseBranch, true) {}; 63 | BranchStatement (BasicBlock *bb) : BranchStatement(nullptr, bb, nullptr, false) {}; 64 | void Accept(AbstractVisitor *visitor) override { visitor->Visit(this);} 65 | }; 66 | 67 | 68 
| class PhiNodeStatement: public AbstractStatement { 69 | public: 70 | VariableExpession *var; 71 | std::map bbToVarMap; 72 | std::string Dump() override; 73 | PhiNodeStatement (VariableExpession *var, std::map bbToVarMap) : bbToVarMap(bbToVarMap), var(var), AbstractStatement(PHI) {}; 74 | void Accept(AbstractVisitor *visitor) override { } 75 | }; 76 | 77 | #endif /* IRInstructions_hpp */ 78 | -------------------------------------------------------------------------------- /IRGeneration/AbstractVisitor.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // AbstractVisitor.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 05.06.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #ifndef AbstractVisitor_hpp 10 | #define AbstractVisitor_hpp 11 | 12 | #include 13 | class NumberExpression; 14 | class VariableExpession; 15 | class AssignExpression; 16 | class IfExpression; 17 | class ForExpression; 18 | class BinaryExpression; 19 | class AbstractExpression; 20 | class AbstractStatement; 21 | class BranchStatement; 22 | class AssignStatement; 23 | 24 | 25 | class AbstractVisitor { 26 | public: 27 | virtual void Visit(NumberExpression *exp) = 0; 28 | virtual void Visit(VariableExpession *exp) = 0; 29 | virtual void Visit(AssignExpression *exp) = 0; 30 | virtual void Visit(IfExpression *exp) = 0; 31 | virtual void Visit(ForExpression *exp) = 0; 32 | virtual void Visit(BinaryExpression *exp) = 0; 33 | virtual void Visit(BranchStatement *stmt) = 0; 34 | virtual void Visit(AssignStatement *stmt) = 0; 35 | }; 36 | #endif /* AbstractVisitor_hpp */ 37 | -------------------------------------------------------------------------------- /IRGeneration/GraphVizPrinter.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // GraphVizPrinter.cpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 05.06.17. 6 | // Copyright © 2017 filletofish. 
All rights reserved. 7 | // 8 | 9 | #include "GraphVizPrinter.hpp" 10 | #include "../ControlFlowGraph/ControlFlowGraph.hpp" 11 | #include "../ControlFlowGraph/BasicBlock.hpp" 12 | 13 | void GraphVizPrinter::print() { 14 | printf("digraph G {\n"); 15 | for (auto bb : _cfg->basicBlocks) { 16 | for (auto succ : bb->succs) { 17 | printf("\t\"%s\" -> \"%s\"\n", bb->stringValue().c_str(), succ->stringValue().c_str()); 18 | } 19 | } 20 | printf("}\n"); 21 | } 22 | -------------------------------------------------------------------------------- /IRGeneration/GraphVizPrinter.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // GraphVizPrinter.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 05.06.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #ifndef GraphVizPrinter_hpp 10 | #define GraphVizPrinter_hpp 11 | 12 | #include 13 | 14 | class ControlFlowGraph; 15 | 16 | 17 | class GraphVizPrinter { 18 | ControlFlowGraph *_cfg; 19 | 20 | public: 21 | void print(); 22 | GraphVizPrinter(ControlFlowGraph *cfg) : _cfg(cfg) {} 23 | }; 24 | 25 | #endif /* GraphVizPrinter_hpp */ 26 | -------------------------------------------------------------------------------- /IRGeneration/IRGenerator.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // CustomIRGeneration.cpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 02.06.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 
7 | // 8 | 9 | #include "IRGenerator.hpp" 10 | #include "../Parser/Expressions.hpp" 11 | #include "../ControlFlowGraph/ControlFlowGraph.hpp" 12 | #include "../ControlFlowGraph/BasicBlock.hpp" 13 | #include "../ControlFlowGraph/IRStatements.hpp" 14 | #include "GraphVizPrinter.hpp" 15 | 16 | #include 17 | 18 | using namespace std; 19 | 20 | BasicBlock* IRGenerator::CreateBB(string label) { 21 | int nextIndex = (int)cfg->basicBlocks.size(); 22 | BasicBlock *bb = new BasicBlock(nextIndex, label); 23 | cfg->AddBasicBlock(bb); 24 | return bb; 25 | } 26 | 27 | IRGenerator::IRGenerator() { 28 | cfg = new ControlFlowGraph(); 29 | BasicBlock *bb = CreateBB("entry"); 30 | currentBB = bb; 31 | entryBB = bb; 32 | } 33 | 34 | void IRGenerator::CreateBr(BasicBlock *targetBB) { 35 | BasicBlock::AddLink(currentBB, targetBB); 36 | BranchStatement *branchStmt = new BranchStatement(targetBB); 37 | currentBB->AddStatement(branchStmt); 38 | } 39 | 40 | void IRGenerator::CreateConditionalBr(AbstractExpression *condition, BasicBlock *thenBB, BasicBlock *elseBB) { 41 | BasicBlock::AddLink(currentBB, thenBB); 42 | BasicBlock::AddLink(currentBB, elseBB); 43 | BranchStatement *branchStmt = new BranchStatement(condition, thenBB, elseBB); 44 | currentBB->AddStatement(branchStmt); 45 | } 46 | 47 | int IRGenerator::GenerateIR(AbstractExpression *exp) { 48 | exp->Accept(this); 49 | return _latestValue; 50 | } 51 | 52 | void IRGenerator::Visit(NumberExpression *exp) { 53 | _latestValue = exp->value; 54 | } 55 | 56 | void IRGenerator::Visit(VariableExpession *exp) { 57 | int* value = namedValues[exp->name]; 58 | if (!value) 59 | return LogError("Unknown variable name"); 60 | _latestValue = *value; 61 | } 62 | 63 | void IRGenerator::Visit(AssignExpression *exp) { 64 | int assignValue = GenerateIR(exp->expr); 65 | bblocksForVar[exp->varExp->name].insert(currentBB); // recorded after the rhs is generated: this is the block the AssignStatement below is appended to 66 | namedValues[exp->varName()] = new int(assignValue); // heap copy: the map outlives this call, so it must not hold the address of the local 67 | currentBB->AddStatement(new AssignStatement(exp->varExp, exp->expr)); 68 |
_latestValue = assignValue; 69 | } 70 | 71 | void IRGenerator::Visit(IfExpression *exp) { 72 | BasicBlock *thenBB = CreateBB("then"); 73 | BasicBlock *elseBB = CreateBB("else"); 74 | BasicBlock *mergeBB = CreateBB("if_cont"); 75 | 76 | CreateConditionalBr(exp->conditionExp, thenBB, elseBB); // argument order is (condition, thenBB, elseBB): a true condition must reach "then" 77 | 78 | currentBB = thenBB; 79 | GenerateIR(exp->thenExp); 80 | CreateBr(mergeBB); 81 | 82 | currentBB = elseBB; 83 | GenerateIR(exp->elseExp); 84 | CreateBr(mergeBB); 85 | 86 | currentBB = mergeBB; 87 | 88 | _latestValue = 0; 89 | } 90 | 91 | void IRGenerator::Visit(ForExpression *exp) { 92 | // TODO: Refactor to make less pseudo steps and to copy var instead of creating new 93 | 94 | bblocksForVar[exp->index->name].insert(currentBB); 95 | // Emit the start code first, without 'variable' in scope. 96 | int startVal = GenerateIR(exp->start); 97 | VariableExpession *pseudoVarForStart = new VariableExpession(exp->index->name); 98 | currentBB->AddStatement(new AssignStatement(pseudoVarForStart, exp->start)); 99 | 100 | int *oldVal = namedValues[exp->index->name]; 101 | namedValues[exp->index->name] = &startVal; 102 | 103 | 104 | BasicBlock *loopCoonditionBB = CreateBB("loop_cond"); 105 | CreateBr(loopCoonditionBB); 106 | 107 | currentBB = loopCoonditionBB; 108 | 109 | 110 | // Compute the end condition. 111 | bblocksForVar[exp->index->name].insert(currentBB); 112 | GenerateIR(exp->end); 113 | // Make the new basic block for the loop body 114 | BasicBlock *loopBodyBB = CreateBB("loop_body"); 115 | BasicBlock *loopAfterBB = CreateBB("loop_cont"); 116 | // MARK: Make pseudo step it in expression 117 | VariableExpession *pseudoVarForConditionCheck = new VariableExpession(exp->index->name); 118 | BinaryExpression *pseudoCompExp = new BinaryExpression('-', exp->end, pseudoVarForConditionCheck); 119 | 120 | CreateConditionalBr(pseudoCompExp, loopBodyBB, loopAfterBB); 121 | 122 | // Start insertion in LoopBB. 123 | currentBB = loopBodyBB; 124 | // Emit the body of the loop.
This, like any other expr, can change the 125 | // current BB. Note that we ignore the value computed by the body, but don't 126 | // allow an error. 127 | GenerateIR(exp->body); 128 | 129 | // MARK: Make pseudo step it in expression 130 | bblocksForVar[exp->index->name].insert(currentBB); 131 | BinaryExpression *pseudoStepExp = new BinaryExpression('+', new VariableExpession(exp->index->name), new NumberExpression(1)); 132 | currentBB->AddStatement(new AssignStatement(exp->index, pseudoStepExp)); 133 | 134 | CreateBr(loopCoonditionBB); 135 | 136 | // Any new code will be inserted in AfterBB. 137 | currentBB = loopAfterBB; 138 | 139 | // Restore the unshadowed variable. 140 | if (oldVal) 141 | namedValues[exp->index->name] = oldVal; 142 | else 143 | namedValues.erase(exp->index->name); 144 | 145 | // for expr always returns 0. 146 | _latestValue = 0; 147 | } 148 | 149 | void IRGenerator::Visit(BinaryExpression *exp) { 150 | 151 | int lhsValue = GenerateIR(exp->lhs); 152 | int rhsValue = GenerateIR(exp->rhs); 153 | 154 | switch (exp->op) { 155 | case '+': 156 | _latestValue = lhsValue + rhsValue; 157 | return; 158 | case '-': 159 | _latestValue = lhsValue - rhsValue; 160 | return; 161 | default: 162 | LogError("invalid binary operator"); 163 | return; 164 | }; 165 | } 166 | 167 | void IRGenerator::LogError(const char * str) { 168 | fprintf(stderr, "Error: %s\n", str); 169 | _latestValue = 0; 170 | } 171 | 172 | 173 | void IRGenerator::CommitBuildingAndDump() { 174 | cfg->CommitAllChanges(); 175 | 176 | InsertPhiNodes(); 177 | 178 | BuildSSAForm(); 179 | 180 | 181 | for (auto it = cfg->basicBlocks.begin(); it != cfg->basicBlocks.end(); ++it) { 182 | BasicBlock *bb = *it; 183 | string succs_enumeration = ""; 184 | for (auto const &s : bb->succs) { succs_enumeration += s->stringValue() + " "; } 185 | 186 | 187 | string preds_enumeration = ""; 188 | for (auto const &s : bb->preds) { preds_enumeration += s->stringValue() + " "; } 189 | 190 | string dominator; 191 | if 
(bb->dominator) { 192 | dominator = bb->dominator->stringValue(); 193 | } else { 194 | dominator = ""; 195 | } 196 | printf("\n%s\n\t\tpreds: %s\n\t\tsuccs: %s\n\t\tdominatedBy: %s\n", bb->stringValue().c_str(), preds_enumeration.c_str(), succs_enumeration.c_str(), dominator.c_str()); 197 | 198 | for (auto it = bb->statements.begin(); it != bb->statements.end(); ++it) { 199 | printf("\t%s\n", (*it)->Dump().c_str()); 200 | } 201 | } 202 | } 203 | 204 | GraphVizPrinter IRGenerator::GetGraphVizPrinter() { 205 | return GraphVizPrinter(cfg); 206 | } 207 | 208 | 209 | // MARK: Phi Nodes 210 | 211 | void IRGenerator::InsertPhiNodes() { 212 | for (auto it : bblocksForVar) { 213 | std::string variableName = it.first; 214 | std::set assignedInBlocks = it.second; 215 | std::set dominanceFrontier = cfg->GetDominanceFrontierForSubSet(assignedInBlocks); 216 | for (auto bb : dominanceFrontier) { 217 | std::map bbToVarMap; 218 | 219 | for (auto pred : bb->preds) { 220 | bbToVarMap[pred] = new VariableExpession(variableName); 221 | } 222 | PhiNodeStatement *phiStmt = new PhiNodeStatement(new VariableExpession(variableName), bbToVarMap); 223 | bb->statements.insert(bb->statements.begin(), phiStmt); 224 | } 225 | } 226 | } 227 | 228 | // MARK: VAR SEARCH 229 | 230 | class VarSearchVisitor : public AbstractVisitor { 231 | private: 232 | std::set vars; 233 | public: 234 | void Visit(NumberExpression *exp) override; 235 | void Visit(VariableExpession *exp) override; 236 | void Visit(AssignExpression *exp) override; 237 | void Visit(IfExpression *exp) override; 238 | void Visit(ForExpression *exp) override; 239 | void Visit(BinaryExpression *exp) override; 240 | virtual void Visit(BranchStatement *stmt) override; 241 | virtual void Visit(AssignStatement *stmt) override; 242 | std::set AllVarsUsedInStatement(AbstractStatement *statement); 243 | }; 244 | 245 | std::set VarSearchVisitor::AllVarsUsedInStatement(AbstractStatement *statement){ 246 | vars.clear(); 247 | statement->Accept(this); 
248 | return vars; 249 | } 250 | 251 | 252 | void VarSearchVisitor::Visit(BranchStatement *stmt) { 253 | if (stmt->condition) 254 | stmt->condition->Accept(this); 255 | } 256 | void VarSearchVisitor::Visit(AssignStatement *stmt) { 257 | stmt->rhs->Accept(this); 258 | } 259 | void VarSearchVisitor::Visit(NumberExpression *exp) {} 260 | void VarSearchVisitor::Visit(VariableExpession *exp) { vars.insert(exp); } 261 | void VarSearchVisitor::Visit(AssignExpression *exp) { 262 | vars.insert(exp->varExp); 263 | exp->expr->Accept(this); 264 | } 265 | void VarSearchVisitor::Visit(IfExpression *exp) { 266 | // On current version IfExpression can't be placed in ASSIGN statements 267 | assert("ForExpression can not be reached by VarSearchVisitor."); 268 | exp->conditionExp->Accept(this); 269 | exp->thenExp->Accept(this); 270 | exp->elseExp->Accept(this); 271 | } 272 | void VarSearchVisitor::Visit(BinaryExpression *exp) { 273 | exp->lhs->Accept(this); 274 | exp->rhs->Accept(this); 275 | } 276 | void VarSearchVisitor::Visit(ForExpression *exp) { 277 | // On current version ForExpr can't be placed in ASSIGN statements 278 | assert("ForExpression can not be reached by VarSearchVisitor."); 279 | vars.insert(exp->index); 280 | exp->end->Accept(this); 281 | exp->body->Accept(this); 282 | exp->start->Accept(this); 283 | } 284 | 285 | // MARK: SSA Form 286 | 287 | class SSAFormer { 288 | private: 289 | int counter; 290 | std::vector stack; 291 | VarSearchVisitor varSearcher = VarSearchVisitor(); 292 | void TraverseBBWithVar(BasicBlock *bb, std::string varName); 293 | ControlFlowGraph *cfg; 294 | public: 295 | void RenameVarToSSAForm(std::string varName); 296 | SSAFormer(ControlFlowGraph *cfg) : cfg(cfg) {}; 297 | }; 298 | 299 | 300 | void IRGenerator::BuildSSAForm() { 301 | SSAFormer ssaFormer = SSAFormer(cfg); 302 | for (auto pair : bblocksForVar) { 303 | std::string var = pair.first; 304 | ssaFormer.RenameVarToSSAForm(var); 305 | } 306 | } 307 | 308 | 309 | void 
SSAFormer::RenameVarToSSAForm(std::string varName) { 310 | counter = 0; 311 | stack.clear(); 312 | TraverseBBWithVar(cfg->basicBlocks.front(), varName); 313 | } 314 | 315 | void SSAFormer::TraverseBBWithVar(BasicBlock *bb, std::string varName) { 316 | // Stack depth on entry: every index pushed while processing this block is dropped again before returning. 317 | const auto depthOnEntry = stack.size(); 318 | 319 | for (auto stmt : bb->statements) { 320 | // Renaming vars in all rhs 321 | if (stmt->type != PHI) { 322 | set vars = varSearcher.AllVarsUsedInStatement(stmt); 323 | for (auto var : vars) { 324 | // A use with no reaching definition has no version yet; skip it rather than call back() on an empty stack. 325 | if (var->name == varName && !stack.empty()) 326 | var->SetSSAIndex(stack.back()); 327 | } 328 | } 329 | 330 | // Renaming vars in all lhs 331 | if (stmt->type == ASSIGN) { 332 | AssignStatement *assignStmt = static_cast(stmt); 333 | if (assignStmt->var->name == varName) { 334 | assignStmt->var->SetSSAIndex(counter); 335 | stack.push_back(counter); 336 | counter += 1; 337 | } 338 | } 339 | 340 | if (stmt->type == PHI) { 341 | PhiNodeStatement *phiStmt = static_cast(stmt); 342 | if (phiStmt->var->name == varName) { 343 | phiStmt->var->SetSSAIndex(counter); 344 | stack.push_back(counter); 345 | counter += 1; 346 | } 347 | } 348 | } 349 | 350 | for (auto succBB : bb->succs) { 351 | for (auto stmt : succBB->statements) { 352 | if (stmt->type == PHI) { 353 | PhiNodeStatement *phiStmt = static_cast(stmt); 354 | if (!stack.empty() && phiStmt->bbToVarMap.count(bb) && phiStmt->bbToVarMap[bb]->name == varName) { 355 | phiStmt->bbToVarMap[bb]->SetSSAIndex(stack.back()); 356 | } 357 | } 358 | } 359 | } 360 | 361 | for (auto child : bb->domimatingBlocks) { 362 | TraverseBBWithVar(child, varName); 363 | } 364 | 365 | // Undo this block's pushes. PHI definitions push as well as ASSIGNs, so popping once per ASSIGN left PHI indices behind for sibling subtrees. 366 | stack.resize(depthOnEntry); 367 | } 368 | -------------------------------------------------------------------------------- /IRGeneration/IRGenerator.hpp: -------------------------------------------------------------------------------- 1 | // 2 | //
CustomIRGeneration.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 02.06.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #ifndef CustomIRGeneration_hpp 10 | #define CustomIRGeneration_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "AbstractVisitor.hpp" 19 | 20 | class NumberExpression; 21 | class VariableExpession; 22 | class AssignExpression; 23 | class IfExpression; 24 | class ForExpression; 25 | class BinaryExpression; 26 | class ControlFlowGraph; 27 | class BasicBlock; 28 | class AbstractExpression; 29 | class AbstractStatement; 30 | class BranchStatement; 31 | class AssignStatement; 32 | class GraphVizPrinter; 33 | 34 | 35 | class IRGenerator : public AbstractVisitor { 36 | private: 37 | std::map> bblocksForVar; 38 | public: 39 | void Visit(NumberExpression *exp) override; 40 | void Visit(VariableExpession *exp) override; 41 | void Visit(AssignExpression *exp) override; 42 | void Visit(IfExpression *exp) override; 43 | void Visit(ForExpression *exp) override; 44 | void Visit(BinaryExpression *exp) override; 45 | 46 | // not implemented, because visits only expressions 47 | virtual void Visit(BranchStatement *stmt) override {}; 48 | virtual void Visit(AssignStatement *stmt) override {}; 49 | 50 | int GenerateIR(AbstractExpression *exp); 51 | 52 | 53 | /** 54 | * Dumps all IR in stdout. 55 | * Should be called after all IR Generated. 56 | */ 57 | void CommitBuildingAndDump(); 58 | 59 | /** 60 | Creates new GraphVizPrinter to present control flow graph in graphviz notation. 
61 | 62 | @return instanse of GraphVizPrinter 63 | */ 64 | GraphVizPrinter GetGraphVizPrinter(); 65 | 66 | IRGenerator(); 67 | 68 | private: 69 | int _latestValue; 70 | std::map namedValues; 71 | BasicBlock *currentBB; 72 | BasicBlock *entryBB; 73 | ControlFlowGraph *cfg; 74 | 75 | BasicBlock *CreateBB(std::string label); 76 | void CreateBr(BasicBlock *targetBB); 77 | void CreateConditionalBr(AbstractExpression *condition, BasicBlock *thenBB, BasicBlock *elseBB); 78 | 79 | void InsertPhiNodes(); 80 | void BuildSSAForm(); 81 | void LogError(const char*); 82 | }; 83 | #endif /* CustomIRGeneration_hpp */ 84 | -------------------------------------------------------------------------------- /IRGeneration/LLVMIRGenerator.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // LLVMIRGenerator.cpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 28.05.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #include "LLVMIRGenerator.hpp" 10 | #include "../Parser/Expressions.hpp" 11 | #include "../ControlFlowGraph/ControlFlowGraph.hpp" 12 | #include "../ControlFlowGraph/BasicBlock.hpp" 13 | 14 | using namespace llvm; 15 | 16 | llvm::Value* LLVMIRGenerator::GenerateIR(AbstractExpression *exp) { 17 | exp->Accept(this); 18 | return _latestValue; 19 | } 20 | 21 | void LLVMIRGenerator::Visit(NumberExpression *exp){ 22 | _latestValue = ConstantInt::get(*TheContext, APInt(32, exp->value, false)); 23 | } 24 | 25 | void LLVMIRGenerator::Visit(VariableExpession *exp) { 26 | llvm::AllocaInst *alloca = namedValues[exp->name]; 27 | if (!alloca) { 28 | LogError("Unknown variable name"); 29 | return; 30 | } 31 | _latestValue = Builder->CreateLoad(alloca, exp->name.c_str()); 32 | } 33 | 34 | void LLVMIRGenerator::Visit(BinaryExpression *exp) { 35 | llvm::Value *lhsValue = GenerateIR(exp->lhs); 36 | llvm::Value *rhsValue = GenerateIR(exp->rhs); 37 | if (!lhsValue || !rhsValue) 38 | return; 39 | 40 | switch (exp->op) { 
41 | case '+': 42 | _latestValue = Builder->CreateAdd(lhsValue, rhsValue, "addtmp"); // operands are 32-bit integers, so use the integer add, not the floating-point one 43 | return; 44 | case '-': 45 | _latestValue = Builder->CreateSub(lhsValue, rhsValue, "subtmp"); // integer subtract, for the same reason 46 | return; 47 | default: 48 | return LogError("invalid binary operator"); 49 | } 50 | } 51 | 52 | void LLVMIRGenerator::Visit(AssignExpression *exp) { 53 | llvm::Value *assignValue = GenerateIR(exp->expr); 54 | if (!assignValue) 55 | return; 56 | if (namedValues.count(exp->varName()) != 0) { 57 | llvm::AllocaInst *Alloca = namedValues[exp->varName()]; 58 | Builder->CreateStore(assignValue, Alloca); 59 | } else { 60 | llvm::AllocaInst *Alloca = Builder->CreateAlloca(llvm::Type::getInt32Ty(*TheContext), 0, exp->varName().c_str()); 61 | Builder->CreateStore(assignValue, Alloca); 62 | namedValues[exp->varName()] = Alloca; 63 | } 64 | _latestValue = assignValue; 65 | } 66 | 67 | 68 | void LLVMIRGenerator::Visit(IfExpression *exp) { 69 | llvm::Value *CondV = GenerateIR(exp->conditionExp); 70 | if (!CondV) 71 | return; 72 | 73 | CondV = Builder->CreateICmpNE( 74 | CondV, llvm::ConstantInt::get(*TheContext, llvm::APInt(32,0,false)), "ifcond"); 75 | 76 | Function *TheFunction = Builder->GetInsertBlock()->getParent(); 77 | 78 | // Create blocks for the then and else cases. Insert the 'then' block at the 79 | // end of the function. 80 | llvm::BasicBlock *ThenBB = llvm::BasicBlock::Create(*TheContext, "then", TheFunction); 81 | llvm::BasicBlock *ElseBB = llvm::BasicBlock::Create(*TheContext, "else"); 82 | llvm::BasicBlock *MergeBB = llvm::BasicBlock::Create(*TheContext, "ifcont"); 83 | 84 | // creating temp variable for storing return value; the alloca is emitted before the conditional branch because a terminator must be the last instruction of its block 85 | std::string temp_varName = "temp_var"; 86 | llvm::AllocaInst *tempVarAlloca = Builder->CreateAlloca(llvm::Type::getInt32Ty(*TheContext), 0, "temp_var"); 87 | 88 | Builder->CreateCondBr(CondV, ThenBB, ElseBB); 89 | 90 | 91 | 92 | // Generate the value.
93 | Builder->SetInsertPoint(ThenBB); 94 | 95 | llvm::Value *ThenV = GenerateIR(exp->thenExp); 96 | if (!ThenV) 97 | 98 | return; 99 | 100 | Builder->CreateStore(ThenV, tempVarAlloca); 101 | 102 | Builder->CreateBr(MergeBB); 103 | // Кодогенерация 'Then' может изменить текущий блок, обновляем ThenBB для PHI. 104 | ThenBB = Builder->GetInsertBlock(); 105 | 106 | /// Генерируем блок else. 107 | TheFunction->getBasicBlockList().push_back(ElseBB); 108 | Builder->SetInsertPoint(ElseBB); 109 | 110 | llvm::Value *ElseV = GenerateIR(exp->elseExp); 111 | if (!ElseV) 112 | return; 113 | 114 | Builder->CreateStore(ElseV, tempVarAlloca); 115 | Builder->CreateBr(MergeBB); 116 | // Кодогенерация 'Else' может изменить текущий блок, обновляем ElseBB для PHI. 117 | // codegen of 'Else' can change the current block, update ElseBB for the PHI. 118 | ElseBB = Builder->GetInsertBlock(); 119 | 120 | // Emit merge block. 121 | TheFunction->getBasicBlockList().push_back(MergeBB); 122 | Builder->SetInsertPoint(MergeBB); 123 | 124 | llvm::Value *resultValue = Builder->CreateLoad(tempVarAlloca, "temp_var"); 125 | 126 | _latestValue = resultValue; 127 | } 128 | 129 | 130 | void LLVMIRGenerator::Visit(ForExpression *exp) { 131 | 132 | Function *TheFunction = Builder->GetInsertBlock()->getParent(); 133 | 134 | // Create an alloca for the variable in the entry block. 135 | AllocaInst *Alloca =Builder->CreateAlloca(llvm::Type::getInt32Ty(*TheContext), 0, exp->index->name.c_str()); 136 | 137 | // Emit the start code first, without 'variable' in scope. 138 | llvm::Value *StartVal = GenerateIR(exp->start); 139 | if (!StartVal) 140 | return; 141 | 142 | // Store the value into the alloca. 143 | Builder->CreateStore(StartVal, Alloca); 144 | // If it shadows an existing variable, we have to restore it, so save it now. 
145 | llvm::AllocaInst *OldVal = namedValues[exp->index->name]; 146 | namedValues[exp->index->name] = Alloca; 147 | 148 | llvm::BasicBlock *loopCoonditionBB = llvm::BasicBlock::Create(*TheContext, "loopCoonditionBB", TheFunction); 149 | Builder->CreateBr(loopCoonditionBB); 150 | Builder->SetInsertPoint(loopCoonditionBB); 151 | 152 | // Compute the end condition. 153 | llvm::Value *EndCond = GenerateIR(exp->end); 154 | if (!EndCond) 155 | return; 156 | 157 | llvm::Value *CurVar = Builder->CreateLoad(Alloca, exp->index->name.c_str()); 158 | EndCond = Builder->CreateICmpSLT(CurVar, EndCond, "loopcond"); 159 | 160 | // Make the new basic block for the loop body 161 | llvm::BasicBlock *LoopBB = llvm::BasicBlock::Create(*TheContext, "loop", TheFunction); 162 | llvm::BasicBlock *AfterBB = llvm::BasicBlock::Create(*TheContext, "afterloop", TheFunction); 163 | Builder->CreateCondBr(EndCond, LoopBB, AfterBB); 164 | 165 | 166 | 167 | // Start insertion in LoopBB. 168 | Builder->SetInsertPoint(LoopBB); 169 | // Emit the body of the loop. This, like any other expr, can change the 170 | // current BB. Note that we ignore the value computed by the body, but don't 171 | // allow an error. 172 | if (!GenerateIR(exp->body)) 173 | return; 174 | // Emit the step value. 175 | llvm::Value *StepVal = nullptr; 176 | if (exp->step) { 177 | StepVal = GenerateIR(exp->step); 178 | if (!StepVal) 179 | return; 180 | } else { 181 | // If not specified, use 1.0. 182 | StepVal = llvm::ConstantInt::get(*TheContext, llvm::APInt(32, 1, false)); 183 | } 184 | // Reload, increment, and restore the alloca. This handles the case where 185 | // the body of the loop mutates the variable. 186 | CurVar = Builder->CreateLoad(Alloca, exp->index->name.c_str()); 187 | llvm::Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar"); 188 | Builder->CreateStore(NextVar, Alloca); 189 | Builder->CreateBr(loopCoonditionBB); 190 | 191 | // Any new code will be inserted in AfterBB. 
192 | Builder->SetInsertPoint(AfterBB); 193 | 194 | // Restore the unshadowed variable. 195 | if (OldVal) 196 | namedValues[exp->index->name] = OldVal; 197 | else 198 | namedValues.erase(exp->index->name); 199 | 200 | // for expr always returns 0. 201 | _latestValue = llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*TheContext)); 202 | } 203 | 204 | void LLVMIRGenerator::LogError(const char *Str) { 205 | fprintf(stderr, "Error: %s\n", Str); 206 | _latestValue = nullptr; 207 | } 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | -------------------------------------------------------------------------------- /IRGeneration/LLVMIRGenerator.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // CodeGenVisitor.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 28.05.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #ifndef CodeGenVisitor_hpp 10 | #define CodeGenVisitor_hpp 11 | 12 | #include "llvm/ADT/APFloat.h" 13 | #include "llvm/ADT/STLExtras.h" 14 | #include "llvm/IR/BasicBlock.h" 15 | #include "llvm/IR/Constants.h" 16 | #include "llvm/IR/DerivedTypes.h" 17 | #include "llvm/IR/Function.h" 18 | #include "llvm/IR/IRBuilder.h" 19 | #include "llvm/IR/LLVMContext.h" 20 | #include "llvm/IR/Module.h" 21 | #include "llvm/IR/Type.h" 22 | #include "llvm/IR/Verifier.h" 23 | 24 | #include "AbstractVisitor.hpp" 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | class NumberExpression; 31 | class VariableExpession; 32 | class AssignExpression; 33 | class IfExpression; 34 | class ForExpression; 35 | class BinaryExpression; 36 | class AbstractExpression; 37 | class ControlFlowGraph; 38 | class BasicBlock; 39 | 40 | class LLVMIRGenerator : public AbstractVisitor { 41 | public: 42 | void Visit(NumberExpression *exp) override; 43 | void Visit(VariableExpession *exp) override; 44 | void Visit(AssignExpression *exp) override; 45 | void Visit(IfExpression *exp) 
override; 46 | void Visit(ForExpression *exp) override; 47 | void Visit(BinaryExpression *exp) override; 48 | 49 | // not implemented, because visits only expressions 50 | virtual void Visit(BranchStatement *stmt) override {}; 51 | virtual void Visit(AssignStatement *stmt) override {}; 52 | 53 | llvm::Value* GenerateIR(AbstractExpression *exp); 54 | 55 | LLVMIRGenerator(llvm::LLVMContext *TheContext, 56 | llvm::IRBuilder<> *Builder) : TheContext(TheContext), Builder(Builder){}; 57 | 58 | private: 59 | llvm::Value* _latestValue; 60 | llvm::LLVMContext *TheContext; 61 | llvm::IRBuilder<> *Builder; 62 | std::map namedValues; 63 | void LogError(const char*); 64 | }; 65 | #endif /* CodeGenVisitor_hpp */ 66 | -------------------------------------------------------------------------------- /Lexer/Lexer.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Lexer.cpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 27.05.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 
7 | // 8 | 9 | #include "Lexer.hpp" 10 | #include "Token.hpp" 11 | #include 12 | #include 13 | 14 | Token * Lexer::GetNextToken() { 15 | return current = GetToken(); 16 | } 17 | 18 | Token * Lexer::GetToken() { 19 | std::string identifier; 20 | 21 | while (isspace(lastChar)) 22 | lastChar = getchar(); 23 | 24 | if (isalpha(lastChar)) { 25 | identifier = lastChar; 26 | while (isalnum((lastChar = getchar()))) 27 | identifier += lastChar; 28 | 29 | if (identifier == "if") return new Token(tok_if); 30 | if (identifier == "then") return new Token(tok_then); 31 | if (identifier == "else") return new Token(tok_else); 32 | if (identifier == "for") return new Token(tok_for); 33 | if (identifier == "in") return new Token(tok_in); 34 | return new VarToken(identifier); 35 | } 36 | 37 | if (isdigit(lastChar)) { 38 | std::string NumStr; 39 | do { 40 | NumStr += lastChar; 41 | lastChar = getchar(); 42 | } while (isdigit(lastChar)); 43 | 44 | int val = strtod(NumStr.c_str(), 0); 45 | return new ValueToken(val); 46 | } 47 | 48 | if (lastChar == '+' || lastChar == '-') { 49 | int op = lastChar; 50 | lastChar = getchar(); 51 | return new OpToken(op); 52 | } 53 | 54 | if (lastChar == '=') { 55 | int assignOp = lastChar; 56 | lastChar = getchar(); 57 | return new AssignSymbolToken(assignOp); 58 | } 59 | 60 | if (lastChar == EOF) { 61 | lastChar = getchar(); 62 | return new Token(tok_eof); 63 | } 64 | 65 | int someSymbol = lastChar; 66 | lastChar = getchar(); 67 | return new SingleSymbolToken(someSymbol); 68 | } 69 | 70 | std::vector Lexer::getAllTokens() { 71 | std::vector tokens; 72 | Token *token = GetNextToken(); 73 | while (token->type != tok_eof) { 74 | tokens.push_back(token); 75 | token = GetNextToken(); 76 | } 77 | 78 | tokens.push_back(token); 79 | return tokens; 80 | } 81 | -------------------------------------------------------------------------------- /Lexer/Lexer.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Lexer.hpp 3 
| // Compiler 4 | // 5 | // Created by Филипп Федяков on 27.05.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #ifndef Lexer_hpp 10 | #define Lexer_hpp 11 | 12 | #include 13 | #include 14 | class Token; 15 | 16 | class Lexer { 17 | public: 18 | std::vectorgetAllTokens(); 19 | Token* GetNextToken(); 20 | Token* current; 21 | private: 22 | Token* GetToken(); 23 | int lastChar = ' '; 24 | }; 25 | #endif /* Lexer_hpp */ 26 | -------------------------------------------------------------------------------- /Lexer/Token.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Token.cpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 27.05.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #include "Token.hpp" 10 | -------------------------------------------------------------------------------- /Lexer/Token.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Token.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 27.05.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 
//

#ifndef Token_hpp
#define Token_hpp

// NOTE(review): the two include targets here were lost when this listing was
// extracted; <string> is required by VarToken, <utility> by std::move.
#include <cstdio>
#include <string>
#include <utility>

/// Kinds of token the lexer produces. All values are negative.
enum TokenType {
    tok_eof = -1,

    // commands
    tok_if = -2,
    tok_then = -3,
    tok_else = -4,
    tok_for = -5,
    tok_in = -6,

    // primary
    tok_identifier = -7,
    tok_number = -8,

    // operations
    tok_operation = -9,
    tok_assign = -10,
    tok_single_symbol = -11
};

/// Base token: carries only its kind. Keywords and end-of-input use this type
/// directly; the subclasses below add a payload.
class Token {
public:

    TokenType type;
    Token(TokenType type): type(type) {}

    // Tokens are created with `new` and handled through Token*; the virtual
    // destructor makes deleting a derived token through that pointer
    // well-defined (VarToken owns a std::string).
    virtual ~Token() = default;
};

/// Integer literal.
class ValueToken: public Token {
public:
    int value;
    ValueToken(int value) : Token(tok_number), value(value) {}
};

/// Identifier (variable name).
class VarToken: public Token {
public:
    std::string identifier;
    VarToken(std::string identifier) : Token(tok_identifier), identifier(std::move(identifier)) {}
};

/// Binary operator character.
class OpToken: public Token {
public:
    char op;
    OpToken(char op) : Token(tok_operation), op(op) {}
};

/// Any other single character.
class SingleSymbolToken: public Token {
public:
    char c;
    SingleSymbolToken(char c) : Token(tok_single_symbol), c(c) {}
};

/// The assignment character.
class AssignSymbolToken: public Token {
public:
    char c;
    AssignSymbolToken(char assign) : Token(tok_assign), c(assign) {}
};


#endif /* Token_hpp */

// --------------------------------------------------------------------------------
// /Parser/Expressions.cpp:
// --------------------------------------------------------------------------------
//
//  AbstractExpresssion.cpp
//  Compiler
//
//  Created by Филипп Федяков on 27.05.17.
//  Copyright © 2017 filletofish. All rights reserved.
7 | // 8 | 9 | #include "Expressions.hpp" 10 | -------------------------------------------------------------------------------- /Parser/Expressions.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // AbstractExpresssion.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 27.05.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 7 | // 8 | 9 | #ifndef AbstractExpresssion_hpp 10 | #define AbstractExpresssion_hpp 11 | 12 | #include 13 | #include 14 | #include "../IRGeneration/AbstractVisitor.hpp" 15 | 16 | class Value; 17 | namespace llvm {}; 18 | 19 | class AbstractExpression { 20 | public: 21 | virtual std::string stringValue() {return "";}; 22 | 23 | virtual ~AbstractExpression() = default; 24 | virtual void Accept(AbstractVisitor * visitor) = 0; 25 | }; 26 | 27 | class NumberExpression : public AbstractExpression { 28 | public: 29 | int value; 30 | std::string stringValue() override { return std::to_string(value); } 31 | NumberExpression(int value) : value(value) {}; 32 | void Accept(AbstractVisitor *visitor) override { visitor->Visit(this);} 33 | }; 34 | 35 | class VariableExpession : public AbstractExpression { 36 | private: 37 | int _SSAIndex; 38 | public: 39 | std::string name; 40 | void SetSSAIndex(int index) { _SSAIndex = index; }; 41 | std::string stringValue() override { return name + "_" + std::to_string(_SSAIndex); } 42 | 43 | VariableExpession(const std::string &name) : name(name), _SSAIndex(0) {} 44 | 45 | void Accept(AbstractVisitor *visitor) override { visitor->Visit(this);} 46 | }; 47 | 48 | class AssignExpression : public AbstractExpression { 49 | public: 50 | std::string varName() { return varExp ->name; }; 51 | AbstractExpression *expr; 52 | VariableExpession *varExp; 53 | 54 | AssignExpression(VariableExpession *varExp, AbstractExpression *expr): varExp(varExp),expr(expr) {} 55 | void Accept(AbstractVisitor *visitor) override { visitor->Visit(this);} 56 | }; 57 | 58 | class 
IfExpression : public AbstractExpression { 59 | public: 60 | AbstractExpression *conditionExp, *thenExp, *elseExp; 61 | 62 | IfExpression(AbstractExpression *conditionExp, AbstractExpression *thenExp, AbstractExpression *elseExp): conditionExp(conditionExp), thenExp(thenExp), elseExp(elseExp) {} 63 | void Accept(AbstractVisitor *visitor) override { visitor->Visit(this);} 64 | }; 65 | 66 | class ForExpression: public AbstractExpression { 67 | public: 68 | VariableExpession *index; 69 | AbstractExpression *start, *end, *step, *body; 70 | 71 | ForExpression(std::string indexName, AbstractExpression *start, AbstractExpression *end, AbstractExpression *step, AbstractExpression *body): index(new VariableExpession(indexName)), start(start), end(end), step(step), body(body) {} 72 | void Accept(AbstractVisitor *visitor) override { visitor->Visit(this);} 73 | }; 74 | 75 | class BinaryExpression: public AbstractExpression { 76 | public: 77 | char op; 78 | AbstractExpression *lhs, *rhs; 79 | std::string stringValue() override { return lhs->stringValue() + " " + op + " " + rhs->stringValue(); }; 80 | 81 | 82 | BinaryExpression(char op, AbstractExpression *lhs, AbstractExpression *rhs): op(op), lhs(lhs), rhs(rhs) {} 83 | void Accept(AbstractVisitor *visitor) override { visitor->Visit(this);} 84 | }; 85 | #endif /* AbstractExpresssion_hpp */ 86 | 87 | -------------------------------------------------------------------------------- /Parser/Parser.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Parser.cpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 27.05.17. 6 | // Copyright © 2017 filletofish. All rights reserved. 
7 | // 8 | 9 | #include "Parser.hpp" 10 | #include "Expressions.hpp" 11 | #include "../Lexer/Lexer.hpp" 12 | #include "../Lexer/Token.hpp" 13 | 14 | #include 15 | 16 | AbstractExpression* Parser::ParseCurrentExpression() { 17 | auto LHS = ParsePrimary(); 18 | if (!LHS) 19 | return nullptr; 20 | return ParseBinOpRHS(LHS); 21 | } 22 | 23 | AbstractExpression* Parser::ParsePrimary() { 24 | switch (lexer->current->type) { 25 | case tok_identifier: 26 | return ParseIdentifierExpression(); 27 | case tok_number: 28 | return ParseNumberExpression(); 29 | case tok_if: 30 | return ParseIfExpression(); 31 | case tok_for: 32 | return ParseForExpression(); 33 | default: { 34 | return LogError("unknown token when expecting an expression"); 35 | } 36 | } 37 | } 38 | 39 | bool isCharAtToken(char c, Token *token) { 40 | if (token->type != tok_single_symbol) return false; 41 | auto symbolToken = static_cast(token); 42 | return symbolToken->c == c; 43 | } 44 | 45 | AbstractExpression* Parser::ParseNumberExpression() { 46 | auto currToken = static_cast(lexer->current); 47 | auto result = new NumberExpression(currToken->value); 48 | lexer->GetNextToken(); // consuming number; 49 | return result; 50 | } 51 | 52 | AbstractExpression* Parser::ParseIdentifierExpression() { 53 | auto currToken = static_cast(lexer->current); 54 | std::string varName = currToken->identifier; 55 | 56 | lexer->GetNextToken(); 57 | 58 | if (lexer->current->type != tok_assign) // Simple variable ref. 
59 | return new VariableExpession(varName); 60 | 61 | lexer->GetNextToken(); 62 | // it will parse next variable as declaration 63 | 64 | auto varDecl = ParseCurrentExpression(); 65 | if (!varDecl) { 66 | LogError("expected '='"); 67 | return nullptr; 68 | } 69 | return new AssignExpression(new VariableExpession(varName), varDecl); 70 | } 71 | 72 | AbstractExpression* Parser::ParseBinOpRHS(AbstractExpression * LHS) { 73 | while (true) { 74 | if (lexer->current->type != tok_operation) 75 | return LHS; 76 | 77 | auto currTokenOperation = static_cast(lexer->current); 78 | 79 | char op = currTokenOperation->op; 80 | lexer->GetNextToken(); 81 | 82 | // Parse the primary expression after the binary operator. 83 | auto RHS = ParsePrimary(); 84 | if (!RHS) 85 | return nullptr; 86 | // Merge LHS/RHS. 87 | LHS = new BinaryExpression(op, LHS, RHS); 88 | } 89 | } 90 | 91 | AbstractExpression* Parser::ParseIfExpression() { 92 | lexer->GetNextToken(); 93 | 94 | auto condExp = ParseCurrentExpression(); 95 | if (!condExp) 96 | return nullptr; 97 | 98 | if (lexer->current->type != tok_then) { 99 | LogError("expected 'then'"); 100 | return nullptr; 101 | } 102 | 103 | lexer->GetNextToken(); 104 | 105 | auto thenExp = ParseCurrentExpression(); 106 | if (!thenExp) 107 | return nullptr; 108 | 109 | if (lexer->current->type != tok_else) { 110 | LogError("expected 'else'"); 111 | return nullptr; 112 | } 113 | 114 | lexer->GetNextToken(); 115 | 116 | auto elseExp = ParseCurrentExpression(); 117 | if (!elseExp) 118 | return nullptr; 119 | 120 | return new IfExpression(condExp, thenExp, elseExp); 121 | } 122 | 123 | AbstractExpression* Parser::ParseForExpression() { 124 | lexer->GetNextToken(); 125 | 126 | if (lexer->current->type != tok_identifier) { 127 | LogError("expected variable after keyword 'for'"); 128 | return nullptr; 129 | } 130 | 131 | std::string idName = static_cast(lexer->current)->identifier; 132 | lexer->GetNextToken(); 133 | 134 | if (lexer->current->type != tok_assign) { 
135 | LogError("expected '=' after for"); 136 | return nullptr; 137 | } 138 | 139 | lexer->GetNextToken(); 140 | auto start = ParseCurrentExpression(); 141 | if (!start) 142 | return nullptr; 143 | if (!isCharAtToken(',', lexer->current)) { 144 | LogError("expected ',' after for start value"); 145 | return nullptr; 146 | } 147 | 148 | lexer->GetNextToken(); 149 | 150 | auto end = ParseCurrentExpression(); 151 | if (!end) 152 | return nullptr; 153 | 154 | AbstractExpression* step = nullptr; 155 | if (isCharAtToken(';', lexer->current)) { 156 | lexer->GetNextToken(); 157 | step = ParseCurrentExpression(); 158 | if (!step) 159 | return nullptr; 160 | } 161 | 162 | if (lexer->current->type != tok_in) { 163 | LogError("expected 'in' after for"); 164 | return nullptr; 165 | } 166 | 167 | lexer->GetNextToken(); 168 | 169 | auto body = ParseCurrentExpression(); 170 | if (!body) 171 | return nullptr; 172 | 173 | return new ForExpression(idName, start, end, step, body); 174 | } 175 | 176 | 177 | AbstractExpression* Parser::LogError(const char *Str) { 178 | fprintf(stderr, "Error: %s\n", Str); 179 | return nullptr; 180 | } 181 | 182 | 183 | std::vector Parser::Parse() { 184 | lexer->GetNextToken(); 185 | std::vector expressions; 186 | 187 | AbstractExpression *expr = nullptr; 188 | // llvm::Value *RetVal = nullptr; 189 | while (lexer->current->type != tok_eof) { 190 | expr = ParseCurrentExpression(); 191 | 192 | if (expr) { 193 | expressions.push_back(expr); 194 | // RetVal = expr->codegen(); 195 | // if (!RetVal) getNextToken(); 196 | } else { 197 | lexer->GetNextToken(); 198 | //return expressions; 199 | } 200 | } 201 | 202 | return expressions; 203 | } 204 | -------------------------------------------------------------------------------- /Parser/Parser.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Parser.hpp 3 | // Compiler 4 | // 5 | // Created by Филипп Федяков on 27.05.17. 6 | // Copyright © 2017 filletofish. 
All rights reserved. 7 | // 8 | 9 | #ifndef Parser_hpp 10 | #define Parser_hpp 11 | 12 | #include 13 | #include 14 | 15 | class AbstractExpression; 16 | class Lexer; 17 | 18 | class Parser { 19 | public: 20 | AbstractExpression* ParseCurrentExpression(); 21 | Parser(Lexer *lexer): lexer(lexer) {}; 22 | std::vector Parse(); 23 | private: 24 | Lexer* lexer; 25 | AbstractExpression* ParseNumberExpression(); 26 | AbstractExpression* ParseIdentifierExpression(); 27 | AbstractExpression* ParseIfExpression(); 28 | AbstractExpression* ParseForExpression(); 29 | AbstractExpression* ParsePrimary(); 30 | AbstractExpression* ParseBinOpRHS(AbstractExpression * LHS); 31 | 32 | AbstractExpression* LogError(const char *Str); 33 | }; 34 | #endif /* Parser_hpp */ 35 | 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Compiler-touch 2 | 3 | Simple compiler frontend to generate intermediate representation for simple language. 4 | The languange consists of integers, mutating variables, cycles and condition statements. 5 | 6 | Two modes available: 7 | 8 | * Generating _my own_ intermediate presentation with features: 9 | * Building Control Flow Graph 10 | * Using SSA Form 11 | * Using Phi Nodes 12 | 13 | * Generating LLVM intermediate presentation with power of LLVM Library. 14 | 15 | ### Example: 16 | 17 | Here is simple code written with our language. 18 | 19 | #### Code: 20 | ``` 21 | a = 3 22 | b = 0 23 | 24 | for i = 1, b in 25 | a = a + 3 26 | 27 | if b then 28 | a = a - 2 29 | else 30 | a = 0 31 | ``` 32 | 33 | #### Control flow graph: 34 | 35 |
36 | 37 |
38 | 39 | #### Its intermediate representation (using my own statements): 40 | 41 | ``` 42 | bb #0 entry 43 | preds: 44 | succs: bb #1 loop_cond 45 | dominatedBy: 46 | a_0 = 3 47 | b_0 = 0 48 | i_0 = 1 49 | branch to: bb #1 loop_cond 50 | 51 | bb #1 loop_cond 52 | preds: bb #0 entry bb #2 loop_body 53 | succs: bb #2 loop_body bb #3 loop_cont 54 | dominatedBy: bb #0 entry 55 | i_1 = [i_0 bb #0 entry; i_2 bb #2 loop_body; ] 56 | a_1 = [a_0 bb #0 entry; a_2 bb #2 loop_body; ] 57 | branch on: b_0 - i_1 to: bb #2 loop_body or: bb #3 loop_cont 58 | 59 | bb #2 loop_body 60 | preds: bb #1 loop_cond 61 | succs: bb #1 loop_cond 62 | dominatedBy: bb #1 loop_cond 63 | a_2 = a_1 + 3 64 | i_2 = i_1 + 1 65 | branch to: bb #1 loop_cond 66 | 67 | bb #3 loop_cont 68 | preds: bb #1 loop_cond 69 | succs: bb #5 else bb #4 then 70 | dominatedBy: bb #1 loop_cond 71 | branch on: b_0 to: bb #5 else or: bb #4 then 72 | 73 | bb #4 then 74 | preds: bb #3 loop_cont 75 | succs: bb #6 if_cont 76 | dominatedBy: bb #3 loop_cont 77 | a_3 = a_1 - 2 78 | branch to: bb #6 if_cont 79 | 80 | bb #5 else 81 | preds: bb #3 loop_cont 82 | succs: bb #6 if_cont 83 | dominatedBy: bb #3 loop_cont 84 | a_4 = 0 85 | branch to: bb #6 if_cont 86 | 87 | bb #6 if_cont 88 | preds: bb #4 then bb #5 else 89 | succs: 90 | dominatedBy: bb #3 loop_cont 91 | a_5 = [a_3 bb #4 then; a_4 bb #5 else; ] 92 | ``` 93 | 94 | 95 | 96 | #### Its intermediate representation produced with LLVM Library: 97 | 98 | ``` 99 | ; ModuleID = 'My Module' 100 | source_filename = "My Module" 101 | 102 | define common i32 @main() { 103 | entrypoint: 104 | %a = alloca i32 105 | store i32 3, i32* %a 106 | %b = alloca i32 107 | store i32 0, i32* %b 108 | %i = alloca i32 109 | store i32 1, i32* %i 110 | br label %loopCoonditionBB 111 | 112 | loopCoonditionBB: ; preds = %loop, %entrypoint 113 | %b1 = load i32, i32* %b 114 | %i2 = load i32, i32* %i 115 | %loopcond = icmp slt i32 %i2, %b1 116 | br i1 %loopcond, label %loop, label %afterloop 117 | 118 | 
loop: ; preds = %loopCoonditionBB 119 | %a3 = load i32, i32* %a 120 | %addtmp = fadd i32 %a3, 3 121 | store i32 %addtmp, i32* %a 122 | %i4 = load i32, i32* %i 123 | %nextvar = fadd i32 %i4, 1 124 | store i32 %nextvar, i32* %i 125 | br label %loopCoonditionBB 126 | 127 | afterloop: ; preds = %loopCoonditionBB 128 | %b5 = load i32, i32* %b 129 | %ifcond = icmp ne i32 %b5, 0 130 | br i1 %ifcond, label %then, label %else 131 | 132 | then: ; preds = %afterloop 133 | %a6 = load i32, i32* %a 134 | %subtmp = fsub i32 %a6, 2 135 | store i32 %subtmp, i32* %a 136 | br label %ifcont 137 | 138 | else: ; preds = %afterloop 139 | store i32 0, i32* %a 140 | br label %ifcont 141 | 142 | ifcont: ; preds = %else, %then 143 | %iftmp = phi i32 [ %subtmp, %then ], [ 0, %else ] 144 | ret i32 %iftmp 145 | } 146 | ``` 147 | 148 | ### What is IR? 149 | [Wiki article](https://en.wikipedia.org/wiki/Intermediate_representation) 150 | 151 | With LLVM installed you can look at the IR of a C program yourself. Just write a simple C program. 152 | 153 | Use the flag `-emit-llvm` and an optimization level such as `-O0` (levels range from 0 to 3): 154 | `clang -S -emit-llvm -O0 helloworld.c` 155 | 156 | Now `helloworld.ll` contains the IR.
157 | 158 | ### Dependencies: 159 | * llvm 4.0 160 | * clang 161 | 162 | ### Building: 163 | ``` 164 | ./build.sh 165 | ``` 166 | 167 | ### Usage: 168 | 169 | #### Printing intermediate representation: 170 | ``` 171 | ./compiler 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 |
// NOTE(review): the text above is carried over unchanged. The extraction that
// produced this listing dropped everything from the end of the README usage
// example to main.cpp line 19, along with the targets of the eight includes on
// main.cpp lines 20-27. The standard headers below are the ones main() itself
// needs; the LLVM declarations it uses are presumably reached through
// IRGeneration/LLVMIRGenerator.hpp - confirm against the repository.
#include <cstdio>
#include <cstring>
#include <vector>

#include "Parser/Parser.hpp"
#include "Lexer/Lexer.hpp"
#include "IRGeneration/LLVMIRGenerator.hpp"
#include "Parser/Expressions.hpp"
#include "IRGeneration/IRGenerator.hpp"
#include "IRGeneration/GraphVizPrinter.hpp"



using namespace llvm;

/// Entry point.
///
/// argv[1], when present, names a file that replaces stdin as the program
/// source. The remaining arguments are flags:
///   -llvm  emit LLVM IR instead of the project's own IR
///   -gv    also print the control-flow graph through GraphVizPrinter (own-IR
///          mode only)
///
/// @return 1 when the input file cannot be opened, otherwise 0.
int main(int argc, const char * argv[]) {
    if (argc > 1 && freopen(argv[1], "r", stdin) == nullptr) {
        perror(argv[1]);
        return 1;
    }

    bool shouldUseLLVM = false;
    bool shouldPrintGraphViz = false;
    for (int i = 2; i < argc; i++) {
        if (strcmp(argv[i], "-llvm") == 0)
            shouldUseLLVM = true;
        if (strcmp(argv[i], "-gv") == 0)
            shouldPrintGraphViz = true;
    }

    // Lexer and parser live on the stack; nothing outlives main().
    Lexer lexer;
    Parser parser(&lexer);
    const std::vector<AbstractExpression *> expressions = parser.Parse();

    if (shouldUseLLVM) {
        LLVMContext context;
        IRBuilder<> Builder(context);

        // The module holds all the code. It is declared after the context so
        // that it is destroyed before the context it refers to.
        Module module("My Module", context);

        // i32 main(), with external linkage: common linkage is not valid on a
        // function.
        FunctionType *FT = FunctionType::get(Builder.getInt32Ty(), false);
        Function *mainFunction =
            Function::Create(FT, GlobalValue::ExternalLinkage, "main", &module);

        llvm::BasicBlock *BB = llvm::BasicBlock::Create(context, "entrypoint", mainFunction);
        Builder.SetInsertPoint(BB);

        LLVMIRGenerator llvmIRGenerator(&context, &Builder);
        llvm::Value *value = nullptr;
        for (AbstractExpression *expression : expressions)
            value = llvmIRGenerator.GenerateIR(expression);

        // main returns the value of the last expression. With no expressions,
        // or after a failed lowering, there is none; return 0 so the `ret`
        // still matches the i32 return type.
        if (!value)
            value = Builder.getInt32(0);
        Builder.CreateRet(value);

        module.dump();
        if (shouldPrintGraphViz) {
            printf("Printing GraphViz is not available in llvm mode\n");
        }
    } else {
        IRGenerator irGenerator;
        for (AbstractExpression *expression : expressions)
            irGenerator.GenerateIR(expression);

        irGenerator.CommitBuildingAndDump();
        if (shouldPrintGraphViz) {
            irGenerator.GetGraphVizPrinter().print();
        }
    }

    return 0;
}
-------------------------------------------------------------------------------- /program.txt: -------------------------------------------------------------------------------- 1 | a = 3 2 | b = 0 3 | 4 | for i = 1, b in 5 | a = a + 3 6 | 7 | if b then 8 | a = a - 2 9 | else 10 | a = 0 11 | 12 | 13 | 14 | --------------------------------------------------------------------------------