├── README.md ├── main.cpp ├── lex.l ├── node.h ├── parse.y └── node.cpp /README.md: -------------------------------------------------------------------------------- 1 | # C-compiler from scratch using GNU bison and flex 2 | A compiler from a very small subset of C to 32-bit x86 assembly. 3 | ## compile 4 | $ bison -d parse.y
5 | $ flex lex.l
6 | $ g++ -o compile parse.tab.c lex.yy.c node.cpp main.cpp 7 | ## run 8 | $ ./compile < proc.c 9 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include "node.h" 2 | extern ASTNode *root; 3 | extern int yyparse(); 4 | 5 | int main(int argc, char** argv) { 6 | yyparse(); 7 | FunctionNode* functionRoot = dynamic_cast(root);// root in type functionNode 8 | 9 | if (root != nullptr) { 10 | std::ofstream output("code.s"); 11 | if (output.is_open()) { 12 | functionRoot->codeGen(output); 13 | functionRoot->statement().codeGen(output); 14 | output.close(); 15 | } 16 | else{ 17 | std::cerr << "cant open file" << std::endl; 18 | } 19 | } 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /lex.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "node.h" 3 | #include "parse.tab.h" 4 | #define SAVE_TOKEN yylval.string = new std::string(yytext, yyleng) 5 | #define TOKEN(type) (yylval.token = type) 6 | 7 | extern void yyerror(const char* s); 8 | %} 9 | 10 | %% 11 | 12 | [ \t\n] ; // Ignore whitespace 13 | [0-9]+ { SAVE_TOKEN; return INTEGER; } 14 | "return" { SAVE_TOKEN; return RETURN_KEYWORD;} 15 | "int" { SAVE_TOKEN; return INT_KEYWORD;} 16 | [a-zA-Z]+ { SAVE_TOKEN; return IDENTIFIER; } 17 | "(" { return TOKEN(LEFT_BRACE);} 18 | ")" { return TOKEN(RIGHT_BRACE);} 19 | "{" { return TOKEN(LEFT_PAREN);} 20 | "}" { return TOKEN(RIGHT_PAREN);} 21 | "+" { return TOKEN(PLUS);} 22 | "-" { return TOKEN(MINUS);} 23 | "*" { return TOKEN(MULT);} 24 | "/" { return TOKEN(DIV);} 25 | 26 | . 
{ yyerror("Invalid character"); } 27 | 28 | %% 29 | 30 | int yywrap() { 31 | return 1; 32 | } 33 | -------------------------------------------------------------------------------- /node.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H_ 2 | #define NODE_H_ 3 | #include 4 | #include 5 | #include 6 | 7 | class ASTNode { 8 | public: 9 | virtual ~ASTNode() = default; 10 | virtual void codeGen(std::ofstream& output) const = 0; 11 | }; 12 | 13 | class NumericNode : public ASTNode { 14 | public: 15 | NumericNode(std::string value); 16 | void codeGen(std::ofstream& output) const override; 17 | int value() const; 18 | private: 19 | int _value; 20 | }; 21 | 22 | class BinaryOperatorNode : public ASTNode { 23 | public: 24 | BinaryOperatorNode(char op, ASTNode &left, ASTNode &right); 25 | void codeGen(std::ofstream& output) const override; 26 | private: 27 | char _op; 28 | ASTNode& _left; 29 | ASTNode& _right; 30 | }; 31 | 32 | class ReturnNode : public ASTNode{ 33 | public: 34 | ReturnNode(ASTNode &returnValue); 35 | void codeGen(std::ofstream& output) const override; 36 | private: 37 | ASTNode& _returnValue; 38 | }; 39 | 40 | class FunctionNode : public ASTNode{ 41 | public: 42 | FunctionNode(std::string id,ASTNode &statement); 43 | void codeGen(std::ofstream& output) const override; 44 | const ASTNode& statement() const; 45 | private: 46 | std::string _id; 47 | ASTNode& _statement; 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /parse.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "node.h" 3 | ASTNode *root = nullptr; 4 | 5 | extern int yylex(); 6 | void yyerror(const char*); 7 | 8 | %} 9 | 10 | %union { 11 | int token; 12 | std::string *string; 13 | ASTNode *node; 14 | } 15 | 16 | %token IDENTIFIER INTEGER RETURN_KEYWORD INT_KEYWORD 17 | 18 | %token LEFT_PAREN RIGHT_PAREN LEFT_BRACE RIGHT_BRACE 19 | 
%token PLUS MINUS MULT DIV 20 | 21 | %type expression program statement function factor term 22 | 23 | %left PLUS MINUS 24 | %left MUL DIV 25 | 26 | %start program 27 | 28 | %% 29 | 30 | program: function {root = $1;} 31 | ; 32 | function: INT_KEYWORD IDENTIFIER LEFT_BRACE RIGHT_BRACE LEFT_PAREN statement RIGHT_PAREN {$$ = new FunctionNode(*$2,*$6);} //$2 = identifier, $6 = statment. 33 | ; 34 | expression: factor 35 | | expression PLUS expression {$$ = new BinaryOperatorNode('+', *$1, *$3); } 36 | | expression MINUS expression {$$ = new BinaryOperatorNode('-', *$1, *$3); } 37 | | term 38 | ; 39 | term: factor MULT factor {$$ = new BinaryOperatorNode('*', *$1, *$3); } 40 | | factor DIV factor {$$ = new BinaryOperatorNode('/', *$1, *$3); } 41 | ; 42 | factor: INTEGER {$$ = new NumericNode(*$1); } 43 | ; 44 | statement: RETURN_KEYWORD expression {$$ = new ReturnNode(*$2);} 45 | ; 46 | %% 47 | 48 | void yyerror(const char *s) { 49 | std::cerr << "Error: " << s << std::endl; 50 | } 51 | -------------------------------------------------------------------------------- /node.cpp: -------------------------------------------------------------------------------- 1 | #include "node.h" 2 | 3 | // NumericNode implementation 4 | NumericNode::NumericNode(std::string value) : _value(std::stoi(value)) {} 5 | 6 | void NumericNode::codeGen(std::ofstream& output) const { 7 | if(output.is_open()){ 8 | output << "mov\teax," <<_value << std::endl; 9 | } 10 | } 11 | 12 | int NumericNode::value() const{ 13 | return _value; 14 | } 15 | // BinaryOperatorNode implementation 16 | BinaryOperatorNode::BinaryOperatorNode(char op, ASTNode& left, ASTNode& right) 17 | : _op(op), _left(left), _right(right) {} 18 | 19 | void BinaryOperatorNode::codeGen(std::ofstream& output) const { 20 | // First, evaluate the left operand and push the result onto the stack 21 | _left.codeGen(output); 22 | output << "\tpush eax" << std::endl; 23 | 24 | // Then, evaluate the right operand 25 | _right.codeGen(output); 
26 | 27 | // Now, pop the left operand back into another register (e.g., EBX) 28 | output << "\tpop ebx" << std::endl; 29 | 30 | // Perform the operation based on the operator 31 | switch (_op) { 32 | case '+': 33 | output << "\tadd eax, ebx" << std::endl; 34 | break; 35 | case '-': 36 | output << "\tsub ebx, eax" << std::endl; 37 | output << "\tmov eax, ebx" << std::endl; 38 | break; 39 | case '*': 40 | output << "\timul eax, ebx" << std::endl; 41 | break; 42 | case '/': 43 | output << "\txchg eax, ebx" << std::endl; // Swap EAX and EBX 44 | output << "\tcdq" << std::endl; // Sign-extend EAX into EDX 45 | output << "\tidiv ebx" << std::endl; 46 | break; 47 | default: 48 | // Handle error: unknown operator 49 | break; 50 | } 51 | } 52 | 53 | // ReturnNode implementation 54 | ReturnNode::ReturnNode(ASTNode& returnValue) : _returnValue(returnValue) {} 55 | 56 | void ReturnNode::codeGen(std::ofstream& output) const { 57 | NumericNode* numericNode = dynamic_cast(&_returnValue); 58 | 59 | _returnValue.codeGen(output); 60 | if(output.is_open()){ 61 | output << "\tret" << std::endl; 62 | } 63 | 64 | } 65 | 66 | // FunctionNode implementation 67 | FunctionNode::FunctionNode(std::string id,ASTNode& statement) : _id(id),_statement(statement) {} 68 | 69 | void FunctionNode::codeGen(std::ofstream& output) const { 70 | std::string code = ".globl "+ _id+ "\n"+ _id+ ":"; 71 | if(output.is_open()){ 72 | output << code << std::endl; 73 | } 74 | } 75 | const ASTNode& FunctionNode::statement() const{ 76 | return _statement; 77 | } 78 | --------------------------------------------------------------------------------