├── README.md
├── main.cpp
├── lex.l
├── node.h
├── parse.y
└── node.cpp
/README.md:
--------------------------------------------------------------------------------
1 | # C-compiler from scratch using GNU bison and flex
2 | A compiler that translates a very small subset of C into 32-bit x86 assembly.
3 | ## compile
4 | $ bison -d parse.y
5 | $ flex lex.l
6 | $ g++ -o compile parse.tab.c lex.yy.c node.cpp main.cpp
7 | ## run
8 | $ ./compile < proc.c
9 |
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
1 | #include "node.h"
2 | extern ASTNode *root;
3 | extern int yyparse();
4 |
5 | int main(int argc, char** argv) { // Parses C source from stdin and writes the generated assembly to code.s; exits non-zero on failure.
6 |     if (yyparse() != 0) return 1; // parse failed; the parser has already reported it through yyerror
7 |     FunctionNode* functionRoot = dynamic_cast<FunctionNode*>(root); // null when root is unset or is not a FunctionNode
8 |
9 |     if (functionRoot != nullptr) { // test the pointer that is actually dereferenced below
10 |         std::ofstream output("code.s");
11 |         if (output.is_open()) {
12 |             functionRoot->codeGen(output);             // function header
13 |             functionRoot->statement().codeGen(output); // function body
14 |             output.close();
15 |         }
16 |         else{
17 |             std::cerr << "cant open file" << std::endl; return 1;
18 |         }
19 |     }
20 |     return 0;
21 | }
22 |
--------------------------------------------------------------------------------
/lex.l:
--------------------------------------------------------------------------------
1 | %{
2 | #include "node.h"
3 | #include "parse.tab.h"
4 | #define SAVE_TOKEN yylval.string = new std::string(yytext, yyleng) /* heap-copies the matched text into yylval.string */
5 | #define TOKEN(type) (yylval.token = type) /* stores the token code in yylval.token; the expression evaluates to that code */
6 |
7 | extern void yyerror(const char* s); /* defined in parse.y */
8 | %}
9 |
10 | %%
11 |
12 | [ \t\r\n]+ ; // Ignore whitespace
13 | [0-9]+ { SAVE_TOKEN; return INTEGER; } /* the digits are handed to the parser as text */
14 | "return" { return TOKEN(RETURN_KEYWORD); } /* the grammar never reads keyword text, so no string is allocated */
15 | "int" { return TOKEN(INT_KEYWORD); }
16 | [a-zA-Z_][a-zA-Z0-9_]* { SAVE_TOKEN; return IDENTIFIER; } /* the keyword rules above win equal-length matches because they are listed first */
17 | "(" { return TOKEN(LEFT_BRACE);} /* NOTE: the PAREN/BRACE names are swapped relative to the characters; parse.y uses the same swapped names */
18 | ")" { return TOKEN(RIGHT_BRACE);}
19 | "{" { return TOKEN(LEFT_PAREN);}
20 | "}" { return TOKEN(RIGHT_PAREN);}
21 | "+" { return TOKEN(PLUS);}
22 | "-" { return TOKEN(MINUS);}
23 | "*" { return TOKEN(MULT);}
24 | "/" { return TOKEN(DIV);}
25 |
26 | . { yyerror("Invalid character"); } /* report and skip: no token is returned, scanning continues */
27 |
28 | %%
29 |
30 | int yywrap() { /* hook the generated scanner calls when it reaches end of input */
31 | return 1; /* non-zero: there is no further input to scan */
32 | }
33 |
--------------------------------------------------------------------------------
/node.h:
--------------------------------------------------------------------------------
1 | #ifndef NODE_H_
2 | #define NODE_H_
3 | #include <fstream>
4 | #include <iostream>
5 | #include <string>
6 |
7 | class ASTNode { // Abstract base class of every syntax-tree node.
8 | public:
9 | virtual ~ASTNode() = default;
10 | virtual void codeGen(std::ofstream& output) const = 0; // writes this node's generated code to `output`
11 | };
12 |
13 | class NumericNode : public ASTNode { // Integer literal.
14 | public:
15 |     explicit NumericNode(std::string value); // `value` is the literal's text; explicit prevents a string from silently converting to a node
16 |     void codeGen(std::ofstream& output) const override; // emits code that loads the literal into EAX
17 |     int value() const; // the literal as an int
18 | private:
19 |     int _value;
20 | };
21 |
22 | class BinaryOperatorNode : public ASTNode { // Arithmetic expression combining two operand subtrees.
23 | public:
24 | BinaryOperatorNode(char op, ASTNode &left, ASTNode &right); // op: the grammar passes '+', '-', '*' or '/'
25 | void codeGen(std::ofstream& output) const override; // emits both operands, then the operation
26 | private:
27 | char _op;
28 | ASTNode& _left; // operands are held by reference; this class declares no destructor that would release them
29 | ASTNode& _right;
30 | };
31 |
32 | class ReturnNode : public ASTNode{ // `return <expression>` statement.
33 | public:
34 |     explicit ReturnNode(ASTNode &returnValue); // explicit prevents an arbitrary node from silently converting to a ReturnNode
35 |     void codeGen(std::ofstream& output) const override; // emits the expression's code followed by `ret`
36 | private:
37 |     ASTNode& _returnValue; // held by reference; not released by this class
38 | };
39 |
40 | class FunctionNode : public ASTNode{ // Function definition: a name plus a single body statement.
41 | public:
42 | FunctionNode(std::string id,ASTNode &statement); // id: function name, stored by value; statement: body, stored by reference
43 | void codeGen(std::ofstream& output) const override; // emits only the .globl directive and the label; main.cpp emits the body through statement()
44 | const ASTNode& statement() const; // the body statement
45 | private:
46 | std::string _id;
47 | ASTNode& _statement;
48 | };
49 |
50 | #endif
51 |
--------------------------------------------------------------------------------
/parse.y:
--------------------------------------------------------------------------------
1 | %{
2 | #include "node.h"
3 | ASTNode *root = nullptr; /* root of the parsed tree; assigned by the `program` rule and read by main.cpp */
4 |
5 | extern int yylex(); /* scanner entry point; lex.l supplies the rules */
6 | void yyerror(const char*); /* defined after the grammar */
7 |
8 | %}
9 |
10 | %union {
11 |     int token;            /* token code, for punctuation and operators */
12 |     std::string *string;  /* heap-allocated lexeme produced by the scanner */
13 |     ASTNode *node;        /* tree fragment built by a grammar action */
14 | }
15 | /* Value type <string>: the grammar reads the text of IDENTIFIER and INTEGER. */
16 | %token <string> IDENTIFIER INTEGER RETURN_KEYWORD INT_KEYWORD
17 | /* Value type <token>: only the token code matters. */
18 | %token <token> LEFT_PAREN RIGHT_PAREN LEFT_BRACE RIGHT_BRACE
19 | %token <token> PLUS MINUS MULT DIV
20 | /* Every nonterminal yields an AST node, so the actions can dereference $n. */
21 | %type <node> expression program statement function factor term
22 | /* Precedence, lowest first. The multiplication token is MULT; MUL would declare a new, unused token. */
23 | %left PLUS MINUS
24 | %left MULT DIV
25 |
26 | %start program
27 |
28 | %%
29 |
30 | program: function {root = $1;} // publish the finished tree for main()
31 | ;
32 | function: INT_KEYWORD IDENTIFIER LEFT_BRACE RIGHT_BRACE LEFT_PAREN statement RIGHT_PAREN {$$ = new FunctionNode(*$2,*$6); delete $2;} // $2 = identifier (the node copies it, so the lexeme is freed), $6 = statement; lex.l returns the BRACE tokens for "(" ")" and the PAREN tokens for "{" "}"
33 | ;
34 | expression: term // lowest precedence: left-associative + and -
35 | | expression PLUS term {$$ = new BinaryOperatorNode('+', *$1, *$3); }
36 | | expression MINUS term {$$ = new BinaryOperatorNode('-', *$1, *$3); }
37 | ;
38 | term: factor // binds tighter than + and -; left-recursive so chains such as 2*3*4 or 8/2/2 parse
39 | | term MULT factor {$$ = new BinaryOperatorNode('*', *$1, *$3); }
40 | | term DIV factor {$$ = new BinaryOperatorNode('/', *$1, *$3); }
41 | ;
42 | factor: INTEGER {$$ = new NumericNode(*$1); delete $1; } // the node keeps the parsed int, so the lexeme is freed
43 | ;
44 | statement: RETURN_KEYWORD expression {$$ = new ReturnNode(*$2);}
45 | ;
46 | %%
47 |
48 | void yyerror(const char *s) { // Error hook: lex.l calls it for characters it cannot match.
49 | std::cerr << "Error: " << s << std::endl; // diagnostics go to stderr
50 | }
51 |
--------------------------------------------------------------------------------
/node.cpp:
--------------------------------------------------------------------------------
1 | #include "node.h"
2 |
3 | // NumericNode implementation
4 | NumericNode::NumericNode(std::string value) : _value(std::stoi(value)) {} // converts the literal's text to int; std::stoi throws if the text is not a number or does not fit in int
5 |
6 | void NumericNode::codeGen(std::ofstream& output) const { // Emits a single instruction that loads the literal into EAX.
7 |     if (!output.is_open()) return; // no open file: emit nothing
8 |     const int literal = _value;
9 |     output << "mov\teax," << literal << std::endl;
10 | }
11 |
12 | int NumericNode::value() const{ // Accessor for the literal parsed by the constructor.
13 | return _value;
14 | }
15 | // BinaryOperatorNode implementation: stores the operator and binds references to the two operand nodes
16 | BinaryOperatorNode::BinaryOperatorNode(char op, ASTNode& left, ASTNode& right)
17 | : _op(op), _left(left), _right(right) {}
18 |
19 | void BinaryOperatorNode::codeGen(std::ofstream& output) const { // Emits code that leaves (left op right) in EAX.
20 |     // First, evaluate the left operand and push the result onto the stack
21 |     _left.codeGen(output);
22 |     output << "\tpush eax" << std::endl;
23 |
24 |     // Then, evaluate the right operand; its result stays in EAX
25 |     _right.codeGen(output);
26 |
27 |     // Pop the left operand into ECX. ECX is a scratch register; EBX must be preserved by a function under the i386 calling convention
28 |     output << "\tpop ecx" << std::endl;
29 |
30 |     // Here ECX = left and EAX = right
31 |     switch (_op) {
32 |     case '+':
33 |         output << "\tadd eax, ecx" << std::endl;
34 |         break;
35 |     case '-':
36 |         output << "\tsub ecx, eax" << std::endl; // ECX = left - right
37 |         output << "\tmov eax, ecx" << std::endl;
38 |         break;
39 |     case '*':
40 |         output << "\timul eax, ecx" << std::endl;
41 |         break;
42 |     case '/':
43 |         output << "\txchg eax, ecx" << std::endl; // Swap: EAX = dividend (left), ECX = divisor (right)
44 |         output << "\tcdq" << std::endl; // Sign-extend EAX into EDX
45 |         output << "\tidiv ecx" << std::endl; // quotient ends up in EAX
46 |         break;
47 |     default:
48 |         // Unknown operator: no instruction is emitted
49 |         break;
50 |     }
51 | }
52 |
53 | // ReturnNode implementation: binds a reference to the expression whose value is returned
54 | ReturnNode::ReturnNode(ASTNode& returnValue) : _returnValue(returnValue) {}
55 |
56 | void ReturnNode::codeGen(std::ofstream& output) const { // Emits the returned expression's code, then `ret`.
57 |     // The expression is emitted through the virtual codeGen, so its concrete node type is not needed here
58 |
59 |     _returnValue.codeGen(output);
60 |     if(output.is_open()){
61 |         output << "\tret" << std::endl;
62 |     }
63 |
64 | }
65 |
66 | // FunctionNode implementation: copies the function name and binds a reference to the body statement
67 | FunctionNode::FunctionNode(std::string id,ASTNode& statement) : _id(id),_statement(statement) {}
68 |
69 | void FunctionNode::codeGen(std::ofstream& output) const { // Writes the function header: syntax mode, exported symbol and entry label.
70 |     if (!output.is_open()) return;
71 |     // The node generators emit Intel-style operands (e.g. "mov eax,1"); GNU as defaults to AT&T syntax, so select Intel mode explicitly
72 |     output << ".intel_syntax noprefix\n";
73 |     output << ".globl " << _id << "\n" << _id << ":" << std::endl;
74 | }
75 | const ASTNode& FunctionNode::statement() const{ // Read-only access to the body; main.cpp uses it to emit the body after the header.
76 | return _statement;
77 | }
78 |
--------------------------------------------------------------------------------