├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── test_frontend_certificator.py │ ├── test_runner.py │ ├── test_code_generator.py │ ├── test_backend_certificator.py │ ├── test_abstract_syntax_tree.py │ └── test_lexer.py └── integration │ ├── __init__.py │ ├── test_while.py │ ├── test_expressions.py │ ├── test_function_call.py │ ├── test_gcd.py │ ├── test_struct.py │ ├── test_array.py │ ├── test_fibonacci.py │ ├── test_control_flow.py │ └── test_operations.py ├── paper.pdf ├── requirements.txt ├── src ├── __init__.py ├── certificators │ ├── __init__.py │ ├── abstract_certificator.py │ └── frontend.py ├── ast_nodes │ ├── operations │ │ ├── ADD.py │ │ ├── DIV.py │ │ ├── MOD.py │ │ ├── SUB.py │ │ ├── OR.py │ │ ├── AND.py │ │ ├── BITOR.py │ │ ├── MULT.py │ │ ├── BITAND.py │ │ ├── EQUAL.py │ │ ├── LESS.py │ │ ├── LSHIFT.py │ │ ├── RSHIFT.py │ │ ├── DIFF.py │ │ ├── GREATER.py │ │ ├── ASSIGN.py │ │ └── NOT.py │ ├── basic │ │ ├── CST.py │ │ ├── SEQ.py │ │ └── PROG.py │ ├── __init__.py │ ├── conditionals │ │ ├── IF.py │ │ ├── conditional.py │ │ ├── WHILE.py │ │ └── IFELSE.py │ ├── functions │ │ ├── PARAM.py │ │ ├── ARG.py │ │ ├── RET_SYM.py │ │ ├── FUNC_CALL.py │ │ └── FUNC_DEF.py │ ├── variables │ │ ├── STRUCT_DEF.py │ │ ├── VAR_DEF.py │ │ └── VAR.py │ └── node.py ├── inverters │ ├── certificate_to_high_level.py │ └── base_inverter.py ├── runner.py └── code_generator.py ├── benchmarks ├── while.ch ├── expressions.ch ├── control_flow.ch ├── longer_1.ch ├── array.ch ├── gcd.ch ├── function_call.ch ├── fibonacci.ch ├── longer_2.ch ├── longer_3.ch ├── struct.ch ├── longer_4.ch ├── longer_5.ch ├── longer_6.ch ├── longer_7.ch ├── longer_8.ch ├── longer_9.ch ├── multiple_function_calls.ch ├── many_control_flow.ch ├── many_function_calls.ch └── a_lot_of_function_calls.ch ├── references ├── llvm_func_call.cpp ├── functions.cpp ├── Makefile ├── llvm_type_cast.cpp ├── llvm_structs_and_arrays.cpp ├── machine_code_program.c ├── llvm_func_call.ll ├── llvm_structs_and_arrays.ll 
└── llvm_type_cast.ll ├── .github └── workflows │ ├── unit_tests.yml │ ├── integration_tests.yml │ └── docs.yml ├── main.py └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guilhermeolivsilva/project-charon/HEAD/paper.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pdoc==15.0.0 2 | pytest==8.0.0 3 | pytest-mock==3.12.0 4 | typing-extensions==4.5.0 -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | """Project [C]haron consists in a translation validation framework of programs 2 | written in a subset of the C language.""" 3 | -------------------------------------------------------------------------------- /benchmarks/while.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | i = 1; 4 | 5 | while (i < 100) { 6 | i = i + i; 7 | } 8 | 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /src/certificators/__init__.py: -------------------------------------------------------------------------------- 1 | """Export classes to allow 
`from src.certificators import ...`.""" 2 | 3 | from .backend import BackendCertificator 4 | from .frontend import FrontendCertificator 5 | -------------------------------------------------------------------------------- /benchmarks/expressions.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | i = 3 + 10 * 2; 4 | 5 | int j; 6 | j = (i << 1) / 5 % 2 + 15 * 3; 7 | 8 | int k; 9 | k = !(i | j & 1); 10 | 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /benchmarks/control_flow.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | i = 1; 4 | 5 | int x; 6 | 7 | if (i < 5) { 8 | x = 23; 9 | } 10 | else { 11 | x = 35; 12 | } 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /references/llvm_func_call.cpp: -------------------------------------------------------------------------------- 1 | int test(int param) { 2 | return 1 + 2 + param; 3 | } 4 | 5 | 6 | int main() { 7 | int func_call, some_other_int; 8 | 9 | func_call = test(123); 10 | func_call = test(some_other_int); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /benchmarks/longer_1.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | int x; 4 | int avg; 5 | 6 | avg = (72 + 85 + 90 + 60 + 88) / 5; 7 | 8 | i = 0; 9 | while (i < 5) { 10 | x = i + avg; 11 | i = i + 1; 12 | } 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /benchmarks/array.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int my_array[5]; 3 | 4 | int i; 5 | i = 0; 6 | 7 | while(i < 5) { 8 | my_array[i] = i << i; 9 | i = i + 1; 10 | } 11 | 12 | int j; 13 | j = 
(my_array[2]) + 3; 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /benchmarks/gcd.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | i = 125; 4 | 5 | int j; 6 | j = 100; 7 | 8 | while (i - j) { 9 | if (i < j) { 10 | j = j - i; 11 | } 12 | else { 13 | i = i - j; 14 | } 15 | } 16 | 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /benchmarks/function_call.ch: -------------------------------------------------------------------------------- 1 | int factorial(int x) { 2 | int fact; 3 | fact = 1; 4 | 5 | while(x > 0) { 6 | fact = fact * x; 7 | x = x - 1; 8 | } 9 | 10 | return fact; 11 | } 12 | 13 | 14 | int main() { 15 | int i; 16 | i = factorial(5); 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /benchmarks/fibonacci.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | i = 1; 4 | 5 | int a; 6 | a = 0; 7 | 8 | int b; 9 | b = 1; 10 | 11 | int c; 12 | 13 | while (i < 10) { 14 | c = a; 15 | a = b; 16 | b = c + a; 17 | i = i + 1; 18 | } 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /benchmarks/longer_2.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | int x; 4 | int avg; 5 | int scaled; 6 | int factor; 7 | 8 | factor = 1; 9 | 10 | avg = (72 + 85 + 90 + 60 + 88) / factor; 11 | 12 | i = 0; 13 | while (i < 5) { 14 | x = i + avg; 15 | i = i + 1; 16 | } 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /benchmarks/longer_3.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | int x; 4 | int avg; 5 | int 
scaled; 6 | int factor; 7 | 8 | factor = 1; 9 | 10 | avg = (72 + 85 + 90 + 60 + 88) / 5; 11 | scaled = avg / factor; 12 | 13 | i = 0; 14 | while (i < 5) { 15 | x = i + avg + scaled; 16 | i = i + 1; 17 | } 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /benchmarks/struct.ch: -------------------------------------------------------------------------------- 1 | struct my_struct { 2 | int attribute_1; 3 | float attribute_2; 4 | short attribute_3; 5 | }; 6 | 7 | int main() { 8 | my_struct struct_var; 9 | 10 | struct_var.attribute_1 = 10; 11 | struct_var.attribute_2 = 13.1 * 23.4 / 30.2; 12 | struct_var.attribute_3 = (struct_var.attribute_1) * (struct_var.attribute_2); 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /references/functions.cpp: -------------------------------------------------------------------------------- 1 | int add(int a, int b) { 2 | return a + b; 3 | } 4 | 5 | int main() { 6 | return add(3, 4); 7 | } 8 | 9 | 10 | /* 11 | store %add.a 3 12 | store %add.b 4 13 | store %add.ret_addr @6 14 | jump add 15 | ... 
16 | 17 | 18 | add: 19 | load %add.a r0 20 | load %add.b r1 21 | add r0 r1 r0 22 | store %add.ret_val r0 23 | jump %add.ret_addr 24 | */ 25 | -------------------------------------------------------------------------------- /benchmarks/longer_4.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | int x; 4 | int avg; 5 | int scaled; 6 | int factor; 7 | 8 | factor = 1; 9 | 10 | avg = (72 + 85 + 90 + 60 + 88) / 5; 11 | scaled = avg / factor; 12 | 13 | i = 0; 14 | while (i < 5) { 15 | if (i % 2 == 0) { 16 | x = i + avg + scaled; 17 | } 18 | i = i + 1; 19 | } 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /benchmarks/longer_5.ch: -------------------------------------------------------------------------------- 1 | int print_result(int a, int b, int c) { 2 | return 0; 3 | } 4 | 5 | int main() { 6 | int i; 7 | int x; 8 | int avg; 9 | int scaled; 10 | int factor; 11 | 12 | factor = 1; 13 | 14 | avg = (72 + 85 + 90 + 60 + 88) / 5; 15 | scaled = avg / factor; 16 | 17 | i = 0; 18 | while (i < 5) { 19 | x = print_result(i, avg, scaled); 20 | i = i + 1; 21 | } 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /benchmarks/longer_6.ch: -------------------------------------------------------------------------------- 1 | int print_result(int a, int b, int c) { 2 | return 0; 3 | } 4 | 5 | int main() { 6 | int i; 7 | int x; 8 | int avg; 9 | int scaled; 10 | int factor; 11 | 12 | factor = 1; 13 | 14 | avg = (72 + 85 + 90 + 60 + 88) / 5; 15 | scaled = (avg / factor) % 2 + 1; 16 | 17 | i = 0; 18 | while (i < 5) { 19 | x = print_result(i, avg, scaled) / 5 * 4; 20 | i = i + 1; 21 | } 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /benchmarks/longer_7.ch: -------------------------------------------------------------------------------- 1 
| int func_1(int x, int y) { 2 | return x / y; 3 | } 4 | 5 | int func_2(int x, int y) { 6 | return func_1(x, y) / func_1(x, y); 7 | } 8 | 9 | int main() { 10 | int i; 11 | int x; 12 | int avg; 13 | int scaled; 14 | int factor; 15 | 16 | factor = 1; 17 | 18 | avg = (72 + 85 + 90 + 60 + 88) / 5; 19 | scaled = avg / factor; 20 | 21 | i = 0; 22 | while (i < 5) { 23 | x = func_2(i, avg); 24 | i = i + 1; 25 | } 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /benchmarks/longer_8.ch: -------------------------------------------------------------------------------- 1 | int average(int x_1, int x_2, int x_3, int x_4, int x_5) { 2 | int total; 3 | total = 0; 4 | 5 | total = x_1 + x_2 + x_3 + x_4 + x_5; 6 | 7 | return total / 5; 8 | } 9 | 10 | int main() { 11 | int i; 12 | int x; 13 | int avg; 14 | int scaled; 15 | int factor; 16 | 17 | factor = 1; 18 | 19 | avg = average(72, 85, 90, 60, 88); 20 | scaled = avg / factor; 21 | 22 | i = 0; 23 | while (i < 5) { 24 | x = i + avg + scaled; 25 | i = i + 1; 26 | } 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v3 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: '3.9' 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r requirements.txt 28 | 29 | - name: Run unit tests 30 | run: pytest tests/unit/ 31 | -------------------------------------------------------------------------------- /references/Makefile: 
-------------------------------------------------------------------------------- 1 | all: 2 | clang tinyc_compiler.c -o program.out 3 | 4 | debug: 5 | clang tinyc_compiler.c -o debug_program.out -g 6 | 7 | clean: 8 | rm -rf *.out *.out.dSYM tests/*.result 9 | 10 | test: 11 | ./program.out < tests/two_is_less_than_three.test > tests/two_is_less_than_three.result 12 | ./program.out < tests/power_of_two.test > tests/power_of_two.result 13 | ./program.out < tests/greatest_common_divisor.test > tests/greatest_common_divisor.result 14 | ./program.out < tests/do_while.test > tests/do_while.result 15 | ./program.out < tests/while.test > tests/while.result 16 | ./program.out < tests/available_vars.test > tests/available_vars.result 17 | -------------------------------------------------------------------------------- /benchmarks/longer_9.ch: -------------------------------------------------------------------------------- 1 | int average(int x_1, int x_2, int x_3, int x_4, int x_5) { 2 | int total; 3 | total = 0; 4 | 5 | total = total + x_1; 6 | total = total + x_2; 7 | total = total + x_3; 8 | total = total + x_4; 9 | total = total + x_5; 10 | 11 | return total / 5; 12 | } 13 | 14 | int main() { 15 | int i; 16 | int x; 17 | int avg; 18 | int scaled; 19 | int factor; 20 | 21 | factor = 1; 22 | 23 | avg = average(72, 85, 90, 60, 88); 24 | scaled = avg / factor; 25 | 26 | i = 0; 27 | while (i < 5) { 28 | x = i + avg + scaled; 29 | i = i + 1; 30 | } 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /.github/workflows/integration_tests.yml: -------------------------------------------------------------------------------- 1 | name: Integration Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v3 18 | 19 | - name: Set up Python 20 | uses: 
actions/setup-python@v4 21 | with: 22 | python-version: '3.9' 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r requirements.txt 28 | 29 | - name: Run integration tests 30 | run: pytest tests/integration/ 31 | -------------------------------------------------------------------------------- /benchmarks/multiple_function_calls.ch: -------------------------------------------------------------------------------- 1 | int average(int a, int b, int c, int d, int e) { 2 | int sum; 3 | sum = a + b + c + d + e; 4 | return sum / 5; 5 | } 6 | 7 | int scale(int value, int factor) { 8 | return value * factor; 9 | } 10 | 11 | int print_result(int id, int grade) { 12 | return 0; 13 | } 14 | 15 | int main() { 16 | int i; 17 | int x; 18 | int avg; 19 | int scaled; 20 | int class_label; 21 | int factor; 22 | 23 | factor = 1; 24 | 25 | avg = average(72, 85, 90, 60, 88); 26 | scaled = scale(avg, factor); 27 | 28 | i = 0; 29 | while (i < 5) { 30 | x = print_result(i, avg); 31 | i = i + 1; 32 | } 33 | 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /references/llvm_type_cast.cpp: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | 3 | int int_sum(int a, int b) { 4 | return a + b; 5 | } 6 | 7 | float float_sum(float a, float b) { 8 | return a + b; 9 | } 10 | 11 | short short_sum(short a, short b) { 12 | return a + b; 13 | } 14 | 15 | int main() { 16 | int i = int_sum(23, 13); 17 | float f = float_sum(3.14, 2.71); 18 | short s = short_sum(1, 2); 19 | 20 | printf("%d", (int)(i + f + s)); 21 | 22 | // Composition 23 | short test_short_1 = i + (f + s); 24 | short test_short_2 = (2 * i * s) * f; 25 | 26 | int test_int_1 = i + (f + s); 27 | int test_int_2 = (2 * i * s) * f; 28 | 29 | float test_float_1 = i + s; 30 | float test_float_2 = (2 * i * s); 31 | 32 | return 0; 33 | } 34 | 
-------------------------------------------------------------------------------- /src/ast_nodes/operations/ADD.py: -------------------------------------------------------------------------------- 1 | """Representation of ADD nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class ADD(Operation): 11 | """ 12 | Implement the representation of an addition for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the addition. 18 | rhs : Node 19 | The Node representation of the right hand side of the addition. 20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("ADD") 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/DIV.py: -------------------------------------------------------------------------------- 1 | """Representation of DIV nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class DIV(Operation): 11 | """ 12 | Implement the representation of a division for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the division. 18 | rhs : Node 19 | The Node representation of the right hand side of the division. 
20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("DIV") 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/MOD.py: -------------------------------------------------------------------------------- 1 | """Representation of MOD nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class MOD(Operation): 11 | """ 12 | Implement the representation of the modulo operation for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the operation. 18 | rhs : Node 19 | The Node representation of the right hand side of the operation. 20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, supports_float=False, **kwargs) 25 | 26 | self.instruction: str = "MOD" 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/SUB.py: -------------------------------------------------------------------------------- 1 | """Representation of SUB nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class SUB(Operation): 11 | """ 12 | Implement the representation of a subtraction for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the subtraction. 18 | rhs : Node 19 | The Node representation of the right hand side of the subtraction. 
20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("SUB") 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/OR.py: -------------------------------------------------------------------------------- 1 | """Representation of OR nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class OR(Operation): 11 | """ 12 | Implement the representation of a disjunction (logical or) for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the operation. 18 | rhs : Node 19 | The Node representation of the right hand side of the operation. 20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("OR") 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/AND.py: -------------------------------------------------------------------------------- 1 | """Representation of AND nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class AND(Operation): 11 | """ 12 | Implement the representation of a conjunction (logical and) for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the operation. 18 | rhs : Node 19 | The Node representation of the right hand side of the operation. 
20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("AND") 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/BITOR.py: -------------------------------------------------------------------------------- 1 | """Representation of BITOR nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class BITOR(Operation): 11 | """ 12 | Implement the representation of a bitwise or operation (|) for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the operation. 18 | rhs : Node 19 | The Node representation of the right hand side of the operation. 20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, supports_float=False, **kwargs) 25 | 26 | self.instruction: str = "BITOR" 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/MULT.py: -------------------------------------------------------------------------------- 1 | """Representation of MULT nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class MULT(Operation): 11 | """ 12 | Implement the representation of a multiplication for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the multiplication. 18 | rhs : Node 19 | The Node representation of the right hand side of the multiplication. 
20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("MULT") 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/BITAND.py: -------------------------------------------------------------------------------- 1 | """Representation of BITAND nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class BITAND(Operation): 11 | """ 12 | Implement the representation of a bitwise and operation (&) for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the operation. 18 | rhs : Node 19 | The Node representation of the right hand side of the operation. 20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, supports_float=False, **kwargs) 25 | 26 | self.instruction: str = "BITAND" 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/EQUAL.py: -------------------------------------------------------------------------------- 1 | """Representation of EQUAL nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class EQUAL(Operation): 11 | """ 12 | Implement the representation of a "is equal" (==) comparison for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the comparison. 18 | rhs : Node 19 | The Node representation of the right hand side of the comparison. 
20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("EQ") 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/LESS.py: -------------------------------------------------------------------------------- 1 | """Representation of LESS nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class LESS(Operation): 11 | """ 12 | Implement the representation of a "less than" (<) comparison for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the comparison. 18 | rhs : Node 19 | The Node representation of the right hand side of the comparison. 20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("LT") 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/LSHIFT.py: -------------------------------------------------------------------------------- 1 | """Representation of LSHIFT nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class LSHIFT(Operation): 11 | """ 12 | Implement the representation of a left shift operation (<<) for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the operation. 18 | rhs : Node 19 | The Node representation of the right hand side of the operation. 
20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, supports_float=False, **kwargs) 25 | 26 | self.instruction: str = "LSHIFT" 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/RSHIFT.py: -------------------------------------------------------------------------------- 1 | """Representation of RSHIFT nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class RSHIFT(Operation): 11 | """ 12 | Implement the representation of a right shift operation (>>) for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the operation. 18 | rhs : Node 19 | The Node representation of the right hand side of the operation. 20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, supports_float=False, **kwargs) 25 | 26 | self.instruction: str = "RSHIFT" 27 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/DIFF.py: -------------------------------------------------------------------------------- 1 | """Representation of DIFF nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class DIFF(Operation): 11 | """ 12 | Implement the representation of a "is not equal" (!=) comparison for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the comparison. 18 | rhs : Node 19 | The Node representation of the right hand side of the comparison. 
20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("NEQ") 27 | -------------------------------------------------------------------------------- /benchmarks/many_control_flow.ch: -------------------------------------------------------------------------------- 1 | int main() { 2 | int i; 3 | int j; 4 | int k; 5 | 6 | int hits_1; 7 | hits_1 = 0; 8 | 9 | int hits_2; 10 | hits_2 = 0; 11 | 12 | int hits_3; 13 | hits_3 = 0; 14 | 15 | 16 | i = 0; 17 | j = 0; 18 | k = 0; 19 | while (i < 10) { 20 | while (j < 10) { 21 | while (k < 10) { 22 | if (i == k) { 23 | hits_1 = hits_1 + 1; 24 | } 25 | hits_2 = hits_2 + i; 26 | 27 | k = k + 1; 28 | } 29 | if (i == j) { 30 | hits_3 = hits_3 + 1; 31 | hits_2 = hits_2 + 1; 32 | } 33 | 34 | k = 0; 35 | j = j + 1; 36 | } 37 | i = i + 1; 38 | j = 0; 39 | } 40 | 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /src/ast_nodes/operations/GREATER.py: -------------------------------------------------------------------------------- 1 | """Representation of GREATER nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing_extensions import override 4 | 5 | from src.ast_nodes.node import Node 6 | 7 | from src.ast_nodes.operations.operation import Operation 8 | 9 | 10 | class GREATER(Operation): 11 | """ 12 | Implement the representation of a "greater than" (>) comparison for the AST. 13 | 14 | Parameters 15 | ---------- 16 | lhs : Node 17 | The Node representation of the left hand side of the comparison. 18 | rhs : Node 19 | The Node representation of the right hand side of the comparison. 
20 | """ 21 | 22 | @override 23 | def __init__(self, lhs: Node, rhs: Node, **kwargs) -> None: 24 | super().__init__(lhs, rhs, **kwargs) 25 | 26 | self.instruction: str = self._compute_instruction("GT") 27 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy Docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v3 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.9' 20 | 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install -r requirements.txt 25 | 26 | - name: Generate documentation 27 | run: | 28 | pdoc src --output-dir docs 29 | 30 | - name: Deploy to GitHub Pages 31 | uses: peaceiris/actions-gh-pages@v3 32 | with: 33 | github_token: ${{ secrets.GITHUB_TOKEN }} 34 | publish_dir: docs/ 35 | -------------------------------------------------------------------------------- /benchmarks/many_function_calls.ch: -------------------------------------------------------------------------------- 1 | int classify(int score) { 2 | int result; 3 | if (score > 90 || score == 90) { 4 | result = 5; 5 | } else if (score > 75 || score == 75) { 6 | result = 4; 7 | } else if (score > 60 || score == 60) { 8 | result = 3; 9 | } else if (score > 40 || score == 40) { 10 | result = 2; 11 | } else { 12 | result = 1; 13 | } 14 | 15 | return result; 16 | } 17 | 18 | int print_result(int id, int grade) { 19 | return 0; 20 | } 21 | 22 | int main() { 23 | int i; 24 | int x; 25 | int avg; 26 | int scaled; 27 | int class_label; 28 | int factor; 29 | 30 | factor = (72 + 85 + 90 + 60 + 88); 31 | 32 | class_label = classify(factor); 33 | 34 | i = 0; 35 | while (i < 5) { 36 | x = print_result(i, class_label); 37 | i 
class AbstractCertificator:
    """
    Base class for certificator classes.

    Concrete certificators are expected to override `certificate`.

    Notes
    -----
    This class does not use `abc.ABCMeta` as its metaclass, so the
    `@abstractmethod` decorator on `certificate` only documents intent: it
    does not prevent this class (or an incomplete subclass) from being
    instantiated.
    """

    def __init__(self, **kwargs) -> None:
        # Starts empty; `get_certificate` hands this very list back.
        self.computed_certificate: list[str] = []

        # Starts at 2, the first prime number.
        self.current_prime: int = 2

        # The environment maps variable primes to symbols that represent their
        # associated types. `int x[2]` will be mapped to [3, 3], and
        # `struct { int x; float y; }` will be mapped to [3, 5]. (3 represents
        # integers, and 5 represents floating point numbers.)
        # NOTE(review): the examples above map to lists, while the annotation
        # says `int` values -- confirm against the subclasses.
        self.environment: dict[int, int] = {}

    @abstractmethod
    def certificate(self, **kwargs) -> str:
        """
        Compute the certificate. Meant to be overridden by subclasses.

        Returns
        -------
        : str
            The computed certificate. This base implementation does nothing
            and returns `None`.
        """

        pass

    def get_certificate(self) -> list[str]:
        """
        Get the certificate stored in `computed_certificate`.

        Returns
        -------
        : list[str]
            The `computed_certificate` attribute itself (not a copy).
        """

        return self.computed_certificate
def test_struct_certification() -> None:
    """Check that the front and backend certificates of the `while` program match."""

    # NOTE(review): the name mentions structs, but `SOURCE_CODE` in this module
    # is the `while` program -- it looks copied from the struct test. Consider
    # renaming it to `test_while_certification`.
    compiled = create_instance(source_code=SOURCE_CODE)

    from_frontend = FrontendCertificator(ast=compiled.get_ast()).certificate()
    from_backend = BackendCertificator(program=compiled.get_program()).certificate()

    assert from_frontend == from_backend
def main() -> int:
    """
    Read the source code from the stdin, certify it and run it in the Virtual Machine.

    The frontend and backend certificates are printed and compared. The
    program only runs if they match; the variables of the VM are printed after
    the execution.

    Returns
    -------
    : int
        `0` if the certificates match and the program was run, `1` otherwise.
    """

    source_code: str = sys.stdin.read()

    instance = create_instance(source_code)

    vm = instance.get_vm()

    frontend_certificator = instance.get_frontend_certificator()
    backend_certificator = instance.get_backend_certificator()

    # Compute the certificates instead of comparing placeholders: with both
    # sides set to `...` the check below could never fail.
    # NOTE(review): assumes the certificators handed out by the instance expose
    # `certificate()` returning the certificate, as the integration tests use
    # it -- confirm.
    frontend_certificate = frontend_certificator.certificate()
    backend_certificate = backend_certificator.certificate()

    print("Frontend certificate:")
    print(frontend_certificate)

    print("Backend certificate:")
    print(backend_certificate)

    # Explicit comparison rather than `assert`: assertions are stripped when
    # Python runs with `-O`, which would silently skip the validation.
    if frontend_certificate != backend_certificate:
        print("Certificates don't match. Aborting...")
        return 1

    print("Certificates match!")

    vm.run()
    vm.print()

    return 0
def test_function_call():
    """Run the factorial program and check the memory of the VM afterwards."""

    virtual_machine = create_instance(source_code=SOURCE_CODE).get_vm()
    virtual_machine.run()

    assert virtual_machine.get_memory() == {"0x0": 0, "0x4": 120, "0x8": 120}
"""Test the computation of the GCD between 100 and 125.""" 30 | 31 | instance = create_instance(source_code=SOURCE_CODE) 32 | vm = instance.get_vm() 33 | vm.run() 34 | 35 | expected_memory = {"0x0": 25, "0x4": 25} 36 | assert vm.get_memory() == expected_memory 37 | 38 | 39 | def test_gcd_certification() -> None: 40 | """Test the front and backend certification.""" 41 | 42 | instance = create_instance(source_code=SOURCE_CODE) 43 | 44 | ast = instance.get_ast() 45 | frontend_certificate = FrontendCertificator(ast=ast).certificate() 46 | 47 | program = instance.get_program() 48 | backend_certificate = BackendCertificator(program=program).certificate() 49 | 50 | assert frontend_certificate == backend_certificate 51 | -------------------------------------------------------------------------------- /tests/integration/test_struct.py: -------------------------------------------------------------------------------- 1 | """Integration test for a simple struct manipulation.""" 2 | 3 | from src.certificators import BackendCertificator, FrontendCertificator 4 | from src.runner import create_instance 5 | 6 | 7 | SOURCE_CODE = """ 8 | struct my_struct { 9 | int attribute_1; 10 | float attribute_2; 11 | short attribute_3; 12 | }; 13 | 14 | int main() { 15 | my_struct struct_var; 16 | 17 | struct_var.attribute_1 = 10; 18 | struct_var.attribute_2 = 13.1 * 23.4 / 30.2; 19 | struct_var.attribute_3 = (struct_var.attribute_1) * (struct_var.attribute_2); 20 | 21 | return 0; 22 | } 23 | """ 24 | 25 | 26 | def test_struct(): 27 | """Test a simple struct.""" 28 | 29 | instance = create_instance(source_code=SOURCE_CODE) 30 | vm = instance.get_vm() 31 | vm.run() 32 | 33 | expected_memory = {"0x0": 10, "0x4": 10.150331125827813, "0x8": 101} 34 | 35 | assert vm.get_memory() == expected_memory 36 | 37 | 38 | def test_struct_certification() -> None: 39 | """Test the front and backend certification.""" 40 | 41 | instance = create_instance(source_code=SOURCE_CODE) 42 | 43 | ast = instance.get_ast() 
def test_array():
    """Run the array program and check the memory of the VM afterwards."""

    virtual_machine = create_instance(source_code=SOURCE_CODE).get_vm()
    virtual_machine.run()

    # `my_array[i] = i << i` for `i` in 0..4, one entry every 4 bytes.
    memory_after_run = {hex(4 * index): index << index for index in range(5)}

    # `i` once the loop is over, and `j = my_array[2] + 3`.
    memory_after_run["0x14"] = 5
    memory_after_run["0x18"] = 11

    assert virtual_machine.get_memory() == memory_after_run
class CST(Node):
    """
    AST node that represents a constant.

    Parameters
    ----------
    constant_metadata : dict[str, str]
        Metadata about this constant. The `value` and `type` keys are read.

    Raises
    ------
    TypeError
        Raised if the `type` entry of the metadata is not a key of
        `TYPE_SYMBOLS_MAP`.
    """

    @override
    def __init__(self, constant_metadata: dict[str, str]) -> None:
        value: int = constant_metadata.get("value")
        constant_type: str = constant_metadata.get("type")

        if constant_type not in TYPE_SYMBOLS_MAP:
            raise TypeError(f"Constant has invalid type '{constant_type}'")

        # The map entry is unpacked as exactly two values; the second one is
        # the callable used to cast the raw value.
        _unused, enforce_type = TYPE_SYMBOLS_MAP[constant_type].values()
        super().__init__(enforce_type(value))

        self.type = constant_type
        self.instruction: str = "CONSTANT"

        # Shift the non-negative values by one, so the exponent is never zero.
        if value >= 0:
            exponent = value + 1
        else:
            exponent = value

        # NOTE(review): `self.symbol` is read before being written here, so it
        # is presumably set by `Node.__init__` -- confirm.
        self.symbol: str = f"({self.symbol})^({exponent})"
def test_fib() -> None:
    """
    Check the memory of the VM after running the Fibonacci program.

    The 10th element of the sequence ends up in the `b` variable (at 0x8).
    """

    virtual_machine = create_instance(source_code=SOURCE_CODE).get_vm()
    virtual_machine.run()

    assert virtual_machine.get_memory() == {
        "0x0": 10,
        "0x4": 34,
        "0x8": 55,
        "0xc": 21,
    }
define i32 @main() #0 { 17 | %1 = alloca i32, align 4 18 | %2 = alloca i32, align 4 19 | %3 = alloca i32, align 4 20 | store i32 0, ptr %1, align 4 21 | %4 = call i32 @test(i32 noundef 123) 22 | store i32 %4, ptr %2, align 4 23 | %5 = load i32, ptr %3, align 4 24 | %6 = call i32 @test(i32 noundef %5) 25 | store i32 %6, ptr %2, align 4 26 | ret i32 0 27 | } 28 | 29 | attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } 30 | 31 | !llvm.module.flags = !{!0, !1, !2, !3} 32 | !llvm.ident = !{!4} 33 | 34 | !0 = !{i32 1, !"wchar_size", i32 4} 35 | !1 = !{i32 8, !"PIC Level", i32 2} 36 | !2 = !{i32 7, !"uwtable", i32 1} 37 | !3 = !{i32 7, !"frame-pointer", i32 1} 38 | !4 = !{!"Homebrew clang version 17.0.3"} 39 | -------------------------------------------------------------------------------- /tests/unit/test_runner.py: -------------------------------------------------------------------------------- 1 | """Implement unit tests for the `src.interpreter` module.""" 2 | 3 | from src.certificators import BackendCertificator, FrontendCertificator 4 | from src.runner import create_instance 5 | from src.virtual_machine import VirtualMachine 6 | 7 | from tests.unit.common import * 8 | 9 | 10 | def test_create_instance(): 11 | """ 12 | Test the `create_instance` function. 13 | 14 | This method simply passes because all the attributes are tested in the 15 | other unit tests. 
def test_vm():
    """Check that the instance's `vm` equals a VM built from `MACHINE_CODE`."""

    actual_vm = create_instance(SOURCE_CODE).get_vm()
    expected_vm = VirtualMachine(MACHINE_CODE)

    assert actual_vm == expected_vm
def test_else() -> None:
    """Check that the `else` branch runs when `i < 5` does not hold."""

    placeholder = 10

    virtual_machine = create_instance(
        source_code=SOURCE_CODE.format(placeholder=placeholder)
    ).get_vm()
    virtual_machine.run()

    # `i` keeps the placeholder, and `x` gets the value set in the `else` block.
    assert virtual_machine.get_memory() == {"0x0": placeholder, "0x4": 35}
src.ast_nodes.variables.ELEMENT_ACCESS import ELEMENT_ACCESS 18 | 19 | # Functions 20 | from src.ast_nodes.functions.ARG import ARG 21 | from src.ast_nodes.functions.FUNC_CALL import FUNC_CALL 22 | from src.ast_nodes.functions.FUNC_DEF import FUNC_DEF 23 | from src.ast_nodes.functions.PARAM import PARAM 24 | from src.ast_nodes.functions.RET_SYM import RET_SYM 25 | 26 | # Conditional nodes (control flow, loops etc.) 27 | from src.ast_nodes.conditionals.IF import IF 28 | from src.ast_nodes.conditionals.IFELSE import IFELSE 29 | from src.ast_nodes.conditionals.WHILE import WHILE 30 | 31 | # Binary operations 32 | from src.ast_nodes.operations.ADD import ADD 33 | from src.ast_nodes.operations.ASSIGN import ASSIGN 34 | from src.ast_nodes.operations.SUB import SUB 35 | from src.ast_nodes.operations.MULT import MULT 36 | from src.ast_nodes.operations.DIV import DIV 37 | from src.ast_nodes.operations.MOD import MOD 38 | 39 | # Logical operations 40 | from src.ast_nodes.operations.AND import AND 41 | from src.ast_nodes.operations.NOT import NOT 42 | from src.ast_nodes.operations.OR import OR 43 | 44 | # Bit-wise operations 45 | from src.ast_nodes.operations.BITAND import BITAND 46 | from src.ast_nodes.operations.BITOR import BITOR 47 | from src.ast_nodes.operations.LSHIFT import LSHIFT 48 | from src.ast_nodes.operations.RSHIFT import RSHIFT 49 | 50 | # Comparisons 51 | from src.ast_nodes.operations.EQUAL import EQUAL 52 | from src.ast_nodes.operations.DIFF import DIFF 53 | from src.ast_nodes.operations.LESS import LESS 54 | from src.ast_nodes.operations.GREATER import GREATER 55 | -------------------------------------------------------------------------------- /tests/unit/test_code_generator.py: -------------------------------------------------------------------------------- 1 | """Implement unit tests for the `src.code_generator` module.""" 2 | 3 | from copy import deepcopy 4 | 5 | from src.code_generator import CodeGenerator 6 | from tests.unit.common import 
def test_parse_global_variables() -> None:
    """Check the environment built by `CodeGenerator.parse_global_variables`."""

    code_generator = CodeGenerator(root=deepcopy(ABSTRACT_SYNTAX_TREE_ROOT))
    code_generator.parse_global_variables()

    # Manually add the IDs, counting from 1.
    # NOTE(review): this writes to `program["global_vars"]` while the assertion
    # reads `environment`; presumably both share the same entries -- confirm.
    for bytecode_id, global_var in enumerate(
        code_generator.program["global_vars"], start=1
    ):
        global_var["bytecode_id"] = bytecode_id

    assert code_generator.environment == ENVIRONMENT
/references/llvm_structs_and_arrays.ll: -------------------------------------------------------------------------------- 1 | ; ModuleID = 'llvm_structs_and_arrays.cpp' 2 | source_filename = "llvm_structs_and_arrays.cpp" 3 | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" 4 | target triple = "arm64-apple-macosx14.0.0" 5 | 6 | %struct.my_type = type { i32, ptr, float, i32, i64 } 7 | %struct.my_type_clone = type { i32, ptr, float, i32, i64 } 8 | %struct.another_type = type { i32, i32, i32 } 9 | 10 | @some_integer = global i32 0, align 4 11 | @some_array = global [5 x i32] zeroinitializer, align 4 12 | 13 | ; Function Attrs: mustprogress noinline nounwind optnone ssp uwtable(sync) 14 | define void @_Z4testv(ptr noalias sret(%struct.my_type) align 8 %0) #0 { 15 | ret void 16 | } 17 | 18 | ; Function Attrs: mustprogress noinline nounwind optnone ssp uwtable(sync) 19 | define void @_Z10test_clonev(ptr noalias sret(%struct.my_type_clone) align 8 %0) #0 { 20 | ret void 21 | } 22 | 23 | ; Function Attrs: mustprogress noinline norecurse optnone ssp uwtable(sync) 24 | define noundef i32 @main() #1 { 25 | %1 = alloca %struct.my_type, align 8 26 | %2 = alloca [5 x %struct.another_type], align 4 27 | %3 = getelementptr inbounds %struct.my_type, ptr %1, i32 0, i32 0 28 | store i32 1, ptr %3, align 8 29 | %4 = call noalias noundef nonnull ptr @_Znam(i64 noundef 16) #3 30 | %5 = getelementptr inbounds %struct.my_type, ptr %1, i32 0, i32 1 31 | store ptr %4, ptr %5, align 8 32 | %6 = getelementptr inbounds %struct.my_type, ptr %1, i32 0, i32 2 33 | store float 1.000000e+00, ptr %6, align 8 34 | %7 = getelementptr inbounds %struct.my_type, ptr %1, i32 0, i32 3 35 | store i32 4, ptr %7, align 4 36 | %8 = getelementptr inbounds %struct.my_type, ptr %1, i32 0, i32 4 37 | store i64 321, ptr %8, align 8 38 | %9 = getelementptr inbounds [5 x %struct.another_type], ptr %2, i64 0, i64 0 39 | %10 = getelementptr inbounds %struct.another_type, ptr %9, i32 0, i32 0 40 | store i32 1, ptr 
%10, align 4 41 | ret i32 0 42 | } 43 | 44 | ; Function Attrs: nobuiltin allocsize(0) 45 | declare noundef nonnull ptr @_Znam(i64 noundef) #2 46 | 47 | attributes #0 = { mustprogress noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } 48 | attributes #1 = { mustprogress noinline norecurse optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } 49 | attributes #2 = { nobuiltin allocsize(0) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } 50 | attributes #3 = { builtin allocsize(0) } 51 | 52 | !llvm.module.flags = !{!0, !1, !2, !3} 53 | !llvm.ident = !{!4} 54 | 55 | !0 = !{i32 1, !"wchar_size", i32 4} 56 | !1 = !{i32 8, !"PIC Level", i32 2} 57 | !2 = !{i32 7, !"uwtable", i32 1} 58 | !3 = !{i32 7, !"frame-pointer", i32 1} 59 | !4 = !{!"Homebrew clang version 17.0.3"} 60 | -------------------------------------------------------------------------------- /src/inverters/certificate_to_high_level.py: -------------------------------------------------------------------------------- 1 | """Implement certificate to high level program inverter.""" 2 | 3 | from src.inverters.base_inverter import BaseInverter 4 | 5 | 6 | class CertificateToHighLevel(BaseInverter): 7 | """ 8 | Invert a certificate to retrieve the original program in canonical form. 
def _compute_functions_boundaries(self) -> dict[str, dict[str, int]]:
    """
    Compute the first and last IR indices of every function in `self.ir`.

    A function is delimited by its closing `RET_SYM` operation: it starts
    right after the previous `RET_SYM` (or at index 0 for the first one)
    and ends at the index of its own `RET_SYM`, inclusive. Trailing IR
    items that are not followed by a `RET_SYM` are not reported.

    Returns
    -------
    functions : dict[str, dict[str, int]]
        A `{"func_<n>": {"start": int, "end": int}}` mapping, with `<n>`
        counting from 1 in order of appearance. Empty if the IR has no
        `RET_SYM` operation.
    """

    # NOTE(review): assumes `self.ir` is a sequence of dicts with an
    # "operation" key, presumably set up by `BaseInverter` -- confirm.
    # NOTE(review): every `RET_SYM` closes a function, so a function with
    # more than one `return` would be split -- confirm this is intended.
    functions: dict[str, dict[str, int]] = {}
    start = 0
    current_function_id = 1

    for idx, item in enumerate(self.ir):
        if item["operation"] != "RET_SYM":
            continue

        functions[f"func_{current_function_id}"] = {
            "start": start,
            "end": idx
        }

        # Move on to the next function. Without bumping the ID, every
        # function would overwrite the "func_1" entry.
        current_function_id += 1
        start = idx + 1

    return functions
def is_terminator(op: str) -> bool:
    """
    Check whether an IR token closes an expression.

    Parameters
    ----------
    op : str
        The IR token.

    Returns
    -------
    : bool
        `True` if `op` is one of `ASSIGN`, `ARG` or `RET_SYM`; `False`
        otherwise.
    """

    return op in ("ASSIGN", "ARG", "RET_SYM")
@override
def generate_code(
    self, register: int, environment: dict[str, dict[int, str]]
) -> tuple[
    list[dict[str, Union[int, str, float]]],
    int,
    dict[int, str]
]:
    """
    Generate the code associated with this `ASSIGN` operation.

    The heavy lifting is delegated to the parent class' `generate_code`.
    The last instruction it emits is then reshaped: its `lhs_register` and
    `rhs_register` metadata fields are renamed to `register` and `value`,
    respectively.

    An `ASSIGN` does not need a register of its own, so the register
    counter returned by the parent is decremented by one before being
    handed back.

    Parameters
    ----------
    register : int
        The number of the register to be used by the code generated by this
        Node.
    environment : dict
        The compiler's environment, that maps variables IDs to memory
        addresses and function IDs to instructions indices.

    Returns
    -------
    code : list of dict
        The generated instructions, each one a dictionary with the
        `instruction` and its `metadata`.
    register : int
        The number of the next register available.
    environment : dict
        The updated environment.
    """

    code, next_register, environment = super().generate_code(
        register=register,
        environment=environment
    )

    # The store instruction is the last one emitted by the parent class.
    store_metadata = code[-1]["metadata"]

    # Rename the operand fields to the names the store instruction uses.
    store_metadata["register"] = store_metadata["lhs_register"]
    store_metadata["value"] = store_metadata["rhs_register"]

    for stale_field in ("lhs_register", "rhs_register"):
        del store_metadata[stale_field]

    # Give back the register the parent reserved for this node.
    return code, next_register - 1, environment
@override
def generate_code(
    self, register: int, environment: dict[str, dict[int, str]]
) -> tuple[
    list[dict[str, Union[int, str, float]]],
    int,
    dict[int, str]
]:
    """
    Generate the code associated with this `IF`.

    The emitted code is laid out as: the `parenthesis_expression` code, a
    `JZ` conditional jump, and the `statement_if_true` code. The jump is
    meant to skip the whole `statement_if_true` block when the
    `parenthesis_expression` evaluates to `False`.

    Parameters
    ----------
    register : int
        The number of the register to be used by the code generated by this
        Node.
    environment : dict
        The compiler's environment, that maps variables IDs to memory
        addresses and function IDs to instructions indices.

    Returns
    -------
    code : list of dict
        The generated instructions, each one a dictionary with the
        `instruction` and its `metadata`.
    register : int
        The number of the next register available.
    environment : dict
        The updated environment.
    """

    condition_code, register, environment = (
        self.parenthesis_expression.generate_code(
            register=register,
            environment=environment
        )
    )

    # `register` is now the next free one, so the condition is taken from
    # the register right before it.
    condition_register: int = register - 1

    body_code, register, environment = self.statement_if_true.generate_code(
        register=register,
        environment=environment
    )

    # Jump over every instruction of the body, plus one to land right
    # after its last instruction.
    skip_body_jump = {
        "instruction": "JZ",
        "metadata": {
            "conditional_register": condition_register,
            "jump_size": len(body_code) + 1,
        },
    }

    if_code: list[dict[str, Union[int, str]]] = [
        *condition_code,
        skip_body_jump,
        *body_code,
    ]

    return if_code, register, environment
@override
def certificate(
    self,
    certificator_env: dict[int, list[int]]
) -> dict[int, list[int]]:
    """
    Compute the certificate of this `Conditional` and of its subtrees.

    The steps run in this order: set the boundary that opens the
    condition expression (the `COND` symbol), certificate the
    `parenthesis_expression`, certificate the node itself through the
    parent class, certificate the `statement_if_true`, and finally set the
    closing boundary from `self.boundary_symbol`.

    Parameters
    ----------
    certificator_env : dict[int, list[int]]
        The certificator's environment, that maps variables IDs to
        encodings of their types.

    Returns
    -------
    certificator_env : dict[int, list[int]]
        The environment after being threaded through the condition, the
        node itself and the statement.
    """

    # Marks where the condition expression starts in the certificate.
    condition_marker = SYMBOLS_MAP["COND"]
    self.conditional_expression_boundary = f"{condition_marker}"

    # Thread the environment through: condition -> this node -> statement.
    env_after_condition = self.parenthesis_expression.certificate(
        certificator_env
    )
    env_after_node = super().certificate(env_after_condition)
    env_after_statement = self.statement_if_true.certificate(env_after_node)

    # `boundary_symbol` is not set by this class' `__init__`; it is
    # expected to be provided by the concrete conditional.
    self.boundary_certificate = f"{self.boundary_symbol}"

    return env_after_statement
{"prime": 2}, 19 | 2: {"prime": 3}, 20 | 3: {"prime": 5}, 21 | 4: {"prime": 7} 22 | }, 23 | "variables": { 24 | "0x28": { 25 | "addresses": { 26 | "0x28": "int", 27 | "0x2c": "__unknown_type__", 28 | }, 29 | "prime": 2, 30 | }, 31 | "0x30": { 32 | "addresses": { 33 | "0x30": "int", 34 | }, 35 | "prime": 3, 36 | }, 37 | "0x34": { 38 | "addresses": { 39 | "0x34": "int", 40 | }, 41 | "prime": 5, 42 | }, 43 | "0x38": { 44 | "addresses": { 45 | "0x38": "int", 46 | }, 47 | "prime": 7, 48 | }, 49 | "0x3c": { 50 | "addresses": { 51 | "0x3c": "float", 52 | }, 53 | "prime": 11, 54 | }, 55 | "0x40": { 56 | "addresses": { 57 | "0x40": "int", 58 | }, 59 | "prime": 13, 60 | }, 61 | "0x44": { 62 | "addresses": { 63 | "0x44": "int", 64 | }, 65 | "prime": 17, 66 | }, 67 | "0x48": { 68 | "addresses": { 69 | "0x48": "int", 70 | }, 71 | "prime": 19, 72 | }, 73 | "0x4c": { 74 | "addresses": { 75 | "0x4c": "int", 76 | }, 77 | "prime": 23, 78 | }, 79 | "0x50": { 80 | "addresses": { 81 | "0x50": "float", 82 | }, 83 | "prime": 29, 84 | }, 85 | "0x58": { 86 | "addresses": { 87 | "0x58": "int", 88 | "0x5c": "__unknown_type__", 89 | }, 90 | "prime": 31, 91 | }, 92 | "0x60": { 93 | "addresses": { 94 | "0x60": "int", 95 | }, 96 | "prime": 37, 97 | }, 98 | "0x64": { 99 | "addresses": { 100 | "0x64": "__unknown_type__", 101 | "0x68": "__unknown_type__", 102 | "0x6c": "__unknown_type__", 103 | "0x70": "__unknown_type__", 104 | "0x74": "__unknown_type__", 105 | "0x78": "int", 106 | "0x7c": "__unknown_type__", 107 | "0x80": "__unknown_type__", 108 | "0x84": "__unknown_type__", 109 | "0x88": "__unknown_type__", 110 | }, 111 | "prime": 41, 112 | }, 113 | "0x8c": { 114 | "addresses": { 115 | "0x8c": "int", 116 | }, 117 | "prime": 43, 118 | }, 119 | }, 120 | "stash": {101: ["41"]} 121 | } 122 | 123 | 124 | # def test_certificate(): 125 | # """Test the BackendCertificator.certificate method.""" 126 | 127 | # backend_certificator = BackendCertificator(program=MACHINE_CODE) 128 | # 
@override
def generate_code(
    self, register: int, environment: dict[str, dict[int, str]]
) -> tuple[
    list[dict[str, Union[int, str, float]]],
    int,
    dict[int, str]
]:
    """
    Generate the code associated with this `NOT`.

    Generate the code of the negated `expression` first, and then append
    the `NOT` instruction itself. The instruction writes to the next free
    register and takes as `value` the register right before it.

    Parameters
    ----------
    register : int
        The number of the register to be used by the code generated by this
        Node.
    environment : dict
        The compiler's environment, that maps variables IDs to memory
        addresses and function IDs to instructions indices.

    Returns
    -------
    code : list of dict
        The generated instructions, each one a dictionary with the
        `instruction` and its `metadata`.
    register : int
        The number of the next register available.
    environment : dict
        The updated environment.
    """

    operand_code, register, environment = self.expression.generate_code(
        register=register,
        environment=environment
    )

    negation = {
        "instruction": self.instruction,
        "metadata": {"register": register, "value": register - 1},
    }

    code: list[dict[str, Union[int, str, None]]] = [*operand_code, negation]

    # One register was consumed by the `NOT` instruction itself.
    return code, register + 1, environment
class Charon:
    """
    Bundle every artifact produced for a single [C]haron program.

    An instance simply stores what it is given -- the parsed source, the
    Abstract Syntax Tree, the code generator, the compiled program, the
    Virtual Machine, and both certificators -- and exposes each of them
    through a getter, so all the metadata lives in a single object.

    Parameters
    ----------
    parsed_source : dict[str, dict]
        The source code after being tokenized and parsed by the Lexer.
    ast : AbstractSyntaxTree
        The Abstract Syntax Tree of this program.
    code_generator : CodeGenerator
        The code generator of this program.
    program : dict[str, dict]
        The compiled program, to be executed by the Virtual Machine.
    vm : VirtualMachine
        An instance of `VirtualMachine` loaded with the `program`.
    frontend_certificator : FrontendCertificator
        An instance of `FrontendCertificator` loaded with the `ast`.
    backend_certificator : BackendCertificator
        An instance of `BackendCertificator` loaded with the `program`.
    """

    def __init__(
        self,
        parsed_source: dict[str, dict],
        ast: AbstractSyntaxTree,
        code_generator: CodeGenerator,
        program: dict[str, dict],
        vm: VirtualMachine,
        frontend_certificator: FrontendCertificator,
        backend_certificator: BackendCertificator,
    ) -> None:
        # Frontend artifacts
        self.parsed_source, self.ast = parsed_source, ast

        # Backend artifacts
        self.code_generator, self.program, self.vm = code_generator, program, vm

        # Certificators
        self.frontend_certificator, self.backend_certificator = (
            frontend_certificator,
            backend_certificator,
        )

    def get_parsed_source(self) -> dict[str, dict]:
        """Return the stored `parsed_source`."""

        return self.parsed_source

    def get_ast(self) -> AbstractSyntaxTree:
        """Return the stored `ast`."""

        return self.ast

    def get_code_generator(self) -> CodeGenerator:
        """Return the stored `code_generator`."""

        return self.code_generator

    def get_program(self) -> dict[str, dict]:
        """Return the stored `program`."""

        return self.program

    def get_vm(self) -> VirtualMachine:
        """Return the stored `vm`."""

        return self.vm

    def get_frontend_certificator(self) -> FrontendCertificator:
        """Return the stored `frontend_certificator`."""

        return self.frontend_certificator

    def get_backend_certificator(self) -> BackendCertificator:
        """Return the stored `backend_certificator`."""

        return self.backend_certificator
class WHILE(Conditional):
    """
    Implement the representation of a `WHILE` loop for the AST.

    This is a thin layer over `Conditional`: the loop body is handed to
    the parent class as its `statement_if_true`.

    Parameters
    ----------
    parenthesis_expression : Node
        The node representation of the expression to be evaluated.
    loop : Node
        The node representation of code to run while the
        `parenthesis_expression` evaluates to `True`.
    """

    @override
    def __init__(self, parenthesis_expression: Node, loop: Node) -> None:
        super().__init__(parenthesis_expression, loop)

        self.boundary_symbol = SYMBOLS_MAP["WHILE_END"]

    @override
    def generate_code(
        self, register: int, environment: dict[str, dict[int, str]]
    ) -> tuple[
        list[dict[str, Union[int, str, float]]],
        int,
        dict[int, str]
    ]:
        """
        Generate the code associated with this `WHILE`.

        The emitted code is laid out as: the `parenthesis_expression`
        code, a `JZ` jump meant to leave the loop when the expression
        evaluates to `False`, the loop body, and a second `JZ` that goes
        back to the start of the `parenthesis_expression` so it gets
        reevaluated.

        Parameters
        ----------
        register : int
            The number of the register to be used by the code generated by
            this Node.
        environment : dict
            The compiler's environment, that maps variables IDs to memory
            addresses and function IDs to instructions indices.

        Returns
        -------
        code : list of dict
            The generated instructions, each one a dictionary with the
            `instruction` and its `metadata`.
        register : int
            The number of the next register available.
        environment : dict
            The updated environment.
        """

        condition_code, register, environment = (
            self.parenthesis_expression.generate_code(
                register=register,
                environment=environment
            )
        )

        # `register` is now the next free one, so the condition is taken
        # from the register right before it.
        condition_register: int = register - 1

        body_code, register, environment = self.statement_if_true.generate_code(
            register=register,
            environment=environment
        )

        body_size = len(body_code)

        # Skip the body and the backwards jump that follows it (hence +2).
        exit_jump = {
            "instruction": "JZ",
            "metadata": {
                "conditional_register": condition_register,
                "jump_size": body_size + 2,
            },
        }

        # Negative offset: go back over the body, the exit jump and the
        # whole condition code. The "zero" register is what makes this
        # jump unconditional -- presumably it always holds zero; confirm
        # against the Virtual Machine.
        back_jump = {
            "instruction": "JZ",
            "metadata": {
                "conditional_register": "zero",
                "jump_size": -(len(condition_code) + body_size + 1),
            },
        }

        while_code: list[dict[str, Union[int, str]]] = [
            *condition_code,
            exit_jump,
            *body_code,
            back_jump,
        ]

        return while_code, register, environment
29 | # Libraries 30 | *.lib 31 | *.a 32 | *.la 33 | *.lo 34 | 35 | # Shared objects (inc. Windows DLLs) 36 | *.dll 37 | *.so 38 | *.so.* 39 | *.dylib 40 | 41 | # Executables 42 | *.exe 43 | *.out 44 | *.app 45 | *.i*86 46 | *.x86_64 47 | *.hex 48 | 49 | # Debug files 50 | *.dSYM/ 51 | *.su 52 | *.idb 53 | *.pdb 54 | 55 | # Kernel Module Compile Results 56 | *.mod* 57 | *.cmd 58 | .tmp_versions/ 59 | modules.order 60 | Module.symvers 61 | Mkfile.old 62 | dkms.conf 63 | 64 | # I/O files 65 | input 66 | output 67 | *.result 68 | 69 | # Byte-compiled / optimized / DLL files 70 | __pycache__/ 71 | *.py[cod] 72 | *$py.class 73 | 74 | # C extensions 75 | *.so 76 | 77 | # Distribution / packaging 78 | .Python 79 | build/ 80 | develop-eggs/ 81 | dist/ 82 | downloads/ 83 | eggs/ 84 | .eggs/ 85 | lib/ 86 | lib64/ 87 | parts/ 88 | sdist/ 89 | var/ 90 | wheels/ 91 | share/python-wheels/ 92 | *.egg-info/ 93 | .installed.cfg 94 | *.egg 95 | MANIFEST 96 | 97 | # PyInstaller 98 | # Usually these files are written by a python script from a template 99 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
100 | *.manifest 101 | *.spec 102 | 103 | # Installer logs 104 | pip-log.txt 105 | pip-delete-this-directory.txt 106 | 107 | # Unit test / coverage reports 108 | htmlcov/ 109 | .tox/ 110 | .nox/ 111 | .coverage 112 | .coverage.* 113 | .cache 114 | nosetests.xml 115 | coverage.xml 116 | *.cover 117 | *.py,cover 118 | .hypothesis/ 119 | .pytest_cache/ 120 | cover/ 121 | 122 | # Translations 123 | *.mo 124 | *.pot 125 | 126 | # Django stuff: 127 | *.log 128 | local_settings.py 129 | db.sqlite3 130 | db.sqlite3-journal 131 | 132 | # Flask stuff: 133 | instance/ 134 | .webassets-cache 135 | 136 | # Scrapy stuff: 137 | .scrapy 138 | 139 | # Sphinx documentation 140 | docs/_build/ 141 | 142 | # PyBuilder 143 | .pybuilder/ 144 | target/ 145 | 146 | # Jupyter Notebook 147 | .ipynb_checkpoints 148 | 149 | # IPython 150 | profile_default/ 151 | ipython_config.py 152 | 153 | # pyenv 154 | # For a library or package, you might want to ignore these files since the code is 155 | # intended to run in multiple environments; otherwise, check them in: 156 | # .python-version 157 | 158 | # pipenv 159 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 160 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 161 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 162 | # install all needed dependencies. 163 | #Pipfile.lock 164 | 165 | # poetry 166 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 167 | # This is especially recommended for binary packages to ensure reproducibility, and is more 168 | # commonly ignored for libraries. 169 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 170 | #poetry.lock 171 | 172 | # pdm 173 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
174 | #pdm.lock 175 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 176 | # in version control. 177 | # https://pdm.fming.dev/#use-with-ide 178 | .pdm.toml 179 | 180 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 181 | __pypackages__/ 182 | 183 | # Celery stuff 184 | celerybeat-schedule 185 | celerybeat.pid 186 | 187 | # SageMath parsed files 188 | *.sage.py 189 | 190 | # Environments 191 | .env 192 | .venv 193 | env/ 194 | venv/ 195 | ENV/ 196 | env.bak/ 197 | venv.bak/ 198 | 199 | # Spyder project settings 200 | .spyderproject 201 | .spyproject 202 | 203 | # Rope project settings 204 | .ropeproject 205 | 206 | # mkdocs documentation 207 | /site 208 | 209 | # mypy 210 | .mypy_cache/ 211 | .dmypy.json 212 | dmypy.json 213 | 214 | # Pyre type checker 215 | .pyre/ 216 | 217 | # pytype static type analyzer 218 | .pytype/ 219 | 220 | # Cython debug symbols 221 | cython_debug/ 222 | 223 | # PyCharm 224 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 225 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 226 | # and can be added to the global gitignore or merged into this file. For a more nuclear 227 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 228 | #.idea/ -------------------------------------------------------------------------------- /src/ast_nodes/functions/PARAM.py: -------------------------------------------------------------------------------- 1 | """Representation of PARAM nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing import Union 4 | 5 | from typing_extensions import override 6 | 7 | from src.ast_nodes.variables.VAR_DEF import VAR_DEF 8 | from src.utils import TYPE_SYMBOLS_MAP 9 | 10 | 11 | class PARAM(VAR_DEF): 12 | """ 13 | Implement the representation of a function parameter for the AST. 
14 | 15 | This is pretty much the same as a VAR_DEF. It just has a different name to 16 | make the AST easier to read, and generates a `STORE` instruction in order to 17 | save received argument into the parameter's memory location. 18 | 19 | Parameters 20 | ---------- 21 | variable_metadata : dict 22 | Dictionary of parameter metadata exported by the Lexer. 23 | """ 24 | 25 | @override 26 | def __init__(self, variable_metadata: dict) -> None: 27 | super().__init__(variable_metadata) 28 | 29 | @override 30 | def generate_code( 31 | self, register: int, environment: dict[str, dict[int, str]] 32 | ) -> tuple[ 33 | list[dict[str, Union[int, str, float]]], 34 | int, 35 | dict[int, str] 36 | ]: 37 | """ 38 | Generate the code associated with this `PARAM`. 39 | 40 | For this node specialization, update the environment to create a new 41 | variable, push its address to a `CONSTANT`, and add a `STORE` 42 | instruction to save the received argument value into the parameter. 43 | 44 | Parameters 45 | ---------- 46 | register : int 47 | The number of the register to be used by the code generated by this 48 | Node. 49 | environment : dict[int, str] 50 | The compiler's environment, that maps variables IDs to memory 51 | addresses and function IDs to instructions indices. 52 | 53 | Returns 54 | ------- 55 | code : list of dict 56 | Return a list of dictionaries containing code metadata: the register 57 | to be used, the related `instruction` and its metadata. 58 | register : int 59 | The number of the next register available. 60 | environment : dict[int, str] 61 | The updated {var_id: address} environment mapping. 
62 | """ 63 | 64 | code: list[dict[str, Union[int, str]]] = [] 65 | 66 | # Update the environment with the variable address 67 | _, _, environment = super().generate_code( 68 | register=register, 69 | environment=environment 70 | ) 71 | 72 | allocated_address = environment["variables"][self.value]["address"] 73 | 74 | # Emit a `CONSTANT` instruction with the address of the variable 75 | var_address_code = { 76 | "instruction": "CONSTANT", 77 | "metadata": {"register": register, "value": allocated_address} 78 | } 79 | code.append(var_address_code) 80 | 81 | # Store the argument into the parameter's allocated memory. 82 | _store_instruction = "STOREF" if self.type == "float" else "STORE" 83 | parameter_store_code = { 84 | "instruction": _store_instruction, 85 | "metadata": {"register": register, "value": "arg"}, 86 | } 87 | code.append(parameter_store_code) 88 | 89 | return code, register + 1, environment 90 | 91 | @override 92 | def certificate( 93 | self, 94 | certificator_env: dict[int, list[int]] 95 | ) -> dict[int, list[int]]: 96 | """ 97 | Compute the certificate of this variable definition. 98 | 99 | `VAR_DEF` objects will add an entry in the `certificator_env` that maps 100 | the variable's ID to the symbols that encode the type of this 101 | variable. (The entry will be initiated as a sequence of `unknown`, with 102 | `self.size` elements.) 103 | 104 | The returned certificate will have a placeholder to represent the type 105 | of this variable that will be later filled by the `certificator`. 106 | 107 | Parameters 108 | ---------- 109 | certificator_env : dict[int, list[int]] 110 | The certificator's environment, that maps variables IDs to 111 | encodings of their types. 112 | 113 | Returns 114 | ------- 115 | certificator_env : dict[int, list[int]] 116 | The updated certificator's environment, with any additional 117 | information about the variable's types it might have captured.
118 | """ 119 | 120 | certificator_env = super().certificate(certificator_env) 121 | 122 | # Tag this variable as a parameter 123 | certificator_env[self.id]["type"] = [self.type] 124 | certificator_env[self.id]["parameter"] = True 125 | certificator_env[self.id]["active"] = True 126 | 127 | _type_symbol = TYPE_SYMBOLS_MAP[self.type]['type_symbol'] 128 | self.certificate_label = [f"({self.symbol})^({_type_symbol})"] 129 | 130 | return certificator_env 131 | -------------------------------------------------------------------------------- /src/ast_nodes/basic/SEQ.py: -------------------------------------------------------------------------------- 1 | """Representation of SEQ nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing import Union 4 | 5 | from typing_extensions import override 6 | 7 | from src.ast_nodes.node import Node 8 | 9 | 10 | class SEQ(Node): 11 | """ 12 | Implement the representation of a sequence of statements for the AST. 13 | 14 | The node has no semantics, `instruction`, or certificate of its 15 | own. Its only purpose is to help structure the AST. 16 | """ 17 | 18 | @override 19 | def __init__(self, **kwargs) -> None: 20 | super().__init__(uses_register=False) 21 | 22 | self.children: list[Node] = [] 23 | 24 | def add_child(self, child: Node) -> None: 25 | """ 26 | Add a child Node to the `self.children` list. 27 | 28 | Parameters 29 | ---------- 30 | child : Node 31 | The child to be added to the list. 32 | """ 33 | 34 | self.children.append(child) 35 | 36 | @override 37 | def get_certificate_label(self) -> list[str]: 38 | """ 39 | Get the contents of `certificate_label`. 40 | 41 | For `SEQ` nodes, obtain the certificate from the `children` subtrees, 42 | recursively. The `SEQ` node itself does not have a certificate. 43 | 44 | Returns 45 | ------- 46 | : list of str 47 | A list containing the certificate label of the `Node`.
48 | """ 49 | 50 | certificate_label: list[str] = [] 51 | 52 | for child in self.children: 53 | certificate_label.extend(child.get_certificate_label()) 54 | 55 | return certificate_label 56 | 57 | @override 58 | def print(self, indent: int = 0) -> None: 59 | """ 60 | Print the string representation of this `SEQ`. 61 | 62 | The node itself is aligned with `indent`, and its children are padded 63 | with an additional left space. 64 | 65 | Parameters 66 | ---------- 67 | indent : int (optional, default = 0) 68 | The number of left padding spaces to indent. 69 | """ 70 | 71 | super().print(indent) 72 | 73 | for child in self.children: 74 | child.print(indent + 1) 75 | 76 | @override 77 | def generate_code( 78 | self, register: int, environment: dict[str, dict[int, str]] 79 | ) -> tuple[ 80 | list[dict[str, Union[int, str, float]]], 81 | int, 82 | dict[int, str] 83 | ]: 84 | """ 85 | Generate the code associated with this `SEQ`. 86 | 87 | For this node specialization, return a list with the children's code, 88 | generated in the same order as they appear in the `children` attribute. 89 | The `SEQ` node itself does not generate code, for it has no associated 90 | `instruction`. 91 | 92 | Parameters 93 | ---------- 94 | register : int 95 | The number of the register to be used by the code generated by this 96 | Node. 97 | environment : dict[int, str] 98 | The compiler's environment, that maps variables IDs to memory 99 | addresses and function IDs to instructions indices. 100 | 101 | Returns 102 | ------- 103 | code : list of dict 104 | Return a list of dictionaries containing code metadata: the register 105 | to be used, the related `instruction` and its metadata. 106 | register : int 107 | The number of the next register available. 108 | environment : dict[int, str] 109 | The updated {var_id: address} environment mapping.
110 | """ 111 | 112 | code: list[dict[str, Union[int, str, None]]] = [] 113 | 114 | for child in self.children: 115 | child_code, register, environment = child.generate_code( 116 | register=register, 117 | environment=environment 118 | ) 119 | code.extend(child_code) 120 | 121 | return code, register, environment 122 | 123 | @override 124 | def certificate( 125 | self, 126 | certificator_env: dict[int, list[int]] 127 | ) -> dict[int, list[int]]: 128 | """ 129 | Compute the certificate of the current `SEQ`, and set this attribute. 130 | 131 | For `SEQ` nodes, certificate the child nodes in the same order as they 132 | appear in the `children` list. The `SEQ` node itself is not certified. 133 | 134 | Parameters 135 | ---------- 136 | certificator_env : dict[int, list[int]] 137 | The updated certificator's environment, with any additional 138 | information about the variable's types it might have captured. 139 | 140 | Returns 141 | ------- 142 | certificator_env : dict[int, list[int]] 143 | The updated certificator's environment, with any additional 144 | information about the variable's types it might have captured. 145 | """ 146 | 147 | for child in self.children: 148 | certificator_env = child.certificate(certificator_env) 149 | 150 | return certificator_env 151 | -------------------------------------------------------------------------------- /src/ast_nodes/variables/STRUCT_DEF.py: -------------------------------------------------------------------------------- 1 | """Representation of STRUCT_DEF nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing import Union 4 | 5 | from typing_extensions import override 6 | 7 | from src.ast_nodes.node import Node 8 | 9 | 10 | class STRUCT_DEF(Node): 11 | """ 12 | Implement the representation of a struct definition for the AST. 13 | 14 | Parameters 15 | ---------- 16 | struct_metadata : dict 17 | Dictionary of struct metadata exported by the Lexer. 
18 | """ 19 | 20 | @override 21 | def __init__(self, struct_metadata: dict[str, dict[str, dict]]) -> None: 22 | id: int = struct_metadata.get("id") 23 | _type: str = struct_metadata.get("type") 24 | 25 | super().__init__(id, type=_type) 26 | 27 | self.active: bool = struct_metadata.get("active") 28 | self.struct_metadata = struct_metadata 29 | 30 | @override 31 | def print(self, indent: int = 0) -> None: 32 | """ 33 | Print the string representation of this `STRUCT_DEF`. 34 | 35 | The node itself is aligned with `indent`, and the information about its 36 | attributes is padded with an additional left space. 37 | 38 | Parameters 39 | ---------- 40 | indent : int (optional, default = 0) 41 | The number of left padding spaces to indent. 42 | """ 43 | 44 | super().print(indent) 45 | 46 | _attribute_types = ", ".join(self.get_attribute_types()) 47 | 48 | print(f"{' ' * (indent + 1)} Attributes: {_attribute_types}") 49 | 50 | @override 51 | def certificate( 52 | self, 53 | certificator_env: dict[int, list[int]] 54 | ) -> dict[int, list[int]]: 55 | """ 56 | Compute the certificate of this `STRUCT_DEF`. 57 | 58 | `STRUCT_DEF` nodes do not have a certificate of their own. 59 | 60 | Parameters 61 | ---------- 62 | certificator_env : dict[int, list[int]] 63 | The certificator's environment, that maps variables IDs to 64 | encodings of their types. 65 | 66 | Returns 67 | ------- 68 | certificator_env : dict[int, list[int]] 69 | The updated certificator's environment, with any additional 70 | information about the variable's types it might have captured. 71 | """ 72 | 73 | return certificator_env 74 | 75 | @override 76 | def generate_code( 77 | self, register: int, environment: dict[str, dict[int, str]] 78 | ) -> tuple[ 79 | list[dict[str, Union[int, str, float]]], 80 | int, 81 | dict[int, str] 82 | ]: 83 | """ 84 | Generate the code associated with this `STRUCT_DEF`.
85 | 86 | For this node specialization, there is no code to be generated -- the 87 | struct definition is just an abstraction. Still, it takes a `register` 88 | as parameter, but returns it without incrementing it. 89 | 90 | Parameters 91 | ---------- 92 | register : int 93 | The number of the register to be used by the code generated by this 94 | Node. 95 | environment : dict[int, str] 96 | The compiler's environment, that maps variables IDs to memory 97 | addresses and function IDs to instructions indices. 98 | 99 | Returns 100 | ------- 101 | code : list of dict 102 | Return a list of dictionaries containing code metadata: the register 103 | to be used, the related `instruction` and its metadata. 104 | register : int 105 | The number of the next register available. 106 | environment : dict[int, str] 107 | The updated {var_id: address} environment mapping. 108 | """ 109 | 110 | return [], register, environment 111 | 112 | def get_symbol(self) -> str: 113 | """ 114 | Get the `symbol` attribute from this `STRUCT_DEF`. 115 | 116 | Returns 117 | ------- 118 | : str 119 | The `symbol` attribute. 120 | """ 121 | 122 | return self.symbol 123 | 124 | def get_attribute_types(self) -> list[str]: 125 | """ 126 | Get the types of the attributes of this struct. 127 | 128 | The types are returned in the same order as they have been declared in 129 | the struct definition. 130 | 131 | Returns 132 | ------- 133 | attribute_types : list[str] 134 | A list containing the attributes types. 135 | """ 136 | 137 | attribute_types: list[str] = [ 138 | attribute.get("type") 139 | for attribute in self.struct_metadata.get("attributes").values() 140 | ] 141 | 142 | return attribute_types 143 | 144 | def is_active(self) -> bool: 145 | """ 146 | Tell whether this struct definition is `active` or not. 147 | 148 | A struct is `active` if at least one variable of its type has been 149 | defined in the source code. 
150 | 151 | Returns 152 | ------- 153 | active : bool 154 | Whether the struct is active or not. 155 | """ 156 | 157 | return self.active 158 | -------------------------------------------------------------------------------- /src/ast_nodes/functions/ARG.py: -------------------------------------------------------------------------------- 1 | """Representation of ARG nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing import Union 4 | 5 | from typing_extensions import override 6 | 7 | from src.ast_nodes.basic.CST import CST 8 | from src.ast_nodes.node import Node 9 | from src.ast_nodes.variables.VAR import VAR 10 | from src.utils import type_cast 11 | 12 | 13 | class ARG(Node): 14 | """ 15 | Implement the representation of a function argument for the AST. 16 | 17 | Parameters 18 | ---------- 19 | argument_value : Union[CST, VAR] 20 | The node representation of this argument. 21 | parameter_type : str 22 | The type the parameter expects from the argument. 23 | """ 24 | 25 | @override 26 | def __init__(self, argument_value: Union[CST, VAR], parameter_type: str) -> None: 27 | super().__init__() 28 | 29 | self.argument_value: Union[CST, VAR] = argument_value 30 | self.parameter_type: str = parameter_type 31 | 32 | @override 33 | def print(self, indent: int = 0) -> None: 34 | """ 35 | Print the string representation of this `ARG`. 36 | 37 | The node itself is aligned with `indent`, and `argument_value` is padded 38 | with an additional left space. 39 | 40 | Parameters 41 | ---------- 42 | indent : int (optional, default = 0) 43 | The number of left padding spaces to indent. 44 | """ 45 | 46 | super().print(indent) 47 | 48 | self.argument_value.print(indent=indent + 1) 49 | 50 | @override 51 | def generate_code( 52 | self, register: int, environment: dict[str, dict[int, str]] 53 | ) -> tuple[ 54 | list[dict[str, Union[int, str, float]]], 55 | int, 56 | dict[int, str] 57 | ]: 58 | """ 59 | Generate the code associated with this `ARG`.
60 | 61 | For this node specialization, generate code from the `argument_value` 62 | node first, and then generate a `MOV` instruction to save the 63 | `argument_value` in the `arg` register. 64 | 65 | Parameters 66 | ---------- 67 | register : int 68 | The number of the register to be used by the code generated by this 69 | Node. 70 | environment : dict[int, str] 71 | The compiler's environment, that maps variables IDs to memory 72 | addresses and function IDs to instructions indices. 73 | 74 | Returns 75 | ------- 76 | code : list of dict 77 | Return a list of dictionaries containing code metadata: the register 78 | to be used, the related `instruction` and its metadata. 79 | register : int 80 | The number of the next register available. 81 | environment : dict[int, str] 82 | The updated {var_id: address} environment mapping. 83 | """ 84 | 85 | code: list[dict] = [] 86 | 87 | ( 88 | argument_value_code, 89 | register, 90 | environment 91 | ) = self.argument_value.generate_code( 92 | register=register, 93 | environment=environment 94 | ) 95 | code.extend(argument_value_code) 96 | 97 | # Type cast the argument value to the type the parameter expects, if 98 | # they are not the same 99 | if self.argument_value.get_type() != self.parameter_type: 100 | # Type cast emitter doesn't change the {var_id: address} environment 101 | arg_typecast, register = type_cast( 102 | original_type=self.argument_value.get_type(), 103 | target_type=self.parameter_type, 104 | register=register, 105 | ) 106 | code.extend(arg_typecast) 107 | 108 | # ARG must point to the same register that contains the `argument_value` 109 | argument_value_register = register - 1 110 | argument_store_code = { 111 | "instruction": "MOV", 112 | "metadata": { 113 | "register": "arg", 114 | "value": argument_value_register, 115 | }, 116 | } 117 | code.append(argument_store_code) 118 | 119 | return code, register, environment 120 | 121 | @override 122 | def certificate( 123 | self, 124 | certificator_env: 
dict[int, list[int]] 125 | ) -> dict[int, list[int]]: 126 | """ 127 | Compute the certificate of this `ARG`, and set this attribute. 128 | 129 | For `ARG` nodes, certificate the `argument_value` node first, and then 130 | the `ARG` itself. 131 | 132 | Parameters 133 | ---------- 134 | certificator_env : dict[int, list[int]] 135 | The certificator's environment, that maps variables IDs to 136 | encodings of their types. 137 | 138 | Returns 139 | ------- 140 | certificator_env : dict[int, list[int]] 141 | The updated certificator's environment, with any additional 142 | information about the variable's types it might have captured. 143 | """ 144 | 145 | certificator_env = self.argument_value.certificate(certificator_env) 146 | _argument_value_certificate = self.argument_value.get_certificate_label() 147 | 148 | self.certificate_label = [*_argument_value_certificate, f"{self.symbol}"] 149 | 150 | return certificator_env 151 | -------------------------------------------------------------------------------- /src/ast_nodes/basic/PROG.py: -------------------------------------------------------------------------------- 1 | """Representation of PROG nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing import Union 4 | 5 | from typing_extensions import override 6 | 7 | from src.ast_nodes.node import Node 8 | 9 | 10 | class PROG(Node): 11 | """Implement the representation of the beginning of a program for the AST.""" 12 | 13 | @override 14 | def __init__(self) -> None: 15 | super().__init__(uses_register=False) 16 | 17 | self.instruction: str = "HALT" 18 | self.children: list[Node] = [] 19 | 20 | def add_child(self, child: Node) -> None: 21 | """ 22 | Add a child Node to the `self.children` list. 23 | 24 | Parameters 25 | ---------- 26 | child : Node 27 | The child to be added to the list. 28 | """ 29 | 30 | self.children.append(child) 31 | 32 | @override 33 | def get_certificate_label(self) -> list[str]: 34 | """ 35 | Get the contents of `certificate_label`.
36 | 37 | For `PROG` nodes, first obtain the certificate from each `child` 38 | subtree, recursively, and then from the `PROG` node itself. 39 | 40 | Returns 41 | ------- 42 | : list of str 43 | A list containing the certificate label of the `Node`. 44 | """ 45 | 46 | certificate_label: list[str] = [] 47 | 48 | for child in self.children: 49 | certificate_label.extend( 50 | [label for label in child.get_certificate_label() if label is not None] 51 | ) 52 | 53 | certificate_label.extend(super().get_certificate_label()) 54 | 55 | return certificate_label 56 | 57 | @override 58 | def print(self, indent: int = 0) -> None: 59 | """ 60 | Print the string representation of this `PROG`. 61 | 62 | The node itself is aligned with `indent`, and its children are padded 63 | with an additional left space. 64 | 65 | Parameters 66 | ---------- 67 | indent : int (optional, default = 0) 68 | The number of left padding spaces to indent. 69 | """ 70 | 71 | super().print(indent) 72 | 73 | for child in self.children: 74 | child.print(indent + 1) 75 | 76 | @override 77 | def generate_code( 78 | self, register: int, environment: dict[str, dict[int, str]] 79 | ) -> tuple[ 80 | list[dict[str, Union[int, str, float]]], 81 | int, 82 | dict[int, str] 83 | ]: 84 | """ 85 | Generate the code associated with this `PROG`. 86 | 87 | For this node specialization, generate code from each node in `children` 88 | (i.e., the program itself) and then add an ending instruction (`HALT`) 89 | to the code. 90 | 91 | Notice that the register may only be incremented by the child nodes' 92 | `generate_code` method. This Node does not increment the `register`, 93 | as it only adds a `HALT` to the instructions list. 94 | 95 | Parameters 96 | ---------- 97 | register : int 98 | The number of the register to be used by the code generated by this 99 | Node.
100 | environment : dict[int, str] 101 | The compiler's environment, that maps variables IDs to memory 102 | addresses and function IDs to instructions indices. 103 | 104 | Returns 105 | ------- 106 | code : list of dict 107 | Return a list of dictionaries containing code metadata: the register 108 | to be used, the related `instruction` and its metadata. 109 | register : int 110 | The number of the next register available. 111 | environment : dict[int, str] 112 | The updated {var_id: address} environment mapping. 113 | """ 114 | 115 | code: list[dict[str, Union[int, str, None]]] = [] 116 | 117 | for child in self.children: 118 | child_code, register, environment = child.generate_code( 119 | register=register, 120 | environment=environment 121 | ) 122 | code.extend(child_code) 123 | 124 | this_code, _, _ = super().generate_code( 125 | register=register, 126 | environment=environment 127 | ) 128 | code.extend(this_code) 129 | 130 | return code, register, environment 131 | 132 | @override 133 | def certificate( 134 | self, 135 | certificator_env: dict[int, list[int]] 136 | ) -> dict[int, list[int]]: 137 | """ 138 | Compute the certificate of `PROG`, and set this attribute. 139 | 140 | For `PROG` nodes, certificate each node in `children`, recursively, and 141 | then the `PROG` node itself. 142 | 143 | Parameters 144 | ---------- 145 | certificator_env : dict[int, list[int]] 146 | The certificator's environment, that maps variables IDs to 147 | encodings of their types. 148 | 149 | Returns 150 | ------- 151 | certificator_env : dict[int, list[int]] 152 | The updated certificator's environment, with any additional 153 | information about the variable's types it might have captured.
154 | """ 155 | 156 | for child in self.children: 157 | certificator_env = child.certificate(certificator_env) 158 | 159 | return super().certificate(certificator_env) 160 | -------------------------------------------------------------------------------- /src/ast_nodes/variables/VAR_DEF.py: -------------------------------------------------------------------------------- 1 | """Representation of VAR_DEF nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing import Union 4 | 5 | from typing_extensions import override 6 | 7 | from src.ast_nodes.node import Node 8 | from src.utils import get_variable_size 9 | 10 | 11 | class VAR_DEF(Node): 12 | """ 13 | Implement the representation of a variable definition for the AST. 14 | 15 | Parameters 16 | ---------- 17 | variable_metadata : dict 18 | Dictionary of variable metadata exported by the Lexer. 19 | """ 20 | 21 | @override 22 | def __init__(self, variable_metadata: dict) -> None: 23 | id: int = variable_metadata.get("id") 24 | type: str = variable_metadata.get("type") 25 | 26 | super().__init__(id, type) 27 | 28 | self.id = id 29 | self.variable_metadata: dict = variable_metadata 30 | self.size: int = get_variable_size(variable_metadata) 31 | 32 | @override 33 | def print(self, indent: int = 0) -> None: 34 | """ 35 | Print the string representation of this `VAR_DEF`. 36 | 37 | The node itself is aligned with `indent`, and the information about its 38 | attributes is padded with an additional left space. 39 | 40 | Parameters 41 | ---------- 42 | indent : int (optional, default = 0) 43 | The number of left padding spaces to indent. 
44 | """ 45 | 46 | super().print(indent) 47 | 48 | var_def_metadata: str = f"Name: {self.variable_metadata.get('name')}, " 49 | var_def_metadata += f"Type: {self.variable_metadata.get('type')}" 50 | 51 | if self.variable_metadata.get("length"): 52 | var_def_metadata += ( 53 | f" (array), Length: {self.variable_metadata.get('length')}" 54 | ) 55 | 56 | print(f"{' ' * (indent + 1)}{var_def_metadata}") 57 | 58 | @override 59 | def generate_code( 60 | self, register: int, environment: dict[str, dict[int, str]] 61 | ) -> tuple[ 62 | list[dict[str, Union[int, str, float]]], 63 | int, 64 | dict[int, str] 65 | ]: 66 | """ 67 | Generate the code associated with this `VAR_DEF`. 68 | 69 | There is no code associated with this operation: it will simply update 70 | the environment. 71 | 72 | Parameters 73 | ---------- 74 | register : int 75 | The number of the register to be used by the code generated by this 76 | Node. 77 | environment : dict[int, str] 78 | The compiler's environment, that maps variables IDs to memory 79 | addresses and function IDs to instructions indices. 80 | 81 | Returns 82 | ------- 83 | code : list of dict 84 | Return a list of dictionaries containing code metadata: the register 85 | to be used, the related `instruction` and its metadata. 86 | register : int 87 | The number of the next register available. 88 | environment : dict[int, str] 89 | The updated {var_id: address} environment mapping. 90 | """ 91 | 92 | # If no variables are defined in the environment, the dict will be empty 93 | # and we manually set the ID and address. 
94 | variable_count = len(environment["variables"]) 95 | 96 | if not variable_count: 97 | new_var_address = hex(0) 98 | else: 99 | last_var_id = list(environment["variables"]).pop() 100 | last_var_address = environment["variables"][last_var_id]["address"] 101 | last_var_size = environment["variables"][last_var_id]["size"] 102 | new_var_address = hex(int(last_var_address, 16) + last_var_size) 103 | 104 | var_id = self.value 105 | environment["variables"][var_id] = { 106 | "address": new_var_address, 107 | "size": self.size 108 | } 109 | 110 | return [], register, environment 111 | 112 | @override 113 | def certificate( 114 | self, 115 | certificator_env: dict[int, list[int]] 116 | ) -> dict[int, list[int]]: 117 | """ 118 | Compute the certificate of this variable definition. 119 | 120 | `VAR_DEF` objects will add an entry in the `certificator_env` that maps 121 | the variable's ID to the symbols that encode the type of this 122 | variable. (The entry will be initialized as a sequence of 123 | `__unknown_type__`, with `self.size // 4` elements.) 124 | 125 | The returned certificate will have a placeholder to represent the type 126 | of this variable that will be later filled by the `certificator`. 127 | 128 | Parameters 129 | ---------- 130 | certificator_env : dict[int, list[int]] 131 | The certificator's environment, that maps variables IDs to 132 | encodings of their types. 133 | 134 | Returns 135 | ------- 136 | certificator_env : dict[int, list[int]] 137 | The updated certificator's environment, with any additional 138 | information about the variable's types it might have captured.
139 | """ 140 | 141 | # Add all of the elements to the certificator environment as `unknown` 142 | # TODO: divide by the actual size of the type of this `VAR_DEF` 143 | certificator_env[self.id] = { 144 | "type": [ 145 | "__unknown_type__" 146 | for _ in range(self.size // 4) 147 | ], 148 | "active": False 149 | } 150 | 151 | return certificator_env 152 | -------------------------------------------------------------------------------- /src/inverters/base_inverter.py: -------------------------------------------------------------------------------- 1 | """Implement the base class for the certificate inverter.""" 2 | 3 | from abc import abstractmethod 4 | from typing import Union 5 | 6 | from src.utils import INVERTED_SYMBOLS_MAP 7 | 8 | 9 | class BaseInverter: 10 | """ 11 | Invert a certificate to retrieve the original program in canonical form. 12 | 13 | Parameters 14 | ---------- 15 | certificate : str 16 | The certificate produced from a `Certificator` class. 17 | """ 18 | 19 | def __init__(self, certificate: str) -> None: 20 | self.original_certificate: str = certificate 21 | self.certificate: list[dict[str, Union[str, dict]]] = self._preprocess_certificate() 22 | self.ir: list[dict[str, Union[str, dict]]] = self._get_intermediate_representation() 23 | 24 | def _preprocess_certificate(self) -> list[dict[str, Union[int, list[int]]]]: 25 | """ 26 | Preprocess a certificate to split positional primes and symbols. 27 | 28 | The result is returned as a list of dictionaries. Each dictionary has 29 | three fields: `positional_prime`, `symbol`, and `additional_info`. The 30 | first two will always have contents, while the last might be empty. 31 | 32 | Returns 33 | ------- 34 | preprocessed_certificate : list[dict[str, Union[int, list[int]]]] 35 | The preprocessed certificate, as a list of dictionaries.
36 | """ 37 | 38 | split_certificate = [ 39 | list(map(int, token.replace("(", "").replace(")", "").split("^"))) 40 | for token in self.original_certificate.split("*") 41 | ] 42 | 43 | preprocessed_certificate = [ 44 | { 45 | "positional_prime": positional_prime, 46 | "symbol": symbol, 47 | "additional_info": additional_info 48 | } 49 | for positional_prime, symbol, *additional_info in split_certificate 50 | ] 51 | 52 | return preprocessed_certificate 53 | 54 | def _get_intermediate_representation(self) -> list[dict[str, Union[str, dict]]]: 55 | """ 56 | Get an Intermediate Representation (IR) for the encoded program. 57 | 58 | The IR uses a series of more verbose tokens to represent the program. 59 | 60 | Returns 61 | ------- 62 | ir : list[dict[str, Union[str, dict]]] 63 | The Intermediate Representation. 64 | """ 65 | 66 | ir = [] 67 | 68 | op_handlers = { 69 | "CST": self.__handle_constant, 70 | "VAR_DEF": self.__handle_variable_definition, 71 | "VAR_ADDRESS": self.__handle_variable, 72 | "VAR_VALUE": self.__handle_variable, 73 | } 74 | 75 | for certificate_token in self.certificate: 76 | op = INVERTED_SYMBOLS_MAP[certificate_token["symbol"]] 77 | 78 | # Handle symbols that contain additional parameters 79 | if op in op_handlers: 80 | op_handler = op_handlers[op] 81 | op_metadata = op_handler(certificate_token) 82 | 83 | else: 84 | op_metadata = {} 85 | 86 | ir.append({"operation": op, "metadata": op_metadata}) 87 | 88 | return ir 89 | 90 | def __handle_constant(self, certificate_token: dict[str, Union[str, dict]]) -> dict[str, Union[str, dict]]: 91 | """Handle a certificate token that represents a constant.""" 92 | 93 | constant_value = certificate_token["additional_info"].pop() 94 | 95 | # Constants are always added with 1 to prevent the exponentiation identity 96 | constant_value -= 1 97 | 98 | return { 99 | "value": constant_value 100 | } 101 | 102 | def __handle_variable_definition(self, certificate_token: dict[str, Union[str, dict]]) -> dict[str, 
Union[str, dict]]: 103 | """Handle a certificate token that represents a variable definition.""" 104 | 105 | # TODO: handle types 106 | variable_prime, _, _ = certificate_token["additional_info"] 107 | 108 | return { 109 | "variable_prime": variable_prime, 110 | # "size": size 111 | } 112 | 113 | def __handle_variable(self, certificate_token: dict[str, Union[str, dict]]) -> dict[str, Union[str, dict]]: 114 | """Handle a certificate token that represents a variable.""" 115 | 116 | ( 117 | variable_prime, 118 | _access_type, 119 | _access_offset_or_var 120 | ) = certificate_token["additional_info"] 121 | 122 | # Static access (i.e., array indexed with constant or struct) 123 | if _access_type == 2: 124 | access_type = "static" 125 | 126 | # Offset is added with 1 to prevent the exponentiation identity 127 | offset = _access_offset_or_var - 1 128 | 129 | return { 130 | "variable_prime": variable_prime, 131 | "access_type": access_type, 132 | "offset": offset 133 | } 134 | 135 | # Dynamic access (i.e., array indexed with variable) 136 | access_type = "dynamic" 137 | indexing_variable_prime = _access_offset_or_var 138 | 139 | return { 140 | "variable_prime": variable_prime, 141 | "access_type": access_type, 142 | "indexing_variable_prime": indexing_variable_prime 143 | } 144 | 145 | @abstractmethod 146 | def get_program(self) -> Union[str, dict[str, dict]]: 147 | """Get the canonical form of the program.""" 148 | 149 | raise NotImplementedError 150 | -------------------------------------------------------------------------------- /src/ast_nodes/functions/RET_SYM.py: -------------------------------------------------------------------------------- 1 | """Representation of RET_SYM nodes for the Abstract Syntax Tree.""" 2 | 3 | from typing import Union 4 | 5 | from typing_extensions import override 6 | 7 | from src.ast_nodes.node import Node 8 | from src.utils import type_cast 9 | 10 | 11 | class RET_SYM(Node): 12 | """ 13 | Implement the representation of a function return 
@override
def generate_code(
    self, register: int, environment: dict[str, dict[int, str]]
) -> tuple[
    list[dict[str, Union[int, str, float]]],
    int,
    dict[int, str]
]:
    """
    Generate the code associated with this `RET_SYM`.

    The code of the `returned_value` child comes first. If the type of the
    child differs from the type of this `RET_SYM`, the instructions
    produced by `type_cast` are appended next. The return itself closes the
    sequence: a `MOV` of the resulting register to `ret_value`, followed by
    a `JR` to `ret_address`.

    Parameters
    ----------
    register : int
        The number of the register to be used by the code generated by this
        Node.
    environment : dict[str, dict[int, str]]
        The compiler's environment, handed over to the `returned_value`
        child.

    Returns
    -------
    code : list of dict
        The generated instructions, each one a dictionary with the
        `instruction` and its `metadata`.
    register : int
        The number of the next register available.
    environment : dict
        The environment, as returned by the `returned_value` child.
    """

    child_code, register, environment = self.returned_value.generate_code(
        register=register,
        environment=environment
    )
    code: list[dict[str, Union[int, str, None]]] = [*child_code]

    # Enforce the returned value to have the same type as the function this
    # `RET_SYM` is located in.
    source_type = self.returned_value.get_type()
    expected_type = self.get_type()

    if source_type != expected_type:
        cast_code, register = type_cast(
            original_type=source_type,
            target_type=expected_type,
            register=register,
        )
        code += cast_code

    # The value to return lives in the last register that was written.
    move_to_return_value = {
        "instruction": "MOV",
        "metadata": {
            "register": "ret_value",
            "value": register - 1,
        },
    }
    jump_to_return_address = {
        "instruction": "JR",
        "metadata": {"register": "ret_address"},
    }

    code.append(move_to_return_value)
    code.append(jump_to_return_address)

    return code, register, environment
class CodeGenerator:
    """
    Generate instructions for the virtual machine from the nodes of an
    Abstract Syntax Tree (AST).

    Parameters
    ----------
    root : PROG
        The root of an Abstract Syntax Tree generated by the
        `src.abstract_syntax_tree.AbstractSyntaxTree` class.
    """

    def __init__(self, root: PROG) -> None:
        self.root: PROG = root
        self.register: int = 0
        self.environment: dict[str, dict[int, str]] = {
            "variables": {},
            "functions": {},
        }
        self.program: dict[str, Union[list, dict]] = {
            "functions": {},
            "global_vars": [],
            "data": {},
            "code": [],
        }

    def __str__(self) -> str:
        """
        Implement a string representation of a CodeGenerator object.

        The text starts with a `Code:` header, followed by the global
        variables instructions (one per line) and then by one section per
        function, with its instructions indented by one space.

        Returns
        -------
        _str : str
            The string representation of a CodeGenerator object.
        """

        margin = " "
        chunks: list[str] = ["Code:"]

        for instruction in self.program["global_vars"]:
            chunks.append("\n")
            chunks.append(str(instruction))

        # Add a line break after the global vars, if any
        if len(chunks) > 1:
            chunks.append("\n")

        for function_name, bounds in self.program["functions"].items():
            chunks.append(f"\n{function_name}:")

            first, last = bounds["start"], bounds["end"]

            for position in range(first, last):
                chunks.append("\n" + margin + str(self.program["code"][position]))

            chunks.append("\n")

        return "".join(chunks)

    def print(self) -> None:
        """Print this CodeGenerator object."""

        print(self)

    def generate_code(self) -> dict[str, dict]:
        """
        Generate code from the root of an Abstract Syntax Tree.

        Global variables are handled first, then the functions. A `HALT`
        instruction closes the code, the IDs are assigned and the `data`
        section is exported. The result is also kept in `self.program`.

        Returns
        -------
        program : dict[str, dict]
            A dictionary with bytecodes and struct metadata generated from some
            Abstract Syntax Tree representation of a program.
        """

        self.parse_global_variables()
        self.parse_functions()

        # The generated code always ends with a HALT instruction.
        halt = {"instruction": "HALT", "metadata": {}}
        self.program["code"].append(halt)

        self._add_ids_to_source()
        self._export_data()

        return self.program

    def parse_global_variables(self) -> None:
        """
        Generate code for global variables and add it to the generated program.

        Only the `VAR_DEF` children of the root are considered.
        """

        global_definitions: list[VAR_DEF] = [
            child for child in self.root.children if isinstance(child, VAR_DEF)
        ]

        for definition in global_definitions:
            generated = definition.generate_code(
                register=self.register,
                environment=self.environment
            )
            code, self.register, self.environment = generated
            self.program["global_vars"].extend(code)

    def parse_functions(self) -> None:
        """
        Generate code for each function and add it to the generated program.

        The `start` (inclusive) and `end` (exclusive) indices of each function
        in `self.program["code"]` are registered in `self.program["functions"]`.
        """

        function_definitions: list[FUNC_DEF] = [
            child for child in self.root.children if isinstance(child, FUNC_DEF)
        ]

        cursor: int = len(self.program["code"])

        for definition in function_definitions:
            function_name = definition.get_function_name()

            generated = definition.generate_code(
                register=self.register,
                environment=self.environment
            )
            code, self.register, self.environment = generated
            self.program["code"].extend(code)

            start, cursor = cursor, cursor + len(code)
            self.program["functions"][function_name] = {
                "start": start,
                "end": cursor,
            }

    def get_program(self) -> dict[str, dict]:
        """
        Get the generated program.

        Returns
        -------
        program : dict[str, dict]
            The generated program.
        """

        return self.program

    def _add_ids_to_source(self) -> None:
        """
        Add sequential IDs to the program.

        IDs start at 1, and the global variables instructions are numbered
        before the instructions of the functions.
        """

        instructions = [*self.program["global_vars"], *self.program["code"]]

        for bytecode_id, instruction in enumerate(instructions, start=1):
            instruction["bytecode_id"] = bytecode_id

    def _export_data(self) -> None:
        """
        Export the `data` section of the program.

        This section maps variable's base addresses to their sizes.
        """

        data = {}

        for variable in self.environment["variables"].values():
            data[variable["address"]] = variable["size"]

        self.program["data"] = data
nounwind optnone ssp uwtable(sync) 33 | define noundef signext i16 @_Z9short_sumss(i16 noundef signext %0, i16 noundef signext %1) #0 { 34 | %3 = alloca i16, align 2 35 | %4 = alloca i16, align 2 36 | store i16 %0, ptr %3, align 2 37 | store i16 %1, ptr %4, align 2 38 | %5 = load i16, ptr %3, align 2 39 | %6 = sext i16 %5 to i32 40 | %7 = load i16, ptr %4, align 2 41 | %8 = sext i16 %7 to i32 42 | %9 = add nsw i32 %6, %8 43 | %10 = trunc i32 %9 to i16 44 | ret i16 %10 45 | } 46 | 47 | ; Function Attrs: mustprogress noinline norecurse optnone ssp uwtable(sync) 48 | define noundef i32 @main() #1 { 49 | %1 = alloca i32, align 4 50 | %2 = alloca i32, align 4 51 | %3 = alloca float, align 4 52 | %4 = alloca i16, align 2 53 | %5 = alloca i16, align 2 54 | %6 = alloca i16, align 2 55 | %7 = alloca i32, align 4 56 | %8 = alloca i32, align 4 57 | %9 = alloca float, align 4 58 | %10 = alloca float, align 4 59 | store i32 0, ptr %1, align 4 60 | %11 = call noundef i32 @_Z7int_sumii(i32 noundef 23, i32 noundef 13) 61 | store i32 %11, ptr %2, align 4 62 | %12 = call noundef float @_Z9float_sumff(float noundef 0x40091EB860000000, float noundef 0x4005AE1480000000) 63 | store float %12, ptr %3, align 4 64 | %13 = call noundef signext i16 @_Z9short_sumss(i16 noundef signext 1, i16 noundef signext 2) 65 | store i16 %13, ptr %4, align 2 66 | %14 = load i32, ptr %2, align 4 67 | %15 = sitofp i32 %14 to float 68 | %16 = load float, ptr %3, align 4 69 | %17 = fadd float %15, %16 70 | %18 = load i16, ptr %4, align 2 71 | %19 = sext i16 %18 to i32 72 | %20 = sitofp i32 %19 to float 73 | %21 = fadd float %17, %20 74 | %22 = fptosi float %21 to i32 75 | %23 = call i32 (ptr, ...) 
@printf(ptr noundef @.str, i32 noundef %22) 76 | %24 = load i32, ptr %2, align 4 77 | %25 = sitofp i32 %24 to float 78 | %26 = load float, ptr %3, align 4 79 | %27 = load i16, ptr %4, align 2 80 | %28 = sext i16 %27 to i32 81 | %29 = sitofp i32 %28 to float 82 | %30 = fadd float %26, %29 83 | %31 = fadd float %25, %30 84 | %32 = fptosi float %31 to i16 85 | store i16 %32, ptr %5, align 2 86 | %33 = load i32, ptr %2, align 4 87 | %34 = mul nsw i32 2, %33 88 | %35 = load i16, ptr %4, align 2 89 | %36 = sext i16 %35 to i32 90 | %37 = mul nsw i32 %34, %36 91 | %38 = sitofp i32 %37 to float 92 | %39 = load float, ptr %3, align 4 93 | %40 = fmul float %38, %39 94 | %41 = fptosi float %40 to i16 95 | store i16 %41, ptr %6, align 2 96 | %42 = load i32, ptr %2, align 4 97 | %43 = sitofp i32 %42 to float 98 | %44 = load float, ptr %3, align 4 99 | %45 = load i16, ptr %4, align 2 100 | %46 = sext i16 %45 to i32 101 | %47 = sitofp i32 %46 to float 102 | %48 = fadd float %44, %47 103 | %49 = fadd float %43, %48 104 | %50 = fptosi float %49 to i32 105 | store i32 %50, ptr %7, align 4 106 | %51 = load i32, ptr %2, align 4 107 | %52 = mul nsw i32 2, %51 108 | %53 = load i16, ptr %4, align 2 109 | %54 = sext i16 %53 to i32 110 | %55 = mul nsw i32 %52, %54 111 | %56 = sitofp i32 %55 to float 112 | %57 = load float, ptr %3, align 4 113 | %58 = fmul float %56, %57 114 | %59 = fptosi float %58 to i32 115 | store i32 %59, ptr %8, align 4 116 | %60 = load i32, ptr %2, align 4 117 | %61 = load i16, ptr %4, align 2 118 | %62 = sext i16 %61 to i32 119 | %63 = add nsw i32 %60, %62 120 | %64 = sitofp i32 %63 to float 121 | store float %64, ptr %9, align 4 122 | %65 = load i32, ptr %2, align 4 123 | %66 = mul nsw i32 2, %65 124 | %67 = load i16, ptr %4, align 2 125 | %68 = sext i16 %67 to i32 126 | %69 = mul nsw i32 %66, %68 127 | %70 = sitofp i32 %69 to float 128 | store float %70, ptr %10, align 4 129 | ret i32 0 130 | } 131 | 132 | declare i32 @printf(ptr noundef, ...) 
class FrontendCertificator(AbstractCertificator):
    """
    Certificate the frontend representation of some program.

    Parameters
    ----------
    ast : AbstractSyntaxTree
        The AST of the program to certificate.
    """

    def __init__(self, ast: AbstractSyntaxTree) -> None:
        super().__init__()

        self.ast: AbstractSyntaxTree = ast

    @override
    def certificate(self, **kwargs) -> str:
        """
        Certificate the frontend code.

        The labels computed from the AST have their variable placeholders
        replaced and are prefixed with the `VAR_DEF` labels. Each label is
        then used as the exponent of a positional prime, and the resulting
        factors are joined with `*`.

        Returns
        -------
        computed_certificate : str
            The computed certificate.
        """

        exponents = self._certificate_ast()
        exponents = self._handle_variables_primes(exponents)
        exponents = self._add_var_def_symbols(exponents)

        positional_primes = primes_list(len(exponents))

        factors = []
        for positional_prime, exponent in zip(positional_primes, exponents):
            factors.append(f"{positional_prime}^({exponent})")

        self.computed_certificate = "*".join(factors)

        return self.computed_certificate

    def _certificate_ast(self) -> list[str]:
        """
        Certificate the Abstract Syntax Tree.

        Notice that the certificate generated by this method is incomplete: it
        lacks symbols for the types.

        Returns
        -------
        ast_certificate : list[str]
            The list of labels of the AST certificate.
        """

        root = self.ast.root
        self.environment = root.certificate(certificator_env=self.environment)

        return root.get_certificate_label()

    def _handle_variables_primes(self, computed_exponents: list[str]) -> list[str]:
        """
        Handle variables primes by emitting it only for active variables.

        This method also replaces all the `VAR_<id>_PRIME_PLACEHOLDER`
        placeholders in `computed_exponents` with the emitted primes. The list
        is updated in place.

        Parameters
        ----------
        computed_exponents : list[str]
            The list of labels of the AST certificate.

        Returns
        -------
        computed_exponents : list[str]
            The list of labels of the AST certificate, after replacing
            placeholders.
        """

        # Emit primes for "alive" variables
        for entry in self.environment.values():
            if not entry["active"]:
                continue

            entry["prime"] = self.current_prime
            self.current_prime = next_prime(self.current_prime)

        def _prime_of(match: re.Match) -> str:
            """Get the prime (as text) of the variable in the placeholder."""

            var_id = int(match.group(1))

            return f'{self.environment[var_id]["prime"]}'

        # Replace placeholders
        pattern = r"VAR_(\d+)_PRIME_PLACEHOLDER"

        for idx, label in enumerate(computed_exponents):
            computed_exponents[idx] = re.sub(pattern, _prime_of, label)

        return computed_exponents

    def _add_var_def_symbols(self, computed_exponents: list[str]) -> list[str]:
        """
        Add `VAR_DEF` symbols to the beginning of `computed_exponents`.

        Only variables that are `active` in the certificator environment, and
        that are not flagged as `parameter`, will be considered.

        Parameters
        ----------
        computed_exponents : list[str]
            The list of labels of the AST certificate.

        Returns
        -------
        computed_exponents : list[str]
            The list of labels of the AST certificate, added with `VAR_DEF`
            labels.
        """

        base_symbol = get_certificate_symbol("VAR_DEF")
        var_def_labels = []

        for var_data in self.environment.values():
            is_parameter = var_data.get("parameter", False)

            if not var_data["active"] or is_parameter:
                continue

            wrapped_symbols = [
                f'({TYPE_SYMBOLS_MAP[_type]["type_symbol"]})'
                for _type in var_data["type"]
            ]
            type_symbols = "^".join(wrapped_symbols)

            var_def_labels.append(f"({base_symbol})^{type_symbols}")

        return [*var_def_labels, *computed_exponents]

    def _add_types_certificates(self, certificate: str) -> str:
        """
        Add types certificates to the certificate.

        This method will replace all the `TYPE_PLACEHOLDER_VAR_PRIME_<key>`
        placeholders, one per key of the certificator environment, with a
        product of positional primes raised to the elements of the matching
        environment value.

        NOTE(review): this seems to expect environment values that are lists
        of type symbols, while the other methods of this class handle them as
        dictionaries -- confirm whether this method is still in use.

        Parameters
        ----------
        certificate : str
            The certificate to replace placeholders with the types certificates.

        Returns
        -------
        certificate : str
            The certificate with types certificates.
        """

        for var_prime, type_symbols in self.environment.items():
            positional_primes = primes_list(len(type_symbols))

            factors = (
                f"({position_prime}^{type_symbol})"
                for position_prime, type_symbol in zip(positional_primes, type_symbols)
            )
            type_certificate = "*".join(factors)

            certificate = certificate.replace(
                f"TYPE_PLACEHOLDER_VAR_PRIME_{var_prime}",
                type_certificate
            )

        return certificate
Type: int 40 | Kind: VAR, Value: 5, Type: int 41 | Kind: RET_SYM, Type: my_struct 42 | Kind: VAR, Value: 2, Type: my_struct 43 | Kind: FUNC_DEF, Value: some_simple_function, Type: int 44 | Kind: PARAM, Value: 6, Type: float 45 | Name: param_1, Type: float 46 | Kind: PARAM, Value: 7, Type: int 47 | Name: param_2, Type: int 48 | Kind: SEQ 49 | Kind: RET_SYM, Type: int 50 | Kind: DIV, Type: float 51 | Kind: VAR, Value: 6, Type: float 52 | Kind: VAR, Value: 7, Type: int 53 | Kind: FUNC_DEF, Value: abc, Type: int 54 | Kind: PARAM, Value: 8, Type: int 55 | Name: asda, Type: int 56 | Kind: PARAM, Value: 9, Type: int 57 | Name: abcdef, Type: int 58 | Kind: SEQ 59 | Kind: VAR_DEF, Value: 10, Type: int 60 | Name: bla, Type: int 61 | Kind: ASSIGN, Type: int 62 | Kind: VAR, Value: 10, Type: int 63 | Kind: CST, Value: 1, Type: int 64 | Kind: VAR_DEF, Value: 11, Type: float 65 | Name: blabla, Type: float 66 | Kind: ASSIGN, Type: float 67 | Kind: VAR, Value: 11, Type: float 68 | Kind: CST, Value: 2.0, Type: float 69 | Kind: VAR_DEF, Value: 12, Type: short 70 | Name: xaxaxa, Type: short 71 | Kind: VAR_DEF, Value: 13, Type: my_struct 72 | Name: internal_struct_var, Type: my_struct 73 | Kind: ASSIGN, Type: int 74 | Kind: ELEMENT_ACCESS, Type: int 75 | Kind: VAR, Value: 13, Type: my_struct 76 | Kind: CST, Value: 0, Type: int 77 | Kind: CST, Value: 1, Type: int 78 | Kind: ASSIGN, Type: int 79 | Kind: VAR, Value: 10, Type: int 80 | Kind: ADD, Type: int 81 | Kind: VAR, Value: 10, Type: int 82 | Kind: FUNC_CALL, Value: 2, Type: int 83 | Kind: ARG 84 | Kind: VAR, Value: 11, Type: float 85 | Kind: ARG 86 | Kind: CST, Value: 123, Type: int 87 | Kind: RET_SYM, Type: int 88 | Kind: ADD, Type: float 89 | Kind: VAR, Value: 11, Type: float 90 | Kind: VAR, Value: 10, Type: int 91 | Kind: FUNC_DEF, Value: main, Type: int 92 | Kind: SEQ 93 | Kind: VAR_DEF, Value: 14, Type: int 94 | Name: x, Type: int 95 | Kind: ASSIGN, Type: int 96 | Kind: VAR, Value: 14, Type: int 97 | Kind: FUNC_CALL, Value: 3, 
def test_build(capfd: fixture) -> None:
    """
    Test if the `build` method works as expected.

    The AST is built from the tokenized source and printed with `print_tree`.
    The console output, captured with pytest's `capfd` fixture, is then
    compared to the known `EXPECTED_PRINT_TREE`.
    """

    ast = AbstractSyntaxTree(source_code=deepcopy(TOKENIZED_SOURCE_CODE))
    ast.build()

    ast.print_tree()

    printed_tree, _ = capfd.readouterr()

    # `EXPECTED_PRINT_TREE` starts with a line break, so prepend one to the
    # captured output before comparing.
    assert "\n" + printed_tree == EXPECTED_PRINT_TREE
def test_parse_source_code():
    """
    Test the `Lexer.parse_source_code` method.

    The `SOURCE_CODE` snippet, which uses all reserved words, must be parsed
    to `TOKENIZED_SOURCE_CODE`.
    """

    lexer = Lexer(source_code=deepcopy(SOURCE_CODE))

    assert lexer.parse_source_code() == TOKENIZED_SOURCE_CODE
@pytest.mark.parametrize("source_code", INVALID_SOURCES)
def test_validate_source_code_syntax(source_code: str):
    """
    Check that malformed source code is rejected with a `SyntaxError`.

    Each parametrized snippet comes from `INVALID_SOURCES` and contains one of
    the covered syntax errors. Building the `Lexer` and calling
    `parse_source_code` on it is expected to raise.

    Parameters
    ----------
    source_code : str
        The invalid snippet under test.
    """

    # Both the construction and the parsing stay inside the `raises` block,
    # so the test passes whichever of the two steps reports the error.
    with pytest.raises(SyntaxError):
        Lexer(source_code=source_code).parse_source_code()
This defaults to the `read` 33 | # case, but can be changed by the AST as it is built 34 | self.add_context(context={"context": "read"}) 35 | 36 | @override 37 | def __str__(self) -> str: 38 | new_str: str = super().__str__() 39 | 40 | array_length: Union[int, None] = self.variable_metadata.get("length") 41 | if array_length: 42 | new_str += f" (array), Length: {array_length}" 43 | 44 | return new_str 45 | 46 | def get_id(self) -> int: 47 | return self.id 48 | 49 | @override 50 | def generate_code( 51 | self, register: int, environment: dict[str, dict[int, str]] 52 | ) -> tuple[ 53 | list[dict[str, Union[int, str, float]]], 54 | int, 55 | dict[int, str] 56 | ]: 57 | """ 58 | Generate the code associated with this `VAR`. 59 | 60 | For this node specialization, the code metadata contains the instruction 61 | (i.e., whether the context requires the variable's value or address), 62 | together with the variable identifier (`self.value`). 63 | 64 | Parameters 65 | ---------- 66 | register : int 67 | The number of the register to be used by the code generated by this 68 | Node. 69 | environment : dict[int, str] 70 | The compiler's environment, that maps variables IDs to memory 71 | addresses and function IDs to instructions indices. 72 | 73 | Returns 74 | ------- 75 | code : list of dict 76 | Return a list of dictionaries containing code metadata: the register 77 | to be used, the related `instruction` and its metadata. 78 | register : int 79 | The number of the next register available. 80 | environment : dict[int, str] 81 | The updated {var_id: address} environment mapping. 
82 | """ 83 | 84 | operation: str = self.context.get("context", "read") 85 | var_address = environment["variables"][self.id]["address"] 86 | 87 | code = [ 88 | { 89 | "instruction": "CONSTANT", 90 | "metadata": {"register": register, "value": var_address} 91 | }, 92 | { 93 | "instruction": "ADD", 94 | "metadata": { 95 | "register": register + 1, 96 | "lhs_register": register, 97 | "rhs_register": "zero" 98 | }, 99 | } 100 | ] 101 | 102 | register += 2 103 | 104 | if operation == "read": 105 | code.append({ 106 | "instruction": self.instruction, 107 | "metadata": {"register": register, "value": register - 1} 108 | }) 109 | register += 1 110 | 111 | # Add an explicit cast when loading a `short` variable, as all 112 | # variables are 4 byte-aligned. 113 | if self.type == "short": 114 | cast_to_short, register = type_cast( 115 | original_type="int", 116 | target_type="short", 117 | register=register, 118 | ) 119 | code.extend(cast_to_short) 120 | 121 | return code, register, environment 122 | 123 | @override 124 | def certificate( 125 | self, 126 | certificator_env: dict[int, list[int]] 127 | ) -> dict[int, list[int]]: 128 | """ 129 | Compute the certificate of this `VAR`. 130 | 131 | This will also check if it is a parameter, and emit the symbol 132 | accordingly. 133 | 134 | Parameters 135 | ---------- 136 | certificator_env : dict[int, list[int]] 137 | The certificators's environment, that maps variables IDs to 138 | encodings of their types. 139 | 140 | Returns 141 | ------- 142 | certificator_env : dict[int, list[int]] 143 | The updated certificator's environment, with any additional 144 | information about the variable's types it might have captured. 
145 | """ 146 | 147 | self.certificate_label = [f"{self.symbol}"] 148 | 149 | # Only update the `type` for "simple" variables (`ELEMENT_ACCESS` 150 | # will do this for arrays/structs) 151 | if len(certificator_env[self.id]["type"]) == 1: 152 | certificator_env[self.id]["type"] = [self.type] 153 | certificator_env[self.id]["active"] = True 154 | 155 | return certificator_env 156 | 157 | def get_metadata(self) -> dict[str, str]: 158 | """ 159 | Get the metadata of this `VAR`. 160 | 161 | Returns 162 | ------- 163 | : dict[str, str] 164 | The variable metadata. 165 | """ 166 | 167 | return self.variable_metadata 168 | 169 | def add_context(self, context: dict[str, str]) -> None: 170 | """ 171 | Add context to this `VAR` node. 172 | 173 | The context indicates whether this variable is being readed or written. 174 | 175 | Parameters 176 | ---------- 177 | context : dict[str, str] 178 | The context of this variable use. 179 | 180 | Notes 181 | ----- 182 | This method also changes the `symbol` attribute according to the 183 | instruction being used. 184 | """ 185 | 186 | # Update the context 187 | self.context = context 188 | 189 | operation: str = context.get("context", "read") 190 | 191 | if operation == "read": 192 | symbol: str = get_certificate_symbol("VAR_VALUE") 193 | 194 | else: 195 | symbol: str = get_certificate_symbol("VAR_ADDRESS") 196 | 197 | # Add ^1 because it means memory offset + 1. As this is a regular 198 | # variable – and not an array nor struct –, the offset is always 0. 
class Node:
    """
    Implement a Node for the AST.

    Parameters
    ----------
    value : str, int, float, or None, optional (default = None)
        The value the Node holds, if any. Defaults to None.
    type : str or None (optional, default = None)
        The type of the Node, if any. Defaults to None.
    uses_register : bool (optional, default = True)
        Whether the code generated by this Node will use a register (i.e.,
        whether the exported code should have the `register` field).
    """

    def __init__(
        self,
        value: Union[int, str, float, None] = None,
        type: Union[str, None] = None,
        uses_register: bool = True,
    ) -> None:
        self.value: Union[int, str, float, None] = value
        self.type: Union[str, None] = type
        self.certificate_label: list[str] = []
        self.uses_register: bool = uses_register

        # Each `Node` specialization must set its own `instruction` and
        # `symbol`.
        self.instruction: Union[str, None] = None
        self.symbol: str = get_certificate_symbol(self)

    # NOTE: `__eq__` is defined without `__hash__`, so instances are
    # unhashable (standard Python behavior for classes overriding `__eq__`).
    def __eq__(self, other: object) -> bool:
        """
        Implement the equality comparison between Nodes.

        Parameters
        ----------
        other : object
            The right hand side of the comparison.

        Returns
        -------
        is_equal : bool
            `True` if both objects hold the same `value` and are of the exact
            same class, `False` otherwise. `NotImplemented` is returned when
            `other` is not a `Node`, so that comparing a `Node` with an
            unrelated object evaluates to `False` instead of raising.
        """

        if not isinstance(other, Node):
            return NotImplemented

        is_equal = self.value == other.value and type(self) is type(other)

        return is_equal

    def __str__(self) -> str:
        """
        Implement a string representation of a Node object.

        This method is internally invoked when using `print(node_obj)`.

        Returns
        -------
        _str : str
            The kind of the Node, followed by its value, type and certificate
            label -- each one only when set.
        """

        parts: list[str] = [f"Kind: {type(self).__name__}"]

        if self.value is not None:
            parts.append(f"Value: {self.value}")

        if self.type is not None:
            parts.append(f"Type: {self.type}")

        if len(self.certificate_label) > 0:
            parts.append(f"Certificate Label: {self.certificate_label}")

        return ", ".join(parts)

    def get_value(self) -> Union[int, str, float, None]:
        """
        Get the value of this Node.

        Returns
        -------
        : Union[int, str, float, None]
            The value of this Node.
        """

        return self.value

    def get_type(self) -> Union[str, None]:
        """
        Get the type of this Node.

        Returns
        -------
        : Union[str, None]
            The type of the Node.
        """

        return self.type

    def get_certificate_label(self) -> list[str]:
        """
        Get the contents of `certificate_label`.

        Returns
        -------
        : list of str
            A list containing the certificate label of the `Node`.

        Notes
        -----
        This method returns a list, rather than the string itself, in order to
        allow returning multiple labels when nodes have children. Thus,
        subclasses should return a composition of lists.
        """

        return self.certificate_label

    def print(self, indent: int = 0) -> None:
        """
        Print the string representation of `self`.

        The printed text is indented according to the optional `indent`
        parameter.

        Parameters
        ----------
        indent : int (optional, default = 0)
            The number of left padding spaces to indent.
        """

        print(" " * indent + str(self))

    def generate_code(
        self, register: int, environment: dict[str, dict[int, str]]
    ) -> tuple[
        list[dict[str, Union[int, str, float]]],
        int,
        dict[str, dict[int, str]]
    ]:
        """
        Generate the code associated with this `Node`.

        The generated code consists of a dictionary containing the relevant
        `Node` data for the code to run -- namely, the `instruction`, and the
        `value`.

        Notice that some `Nodes` may rewrite this method in order to deal
        with special attributes -- such as the `Operation` nodes, that must
        handle its children nodes.

        This method takes an integer as its parameter in order to allocate a
        register to support the generated code. It also returns an integer --
        `register + 1` when `uses_register` is set, `register` otherwise --
        for it to be passed to the next Node.

        Parameters
        ----------
        register : int
            The number of the register to be used by the code generated by this
            Node.
        environment : dict
            The compiler's environment, that maps variables IDs to memory
            addresses and function IDs to instructions indices.

        Returns
        -------
        code : list of dict
            Return a list of dictionaries containing code metadata: the register
            to be used, the related `instruction` and its metadata.
        register : int
            The number of the next register available.
        environment : dict
            The environment, returned unchanged by this base implementation.

        Notes
        -----
        This method returns a `list` rather than only the `code` in
        order to standardize the return type as some subclasses might have to
        generate code using not only the node itself, but its children, too.
        """

        code = {"instruction": self.instruction, "metadata": {}}

        # The `register` field is only exported by nodes that use one.
        if self.uses_register:
            code["metadata"]["register"] = register
            register += 1

        if self.value is not None:
            code["metadata"]["value"] = self.value

        return [code], register, environment

    def certificate(
        self,
        certificator_env: dict[int, list[int]]
    ) -> dict[int, list[int]]:
        """
        Compute the certificate of the current `Node`, and set this attribute.

        Parameters
        ----------
        certificator_env : dict[int, list[int]]
            The certificators's environment, that maps variables IDs to
            encodings of their types.

        Returns
        -------
        certificator_env : dict[int, list[int]]
            The certificator's environment. This base implementation returns
            it unchanged.
        """

        self.certificate_label = [f"{self.symbol}"]

        return certificator_env
21 | """ 22 | 23 | @override 24 | def __init__(self, function_call_metadata: dict) -> None: 25 | function_id: int = function_call_metadata["called_function_metadata"]["id"] 26 | super().__init__(function_id) 27 | 28 | _function_type: str = function_call_metadata["called_function_metadata"]["type"] 29 | 30 | self.function_call_metadata: dict = function_call_metadata 31 | self.arguments: list[ARG] = self._build_children_nodes() 32 | self.type: str = _function_type 33 | 34 | _prime: int = self.function_call_metadata["called_function_metadata"]["prime"] 35 | self.symbol: str = f"({self.symbol})^({_prime})" 36 | 37 | @override 38 | def get_certificate_label(self) -> list[str]: 39 | """ 40 | Get the contents of `certificate_label`. 41 | 42 | For `FUNC_CALL` nodes, obtain the certificates, recursively, from each 43 | `argument` subtree first, and then from the `FUNC_CALL` node itself. 44 | 45 | Returns 46 | ------- 47 | : list of str 48 | A list containing the certificate label of the `Node`. 49 | """ 50 | 51 | certificate_label: list[str] = [] 52 | 53 | for argument in self.arguments: 54 | _argument_certificate = argument.get_certificate_label() 55 | certificate_label.extend(_argument_certificate) 56 | 57 | certificate_label.extend(super().get_certificate_label()) 58 | 59 | return certificate_label 60 | 61 | @override 62 | def print(self, indent: int = 0) -> None: 63 | """ 64 | Print the string representation of this `FUNC_CALL`. 65 | 66 | The node itself is aligned with `indent`, and its children are padded 67 | with an additional left space. 68 | 69 | Parameters 70 | ---------- 71 | indent : int (optional, default = 0) 72 | The number of left padding spaces to indent. 
73 | """ 74 | 75 | super().print(indent) 76 | 77 | for argument in self.arguments: 78 | argument.print(indent=indent + 1) 79 | 80 | @override 81 | def generate_code( 82 | self, register: int, environment: dict[str, dict[int, str]] 83 | ) -> tuple[ 84 | list[dict[str, Union[int, str, float]]], 85 | int, 86 | dict[int, str] 87 | ]: 88 | """ 89 | Generate the code associated with this `FUNC_CALL`. 90 | 91 | For this node specialization, generate code from `argument` children 92 | nodes first, and then from the `FUNC_CALL` itself. 93 | 94 | Parameters 95 | ---------- 96 | register : int 97 | The number of the register to be used by the code generated by this 98 | Node. 99 | environment : dict[int, str] 100 | The compiler's environment, that maps variables IDs to memory 101 | addresses and function IDs to instructions indices. 102 | 103 | Returns 104 | ------- 105 | code : list of dict 106 | Return a list of dictionaries containing code metadata: the register 107 | to be used, the related `instruction` and its metadata. 108 | register : int 109 | The number of the next register available. 110 | environment : dict[int, str] 111 | The updated {var_id: address} environment mapping. 112 | """ 113 | 114 | code: list[dict] = [] 115 | arguments_registers: list[int] = [] 116 | 117 | for argument in self.arguments: 118 | ( 119 | argument_code, 120 | register, 121 | environment 122 | ) = argument.generate_code( 123 | register=register, 124 | environment=environment 125 | ) 126 | 127 | # Keep track of the registers containing the arguments values 128 | arguments_registers.append( 129 | argument_code[0].get("metadata").get("register") 130 | ) 131 | code.extend(argument_code) 132 | 133 | # The code for the function call itself is actually very simple! Just 134 | # jump-and-link (JAL), to keep track of the return address, and copy 135 | # the `returned_value_register` to `register`. 
136 | func_call_code: list[dict[str, dict]] = [ 137 | {"instruction": "JAL", "metadata": {"value": self.value}}, 138 | { 139 | "instruction": "MOV", 140 | "metadata": { 141 | "register": register, 142 | "value": "ret_value", 143 | }, 144 | }, 145 | ] 146 | register += 1 147 | 148 | code.extend(func_call_code) 149 | 150 | return code, register, environment 151 | 152 | @override 153 | def certificate( 154 | self, 155 | certificator_env: dict[int, list[int]] 156 | ) -> dict[int, list[int]]: 157 | """ 158 | Compute the certificate of the current `FUNC_CALL`, and set this attribute. 159 | 160 | For `FUNC_CALL` nodes, certificate each `argument` child first, and 161 | then the `FUNC_CALL` itself. 162 | 163 | Parameters 164 | ---------- 165 | certificator_env : dict[int, list[int]] 166 | The certificators's environment, that maps variables IDs to 167 | encodings of their types. 168 | 169 | Returns 170 | ------- 171 | certificator_env : dict[int, list[int]] 172 | The updated certificator's environment, with any additional 173 | information about the variable's types it might have captured. 
174 | """ 175 | 176 | for argument in self.arguments: 177 | certificator_env = argument.certificate(certificator_env) 178 | 179 | return super().certificate(certificator_env) 180 | 181 | def _build_children_nodes(self) -> list[Node]: 182 | arguments = self.function_call_metadata["arguments"] 183 | parameters_types: list[str] = [ 184 | param["type"] 185 | for param in ( 186 | self.function_call_metadata["called_function_metadata"][ 187 | "parameters" 188 | ].values() 189 | ) 190 | ] 191 | 192 | children_nodes: list[Node] = [] 193 | 194 | for idx, argument_metadata in enumerate(arguments): 195 | _is_variable = argument_metadata["variable"] 196 | _parameter_type = parameters_types[idx] 197 | 198 | if _is_variable: 199 | argument_value = VAR(variable_metadata=argument_metadata) 200 | 201 | else: 202 | argument_value = CST(constant_metadata=argument_metadata) 203 | 204 | new_node = ARG( 205 | argument_value=argument_value, parameter_type=_parameter_type 206 | ) 207 | 208 | children_nodes.append(new_node) 209 | 210 | return children_nodes 211 | -------------------------------------------------------------------------------- /tests/integration/test_operations.py: -------------------------------------------------------------------------------- 1 | """Integration test to showcase the available operations.""" 2 | 3 | import pytest 4 | 5 | from src.certificators import BackendCertificator, FrontendCertificator 6 | from src.runner import create_instance 7 | 8 | 9 | SOURCE_CODE = """ 10 | int {function_name}() {{ 11 | short var_1; 12 | var_1 = 10; 13 | 14 | int var_2; 15 | var_2 = 4; 16 | 17 | float var_3; 18 | var_3 = 2.3; 19 | 20 | short result_1; 21 | result_1 = var_1 {operator} var_2; 22 | 23 | short result_2; 24 | result_2 = var_1 {operator} var_3; 25 | 26 | return 0; 27 | }} 28 | 29 | int main() {{ 30 | {function_name}(); 31 | 32 | return 0; 33 | }} 34 | """ 35 | 36 | BITWISE_SOURCE_CODE = """ 37 | int {function_name}() {{ 38 | short var_1; 39 | var_1 = 11; 40 | 41 | int 
def _render_source(template: str, test_suite: dict) -> str:
    """Fill `template` with the `function_name` and `operator` of a suite."""

    return template.format(
        function_name=test_suite["function_name"],
        operator=test_suite["operator"],
    )


@pytest.mark.parametrize(
    "test_suite",
    [
        {
            "function_name": "addition",
            "operator": "+",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 14, "0x10": 12},
        },
        {
            "function_name": "subtraction",
            "operator": "-",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 6, "0x10": 7},
        },
        {
            "function_name": "multiplication",
            "operator": "*",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 40, "0x10": 23},
        },
        {
            "function_name": "division",
            "operator": "/",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 2, "0x10": 4},
        },
        {
            "function_name": "greater_than",
            "operator": ">",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 1, "0x10": 1},
        },
        {
            "function_name": "less_than",
            "operator": "<",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 0, "0x10": 0},
        },
        {
            "function_name": "equal",
            "operator": "==",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 0, "0x10": 0},
        },
        {
            "function_name": "not_equal",
            "operator": "!=",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 1, "0x10": 1},
        },
        {
            "function_name": "logical_and",
            "operator": "&&",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 1, "0x10": 1},
        },
        {
            "function_name": "logical_or",
            "operator": "||",
            "expected_memory": {"0x0": 10, "0x4": 4, "0x8": 2.3, "0xc": 1, "0x10": 1},
        },
    ],
)
def test_operation(test_suite: dict) -> None:
    """Run an operation on the VM and check the resulting memory."""

    instance = create_instance(
        source_code=_render_source(SOURCE_CODE, test_suite)
    )
    vm = instance.get_vm()
    vm.run()

    assert vm.get_memory() == test_suite["expected_memory"]


@pytest.mark.parametrize(
    "test_suite",
    [
        {
            "function_name": "left_shift",
            "operator": "<<",
            "expected_memory": {"0x0": 11, "0x4": 3, "0x8": 88},
        },
        {
            "function_name": "right_shift",
            "operator": ">>",
            "expected_memory": {"0x0": 11, "0x4": 3, "0x8": 1},
        },
        {
            "function_name": "bitwise_and",
            "operator": "&",
            "expected_memory": {"0x0": 11, "0x4": 3, "0x8": 3},
        },
        {
            "function_name": "bitwise_or",
            "operator": "|",
            "expected_memory": {"0x0": 11, "0x4": 3, "0x8": 11},
        },
        {
            "function_name": "module",
            "operator": "%",
            "expected_memory": {"0x0": 11, "0x4": 3, "0x8": 2},
        },
    ],
)
def test_bitwise_operation(test_suite: dict) -> None:
    """Run a bit-wise operation on the VM and check the resulting memory."""

    instance = create_instance(
        source_code=_render_source(BITWISE_SOURCE_CODE, test_suite)
    )
    vm = instance.get_vm()
    vm.run()

    assert vm.get_memory() == test_suite["expected_memory"]


@pytest.mark.parametrize(
    "test_suite",
    [
        {"function_name": "addition", "operator": "+"},
        {"function_name": "subtraction", "operator": "-"},
        {"function_name": "multiplication", "operator": "*"},
        {"function_name": "division", "operator": "/"},
        {"function_name": "greater_than", "operator": ">"},
        {"function_name": "less_than", "operator": "<"},
        {"function_name": "equal", "operator": "=="},
        {"function_name": "not_equal", "operator": "!="},
        {"function_name": "logical_and", "operator": "&&"},
        {"function_name": "logical_or", "operator": "||"},
    ],
)
def test_operation_certification(test_suite: dict) -> None:
    """Test the front and backend certification of an operation."""

    instance = create_instance(
        source_code=_render_source(SOURCE_CODE, test_suite)
    )

    ast = instance.get_ast()
    frontend_certificate = FrontendCertificator(ast=ast).certificate()

    program = instance.get_program()
    backend_certificate = BackendCertificator(program=program).certificate()

    assert frontend_certificate == backend_certificate


# This test used to be named `test_bitwise_operation` as well. The second
# definition rebound the module-level name, so pytest only collected this
# one and the memory checks of the bit-wise operations never ran.
@pytest.mark.parametrize(
    "test_suite",
    [
        {"function_name": "left_shift", "operator": "<<"},
        {"function_name": "right_shift", "operator": ">>"},
        {"function_name": "bitwise_and", "operator": "&"},
        {"function_name": "bitwise_or", "operator": "|"},
        {"function_name": "module", "operator": "%"},
    ],
)
def test_bitwise_operation_certification(test_suite: dict) -> None:
    """Test the front and backend certification of a bit-wise operation."""

    instance = create_instance(
        source_code=_render_source(BITWISE_SOURCE_CODE, test_suite)
    )

    ast = instance.get_ast()
    frontend_certificate = FrontendCertificator(ast=ast).certificate()

    program = instance.get_program()
    backend_certificate = BackendCertificator(program=program).certificate()

    assert frontend_certificate == backend_certificate
class FUNC_DEF(Node):
    """
    Implement the representation of a `FUNC_DEF` node for the AST.

    A `FUNC_DEF` is an abstraction of a function definition: it tracks its
    return type, parameters (and its types), and statements.

    The node itself only has children to help structuring the function
    definition, but doesn't have any semantics itself, `instruction`, or has a
    certificate.

    Parameters
    ----------
    function_name : str
        The name of the function.
    function_metadata : dict
        Dictionary of function metadata exported by the Lexer. Its `type`
        entry is used as the type of this node, and its `parameters` entry
        (if any) to create the `PARAM` children.
    """

    @override
    def __init__(
        self, function_name: str, function_metadata: dict[str, Union[str, dict]]
    ) -> None:
        return_type: Union[str, None] = function_metadata.get("type")
        super().__init__(function_name, return_type)

        self.parameters: list[PARAM] = self._define_vars_from_args(
            parameters=function_metadata.get("parameters")
        )

        # The statements are attached later, through `set_statements`.
        self.statements: Union[SEQ, None] = None

    def set_statements(self, statements: SEQ) -> None:
        """
        Set the `statements` attribute.

        Parameters
        ----------
        statements : SEQ
            The statements to be added to object. Must be a `SEQ` -- i.e., the
            root of a subtree with the statements of this function.
        """

        self.statements = statements

    @override
    def get_certificate_label(self) -> list[str]:
        """
        Get the contents of `certificate_label`.

        For `FUNC_DEF` nodes, obtain the certificates, recursively, from each
        parameter first, and then from the `statements` attribute (i.e., a
        `SEQ` node). The `FUNC_DEF` node itself does not have a certificate.

        Returns
        -------
        certificate : list of str
            A list containing the certificate label of this `FUNC_DEF`.
        """

        certificate: list[str] = []

        for parameter in self.parameters:
            certificate.extend(parameter.get_certificate_label())

        certificate.extend(self.statements.get_certificate_label())

        return certificate

    @override
    def print(self, indent: int = 0) -> None:
        """
        Print the string representation of this `FUNC_DEF`.

        The node itself is aligned with `indent`, and its children are padded
        with an additional left space.

        Parameters
        ----------
        indent : int (optional, default = 0)
            The number of left padding spaces to indent.
        """

        super().print(indent)

        for parameter in self.parameters:
            parameter.print(indent + 1)

        self.statements.print(indent + 1)

    @override
    def generate_code(
        self, register: int, environment: dict[str, dict[int, str]]
    ) -> tuple[
        list[dict[str, Union[int, str, float]]],
        int,
        dict[str, dict[int, str]]
    ]:
        """
        Generate the code for this `FUNC_DEF`.

        For this node specialization, generate the code from its `parameters`
        and `statements`, recursively. The `FUNC_DEF` node itself does not have
        an associated instruction, nor uses registers.

        Parameters
        ----------
        register : int
            The number of the register to be used by the code generated by this
            Node.
        environment : dict
            The compiler's environment, that maps variables IDs to memory
            addresses and function IDs to instructions indices.

        Returns
        -------
        code : list of dict
            Return a list of dictionaries containing code metadata: the register
            to be used, the related `instruction` and its metadata.
        register : int
            The number of the next register available.
        environment : dict
            The environment, as returned by the children nodes.
        """

        code: list[dict[str, Union[int, str]]] = []

        # `register` and `environment` are threaded through the children:
        # each one starts from the state left by the previous one.
        for child in (*self.parameters, self.statements):
            (
                child_code,
                register,
                environment
            ) = child.generate_code(
                register=register,
                environment=environment
            )
            code.extend(child_code)

        return code, register, environment

    @override
    def certificate(
        self,
        certificator_env: dict[int, list[int]]
    ) -> dict[int, list[int]]:
        """
        Compute the certificate of this `FUNC_DEF`.

        To achieve this, certificate its `parameters` and `statements`,
        recursively and in order. The `FUNC_DEF` node itself does not have a
        certificate.

        Parameters
        ----------
        certificator_env : dict[int, list[int]]
            The certificators's environment, that maps variables IDs to
            encodings of their types.

        Returns
        -------
        certificator_env : dict[int, list[int]]
            The certificator's environment, as returned by the children nodes.
        """

        for parameter in self.parameters:
            certificator_env = parameter.certificate(certificator_env)

        return self.statements.certificate(certificator_env)

    def _define_vars_from_args(
        self, parameters: Union[dict[str, dict], None]
    ) -> list[PARAM]:
        """
        Create `PARAM` nodes to contain the received parameters.

        Parameters
        ----------
        parameters : dict or None
            A dict with variable metadata to generate `PARAM` nodes from,
            keyed by parameter name. `None` (e.g., when the function metadata
            has no `parameters` entry) is handled as "no parameters".

        Returns
        -------
        variables : list[PARAM]
            A list of `PARAM` nodes. Might be empty, if the function does not
            take any parameters.
        """

        # The parameter name is added first, so a `name` entry in the
        # metadata itself (if any) still takes precedence.
        return [
            PARAM(variable_metadata={"name": parameter_name, **parameter_metadata})
            for parameter_name, parameter_metadata in (parameters or {}).items()
        ]

    def get_function_name(self) -> str:
        """
        Get the name of this function.

        Returns
        -------
        function_name : str
            The name of the function.
        """

        function_name: str = self.value

        return function_name
class IFELSE(IF):
    """
    Implement the representation of an `if`/`else` conditional for the AST.

    Extends `IF` with a second statement subtree, used when the condition
    does not hold.

    Parameters
    ----------
    parenthesis_expression : Node
        The node representation of the expression to be evaluated.
    statement_if_true : Node
        The node representation of code to run if the `parenthesis_expression`
        evaluates to `True`.
    statement_if_false : Node
        The node representation of code to run if the `parenthesis_expression`
        evaluates to `False`.
    """

    @override
    def __init__(
        self,
        parenthesis_expression: Node,
        statement_if_true: Node,
        statement_if_false: Node,
    ) -> None:
        super().__init__(parenthesis_expression, statement_if_true)

        # Placeholder until `certificate` runs and fills it in.
        self.else_boundary_certificate = None

        self.statement_if_false: Node = statement_if_false

    @override
    def get_certificate_label(self) -> list[str]:
        """
        Get the contents of `certificate_label`.

        The result is the label produced by the parent class, followed by the
        label obtained recursively from the `statement_if_false` subtree, and
        closed by the `else_boundary_certificate` of this node.

        Returns
        -------
        : list of str
            A list containing the certificate label of the `Node`.
        """

        label = list(super().get_certificate_label())
        label.extend(self.statement_if_false.get_certificate_label())
        label.append(self.else_boundary_certificate)

        return label

    @override
    def print(self, indent: int = 0) -> None:
        """
        Print the string representation of this `IFELSE`.

        Everything the parent class prints comes first; the
        `statement_if_false` subtree follows, padded with one additional left
        space.

        Parameters
        ----------
        indent : int (optional, default = 0)
            The number of left padding spaces to indent.
        """

        child_indent = indent + 1

        super().print(indent)
        self.statement_if_false.print(child_indent)

    @override
    def generate_code(
        self, register: int, environment: dict[str, dict[int, str]]
    ) -> tuple[
        list[dict[str, Union[int, str, float]]],
        int,
        dict[int, str]
    ]:
        """
        Generate the code associated with this `IFELSE`.

        The emitted sequence is laid out as follows:

        1. the code of the `parenthesis_expression`;
        2. a `JZ` on the register holding the condition, that skips the
           `statement_if_true` block and the jump of step 4;
        3. the code of the `statement_if_true`;
        4. a `JZ` on the `zero` register (used here as an unconditional jump),
           that skips the `statement_if_false` block;
        5. the code of the `statement_if_false`.

        Parameters
        ----------
        register : int
            The number of the register to be used by the code generated by this
            Node.
        environment : dict
            The compiler's environment, that maps variables IDs to memory
            addresses and function IDs to instructions indices.

        Returns
        -------
        code : list of dict
            Return a list of dictionaries containing code metadata: the register
            to be used, the related `instruction` and its metadata.
        register : int
            The number of the next register available.
        environment : dict
            The updated environment, as returned by the last child visited.
        """

        # Children are visited in source order; `register` and `environment`
        # are handed from one to the next.
        condition_code, register, environment = (
            self.parenthesis_expression.generate_code(
                register=register, environment=environment
            )
        )

        # The condition is taken from the register right before the next
        # available one after generating the expression.
        condition_register: int = register - 1

        true_branch_code, register, environment = (
            self.statement_if_true.generate_code(
                register=register, environment=environment
            )
        )

        false_branch_code, register, environment = (
            self.statement_if_false.generate_code(
                register=register, environment=environment
            )
        )

        # Size of the `statement_if_true` block, plus 2 to land right after
        # the jump that closes it.
        skip_true_branch = {
            "instruction": "JZ",
            "metadata": {
                "conditional_register": condition_register,
                "jump_size": len(true_branch_code) + 2,
            },
        }

        # Size of the `statement_if_false` block, plus 1 to land right after
        # its last instruction.
        skip_false_branch = {
            "instruction": "JZ",
            "metadata": {
                "conditional_register": "zero",
                "jump_size": len(false_branch_code) + 1,
            },
        }

        emitted: list[dict[str, Union[int, str]]] = []
        emitted.extend(condition_code)
        emitted.append(skip_true_branch)
        emitted.extend(true_branch_code)
        emitted.append(skip_false_branch)
        emitted.extend(false_branch_code)

        return emitted, register, environment

    @override
    def certificate(
        self,
        certificator_env: dict[int, list[int]]
    ) -> dict[int, list[int]]:
        """
        Compute the certificate of the current `IFELSE`, and set this attribute.

        The parent class certification runs first, then the
        `statement_if_false` subtree is certificated recursively. Finally,
        `else_boundary_certificate` is set from the `ELSE_END` entry of
        `SYMBOLS_MAP`.

        Parameters
        ----------
        certificator_env : dict[int, list[int]]
            The certificators's environment, that maps variables IDs to
            encodings of their types.

        Returns
        -------
        certificator_env : dict[int, list[int]]
            The certificators's environment, as returned by the last subtree
            certificated.
        """

        env_after_if = super().certificate(certificator_env)
        env_after_else = self.statement_if_false.certificate(env_after_if)

        self.else_boundary_certificate = f"{SYMBOLS_MAP['ELSE_END']}"

        return env_after_else