├── CMakeLists.txt ├── LICENSE ├── README.md ├── build.sh ├── buildlib.sh ├── docs ├── api.md ├── machines.md └── operations.md ├── include ├── api │ ├── carbon.h │ └── ops.h ├── arch │ ├── Z80 │ │ ├── generator.h │ │ └── optimizer.h │ ├── aarch64 │ │ ├── generator.h │ │ └── optimizer.h │ └── i386 │ │ ├── archconfig.h │ │ ├── generator.h │ │ └── optimizer.h ├── attributes.h ├── config.h ├── core.h ├── debug.h ├── errsys.h ├── function.h ├── generator.h ├── machine.h ├── oplist.h ├── optimizer │ ├── optimizer.h │ └── reggraph.h ├── parser.h ├── register.h ├── scanner.h ├── token.h └── type.h ├── src ├── api │ └── bindings.cpp ├── arch │ ├── aarch64 │ │ ├── codegen.cpp │ │ └── generator.cpp │ └── i386 │ │ ├── codegen.cpp │ │ ├── generator.cpp │ │ └── optimizer.cpp ├── attributes.cpp ├── errsys.cpp ├── function.cpp ├── generator │ ├── generate.cpp │ └── generator.cpp ├── machine.cpp ├── main.cpp ├── oplist.cpp ├── optimizer │ ├── optimizer.cpp │ └── regassign.cpp ├── parser.cpp ├── register.cpp ├── scanner.cpp ├── token.cpp └── type.cpp ├── test.sh └── tests ├── capi ├── Makefile ├── test0.c └── test1.c ├── test.py └── testfiles ├── test0.ir ├── test1.ir ├── test10.ir ├── test11.ir ├── test12.ir ├── test13.ir ├── test14.ir ├── test15.ir ├── test16.ir ├── test2.ir ├── test3.ir ├── test4.ir ├── test5.ir ├── test6.ir ├── test7.ir ├── test8.ir └── test9.ir /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Version range: CMake 3.0 is still enough, while newer CMake releases (which reject plain pre-3.5 minimums) use up-to-date policies. 2 | cmake_minimum_required(VERSION 3.0...3.29) 3 | 4 | project(carbon-ir) 5 | 6 | # EXEC=true builds the carbon-ir compiler executable, EXEC=false the libcarbon-ir.a static library. 7 | set(EXEC true CACHE BOOL "Build the carbon-ir executable instead of the static library") 8 | 9 | # Debug stays the default, but an explicit -DCMAKE_BUILD_TYPE=... (or a multi-config generator) is respected. 10 | if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 11 | set(CMAKE_BUILD_TYPE Debug) 12 | endif () 13 | 14 | # The language standard must apply to every configuration, not only to Debug. 15 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++11 -Wno-unused-variable") 16 | 17 | # Release builds are stripped and optimised for size. 18 | set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s") 19 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s -Os") 20 | 21 | if (EXEC) 22 | # AddressSanitizer is only enabled for Debug builds of the executable. 23 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address") 24 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__EXEC") 25 | endif () 26 | 27 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -pedantic -Wall") 28 | 29 | # MODE_DEBUG switches on the dbg_* macros from include/debug.h. 30 | string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) 31 | if (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") 32 | message("[Debugging mode enabled]") 33 | add_definitions(-DMODE_DEBUG) 34 | endif () 35 | 36 | include_directories(include) 37 | file(GLOB_RECURSE SOURCES "src/*.cpp") 38 | 39 | if (EXEC) 40 | add_executable(carbon-ir ${SOURCES}) 41 | else () 42 | add_library(carbon-ir STATIC ${SOURCES}) 43 | endif ()
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Robbe De Greef 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Carbon Intermediate Representation 2 | The carbon intermediate representation or carbon for short 3 | is an architecture-independent optimizer and code generator designed to 4 | easily connect the front end of a compiler to multiple backend architectures. 5 | 6 | Carbon can either be used as a standalone compiler via its own assembly-like 7 | language or linked into your compiler and accessed via its C API. 8 | 9 | ## Build 10 | Before you build carbon, make sure you have 11 | - C++ 11 compiler 12 | - CMake >= 3.0 13 | - Bash compatible shell (though this is only to automate build processes etc) 14 | 15 | And to run tests you will also need Python 3 16 | 17 | To build the `carbon-ir` compiler, in the main directory run 18 | `./build.sh`. 19 | 20 | To build the `libcarbon-ir.a` static library, in the main directory run 21 | `./buildlib.sh` 22 | 23 | ## Running the tests 24 | To run carbon's tests you will first need to build carbon-ir. 25 | Once you have the compiler you can just run `./test.sh` to run all tests 26 | at once and run `./test.sh testX` to run a specific test (change X to the 27 | number of the test you want to run). 28 | 29 | The C API testing framework is still being worked on so for now you can go into 30 | tests/capi/ and run the makefile to generate the test executables. 31 | 32 | ## Examples of the language 33 | The carbon-ir language is very simple and statically typed.
34 | 35 | This function will add 1 and 5 together and return its result 36 | 37 | function i32 main() 38 | { 39 | %0 = i32 1 40 | %1 = i32 5 41 | %2 = add i32 %0 %1 42 | return i32 %2 43 | } 44 | 45 | The C equivalent would then be: 46 | 47 | int main() 48 | { 49 | return 1 + 5; 50 | } 51 | 52 | However it could also very well be: 53 | 54 | int main() 55 | { 56 | int x = 1; 57 | int y = 5; 58 | return x + y; 59 | } 60 | 61 | As you can see when you compile C or any other language to carbon-ir 62 | you don't have to worry about allocating variables and carbon will 63 | handle all register allocation and spills for you. 64 | 65 | For all the other operations and info check the operations.md in the 66 | docs folder 67 | 68 | ## Usage of the compiler 69 | The most basic usage of carbon would be: 70 | 71 | carbon-ir ./file-to-compile.ir 72 | 73 | This will just generate a binary for the file specified called a.out 74 | in the default architecture (x86). However to change that name you 75 | can utilise the -o flag 76 | 77 | carbon-ir ./file-to-compile -o the-name 78 | 79 | If you would like to generate for a different machine type use 80 | the -m flag 81 | 82 | carbon-ir ./file-to-compile -o the-name -m aarch64 83 | 84 | Currently the architectures you can compile for are: 85 | x86, z80, aarch64 (armv8) and JVM but please check the machines.md 86 | file in the docs folder for more information on that. 87 | 88 | For more flags check the help flag 89 | 90 | carbon-ir --help 91 | 92 | ## Usage of the C API 93 | Here follows a simple example of the carbon C API. 
94 | 95 | #include <api/carbon.h> 96 | int main() 97 | { 98 | struct carbon carb = init_carbon("test", "x86"); 99 | struct cfunc func = c_create_func(carb, "main", CTYPE_I32, 0, 0); 100 | 101 | c_add_func_attribute(func, "global:true"); 102 | 103 | c_push_op(func, INTLIT, 1, -1, c_reg(func, 0, CTYPE_I32), CTYPE_I32); 104 | c_push_op(func, RETURN, c_reg(func, 0, CTYPE_I32), -1, -1, CTYPE_I32); 105 | 106 | /* Generate all the operations and build the executable */ 107 | c_gen_func(func); 108 | c_destroy_func(func); 109 | c_writeassembly(carb, "test.s"); 110 | end_carbon(carb); 111 | } 112 | 113 | The API is very simple and everything can be accessed by including the `api/carbon.h` file. 114 | Most features of the carbon language are included in the API; if one is missing, open an issue and 115 | I will implement it as fast as I can. -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d "build" ] 2 | then 3 | mkdir build 4 | fi 5 | 6 | cd build 7 | 8 | cmake .. -DEXEC:BOOL=true 9 | make 10 | mv carbon-ir ../ 11 | -------------------------------------------------------------------------------- /buildlib.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d "build" ] 2 | then 3 | mkdir build 4 | fi 5 | 6 | cd build 7 | 8 | cmake .. -DEXEC:BOOL=false 9 | make 10 | mv libcarbon-ir.a ../ 11 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # The C API 2 | The C API is still in its very early stages and not complete 3 | but you can help expand it. 4 | 5 | 6 | Currently all API functions, structures and global variables are defined 7 | in the `include/api/carbon.h` file. 8 | 9 | ## Basic example 10 | A basic example of the usage of the carbon C API. 11 | You can find more examples in the tests/capi/ folder.
12 | 13 | #include <api/carbon.h> 14 | int main() 15 | { 16 | struct carbon carb = init_carbon("test", "x86"); 17 | struct cfunc func = c_create_func(carb, "main", CTYPE_I32, 0, 0); 18 | 19 | c_add_func_attribute(func, "global:true"); 20 | 21 | c_push_op(func, INTLIT, 1, -1, c_reg(func, 0, CTYPE_I32), CTYPE_I32); 22 | c_push_op(func, RETURN, c_reg(func, 0, CTYPE_I32), -1, -1, CTYPE_I32); 23 | 24 | c_gen_func(func); 25 | c_destroy_func(func); 26 | c_writeassembly(carb, "test.s"); 27 | end_carbon(carb); 28 | } 29 | 30 | ## Structures 31 | ### struct carbon 32 | The most important structure is the carbon structure. It holds the information 33 | about the current interface between your compiler and carbon. 34 | 35 | It can be created using the `init_carbon()` function as follows. 36 | 37 | struct carbon carb = init_carbon("infile-name", "x86"); 38 | 39 | The init function takes two arguments; the first is the input file name. This is 40 | just for housekeeping (this way the generated assembly code can reference your 41 | input source code file), you can leave it empty ("") if you want. 42 | The second one is the type of architecture you want to generate a binary for. 43 | The possible architectures are defined in machines.md in the docs/ folder. 44 | 45 | At the end of its life you may want to destroy the carbon structure so that 46 | its resources can be freed. 47 | 48 | end_carbon(carb); 49 | 50 | ### struct cfunc 51 | The function struct is crucial to generate operations and actual code. Use 52 | `c_create_func()` to create one. 53 | 54 | struct cfunc func = c_create_func(carb, "func-name", CTYPE_I32, argamount, arglist); 55 | 56 | This function takes quite a few arguments. The first is the carbon structure, the second is the name for the function, the third is its return type (see struct ctype for more info). Finally the last two are used to pass the number of arguments and their types to the 57 | function.
The first one being the amount and the second a pointer to an array of struct 58 | ctypes. Carbon will try to read the specified amount of arguments from the pointer. 59 | You can always set the last two arguments to zero if the function doesn't take any 60 | arguments. 61 | 62 | ### struct ctype 63 | Types in the C API are represented in the ctype struct. The struct currently 64 | has three fields 65 | 66 | struct ctype 67 | { 68 | int byte_size; /* The amount of bytes the type occupies in memory */ 69 | int ptr; /* If this is a pointer and if so how deep it references */ 70 | int is_float; /* If this type is a float or an integer */ 71 | }; 72 | 73 | Some default types are already defined for you in the carbon.h file. 74 | See: 75 | 76 | CTYPE_I64 77 | CTYPE_I32 78 | CTYPE_I16 79 | CTYPE_I8 80 | 81 | And more will be added in the future. 82 | 83 | ## Functions 84 | @todo -------------------------------------------------------------------------------- /docs/machines.md: -------------------------------------------------------------------------------- 1 | # Supported output machines 2 | 3 | Currently the x86 architecture is the most complete and 4 | supported. The other targets currently are Z80 and 5 | aarch64 but are nowhere near completion. 6 | 7 | You might notice that the arch/ folder also contains 8 | a JVM folder; this was just a test to compile carbon 9 | to a Java VM, however as expected that is not possible due 10 | to Java's pointer abstractions. I left it in in case 11 | it might interest someone. 12 | 13 | ## X86 14 | The x86 generator will generate NASM assembly and link it 15 | with gcc. -------------------------------------------------------------------------------- /docs/operations.md: -------------------------------------------------------------------------------- 1 | # Simple carbon-ir documentation 2 | ## Basic 3 | ### Registers and integer literals 4 | Carbon makes use of an unlimited number of registers.
The operations in carbon 5 | can only accept registers as arguments, not plain integers. 6 | You can of course store an integer in a register first like this: 7 | 8 | REG = TYPE INTLIT-VALUE 9 | 10 | e.g. 11 | 12 | %0 = i32 3 13 | 14 | Don't worry about the overhead, carbon will handle and optimize that for you 15 | (or at least that's the goal). 16 | 17 | ### Comments 18 | Carbon uses line comments with the same syntax as Python. The hashtag (pound) symbol (#) 19 | is used to denote a comment. 20 | 21 | e.g. 22 | 23 | %0 = i32 5 # This line will be executed but the next one won't 24 | # %1 = i32 1 25 | 26 | ## Binary operations 27 | The binary operations currently are: `add` `sub` `mul` `div` and `mod` 28 | 29 | REG = OPERATION TYPE REG REG 30 | 31 | e.g. 32 | 33 | %2 = add i32 %0 %1 34 | 35 | The binary operations will store the result of their operation in the returning 36 | register. The types of all registers must be the same and equal to the type specified after the instruction. If the returning register is initialized here, its type will 37 | be set to the type of the other registers. 38 | 39 | ### add 40 | The add operation will add the content of the first register to that of the second register. 41 | 42 | C equivalent: `c = a + b`; 43 | 44 | ### sub 45 | The sub operation will subtract the content of the second register from that of the first register. 46 | 47 | C equivalent: `c = a - b`; 48 | 49 | ### mul 50 | The mul operation will multiply the content of the first register by that of the 51 | second register. 52 | 53 | C equivalent: `c = a * b`; 54 | 55 | ### div 56 | The div operation will divide the content of the first register by that of the 57 | second register. 58 | 59 | C equivalent: `c = a / b`; 60 | 61 | ### mod 62 | The mod operation will divide the content of the first register by that of the second 63 | register and return the remainder to the returning register.
64 | 65 | C equivalent: `c = a % b`; 66 | 67 | ## Memory operations 68 | ### alloca 69 | The alloca keyword will allocate a specified amount of entries on the stack with the size 70 | specified by the type requested. 71 | 72 | e.g. 73 | 74 | %0 = i32 1 75 | %1 = alloca i32 %0 76 | 77 | This example would allocate the space for 1 i32 on the stack (4 bytes). 78 | The alloca operation then stores a pointer to the start of the array in the returning 79 | register. The pointer's type will be the same specified by alloca (i32 in the example). 80 | 81 | ### store 82 | The store operation can be used to store a value in the memory pointed to by a pointer. 83 | 84 | e.g. 85 | 86 | %0 = i32 1 87 | %1 = alloca i32 %0 88 | %2 = i32 5 89 | store i32* %1 %2 90 | 91 | This example will first create a pointer that points to a location on the stack, 92 | and then load the value 5 into that location. 93 | 94 | The store operation accepts either a global variable identifier or a register as 95 | its first argument. 96 | 97 | ### load 98 | The load operation will load a value pointed to by a pointer into a register. 99 | 100 | e.g. 101 | 102 | %0 = i32 1 103 | %1 = alloca i32 %0 104 | %2 = i32 5 105 | store i32* %1 %2 106 | 107 | %3 = load i32* %1 108 | 109 | We built on the example just above and now the register 3 will hold the value 5. 110 | 111 | The load operation accepts either a global variable identifier or a register as 112 | its argument. 113 | 114 | ### Global arrays (and variables) 115 | Global identifiers always start with an at (@) symbol followed by the identifier itself. 116 | Global arrays syntax can be described as follows: 117 | 118 | @globalname = [TYPE AMOUNT] OPTIONAL-INITIALIZER-LIST 119 | 120 | e.g. 121 | 122 | @globarray = [i32 4] 1 2 3 4 123 | 124 | This example will allocate 4 32 bit integers and initialize them with 1, 2, 3 and 4. 125 | 126 | ### Strings 127 | Strings can be used in an initializer list as shown below. 128 | 129 | e.g. 
130 | 131 | @str1 = [i8 13] "Hello world" 10 0 132 | 133 | ## Functions 134 | ### Defining a function 135 | The function definition syntax is as follows. 136 | 137 | function TYPE NAME(OPTIONAL-ARGUMENTS) 138 | { 139 | } 140 | 141 | e.g. 142 | 143 | function i32 main(i32 %0 i8* %1) 144 | { 145 | } 146 | 147 | The example shows a typical main function. 148 | 149 | ### return keyword 150 | The return keyword accepts a register as argument to return from the function. 151 | 152 | e.g. 153 | 154 | %0 = i32 5 155 | return i32 %0 156 | 157 | The type of the return, the type of the function and the type of the register must 158 | all match. 159 | 160 | ### call operation 161 | The call operation can invoke a specified function and store its return value in a register. 162 | 163 | e.g. 164 | 165 | %0 = i32 7 166 | %1 = call i32 foo(%0) 167 | 168 | In this example the function foo is called with one argument (%0) and the result of 169 | the function will be stored in register 1. The type of the function and the call 170 | operation have to match. 171 | 172 | ## Flowcontrol and comparison 173 | ### Labels 174 | Labels can be used by control flow operations like jmp or jmpcmp to "jump" to the 175 | label and start executing code from there. 176 | 177 | Defining a label is as easy as writing 178 | LABELNAME: 179 | 180 | e.g. 181 | 182 | labelname: 183 | jmp labelname 184 | 185 | This example will create an infinite loop (see the jmp operation section) 186 | 187 | ### cmp operation 188 | The cmp operation will compare the first register with the second and will, based on 189 | the comparison operation specified, store 1 (true) or 0 (false) in the resulting register 190 | 191 | e.g. 192 | 193 | %0 = i32 5 194 | %1 = i32 2 195 | %2 = cmp g i32 %0 %1 196 | 197 | Register 2 will hold (1) because the value in register 0 is greater than the value 198 | in register 1. 
199 | 200 | The comparison operations are `eq` `neq` `l` `le` `g` `ge` 201 | meaning: equal, not equal, less than, less than or equal to, greater than, greater than or equal to, respectively. 202 | 203 | ### jmp operation 204 | The jmp operation will just move the instructions execution point to the specified label. 205 | 206 | e.g. 207 | 208 | label0: 209 | jmp label0 210 | 211 | The example above will generate an infinite loop 212 | 213 | ### jmpcond operation 214 | This operation is an (as the name suggests) jump and comparison operation in one. 215 | it will preform a comparison and jump to the specified label if the condition 216 | evaluates to true. 217 | 218 | see the cmp operation section for more details on the comparison features. 219 | 220 | e.g. 221 | 222 | %0 = i32 4 223 | %1 = i32 50 224 | jmpcmp neq %0 %1 label0 225 | 226 | # this code will not be executed 227 | %0 = i32 1 228 | 229 | label0: 230 | # Code that actually will be executed here 231 | -------------------------------------------------------------------------------- /include/api/carbon.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | struct carbon 12 | { 13 | const char *infile; 14 | struct Parser *__parser; 15 | }; 16 | 17 | struct cfunc 18 | { 19 | const char *name; 20 | int findx; 21 | struct OpList *__statements; 22 | struct Parser *__parser; 23 | }; 24 | 25 | struct ctype 26 | { 27 | int byte_size; 28 | int ptr; 29 | int is_float; 30 | }; 31 | 32 | extern struct ctype __i64_type; 33 | extern struct ctype __i32_type; 34 | extern struct ctype __i16_type; 35 | extern struct ctype __i8_type; 36 | /* Public shorthands for the predefined integer ctypes declared above. */ 37 | #define CTYPE_I64 __i64_type 38 | #define CTYPE_I32 __i32_type 39 | #define CTYPE_I16 __i16_type 40 | #define CTYPE_I8 __i8_type 41 | #define CTYPE_I7 CTYPE_I8 /* deprecated misspelling of CTYPE_I8, kept so existing callers still compile */ 42 | /* Initialisation and end of carbon */ 43 | struct carbon init_carbon(const char *infile, const
char *arch); 44 | void end_carbon(struct carbon carb); 45 | 46 | /* Generation functions */ 47 | void c_gen_glob(struct carbon carb, const char *name, struct ctype t, int init_count, void *initlist); 48 | void c_gen_func(struct cfunc func); 49 | void c_push_op(struct cfunc func, int op, int ar1, int ar2, int ret, struct ctype t); 50 | void c_push_op_id(struct cfunc func, int op, int ar1, int ar2, int ret, const char *id, struct ctype t); 51 | 52 | int c_reg(struct cfunc f, int r, struct ctype t); 53 | 54 | /* Create a function object */ 55 | struct cfunc c_create_func(struct carbon carb, const char *name, struct ctype rettype, 56 | int argc, struct ctype *arglist); 57 | void c_destroy_func(struct cfunc func); 58 | 59 | /* Add attribute functions */ 60 | void c_add_func_attribute(struct cfunc f, const char *attrib); 61 | 62 | /* Write carbon's output to file */ 63 | int c_writeassembly(struct carbon carb, const char *file); 64 | int c_assemble(struct carbon carb, const char *infile, const char *outfile, const char *assembler); 65 | int c_link(struct carbon carb, const char *infile, const char *outfile, const char *linker); 66 | 67 | #ifdef __cplusplus 68 | } 69 | #endif -------------------------------------------------------------------------------- /include/api/ops.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include -------------------------------------------------------------------------------- /include/arch/Z80/generator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | enum 6 | { 7 | BC, 8 | DE, 9 | HL, 10 | 11 | REGAMOUNT24 12 | }; 13 | 14 | enum 15 | { 16 | A, 17 | B, 18 | C, 19 | D, 20 | E, 21 | H, 22 | L, 23 | 24 | REGAMOUNT8 25 | }; 26 | 27 | class GeneratorZ80 : public Generator 28 | { 29 | private: 30 | std::string m_registers24[REGAMOUNT24] = {"bc", "de", "hl"}; 31 | std::string m_registers8[REGAMOUNT8] = {"a", 
"b", "c", "d", "e", "h", "l"}; 32 | 33 | protected: 34 | void genIntlitLoad(Type t, int val, Register ret) {} 35 | void genGlobLoad(Type t, std::string glob, Register ret) {} 36 | void genMul(Type t, Register r1, Register r2, Register ret) {} 37 | void genAdd(Type t, Register r1, Register r2, Register ret) {} 38 | void genSub(Type t, Register r1, Register r2, Register ret) {} 39 | void genDiv(Type t, Register r1, Register r2, Register ret) {} 40 | void genMod(Type t, Register r1, Register r2, Register ret) {} 41 | void genReg(Type t, Register r, Register ret) {} 42 | 43 | void genAlloca(Type t, Register r, Register ret) {} 44 | void genStore(Type t, Register r1, Register r2) {} 45 | void genLoad(Type t, Register r, Register ret) {} 46 | void genLoad(Type t, std::string glob, Register ret) {} 47 | void genSpillLoad(Type t, Register r, Register ret) {} 48 | void genSpillStore(Type t, Register r, Register ret) {} 49 | void genFunctionCall(Type t, std::string function, Register ret, std::vector args) {} 50 | void genReturn(Type t, Register ret) {} 51 | void genJmpCond(Type t, int op, Register r1, Register r2, std::string label) {} 52 | void genLabel(std::string label) {} 53 | void genJmp(std::string label) {} 54 | void genCmp(Type t, int op, Register r1, Register r2, Register ret) {} 55 | 56 | public: 57 | void genFunction(Type t, Function f) {} 58 | void genGlobalVariable(std::string name, ArrayType t, std::vector init) {} 59 | void genSetupFile(std::string file) {} 60 | void genExternSymbol(std::string sym) {} 61 | 62 | int registerAmount() { return REGAMOUNT24; } 63 | bool shouldAllocateRegisters() { return true; } 64 | 65 | int assemble(std::string infile, std::string outfile, std::string assembler) { return 0; } 66 | int link(std::string infile, std::string outfile, std::string linker) { return 0; } 67 | 68 | }; -------------------------------------------------------------------------------- /include/arch/Z80/optimizer.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class OptimizerZ80 : public Optimizer 7 | { 8 | protected: 9 | // bool graphCheck(RegisterGraph &graph); 10 | // void prepAssignRegisters(OpList &list); 11 | }; -------------------------------------------------------------------------------- /include/arch/aarch64/generator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define REGAMOUNT 7 6 | 7 | class GeneratorAARCH64 : public Generator 8 | { 9 | 10 | private: 11 | /// Helpers 12 | void writeInst(std::string, std::string, std::string, std::string); 13 | void writeInst(std::string, std::string, std::string); 14 | void writeInst(std::string, std::string); 15 | void writeInst(std::string, int); 16 | void writeInst(std::string); 17 | void writeMov(std::string r1, std::string r2); 18 | std::string registerToString(Register r); 19 | 20 | protected: 21 | void genIntlitLoad(Type t, int val, Register ret); 22 | void genGlobLoad(Type t, std::string glob, Register ret); 23 | void genMul(Type t, Register r1, Register r2, Register ret); 24 | void genAdd(Type t, Register r1, Register r2, Register ret); 25 | void genSub(Type t, Register r1, Register r2, Register ret); 26 | void genDiv(Type t, Register r1, Register r2, Register ret); 27 | void genMod(Type t, Register r1, Register r2, Register ret); 28 | void genReg(Type t, Register r, Register ret); 29 | 30 | void genAlloca(Type t, Register r, Register ret); 31 | void genStore(Type t, Register r1, Register r2); 32 | void genLoad(Type t, Register r, Register ret); 33 | void genLoad(Type t, std::string glob, Register ret); 34 | void genSpillLoad(Type t, Register r, Register ret); 35 | void genSpillStore(Type t, Register r, Register ret); 36 | void genFunctionCall(Type t, std::string function, Register ret, std::vector args); 37 | void genReturn(Type t, Register ret); 38 
| void genJmpCond(Type t, int op, Register r1, Register r2, std::string label); 39 | void genLabel(std::string label); 40 | void genJmp(std::string label); 41 | void genCmp(Type t, int op, Register r1, Register r2, Register ret); 42 | 43 | public: 44 | void genFunction(Type t, Function f); 45 | void genGlobalVariable(std::string name, ArrayType t, std::vector init); 46 | void genSetupFile(std::string file); 47 | void genExternSymbol(std::string sym); 48 | 49 | int registerAmount() { return REGAMOUNT; } 50 | 51 | int assemble(std::string infile, std::string outfile, std::string assembler); 52 | int link(std::string infile, std::string outfile, std::string linker); 53 | 54 | }; -------------------------------------------------------------------------------- /include/arch/aarch64/optimizer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class OptimizerAARCH64 : public Optimizer 7 | { 8 | protected: 9 | // bool graphCheck(RegisterGraph &graph); 10 | // void prepAssignRegisters(OpList &list); 11 | }; -------------------------------------------------------------------------------- /include/arch/i386/archconfig.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define TWO_OPERANT_ASSEMBLY true 4 | -------------------------------------------------------------------------------- /include/arch/i386/generator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define DATASECTION 1 6 | 7 | namespace x86 8 | { 9 | enum 10 | { 11 | EAX, 12 | EBX, 13 | ECX, 14 | EDX, 15 | // ESI, 16 | // EDI, 17 | 18 | REGAMOUNT 19 | }; 20 | 21 | /// IDk if this is really necessary 22 | enum 23 | { 24 | E, 25 | NE, 26 | LE, 27 | L, 28 | GE, 29 | G, 30 | 31 | CMPOPAMOUNT 32 | }; 33 | } // namespace x86 34 | 35 | using namespace x86; 36 | 37 | class GeneratorX86 : public 
Generator 38 | { 39 | private: 40 | std::string m_loByteRegs[REGAMOUNT] = {"al", "bl", "cl", "dl"}; 41 | std::string m_shortRegs[REGAMOUNT] = {"ax", "bx", "cx", "dx"}; 42 | std::string m_registers[REGAMOUNT] = {"eax", "ebx", "ecx", "edx" /*, "esi", "edi" */}; 43 | std::string m_cmpOps[CMPOPAMOUNT] = {"e", "ne", "le", "l", "ge", "g"}; 44 | int m_textSection = DATASECTION + 1; 45 | 46 | private: 47 | void writeInst(std::string inst); 48 | void writeInst(std::string inst, std::string arg); 49 | void writeInst(std::string inst, std::string dest, std::string src); 50 | void writeMov(std::string dest, std::string src); 51 | 52 | private: 53 | bool physInUse(int reg); 54 | void genIDiv(Type t, Register r1, Register r2, Register ret, std::string out); 55 | 56 | protected: 57 | void genIntlitLoad(Type t, int val, Register ret); 58 | void genGlobLoad(Type t, std::string glob, Register ret); 59 | void genMul(Type t, Register r1, Register r2, Register ret); 60 | void genAdd(Type t, Register r1, Register r2, Register ret); 61 | void genSub(Type t, Register r1, Register r2, Register ret); 62 | void genDiv(Type t, Register r1, Register r2, Register ret); 63 | void genMod(Type t, Register r1, Register r2, Register ret); 64 | void genReg(Type t, Register r1, Register ret); 65 | 66 | void genAlloca(Type t, Register r, Register ret); 67 | void genStore(Type t, Register r1, Register r2); 68 | void genStore(Type t, std::string glob, Register r2); 69 | void genLoad(Type t, Register r, Register ret); 70 | void genLoad(Type t, std::string glob, Register ret); 71 | 72 | void genFunctionCall(Type t, std::string function, Register ret, std::vector args); 73 | void genSpillLoad(Type t, Register r, Register ret); 74 | void genSpillStore(Type t, Register r, Register ret); 75 | void genReturn(Type t, Register ret); 76 | void genJmpCond(Type t, int op, Register r1, Register r2, std::string label); 77 | void genLabel(std::string label); 78 | void genJmp(std::string label); 79 | void genCmp(Type t, 
int op, Register r1, Register r2, Register ret); 80 | std::string registerToString(Register r, std::string *list = nullptr); 81 | 82 | public: 83 | void genSetupFile(std::string file); 84 | void genFunction(Function func); 85 | void genGlobalVariable(std::string name, ArrayType t, std::vector init); 86 | void genExternSymbol(std::string sym); 87 | 88 | int registerAmount() { return REGAMOUNT; } 89 | 90 | int assemble(std::string infile, std::string outfile, std::string assembler); 91 | int link(std::string infile, std::string outfile, std::string linker); 92 | 93 | /// This function is currently only used to subtract from esp in the beginning 94 | /// of code generation. 95 | void feedGenerate(OpList list); 96 | }; -------------------------------------------------------------------------------- /include/arch/i386/optimizer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class OptimizerX86 : public Optimizer 7 | { 8 | protected: 9 | bool graphCheck(RegisterGraph &graph); 10 | void prepAssignRegisters(OpList &list); 11 | }; -------------------------------------------------------------------------------- /include/attributes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class Attributes : public std::vector 6 | { 7 | public: 8 | std::string getValueOf(std::string); 9 | int getBoolValueOf(std::string); 10 | }; -------------------------------------------------------------------------------- /include/config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define FILE_TEMP_LIMIT 8 4 | #define SCANNER_IDENTIFIER_LIMMIT 512 5 | #define LARGEINT uint64_t 6 | 7 | #define DEFAULT_MACHINE "x86" -------------------------------------------------------------------------------- /include/core.h: 
/// Debug-build assertion. When `x` evaluates to false the failure banner built
/// by __ASSRT_FAIL (expression text, line and file) is printed; execution then
/// simply continues, nothing is aborted.
///
/// The banner is handed to printf as the argument of "%s" and not as the format
/// string itself: it contains the stringified expression and __FILE__, so an
/// assertion such as `a % b == 0` would otherwise be parsed as a format
/// containing conversion specifiers without matching arguments.
#define dbg_assert(x) __DW(if (!(x)) dbg_printf("%s", __ASSRT_FAIL(x)))
31 | #define dbg_assert(x) 32 | #define dbg_call(x) 33 | 34 | #endif -------------------------------------------------------------------------------- /include/errsys.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class ErrorSystem 6 | { 7 | public: 8 | void fatal(std::string reason) __attribute__((noreturn)); 9 | void syntaxError(std::string reason) __attribute__((noreturn)); 10 | }; -------------------------------------------------------------------------------- /include/function.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class Function 7 | { 8 | private: 9 | std::string m_name; 10 | Type m_retType; 11 | std::vector m_args; 12 | Attributes m_attributes; 13 | 14 | public: 15 | Function(std::string name, Type t); 16 | std::string name() { return m_name; } 17 | Type retType() { return m_retType; } 18 | std::vector &args() { return m_args; } 19 | Attributes &attributes() { return m_attributes; } 20 | void setAttributes(Attributes attrs) { m_attributes = attrs; } 21 | }; -------------------------------------------------------------------------------- /include/generator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | /// The base generator class will need to be inherited by your architecture 9 | /// dependant generator class. You will need to implement some functionality 10 | /// like for example some gen* functions. 11 | class Generator 12 | { 13 | protected: 14 | std::vector m_internalOutBuf; 15 | 16 | RegisterList m_regData; 17 | int m_opLine; 18 | 19 | protected: 20 | /// @todo: all of these functions should return nothing? and also arguments are useless now ay? 
21 | /// All of these next functions MUST be overriden in the architecture 22 | /// dependant generator class 23 | virtual void genIntlitLoad(Type t, int val, Register ret) {} 24 | virtual void genGlobLoad(Type t, std::string glob, Register ret) {} 25 | virtual void genMul(Type t, Register r1, Register r2, Register ret) {} 26 | virtual void genAdd(Type t, Register r1, Register r2, Register ret) {} 27 | virtual void genSub(Type t, Register r1, Register r2, Register ret) {} 28 | virtual void genDiv(Type t, Register r1, Register r2, Register ret) {} 29 | virtual void genMod(Type t, Register r1, Register r2, Register ret) {} 30 | virtual void genReg(Type t, Register r, Register ret) {} 31 | 32 | virtual void genAlloca(Type t, Register r, Register ret) {} 33 | virtual void genStore(Type t, Register r1, Register r2) {} 34 | virtual void genStore(Type t, std::string glob, Register r2) {} 35 | virtual void genLoad(Type t, Register r, Register ret) {} 36 | virtual void genLoad(Type t, std::string glob, Register ret) {} 37 | virtual void genSpillLoad(Type t, Register r, Register ret) {} 38 | virtual void genSpillStore(Type t, Register r, Register ret) {} 39 | virtual void genFunctionCall(Type t, std::string function, Register ret, std::vector args) {} 40 | virtual void genReturn(Type t, Register ret) {} 41 | virtual void genJmpCond(Type t, int op, Register r1, Register r2, std::string label) {} 42 | virtual void genLabel(std::string label) {} 43 | virtual void genJmp(std::string label) {} 44 | virtual void genCmp(Type t, int op, Register r1, Register r2, Register ret) {} 45 | 46 | public: 47 | virtual void genFunction(Function f) {} 48 | virtual void genGlobalVariable(std::string name, ArrayType t, std::vector init) {} 49 | virtual void genSetupFile(std::string file) {} 50 | virtual void genExternSymbol(std::string sym) {} 51 | 52 | /// These functions are used to control carbon's register allocation system. 
53 | /// If you want carbon to allocate the registers for you, you must return 54 | /// the amount of physical registers that you have available in the registerAmount() 55 | /// function. If not, just return false from the shouldAllocateRegisters() function. 56 | virtual int registerAmount() { return 0; } 57 | virtual bool shouldAllocateRegisters() { return true; } 58 | 59 | /// These two functions will need to implement the assembling and the linking 60 | /// of the binary. If the assembler string is empty use the default assembler for 61 | /// your platform, if it is not that means that an external assembler was specified 62 | /// by the user. The same goes for the linker. 63 | virtual int assemble(std::string infile, std::string outfile, std::string assembler) { return 0; } 64 | virtual int link(std::string infile, std::string outfile, std::string linker) { return 0; } 65 | 66 | public: 67 | /// This is the base generate function, this can be overriden in the architecture 68 | /// dependant generator class but it is not required. Basically unless you need 69 | /// too (because the generator does not fullfill the needs of your architecture) 70 | /// you can just use the one we provide. 71 | virtual void generate(OpQuad *tree); 72 | 73 | /// This is the base feed function, it does no optimization. Meaning it just 74 | /// passes every quadtriple operation in the OpList to the generate class. 75 | /// You can override this class and implement some form of final architecture 76 | /// optimization. 
/// Printable name of every operation. The table is laid out in the same order
/// as the OpTypes enum so that OpNames[op] names the OpTypes value `op`;
/// entry 0 is a "NULL" placeholder because the enum starts at 1.
///
/// Every enumerator needs an entry here: a missing name shifts all following
/// names by one and lets the highest enum values index past the end of the
/// array. When adding a value to OpTypes, add its name at the same position.
static const char *OpNames[] =
{
    "NULL", "reg", "intlit", "identifier",
    "add", "sub", "mul", "div", "mod",
    "call", "alloca", "store", "load",
    "jmp", "jmpcond", "cmp",
    "return",
    "function",
    "label",
    "destroy register",
    "spill", "spillload", "spillstore",
    "glob",

    "assign"
};
t); 74 | OpQuad(int op, int arg1, int arg2, int ret, Type t); 75 | 76 | int operation() { return m_operation; } 77 | int arg1() { return m_arg1; } 78 | int arg2() { return m_arg2; } 79 | int ret() { return m_return; } 80 | int extra() { return m_extra; } 81 | std::vector extraArgs() { return m_args; } 82 | Type type() { return m_type; } 83 | std::string identifier() { return m_identifier; } 84 | 85 | void setReturn(int reg) { m_return = reg; } 86 | void setArg1(int r) { m_arg1 = r; } 87 | void setArg2(int r) { m_arg2 = r; } 88 | void setIdentifier(std::string s) { m_identifier = s; } 89 | void setExtra(int extra) { m_extra = extra; } 90 | void setExtraArgs(std::vector args) { m_args = args; } 91 | 92 | static int tokToOp(int tok); 93 | }; 94 | 95 | class OpList : public std::vector 96 | { 97 | private: 98 | RegisterList m_list; 99 | 100 | private: 101 | void updateRegisterLifetime(OpQuad *op, int line); 102 | 103 | public: 104 | void setRegList(RegisterList list) { m_list = list; } 105 | RegisterList ®List() { return m_list; } 106 | 107 | void push_newregs(RegisterList &list); 108 | void push_back(OpQuad *quad); 109 | void insert(int line, OpQuad *quad); 110 | 111 | /// Removes all the OpQuad pointers in the OpList. 112 | void destroy(); 113 | 114 | #ifdef MODE_DEBUG 115 | void print() 116 | { 117 | } 118 | #endif 119 | }; 120 | 121 | #endif -------------------------------------------------------------------------------- /include/optimizer/optimizer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class Optimizer 8 | { 9 | private: 10 | OpList twoOpOptimize(OpList); 11 | 12 | protected: 13 | /// This function will run after the register graph was made and colored, 14 | /// you can insert spills in this function and if the function returns 15 | /// true the graphcoloring will again be ran. 
16 | virtual bool graphCheck(RegisterGraph &graph) { return false; } 17 | 18 | /// This function will run before the register graph is build. 19 | virtual void prepAssignRegisters(OpList &list) {} 20 | 21 | /// This flag will optimize the code in a way that will result in less 22 | /// code spills for architectures with two operant opcodes like x86 etc. 23 | virtual bool twoRegisterOperants() { return true; } 24 | 25 | public: 26 | void assignRegisters(OpList &list, Generator *gen); 27 | OpList optimize(OpList list); 28 | }; -------------------------------------------------------------------------------- /include/optimizer/reggraph.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class RegisterNode 8 | { 9 | private: 10 | Register *m_reg; 11 | std::list m_adj; 12 | 13 | public: 14 | RegisterNode(Register *reg) { m_reg = reg; } 15 | Register *reg() { return m_reg; } 16 | std::list &adj() { return m_adj; } 17 | 18 | int calcLife() { return m_reg->lifetime(); } 19 | int virt() { return m_reg->virt(); } 20 | }; 21 | 22 | class RegisterGraph 23 | { 24 | private: 25 | std::vector m_list; 26 | int m_physRegAmount; 27 | bool *m_usedRegs; 28 | OpList &m_statements; 29 | 30 | private: 31 | int firstNonSpill(std::list adj); 32 | void spillLongestLiving(RegisterNode *node); 33 | 34 | public: 35 | RegisterGraph(int physAmount, OpList &tree); 36 | ~RegisterGraph(); 37 | std::vector &list() { return m_list; } 38 | OpList &statements() { return m_statements; } 39 | 40 | public: 41 | void color(); 42 | void connect(int i, int j); 43 | void refresh(); 44 | 45 | #ifdef MODE_DEBUG 46 | void show(); 47 | #endif 48 | }; 49 | -------------------------------------------------------------------------------- /include/parser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 
#include 8 | #include 9 | #include 10 | 11 | class Parser 12 | { 13 | private: 14 | Scanner m_scanner; 15 | Generator *m_generator; 16 | Optimizer *m_optimizer; 17 | 18 | RegisterList m_regList; 19 | std::vector m_functions; 20 | 21 | std::string m_asmFile; 22 | 23 | private: 24 | Type parseType(); 25 | ArrayType parseArrayType(); 26 | std::vector parseArrayInit(int amount); 27 | 28 | int parsePrimary(Type t, bool shouldBeReg = false); 29 | OpQuad *parseBinOperator(); 30 | OpQuad *parseOperation(); 31 | OpQuad *parseInitialize(); 32 | OpQuad *parseAssign(); 33 | OpQuad *parseStatement(); 34 | OpQuad *parseReturn(); 35 | OpQuad *parseFunctionCall(); 36 | OpQuad *parseStore(); 37 | OpQuad *parseLoad(); 38 | OpQuad *parseAlloca(); 39 | OpQuad *parseJmpCond(); 40 | OpQuad *parseLabel(); 41 | OpQuad *parseJmp(); 42 | OpQuad *parseCmp(); 43 | OpList parseFunction(); 44 | 45 | void parseGlobal(); 46 | 47 | 48 | public: 49 | Parser(Scanner &scan, Generator *gen, Optimizer *opt, std::string asmFile); 50 | Parser(Generator *gen, Optimizer *opt); 51 | 52 | Generator *generator() { return m_generator; } 53 | Optimizer *optimizer() { return m_optimizer; } 54 | std::vector &functions() { return m_functions; } 55 | 56 | void parse(); 57 | int addFunction(Function func); 58 | int addRegister(int r, Type t); 59 | void generateFunction(OpList statements); 60 | }; -------------------------------------------------------------------------------- /include/register.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class Register 7 | { 8 | private: 9 | Type m_type; 10 | int m_virt = -1; 11 | 12 | int m_firstOcc = -1; 13 | int m_lastOcc = -1; 14 | 15 | bool m_spilled = false; 16 | int m_hintReg = -1; 17 | int m_hintSpill = -1; 18 | 19 | public: 20 | Register(int virt, Type t) 21 | { 22 | m_virt = virt; 23 | m_type = t; 24 | } 25 | Register(Type t) { m_type = t; } 26 | 27 | int virt() { return m_virt; } 
28 | int hintReg() { return m_hintReg; } 29 | bool spilled() { return m_spilled; } 30 | int hintSpill() { return m_hintSpill; } 31 | Type type() { return m_type; } 32 | 33 | void setVirt(int v) { m_virt = v; } 34 | void setHintReg(int r) { m_hintReg = r; } 35 | void setHintSpill(int r) { m_hintSpill = r; } 36 | void setSpilled(bool s) { m_spilled = s; } 37 | 38 | /// This will set the last occurrence of the register to the given line parameter 39 | /// and also update the firstOcc variable if it was still uninitialized. 40 | void setLastOcc(int line); 41 | void setFirstOcc(int line) { m_firstOcc = line; } 42 | 43 | int lifetime() { return m_lastOcc - m_firstOcc; } 44 | int firstOcc() { return m_firstOcc; } 45 | int lastOcc() { return m_lastOcc; } 46 | }; 47 | 48 | class RegisterList : public std::vector 49 | { 50 | private: 51 | int m_lastReg; 52 | 53 | public: 54 | int findReg(int r); 55 | int lastReg(); 56 | int addRegister(int r, Type t); 57 | int spillCount(); 58 | }; -------------------------------------------------------------------------------- /include/scanner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class Scanner 7 | { 8 | private: 9 | int m_putback = 0; 10 | FILE *m_infile = nullptr; 11 | std::string m_infileName; 12 | 13 | Token m_token; 14 | Token m_prevToken; 15 | 16 | std::string m_lastIdentifier; 17 | 18 | private: 19 | int next(); 20 | int skip(); 21 | void skipLine(); 22 | int scanInt(int c); 23 | int scanReg(); 24 | std::string scanIdentifier(int c); 25 | int identifyKeyword(std::string id); 26 | void putback(int c); 27 | void scanStringlit(); 28 | void scanAttributes(); 29 | 30 | public: 31 | Token &scan(); 32 | Token &match(int tok); 33 | Token &match(int tok1, int tok2); 34 | Token &scanUntil(int tok); 35 | Scanner(std::string openfile); 36 | Scanner() {} 37 | 38 | public: 39 | Token &token() { return m_token; } 40 | Token &prevToken() { return 
/// Printable name of every token. The table follows the order of the
/// Token::Types enum so that TokenTypes[tok] names the token `tok`; entry 0 is
/// a "NULL" placeholder because the enum starts at 1.
///
/// "attribute" is the last entry because Token::ATTRIBUTES is the last
/// enumerator before TOKAMOUNT. Listing it after "extern" shifted the name of
/// every token from COMMA onward by one position.
static const char *TokenTypes[] =
{
    "NULL", "reg", "intlit", "identifier",
    "add", "sub", "mul", "div", "mod",
    "call", "alloca", "store", "load",
    "jmp", "jmpcond", "cmp",
    "return",

    "eq", "neq", "le", "l", "ge", "g",
    "function", "extern",
    "comma", "left parenthesis '('", "right parenthesis ')'", "equalsign", "newline",
    "left brace '{'", "right brace '}'", "star '*'", "colon ':'",
    "left bracket '['", "right bracket ']'",
    "i8", "i16", "i32", "i64",
    "f32", "f64",
    "glob", "string literal",
    "attribute"
};
101 | } 102 | int intValue() { return m_intValue; } 103 | std::string identifier() { return m_identifier; } 104 | Attributes args() { return m_args; } 105 | 106 | void set(int tok, int val); 107 | void set(int tok, std::string id); 108 | void set(int tok, Attributes args); 109 | 110 | void setToken(int tok); 111 | void setIntValue(int val); 112 | void setIdentifier(std::string id); 113 | 114 | std::string getName(); 115 | }; -------------------------------------------------------------------------------- /include/type.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct ctype; 7 | 8 | class Type 9 | { 10 | private: 11 | int m_byteSize = -1; 12 | bool m_isFloat = false; 13 | 14 | int m_ptr = 0; 15 | 16 | public: 17 | int byteSize() { return m_byteSize; } 18 | int ptr() { return m_ptr; } 19 | void setPtr(int ptr) { m_ptr = ptr; } 20 | Type() {} 21 | Type(struct ctype t); 22 | Type(int bytesize, bool isFloat = false) 23 | { 24 | m_byteSize = bytesize; 25 | m_isFloat = isFloat; 26 | } 27 | }; 28 | 29 | class ArrayType 30 | { 31 | private: 32 | int m_arrSize = -1; 33 | Type m_type; 34 | 35 | public: 36 | ArrayType(Type t, int amount) 37 | { 38 | m_type = t; 39 | m_arrSize = amount; 40 | } 41 | int arrSize() { return m_arrSize; } 42 | Type type() { return m_type; } 43 | }; 44 | -------------------------------------------------------------------------------- /src/api/bindings.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct ctype __i64_type = {.byte_size = 8, .ptr = 0, .is_float = 0}; 10 | struct ctype __i32_type = {.byte_size = 4, .ptr = 0, .is_float = 0}; 11 | struct ctype __i16_type = {.byte_size = 2, .ptr = 0, .is_float = 0}; 12 | struct ctype __i8_type = {.byte_size = 1, .ptr = 0, .is_float = 0}; 13 | 14 | struct carbon init_carbon(const char 
*infile, const char *arch) 15 | { 16 | struct carbon ret; 17 | Generator *gen; 18 | Optimizer *opt; 19 | std::tie(gen, opt) = getMachine(std::string(arch)); 20 | ret.__parser = new Parser(gen, opt); 21 | ret.infile = infile; 22 | 23 | /// Initialisation 24 | gen->genSetupFile(std::string(infile)); 25 | 26 | return ret; 27 | } 28 | 29 | void end_carbon(struct carbon carb) 30 | { 31 | delete carb.__parser->optimizer(); 32 | delete carb.__parser->generator(); 33 | delete carb.__parser; 34 | } 35 | 36 | struct cfunc c_create_func(struct carbon carb, const char *name, struct ctype t, int argc, 37 | struct ctype *arglist) 38 | { 39 | struct cfunc func; 40 | 41 | func.name = name; 42 | func.__parser = carb.__parser; 43 | func.__statements = new OpList(); 44 | 45 | Function fobj = Function(std::string(name), Type(t)); 46 | for (int i = 0; i < argc; ++i) 47 | { 48 | Type arg = Type(arglist[i]); 49 | fobj.args().push_back(arg); 50 | } 51 | 52 | func.findx = carb.__parser->addFunction(fobj); 53 | return func; 54 | } 55 | 56 | void c_destroy_func(struct cfunc func) 57 | { 58 | delete func.__statements; 59 | } 60 | 61 | void c_add_func_attribute(struct cfunc f, const char *attrib) 62 | { 63 | f.__parser->functions()[f.findx].attributes().push_back(std::string(attrib)); 64 | } 65 | 66 | void c_gen_func(struct cfunc f) 67 | { 68 | f.__parser->generator()->genFunction(f.__parser->functions()[f.findx]); 69 | f.__parser->generateFunction(*f.__statements); 70 | } 71 | 72 | void c_gen_glob(struct carbon carb, const char *name, struct ctype t, int init_count, 73 | void *intlist) 74 | { 75 | std::vector inits; 76 | ArrayType artype(Type(t), init_count); 77 | 78 | for (int i = 0; i < init_count; ++i) 79 | { 80 | inits.push_back(((LARGEINT*) intlist)[i]); 81 | } 82 | 83 | carb.__parser->generator()->genGlobalVariable(std::string(name), artype, inits); 84 | } 85 | 86 | void c_push_op(struct cfunc func, int op, int ar1, int ar2, int ret, struct ctype t) 87 | { 88 | OpQuad *quad = new 
OpQuad(op, ar1, ar2, ret, Type(t)); 89 | func.__statements->push_back(quad); 90 | } 91 | 92 | void c_push_op_id(struct cfunc func, int op, int ar1, int ar2, int ret, const char *id, struct ctype t) 93 | { 94 | OpQuad *quad = new OpQuad(op, ar1, ar2, ret, Type(t)); 95 | quad->setIdentifier(std::string(id)); 96 | func.__statements->push_back(quad); 97 | } 98 | 99 | int c_reg(struct cfunc f, int r, struct ctype t) 100 | { 101 | f.__statements->regList().addRegister(r, Type(t)); 102 | } 103 | 104 | int c_writeassembly(struct carbon carb, const char *file) 105 | { 106 | return carb.__parser->generator()->writeOutfile(std::string(file)); 107 | } 108 | 109 | int c_assemble(struct carbon carb, const char *infile, const char *outfile, 110 | const char *assembler) 111 | { 112 | if (!assembler) 113 | assembler = ""; 114 | 115 | return carb.__parser->generator()->assemble(std::string(infile), std::string(outfile), 116 | std::string(assembler)); 117 | } 118 | 119 | int c_link(struct carbon carb, const char *infile, const char *outfile, 120 | const char *linker) 121 | { 122 | if (!linker) 123 | linker = ""; 124 | 125 | return carb.__parser->generator()->link(std::string(infile), std::string(outfile), 126 | std::string(linker)); 127 | } -------------------------------------------------------------------------------- /src/arch/aarch64/codegen.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void GeneratorAARCH64::genIntlitLoad(Type t, int val, Register ret) 4 | { 5 | writeInst("mov", registerToString(ret), "#" + std::to_string(val)); 6 | } 7 | void GeneratorAARCH64::genGlobLoad(Type t, std::string glob, Register ret) {} 8 | void GeneratorAARCH64::genMul(Type t, Register r1, Register r2, Register ret) 9 | { 10 | writeInst("mul", registerToString(ret), registerToString(r1), registerToString(r2)); 11 | } 12 | 13 | void GeneratorAARCH64::genAdd(Type t, Register r1, Register r2, Register ret) 14 | { 15 | writeInst("add", 
registerToString(ret), registerToString(r1), registerToString(r2)); 16 | } 17 | 18 | void GeneratorAARCH64::genSub(Type t, Register r1, Register r2, Register ret) 19 | { 20 | writeInst("sub", registerToString(ret), registerToString(r1), registerToString(r2)); 21 | } 22 | void GeneratorAARCH64::genDiv(Type t, Register r1, Register r2, Register ret) 23 | { 24 | } 25 | void GeneratorAARCH64::genMod(Type t, Register r1, Register r2, Register ret) 26 | { 27 | } 28 | 29 | void GeneratorAARCH64::genReg(Type t, Register r, Register ret) {} 30 | 31 | void GeneratorAARCH64::genAlloca(Type t, Register r, Register ret) {} 32 | void GeneratorAARCH64::genStore(Type t, Register r1, Register r2) {} 33 | void GeneratorAARCH64::genLoad(Type t, Register r, Register ret) {} 34 | void GeneratorAARCH64::genLoad(Type t, std::string glob, Register ret) {} 35 | void GeneratorAARCH64::genSpillLoad(Type t, Register r, Register ret) {} 36 | void GeneratorAARCH64::genSpillStore(Type t, Register r, Register ret) {} 37 | void GeneratorAARCH64::genFunctionCall(Type t, std::string function, Register ret, std::vector args) 38 | { 39 | } 40 | void GeneratorAARCH64::genReturn(Type t, Register ret) 41 | { 42 | writeMov("r0", registerToString(ret)); 43 | } 44 | void GeneratorAARCH64::genJmpCond(Type t, int op, Register r1, Register r2, std::string label) {} 45 | void GeneratorAARCH64::genLabel(std::string label) {} 46 | void GeneratorAARCH64::genJmp(std::string label) {} 47 | void GeneratorAARCH64::genCmp(Type t, int op, Register r1, Register r2, Register ret) {} 48 | void GeneratorAARCH64::genFunction(Type t, Function f) 49 | { 50 | if (f.attributes().getBoolValueOf("global") == true) 51 | write(".globl\t " + f.name()); 52 | write(".type\t" + f.name() + ", %function"); 53 | write(f.name() + ":"); 54 | 55 | /// Now the function preamble 56 | writeInst("push", "{fp, lr}"); 57 | writeInst("add", "fp, sp, #4"); 58 | writeInst("sub", "sp, sp, #8"); 59 | } 60 | void 
GeneratorAARCH64::genGlobalVariable(std::string name, ArrayType t, std::vector init) {} 61 | 62 | void GeneratorAARCH64::genSetupFile(std::string filename) 63 | { 64 | write(".text"); 65 | } 66 | 67 | void GeneratorAARCH64::genExternSymbol(std::string sym) {} 68 | -------------------------------------------------------------------------------- /src/arch/aarch64/generator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int GeneratorAARCH64::assemble(std::string infile, std::string outfile, std::string assembler) 5 | { 6 | return 0; 7 | } 8 | 9 | int GeneratorAARCH64::link(std::string infile, std::string outfile, std::string linker) 10 | { 11 | return 0; 12 | } 13 | std::string GeneratorAARCH64::registerToString(Register r) 14 | { 15 | if (r.spilled()) 16 | { 17 | } 18 | else if (r.hintReg() != -1) 19 | { 20 | /// @todo: different bytetypes etc 21 | std::string pre = "x"; 22 | dbg_assert(r.hintReg() < 16 && r.hintReg() > 8); 23 | return pre + std::to_string(r.hintReg() + 9); 24 | } 25 | 26 | dbg_assert(0); 27 | return ""; 28 | } 29 | 30 | void GeneratorAARCH64::writeInst(std::string inst, std::string op) 31 | { 32 | write("\t" + inst + "\t" + op); 33 | } 34 | 35 | void GeneratorAARCH64::writeInst(std::string inst, std::string ret, std::string r1, std::string r2) 36 | { 37 | write("\t" + inst + "\t" + ret + ", " + r1 + ", " + r2); 38 | } 39 | 40 | void GeneratorAARCH64::writeInst(std::string inst, std::string ret, std::string val) 41 | { 42 | write("\t" + inst + "\t" + ret + ", " + val); 43 | } 44 | 45 | void GeneratorAARCH64::writeInst(std::string inst, int val) 46 | { 47 | write("\t" + inst + "\t" + std::to_string(val)); 48 | } 49 | 50 | void GeneratorAARCH64::writeInst(std::string inst) 51 | { 52 | write("\t" + inst); 53 | } 54 | 55 | void GeneratorAARCH64::writeMov(std::string r1, std::string r2) 56 | { 57 | if (r1 != r2) 58 | writeInst("mov", r1, r2); 59 | } 
-------------------------------------------------------------------------------- /src/arch/i386/codegen.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static std::string nasmSizeName(Type t) 5 | { 6 | switch (t.byteSize()) 7 | { 8 | case 1: 9 | return "BYTE"; 10 | case 2: 11 | return "WORD"; 12 | case 4: 13 | return "DWORD"; 14 | case 8: 15 | return "QWORD"; 16 | } 17 | 18 | dbg_assert(0); 19 | return ""; 20 | } 21 | 22 | void GeneratorX86::genIntlitLoad(Type t, int val, Register ret) 23 | { 24 | writeMov(registerToString(ret), nasmSizeName(t) + " " + std::to_string(val)); 25 | } 26 | 27 | void GeneratorX86::genMul(Type t, Register r1, Register r2, Register ret) 28 | { 29 | writeMov(registerToString(ret), registerToString(r1)); 30 | writeInst("imul", registerToString(ret), registerToString(r2)); 31 | } 32 | 33 | void GeneratorX86::genAdd(Type t, Register r1, Register r2, Register ret) 34 | { 35 | writeMov(registerToString(ret), registerToString(r1)); 36 | writeInst("add", registerToString(ret), registerToString(r2)); 37 | } 38 | 39 | void GeneratorX86::genSub(Type t, Register r1, Register r2, Register ret) 40 | { 41 | writeMov(registerToString(ret), registerToString(r1)); 42 | writeInst("sub", registerToString(ret), registerToString(r2)); 43 | } 44 | 45 | void GeneratorX86::genIDiv(Type t, Register r1, Register r2, Register ret, std::string out) 46 | { 47 | if (physInUse(EDX) && ret.hintReg() != EDX) 48 | writeInst("push", "edx"); 49 | 50 | if (physInUse(EAX) && ret.hintReg() != EAX) 51 | writeInst("push", "eax"); 52 | 53 | std::string divisor = registerToString(r2); 54 | int tmp = (r1.hintReg() == ECX) ? 
EBX : ECX; 55 | if (r2.hintReg() == EAX || r2.hintReg() == EDX) 56 | { 57 | /// load it to tmp 58 | if (physInUse(tmp) && ret.hintReg() != tmp) 59 | writeInst("push", m_registers[tmp]); 60 | 61 | writeMov(m_registers[tmp], registerToString(r2)); 62 | divisor = m_registers[tmp]; 63 | } 64 | 65 | writeMov("eax", registerToString(r1)); 66 | writeInst("xor", "edx", "edx"); 67 | writeInst("idiv", divisor); 68 | writeMov(registerToString(ret), out); 69 | 70 | if (physInUse(tmp) && ret.hintReg() != tmp) 71 | writeInst("pop", m_registers[tmp]); 72 | 73 | if (physInUse(EAX) && ret.hintReg() != EAX) 74 | writeInst("pop", "eax"); 75 | 76 | if (physInUse(EDX) && ret.hintReg() != EDX) 77 | writeInst("pop", "edx"); 78 | } 79 | 80 | void GeneratorX86::genDiv(Type t, Register r1, Register r2, Register ret) 81 | { 82 | genIDiv(t, r1, r2, ret, "eax"); 83 | } 84 | 85 | void GeneratorX86::genMod(Type t, Register r1, Register r2, Register ret) 86 | { 87 | genIDiv(t, r1, r2, ret, "edx"); 88 | } 89 | 90 | void GeneratorX86::genFunction(Function f) 91 | { 92 | if (f.attributes().getBoolValueOf("global") == true) 93 | write("[global " + f.name() + "]"); 94 | write(f.name() + ":"); 95 | writeInst("push", "ebp"); 96 | writeInst("mov", "ebp", "esp"); 97 | } 98 | 99 | void GeneratorX86::genReturn(Type t, Register ret) 100 | { 101 | writeMov("eax", registerToString(ret)); 102 | writeInst("leave"); 103 | writeInst("ret"); 104 | } 105 | 106 | void GeneratorX86::genReg(Type t, Register r, Register ret) 107 | { 108 | writeMov(registerToString(ret), registerToString(r)); 109 | } 110 | 111 | void GeneratorX86::genSpillLoad(Type t, Register r, Register ret) 112 | { 113 | writeMov(registerToString(ret), registerToString(r)); 114 | } 115 | 116 | void GeneratorX86::genSpillStore(Type t, Register r, Register ret) 117 | { 118 | writeMov(registerToString(ret), registerToString(r)); 119 | } 120 | 121 | static bool isArg(std::vector args, int i) 122 | { 123 | for (Register r : args) 124 | if (r.hintReg() == 
i) 125 | return true; 126 | 127 | return false; 128 | } 129 | 130 | void GeneratorX86::genFunctionCall(Type t, std::string id, Register ret, std::vector args) 131 | { 132 | /// @todo: spill all live registers 133 | 134 | // Save all live registers 135 | bool regInUse[REGAMOUNT]; 136 | for (int i = 0; i < REGAMOUNT; i++) 137 | { 138 | if (physInUse(i) && i != ret.hintReg() && !isArg(args, i)) 139 | { 140 | regInUse[i] = true; 141 | writeInst("push", m_registers[i]); 142 | } 143 | else 144 | regInUse[i] = false; 145 | } 146 | 147 | for (Register r : args) 148 | { 149 | writeInst("push", registerToString(r, m_registers)); 150 | } 151 | 152 | writeInst("call", id); 153 | if (args.size()) 154 | writeInst("add", "esp", std::to_string(4 * args.size())); 155 | writeMov(registerToString(ret), "eax"); 156 | 157 | // Pop all previously saved registers 158 | for (int i = 0; i < REGAMOUNT; i++) 159 | if (regInUse[i]) 160 | writeInst("pop", m_registers[i]); 161 | } 162 | 163 | void GeneratorX86::genAlloca(Type t, Register r, Register ret) 164 | { 165 | writeInst("imul", registerToString(r), std::to_string(t.byteSize())); 166 | writeInst("sub", "esp", registerToString(r)); 167 | writeInst("mov", registerToString(ret), "esp"); 168 | } 169 | 170 | void GeneratorX86::genStore(Type t, Register r1, Register r2) 171 | { 172 | writeInst("mov", "[" + registerToString(r1) + "]", registerToString(r2)); 173 | } 174 | 175 | void GeneratorX86::genStore(Type t, std::string glob, Register r2) 176 | { 177 | writeInst("mov", "[" + glob + "]", registerToString(r2)); 178 | } 179 | 180 | void GeneratorX86::genLoad(Type t, Register r, Register ret) 181 | { 182 | writeInst("mov", registerToString(ret), "[" + registerToString(r) + "]"); 183 | } 184 | 185 | void GeneratorX86::genLoad(Type t, std::string glob, Register ret) 186 | { 187 | writeInst("mov", registerToString(ret), "[" + glob + "]"); 188 | } 189 | 190 | void GeneratorX86::genJmpCond(Type t, int op, Register r1, Register r2, std::string label) 
191 | { 192 | writeInst("cmp", registerToString(r1), registerToString(r2)); 193 | dbg_assert(op >= 0 && op <= CMPOPAMOUNT); 194 | writeInst("j" + m_cmpOps[op], label); 195 | } 196 | 197 | void GeneratorX86::genLabel(std::string label) 198 | { 199 | write(label + ":"); 200 | } 201 | 202 | void GeneratorX86::genJmp(std::string label) 203 | { 204 | writeInst("jmp", label); 205 | } 206 | 207 | void GeneratorX86::genCmp(Type t, int op, Register r1, Register r2, Register ret) 208 | { 209 | writeInst("cmp", registerToString(r1), registerToString(r2)); 210 | dbg_assert(op >= 0 && op <= CMPOPAMOUNT); 211 | writeInst("set" + m_cmpOps[op], registerToString(ret, m_loByteRegs)); 212 | writeInst("and", registerToString(ret), "1"); 213 | } 214 | 215 | static std::string byteSizeToNasmVar(int byteSize) 216 | { 217 | switch (byteSize) 218 | { 219 | case 1: 220 | return "db"; 221 | case 2: 222 | return "dw"; 223 | case 4: 224 | return "dd"; 225 | case 8: 226 | return "dq"; 227 | default: 228 | dbg_assert(0); 229 | return "db"; 230 | } 231 | } 232 | 233 | void GeneratorX86::genGlobalVariable(std::string name, ArrayType t, std::vector init) 234 | { 235 | std::string text = name + ":\n\t"; 236 | text += byteSizeToNasmVar(t.type().byteSize()) + " "; 237 | 238 | for (LARGEINT i : init) 239 | { 240 | text += std::to_string(i) + ", "; 241 | } 242 | 243 | text += '\n'; 244 | 245 | insert(text, DATASECTION); 246 | m_textSection++; 247 | } 248 | 249 | void GeneratorX86::genSetupFile(std::string file) 250 | { 251 | write("section .data"); 252 | write("section .text"); 253 | } 254 | 255 | void GeneratorX86::genGlobLoad(Type t, std::string glob, Register ret) 256 | { 257 | writeMov(registerToString(ret), glob); 258 | } 259 | 260 | void GeneratorX86::genExternSymbol(std::string id) 261 | { 262 | insert("[extern " + id + "]", m_textSection); 263 | } -------------------------------------------------------------------------------- /src/arch/i386/generator.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void GeneratorX86::feedGenerate(OpList list) 5 | { 6 | /// This piece of code just subtracts the correct amount from esp so that the 7 | /// stack frame of the function is correct 8 | int spilled = list.regList().spillCount(); 9 | dbg_print("spillcount: " << spilled); 10 | if (spilled) 11 | writeInst("sub", "esp", std::to_string(spilled * 4)); 12 | 13 | m_opLine = 0; 14 | for (OpQuad *op : list) 15 | { 16 | generate(op); 17 | m_opLine++; 18 | } 19 | } 20 | 21 | bool GeneratorX86::physInUse(int reg) 22 | { 23 | for (Register &r : m_regData) 24 | if (r.hintReg() == reg) 25 | { 26 | if (r.firstOcc() <= m_opLine && r.lastOcc() >= m_opLine) 27 | return true; 28 | } 29 | 30 | return false; 31 | } 32 | 33 | std::string GeneratorX86::registerToString(Register r, std::string *list) 34 | { 35 | if (r.spilled()) 36 | { 37 | if (r.hintSpill() >= 0) 38 | return "[ebp-" + std::to_string(4 + 4 * r.hintSpill()) + "]"; 39 | else 40 | return "[ebp+" + std::to_string(4 + (-r.hintSpill()) * 4) + "]"; 41 | } 42 | 43 | if (r.hintReg() != -1) 44 | { 45 | if (!list) 46 | { 47 | if (r.type().byteSize() == 1 && !r.type().ptr()) 48 | list = m_loByteRegs; 49 | else if (r.type().byteSize() == 2 && !r.type().ptr()) 50 | list = m_shortRegs; 51 | else 52 | list = m_registers; 53 | } 54 | 55 | return list[r.hintReg()]; 56 | } 57 | 58 | return ""; 59 | } 60 | 61 | void GeneratorX86::writeInst(std::string inst, std::string dest, std::string src) 62 | { 63 | write("\t" + inst + "\t" + dest + ", " + src); 64 | } 65 | 66 | void GeneratorX86::writeInst(std::string inst, std::string arg) 67 | { 68 | write("\t" + inst + "\t" + arg); 69 | } 70 | 71 | void GeneratorX86::writeInst(std::string inst) 72 | { 73 | write("\t" + inst); 74 | } 75 | 76 | void GeneratorX86::writeMov(std::string dest, std::string src) 77 | { 78 | if (dest != src) 79 | writeInst("mov", dest, src); 80 | } 81 | 82 | int 
GeneratorX86::assemble(std::string infile, std::string outfile, std::string assembler) 83 | { 84 | if (assembler != "") 85 | return system((assembler + " -o " + outfile + " " + infile).c_str()); 86 | 87 | return system(("nasm -F dwarf -g -felf -o " + outfile + " " + infile).c_str()); 88 | } 89 | 90 | int GeneratorX86::link(std::string infile, std::string outfile, std::string linker) 91 | { 92 | if (linker != "") 93 | return system((linker + " -o " + outfile + " " + infile).c_str()); 94 | 95 | return system(("gcc -o " + outfile + " " + infile + " -m32").c_str()); 96 | } -------------------------------------------------------------------------------- /src/arch/i386/optimizer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | bool OptimizerX86::graphCheck(RegisterGraph &graph) 5 | { 6 | /// @todo: refactor 7 | bool flag = false; 8 | 9 | for (int i = 0; i < (int)graph.statements().regList().size(); i++) 10 | { 11 | Register r = graph.statements().regList()[i]; 12 | if (r.spilled()) 13 | { 14 | OpList tmp = graph.statements(); 15 | int tosearch = i; 16 | int line = 0; 17 | for (OpQuad *quad : tmp) 18 | { 19 | if (quad->operation() != OpTypes::INTLIT && quad->operation() != OpTypes::SPILLSTORE && quad->operation() != OpTypes::SPILLLOAD) 20 | { 21 | if (quad->arg1() == tosearch) 22 | { 23 | flag = true; 24 | int last = graph.statements().regList().addRegister(graph.statements().regList().lastReg() + 1, quad->type()); 25 | graph.statements().insert(line, new OpQuad(OpTypes::SPILLLOAD, tosearch, -1, last, quad->type())); 26 | graph.statements().regList()[last].setLastOcc(line + 1); 27 | quad->setArg1(last); 28 | line++; 29 | } 30 | 31 | if (quad->arg2() == tosearch) 32 | { 33 | flag = true; 34 | int last = graph.statements().regList().addRegister(graph.statements().regList().lastReg() + 1, quad->type()); 35 | graph.statements().insert(line, new OpQuad(OpTypes::SPILLLOAD, tosearch, -1, last, quad->type())); 
36 | graph.statements().regList()[last].setLastOcc(line + 1); 37 | quad->setArg2(last); 38 | line++; 39 | } 40 | 41 | if (quad->ret() == tosearch && tmp[line + 1]->operation() != OpTypes::SPILLSTORE) 42 | { 43 | flag = true; 44 | int last = graph.statements().regList().addRegister(graph.statements().regList().lastReg() + 1, quad->type()); 45 | graph.statements().insert(line + 1, new OpQuad(OpTypes::SPILLSTORE, last, -1, tosearch, quad->type())); 46 | graph.statements().regList()[last].setFirstOcc(line); 47 | quad->setReturn(last); 48 | line++; 49 | } 50 | } 51 | line++; 52 | } 53 | } 54 | } 55 | return flag; 56 | } 57 | 58 | void OptimizerX86::prepAssignRegisters(OpList &list) 59 | { 60 | /// We don't need to do anything here for X86 (yet) 61 | } -------------------------------------------------------------------------------- /src/attributes.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | std::string Attributes::getValueOf(std::string s) 4 | { 5 | for (std::string attr : *this) 6 | { 7 | if (attr.find(s) == 0) 8 | return attr.substr(s.size() + 1); 9 | } 10 | 11 | return ""; 12 | } 13 | 14 | int Attributes::getBoolValueOf(std::string s) 15 | { 16 | for (std::string attr : *this) 17 | { 18 | /// @todo: this might not be the cleanest implementation (will just return false if it is not "true") 19 | if (attr.find(s) == 0) 20 | return (attr.substr(s.size() + 1) == "true") ? 
true : false; 21 | } 22 | 23 | return -1; 24 | } -------------------------------------------------------------------------------- /src/errsys.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | ErrorSystem g_errsys; 5 | 6 | void ErrorSystem::fatal(std::string reason) 7 | { 8 | std::cout << __CLR_RED "Fatal error" __CLR_END ": " << reason << "\n"; 9 | exit(1); 10 | } 11 | 12 | void ErrorSystem::syntaxError(std::string reason) 13 | { 14 | std::cout << __CLR_RED "Syntax error" __CLR_END ": " << reason << "\n"; 15 | exit(1); 16 | } -------------------------------------------------------------------------------- /src/function.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | Function::Function(std::string name, Type t) 4 | { 5 | m_name = name; 6 | m_retType = t; 7 | } -------------------------------------------------------------------------------- /src/generator/generate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void Generator::generate(OpQuad *op) 5 | { 6 | switch (op->operation()) 7 | { 8 | case OpTypes::INTLIT: 9 | genIntlitLoad(op->type(), op->arg1(), findReg(op->ret())); 10 | break; 11 | 12 | case OpTypes::GLOB: 13 | genGlobLoad(op->type(), op->identifier(), findReg(op->ret())); 14 | break; 15 | 16 | case OpTypes::MUL: 17 | genMul(op->type(), findReg(op->arg1()), findReg(op->arg2()), findReg(op->ret())); 18 | break; 19 | case OpTypes::ADD: 20 | genAdd(op->type(), findReg(op->arg1()), findReg(op->arg2()), findReg(op->ret())); 21 | break; 22 | case OpTypes::SUB: 23 | genSub(op->type(), findReg(op->arg1()), findReg(op->arg2()), findReg(op->ret())); 24 | break; 25 | case OpTypes::DIV: 26 | genDiv(op->type(), findReg(op->arg1()), findReg(op->arg2()), findReg(op->ret())); 27 | break; 28 | case OpTypes::MOD: 29 | genMod(op->type(), findReg(op->arg1()), findReg(op->arg2()), 
findReg(op->ret())); 30 | break; 31 | 32 | case OpTypes::REG: 33 | genReg(op->type(), findReg(op->arg1()), findReg(op->ret())); 34 | break; 35 | 36 | case OpTypes::CALL: 37 | { 38 | std::vector args; 39 | for (int arg : op->extraArgs()) 40 | args.push_back(findReg(arg)); 41 | genFunctionCall(op->type(), op->identifier(), findReg(op->ret()), args); 42 | } 43 | break; 44 | 45 | case OpTypes::SPILLLOAD: 46 | genSpillLoad(op->type(), findReg(op->arg1()), findReg(op->ret())); 47 | break; 48 | 49 | case OpTypes::SPILLSTORE: 50 | genSpillStore(op->type(), findReg(op->arg1()), findReg(op->ret())); 51 | break; 52 | 53 | case OpTypes::ALLOCA: 54 | genAlloca(op->type(), findReg(op->arg1()), findReg(op->ret())); 55 | break; 56 | 57 | case OpTypes::STORE: 58 | if (op->arg1() == -1) 59 | genStore(op->type(), op->identifier(), findReg(op->arg2())); 60 | else 61 | genStore(op->type(), findReg(op->arg1()), findReg(op->arg2())); 62 | break; 63 | 64 | case OpTypes::LOAD: 65 | if (op->arg1() == -1) 66 | genLoad(op->type(), op->identifier(), findReg(op->ret())); 67 | else 68 | genLoad(op->type(), findReg(op->arg1()), findReg(op->ret())); 69 | break; 70 | 71 | case OpTypes::JMPCOND: 72 | genJmpCond(op->type(), op->extra(), findReg(op->arg1()), findReg(op->arg2()), op->identifier()); 73 | break; 74 | 75 | case OpTypes::LABEL: 76 | genLabel(op->identifier()); 77 | break; 78 | 79 | case OpTypes::JMP: 80 | genJmp(op->identifier()); 81 | break; 82 | 83 | case OpTypes::CMP: 84 | genCmp(op->type(), op->extra(), findReg(op->arg1()), findReg(op->arg2()), findReg(op->ret())); 85 | break; 86 | 87 | case OpTypes::RETURN: 88 | genReturn(op->type(), findReg(op->arg1())); 89 | break; 90 | 91 | default: 92 | dbg_print_r("Unknown " << op->operation()); 93 | break; 94 | } 95 | } -------------------------------------------------------------------------------- /src/generator/generator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int 
Generator::writeOutfile(std::string outfile) 5 | { 6 | FILE *fp = fopen(outfile.c_str(), "w+"); 7 | if (!fp) 8 | { 9 | g_errsys.fatal("could not open output file '" + outfile + "'"); 10 | return -1; 11 | } 12 | 13 | for (std::string s : m_internalOutBuf) 14 | { 15 | fprintf(fp, "%s\n", s.c_str()); 16 | } 17 | fflush(fp); 18 | return 0; 19 | } 20 | 21 | void Generator::write(std::string s) 22 | { 23 | #ifdef MODE_DEBUG 24 | static int iter = 1; 25 | std::cout << iter << ": " << s << "\n"; 26 | iter++; 27 | #endif 28 | 29 | m_internalOutBuf.push_back(s); 30 | } 31 | 32 | void Generator::insert(std::string s, int line) 33 | { 34 | m_internalOutBuf.insert(m_internalOutBuf.begin() + line, s); 35 | } 36 | 37 | void Generator::feedGenerate(OpList list) 38 | { 39 | m_opLine = 0; 40 | for (OpQuad *op : list) 41 | { 42 | generate(op); 43 | m_opLine++; 44 | } 45 | } 46 | 47 | void Generator::setRegList(RegisterList list) 48 | { 49 | m_regData = list; 50 | } 51 | 52 | Register &Generator::findReg(int r) 53 | { 54 | dbg_assert(r >= 0 && r <= (int)m_regData.size()); 55 | return m_regData[r]; 56 | } -------------------------------------------------------------------------------- /src/machine.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | std::tuple getMachine(std::string machine) 14 | { 15 | Generator *generator = nullptr; 16 | Optimizer *optimizer = nullptr; 17 | switch (machine[0]) 18 | { 19 | case 'a': 20 | if (!machine.compare("aarch64") || !machine.compare("armv8")) 21 | { 22 | generator = new GeneratorAARCH64(); 23 | optimizer = new OptimizerAARCH64(); 24 | } 25 | case 'x': 26 | if (!machine.compare("x86")) 27 | { 28 | generator = new GeneratorX86(); 29 | optimizer = new OptimizerX86(); 30 | } 31 | break; 32 | 33 | case 'e': 34 | if (!machine.compare("Z80")) 35 | { 36 | generator = new GeneratorZ80(); 
37 | optimizer = new OptimizerZ80(); 38 | } 39 | } 40 | 41 | return {generator, optimizer}; 42 | } -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | std::string genTemp() 12 | { 13 | srand(time(NULL)); 14 | 15 | std::string s = "/tmp/"; 16 | for (int i = 0; i < FILE_TEMP_LIMIT; i++) 17 | s += 'a' + rand() % 26; 18 | 19 | return s; 20 | } 21 | 22 | const std::string helpString = "Usage: carbon-ir [options] file\n" 23 | "Options:\n" 24 | "\t--help (-h)\t\t\tDisplay this information\n" 25 | "\t--compile (-S)\t\t\tOnly compile to assembly\n" 26 | "\t--nolink (-c)\t\t\tOnly assemble the generated assembly code, do not link\n" 27 | "\t--output (-o)\t [outputfile]\tSpecify the name of the output file (default is a.out)\n" 28 | "\t--assembler (-a) [assembler]\tForce carbon to use a specific assembler\n" 29 | "\t--linker (-l)\t [linker]\tForce carbon to use a specific linker\n" 30 | "\t--machine (-m)\t [architecture]\tSpecify the type of machine you want to generate code for\n"; 31 | 32 | void printHelp() 33 | { 34 | std::cout << helpString << "\n"; 35 | exit(0); 36 | } 37 | 38 | #ifdef __EXEC 39 | int main(int argc, char **argv) 40 | { 41 | int opt; 42 | int opt_index = 0; 43 | 44 | int f_onlyCompile = 0; 45 | int f_onlyAssemble = 0; 46 | std::string outfile = "a.out"; 47 | std::string asmFile = genTemp() + ".s"; 48 | std::string linkFile = genTemp() + ".o"; 49 | std::string machine = DEFAULT_MACHINE; 50 | 51 | std::string assembler = ""; 52 | std::string linker = ""; 53 | 54 | Generator *generator = nullptr; 55 | Optimizer *optimizer = nullptr; 56 | 57 | static struct option options[] = 58 | { 59 | /// Flags 60 | {"help", no_argument, 0, 'h'}, 61 | {"compile", no_argument, &f_onlyCompile, 'S'}, 62 | {"nolink", no_argument, &f_onlyAssemble, 
'c'}, 63 | 64 | /// Arguments 65 | {"output", required_argument, 0, 'o'}, 66 | {"assembler", optional_argument, 0, 'a'}, 67 | {"linker", optional_argument, 0, 'l'}, 68 | {"machine", required_argument, 0, 'm'}, 69 | {0, 0, 0, 0}}; 70 | 71 | while ((opt = getopt_long(argc, argv, "m:o:a:l:cSh", options, &opt_index)) != -1) 72 | { 73 | switch (opt) 74 | { 75 | case 0: 76 | /// Set a flag (handled by getopt_long) 77 | break; 78 | 79 | case 'o': 80 | outfile = std::string(optarg); 81 | break; 82 | 83 | case 'S': 84 | f_onlyCompile = true; 85 | break; 86 | 87 | case 'c': 88 | f_onlyAssemble = true; 89 | break; 90 | 91 | case 'm': 92 | machine = std::string(optarg); 93 | break; 94 | 95 | case 'a': 96 | assembler = std::string(optarg); 97 | break; 98 | 99 | case 'l': 100 | linker = std::string(optarg); 101 | break; 102 | 103 | case 'h': 104 | printHelp(); 105 | break; 106 | 107 | default: 108 | g_errsys.fatal("basic usage: carbon-ir -o [outfile] [infiles]\ntry carbin-ir -h for help"); 109 | } 110 | } 111 | 112 | if (optind >= argc) 113 | { 114 | g_errsys.fatal("Error expected input files"); 115 | } 116 | 117 | if (f_onlyCompile) 118 | asmFile = outfile; 119 | 120 | if (f_onlyAssemble) 121 | linkFile = outfile; 122 | 123 | std::tie(generator, optimizer) = getMachine(machine); 124 | 125 | if (generator == nullptr || optimizer == nullptr) 126 | g_errsys.fatal("unknown machine type '" + machine + "'"); 127 | 128 | for (; optind < argc; optind++) 129 | { 130 | Scanner scanner = Scanner(std::string(argv[optind])); 131 | Parser parser = Parser(scanner, generator, optimizer, asmFile); 132 | parser.parse(); 133 | } 134 | 135 | /// @todo: I'm not proud of thess goto's 136 | if (f_onlyCompile) 137 | goto end; 138 | 139 | if (generator->assemble(asmFile, linkFile, assembler)) 140 | g_errsys.fatal("failed to assemble binary"); 141 | 142 | if (f_onlyAssemble) 143 | goto end; 144 | 145 | if (generator->link(linkFile, outfile, linker)) 146 | g_errsys.fatal("failed to link binary"); 147 | 
148 | end: 149 | /// @todo: this might cause undefined behaviour, look into it 150 | delete generator; 151 | delete optimizer; 152 | 153 | return 0; 154 | } 155 | #endif -------------------------------------------------------------------------------- /src/oplist.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int OpQuad::tokToOp(int tok) 5 | { 6 | if (tok <= OpTypes::ASSIGN && tok > 0) 7 | return tok; 8 | 9 | dbg_assert(0); 10 | return -1; 11 | } 12 | 13 | OpQuad::OpQuad(int op, Type t) 14 | { 15 | m_operation = op; 16 | m_type = t; 17 | } 18 | 19 | OpQuad::OpQuad(int op, int arg1, Type t) 20 | { 21 | m_operation = op; 22 | m_arg1 = arg1; 23 | m_type = t; 24 | } 25 | 26 | OpQuad::OpQuad(int op, std::string id) 27 | { 28 | m_operation = op; 29 | m_identifier = id; 30 | } 31 | 32 | 33 | OpQuad::OpQuad(int op, int arg1, int arg2, int ret, Type t) 34 | { 35 | m_operation = op; 36 | m_arg1 = arg1; 37 | m_arg2 = arg2; 38 | m_return = ret; 39 | m_type = t; 40 | } 41 | 42 | void OpList::destroy() 43 | { 44 | for (OpQuad *o : *this) 45 | delete o; 46 | } 47 | 48 | void OpList::updateRegisterLifetime(OpQuad *quad, int line) 49 | { 50 | switch (quad->operation()) 51 | { 52 | case OpTypes::INTLIT: 53 | break; 54 | 55 | case OpTypes::CALL: 56 | for (int r : quad->extraArgs()) 57 | m_list[r].setLastOcc(line); 58 | 59 | break; 60 | 61 | default: 62 | if (quad->arg1() != -1) 63 | m_list[quad->arg1()].setLastOcc(line); 64 | if (quad->arg2() != -1) 65 | m_list[quad->arg2()].setLastOcc(line); 66 | } 67 | 68 | if (quad->ret() != -1) 69 | m_list[quad->ret()].setLastOcc(line); 70 | } 71 | 72 | void OpList::push_back(OpQuad *quad) 73 | { 74 | dbg_print("pushed: " << quad->operation()); 75 | updateRegisterLifetime(quad, size()); 76 | std::vector::push_back(quad); 77 | } 78 | 79 | void OpList::insert(int line, OpQuad *quad) 80 | { 81 | for (Register &r : m_list) 82 | { 83 | if (r.firstOcc() >= line) 84 | { 85 | 
r.setFirstOcc(r.firstOcc() + 1); 86 | } 87 | 88 | if (r.lastOcc() >= line) 89 | { 90 | r.setLastOcc(r.lastOcc() + 1); 91 | } 92 | } 93 | updateRegisterLifetime(quad, line); 94 | std::vector::insert(begin() + line, quad); 95 | } 96 | 97 | void OpList::push_newregs(RegisterList &list) 98 | { 99 | m_list.insert(m_list.end(), list.begin() + m_list.size(), list.end()); 100 | } -------------------------------------------------------------------------------- /src/optimizer/optimizer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | OpList Optimizer::optimize(OpList list) 4 | { 5 | return list; 6 | } -------------------------------------------------------------------------------- /src/optimizer/regassign.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /// @todo: WHYYYY do I use graph coloring instead of a linear scan?????? 6 | 7 | RegisterGraph::RegisterGraph(int physAmount, OpList &list) : m_statements(list) 8 | { 9 | m_physRegAmount = physAmount; 10 | m_usedRegs = new bool[physAmount](); 11 | 12 | refresh(); 13 | } 14 | 15 | RegisterGraph::~RegisterGraph() 16 | { 17 | delete[] m_usedRegs; 18 | 19 | for (RegisterNode *n : m_list) 20 | delete n; 21 | } 22 | 23 | static bool checkLifetimeOverlap(Register &r1, Register &r2) 24 | { 25 | if (r1.firstOcc() <= r2.firstOcc() && r1.lastOcc() >= r2.firstOcc()) 26 | return true; 27 | 28 | if (r2.firstOcc() <= r1.firstOcc() && r2.lastOcc() >= r1.firstOcc()) 29 | return true; 30 | 31 | return false; 32 | } 33 | 34 | void RegisterGraph::refresh() 35 | { 36 | /// @todo: this is not an optimal solution 37 | // First remove the previous elements 38 | for (RegisterNode *n : m_list) 39 | delete n; 40 | 41 | m_list.clear(); 42 | 43 | // Repopulate the list with registernodes 44 | RegisterList &list = m_statements.regList(); 45 | 46 | for (Register &r : list) 47 | m_list.push_back(new RegisterNode(&r)); 
48 | 49 | // Check for lifetime overlaps and connect the nodes in the graph 50 | for (auto i = list.begin(); i != std::prev(list.end()); ++i) 51 | for (auto j = std::next(i); j != list.end(); ++j) 52 | if (checkLifetimeOverlap(*i, *j)) 53 | connect(std::distance(list.begin(), i), std::distance(list.begin(), j)); 54 | } 55 | 56 | int RegisterGraph::firstNonSpill(std::list adj) 57 | { 58 | std::vector tot; 59 | tot.resize(adj.size(), false); 60 | 61 | for (RegisterNode *n : adj) 62 | if (n->reg()->spilled()) 63 | tot[n->reg()->hintSpill()] = true; 64 | 65 | int i = 0; 66 | for (bool n : tot) 67 | { 68 | if (!n) 69 | return i; 70 | i++; 71 | } 72 | return tot.size(); 73 | } 74 | 75 | void RegisterGraph::spillLongestLiving(RegisterNode *node) 76 | { 77 | // Populate a list with the wanted nodes 78 | /// @todo: This is ugly but for some weird reason there was no other way to do it 79 | std::list sorted; 80 | for (RegisterNode *n : node->adj()) 81 | if (!n->reg()->spilled()) 82 | sorted.push_back(n); 83 | 84 | sorted.push_back(node); 85 | 86 | sorted.sort([](const RegisterNode *a, const RegisterNode *b) { return ((RegisterNode *)a)->calcLife() < ((RegisterNode *)b)->calcLife(); }); 87 | 88 | RegisterNode *longest = sorted.back(); 89 | if (longest != node) 90 | node->reg()->setHintReg(longest->reg()->hintReg()); 91 | 92 | longest->reg()->setHintReg(-1); 93 | longest->reg()->setSpilled(true); 94 | longest->reg()->setHintSpill(firstNonSpill(longest->adj())); 95 | } 96 | 97 | void RegisterGraph::color() 98 | { 99 | for (auto i = m_list.begin(); i != m_list.end(); ++i) 100 | { 101 | RegisterNode *node = *i; 102 | 103 | if (node->reg()->spilled()) 104 | continue; 105 | 106 | // Mark the used colors 107 | for (auto j = node->adj().begin(); j != node->adj().end(); ++j) 108 | if ((*j)->reg()->hintReg() != -1) 109 | m_usedRegs[(*j)->reg()->hintReg()] = true; 110 | 111 | // Find a unused color or spill 112 | int c = -1; 113 | for (int j = 0; j < m_physRegAmount; ++j) 114 | { 115 | 
if (!m_usedRegs[j]) 116 | { 117 | c = j; 118 | break; 119 | } 120 | } 121 | 122 | if (c == -1) 123 | { 124 | dbg_print_r("AAAAAAAAAAAAa NO LEFT"); 125 | //for(;;); 126 | // Spill oldest in use virtreg 127 | spillLongestLiving(node); 128 | } 129 | else 130 | node->reg()->setHintReg(c); 131 | 132 | // Unmark adjacent registers 133 | auto j = node->adj().begin(); 134 | for (; j != node->adj().end(); ++j) 135 | if ((*j)->reg()->hintReg() != -1) 136 | m_usedRegs[(*j)->reg()->hintReg()] = false; 137 | } 138 | 139 | #ifdef MODE_DEBUG 140 | for (RegisterNode *r : m_list) 141 | { 142 | std::cout << "Register: " << r->virt() << " colored with: " << r->reg()->hintReg() << "\n"; 143 | if (r->reg()->spilled()) 144 | std::cout << " but spill: " << r->reg()->hintSpill() << "\n"; 145 | } 146 | #endif 147 | } 148 | 149 | void RegisterGraph::connect(int a, int b) 150 | { 151 | m_list[a]->adj().push_back(m_list[b]); 152 | m_list[b]->adj().push_back(m_list[a]); 153 | } 154 | 155 | void Optimizer::assignRegisters(OpList &list, Generator *gen) 156 | { 157 | // Call the preperation function 158 | prepAssignRegisters(list); 159 | 160 | // Build the graph and color it 161 | RegisterGraph graph(gen->registerAmount(), list); 162 | 163 | dbg_call(graph.show()); 164 | graph.color(); 165 | while (graphCheck(graph)) 166 | { 167 | graph.refresh(); 168 | dbg_call(graph.show()); 169 | graph.color(); 170 | } 171 | } 172 | 173 | #ifdef MODE_DEBUG 174 | void RegisterGraph::show() 175 | { 176 | for (RegisterNode *n : m_list) 177 | { 178 | dbg_printf("virt %i overlaps with %i registers\n", n->virt(), (int)n->adj().size()); 179 | for (RegisterNode *adj : n->adj()) 180 | dbg_printf("\toverlaps with %i\n", adj->virt()); 181 | } 182 | } 183 | #endif -------------------------------------------------------------------------------- /src/parser.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | Parser::Parser(Scanner &scan, Generator *gen, 
Optimizer *opt, std::string asmfile) 5 | : m_scanner(scan) 6 | { 7 | m_generator = gen; 8 | m_optimizer = opt; 9 | m_asmFile = asmfile; 10 | } 11 | 12 | Parser::Parser(Generator *gen, Optimizer *opt) 13 | { 14 | m_generator = gen; 15 | m_optimizer = opt; 16 | } 17 | 18 | Type Parser::parseType() 19 | { 20 | Type t; 21 | switch (m_scanner.token().token()) 22 | { 23 | case Token::Types::I8: 24 | t = Type(1); 25 | break; 26 | case Token::Types::I16: 27 | t = Type(2); 28 | break; 29 | case Token::Types::I32: 30 | t = Type(4); 31 | break; 32 | case Token::Types::I64: 33 | t = Type(8); 34 | break; 35 | default: 36 | g_errsys.syntaxError("unknown type"); 37 | } 38 | 39 | while (m_scanner.scan().token() == Token::Types::STAR) 40 | t.setPtr(t.ptr() + 1); 41 | 42 | return t; 43 | } 44 | 45 | ArrayType Parser::parseArrayType() 46 | { 47 | m_scanner.match(Token::Types::LBRACKET); 48 | Type t = parseType(); 49 | int amount = m_scanner.token().intValue(); 50 | m_scanner.match(Token::Types::INTLIT); 51 | m_scanner.match(Token::Types::RBRACKET); 52 | return ArrayType(t, amount); 53 | } 54 | 55 | /// @todo: this function is pretty useless you can just use a m_scanner.match() call 56 | int Parser::parsePrimary(Type t, bool shouldBeReg) 57 | { 58 | Token &tok = m_scanner.token(); 59 | 60 | if (shouldBeReg) 61 | dbg_assert(tok.token() == Token::Types::REG); 62 | 63 | return tok.intValue(); 64 | } 65 | 66 | int Parser::addRegister(int reg, Type t) 67 | { 68 | /// @todo: this function should probably be removed 69 | return m_regList.addRegister(reg, t); 70 | } 71 | 72 | OpQuad *Parser::parseBinOperator() 73 | { 74 | int tok = m_scanner.token().token(); 75 | m_scanner.scan(); 76 | 77 | Type type = parseType(); 78 | OpQuad *quad = new OpQuad(OpQuad::tokToOp(tok), type); 79 | 80 | /// @todo check if the left and right leaf even support the given type, otherwise error 81 | quad->setArg1(addRegister(parsePrimary(type), type)); 82 | 83 | m_scanner.scan(); 84 | 
quad->setArg2(addRegister(parsePrimary(type), type)); 85 | 86 | m_scanner.scan(); 87 | return quad; 88 | } 89 | 90 | OpQuad *Parser::parseAssign() 91 | { 92 | int reg = m_scanner.token().intValue(); 93 | m_scanner.scan(); 94 | m_scanner.match(Token::Types::EQUALSIGN); 95 | 96 | OpQuad *quad = parseOperation(); 97 | 98 | if (!quad) 99 | g_errsys.syntaxError("unexpected operation after register assign"); 100 | 101 | quad->setReturn(addRegister(reg, quad->type())); 102 | 103 | return quad; 104 | } 105 | 106 | OpQuad *Parser::parseInitialize() 107 | { 108 | Type t = parseType(); 109 | 110 | OpQuad *quad; 111 | if (m_scanner.token().token() == Token::Types::INTLIT) 112 | quad = new OpQuad(OpTypes::INTLIT, m_scanner.token().intValue(), t); 113 | 114 | else if (m_scanner.token().token() == Token::Types::GLOB) 115 | { 116 | quad = new OpQuad(OpTypes::GLOB, t); 117 | quad->setIdentifier(m_scanner.token().identifier()); 118 | } 119 | else 120 | g_errsys.syntaxError("expected a integer literal or a global variable identifier"); 121 | 122 | m_scanner.scan(); 123 | return quad; 124 | } 125 | 126 | OpQuad *Parser::parseFunctionCall() 127 | { 128 | m_scanner.scan(); 129 | Type t = parseType(); 130 | 131 | OpQuad *quad = new OpQuad(OpTypes::CALL, t); 132 | 133 | std::string id = m_scanner.match(Token::Types::IDENTIFIER).identifier(); 134 | quad->setIdentifier(id); 135 | 136 | m_scanner.match(Token::Types::LPAREN); 137 | 138 | std::vector arguments; 139 | /// @todo: if there is no rparen it will deadlock so fix that (maybe check for newline or something) 140 | /// @todo: Check if the arguments are correct with the function called 141 | while (m_scanner.token().token() != Token::Types::RPAREN) 142 | { 143 | arguments.push_back(m_scanner.token().intValue()); 144 | m_scanner.scan(); 145 | } 146 | m_scanner.scan(); 147 | 148 | quad->setExtraArgs(arguments); 149 | return quad; 150 | } 151 | 152 | OpQuad *Parser::parseLoad() 153 | { 154 | m_scanner.scan(); 155 | Type t = parseType(); 156 
| 157 | OpQuad *quad; 158 | if (m_scanner.token().token() == Token::Types::REG) 159 | { 160 | int r = m_scanner.token().intValue(); 161 | r = addRegister(r, t); 162 | quad = new OpQuad(OpTypes::LOAD, r, t); 163 | } 164 | else if (m_scanner.token().token() == Token::Types::GLOB) 165 | { 166 | std::string id = m_scanner.token().identifier(); 167 | quad = new OpQuad(OpTypes::LOAD, t); 168 | quad->setIdentifier(id); 169 | } 170 | else 171 | g_errsys.syntaxError("Expected either a register or a global variable identifier as an argument to load"); 172 | 173 | m_scanner.scan(); 174 | 175 | return quad; 176 | } 177 | 178 | OpQuad *Parser::parseStore() 179 | { 180 | m_scanner.scan(); 181 | Type t = parseType(); 182 | 183 | OpQuad *quad = new OpQuad(OpTypes::STORE, t); 184 | if (m_scanner.token().token() == Token::Types::REG) 185 | { 186 | int r1 = m_scanner.token().intValue(); 187 | r1 = addRegister(r1, t); 188 | quad->setArg1(r1); 189 | } 190 | else if (m_scanner.token().token() == Token::Types::GLOB) 191 | { 192 | quad->setIdentifier(m_scanner.token().identifier()); 193 | } 194 | else 195 | g_errsys.syntaxError("expected either a register or a global variable identifier as the first argument to store"); 196 | 197 | int r2 = m_scanner.scan().intValue(); 198 | m_scanner.match(Token::Types::REG); 199 | r2 = addRegister(r2, t); 200 | quad->setArg2(r2); 201 | 202 | return quad; 203 | } 204 | 205 | OpQuad *Parser::parseAlloca() 206 | { 207 | m_scanner.scan(); 208 | Type t = parseType(); 209 | int r = m_scanner.match(Token::Types::REG).intValue(); 210 | return new OpQuad(OpTypes::ALLOCA, addRegister(r, t), t); 211 | } 212 | 213 | OpQuad *Parser::parseJmp() 214 | { 215 | std::string id = m_scanner.scan().identifier(); 216 | m_scanner.match(Token::Types::IDENTIFIER); 217 | return new OpQuad(OpTypes::JMP, id); 218 | } 219 | 220 | OpQuad *Parser::parseCmp() 221 | { 222 | int op = m_scanner.scan().token(); 223 | m_scanner.scan(); 224 | Type t = parseType(); 225 | int r1 = 
addRegister(m_scanner.token().intValue(), t); 226 | m_scanner.match(Token::Types::REG); 227 | int r2 = addRegister(m_scanner.token().intValue(), t); 228 | m_scanner.match(Token::Types::REG); 229 | 230 | OpQuad *quad = new OpQuad(OpTypes::CMP, r1, r2, -1, t); 231 | quad->setExtra(op - Token::Types::EQ); 232 | return quad; 233 | } 234 | 235 | OpQuad *Parser::parseOperation() 236 | { 237 | int tok = m_scanner.token().token(); 238 | switch (tok) 239 | { 240 | case Token::Types::ADD: 241 | case Token::Types::SUB: 242 | case Token::Types::MUL: 243 | case Token::Types::DIV: 244 | case Token::Types::MOD: 245 | return parseBinOperator(); 246 | 247 | case Token::Types::I8: 248 | case Token::Types::I16: 249 | case Token::Types::I32: 250 | case Token::Types::I64: 251 | return parseInitialize(); 252 | 253 | case Token::Types::CALL: 254 | return parseFunctionCall(); 255 | 256 | case Token::Types::ALLOCA: 257 | return parseAlloca(); 258 | 259 | case Token::Types::LOAD: 260 | return parseLoad(); 261 | 262 | case Token::Types::CMP: 263 | return parseCmp(); 264 | } 265 | 266 | return nullptr; 267 | } 268 | 269 | OpQuad *Parser::parseJmpCond() 270 | { 271 | int op = m_scanner.scan().token(); 272 | m_scanner.scan(); 273 | Type t = parseType(); 274 | int r1 = addRegister(m_scanner.token().intValue(), t); 275 | m_scanner.match(Token::Types::REG); 276 | int r2 = addRegister(m_scanner.token().intValue(), t); 277 | m_scanner.match(Token::Types::REG); 278 | 279 | std::string id = m_scanner.token().identifier(); 280 | m_scanner.match(Token::Types::IDENTIFIER); 281 | /// @todo: check if this is a valid label 282 | OpQuad *quad = new OpQuad(OpTypes::JMPCOND, r1, r2, -1, t); 283 | quad->setExtra(op - Token::Types::EQ); 284 | quad->setIdentifier(id); 285 | 286 | return quad; 287 | } 288 | 289 | OpQuad *Parser::parseLabel() 290 | { 291 | std::string id = m_scanner.token().identifier(); 292 | m_scanner.scan(); 293 | m_scanner.match(Token::Types::COLON); 294 | return new OpQuad(OpTypes::LABEL, id); 
295 | } 296 | 297 | OpQuad *Parser::parseReturn() 298 | { 299 | m_scanner.scan(); 300 | Type type = parseType(); 301 | OpQuad *quad = new OpQuad(OpTypes::RETURN, type); 302 | quad->setArg1(addRegister(parsePrimary(type, true), type)); 303 | m_scanner.scan(); 304 | return quad; 305 | } 306 | 307 | OpQuad *Parser::parseStatement() 308 | { 309 | int tok = m_scanner.token().token(); 310 | 311 | switch (tok) 312 | { 313 | case Token::Types::REG: 314 | return parseAssign(); 315 | 316 | case Token::Types::RETURN: 317 | return parseReturn(); 318 | 319 | case Token::Types::STORE: 320 | return parseStore(); 321 | 322 | case Token::Types::JMPCOND: 323 | return parseJmpCond(); 324 | 325 | case Token::Types::JMP: 326 | return parseJmp(); 327 | 328 | case Token::Types::IDENTIFIER: 329 | return parseLabel(); 330 | } 331 | 332 | return parseOperation(); 333 | } 334 | 335 | OpList Parser::parseFunction() 336 | { 337 | OpList statements; 338 | 339 | m_scanner.scanUntil(Token::Types::FUNCTION); 340 | m_scanner.scan(); 341 | Type t = parseType(); 342 | if (t.byteSize() == -1) 343 | g_errsys.syntaxError("expected a type specifier"); 344 | 345 | std::string fname = m_scanner.token().identifier(); 346 | m_scanner.match(Token::Types::IDENTIFIER); 347 | m_scanner.match(Token::Types::LPAREN); 348 | 349 | m_functions.push_back(Function(fname, t)); 350 | 351 | /// @todo: if there is no rparen it will deadlock so fix that (maybe check for newline or something) 352 | int spill = -1; 353 | while (m_scanner.token().token() != Token::Types::RPAREN) 354 | { 355 | Type t = parseType(); 356 | m_functions.back().args().push_back(t); 357 | int reg = statements.regList().addRegister(m_scanner.token().intValue(), t); 358 | m_scanner.match(OpTypes::REG); 359 | Register &r = statements.regList()[reg]; 360 | r.setFirstOcc(0); 361 | r.setSpilled(true); 362 | r.setHintSpill(spill--); 363 | } 364 | 365 | /// @todo: this might deadlock when no LBRACE is ever found 366 | while (m_scanner.scan().token() != 
Token::Types::LBRACE) 367 | if (m_scanner.token().token() == Token::Types::ATTRIBUTES) 368 | m_functions.back().setAttributes(Attributes(m_scanner.token().args())); 369 | 370 | m_scanner.scan(); 371 | 372 | while (1) 373 | { 374 | OpQuad *tree = parseStatement(); 375 | if (tree != nullptr) 376 | { 377 | m_scanner.match(Token::Types::NEWLINE); 378 | statements.push_newregs(m_regList); 379 | statements.push_back(tree); 380 | continue; 381 | } 382 | 383 | if (m_scanner.token().token() == Token::Types::RBRACE) 384 | break; 385 | 386 | if (m_scanner.token().token() == EOF) 387 | g_errsys.fatal("unexpected end of file before }"); 388 | 389 | m_scanner.scan(); 390 | } 391 | m_scanner.scan(); 392 | 393 | m_regList.clear(); 394 | m_generator->genFunction(m_functions.back()); 395 | return statements; 396 | } 397 | 398 | std::vector Parser::parseArrayInit(int amount) 399 | { 400 | std::vector init; 401 | 402 | /// @todo @fixme: this might deadlock when this is the end of the file i guess 403 | while (m_scanner.token().token() != Token::Types::NEWLINE) 404 | { 405 | switch (m_scanner.token().token()) 406 | { 407 | case Token::Types::INTLIT: 408 | init.push_back(m_scanner.token().intValue()); 409 | break; 410 | case Token::Types::STRINGLIT: 411 | for (char c : m_scanner.token().identifier()) 412 | init.push_back(c); 413 | break; 414 | default: 415 | g_errsys.syntaxError("unexpected token used as array initializer"); 416 | } 417 | m_scanner.scan(); 418 | } 419 | 420 | return init; 421 | } 422 | 423 | void Parser::parseGlobal() 424 | { 425 | std::string id = m_scanner.token().identifier(); 426 | m_scanner.scan(); 427 | m_scanner.match(Token::Types::EQUALSIGN); 428 | ArrayType t = parseArrayType(); 429 | std::vector init = parseArrayInit(t.arrSize()); 430 | m_generator->genGlobalVariable(id, t, init); 431 | } 432 | 433 | void Parser::generateFunction(OpList statements) 434 | { 435 | statements = m_optimizer->optimize(statements); 436 | if (m_generator->shouldAllocateRegisters()) 
437 | m_optimizer->assignRegisters(statements, m_generator); 438 | m_generator->setRegList(statements.regList()); 439 | dbg_call(statements.print()); 440 | m_generator->feedGenerate(statements); 441 | statements.destroy(); 442 | } 443 | 444 | void Parser::parse() 445 | { 446 | m_generator->genSetupFile(m_scanner.getFileName()); 447 | 448 | int tok; 449 | while ((tok = m_scanner.scan().token()) != EOF) 450 | { 451 | if (tok == Token::Types::FUNCTION) 452 | generateFunction(parseFunction()); 453 | 454 | else if (tok == Token::Types::GLOB) 455 | parseGlobal(); 456 | 457 | else if (tok == Token::Types::EXTERN) 458 | { 459 | /// @todo: actually parse extern functions and store them 460 | m_scanner.scan(); 461 | m_scanner.match(Token::Types::FUNCTION); 462 | parseType(); 463 | m_generator->genExternSymbol(m_scanner.token().identifier()); 464 | m_scanner.scanUntil(Token::Types::RPAREN); 465 | m_scanner.scan(); 466 | } 467 | } 468 | 469 | m_generator->writeOutfile(m_asmFile); 470 | dbg_print("end"); 471 | } 472 | 473 | int Parser::addFunction(Function f) 474 | { 475 | m_functions.push_back(f); 476 | return m_functions.size() - 1; 477 | } -------------------------------------------------------------------------------- /src/register.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int RegisterList::lastReg() 4 | { 5 | for (Register &r : *this) 6 | { 7 | if (r.virt() > m_lastReg) 8 | m_lastReg = r.virt(); 9 | } 10 | 11 | return m_lastReg; 12 | } 13 | 14 | 15 | int RegisterList::addRegister(int reg, Type type) 16 | { 17 | int i = 0; 18 | for (Register &r : *this) 19 | { 20 | if (r.virt() == reg) 21 | return i; 22 | i++; 23 | } 24 | 25 | push_back(Register(reg, type)); 26 | return size() - 1; 27 | } 28 | 29 | void Register::setLastOcc(int line) 30 | { 31 | if (m_firstOcc == -1) 32 | m_firstOcc = line; 33 | 34 | m_lastOcc = line; 35 | } 36 | 37 | int RegisterList::findReg(int reg) 38 | { 39 | int i = 0; 40 | for (Register 
&r : *this) 41 | { 42 | if (r.virt() == reg) 43 | return i; 44 | 45 | ++i; 46 | } 47 | 48 | return -1; 49 | } 50 | 51 | int RegisterList::spillCount() 52 | { 53 | int highest = -1; 54 | for (Register &r: *this) 55 | if (r.spilled()) 56 | if (r.hintSpill() > highest) 57 | highest = r.hintSpill(); 58 | 59 | return highest + 1; 60 | } -------------------------------------------------------------------------------- /src/scanner.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | Scanner::Scanner(std::string file) 5 | { 6 | m_infile = fopen(file.c_str(), "r"); 7 | 8 | if (m_infile == nullptr) 9 | g_errsys.fatal("Unable to open file " BOLD("\"" + file + "\"")); 10 | } 11 | 12 | int Scanner::next() 13 | { 14 | if (m_putback) 15 | { 16 | int c = m_putback; 17 | m_putback = 0; 18 | return c; 19 | } 20 | 21 | return fgetc(m_infile); 22 | } 23 | 24 | void Scanner::putback(int c) 25 | { 26 | m_putback = c; 27 | } 28 | 29 | void Scanner::skipLine() 30 | { 31 | int c = next(); 32 | 33 | while (c != '\n' && c != EOF) 34 | c = next(); 35 | 36 | putback(c); 37 | } 38 | 39 | int Scanner::skip() 40 | { 41 | int c = next(); 42 | while (c == ' ' || c == '\t' || c == '\r') 43 | { 44 | c = next(); 45 | } 46 | 47 | return c; 48 | } 49 | 50 | static int chrpos(char *s, int c) 51 | { 52 | char *p = strchr(s, c); 53 | return (p ? 
p - s : -1); 54 | } 55 | 56 | int Scanner::scanInt(int c) 57 | { 58 | int i = 0; 59 | int val = 0; 60 | 61 | while ((i = chrpos((char *)"0123456789", c)) >= 0) 62 | { 63 | val = val * 10 + i; 64 | c = next(); 65 | } 66 | 67 | putback(c); 68 | return val; 69 | } 70 | 71 | int Scanner::scanReg() 72 | { 73 | int c = next(); 74 | if (!isdigit(c)) 75 | g_errsys.syntaxError("expected a integer to identify the register"); 76 | 77 | return scanInt(c); 78 | } 79 | 80 | std::string Scanner::scanIdentifier(int c) 81 | { 82 | /// @todo: this might not be an optimal solution (the deallocation and reallocation 83 | /// of the string) 84 | std::string id; 85 | 86 | for (int i = 0; i < SCANNER_IDENTIFIER_LIMMIT; i++) 87 | { 88 | if (isalpha(c) || isdigit(c) || c == '_' || c == '@') 89 | { 90 | id += c; 91 | c = next(); 92 | } 93 | else 94 | { 95 | putback(c); 96 | m_lastIdentifier = id; 97 | return id; 98 | } 99 | } 100 | 101 | g_errsys.syntaxError("identifier too long"); 102 | } 103 | 104 | void Scanner::scanStringlit() 105 | { 106 | /// @todo @fixme: 107 | std::string lit; 108 | int c; 109 | while ((c = next()) != '"') 110 | { 111 | if (c == EOF) 112 | g_errsys.syntaxError("expected a closing quote character before the end of the file"); 113 | 114 | lit += c; 115 | } 116 | m_token.set(Token::Types::STRINGLIT, lit); 117 | } 118 | 119 | void Scanner::scanAttributes() 120 | { 121 | Attributes args; 122 | 123 | std::string buf; 124 | int c; 125 | while ((c = next()) != EOF) 126 | { 127 | if ((c == ' ' || c == '>') && buf.size()) 128 | { 129 | args.push_back(buf); 130 | buf.clear(); 131 | 132 | if (c == '>') 133 | break; 134 | } 135 | else 136 | buf += c; 137 | } 138 | m_token.set(Token::Types::ATTRIBUTES, args); 139 | } 140 | 141 | int Scanner::identifyKeyword(std::string id) 142 | { 143 | /// Small optimization 144 | switch (id[0]) 145 | { 146 | case 'a': 147 | if (!id.compare("add")) 148 | return Token::Types::ADD; 149 | 150 | if (!id.compare("alloca")) 151 | return 
Token::Types::ALLOCA; 152 | break; 153 | 154 | case 'c': 155 | if (!id.compare("call")) 156 | return Token::Types::CALL; 157 | if (!id.compare("cmp")) 158 | return Token::Types::CMP; 159 | break; 160 | 161 | case 'd': 162 | if (!id.compare("div")) 163 | return Token::Types::DIV; 164 | break; 165 | 166 | case 'e': 167 | if (!id.compare("eq")) 168 | return Token::Types::EQ; 169 | if (!id.compare("extern")) 170 | return Token::Types::EXTERN; 171 | break; 172 | 173 | case 'f': 174 | if (!id.compare("function")) 175 | return Token::Types::FUNCTION; 176 | break; 177 | 178 | case 'g': 179 | if (!id.compare("ge")) 180 | return Token::Types::GE; 181 | if (!id.compare("g")) 182 | return Token::Types::G; 183 | break; 184 | 185 | case 'i': 186 | if (!id.compare("i8")) 187 | return Token::Types::I8; 188 | if (!id.compare("i16")) 189 | return Token::Types::I16; 190 | if (!id.compare("i32")) 191 | return Token::Types::I32; 192 | if (!id.compare("i64")) 193 | return Token::Types::I64; 194 | break; 195 | 196 | case 'j': 197 | if (!id.compare("jmp")) 198 | return Token::Types::JMP; 199 | 200 | if (!id.compare("jmpcond")) 201 | return Token::Types::JMPCOND; 202 | break; 203 | 204 | case 'l': 205 | if (!id.compare("load")) 206 | return Token::Types::LOAD; 207 | if (!id.compare("l")) 208 | return Token::Types::L; 209 | if (!id.compare("le")) 210 | return Token::Types::LE; 211 | break; 212 | 213 | case 'r': 214 | if (!id.compare("return")) 215 | return Token::Types::RETURN; 216 | break; 217 | 218 | case 's': 219 | if (!id.compare("store")) 220 | return Token::Types::STORE; 221 | if (!id.compare("sub")) 222 | return Token::Types::SUB; 223 | break; 224 | 225 | case 'm': 226 | if (!id.compare("mul")) 227 | return Token::Types::MUL; 228 | else if (!id.compare("mod")) 229 | return Token::Types::MOD; 230 | break; 231 | 232 | case 'n': 233 | if (!id.compare("neq")) 234 | return Token::Types::NEQ; 235 | break; 236 | } 237 | 238 | return 0; 239 | } 240 | 241 | Token &Scanner::match(int tok) 242 
| { 243 | if (m_token.token() == tok) 244 | return scan(); 245 | 246 | dbg_call(m_token.print();) 247 | g_errsys.syntaxError("unexpected token"); 248 | } 249 | 250 | Token &Scanner::match(int tok1, int tok2) 251 | { 252 | if (m_token.token() == tok1 || m_token.token() == tok2) 253 | return scan(); 254 | 255 | dbg_call(m_token.print();) 256 | g_errsys.syntaxError("unexpected token"); 257 | } 258 | 259 | Token &Scanner::scanUntil(int tok) 260 | { 261 | while (m_token.token() != tok) 262 | scan(); 263 | 264 | return m_token; 265 | } 266 | 267 | Token &Scanner::scan() 268 | { 269 | int c = skip(); 270 | 271 | switch (c) 272 | { 273 | case ',': 274 | /// @todo: I don't think we ever use comma's anymore so this can probably be removed 275 | m_token.setToken(Token::Types::COMMA); 276 | break; 277 | 278 | case '\n': 279 | m_token.setToken(Token::Types::NEWLINE); 280 | break; 281 | 282 | case '%': 283 | m_token.set(Token::Types::REG, scanReg()); 284 | break; 285 | 286 | case '=': 287 | m_token.setToken(Token::Types::EQUALSIGN); 288 | break; 289 | 290 | case '(': 291 | m_token.setToken(Token::Types::LPAREN); 292 | break; 293 | 294 | case ')': 295 | m_token.setToken(Token::Types::RPAREN); 296 | break; 297 | 298 | case '{': 299 | m_token.setToken(Token::Types::LBRACE); 300 | break; 301 | 302 | case '}': 303 | m_token.setToken(Token::Types::RBRACE); 304 | break; 305 | 306 | case '[': 307 | m_token.setToken(Token::Types::LBRACKET); 308 | break; 309 | 310 | case ']': 311 | m_token.setToken(Token::Types::RBRACKET); 312 | break; 313 | 314 | case '*': 315 | m_token.setToken(Token::Types::STAR); 316 | break; 317 | 318 | case ':': 319 | m_token.setToken(Token::Types::COLON); 320 | break; 321 | 322 | case '"': 323 | scanStringlit(); 324 | break; 325 | 326 | case '#': 327 | skipLine(); 328 | return scan(); 329 | 330 | case '@': 331 | m_token.set(Token::Types::GLOB, scanIdentifier(c)); 332 | break; 333 | 334 | case '<': 335 | scanAttributes(); 336 | break; 337 | 338 | case EOF: 339 | 
m_token.setToken(EOF); 340 | break; 341 | 342 | default: 343 | if (isdigit(c)) 344 | { 345 | m_token.set(Token::Types::INTLIT, scanInt(c)); 346 | } 347 | else if (isalpha(c) || c == '_') 348 | { 349 | /// Also stores the string in scanner::m_lastIdentifier 350 | std::string id = scanIdentifier(c); 351 | 352 | /// @todo: function names should not be scanned as keyword 353 | int tok = identifyKeyword(id); 354 | if (tok) 355 | { 356 | m_token.setToken(tok); 357 | break; 358 | } 359 | 360 | m_token.set(Token::Types::IDENTIFIER, id); 361 | break; 362 | } 363 | } 364 | 365 | dbg_call(m_token.print()); 366 | return m_token; 367 | } -------------------------------------------------------------------------------- /src/token.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void Token::set(int tok, int val) 4 | { 5 | m_token = tok; 6 | m_intValue = val; 7 | } 8 | 9 | void Token::set(int tok, std::string id) 10 | { 11 | m_token = tok; 12 | m_identifier = id; 13 | } 14 | 15 | void Token::set(int tok, Attributes args) 16 | { 17 | m_token = tok; 18 | m_args = args; 19 | } 20 | 21 | void Token::setToken(int tok) 22 | { 23 | m_token = tok; 24 | } 25 | void Token::setIntValue(int val) 26 | { 27 | m_intValue = val; 28 | } 29 | 30 | void Token::setIdentifier(std::string id) 31 | { 32 | m_identifier = id; 33 | } 34 | 35 | std::string Token::getName() 36 | { 37 | if (m_token < Token::Types::TOKAMOUNT && m_token > 0) 38 | return std::string(TokenTypes[m_token]); 39 | 40 | return "NULL"; 41 | } -------------------------------------------------------------------------------- /src/type.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | Type::Type(struct ctype t) 5 | { 6 | m_byteSize = t.byte_size; 7 | m_ptr = t.ptr; 8 | m_isFloat = t.is_float; 9 | } 10 | -------------------------------------------------------------------------------- /test.sh: 
--------------------------------------------------------------------------------
#!/bin/sh
# Runs the test suite. Arguments are forwarded unchanged: with a test name only
# that test runs, without arguments the whole suite runs.
python3 tests/test.py "$@"

--------------------------------------------------------------------------------
/tests/capi/Makefile:
--------------------------------------------------------------------------------
SOURCES = $(wildcard *.c)
OBJECTS = $(patsubst %.c,%,$(SOURCES))

# Directory that contains the carbon-ir library. Assumed to be the repository
# root, like the include path below - override on the command line if the
# library was built somewhere else (make CARBON_LIBDIR=/path/to/lib).
CARBON_LIBDIR ?= ../..

CFLAGS += -I ../../include -L$(CARBON_LIBDIR) -static

# Neither target names a file that gets created
.PHONY: all clean

all: $(OBJECTS)

# -f: cleaning an already clean directory is not an error
clean:
	rm -f $(OBJECTS)

# One executable per C source file
%: %.c
	$(CC) $(CFLAGS) -o $@ $< -lcarbon-ir -lstdc++

--------------------------------------------------------------------------------
/tests/capi/test0.c:
--------------------------------------------------------------------------------
/* NOTE(review): the header names of these includes are missing in this copy of the file */
#include
#include
#include
#include

int main()
{
    /* Create the necessary carbon object */
    struct carbon carb = init_carbon("test", "x86");

    /* Create a function object, used for managing a function and its operations */
    struct cfunc func = c_create_func(carb, "main", CTYPE_I32, 0, 0);

    /* We want our function to be global so we add that attribute */
    c_add_func_attribute(func, "global:true");

    /* Pushing operations to the function */
    c_push_op(func, INTLIT, 1, -1, c_reg(func, 0, CTYPE_I32), CTYPE_I32);
    c_push_op(func, RETURN, c_reg(func, 0, CTYPE_I32), -1, -1, CTYPE_I32);

    /* When we actually want to generate the function we call c_gen_func() */
    c_gen_func(func);

    /* The function object is not necessary anymore */
    c_destroy_func(func);

    /**
     * When we are done generating functions etc we write the generated assembly to file
     * and link and assemble
     **/
    c_writeassembly(carb, "test.s");
    c_assemble(carb, "test.s", "test.o", NULL);
    c_link(carb, "test.o", "test", NULL);

    /* Destroy the carbon object */
    end_carbon(carb);
    return 0;
}
-------------------------------------------------------------------------------- /tests/capi/test1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main() 7 | { 8 | /* Create the necessary carbon object */ 9 | struct carbon carb = init_carbon("test", "x86"); 10 | 11 | /* Create a function object, used for managing a function and its operations */ 12 | struct cfunc func = c_create_func(carb, "main", CTYPE_I32, 0, 0); 13 | 14 | /* We want our function to be global so we add that attribute */ 15 | c_add_func_attribute(func, "global:true"); 16 | 17 | /* Generate a global variable and give it an initializer */ 18 | LARGEINT list[] = {25}; 19 | c_gen_glob(carb, "glob1", CTYPE_I32, 1, list); 20 | 21 | /* Pushing operations to the function */ 22 | /* Notice how we use a c_push_op_id() function to add an identifier into the 23 | operation */ 24 | c_push_op_id(func, LOAD, -1, -1, c_reg(func, 0, CTYPE_I32), "glob1", CTYPE_I32); 25 | c_push_op(func, RETURN, c_reg(func, 0, CTYPE_I32), -1, -1, CTYPE_I32); 26 | 27 | /* When we actually want to generate the function we call c_gen_func() */ 28 | c_gen_func(func); 29 | 30 | /* The function object is not necessary anymore */ 31 | c_destroy_func(func); 32 | 33 | /** 34 | * When we are done generating functions etc we write the generated assembly to file 35 | * and link and assemble 36 | **/ 37 | c_writeassembly(carb, "test.s"); 38 | c_assemble(carb, "test.s", "test.o", NULL); 39 | c_link(carb, "test.o", "test", NULL); 40 | 41 | /* Destroy the carbon object */ 42 | end_carbon(carb); 43 | return 0; 44 | } -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | """ 2 | DISCLAIMER: the code here is pretty trash, I just quickly wiped it 3 | up to have something to easily test with. 
4 | 5 | @todo: Create a better test suite 6 | """ 7 | import os 8 | import sys 9 | import subprocess 10 | import re 11 | 12 | C_RED = "\033[31;1m" 13 | C_YELLOW = "\033[33;1m" 14 | C_GREEN = "\033[32;1m" 15 | C_BLUE = "\033[34;1m" 16 | C_END = "\033[0m" 17 | 18 | testfolder = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testfiles") 19 | executable = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "carbon-ir") 20 | 21 | results = [] 22 | 23 | def add_result(name, success, reason, returncode, expected=0): 24 | results.append((name, success, reason, returncode, expected)) 25 | 26 | def extract_expected(path): 27 | with open(path, "r") as f: 28 | x = f.readline()[2:] 29 | return int(x) 30 | 31 | def rununit(path): 32 | name = os.path.split(path)[-1] 33 | buildcommand = [executable, "-o", "tmp", path] 34 | ret = subprocess.run(buildcommand).returncode 35 | if ret: 36 | add_result(name, False, "compilation error", ret, 0) 37 | return 38 | 39 | expected = extract_expected(path) 40 | runcommand = ["./tmp"] 41 | ret = subprocess.run(runcommand).returncode 42 | 43 | if ret != expected: 44 | add_result(name, False, "binary did not yield expected return value", ret, expected) 45 | return 46 | 47 | add_result(name, True, "success", ret, expected) 48 | 49 | 50 | def format_results(): 51 | for item in results: 52 | if isinstance(item, str): 53 | print(C_BLUE + "[ Folder ] " + C_END + " " + item) 54 | continue 55 | 56 | print(item[0] + ": ", end="") 57 | 58 | if item[1]: 59 | print(C_GREEN + "[ SUCCEEDED ] " + C_END + "test yielded the correct return value, " + str(item[3])) 60 | 61 | else: 62 | print(C_RED + "[ FAILED ] " + C_END + item[2] + ", expected: " + str(item[4]) + " got: " + str(item[3])) 63 | 64 | 65 | def str_to_int(string): 66 | if string.isdigit(): 67 | return int(string) 68 | 69 | return string 70 | 71 | def natural_keys(file): 72 | return [ (str_to_int(c)) for c in re.split(r'(\d+)', file)] 73 | 74 | 75 | def run_all(): 76 | for dirpath, 
dirnames, filenames in os.walk(testfolder): 77 | results.append(dirpath) 78 | 79 | filenames = sorted(filenames, key=natural_keys) 80 | for name in filenames: 81 | rununit(os.path.join(dirpath, name)) 82 | 83 | format_results() 84 | 85 | if (len(sys.argv) > 1): 86 | rununit(os.path.join(testfolder, sys.argv[1] + ".ir")) 87 | format_results() 88 | else: 89 | run_all() 90 | -------------------------------------------------------------------------------- /tests/testfiles/test0.ir: -------------------------------------------------------------------------------- 1 | # 15 2 | function i32 main() 3 | { 4 | %0 = i32 5 5 | %1 = i32 3 6 | %2 = mul i32 %0 %1 7 | return i32 %2 8 | } -------------------------------------------------------------------------------- /tests/testfiles/test1.ir: -------------------------------------------------------------------------------- 1 | # 6 2 | function i32 main() 3 | { 4 | %0 = i32 1 5 | %1 = i32 5 6 | %2 = add i32 %0 %1 7 | return i32 %2 8 | } -------------------------------------------------------------------------------- /tests/testfiles/test10.ir: -------------------------------------------------------------------------------- 1 | # 1 2 | function i32 main() 3 | { 4 | %0 = i32 4 5 | %1 = i32 4 6 | 7 | %3 = cmp eq i32 %0 %1 8 | 9 | return i32 %3 10 | } -------------------------------------------------------------------------------- /tests/testfiles/test11.ir: -------------------------------------------------------------------------------- 1 | # 5 2 | function i32 main() 3 | { 4 | %0 = i32 4 5 | %1 = i32 4 6 | 7 | jmp sike 8 | %3 = cmp eq i32 %0 %1 9 | jmp end 10 | sike: 11 | %3 = i32 5 12 | 13 | end: 14 | return i32 %3 15 | } -------------------------------------------------------------------------------- /tests/testfiles/test12.ir: -------------------------------------------------------------------------------- 1 | # 6 2 | function i32 add_(i32 %0 i32 %1) 3 | { 4 | %2 = add i32 %0 %1 5 | return i32 %2 6 | } 7 | 8 | function i32 main() 9 
| { 10 | %0 = i32 1 11 | %1 = i32 5 12 | %2 = call i32 add_(%0 %1) 13 | return i32 %2 14 | } -------------------------------------------------------------------------------- /tests/testfiles/test13.ir: -------------------------------------------------------------------------------- 1 | # 6 2 | 3 | @glob1 = [i32 1] 6 4 | 5 | function i32 main() 6 | { 7 | %0 = load i32* @glob1 8 | return i32 %0 9 | } -------------------------------------------------------------------------------- /tests/testfiles/test14.ir: -------------------------------------------------------------------------------- 1 | # 6 2 | 3 | extern function i32 printf(i8*) 4 | @str = [i8 6] 104 101 108 108 111 10 0 5 | 6 | function i32 main() 7 | { 8 | %0 = i8* @str 9 | %1 = call i32 printf(%0) 10 | return i32 %1 11 | } -------------------------------------------------------------------------------- /tests/testfiles/test15.ir: -------------------------------------------------------------------------------- 1 | # 12 2 | 3 | extern function i32 printf(i8*) 4 | @str = [i8 13] "Hello world" 10 00 5 | 6 | function i32 main() 7 | { 8 | %0 = i8* @str 9 | %1 = call i32 printf(%0) 10 | return i32 %1 11 | } -------------------------------------------------------------------------------- /tests/testfiles/test16.ir: -------------------------------------------------------------------------------- 1 | # 5 2 | 3 | @glob1 = [i32 1] 0 4 | 5 | function i32 main() 6 | { 7 | %0 = i32 5 8 | store i32* @glob1 %0 9 | 10 | %1 = load i32* @glob1 11 | return i32 %1 12 | } -------------------------------------------------------------------------------- /tests/testfiles/test2.ir: -------------------------------------------------------------------------------- 1 | # 3 2 | function i32 main() 3 | { 4 | %0 = i32 18 5 | %1 = i32 3 6 | %2 = sub i32 %0 %1 7 | %3 = i32 2 8 | %4 = mul i32 %2 %3 9 | %5 = i32 10 10 | %6 = div i32 %4 %5 11 | return i32 %6 12 | } --------------------------------------------------------------------------------
/tests/testfiles/test3.ir: -------------------------------------------------------------------------------- 1 | # 117 2 | function i32 main() 3 | { 4 | %0 = i32 1 5 | %1 = i32 2 6 | %3 = add i32 %0 %1 7 | 8 | %4 = i32 1 9 | %5 = i32 3 10 | %6 = add i32 %4 %5 11 | 12 | %7 = i32 1 13 | %8 = i32 4 14 | %9 = add i32 %7 %8 15 | 16 | %10 = i32 5 17 | %11 = i32 3 18 | %12 = add i32 %10 %11 19 | 20 | %13 = i32 2 21 | %14 = mul i32 %12 %13 22 | 23 | %15 = i32 10 24 | %16 = sub i32 %14 %15 25 | 26 | %17 = i32 4 27 | %18 = add i32 %16 %17 28 | 29 | %19 = add i32 %18 %3 30 | 31 | %20 = add i32 %6 %9 32 | %21 = mul i32 %19 %20 33 | 34 | return i32 %21 35 | } -------------------------------------------------------------------------------- /tests/testfiles/test4.ir: -------------------------------------------------------------------------------- 1 | # 5 2 | function i32 main() 3 | { 4 | %0 = i32 18 5 | %1 = i32 3 6 | %2 = sub i32 %0 %1 7 | 8 | %3 = i32 2 9 | %4 = mul i32 %2 %3 10 | 11 | %5 = i32 5 12 | %6 = add i32 %4 %5 13 | 14 | %7 = i32 10 15 | %8 = mod i32 %6 %7 16 | return i32 %8 17 | } -------------------------------------------------------------------------------- /tests/testfiles/test5.ir: -------------------------------------------------------------------------------- 1 | # 3 2 | function i32 main() 3 | { 4 | %0 = i32 1 5 | %1 = i32 2 6 | %3 = add i32 %0 %1 7 | 8 | %4 = i32 1 9 | %5 = i32 3 10 | %6 = add i32 %4 %5 11 | 12 | %7 = i32 1 13 | %8 = i32 4 14 | %9 = add i32 %7 %8 15 | 16 | %22 = i32 1 17 | %23 = add i32 %9 %22 18 | 19 | %10 = i32 5 20 | %11 = i32 3 21 | %12 = add i32 %10 %11 22 | 23 | %13 = i32 2 24 | %14 = mul i32 %12 %13 25 | 26 | %15 = i32 10 27 | %16 = sub i32 %14 %15 28 | 29 | %17 = i32 4 30 | %18 = add i32 %16 %17 31 | 32 | %19 = add i32 %18 %3 33 | 34 | %20 = add i32 %6 %9 35 | %21 = mul i32 %19 %20 36 | 37 | %24 = mod i32 %21 %23 38 | 39 | return i32 %24 40 | } -------------------------------------------------------------------------------- 
/tests/testfiles/test6.ir: -------------------------------------------------------------------------------- 1 | # 4 2 | function i32 foo() 3 | { 4 | %0 = i32 1 5 | %1 = i32 3 6 | %2 = add i32 %0 %1 7 | return i32 %2 8 | } 9 | 10 | function i32 main() 11 | { 12 | %0 = call i32 foo() 13 | return i32 %0 14 | } -------------------------------------------------------------------------------- /tests/testfiles/test7.ir: -------------------------------------------------------------------------------- 1 | # 8 2 | function i32 foo() 3 | { 4 | %0 = i32 1 5 | %1 = i32 3 6 | %2 = add i32 %0 %1 7 | return i32 %2 8 | } 9 | 10 | function i32 main() 11 | { 12 | %0 = i32 2 13 | %1 = call i32 foo() 14 | %2 = mul i32 %1 %0 15 | return i32 %2 16 | } -------------------------------------------------------------------------------- /tests/testfiles/test8.ir: -------------------------------------------------------------------------------- 1 | # 5 2 | function i32 main() 3 | { 4 | %0 = i32 1 5 | %1 = alloca i32 %0 6 | %2 = i32 5 7 | store i32* %1 %2 # store in %1 the value %2 8 | 9 | %3 = load i32* %1 10 | return i32 %3 11 | } -------------------------------------------------------------------------------- /tests/testfiles/test9.ir: -------------------------------------------------------------------------------- 1 | # 2 2 | function i32 main() 3 | { 4 | %0 = i32 4 5 | %1 = i32 4 6 | 7 | %3 = i32 2 8 | jmpcond eq i32 %0 %1 label 9 | 10 | %3 = i32 1 11 | 12 | label: 13 | 14 | return i32 %3 15 | } --------------------------------------------------------------------------------