├── .gitignore ├── CMakeLists.txt ├── Examples ├── Bubble │ ├── bubble │ ├── bubble.S │ └── bubble.fs ├── Conditional │ ├── condition │ ├── condition.S │ └── condition.fs ├── Fibonacci │ ├── fibonacci │ ├── fibonacci.S │ └── fibonacci.fs ├── Loop │ ├── loop │ ├── loop.S │ └── loop.fs ├── Memory │ ├── memory │ ├── memory.S │ └── memory.fs ├── MultipleWords │ ├── multiple │ ├── multiple.S │ └── multiple.fs ├── SimpleEmit │ ├── emit │ ├── emit.S │ └── emit.fs └── Swap │ ├── swap │ ├── swap.S │ └── swap.fs ├── LICENSE ├── MovForth Logo.png ├── README.md ├── headers ├── Constants.h ├── Generation │ ├── FBuilder.h │ ├── IRGenerator.h │ └── ObjectGenerator.h ├── Interpretation │ ├── ForthWord.h │ ├── IP.h │ ├── Input.h │ ├── Interpreter.h │ ├── LHC - Collider.txt │ ├── Local.h │ ├── Primitive.h │ ├── Stack.h │ ├── WordGenerator.h │ ├── iData.h │ └── iWord.h ├── PrimitiveEffects.h ├── PrimitiveEnums.h ├── Print.h ├── Symbolic │ ├── Block.h │ ├── Effects.h │ ├── Instruction.h │ ├── Pass.h │ ├── Structures.h │ └── sWord.h └── SystemExec.h ├── main.cpp └── sources ├── Generation ├── FBuilder.cpp ├── IRGenerator.cpp └── ObjectGenerator.cpp ├── Interpretation ├── ForthWord.cpp ├── IP.cpp ├── Interpreter.cpp ├── Primitive.cpp ├── Stack.cpp ├── WordGenerator.cpp ├── iData.cpp └── iWord.cpp ├── Print.cpp ├── Symbolic ├── Block.cpp ├── Instruction.cpp ├── Pass.cpp ├── Pass_BlockStackGraph.cpp ├── Pass_MatchingPairs.cpp ├── Pass_ToBasicBlocks.cpp ├── Pass_WordStackGraph.cpp └── sWord.cpp └── SystemExec.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Project exclude paths 2 | .idea/ 3 | *.ll 4 | CompilationSpace/ 5 | .vscode/ 6 | build/ -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.16) 3 | 4 | project(movForth) 5 | 6 | set(CMAKE_CXX_STANDARD 17) 7 | 
# NOTE(review): LLVM_ENABLE_NEW_PASS_MANAGER (and LLVM_ENABLE_DUMP at the
# bottom) look like options of LLVM's own build; presumably they have no
# effect when set in a consumer project like this one -- confirm before
# relying on them. They are kept as-is.
set(LLVM_ENABLE_NEW_PASS_MANAGER OFF)

# finding libz may be a bit buggy on some systems;
# cmake expects to find it in /lib/ but it may be in /lib/x86_64-linux-gnu/
# if you have installed libz-dev but cmake still fails,
# you can try sym linking it or setting ZLIB_LIBRARY to the location of the .so
find_package(ZLIB REQUIRED)

add_executable(movForth

        main.cpp

        sources/Print.cpp
        sources/SystemExec.cpp

        sources/Interpretation/iWord.cpp
        sources/Interpretation/iData.cpp
        sources/Interpretation/Interpreter.cpp
        sources/Interpretation/WordGenerator.cpp
        sources/Interpretation/ForthWord.cpp
        sources/Interpretation/Stack.cpp
        sources/Interpretation/IP.cpp
        sources/Interpretation/Primitive.cpp

        sources/Symbolic/sWord.cpp
        sources/Symbolic/Instruction.cpp
        sources/Symbolic/Block.cpp
        sources/Symbolic/Pass.cpp
        sources/Symbolic/Pass_ToBasicBlocks.cpp
        sources/Symbolic/Pass_BlockStackGraph.cpp
        sources/Symbolic/Pass_MatchingPairs.cpp
        sources/Symbolic/Pass_WordStackGraph.cpp

        sources/Generation/IRGenerator.cpp
        sources/Generation/FBuilder.cpp
        sources/Generation/ObjectGenerator.cpp
        )


# Locate an installed LLVM through its LLVMConfig.cmake package file.
# (The original call ended in a PATHS keyword with no paths after it.)
find_package(LLVM REQUIRED CONFIG)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")


target_include_directories(movForth PUBLIC ${LLVM_INCLUDE_DIRS})

# LLVM_DEFINITIONS is a single space-separated string; split it into a proper
# CMake list and pass *that* list on. The original passed the unsplit string,
# leaving LLVM_DEFINITIONS_LIST unused.
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
add_definitions(${LLVM_DEFINITIONS_LIST})

# Translate LLVM component names into the library names to link against.
llvm_map_components_to_libnames(llvm_libs
        support core irreader native passes codegen
        )

message(STATUS "Linking LLVM libraries ${llvm_libs}")
target_link_libraries(movForth PUBLIC ${llvm_libs})

SET(LLVM_ENABLE_DUMP ON)
/Examples/Bubble/bubble: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Reschivon/movForth/a9f4b1025e0d38fe934f9b6d05a2aebdcf5ca3d5/Examples/Bubble/bubble -------------------------------------------------------------------------------- /Examples/Bubble/bubble.S: -------------------------------------------------------------------------------- 1 | .text 2 | .file "MovForth" 3 | .globl main 4 | .p2align 4, 0x90 5 | .type main,@function 6 | main: 7 | .cfi_startproc 8 | pushq %rbp 9 | .cfi_def_cfa_offset 16 10 | pushq %r15 11 | .cfi_def_cfa_offset 24 12 | pushq %r14 13 | .cfi_def_cfa_offset 32 14 | pushq %r13 15 | .cfi_def_cfa_offset 40 16 | pushq %r12 17 | .cfi_def_cfa_offset 48 18 | pushq %rbx 19 | .cfi_def_cfa_offset 56 20 | subq $24, %rsp 21 | .cfi_def_cfa_offset 80 22 | .cfi_offset %rbx, -56 23 | .cfi_offset %r12, -48 24 | .cfi_offset %r13, -40 25 | .cfi_offset %r14, -32 26 | .cfi_offset %r15, -24 27 | .cfi_offset %rbp, -16 28 | movq %rsp, %rsi 29 | movl $20000, %edi 30 | callq .Lpopulate 31 | xorl %ebp, %ebp 32 | movq (%rsp), %r12 33 | leaq 8(%rsp), %r14 34 | leaq 16(%rsp), %r15 35 | jmp .LBB0_1 36 | .p2align 4, 0x90 37 | .LBB0_5: 38 | incq %rbp 39 | cmpq $19999, %rbp 40 | jge .LBB0_6 41 | .LBB0_1: 42 | xorl %ebx, %ebx 43 | jmp .LBB0_2 44 | .p2align 4, 0x90 45 | .LBB0_4: 46 | movl $19999, %eax 47 | subq %rbp, %rax 48 | movq %r13, %rbx 49 | cmpq %rax, %r13 50 | jge .LBB0_5 51 | .LBB0_2: 52 | leaq 1(%rbx), %r13 53 | movq %r13, %rdi 54 | movq %r12, %rsi 55 | movq %r14, %rdx 56 | callq ".Lget-value" 57 | movq %rbx, %rdi 58 | movq %r12, %rsi 59 | movq %r15, %rdx 60 | callq ".Lget-value" 61 | movq 8(%rsp), %rax 62 | cmpq 16(%rsp), %rax 63 | jge .LBB0_4 64 | leaq 1(%rbx), %rdi 65 | movq %rbx, %rsi 66 | movq %r12, %rdx 67 | callq .Lswap 68 | jmp .LBB0_4 69 | .LBB0_6: 70 | movl $20000, %edi 71 | movq %r12, %rsi 72 | callq ".Lprint-array" 73 | movq %r12, %rdi 74 | callq free@PLT 75 | addq $24, %rsp 76 | 
.cfi_def_cfa_offset 56 77 | popq %rbx 78 | .cfi_def_cfa_offset 48 79 | popq %r12 80 | .cfi_def_cfa_offset 40 81 | popq %r13 82 | .cfi_def_cfa_offset 32 83 | popq %r14 84 | .cfi_def_cfa_offset 24 85 | popq %r15 86 | .cfi_def_cfa_offset 16 87 | popq %rbp 88 | .cfi_def_cfa_offset 8 89 | retq 90 | .Lfunc_end0: 91 | .size main, .Lfunc_end0-main 92 | .cfi_endproc 93 | 94 | .p2align 4, 0x90 95 | .type .Lpopulate,@function 96 | .Lpopulate: 97 | .cfi_startproc 98 | pushq %r14 99 | .cfi_def_cfa_offset 16 100 | pushq %rbx 101 | .cfi_def_cfa_offset 24 102 | pushq %rax 103 | .cfi_def_cfa_offset 32 104 | .cfi_offset %rbx, -24 105 | .cfi_offset %r14, -16 106 | movq %rsi, %r14 107 | movq %rdi, %rbx 108 | leaq (,%rdi,8), %rdi 109 | callq malloc@PLT 110 | movl $42, %esi 111 | xorl %ecx, %ecx 112 | movq %rax, %rdx 113 | .p2align 4, 0x90 114 | .LBB1_1: 115 | imull $1309, %esi, %esi 116 | addl $13849, %esi 117 | movzwl %si, %esi 118 | movq %rsi, (%rdx) 119 | incq %rcx 120 | addq $8, %rdx 121 | cmpq %rbx, %rcx 122 | jl .LBB1_1 123 | movq %rax, (%r14) 124 | addq $8, %rsp 125 | .cfi_def_cfa_offset 24 126 | popq %rbx 127 | .cfi_def_cfa_offset 16 128 | popq %r14 129 | .cfi_def_cfa_offset 8 130 | retq 131 | .Lfunc_end1: 132 | .size .Lpopulate, .Lfunc_end1-.Lpopulate 133 | .cfi_endproc 134 | 135 | .p2align 4, 0x90 136 | .type ".Lget-value",@function 137 | ".Lget-value": 138 | .cfi_startproc 139 | movq (%rsi,%rdi,8), %rax 140 | movq %rax, (%rdx) 141 | retq 142 | .Lfunc_end2: 143 | .size ".Lget-value", .Lfunc_end2-".Lget-value" 144 | .cfi_endproc 145 | 146 | .p2align 4, 0x90 147 | .type .Lswap,@function 148 | .Lswap: 149 | .cfi_startproc 150 | movq (%rdx,%rsi,8), %rax 151 | movq (%rdx,%rdi,8), %rcx 152 | movq %rcx, (%rdx,%rsi,8) 153 | movq %rax, (%rdx,%rdi,8) 154 | retq 155 | .Lfunc_end3: 156 | .size .Lswap, .Lfunc_end3-.Lswap 157 | .cfi_endproc 158 | 159 | .p2align 4, 0x90 160 | .type ".Lprint-array",@function 161 | ".Lprint-array": 162 | .cfi_startproc 163 | pushq %r15 164 | 
.cfi_def_cfa_offset 16 165 | pushq %r14 166 | .cfi_def_cfa_offset 24 167 | pushq %rbx 168 | .cfi_def_cfa_offset 32 169 | .cfi_offset %rbx, -32 170 | .cfi_offset %r14, -24 171 | .cfi_offset %r15, -16 172 | movq %rsi, %r15 173 | movq %rdi, %r14 174 | xorl %ebx, %ebx 175 | .p2align 4, 0x90 176 | .LBB4_1: 177 | movq (%r15), %rsi 178 | movl $.L__unnamed_1, %edi 179 | xorl %eax, %eax 180 | callq printf@PLT 181 | incq %rbx 182 | addq $8, %r15 183 | cmpq %r14, %rbx 184 | jl .LBB4_1 185 | popq %rbx 186 | .cfi_def_cfa_offset 24 187 | popq %r14 188 | .cfi_def_cfa_offset 16 189 | popq %r15 190 | .cfi_def_cfa_offset 8 191 | retq 192 | .Lfunc_end4: 193 | .size ".Lprint-array", .Lfunc_end4-".Lprint-array" 194 | .cfi_endproc 195 | 196 | .type .L__unnamed_1,@object 197 | .section .rodata.str1.1,"aMS",@progbits,1 198 | .L__unnamed_1: 199 | .asciz "%d\n" 200 | .size .L__unnamed_1, 4 201 | 202 | .section ".note.GNU-stack","",@progbits 203 | -------------------------------------------------------------------------------- /Examples/Bubble/bubble.fs: -------------------------------------------------------------------------------- 1 | \ Implementation of Bubble Sort 2 | \ Lotta boilerplate here, because there's no mechanism 3 | \ to `include` files (yet). The locals here compile 4 | \ to the same code as an equivalent stack-using program 5 | 6 | 7 | \ Control flow utilities 8 | 9 | : negative -1 * ; 10 | 11 | : jump> 12 | literal branch , 13 | here 14 | 0 , ; 15 | 16 | : jumpif> 17 | literal branchif , 18 | here 19 | 0 , ; 20 | 21 | : >land 22 | dup 23 | here - negative 24 | swap ! ; 25 | 26 | : land< \ equivalent of MARK 27 | here ; 28 | 29 | : ; 44 | 45 | : else immediate 46 | jump> 47 | swap 48 | >land ; 49 | 50 | : then immediate 51 | >land ; 52 | 53 | \ Array utilities 54 | 55 | : cells 8 * ; \ movForth uses 64 bit only for now 56 | 57 | : get-value \ pointer index -- value 58 | cells + @ ; 59 | 60 | : set-index \ pointer index value -- 61 | rot rot 62 | cells + 63 | ! 
; 64 | 65 | : swap \ pointer index1 index2 -- 66 | to index2 to index1 to pointer 67 | 68 | pointer index1 get-value to num1 69 | pointer index2 get-value to num2 70 | 71 | pointer index1 num2 set-index 72 | pointer index2 num1 set-index 73 | ; 74 | 75 | : print-array \ pointer length -- 76 | to length to pointer 77 | 0 to index 78 | while 79 | pointer index get-value . 80 | index 1 + to index 81 | index length < repeatif 82 | ; 83 | 84 | 85 | : random \ seed -- seed 86 | 1309 * 13849 + 65535 & 87 | ; 88 | 89 | : populate \ length -- pointer 90 | to length 91 | 42 to seed 92 | length 8 * malloc to pointer 93 | 94 | 0 to i 95 | while 96 | seed random to seed 97 | pointer i seed set-index 98 | i 1 + to i 99 | i length < repeatif 100 | 101 | pointer 102 | ; 103 | 104 | 105 | : main 106 | 20000 to length 107 | length populate to array 108 | 109 | 0 to i 110 | while 111 | 0 to j 112 | while 113 | array j 1 + get-value 114 | array j get-value 115 | < if 116 | array j j 1 + swap 117 | then 118 | 119 | j 1 + to j 120 | j length i - 1 - < repeatif 121 | 122 | i 1 + to i 123 | i length 1 - < repeatif 124 | 125 | array length print-array 126 | 127 | array free 128 | ; 129 | 130 | -------------------------------------------------------------------------------- /Examples/Conditional/condition: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Reschivon/movForth/a9f4b1025e0d38fe934f9b6d05a2aebdcf5ca3d5/Examples/Conditional/condition -------------------------------------------------------------------------------- /Examples/Conditional/condition.S: -------------------------------------------------------------------------------- 1 | .text 2 | .file "MovForth" 3 | .globl main 4 | .p2align 4, 0x90 5 | .type main,@function 6 | main: 7 | .cfi_startproc 8 | pushq %rax 9 | .cfi_def_cfa_offset 16 10 | movl $.L__unnamed_1, %edi 11 | movl $32, %esi 12 | xorl %eax, %eax 13 | callq printf@PLT 14 | movl $.L__unnamed_1, %edi 15 | 
movl $32, %esi 16 | xorl %eax, %eax 17 | callq printf@PLT 18 | popq %rax 19 | .cfi_def_cfa_offset 8 20 | retq 21 | .Lfunc_end0: 22 | .size main, .Lfunc_end0-main 23 | .cfi_endproc 24 | 25 | .type .L__unnamed_1,@object 26 | .section .rodata.str1.1,"aMS",@progbits,1 27 | .L__unnamed_1: 28 | .asciz "%d\n" 29 | .size .L__unnamed_1, 4 30 | 31 | .section ".note.GNU-stack","",@progbits 32 | -------------------------------------------------------------------------------- /Examples/Conditional/condition.fs: -------------------------------------------------------------------------------- 1 | : negative -1 * ; 2 | 3 | : jump> 4 | literal branch , 5 | here 6 | 0 , ; 7 | 8 | : jumpif> 9 | literal branchif , 10 | here 11 | 0 , ; 12 | 13 | : >land 14 | dup 15 | here - negative 16 | swap ! ; 17 | 18 | : if immediate 19 | jumpif> ; 20 | 21 | : else immediate 22 | jump> 23 | swap 24 | >land ; 25 | 26 | : then immediate 27 | >land ; 28 | 29 | : conditional if 32 32 else 42 42 then . . ; 30 | 31 | : main 1 conditional ; 32 | -------------------------------------------------------------------------------- /Examples/Fibonacci/fibonacci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Reschivon/movForth/a9f4b1025e0d38fe934f9b6d05a2aebdcf5ca3d5/Examples/Fibonacci/fibonacci -------------------------------------------------------------------------------- /Examples/Fibonacci/fibonacci.S: -------------------------------------------------------------------------------- 1 | .text 2 | .file "MovForth" 3 | .globl main 4 | .p2align 4, 0x90 5 | .type main,@function 6 | main: 7 | .cfi_startproc 8 | pushq %r15 9 | .cfi_def_cfa_offset 16 10 | pushq %r14 11 | .cfi_def_cfa_offset 24 12 | pushq %rbx 13 | .cfi_def_cfa_offset 32 14 | .cfi_offset %rbx, -32 15 | .cfi_offset %r14, -24 16 | .cfi_offset %r15, -16 17 | movl $1, %eax 18 | movq $-40, %r14 19 | movl $1, %ebx 20 | .p2align 4, 0x90 21 | .LBB0_1: 22 | movq %rax, %r15 23 | addq %rax, 
%rbx 24 | movl $.L__unnamed_1, %edi 25 | movq %rbx, %rsi 26 | xorl %eax, %eax 27 | callq printf@PLT 28 | movq %rbx, %rax 29 | movq %r15, %rbx 30 | incq %r14 31 | jne .LBB0_1 32 | popq %rbx 33 | .cfi_def_cfa_offset 24 34 | popq %r14 35 | .cfi_def_cfa_offset 16 36 | popq %r15 37 | .cfi_def_cfa_offset 8 38 | retq 39 | .Lfunc_end0: 40 | .size main, .Lfunc_end0-main 41 | .cfi_endproc 42 | 43 | .type .L__unnamed_1,@object 44 | .section .rodata.str1.1,"aMS",@progbits,1 45 | .L__unnamed_1: 46 | .asciz "%d\n" 47 | .size .L__unnamed_1, 4 48 | 49 | .section ".note.GNU-stack","",@progbits 50 | -------------------------------------------------------------------------------- /Examples/Fibonacci/fibonacci.fs: -------------------------------------------------------------------------------- 1 | 2 | 3 | : land< 4 | here ; 5 | 6 | 7 | : [!NOTE] 19 | > Q: What's the point of a purely compiled Forth, ie. without interactivity? 20 | > 21 | > That's a good question. Short answer is for speed. Long answer is that this project was a proof of concept for mixing traditional and modern tools and I 22 | > simply did it for fun and not as anything practical. But perhaps this can be addressed by having an interpreter dynamically link to a 23 | > compiled lib. 24 | 25 | ## Installation 26 | 27 | - Have LLVM installed (LLVM-10 and above). For Ubuntu/Debian-based distros, simply run: 28 | 29 | ``` 30 | sudo apt install llvm 31 | ``` 32 | 33 | - Clone the repository 34 | 35 | ``` 36 | git clone https://github.com/Reschivon/movForth 37 | ``` 38 | 39 | - Build as CMake project 40 | ``` 41 | cd movForth 42 | mkdir build && cd build 43 | cmake .. 44 | make -j4 45 | ``` 46 | 47 | - Optionally, you can install `clang++`. MovForth uses clang++ to link the generated assembly file with stdlibc++ and produce a binary. If clang++ is not installed, you can link and assemble the .S file manually. 
48 | 49 | Alternatively, you can check `Releases` for precompiled binaries, but it is highly unlikely I will remember to update them. 50 | 51 | ## Running 52 | 53 | The `main.cpp` file compiles into a simple command line utility for movForth. The syntax is: 54 | 55 | `movForth [source file]` 56 | 57 | An example usage, assuming you have a cmake build directory named `build`, is: 58 | ``` 59 | build/movForth Examples/Fibonacci/fibonacci.fs 60 | ``` 61 | Do check out the `Examples/` folder. In the likely scenario that you encounter a bug or crash, feel free to create an issue. 62 | 63 | 64 | ## Concept 65 | MovForth achieves LLVM IR generation in three steps 66 | 67 | 1. The immediate (compile-time) portion of Forth source is run, and a dictionary is generated 68 | 2. Data flow and control flow graphs are built for the words in the dictionary. This process computes the number of parameters and returns for each word, and allocates unique registers for each element passed on the stack. 69 | 3. The graphs from the previous step are used to generate LLVM Basic Blocks and registers. Optimization passes are run and machine code for a target of choice is produced. 70 | 71 | **Future features:** 72 | - Benchmarks 73 | - Standardizing movForth to be as close to "regular" Forth as possible 74 | - Dynamic linking so you can have an interactive Forth with certain compiled words 75 | - Interfacing with libraries that follow the C ABI 76 | 77 | ## Supported Primitives 78 | MovForth is powerful because operations that are immediate in a traditional interpreted Forth are run during compile time. Therefore, meta-compilation words like `IF`, `DO`, and `COLON` incur no runtime penalty. 79 | 80 | Words available during immediate mode: 81 | 82 | ```< = + - * / SWAP ROT DUP DROP . SHOW ' , SEE [ ] IMMEDIATE ALLOT @ ! BRANCH BRANCHIF LITERAL HERE CREATE``` 83 | 84 | Words available during runtime: 85 | 86 | ```< = + - * / BRANCH BRANCHIF . DUP DROP LITERAL ! 
/**
 * IRBuilder<> subclass that adds the per-function bookkeeping used while
 * emitting a single function: a handle to that function, lookup tables for
 * register slots and basic blocks, and a cached "%d\n" format string.
 *
 * NOTE(review): the template arguments of the two unordered_map members are
 * not visible here. Judging by insert_val_ptr(Register, Value*) and
 * create_block(uint, BasicBlock*), val_ptrs presumably maps a Register to a
 * Value* and blocks maps a block index to a BasicBlock* -- confirm.
 */
class FBuilder : public IRBuilder<> {

    Function *the_function;
    LLVMContext &the_context;

    std::unordered_map val_ptrs;

    std::unordered_map blocks;

    // Created on first use by get_emit_string_ptr(), then reused.
    Value* emit_string_ptr = nullptr;

public:
    explicit FBuilder(LLVMContext &context, Function *the_function);

    /**
     * Return the pointer to the global "%d\n" string, creating it with
     * CreateGlobalStringPtr on the first call and returning the cached
     * pointer on every later call.
     */
    Value* get_emit_string_ptr(){
        if(!emit_string_ptr)
            emit_string_ptr = CreateGlobalStringPtr("%d\n");
        return emit_string_ptr;
    }

    /**
     * Build a load instruction and return the result of that load
     * @param reg register to load from
     * @return the loaded Value
     */
    Value* build_load_register(Register reg);

    /**
     * Build a store instruction to the reg variable
     * @param value Value to store
     * @param reg   register to store into
     */
    void build_store_register(Value* value, Register reg);

    /**
     * Inform the builder ahead of time that certain
     * parameter variables point to special parameter Values
     * @param reg register to associate
     * @param a_i Value that reg should refer to
     */
    void insert_val_ptr(Register reg, Value *a_i);

    /** Register `block` under `index` (presumably in `blocks` -- confirm). */
    void create_block(uint index, BasicBlock *block);

    /** Return the block registered under `index`. */
    BasicBlock *get_block(uint index);

    /**
     * Emit an add of the 64-bit signed constant `val` and a 64-bit zero and
     * return the Value produced by CreateAdd.
     * @param val constant to materialise
     */
    Value *CreateForthConstant(element val) {
        return CreateAdd(
                ConstantInt::get(Context, APInt(64, val, true)),
                ConstantInt::get(Context, APInt(64, 0, true))
        );
    }

    /**
     * Make sure this variable gets allocated at the beginning, then return the Value*
     * pointing to that allocation
     * @param reg register whose slot is wanted
     * @return Value* pointing to the allocation
     */
    Value * create_ptr_to_val(Register reg);

private:

    /**
     * Return the Value* pointing to the allocation of this variable
     * @param reg register whose slot is wanted
     * @return Value* pointing to the allocation
     */
    Value * get_ptr_to_val(Register reg);

    /**
     * Utility for building alloca instructions in entry block
     * @param func     function whose entry block receives the alloca
     * @param var_name name given to the allocation
     * @return the created AllocaInst
     */
    AllocaInst *create_entry_block_alloca(Function *func, const std::string &var_name);

};
/**
 * Generates an LLVM module from symbolic words (sWord), starting at a root
 * word passed to generate().
 *
 * Only declarations are visible here, so the notes below are limited to what
 * the signatures show.
 *
 * NOTE(review): the template arguments of visited_words, the_module, fpm and
 * the return type of generate() are not visible here; restore them from the
 * original header before compiling.
 */
class IRGenerator {
private:
    // Presumably caches words that already have a generated Function, given
    // get_function(sWordptr) below -- confirm.
    std::unordered_map visited_words{};

    LLVMContext the_context;
    std::shared_ptr the_module;
    std::unique_ptr fpm;

    void declare_printf();
    Function* make_main();

    /**
     * @param fword   word to generate a function for
     * @param is_root whether `fword` is the root word handed to generate()
     *                (assumed from the name -- confirm)
     */
    Function* generate_function(sWord *fword, bool is_root);
    Function* get_function(sWordptr word);

    // Defaults to true; presumably toggles the optimisation passes -- confirm.
    bool do_optimize = true;

public:
    IRGenerator();

    /** Entry point: generate code for `root`. */
    std::pair generate(sWord *root);

    /**
     * @param program_name name of the program being printed
     * @param to_file      defaults to false; presumably selects writing to a
     *                     file instead of the console -- confirm
     */
    void print_module(const std::string &program_name, bool to_file = false);

    int hello_world();

    void hello_world2();

    void exec_module(const std::string& program_name);

    void exec_module2(const std::string &program_name);

    void declare_malloc();

    void declare_free();
};
3 | // 4 | 5 | #ifndef MOVFORTH_FORTHWORD_H 6 | #define MOVFORTH_FORTHWORD_H 7 | 8 | #include "iWord.h" 9 | #include "iData.h" 10 | #include "Interpreter.h" 11 | 12 | namespace mov { 13 | /** 14 | * Defined word, holds a definition 15 | */ 16 | class ForthWord : public iWord { 17 | std::list definition; 18 | 19 | public: 20 | 21 | ForthWord(std::string name, bool immediate); 22 | 23 | void execute(IP ip, Interpreter &interpreter) override; 24 | 25 | std::unordered_map locals; 26 | 27 | /** 28 | * Append definition 29 | */ 30 | void add(iData data); 31 | 32 | /** 33 | * Set index of definition to value 34 | * @throws std::__out_of_range if invalid index 35 | */ 36 | void set(int index, iData value); 37 | 38 | /** 39 | * For debuggin only. Does what you think 40 | */ 41 | void definition_to_string(); 42 | 43 | /** 44 | * @return reference to definition 45 | */ 46 | std::list &def() { 47 | return definition; 48 | } 49 | 50 | /** 51 | * 52 | * @return the size of definition 53 | */ 54 | uint def_size(){ 55 | int size = 0; 56 | for(auto thing : definition) 57 | size++; 58 | return size; 59 | } 60 | }; 61 | 62 | 63 | 64 | } 65 | 66 | #endif //MOVFORTH_FORTHWORD_H 67 | -------------------------------------------------------------------------------- /headers/Interpretation/IP.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by reschivon on 11/17/21. 
/**
 * Interface for all types of Forth input.
 *
 * Implementations hand out tokens one at a time and clear `has_next` once
 * the source is exhausted, which is what open() reports.
 */
class Input {
    /**
     * Next space or whitespace delimited token from input source
     * @return std::string of token, or "" when no token could be read
     */
    virtual std::string next_token() = 0;

protected:
    // True until an implementation fails to read another token.
    bool has_next = true;

public:
    // Polymorphic base: make deletion through an Input* well defined.
    virtual ~Input() = default;

    /**
     * @return true while the source has not yet reported exhaustion
     */
    bool open(){
        return has_next;
    }
};


/**
 * Input that tokenises an in-memory string.
 */
class string_input : public Input{
    std::istringstream input;

public:
    explicit string_input(const std::string &data) : input(data) {}

    /**
     * @return the next whitespace-delimited token, or "" once the string is
     *         exhausted. The first failed read also clears has_next so that
     *         open() turns false, matching file_input.
     */
    std::string next_token() override {
        std::string ret;
        if(input >> ret)
            return ret;

        has_next = false;
        return "";
    }
};
if(slash_index != std::string::npos){ 72 | tok = tok.substr(0, slash_index); 73 | file.ignore(std::numeric_limits::max(), '\n'); 74 | } 75 | }else 76 | { 77 | has_next = false; 78 | } 79 | 80 | if(tok.empty()){ 81 | if(has_next) 82 | return next_token(); 83 | }else{ 84 | return tok; 85 | } 86 | 87 | return ""; 88 | } 89 | }; 90 | 91 | class cin_input : public Input { 92 | 93 | public: 94 | std::string next_token() override { 95 | std::string ret; 96 | if(std::cin >> ret) 97 | return ret; 98 | return ""; 99 | } 100 | }; 101 | 102 | 103 | } 104 | 105 | 106 | #endif //MOVFORTH_INTER_INPUT_H 107 | -------------------------------------------------------------------------------- /headers/Interpretation/Interpreter.h: -------------------------------------------------------------------------------- 1 | #ifndef MOVFORTH_INTER_INTERPRETER_H 2 | #define MOVFORTH_INTER_INTERPRETER_H 3 | 4 | #include 5 | 6 | #include "Input.h" 7 | 8 | namespace mov { 9 | class Stack; 10 | class iData; 11 | class iWord; 12 | /** 13 | * Forth Interpreter that holds state 14 | */ 15 | 16 | class iWordGenerator; 17 | class Interpreter { 18 | 19 | iWordGenerator &word_generator; 20 | void init_words(); 21 | 22 | public: 23 | file_input input; 24 | std::vector dictionary; 25 | Stack &stack; 26 | bool immediate = true; 27 | 28 | /** 29 | * Finds the most recent dictionary entry that matches the given name 30 | * @param name name of word to be found 31 | * @return a pointer to the word, if found. 
Otherwise nullptr 32 | */ 33 | iWord* find(const std::string &name); 34 | 35 | /** 36 | * Creates an interpreter object and run the given Forth file 37 | * @param path relative or absolute path to the Forth file 38 | */ 39 | explicit Interpreter(const std::string& path); 40 | 41 | bool interpret(); 42 | }; 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /headers/Interpretation/LHC - Collider.txt: -------------------------------------------------------------------------------- 1 | Lyrics of the song "Collider" 2 | by LHC (Les Horribles Cernettes) 3 | 4 | Their Youtube channel reads: 5 | "The one and only High Energy band. Formed in 6 | 1990, at the same time, and in the office 7 | next door to the World Wide Web" 8 | 9 | Notable for being the first image 10 | uploaded to the World Wide Web 11 | 12 | If you enjoy hurting yourself, 13 | you can replace "collider" with any unstressed 14 | three syllable word that relates to your 15 | profession. 
/**
 * Identifies a local variable by two strings.
 *
 * NOTE(review): presumably `word` is the name of the word the local belongs
 * to and `name` is the local's own name -- confirm against the callers.
 */
struct Local {

    std::string word; std::string name;

    /**
     * Two locals are equal only when both `word` and `name` match.
     * (The original compared `name` with itself, so `name` was ignored.)
     */
    bool operator==(const Local &other) const {
        return name == other.name && word == other.word;
    }

    Local(std::string word, std::string name)
            : word(std::move(word)), name(std::move(name)){
    }

    /** @return "Local(<word> <name>)" */
    std::string to_string() const{
        return "Local(" + word + " " + name + ")";
    }

};

/**
 * Hash functor for Local, for use as the hasher of unordered containers.
 * Combines the hashes of `word` and `name`, the same two fields that
 * operator== compares, so equal locals always hash alike.
 */
struct LocalHasher
{
    std::size_t operator()(const Local& k) const
    {
        size_t res = 17;
        res = res * 31 + std::hash<std::string>()(k.word);
        res = res * 31 + std::hash<std::string>()(k.name);
        return res;
    }
};
/**
 * Stack of the interpreter
 * Holds Data objects as elements
 *
 * NOTE(review): the element type of the backing vector and the signature of
 * the for_each callback are not visible in this view - confirm in Stack.h.
 */
class Stack {
    // Backing storage for the stack elements
    std::vector stack{};

public:
    // Three overloads: push a number, a word pointer, or an iData value
    void push(element number);
    void push(iWordptr iWord_pointer);
    void push(iData thing);

    /**
     * @return the top number, or 0 if the top is not a number
     */
    element pop_number();


    /**
     * @return the top Data, or an empty Data if stack is empty
     */
    iData pop();

    // Returns an iData by value
    // NOTE(review): presumably the top element without removing it - confirm in Stack.cpp
    iData top();

    // Note: returns a signed int
    int size();

    // Takes a callable
    // NOTE(review): presumably applied to every element - confirm order and semantics in Stack.cpp
    void for_each(std::function action);
};
is_number() const; 22 | [[nodiscard]] bool is_word() const; 23 | [[nodiscard]] bool is_forth_word() const; 24 | [[nodiscard]] bool is_primitive() const; 25 | [[nodiscard]] bool is_empty() const; 26 | 27 | element as_number(); 28 | iWord* as_word(); 29 | ForthWord* as_forth_word() const; 30 | Primitive* as_primitive(); 31 | 32 | explicit iData(dict_data_var_type data); 33 | iData(); 34 | 35 | std::string to_string(); 36 | }; 37 | 38 | } 39 | 40 | 41 | #endif //MOVFORTH_IDATA_H 42 | -------------------------------------------------------------------------------- /headers/Interpretation/iWord.h: -------------------------------------------------------------------------------- 1 | #ifndef MOVFORTH_INTER_iWord_H 2 | #define MOVFORTH_INTER_iWord_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "../PrimitiveEffects.h" 13 | #include "Local.h" 14 | #include "IP.h" 15 | 16 | namespace mov{ 17 | 18 | class iWord; 19 | class Interpreter; 20 | typedef iWord *iWordptr; 21 | 22 | /** 23 | * A word in the interpreter's dictionary 24 | */ 25 | class iWord{ 26 | 27 | const std::string _name; 28 | 29 | public: 30 | /** 31 | * Identifier for primitive words 32 | * Use this to compare *word types*, ie. 
all branch instances have equal ids 33 | * Defined words fall under a general id primitive_words::OTHER 34 | */ 35 | primitive_words id = primitive_words::OTHER; 36 | 37 | bool immediate; 38 | 39 | /** 40 | * Whether the word relies on the Data in the next 41 | * definition entry during execution 42 | */ 43 | const bool stateful; 44 | 45 | iWord(std::string name, bool immediate, bool stateful); 46 | iWord(std::string name, primitive_words id, bool immediate, bool stateful); 47 | 48 | virtual void execute(IP ip, Interpreter &interpreter) = 0; 49 | 50 | virtual std::string name(); 51 | 52 | bool branchy(); 53 | 54 | }; 55 | 56 | 57 | } 58 | 59 | 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /headers/PrimitiveEffects.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef MOVFORTH_PRIMITIVEEFFECTS_H 4 | #define MOVFORTH_PRIMITIVEEFFECTS_H 5 | 6 | #include "PrimitiveEnums.h" 7 | #include "Symbolic/sWord.h" 8 | 9 | namespace mov{ 10 | static const std::vector> swap_oi_pairs = {{1, 0}, {0, 1}}; 11 | static const std::vector> rot_oi_pairs = {{1, 0}, {0, 1}, {2, 2}}; // original: {{1, 1}, {0, 0}, {2, 2}}; 12 | static const std::vector> dup_oi_pairs = {{1, 0}, {0, 0}}; 13 | 14 | // primitives (static instances) 15 | static const std::unordered_map primitive_lookup = { 16 | {primitive_words::EQUALS, new sWord("=", primitive_words::EQUALS, Effects{.num_popped = 2, .num_pushed = 1})}, 17 | {primitive_words::ADD, new sWord("+", primitive_words::ADD, Effects{.num_popped = 2, .num_pushed = 1})}, 18 | {primitive_words::SUBTRACT, new sWord("-", primitive_words::SUBTRACT, {.num_popped = 2, .num_pushed = 1})}, 19 | {primitive_words::MULTIPLY, new sWord("*", primitive_words::MULTIPLY, {.num_popped = 2, .num_pushed = 1})}, 20 | {primitive_words::DIVIDE, new sWord("/", primitive_words::DIVIDE, {.num_popped = 2, .num_pushed = 1})}, 21 | {primitive_words::SWAP, new sWord("swap", 
primitive_words::SWAP, Effects{.num_popped = 2, .num_pushed = 2, .consume_token=false, .compiled_slots=0, .interpret_state=Effects::interpret_state::NONE, .define_new_word=false, .out_in_pairs = swap_oi_pairs})}, 22 | {primitive_words::ROT, new sWord("rot", primitive_words::ROT, Effects{.num_popped = 3, .num_pushed = 3, .consume_token=false, .compiled_slots=0, .interpret_state=Effects::interpret_state::NONE, .define_new_word=false, .out_in_pairs = rot_oi_pairs})}, 23 | {primitive_words::DUP, new sWord("dup", primitive_words::DUP, Effects{.num_popped = 1, .num_pushed = 2, .consume_token=false, .compiled_slots=0, .interpret_state=Effects::interpret_state::NONE, .define_new_word=false, .out_in_pairs = dup_oi_pairs})}, 24 | {primitive_words::DROP, new sWord("drop", primitive_words::DROP, {.num_popped = 1})}, 25 | {primitive_words::EMIT, new sWord(".", primitive_words::EMIT, {.num_popped = 1})}, 26 | {primitive_words::SHOW, new sWord(".S", primitive_words::SHOW, Effects::neutral)}, 27 | {primitive_words::TICK, new sWord("'", primitive_words::TICK, {.num_popped = 0, .num_pushed = 0, .consume_token = true})}, 28 | {primitive_words::COMMA, new sWord(",", primitive_words::COMMA, {.num_popped = 1, .num_pushed=0, .consume_token=false, .compiled_slots = 1})}, 29 | {primitive_words::SEE, new sWord("see", primitive_words::SEE, Effects::neutral)}, 30 | {primitive_words::TOIMMEDIATE, new sWord("[", primitive_words::TOIMMEDIATE, {.num_popped = 0, .num_pushed = 0, .consume_token=false, .compiled_slots=0, .interpret_state = Effects::interpret_state::TOIMM})}, 31 | {primitive_words::TOCOMPILE, new sWord("]", primitive_words::TOCOMPILE, {.num_popped = 0, .num_pushed = 0, .consume_token=false, .compiled_slots=0, .interpret_state = Effects::interpret_state::TOCOMP})}, 32 | {primitive_words::IMMEDIATE, new sWord("immediate", primitive_words::IMMEDIATE, Effects::neutral)}, // very rare this ends up in compiled code, consider warn on encounter 33 | {primitive_words::ALLOT, new 
sWord("allot", primitive_words::ALLOT, {.num_popped = 1})}, 34 | {primitive_words::FETCH, new sWord("@", primitive_words::FETCH, {.num_popped = 1, .num_pushed = 1})}, 35 | {primitive_words::STORE, new sWord("!", primitive_words::STORE, {.num_popped = 2,})}, 36 | {primitive_words::BRANCH, new sWord("branch", primitive_words::BRANCH, Effects::neutral)}, 37 | {primitive_words::BRANCHIF, new sWord("branchif", primitive_words::BRANCHIF, {.num_popped = 1})}, 38 | {primitive_words::LITERAL, new sWord("literal", primitive_words::LITERAL, {.num_popped =0, .num_pushed = 1})}, 39 | {primitive_words::HERE, new sWord("here", primitive_words::HERE, {.num_popped =0, .num_pushed = 1})}, 40 | {primitive_words::CREATE, new sWord("create", primitive_words::CREATE, {.num_popped = 0, .num_pushed = 0, .consume_token = true, .compiled_slots=0, .interpret_state=Effects::interpret_state::NONE, .define_new_word = true})}, 41 | {primitive_words::MALLOC, new sWord("alloca", primitive_words::MALLOC, {.num_popped = 1, .num_pushed = 1})}, 42 | {primitive_words::FREE, new sWord("dealloca", primitive_words::FREE, {.num_popped = 1})}, 43 | {primitive_words::AND, new sWord("&", primitive_words::AND, {.num_popped = 2, .num_pushed = 1})}, 44 | {primitive_words::LESS, new sWord("<", primitive_words::LESS, {.num_popped = 2, .num_pushed = 1})}, 45 | {primitive_words::TO, new sWord("is", primitive_words::TO, {.num_popped = 0, .num_pushed = 0, .consume_token = true})}, 46 | {primitive_words::TOLOCAL, new sWord("tolocal", primitive_words::TOLOCAL, {.num_popped = 1})}, 47 | {primitive_words::FROMLOCAL, new sWord("fromlocal", primitive_words::FROMLOCAL, {.num_popped = 0, .num_pushed = 1})} 48 | }; 49 | } 50 | 51 | #endif //MOVFORTH_PRIMITIVEEFFECTS_H 52 | -------------------------------------------------------------------------------- /headers/PrimitiveEnums.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef MOVFORTH_PRIMITIVEENUMS_H 4 | #define 
namespace mov{
    /**
     * Identifiers for the built-in words.
     *
     * The numeric values are spelled out explicitly and are not in
     * declaration-group order (EQUALS follows OTHER), so new entries should
     * be appended with the next free value rather than inserted.
     * The quoted spelling next to an entry is the name given to the matching
     * sWord in primitive_lookup (PrimitiveEffects.h); EXIT and OTHER have no
     * entry in that table.
     */
    enum primitive_words{
        ADD         =0,   // "+"
        SUBTRACT    =1,   // "-"
        MULTIPLY    =2,   // "*"
        DIVIDE      =3,   // "/"
        SWAP        =4,   // "swap"
        ROT         =5,   // "rot"
        DUP         =6,   // "dup"
        DROP        =7,   // "drop"
        EMIT        =8,   // "."
        SHOW        =9,   // ".S"
        TICK        =10,  // "'"
        COMMA       =11,  // ","
        SEE         =12,  // "see"
        TOIMMEDIATE =13,  // "["
        TOCOMPILE   =14,  // "]"
        IMMEDIATE   =15,  // "immediate"
        ALLOT       =16,  // "allot"
        FETCH       =17,  // "@"
        STORE       =18,  // "!"
        BRANCH      =19,  // "branch"
        BRANCHIF    =20,  // "branchif"
        LITERAL     =21,  // "literal"
        HERE        =22,  // "here"
        CREATE      =23,  // "create"
        EXIT        =24,  // no primitive_lookup entry
        OTHER       =25,  // default id of iWord / sWord, i.e. user-defined words
        EQUALS      =26,  // "="
        MALLOC      =27,  // "alloca"
        FREE        =28,  // "dealloca"
        AND         =29,  // "&"
        LESS        =30,  // "<"
        TO          =31,  // "is"
        TOLOCAL     =32,  // "tolocal"
        FROMLOCAL   =33   // "fromlocal"
    };
}
namespace mov{
    /**
     * Hands out consecutive unsigned ids: the first get() returns 1 and
     * every call returns one more than the previous call.
     */
    struct BBgen{
        unsigned int get(){
            return id++;
        }
    private:
        unsigned int id = 1;
    };

    struct Instruction;

    /**
     * One basic block of a word: an ordered list of instructions plus the
     * stack-graph nodes on entry and exit.
     * NOTE(review): the element types of the std::vector members and of
     * std::reference_wrapper are not visible in this view - confirm in Block.h.
     */
    struct Block{
        typedef std::reference_wrapper bb_ref;

        // The word this block belongs to
        sWordptr parent;

        unsigned int index = 0; // like Register but I was lazy

        std::vector instructions{}; //TODO reference wrapper

        // Nodes of this basic block before the first instruction
        NodeList inputs{};
        // Nodes of this basic block after the last instruction
        NodeList outputs{};
        // Parameter nodes used in ONLY this block;
        // added by propagate stack
        NodeList params{};

        // Do not store push/pop effects for Block
        // in graph generation, the stack may come in
        // with more than 0 elements, and this makes
        // calculating push/pop complicated
        // We never need this statistic anyways
        Effects effects_without_push_pop;

        // NOTE(review): presumably draws `index` from gen - confirm in Block.cpp
        explicit Block(BBgen& gen, sWordptr parent);

        // NOTE(review): presumably the successor blocks in the control flow graph - confirm in Block.cpp
        std::vector nextBBs();
        bool is_exit();
        std::string name();

        // cyclic pass use only (*sigh* there was no other way)
        bool inputs_aligned = false;
        bool outputs_aligned = false;

        // RegisterGen has no default constructor, so the Block constructor
        // has to initialise this member explicitly
        RegisterGen register_gen; // to track registers for this BB
        std::vector initial_registers; // temporary store

        uint initial_accumulated_stack_size = 0;
        uint initial_accumulated_params = 0;

        void align_registers();

    };
}
/**
 * Side effects of executing a word (or a group of instructions).
 *
 * This type must stay an aggregate with its members in exactly this order:
 * PrimitiveEffects.h builds instances with designated initialisers such as
 * `Effects{.num_popped = 2, .num_pushed = 1}`. Do not add constructors or
 * reorder the fields.
 */
struct Effects{
    unsigned int num_popped = 0;   // stack elements consumed
    unsigned int num_pushed = 0;   // stack elements produced

    bool consume_token = false; // ONLY for single token consumers, leave out "

    unsigned int compiled_slots = 0; // TODO still unsure how to handle memory

    // Interpreter state switch caused by the word, if any
    enum interpret_state{NONE, TOCOMP, TOIMM} interpret_state = interpret_state::NONE;

    bool define_new_word = false;

    // mapping of index for identical registers
    std::vector<std::pair<int, int>> out_in_pairs;

    /**
     * Merges every effect of `other` into this object except the push/pop
     * counts and out_in_pairs, which are handled elsewhere.
     *
     * - consume_token and define_new_word are OR-ed,
     * - compiled_slots is accumulated,
     * - interpret_state is overwritten only when `other` actually switches state.
     *
     * @param other effects to absorb; only read, hence taken by const reference
     */
    void acquire_side_effects_ignore_push_pop(const Effects& other){
        consume_token = consume_token || other.consume_token;
        define_new_word = define_new_word || other.define_new_word;

        compiled_slots += other.compiled_slots;

        if(other.interpret_state != NONE)
            interpret_state = other.interpret_state;
    }

    // Shared "no effects" instance; defined in a source file
    static Effects neutral;
};
BranchInstruction; 11 | struct BranchIfInstruction; 12 | struct ReturnInstruction; 13 | 14 | struct Instruction{ 15 | sWordptr linked_word; 16 | sData data = sData(nullptr); // acquired from next in token list 17 | 18 | NodeList pop_nodes{}; 19 | NodeList push_nodes{}; 20 | 21 | explicit Instruction(sWordptr linked_word, sData data); 22 | 23 | BranchInstruction* as_branch(); 24 | BranchIfInstruction* as_branchif(); 25 | ReturnInstruction* as_return(); 26 | 27 | virtual std::string name(); 28 | [[nodiscard]] primitive_words id() const; 29 | 30 | [[nodiscard]] bool branchy() const; 31 | }; 32 | 33 | struct Block; 34 | 35 | struct BranchInstruction : public Instruction { 36 | Block *jump_to; 37 | explicit BranchInstruction(sWordptr linked_word, sData data, Block *jump_to); 38 | 39 | std::string name() override; 40 | }; 41 | 42 | struct BranchIfInstruction : public Instruction { 43 | Block *jump_to_next; 44 | Block *jump_to_far; 45 | explicit BranchIfInstruction(sWordptr linked_word, sData data, Block *jump_to_close, Block *jump_to_far); 46 | 47 | std::string name() override; 48 | }; 49 | 50 | struct ReturnInstruction : public Instruction{ 51 | ReturnInstruction(); 52 | std::string name() override; 53 | }; 54 | 55 | } 56 | 57 | #endif //MOVFORTH_INSTRUCTION_H 58 | -------------------------------------------------------------------------------- /headers/Symbolic/Pass.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef MOVFORTH_mov_symbolicPASS_H 4 | #define MOVFORTH_mov_symbolicPASS_H 5 | 6 | #include "../Symbolic/Structures.h" 7 | #include "../Interpretation/iWord.h" 8 | #include "sWord.h" 9 | #include "../Interpretation/ForthWord.h" 10 | 11 | namespace mov { 12 | class Analysis{ 13 | /** 14 | * cached results of already analyzed words 15 | */ 16 | std::unordered_map visited_words{}; 17 | 18 | /** converts an iData (interpretation data object) 19 | * into an sData (analysis data object). 
Will analyze word types 20 | * stored in the data if the word exists 21 | * @param data iData to be converted 22 | * @return sData 23 | */ 24 | sData symbolize_data(iData data); 25 | 26 | /** 27 | * Takes a ForthWord object from the interpreter and converts 28 | * it to a mostly empty (graphless) sWord that has basic blocks 29 | * premade according to the ForthWord's BRANCH and BRANCHIF 30 | * @param template_word ForthWord from the interpreter for reference 31 | * @return new sWordptr with basic blocks precomputed (and nothing else) 32 | */ 33 | sWordptr translate_to_basic_blocks(ForthWord *template_word); 34 | 35 | /** 36 | * Creates stack graph for the provided word 37 | * Prerequisite: word must have basic blocks computed 38 | * @param wordptr word that will have its stack graph computed 39 | */ 40 | void word_stack_graph(sWordptr wordptr); 41 | 42 | /** 43 | * Takes a root BB and creates all basic block stack graphs that 44 | * are linked to the root BB in the control flow graph 45 | * Also links stack graphs between BBs 46 | * Called exclusively by word_stack_graph 47 | * @param stack inital stack state 48 | * @param bb 49 | */ 50 | void explore_graph_dfs(NodeList stack, Block &bb, RegisterGen param_gen); 51 | 52 | /** 53 | * Make a stack graph for an entire definition via multiple calls to propagate_stack(). 
54 | * Also compute and set all non-stack effects for bb 55 | * Also sets inputs and outputs, the OVERALL stack state before and after 56 | * the block executes, respectively 57 | * 58 | * @param running_stack inital state of the stack 59 | * @param bb the basic block whose instructions with be graphed 60 | * @param register_gen to generate_function the names of new registers 61 | * @param params Nodelist where additional params will be noted 62 | * @return the running stack 63 | */ 64 | static NodeList 65 | basic_block_stack_graph(NodeList &running_stack, Block &bb, NodeList ¶ms, 66 | RegisterGen ®ister_gen, RegisterGen ¶m_gen); 67 | 68 | /** 69 | * Appends new register edges to the existing register graph, based on 70 | * the push and pop effects of the provided instruction. Also updates 71 | * stack parameter to the state of stack after this instruction 72 | * 73 | * @param stack current state of stack 74 | * @param instruction dictates the number of graph edges to be added 75 | * @param params if additional parameter nodes are required, they will be added to this 76 | * @param register_gen to generate_function the names of new registers 77 | */ 78 | static void 79 | propagate_stack(NodeList &stack, Instruction *instruction, NodeList ¶ms, RegisterGen ®ister_gen, 80 | RegisterGen ¶m_gen); 81 | 82 | 83 | static void 84 | propagate_stack_tolocal(NodeList &stack, Instruction *instruction, NodeList ¶ms, RegisterGen ¶m_gen, 85 | RegisterGen ®ister_gen, sWordptr parent); 86 | 87 | static void 88 | propagate_stack_fromlocal(NodeList &stack, Instruction *instruction, NodeList ¶ms, sWordptr parent); 89 | 90 | /** 91 | * If there are stack elements that remain unchanged (but could be shuffled) 92 | * between the start and end of this BB, then note the unchanged register pairs 93 | * in bb.effects.matching pairs 94 | * 95 | * @param bb the basic block in which to find matching pairs 96 | */ 97 | void compute_matching_pairs(Block &bb); 98 | 99 | 100 | public: 101 | /** 102 | 
/**
 * Names one edge of the stack graph.
 *
 * A register is identified by its running `index`, the id of the group
 * (basic block) that created it and its kind. The struct must remain an
 * aggregate: RegisterGen brace-initialises it as {index, bb_index, type}.
 */
struct Register{
    int index = -1;
    int bb_index = -1;
    enum registerType {UNDEF=0, NORMAL=1, PARAM=2, RETURN=3} register_type = UNDEF;

    /**
     * Post-increment: advances `index` and returns the value the register
     * had BEFORE the increment, as post-increment is expected to.
     */
    Register operator++(int){
        const Register previous = *this;
        index++;
        return previous;
    }

    bool operator==(const Register &other) const {
        return (index == other.index &&
                bb_index == other.bb_index &&
                register_type == other.register_type);
    }

    /** Human-readable form, e.g. "(register 2-5)". */
    [[nodiscard]] std::string to_string() const{
        switch (register_type)
        {
            case NORMAL:
                return "(register " + std::to_string(bb_index) + "-" + std::to_string(index) + ")";
            case PARAM:
                return "(param " + std::to_string(bb_index) + "-" + std::to_string(index) + ")";
            case RETURN:
                return "(return " + std::to_string(bb_index) + "-" + std::to_string(index) + ")";
            case UNDEF:
                return "(undefined)";
            default:
                return "(fucked)";
        }
    }

    /** Compact form without spaces or parentheses, e.g. "reg2.5". */
    [[nodiscard]] std::string to_string_allowed_chars() const {
        switch (register_type)
        {
            case NORMAL:
                return "reg" + std::to_string(bb_index) + "." + std::to_string(index);
            case PARAM:
                return "par" + std::to_string(bb_index) + "." + std::to_string(index);
            case RETURN:
                return "ret" + std::to_string(bb_index) + "." + std::to_string(index);
            case UNDEF:
                return "undef";
            default:
                return "fuck";
        }
    }

public:
    /**
     * Hash functor for unordered containers keyed by Register.
     * Packs index (bits 32..63), bb_index (bits 0..31) and the kind
     * (bits 61..63) into 64 bits. The packing is done in a 64-bit integer
     * so the shifts are well defined even where std::size_t is narrower.
     */
    struct RegisterHash{
        std::size_t operator()(const Register& r) const
        {
            unsigned long long packed = static_cast<unsigned int>(r.index);
            packed <<= 32;
            packed |= static_cast<unsigned int>(r.bb_index);
            packed |= static_cast<unsigned long long>(r.register_type) << 61;

            if constexpr (sizeof(std::size_t) < sizeof(packed)) {
                // fold the high half in so the index still contributes
                return static_cast<std::size_t>(packed ^ (packed >> 32));
            } else {
                return static_cast<std::size_t>(packed);
            }
        }
    };

};

/**
 * Produces fresh registers for one group: NORMAL registers through get(),
 * PARAM registers through get_param(). Both sequences start at index 1 and
 * advance independently.
 */
struct RegisterGen{
    /** @return the next unused NORMAL register */
    Register get(){
        return current++;
    }
    /** @return the next unused PARAM register */
    Register get_param(){
        return current_param++;
    }

    explicit RegisterGen(int groupID) : groupID(groupID) {}
private:
    // Declared first: the two registers below read it in their initialisers
    const int groupID;
    Register current = {1, groupID, Register::NORMAL};
    Register current_param = {1, groupID, Register::PARAM};
};

/**
 * Vertex of the stack graph. Every node knows its predecessor and the
 * register on the edge leading to it; the forward edge is only filled in
 * by the bidirectional link helpers.
 */
struct Node{
    Node* prev_node = nullptr;
    Register backward_edge_register; // guaranteed to exist
    Register forward_edge_register; // may not always be nonnull

    /** Links front -> back one way: only `front` is modified. */
    static void link(Node *back, Node *front, Register id){
        front->prev_node = back;
        front->backward_edge_register = id;
    }

    /** Links both ways: `back` additionally records `id` as its forward edge. */
    static void link_bidirection(Node *back, Node *front, Register id){
        front->prev_node = back;
        front->backward_edge_register = id;

        back->forward_edge_register = id;
    }

    /**
     * Renames the edge that leads into `node`. The predecessor's forward
     * edge is updated as well when a predecessor exists; a node that has
     * not been linked yet only gets its own backward edge changed.
     */
    static void redefine_preceding_edge(Node *node, Register id){
        node->backward_edge_register = id;
        if(node->prev_node != nullptr)
            node->prev_node->forward_edge_register = id;
    }

    /** Changes only the kind of the edge that leads into `node`. */
    static void redefine_preceding_type(Node *node, Register::registerType ty){
        Register retyped = node->backward_edge_register;
        retyped.register_type = ty;
        redefine_preceding_edge(node, retyped);
    }
};

/**
 * Thin convenience wrapper around a vector of Node pointers that models a
 * stack: the back of the vector is the top, the front is the bottom.
 * The list does not own the nodes it points to.
 */
struct NodeList : std::vector<Node*>{
    /** Pushes on top. @return the node that was pushed */
    Node* push_back(Node *push){
        std::vector<Node*>::push_back(push);
        return back();
    }

    /** Allocates a node and places it at the bottom. @return the new node */
    Node* new_bottom(){
        return push_bottom(new Node);
    }

    /** Inserts at the bottom. @return the node that was inserted */
    Node* push_bottom(Node *push){ // bad perf, but keeping it simple
        std::vector<Node*>::insert(begin(), push);
        return front();
    }

    /** Allocates a node and pushes it on top. @return the new node */
    Node* new_top(){
        return push_back(new Node);
    }

    unsigned int size() const{
        return (unsigned int) std::vector<Node*>::size();
    }

    /**
     * Moves the top `num` nodes of `from` onto `to`, topmost node first.
     * A non-positive `num` moves nothing; a `num` larger than the list moves
     * every node, so the call never reads outside `from`.
     */
    static void move_top_elements(NodeList &from, NodeList &to, int num){
        if(num <= 0)
            return;

        const unsigned int available = from.size();
        const unsigned int requested = static_cast<unsigned int>(num);
        const unsigned int count = requested < available ? requested : available;

        for(unsigned int i = 1; i <= count; i++){
            Node *stack_top = from[from.size() - i];
            to.push_back(stack_top);
        }
        from.erase(from.end() - count, from.end());
    }
};
-------------------------------------------------------------------------------- 1 | 2 | #ifndef MOVFORTH_SYSTEMEXEC_H 3 | #define MOVFORTH_SYSTEMEXEC_H 4 | 5 | std::string exec(const std::string& command); 6 | 7 | #endif //MOVFORTH_SYSTEMEXEC_H 8 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "headers/Interpretation/Interpreter.h" 3 | #include "headers/Symbolic/Pass.h" 4 | #include "headers/Generation/IRGenerator.h" 5 | #include "headers/Generation/ObjectGenerator.h" 6 | 7 | int main(int argc, char* argv[]) { 8 | 9 | std::string forth_path; 10 | 11 | if(argc == 1) { // no args, set to default 12 | forth_path = "../CompilationSpace/boot.fs"; 13 | }else{ // argument(s) passed! Is it a .fs file? 14 | if(argc == 2) { 15 | forth_path = std::string(argv[1]); 16 | }else{ 17 | std::cout << "Pass only one argument - the path to a Forth file" << std::endl; 18 | return 1; 19 | } 20 | } 21 | 22 | std::string program_name = forth_path.substr(0, forth_path.find_last_of('.')); 23 | 24 | 25 | // Create a plain old interpreter that interprets 26 | // the contents of the Forth file boot.fs 27 | mov::Interpreter interpreter(forth_path); 28 | bool interpret_error = interpreter.interpret(); 29 | if(interpret_error) { 30 | println("Interpretation phase ended with errors"); 31 | return 1; 32 | } 33 | 34 | 35 | // Extract the compiled word "main" from the interpreter's 36 | // dictionary. 
"main" has pointers to each of its definition words, 37 | // which also have pointers to each of their definition words 38 | auto word_to_compile = interpreter.find("main"); 39 | if(!word_to_compile) { 40 | println("Could not find a user-written or generated word with name \"main\""); 41 | return 1; 42 | } 43 | 44 | // Do static analysis on "main," and return symbolic information 45 | // computed from "main" in the form of a mov::sWord 46 | mov::Analysis analysis; 47 | analysis.inlining(word_to_compile); 48 | dynamic_cast(word_to_compile)->definition_to_string(); 49 | auto converted_word = analysis.static_analysis(word_to_compile); 50 | 51 | 52 | // Show the fruits of labor from static analysis 53 | mov::Analysis::show_word_info(converted_word); 54 | 55 | 56 | // Generate IR from the symbolic object 57 | mov::IRGenerator ir_generator; 58 | auto module_result = ir_generator.generate(converted_word); 59 | if(module_result.second) { 60 | println("Error during IR generation"); 61 | return 1; 62 | } 63 | 64 | 65 | ir_generator.print_module(program_name, true); 66 | 67 | // Run the new module_result 68 | // Note: invokes `lli` command in a new shell instance 69 | // May not work on every system 70 | //ir_generator.exec_module(program_name); 71 | 72 | bool generate_error = mov::ObjectGenerator::generate(program_name + ".S", module_result.first); 73 | if(generate_error) { 74 | println("Error during executable generation"); 75 | return 1; 76 | } 77 | 78 | // Link the assembly file provided 79 | // Note: invokes `clang++` command in a new shell instance 80 | // May not work on every system 81 | mov::ObjectGenerator::link(program_name + ".S", program_name); 82 | 83 | ir_generator.exec_module2(program_name); 84 | 85 | return 0; 86 | } 87 | 88 | -------------------------------------------------------------------------------- /sources/Generation/FBuilder.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 
"../../headers/Generation/FBuilder.h" 4 | 5 | using namespace mov; 6 | using namespace llvm; 7 | 8 | FBuilder::FBuilder(LLVMContext &context, Function *the_function) : 9 | IRBuilder<>(context), the_context(context), the_function(the_function) 10 | { 11 | val_ptrs.reserve(10); // there are usually less then 10 Variables in a word 12 | blocks.reserve(5); // there are usually less then 10 blocks in a word 13 | } 14 | 15 | 16 | Value* FBuilder::build_load_register(Register reg) { 17 | Value *alloc = get_ptr_to_val(reg); 18 | 19 | return CreateLoad(alloc); 20 | } 21 | 22 | void FBuilder::build_store_register(Value *value, Register reg) { 23 | Value *alloc = create_ptr_to_val(reg); 24 | CreateStore(value, alloc); 25 | } 26 | 27 | Value* FBuilder::create_ptr_to_val(Register reg) { 28 | bool already_has = val_ptrs.find(reg) != val_ptrs.end(); 29 | if(!already_has) { 30 | //println("inserted new alloca: ", reg.to_string_allowed_chars()); 31 | AllocaInst* new_alloc = create_entry_block_alloca(the_function, reg.to_string_allowed_chars()); 32 | val_ptrs.insert(std::make_pair(reg, new_alloc)); 33 | return new_alloc; 34 | } else { 35 | //println("already have alloca: ", reg.to_string_allowed_chars()); 36 | return val_ptrs.at(reg); 37 | } 38 | } 39 | 40 | Value* FBuilder::get_ptr_to_val(Register reg) { 41 | Value *ptr_to_val; 42 | try { 43 | ptr_to_val = val_ptrs.at(reg); 44 | } catch (std::out_of_range&){ 45 | println("register " + reg.to_string_allowed_chars(), " does not exist"); 46 | return nullptr; 47 | } 48 | //println("retrived register " + reg.to_string_allowed_chars()); 49 | 50 | if(ptr_to_val == nullptr) 51 | println("WHAT THE FUCKKKK"); 52 | 53 | return ptr_to_val; 54 | } 55 | 56 | void FBuilder::insert_val_ptr(Register reg, Value *a_i){ 57 | val_ptrs.insert(std::make_pair(reg, a_i)); 58 | } 59 | 60 | 61 | void FBuilder::create_block(uint index, BasicBlock* block) { 62 | blocks.insert(std::make_pair(index, block)); 63 | } 64 | 65 | BasicBlock* FBuilder::get_block(uint 
index) { 66 | return blocks.at(index); 67 | } 68 | 69 | AllocaInst* FBuilder::create_entry_block_alloca(Function *func, const std::string &var_name) { 70 | IRBuilder<> entry_builder(&func->getEntryBlock(), 71 | func->getEntryBlock().begin()); 72 | return entry_builder.CreateAlloca(Type::getInt64Ty(the_context), nullptr, var_name); 73 | } 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /sources/Generation/IRGenerator.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include // just for testing 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "llvm/Support/raw_ostream.h" 9 | #include "llvm/Support/FileSystem.h" 10 | 11 | #include "llvm/Transforms/Scalar.h" // for some reason some passes live here 12 | #include "llvm/Transforms/Utils/Mem2Reg.h" 13 | #include "llvm/Transforms/Utils/PromoteMemToReg.h" 14 | #include "llvm/Transforms/Utils.h" 15 | 16 | #include "llvm/Transforms/InstCombine/InstCombine.h" 17 | 18 | // #include "llvm/Transforms/IPO.h" 19 | #include "llvm/Transforms/IPO/Inliner.h" 20 | 21 | #include "../../headers/Generation/IRGenerator.h" 22 | #include "../../headers/Interpretation/iWord.h" 23 | #include "../../headers/SystemExec.h" 24 | 25 | using namespace llvm; 26 | using namespace mov; 27 | 28 | 29 | IRGenerator::IRGenerator() 30 | : the_context(LLVMContext()), 31 | the_module(std::make_shared("MovForth", the_context)), 32 | fpm(std::make_unique(the_module.get())) 33 | { 34 | 35 | // Promote allocas to registers. 
36 | fpm->add(createPromoteMemoryToRegisterPass()); //SSA conversion 37 | // fpm->add(createCFGSimplificationPass()); //Dead code elimination 38 | // fpm->add(createSROAPass()); 39 | // fpm->add(new InlinerPass()); // TODO not sure what threshold means 40 | // fpm->add(createFunctionInliningPass()); 41 | // fpm->add(createLoopSimplifyCFGPass()); 42 | // fpm->add(createConstantPropagationPass()); 43 | // fpm->add(createNewGVNPass());//Global value numbering 44 | // fpm->add(createReassociatePass()); 45 | // fpm->add(createPartiallyInlineLibCallsPass()); //Inline standard calls 46 | // fpm->add(createDeadCodeEliminationPass()); 47 | // fpm->add(createCFGSimplificationPass()); //Cleanup 48 | 49 | // merge successive `icmp %x, 0` which can happen becasue branchif compiles 50 | // an icmp, which must be undone by another icmp 51 | fpm->add(createInstructionCombiningPass()); 52 | // fpm->add(createFlattenCFGPass()); //Flatten the control flow graph. 53 | 54 | // constant folding pass 55 | fpm->add(createSROAPass()); 56 | fpm->add(createCorrelatedValuePropagationPass()); 57 | fpm->add(createReassociatePass()); 58 | fpm->add(createCFGSimplificationPass()); 59 | 60 | 61 | fpm->doInitialization(); 62 | } 63 | 64 | std::pair IRGenerator::generate(mov::sWord *root) { 65 | 66 | declare_printf(); 67 | declare_malloc(); 68 | declare_free(); 69 | 70 | Function *func = generate_function(root, true); 71 | if(!func) 72 | return std::make_pair(IRModule(nullptr), true); 73 | 74 | // optimize_module_becasue_for_some_reason_FPM_isnt_doing_anything(); 75 | fpm->doFinalization(); 76 | 77 | return std::make_pair(IRModule(the_module), false); 78 | } 79 | 80 | Function *IRGenerator::get_function(sWordptr fword) { 81 | bool already_generated = visited_words.find(fword) != visited_words.end(); 82 | if(!already_generated) { 83 | auto pair = visited_words.insert(std::pair(fword, generate_function(fword, false))); 84 | return pair.first->second; 85 | }else{ 86 | return visited_words.at(fword); 87 
| } 88 | } 89 | void IRGenerator::exec_module(const std::string& program_name) { 90 | dln(); 91 | dln("=========[Exec]========="); 92 | println(exec("lli " + program_name + ".ll")); 93 | } 94 | void IRGenerator::exec_module2(const std::string& program_name) { 95 | dln(); 96 | dln("=========[Exec]========="); 97 | println(exec("./" + program_name)); 98 | } 99 | Function *IRGenerator::generate_function(mov::sWord *fword, bool is_root) { 100 | 101 | dln(); 102 | dln("=========[IR for " + fword->name + "]========="); 103 | 104 | if(is_root && fword->effects.num_popped != 0) { 105 | println("Word ", fword->name, " must not pop from stack to be compiled"); 106 | return nullptr; 107 | } 108 | 109 | if(is_root && fword->effects.num_pushed != 0) { 110 | println("Word ", fword->name, " must not push to stack to be compiled"); 111 | return nullptr; 112 | } 113 | 114 | // make_main(); 115 | 116 | uint num_params = fword->effects.num_popped; 117 | uint num_returns = fword->effects.num_pushed; 118 | 119 | // create empty function corresponding to fword 120 | std::vector arg_types; 121 | arg_types.insert(arg_types.end(), num_params, Type::getInt64Ty(the_context)); 122 | arg_types.insert(arg_types.end(), num_returns,Type::getInt64PtrTy(the_context)); 123 | 124 | FunctionType *func_type = FunctionType::get(Type::getVoidTy(the_context), arg_types, false); 125 | Function* the_function = Function::Create(func_type, 126 | is_root? 
Function::ExternalLinkage : Function::PrivateLinkage, 127 | fword->name, 128 | the_module.get()); 129 | 130 | FBuilder builder(the_context, the_function); 131 | 132 | // create an empty entry block -- sometimes forth code 133 | // will branch to entry, which is not allowed 134 | BasicBlock::Create(the_context, "entry", the_function); 135 | 136 | // create empty basic blocks - not added to function yet 137 | for(const auto& block : fword->blocks) { 138 | BasicBlock *newbb = BasicBlock::Create( 139 | the_context, 140 | std::to_string(block.index) + ".br", 141 | the_function); 142 | builder.create_block(block.index, newbb); 143 | } 144 | 145 | // insert at the entry block 146 | builder.SetInsertPoint(&the_function->getEntryBlock()); 147 | builder.CreateBr(builder.get_block(1)); 148 | 149 | // insert at the first Forth block 150 | builder.SetInsertPoint(builder.get_block(1)); 151 | 152 | // set names for all arguments, alloca space for them, 153 | // store the arg register in the space, 154 | // and record the AllocaInst in the register map 155 | RegisterGen param_name_gen(0); 156 | for (int i = 0; i < fword->effects.num_popped; i++) { 157 | auto *arg = the_function->args().begin() + i; 158 | 159 | Register reg = param_name_gen.get_param(); 160 | arg->setName(reg.to_string_allowed_chars() + "reg"); 161 | 162 | println("Store argument at register ", arg->getName().str()); 163 | 164 | builder.build_store_register(arg, reg); 165 | } 166 | 167 | // register all reference params as AllocInst* in the lookup 168 | uint word_ret = 0; 169 | for (uint i = fword->effects.num_popped; i < fword->effects.num_popped + fword->effects.num_pushed; i++) { 170 | // Value that is actually Alloca instance (pointer) 171 | Value *arg = the_function->args().begin() + i; 172 | 173 | Register reg = fword->my_graphs_returns[word_ret++]->backward_edge_register; 174 | arg->setName(reg.to_string_allowed_chars() + "reg"); 175 | 176 | builder.insert_val_ptr(reg, arg); 177 | } 178 | 179 | // now lets 
iterate over every instruction in every Block 180 | for(const auto& block : fword->blocks) { 181 | //println("IR for Basic Block #", block.index); 182 | indent(); 183 | 184 | BasicBlock *bb = builder.get_block(block.index); 185 | 186 | // the_function->getBasicBlockList().push_back(bb); 187 | builder.SetInsertPoint(bb); 188 | 189 | for(auto instr : block.instructions) { 190 | 191 | println("Generating IR for [", instr->name(), "]"); 192 | switch (instr->id()) { 193 | case OTHER: { 194 | //println("other"); 195 | 196 | std::vector arg_values; 197 | 198 | for (auto pop: instr->pop_nodes) { 199 | //println("push value to arg"); 200 | arg_values.push_back(builder.build_load_register(pop->backward_edge_register)); 201 | } 202 | for (auto push: instr->push_nodes) { 203 | // alloca instruction (pointer) hidden as value 204 | //println("push AllocaInstr to arg"); 205 | arg_values.push_back(builder.create_ptr_to_val(push->forward_edge_register)); 206 | } 207 | sWordptr werd = instr->linked_word; 208 | Function *funk = get_function(werd); 209 | if(!funk) 210 | return nullptr; 211 | 212 | builder.CreateCall(funk, arg_values); 213 | break; 214 | } 215 | 216 | case LITERAL: { 217 | //println("Literal(", instr->data.as_num(), ")"); 218 | 219 | Value *constant = builder.CreateForthConstant(instr->data.as_num()); 220 | 221 | Register push_to_reg = instr->push_nodes[0]->forward_edge_register; 222 | 223 | builder.build_store_register(constant, push_to_reg); 224 | break; 225 | } 226 | 227 | case MALLOC: { 228 | Register amount = instr->pop_nodes[0]->backward_edge_register; 229 | Value *amount_v = builder.build_load_register(amount); 230 | 231 | Value *mem_pointer_void = builder.CreateCall(the_module->getFunction("malloc"), amount_v); 232 | 233 | Register mem_pointer_register = instr->push_nodes[0]->forward_edge_register; 234 | Value *mem_pointer = builder.CreatePtrToInt(mem_pointer_void, builder.getInt64Ty()); 235 | builder.build_store_register(mem_pointer, mem_pointer_register); 236 
| 237 | break; 238 | } 239 | 240 | case FREE: { 241 | Register mem_pointer = instr->pop_nodes[0]->backward_edge_register; 242 | Value *mem_pointer_v = builder.build_load_register(mem_pointer); 243 | 244 | Value *mem_pointer_as_ptr = builder.CreateIntToPtr(mem_pointer_v, builder.getInt8PtrTy()); 245 | 246 | builder.CreateCall(the_module->getFunction("free"), mem_pointer_as_ptr); 247 | 248 | break; 249 | } 250 | 251 | case STORE: { 252 | Register mem_address = instr->pop_nodes[0]->backward_edge_register; 253 | Register value = instr->pop_nodes[1]->backward_edge_register; 254 | 255 | Value *mem_address_v = builder.build_load_register(mem_address); 256 | Value *mem_pointer_v = builder.CreateIntToPtr(mem_address_v, PointerType::get(builder.getInt64Ty(), 0)); 257 | Value *value_v = builder.build_load_register(value); 258 | 259 | builder.CreateStore(value_v, mem_pointer_v); 260 | 261 | break; 262 | } 263 | 264 | case FETCH: { 265 | 266 | Register mem_address = instr->pop_nodes[0]->backward_edge_register; 267 | Value *mem_address_v = builder.build_load_register(mem_address); 268 | // Value *mem_pointer_v = builder.CreateIntToPtr(mem_address_v, builder.getInt8PtrTy()); 269 | Value *mem_pointer_v = builder.CreateIntToPtr(mem_address_v, Type::getInt64PtrTy(builder.getContext())); 270 | 271 | Value *val = builder.CreateLoad( 272 | //PointerType::get(builder.getInt64Ty(), 0), 273 | builder.getInt64Ty(), 274 | mem_pointer_v); 275 | 276 | Register val_register = instr->push_nodes[0]->forward_edge_register; 277 | builder.build_store_register(val, val_register); 278 | 279 | break; 280 | } 281 | 282 | case BRANCH: { 283 | //println("Branch"); 284 | 285 | Block *jump_to = instr->as_branch()->jump_to; 286 | BasicBlock *dest = builder.get_block(jump_to->index); 287 | 288 | builder.CreateBr(dest); 289 | break; 290 | } 291 | 292 | case BRANCHIF: { 293 | //println("Branchif"); 294 | 295 | Register condition = instr->pop_nodes[0]->backward_edge_register; 296 | 297 | Value *cond_value = 
builder.build_load_register(condition); 298 | 299 | Value *TF = builder.CreateICmpEQ( 300 | ConstantInt::get(the_context, APInt(64, 0)), 301 | cond_value 302 | ); 303 | 304 | Block *jump_true = instr->as_branchif()->jump_to_next; 305 | Block *jump_false = instr->as_branchif()->jump_to_far; 306 | 307 | BasicBlock *true_dest = builder.get_block(jump_true->index); 308 | BasicBlock *false_dest = builder.get_block(jump_false->index); 309 | 310 | // yes, true_dest and false_dest are flipped 311 | builder.CreateCondBr(TF, false_dest, true_dest); 312 | break; 313 | } 314 | 315 | case EQUALS: { 316 | //println("Equals"); 317 | 318 | Register one = instr->pop_nodes[0]->backward_edge_register; 319 | Register two = instr->pop_nodes[1]->backward_edge_register; 320 | 321 | Value *one_v = builder.build_load_register(one); 322 | Value *two_v = builder.build_load_register(two); 323 | 324 | Value *equals_bool = builder.CreateICmpEQ(one_v, two_v); 325 | Value *equals = builder.CreateIntCast(equals_bool, builder.getInt64Ty(), false); 326 | 327 | Register diff_register = instr->push_nodes[0]->forward_edge_register; 328 | builder.build_store_register(equals, diff_register); 329 | break; 330 | } 331 | 332 | case EMIT: { 333 | //println("Emit"); 334 | 335 | Register num = instr->pop_nodes[0]->backward_edge_register; 336 | 337 | Value *num_value = builder.build_load_register(num); 338 | 339 | std::vector print_args{ 340 | builder.get_emit_string_ptr(), 341 | num_value 342 | }; 343 | 344 | builder.CreateCall(the_module->getFunction("printf"), print_args); 345 | break; 346 | } 347 | 348 | case AND: { 349 | Register one = instr->pop_nodes[0]->backward_edge_register; 350 | Register two = instr->pop_nodes[1]->backward_edge_register; 351 | 352 | Value *one_v = builder.build_load_register(one); 353 | Value *two_v = builder.build_load_register(two); 354 | 355 | Value *anded = builder.CreateAnd(one_v, two_v); 356 | Register anded_register = instr->push_nodes[0]->forward_edge_register; 357 | 358 | 
builder.build_store_register(anded, anded_register); 359 | 360 | break; 361 | } 362 | 363 | case LESS: { 364 | Register one = instr->pop_nodes[0]->backward_edge_register; 365 | Register two = instr->pop_nodes[1]->backward_edge_register; 366 | 367 | Value *one_v = builder.build_load_register(one); 368 | Value *two_v = builder.build_load_register(two); 369 | 370 | Value *less_than = builder.CreateICmpSLT(two_v, one_v); 371 | Value *less_than64 = builder.CreateIntCast(less_than, builder.getInt64Ty(), false); 372 | 373 | Register less_than_register = instr->push_nodes[0]->forward_edge_register; 374 | builder.build_store_register(less_than64, less_than_register); 375 | 376 | break; 377 | } 378 | 379 | case ADD: { 380 | Register one = instr->pop_nodes[0]->backward_edge_register; 381 | Register two = instr->pop_nodes[1]->backward_edge_register; 382 | 383 | Value *one_v = builder.build_load_register(one); 384 | Value *two_v = builder.build_load_register(two); 385 | 386 | Value *sum = builder.CreateAdd(one_v, two_v); 387 | Register sum_register = instr->push_nodes[0]->forward_edge_register; 388 | 389 | builder.build_store_register(sum, sum_register); 390 | break; 391 | } 392 | 393 | case SUBTRACT: { 394 | Register one = instr->pop_nodes[0]->backward_edge_register; 395 | Register two = instr->pop_nodes[1]->backward_edge_register; 396 | 397 | Value *one_v = builder.build_load_register(one); 398 | Value *two_v = builder.build_load_register(two); 399 | 400 | Value *diff = builder.CreateSub(two_v, one_v); 401 | Register diff_register = instr->push_nodes[0]->forward_edge_register; 402 | 403 | builder.build_store_register(diff, diff_register); 404 | break; 405 | } 406 | 407 | case MULTIPLY: { 408 | Register one = instr->pop_nodes[0]->backward_edge_register; 409 | Register two = instr->pop_nodes[1]->backward_edge_register; 410 | 411 | Value *one_v = builder.build_load_register(one); 412 | Value *two_v = builder.build_load_register(two); 413 | 414 | Value *product = 
builder.CreateMul(one_v, two_v); 415 | Register product_register = instr->push_nodes[0]->forward_edge_register; 416 | 417 | builder.build_store_register(product, product_register); 418 | break; 419 | } 420 | 421 | case DIVIDE: { 422 | Register one = instr->pop_nodes[0]->backward_edge_register; 423 | Register two = instr->pop_nodes[1]->backward_edge_register; 424 | 425 | Value *one_v = builder.build_load_register(one); 426 | Value *two_v = builder.build_load_register(two); 427 | 428 | Value *factor = builder.CreateSDiv(two_v, one_v); 429 | Register factor_register = instr->push_nodes[0]->forward_edge_register; 430 | 431 | builder.build_store_register(factor, factor_register); 432 | break; 433 | } 434 | 435 | case DUP: { 436 | Register one = instr->pop_nodes[0]->backward_edge_register; 437 | 438 | Value *one_v = builder.build_load_register(one); 439 | 440 | Register out_one = instr->push_nodes[0]->forward_edge_register; 441 | Register out_two = instr->push_nodes[1]->forward_edge_register; 442 | 443 | builder.build_store_register(one_v, out_one); 444 | builder.build_store_register(one_v, out_two); 445 | 446 | break; 447 | } 448 | 449 | case SWAP: { 450 | //println("Swap"); 451 | 452 | Register one = instr->pop_nodes[0]->backward_edge_register; 453 | Register two = instr->pop_nodes[1]->backward_edge_register; 454 | 455 | Value *one_v = builder.build_load_register(one); 456 | Value *two_v = builder.build_load_register(two); 457 | 458 | Register one_out = instr->push_nodes[0]->forward_edge_register; 459 | Register two_out = instr->push_nodes[1]->forward_edge_register; 460 | 461 | builder.build_store_register(one_v, one_out); 462 | builder.build_store_register(two_v, two_out); 463 | break; 464 | } 465 | 466 | case ROT: { 467 | //println("Rot"); 468 | 469 | Register one = instr->pop_nodes[0]->backward_edge_register; 470 | Register two = instr->pop_nodes[1]->backward_edge_register; 471 | Register three = instr->pop_nodes[2]->backward_edge_register; 472 | 473 | Value *one_v 
= builder.build_load_register(one); 474 | Value *two_v = builder.build_load_register(two); 475 | Value *three_v = builder.build_load_register(three); 476 | 477 | Register one_out = instr->push_nodes[0]->forward_edge_register; 478 | Register two_out = instr->push_nodes[1]->forward_edge_register; 479 | Register three_out = instr->push_nodes[2]->forward_edge_register; 480 | 481 | builder.build_store_register(one_v, two_out); 482 | builder.build_store_register(two_v, one_out); 483 | builder.build_store_register(three_v, three_out); 484 | 485 | break; 486 | } 487 | 488 | case FROMLOCAL:{ 489 | println("from local ", instr->data.as_local().to_string()); 490 | Register in = fword->locals.at(instr->data.as_local()); 491 | Value *in_v = builder.build_load_register(in); 492 | 493 | Register out = instr->push_nodes[0]->forward_edge_register; 494 | builder.build_store_register(in_v, out); 495 | 496 | break; 497 | } 498 | 499 | case TOLOCAL: { 500 | println("to local ", instr->data.as_local().to_string()); 501 | Register in = instr->pop_nodes[0]->backward_edge_register; 502 | Value *in_v = builder.build_load_register(in); 503 | 504 | 505 | Register out = fword->locals.at(instr->data.as_local()); 506 | builder.build_store_register(in_v, out); 507 | 508 | break; 509 | } 510 | 511 | case DROP: // becasue it actually does nothing in terms 512 | // of new register values 513 | break; 514 | 515 | case EXIT: // do nothing; IR gen has its own way of inserting returns 516 | break; 517 | 518 | 519 | default: 520 | println("Word ", instr->name(), " is not a word supported at runtime"); 521 | return nullptr; 522 | } 523 | } 524 | 525 | unindent(); 526 | //println(); 527 | } 528 | 529 | // insert return at end 530 | builder.SetInsertPoint(&the_function->getBasicBlockList().back()); 531 | builder.CreateRetVoid(); 532 | 533 | // do optimizations 534 | if(do_optimize) { 535 | println("Running optimization passes"); 536 | fpm->run(*the_function); 537 | }else{ 538 | println("Optimization 
disabled"); 539 | } 540 | 541 | // see if it's all right? 542 | if (verifyFunction(*the_function, &errs())) { 543 | println("Faulty IR for word ", fword->name); 544 | println("(Specific error dumped to error stream. Try scrolling up)"); 545 | the_function->print(errs()); 546 | return nullptr; 547 | } 548 | 549 | // yay 550 | dln("Done building IR"); 551 | 552 | return the_function; 553 | } 554 | 555 | class stdout_stream : public raw_ostream { 556 | uint64_t pos = 0; 557 | 558 | void write_impl(const char *Ptr, size_t Size) override { 559 | pos += Size; 560 | std::cout << Ptr; 561 | } 562 | 563 | [[nodiscard]] uint64_t current_pos() const override { 564 | return pos; 565 | } 566 | }; 567 | 568 | void IRGenerator::print_module(const std::string &program_name, bool to_file) { 569 | println(); 570 | println("==========[LLVM IR]===========\n"); 571 | 572 | stdout_stream stdout; 573 | the_module->print(stdout, nullptr); 574 | outs().flush(); 575 | 576 | // print to file 577 | if (to_file) { 578 | std::error_code ec; 579 | raw_fd_ostream file_out_stream(program_name + ".ll", ec, sys::fs::OpenFlags::OF_None); 580 | the_module->print(file_out_stream, nullptr, true, true); 581 | } 582 | } 583 | 584 | 585 | void IRGenerator::declare_printf(){ 586 | std::vector printf_arg_types {Type::getInt8PtrTy(the_context)}; 587 | FunctionType *printf_type = FunctionType::get(Type::getInt64Ty(the_context), printf_arg_types, true); 588 | Function::Create(printf_type, Function::ExternalLinkage, "printf", the_module.get()); 589 | } 590 | 591 | void IRGenerator::declare_malloc() { 592 | std::vector malloc_arg_types {Type::getInt64Ty(the_context)}; 593 | FunctionType *malloc_type = FunctionType::get(Type::getInt8PtrTy(the_context), malloc_arg_types, false); 594 | Function::Create(malloc_type, Function::ExternalLinkage, "malloc", the_module.get()); 595 | } 596 | 597 | void IRGenerator::declare_free() { 598 | std::vector free_arg_types {Type::getInt8PtrTy(the_context)}; 599 | FunctionType 
*free_type = FunctionType::get(Type::getVoidTy(the_context), free_arg_types, false); 600 | Function::Create(free_type, Function::ExternalLinkage, "free", the_module.get()); 601 | } 602 | 603 | Function* IRGenerator::make_main(){ 604 | 605 | // create empty main function 606 | FunctionType *main_type = FunctionType::get(Type::getInt64Ty(the_context), false); 607 | Function *main = Function::Create(main_type, Function::ExternalLinkage, "main", the_module.get()); 608 | 609 | FBuilder builder(the_context, main); 610 | 611 | // create empty entry BB in main function 612 | BasicBlock *entry = BasicBlock::Create(the_context, "entry", main); 613 | builder.SetInsertPoint(entry); 614 | 615 | std::vector print_args{ 616 | builder.CreateGlobalStringPtr("%d\n"), 617 | ConstantInt::get(the_context, APInt(64, 20)) 618 | }; 619 | builder.CreateCall(the_module->getFunction("printf"), print_args); 620 | builder.CreateRet(ConstantInt::get(the_context, APInt(64, 0))); 621 | 622 | return main; 623 | } 624 | 625 | 626 | void IRGenerator::hello_world2() { 627 | declare_printf(); 628 | make_main(); 629 | print_module("hello world"); 630 | } 631 | 632 | 633 | int IRGenerator::hello_world() { 634 | // create context, the_module and builder 635 | LLVMContextRef context = LLVMContextCreate(); 636 | LLVMModuleRef module = LLVMModuleCreateWithNameInContext("hello", context); 637 | LLVMBuilderRef builder = LLVMCreateBuilderInContext(context); 638 | 639 | // types 640 | LLVMTypeRef int_8_type = LLVMInt8TypeInContext(context); 641 | LLVMTypeRef int_8_type_ptr = LLVMPointerType(int_8_type, 0); 642 | LLVMTypeRef int_32_type = LLVMInt32TypeInContext(context); 643 | 644 | // puts function 645 | LLVMTypeRef puts_function_args_type[] { 646 | int_8_type_ptr 647 | }; 648 | 649 | LLVMTypeRef puts_function_type = LLVMFunctionType(int_32_type, puts_function_args_type, 1, false); 650 | LLVMValueRef puts_function = LLVMAddFunction(module, "puts", puts_function_type); 651 | // end 652 | 653 | // main function 
654 | LLVMTypeRef main_function_type = LLVMFunctionType(int_32_type, nullptr, 0, false); 655 | LLVMValueRef main_function = LLVMAddFunction(module, "main", main_function_type); 656 | 657 | LLVMBasicBlockRef entry = LLVMAppendBasicBlockInContext(context, main_function, "entry"); 658 | LLVMPositionBuilderAtEnd(builder, entry); 659 | 660 | LLVMValueRef puts_function_args[] = { 661 | LLVMBuildPointerCast(builder, // cast [14 x i8] type to int8 pointer 662 | LLVMBuildGlobalString(builder, "Hello, World!", "hello"), // build hello string constant 663 | int_8_type_ptr, "0") 664 | }; 665 | 666 | LLVMBuildCall(builder, puts_function, puts_function_args, 1, "i"); 667 | LLVMBuildRet(builder, LLVMConstInt(int_32_type, 0, false)); 668 | // end 669 | 670 | LLVMDumpModule(module); // dump the_module to STDOUT 671 | LLVMPrintModuleToFile(module, "../hello.ll", nullptr); 672 | 673 | // clean memory 674 | LLVMDisposeBuilder(builder); 675 | LLVMDisposeModule(module); 676 | LLVMContextDispose(context); 677 | 678 | println("done generation"); 679 | 680 | return 0; 681 | } 682 | 683 | 684 | 685 | 686 | IRModule::IRModule(std::shared_ptr m) 687 | : module(m) 688 | {} 689 | 690 | std::shared_ptr IRModule::get_module() { 691 | return module; 692 | } 693 | -------------------------------------------------------------------------------- /sources/Generation/ObjectGenerator.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "llvm/Support/FileSystem.h" 3 | #include "llvm/Support/TargetSelect.h" 4 | #include "llvm/Support/TargetRegistry.h" 5 | #include "llvm/Target/TargetOptions.h" 6 | #include "llvm/Target/TargetMachine.h" 7 | #include "llvm/Support/Host.h" 8 | 9 | #include "llvm/IR/LegacyPassManager.h" 10 | 11 | #include "../../headers/Generation/ObjectGenerator.h" 12 | #include "../../headers/SystemExec.h" 13 | #include "../../headers/Print.h" 14 | #include "../../headers/Generation/IRGenerator.h" 15 | 16 | using namespace mov; 17 | using 
namespace llvm; 18 | 19 | bool ObjectGenerator::generate(const std::string &filename, IRModule mod) { 20 | Module &module = *mod.get_module(); 21 | 22 | auto TargetTriple = sys::getDefaultTargetTriple(); 23 | println("Target Triple Detected as ", TargetTriple); 24 | 25 | // Don't need 26 | // InitializeAllTargets(); 27 | // InitializeAllTargetInfos(); 28 | // InitializeAllTargetMCs(); 29 | // InitializeAllAsmParsers(); 30 | // InitializeAllAsmPrinters(); 31 | 32 | // really only need this one 33 | // LLVMInitializeX86TargetInfo(); 34 | // LLVMInitializeX86TargetMC(); 35 | // LLVMInitializeX86AsmParser(); 36 | // LLVMInitializeX86AsmPrinter(); 37 | 38 | InitializeNativeTarget(); 39 | InitializeNativeTargetAsmParser(); 40 | InitializeNativeTargetAsmPrinter(); 41 | 42 | std::string Error; 43 | auto Target = TargetRegistry::lookupTarget(TargetTriple, Error); 44 | 45 | // Print an error and exit if we couldn't find the requested target. 46 | // This generally occurs if we've forgotten to initialise the 47 | // TargetRegistry or we have a bogus target triple. 
48 | if (!Target) { 49 | println(Error); 50 | return true; 51 | } 52 | 53 | auto CPU = "skylake"; 54 | auto Features = ""; 55 | 56 | TargetOptions opt; 57 | auto RM = Optional(); 58 | auto TargetMachine = Target->createTargetMachine(TargetTriple, CPU, Features, opt, RM); 59 | 60 | module.setDataLayout(TargetMachine->createDataLayout()); 61 | module.setTargetTriple(TargetTriple); 62 | 63 | std::error_code EC; 64 | raw_fd_ostream dest(filename, EC, sys::fs::OpenFlags::OF_None); 65 | 66 | if (EC) { 67 | println("Could not open file: ", EC.message()); 68 | return true; 69 | } 70 | 71 | legacy::PassManager pass; 72 | // auto FileType = TargetMachine::CGFT_ObjectFile; 73 | auto FileType = CGFT_AssemblyFile; 74 | 75 | if (TargetMachine->addPassesToEmitFile(pass, dest, &dest, FileType)) { 76 | println("TargetMachine can't emit a file of this type"); 77 | return true; 78 | } 79 | 80 | pass.run(module); 81 | dest.flush(); 82 | 83 | return false; 84 | } 85 | 86 | void ObjectGenerator::link(const std::string &assembly_name, const std::string &executable_name) { 87 | println(); 88 | println("==========[Linking]==========="); 89 | auto command = "clang++ " + assembly_name + " -stdlib=libstdc++ -fPIE -o " + executable_name; 90 | println(exec(command)); 91 | } 92 | -------------------------------------------------------------------------------- /sources/Interpretation/ForthWord.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by reschivon on 11/17/21. 
3 | // 4 | 5 | #include "../../headers/Interpretation/ForthWord.h" 6 | 7 | using namespace mov; 8 | 9 | 10 | ForthWord::ForthWord(std::string name, bool immediate) 11 | : iWord(name, immediate, false) {} 12 | 13 | void ForthWord::execute(IP ip, Interpreter &interpreter) { 14 | for(auto &[local, value] : locals) { 15 | locals[local] = iData(nullptr); 16 | } 17 | 18 | for(auto it = definition.begin(); it != definition.end(); it++) { 19 | 20 | // println(" [exec] ", (it->is_word()?it->as_word()->name():std::to_string(it->as_number())), " "); 21 | 22 | iData current_cell = *it; 23 | if(current_cell.is_word()) 24 | current_cell.as_word()->execute(it, interpreter); 25 | else 26 | println("Too many numbers in definition, no LITERAL to consume them"); 27 | } 28 | } 29 | void ForthWord::add(iData data){ 30 | definition.push_back(data); 31 | } 32 | 33 | void ForthWord::definition_to_string() { 34 | for(auto thing : definition) 35 | print(thing.to_string(), " "); 36 | } 37 | 38 | void ForthWord::set(int index, iData value) { 39 | auto front = definition.begin(); 40 | std::advance(front, index); 41 | *front = value; 42 | } 43 | 44 | 45 | -------------------------------------------------------------------------------- /sources/Interpretation/IP.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by reschivon on 11/17/21. 
3 | // 4 | 5 | #include "../../headers/Interpretation/IP.h" 6 | 7 | using namespace mov; 8 | 9 | -------------------------------------------------------------------------------- /sources/Interpretation/Interpreter.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../headers/Interpretation/Interpreter.h" 6 | 7 | #include "../../headers/Interpretation/ForthWord.h" 8 | #include "../../headers/Interpretation/Primitive.h" 9 | #include "../../headers/Interpretation/WordGenerator.h" 10 | #include "../../headers/Interpretation/Stack.h" 11 | 12 | using namespace mov; 13 | 14 | Interpreter::Interpreter(const std::string& path) 15 | : input(path), word_generator(*new iWordGenerator()), stack(*new Stack()){ 16 | init_words(); 17 | } 18 | 19 | bool Interpreter::interpret() { 20 | dln("========[Interpretation]======"); 21 | std::cout << std::endl; 22 | 23 | std::string token; 24 | while (true){ 25 | // get next token 26 | token = input.next_token(); 27 | if(!input.open()) break; 28 | 29 | // try to get primitive word for token 30 | iWordptr wordptr = find(token); 31 | 32 | // try to get Forth word for token 33 | if(!wordptr) 34 | wordptr = word_generator.get(token); 35 | 36 | // save for convenience 37 | ForthWord *latest_forth_word = nullptr; 38 | if(dictionary.back().is_forth_word()) 39 | latest_forth_word = dictionary.back().as_forth_word(); 40 | 41 | if(wordptr == nullptr){ 42 | // might be a number or Local 43 | 44 | if(latest_forth_word != nullptr){ 45 | auto &locals = latest_forth_word->locals; 46 | Local local = Local(latest_forth_word->name(), token); 47 | 48 | if(locals.find(local) != locals.end()) { 49 | // compile a FromLocal 50 | latest_forth_word->add(iData(new FromLocal(local, latest_forth_word->locals))); 51 | continue; 52 | } 53 | } 54 | 55 | try{ 56 | int num = stoi(token); 57 | 58 | if(immediate) 59 | stack.push(num); 60 | else { 61 | if(latest_forth_word != 
nullptr){ 62 | //dln("compile number ", num); 63 | latest_forth_word->add(iData(find("literal"))); 64 | latest_forth_word->add(iData(num)); 65 | }else{ 66 | println("attempted to compile LITERAL to a primitive word"); 67 | return true; 68 | } 69 | } 70 | } catch (...) { 71 | // not a number or word 72 | println("word ", token, " not found and not number"); 73 | return true; 74 | } 75 | }else{ 76 | // is a word 77 | if(wordptr->immediate || immediate) { 78 | // dln("execute word ", wordptr->name()); 79 | auto dummy = (new std::list())->begin(); 80 | wordptr->execute(dummy, *this); 81 | } else { 82 | if(latest_forth_word != nullptr){ 83 | // dln("compile FW ", wordptr->name()); 84 | latest_forth_word->add(iData(wordptr)); 85 | }else { 86 | println("attempted to compile xts to a primitive word"); 87 | return true; 88 | } 89 | } 90 | } 91 | 92 | } 93 | 94 | dln("======[End Interpretation]======"); 95 | std::cout << std::endl; 96 | println(); 97 | return false; 98 | } 99 | 100 | iWordptr Interpreter::find(const std::string& name) { 101 | auto find_result = std::find_if( 102 | dictionary.rbegin(), 103 | dictionary.rend(), 104 | [name](iData other){return other.is_forth_word() && other.as_forth_word()->name() == name;}); 105 | 106 | if(find_result == dictionary.rend()) 107 | // try primitives 108 | return word_generator.get(name); 109 | 110 | return find_result->as_forth_word(); 111 | } 112 | 113 | 114 | void Interpreter::init_words(){ 115 | 116 | word_generator.register_primitive("=", primitive_words::EQUALS, [&](IP ip, Interpreter &interpreter) { 117 | stack.push(stack.pop_number() == stack.pop_number()); 118 | }); 119 | 120 | word_generator.register_primitive("+", primitive_words::ADD, [&](IP ip, Interpreter &interpreter) { 121 | stack.push(stack.pop_number() + stack.pop_number()); 122 | }); 123 | 124 | word_generator.register_primitive("-", primitive_words::SUBTRACT, [&](IP ip, Interpreter &interpreter) { 125 | auto one = stack.pop_number(); 126 | auto two = 
stack.pop_number(); 127 | stack.push(two - one); 128 | 129 | }); 130 | 131 | word_generator.register_primitive("*", primitive_words::MULTIPLY, [&](IP ip, Interpreter &interpreter) { 132 | stack.push(stack.pop_number() * stack.pop_number()); 133 | 134 | }); 135 | 136 | word_generator.register_primitive("/", primitive_words::DIVIDE, [&](IP ip, Interpreter &interpreter) { 137 | stack.push(stack.pop_number() / stack.pop_number()); 138 | }); 139 | 140 | word_generator.register_primitive("swap", primitive_words::SWAP, [&](IP ip, Interpreter &interpreter) { 141 | auto top = stack.pop_number(), second = stack.pop_number(); 142 | stack.push(top); 143 | stack.push(second); 144 | }); 145 | 146 | word_generator.register_primitive("rot", primitive_words::ROT, [&](IP ip, Interpreter &interpreter) { 147 | auto top = stack.pop_number(); 148 | auto second = stack.pop_number(); 149 | auto third = stack.pop_number(); 150 | 151 | stack.push(second);// third 152 | stack.push(top); // second 153 | stack.push(third); // top 154 | }); 155 | 156 | word_generator.register_primitive("dup", primitive_words::DUP, [&](IP ip, Interpreter &interpreter) { 157 | stack.push(stack.top()); 158 | }); 159 | 160 | word_generator.register_primitive("drop", primitive_words::DROP, [&](IP ip, Interpreter &interpreter) { 161 | stack.pop_number(); 162 | }); 163 | 164 | word_generator.register_primitive(".", primitive_words::EMIT, [&](IP ip, Interpreter &interpreter) { 165 | println(stack.pop_number()); 166 | }); 167 | 168 | word_generator.register_primitive(".S", primitive_words::SHOW, [&](IP ip, Interpreter &interpreter) { 169 | stack.for_each([](iData thing) { 170 | print(thing.to_string(), " "); 171 | }); 172 | println("<-top"); 173 | }); 174 | 175 | word_generator.register_primitive("'", primitive_words::TICK, [&](IP ip, Interpreter &interpreter) { 176 | std::string next_token = input.next_token(); 177 | auto cfa = find(next_token); 178 | if (cfa != nullptr) 179 | stack.push(cfa); 180 | }); 181 | 182 | 
word_generator.register_primitive(",", primitive_words::COMMA, [&](IP ip, Interpreter &interpreter) { 183 | if (immediate) { 184 | // Can't remember what this for 185 | // it's execution time stuff 186 | println("Compiling ", stack.top().to_string(), " to word ", dictionary.back().to_string()); 187 | dictionary.back().as_forth_word()->add(stack.pop()); 188 | 189 | } else if (dictionary.back().is_forth_word()) { 190 | // compiling time stuff 191 | //println("Compiling ", stack.top().to_string(), " to word ", dictionary.back().as_forth_word()->name()); 192 | 193 | dictionary.back().as_forth_word()->add(stack.pop()); 194 | } else { 195 | println("attempted to compile to a primitive"); 196 | } 197 | }); 198 | 199 | word_generator.register_primitive("see", primitive_words::SEE, [&](IP ip, Interpreter &interpreter) { 200 | std::cout << "\n\tDefinitions:\n"; 201 | 202 | for (iData dict_data : dictionary) { 203 | // get the word pointer 204 | if(!dict_data.is_forth_word()) continue; 205 | iWordptr word_pointer = dict_data.as_word(); 206 | 207 | bool is_forth_word = dict_data.is_forth_word(); 208 | ForthWord *as_forth_word = nullptr; 209 | if(is_forth_word) 210 | as_forth_word = dict_data.as_forth_word(); 211 | 212 | // print name with tabular indentation 213 | std::cout << std::setfill(' ') << std::setw(15) << 214 | word_pointer->name() + " "; 215 | // print flags 216 | std::cout <<((word_pointer->immediate) ? 
"IMM " : " "); 217 | 218 | // print definition 219 | if (is_forth_word) 220 | as_forth_word->definition_to_string(); 221 | std::cout << std::endl; 222 | 223 | // print locals 224 | std::cout << std::string(15, ' '); 225 | if (is_forth_word) 226 | for(const auto& [local, value] : as_forth_word->locals) 227 | std::cout << local.name << " "; 228 | 229 | std::cout << std::endl; 230 | } 231 | std::cout << std::endl; 232 | }); 233 | 234 | word_generator.register_lambda_word("[", primitive_words::TOIMMEDIATE, [&](IP ip, Interpreter &interpreter) { 235 | immediate = true; 236 | }, true); 237 | 238 | word_generator.register_primitive("]", primitive_words::TOCOMPILE, [&](IP ip, Interpreter &interpreter) { 239 | immediate = false; 240 | }); 241 | 242 | word_generator.register_lambda_word("immediate", primitive_words::IMMEDIATE, [&](IP ip, Interpreter &interpreter) { 243 | if(!dictionary.back().is_forth_word()){ 244 | println("tried to set immediate on number data"); 245 | return; 246 | } 247 | dictionary.back().as_forth_word()->immediate = true; 248 | }, true); 249 | 250 | 251 | word_generator.register_primitive("allot", primitive_words::ALLOT, [&](IP ip, Interpreter &interpreter) { 252 | element num_to_allot = stack.pop_number(); 253 | if(num_to_allot < 0) 254 | while(num_to_allot --> 0 || dictionary.back().is_number()) 255 | dictionary.pop_back(); 256 | else 257 | while(num_to_allot --> 0) 258 | dictionary.emplace_back(0); 259 | }); 260 | 261 | word_generator.register_primitive("@", primitive_words::FETCH, [&](IP ip, Interpreter &interpreter) { 262 | stack.push(*(element*)stack.pop_number()); 263 | }); 264 | 265 | word_generator.register_primitive("!", primitive_words::STORE, [&](IP ip, Interpreter &interpreter) { 266 | if(immediate){ 267 | // words being executed, no point in setting xts since nothing 268 | // is in state of half-compilation. 
User must be setting memory 269 | element ptr = stack.pop_number(); 270 | element value = stack.pop_number(); 271 | 272 | auto *addr = (element*) ptr; 273 | *addr = value; 274 | }else{ 275 | // words being compiled, very bad time for setting dictionary memory right now 276 | // therefore we can assume user wants to set xts 277 | if (!dictionary.back().is_forth_word()) 278 | println("Define some words before calling HERE"); 279 | else { 280 | element location = stack.pop_number(); 281 | iData value = stack.pop(); 282 | 283 | auto it = dictionary.back().as_forth_word()->def().begin(); 284 | std::advance(it, location); 285 | *it = value; 286 | 287 | println("Set index ", location, " of word ", dictionary.back().to_string(), " to ", value.to_string()); 288 | } 289 | } 290 | 291 | }); 292 | 293 | 294 | word_generator.register_primitive("branch", primitive_words::BRANCH, [&](IP ip, Interpreter &interpreter) { 295 | std::advance(ip, std::next(ip)->as_number()); 296 | }, true); 297 | 298 | word_generator.register_primitive("branchif", primitive_words::BRANCHIF, [&](IP ip, Interpreter &interpreter) { 299 | auto next_data = std::next(ip); 300 | if (stack.pop_number() == 0) 301 | std::advance(ip, next_data->as_number()); 302 | else 303 | ip++; 304 | }, true); 305 | 306 | word_generator.register_primitive("literal", primitive_words::LITERAL, [&](IP ip, Interpreter &interpreter) { 307 | stack.push(*(++ip)); 308 | }, true); 309 | 310 | // Use HERE for relative computations only 311 | // Does not represent specific address 312 | word_generator.register_primitive("here", primitive_words::HERE, [&](IP ip, Interpreter &interpreter) { 313 | if(immediate){ 314 | // words being executed, no point in compiling branches since nothing 315 | // is in state of half-compilation. 
Prime time for ALLOTing 316 | dictionary.emplace_back(0); 317 | }else{ 318 | // words being compiled, very bad time for ALLOT right now 319 | // therefore we can assume user wants to mark positions for BRANCH 320 | if (!dictionary.back().is_forth_word()) 321 | println("Define some words before calling HERE"); 322 | else 323 | stack.push((element) dictionary.back().as_forth_word()->def().size()); 324 | } 325 | }); 326 | 327 | word_generator.register_primitive("create", primitive_words::CREATE, [&](IP ip, Interpreter &interpreter) { 328 | std::string next_token = input.next_token(); 329 | dictionary.emplace_back(new ForthWord(next_token, false)); 330 | }); 331 | 332 | word_generator.register_primitive("malloc", primitive_words::MALLOC, [&](IP ip, Interpreter &interpreter) { 333 | element size_to_alloc = stack.pop_number(); 334 | auto* ptr = (element*) malloc(size_to_alloc); 335 | stack.push((element) ptr); 336 | }); 337 | 338 | word_generator.register_primitive("free", primitive_words::FREE, [&](IP ip, Interpreter &interpreter) { 339 | element mem_pointer = stack.pop_number(); 340 | free((void*) mem_pointer); 341 | }); 342 | 343 | word_generator.register_primitive("&", primitive_words::AND, [&](IP ip, Interpreter &interpreter) { 344 | stack.push(stack.pop_number() | stack.pop_number()); 345 | }); 346 | 347 | word_generator.register_primitive("<", primitive_words::LESS, [&](IP ip, Interpreter &interpreter) { 348 | stack.push(stack.pop_number() > stack.pop_number()); 349 | }); 350 | 351 | word_generator.generator_lookup["to"] = [](){ 352 | return new class TO(); 353 | }; 354 | 355 | 356 | // tolocal and fromlocal are never generated by the user. 
So they will not be registered here 357 | 358 | auto colon_word = new ForthWord(":", false); 359 | colon_word->add(iData(find("create"))); 360 | colon_word->add(iData(find("]"))); 361 | dictionary.emplace_back(colon_word); 362 | 363 | auto exit_word = new ForthWord(";", true); 364 | exit_word->add(iData(find("["))); 365 | dictionary.emplace_back(exit_word); 366 | } 367 | 368 | -------------------------------------------------------------------------------- /sources/Interpretation/Primitive.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by reschivon on 11/17/21. 3 | // 4 | 5 | #include "../../headers/Interpretation/Primitive.h" 6 | #include "../../headers/Interpretation/Stack.h" 7 | #include "../../headers/Interpretation/ForthWord.h" 8 | 9 | using namespace mov; 10 | 11 | 12 | Primitive::Primitive(std::string name, primitive_words id, bool immediate, std::function action, bool stateful) 13 | : iWord(std::move(name), id, immediate, stateful), action(std::move(action)) {} 14 | 15 | void Primitive::execute(IP ip, Interpreter &interpreter) { 16 | action(ip, interpreter); 17 | } 18 | 19 | ToLocal::ToLocal(Local local, std::unordered_map &locals) 20 | : Primitive("toLocal", primitive_words::TOLOCAL, false,[&](IP ip, Interpreter &interpreter) { 21 | 22 | locals.at(this->local) = interpreter.stack.pop(); 23 | }, false), 24 | 25 | local(local), 26 | locals(locals) 27 | { 28 | if(locals.find(local) == locals.end()) 29 | locals.insert(std::make_pair(local, iData(nullptr))); 30 | } 31 | 32 | std::string ToLocal::name() { 33 | return iWord::name() + "(" + local.name + ")"; 34 | } 35 | 36 | FromLocal::FromLocal(Local local, std::unordered_map &locals) 37 | 38 | : Primitive("fromLocal", primitive_words::FROMLOCAL, false, [&](IP ip, Interpreter &interpreter) { 39 | 40 | // println("Push local ", this->locals[this->local].to_string(), " to stack "); 41 | interpreter.stack.push(this->locals[this->local]); 42 | }, false), 43 
| 44 | local(local), 45 | locals(locals) 46 | {} 47 | 48 | std::string FromLocal::name() { 49 | return iWord::name() + "(" + local.name + ")"; 50 | } 51 | 52 | TO::TO() 53 | : Primitive("to", primitive_words::TO, true, 54 | [](IP ip, Interpreter &interpreter) { 55 | auto parent = interpreter.dictionary.back().as_forth_word(); 56 | 57 | std::string next_token = interpreter.input.next_token(); 58 | 59 | Local this_local = Local(parent->name(), next_token); 60 | parent->locals.insert(std::make_pair(this_local, iData(0))); 61 | parent->add(iData(new ToLocal(this_local, parent->locals))); 62 | } 63 | , false) 64 | {} 65 | -------------------------------------------------------------------------------- /sources/Interpretation/Stack.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by reschivon on 11/17/21. 3 | // 4 | #include "../../headers/Interpretation/Stack.h" 5 | using namespace mov; 6 | 7 | void Stack::push(element number) { 8 | stack.emplace_back(number); 9 | } 10 | 11 | void Stack::push(iWordptr iWord_pointer) { 12 | stack.emplace_back(iWord_pointer); 13 | } 14 | 15 | void Stack::push(iData thing) { 16 | stack.push_back(thing); 17 | } 18 | 19 | element Stack::pop_number() { 20 | if(stack.empty()) 21 | println("tried popping empty stack"); 22 | 23 | if(stack.back().is_number()) { // int 24 | auto ret = stack.back().as_number(); 25 | stack.pop_back(); 26 | return ret; 27 | } 28 | if(stack.back().is_word()) { // iWord ptr 29 | println("tried popping cfa expecting int"); 30 | return 0; 31 | } 32 | return 0; 33 | } 34 | 35 | iData Stack::pop() { 36 | if(stack.empty()) { 37 | println("tried popping empty stack"); 38 | return iData(nullptr); 39 | } 40 | auto ret = stack.back(); 41 | stack.pop_back(); 42 | return ret; 43 | } 44 | 45 | iData Stack::top() { 46 | if(stack.empty()) { 47 | println("tried getting top of empty stack"); 48 | return iData(nullptr); 49 | } 50 | return stack.back(); 51 | } 52 | 53 | int 
Stack::size() { 54 | return stack.size(); 55 | } 56 | 57 | void Stack::for_each(std::function action) { 58 | for(auto thing : stack) 59 | action(thing); 60 | } 61 | -------------------------------------------------------------------------------- /sources/Interpretation/WordGenerator.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by reschivon on 11/17/21. 3 | // 4 | 5 | #include "../../headers/Interpretation/WordGenerator.h" 6 | #include "../../headers/Interpretation/Primitive.h" 7 | 8 | using namespace mov; 9 | 10 | void 11 | mov::iWordGenerator::register_lambda_word(const std::string &name, primitive_words id, std::function action, 12 | bool immediate, bool stateful) { 13 | generator_lookup[name] = [=]{ 14 | return new Primitive(name, id, immediate, action, stateful); 15 | }; 16 | } 17 | 18 | void 19 | mov::iWordGenerator::register_primitive(const std::string &name, primitive_words id, std::function action, 20 | bool stateful) { 21 | register_lambda_word(name, id, action, false, stateful); 22 | } 23 | 24 | iWordptr mov::iWordGenerator::get(const std::string &name) { 25 | auto primitive = generator_lookup.find(name); 26 | if(primitive == generator_lookup.end()) 27 | return nullptr; 28 | 29 | return primitive->second(); 30 | } 31 | -------------------------------------------------------------------------------- /sources/Interpretation/iData.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #include "../../headers/Interpretation/iData.h" 5 | #include "../../headers/Interpretation/ForthWord.h" 6 | #include "../../headers/Interpretation/Primitive.h" 7 | 8 | using namespace mov; 9 | 10 | 11 | iData::iData(iData::dict_data_var_type data) : dict_data_var_type(data) {} 12 | 13 | std::string iData::to_string() { 14 | if(is_word()) 15 | return as_word()->name(); 16 | if(is_number()) 17 | return std::to_string(as_number()); 18 | return "undef"; 19 | } 20 | 21 | 
iData::iData() : iData::dict_data_var_type(nullptr) { } 22 | 23 | bool iData::is_number() const { return index() == 0;} 24 | 25 | bool iData::is_word() const { return index() == 1 || index() == 2 || index() == 3;} 26 | 27 | bool iData::is_forth_word() const { return index() == 2;} 28 | 29 | bool iData::is_primitive() const { return index() == 3;} 30 | 31 | bool iData::is_empty() const { return index() == 4;} 32 | 33 | 34 | 35 | element iData::as_number() { return std::get(*this);} 36 | 37 | iWord* iData::as_word() 38 | { 39 | if(is_forth_word()) 40 | return std::get(*this); 41 | if(is_primitive()) 42 | return std::get(*this); 43 | if(is_number()) { 44 | println("Expected a word, got number"); 45 | return nullptr; 46 | } 47 | return std::get(*this); // assume word if not FW 48 | } 49 | ForthWord* iData::as_forth_word() const { return std::get(*this);} 50 | Primitive* iData::as_primitive() { return std::get(*this);} 51 | 52 | -------------------------------------------------------------------------------- /sources/Interpretation/iWord.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../../headers/Interpretation/iWord.h" 4 | 5 | using namespace mov; 6 | 7 | iWord::iWord(std::string name, bool immediate, bool stateful) 8 | : _name(name), immediate(immediate), stateful(stateful) {} 9 | 10 | iWord::iWord(std::string name, primitive_words id, bool immediate, bool stateful) 11 | : _name(name), id(id), immediate(immediate), stateful(stateful) {} 12 | 13 | std::string iWord::name() { 14 | return _name; 15 | } 16 | bool iWord::branchy() { 17 | return id == primitive_words::BRANCH || id == primitive_words::BRANCHIF; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /sources/Print.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "../headers/Print.h" 4 | 5 | std::string newlines_have_indents(const 
std::string &in) { 6 | return std::regex_replace(in, std::regex("\n"), tab_string); 7 | } 8 | 9 | void print_single(int arg) { 10 | std::cout << arg; 11 | } 12 | 13 | void print_single(std::basic_string arg) { 14 | std::cout << newlines_have_indents(arg); 15 | } 16 | 17 | void regen_tab() { 18 | tab_string = "\n"; 19 | for(int i = 0; i < indents; i++) 20 | tab_string += " "; 21 | } 22 | 23 | void unindent() { 24 | indents--; 25 | if(indents < 0) 26 | indents = 0; 27 | regen_tab(); 28 | std::cout << tab_string; 29 | } 30 | 31 | void indent() { 32 | indents++; 33 | regen_tab(); 34 | } 35 | -------------------------------------------------------------------------------- /sources/Symbolic/Block.cpp: -------------------------------------------------------------------------------- 1 | #include "../../headers/Symbolic/Block.h" 2 | 3 | using namespace mov; 4 | 5 | 6 | Block::Block(BBgen &gen, sWordptr parent) 7 | : index(gen.get()), register_gen((int) index), parent(parent) {} 8 | 9 | std::vector Block::nextBBs() { 10 | if (instructions.back()->id() == primitive_words::BRANCH) 11 | 12 | return {std::ref(*instructions.back()->as_branch()->jump_to)}; 13 | 14 | else if (instructions.back()->id() == primitive_words::BRANCHIF) 15 | { 16 | return { 17 | *instructions.back()->as_branchif()->jump_to_next, 18 | *instructions.back()->as_branchif()->jump_to_far 19 | }; 20 | } 21 | else if (instructions.back()->id() == primitive_words::EXIT) 22 | 23 | return {}; 24 | 25 | // should never happen 26 | println("Expected next BB to be return or branch; was not. \n", 27 | "Instead got ", instructions.back()->id()); 28 | return {}; 29 | } 30 | 31 | bool Block::is_exit() { 32 | return !instructions.empty() && instructions.back()->id() == primitive_words::EXIT; 33 | } 34 | 35 | /** 36 | * This function makes sure that registers between control flow splits and 37 | * merges always line up. It's "the grass is always greener" algorithm. 
/**
 * Makes the registers on the edge prev -> post agree, depending on which
 * side has already been aligned:
 *
 *  - only post aligned : prev's output nodes are renamed to post's stored
 *                        initial registers
 *  - only prev aligned : prev's current output registers are recorded as
 *                        post's initial registers
 *  - neither aligned   : a fresh register is drawn from post's generator for
 *                        every output of prev and recorded on both sides
 *  - both aligned      : nothing is changed
 *
 * The three checks run in sequence, but each one sets exactly the flag that
 * makes the following checks false, so at most one of them takes effect.
 *
 * @param prev block whose outputs feed the edge
 * @param post block whose inputs are fed by the edge
 */
static void align_register_edge(Block &prev, Block &post) {
    dln("Align registers from #", prev.index, " to #", post.index);

    // post already has register names: make prev's outputs adopt them
    if(!prev.outputs_aligned && post.inputs_aligned){
        println("Gen nodes for prev");
        indent();
        // NOTE(review): indexes post.initial_registers by prev's output count;
        // assumes post has at least that many initial registers -- confirm
        for(int i = 0; i < prev.outputs.size(); i++) {
            print(post.initial_registers[i].to_string());
            Node::redefine_preceding_edge(
                    prev.outputs[i],
                    post.initial_registers[i]);
        }
        unindent();
        prev.outputs_aligned = true;
    }

    // prev already has register names: store them as post's initial registers
    if(prev.outputs_aligned && !post.inputs_aligned){
        println("Gen nodes for post");
        indent();
        for(Node *thing : prev.outputs) {
            println(thing->backward_edge_register.to_string());
            post.initial_registers.push_back(thing->backward_edge_register);
        }
        unindent();
        post.inputs_aligned = true;
    }

    // neither side has names yet: invent them, numbered by post's generator
    if(!prev.outputs_aligned && !post.inputs_aligned){
        println("Gen nodes for both");
        indent();
        for(Node *prev_node : prev.outputs){
            // new common register for the new edge
            auto new_reg = post.register_gen.get();
            print(new_reg.to_string());
            // modify forward edge of preceding node to be the new register
            Node::redefine_preceding_edge(prev_node, new_reg);
            // note new register in a field of the next BB
            post.initial_registers.push_back(new_reg);
        }
        unindent();
        prev.outputs_aligned = true;
        post.inputs_aligned = true;
    }
}
linked_word(linked_word), data(data) {} 33 | 34 | primitive_words Instruction::id() const { 35 | return linked_word->id; 36 | } 37 | 38 | BranchInstruction::BranchInstruction(sWordptr linked_word, sData data, 39 | Block *jump_to) 40 | : Instruction(linked_word, data), jump_to(jump_to) {} 41 | 42 | BranchIfInstruction::BranchIfInstruction(sWordptr linked_word, sData data, 43 | Block *jump_to_close, 44 | Block *jump_to_far) 45 | : Instruction(linked_word, data), jump_to_next(jump_to_close), jump_to_far(jump_to_far) {} 46 | 47 | ReturnInstruction::ReturnInstruction() 48 | : Instruction(new sWord("exit", primitive_words::EXIT), sData(nullptr)){} 49 | 50 | 51 | std::string BranchInstruction::name() { 52 | return linked_word->name + "(to " + std::to_string(jump_to->index) + ")"; 53 | } 54 | 55 | std::string BranchIfInstruction::name() { 56 | return linked_word->name + "(true: " + std::to_string(jump_to_next->index) + ", false: " + std::to_string(jump_to_far->index) +")"; 57 | } 58 | 59 | std::string ReturnInstruction::name() { 60 | return "exit"; 61 | } 62 | 63 | 64 | -------------------------------------------------------------------------------- /sources/Symbolic/Pass.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "../../headers/Symbolic/Pass.h" 6 | #include "../../headers/Print.h" 7 | #include "../../headers/Interpretation/iData.h" 8 | #include "../../headers/Interpretation/Primitive.h" 9 | 10 | using namespace mov; 11 | 12 | sData Analysis::symbolize_data(iData data) { 13 | if (data.is_number()) 14 | return sData(data.as_number()); 15 | if (data.is_word()) 16 | return sData(static_analysis(data.as_word())); 17 | if(data.is_empty()) 18 | return sData(nullptr); 19 | println("FUCK"); 20 | return sData(nullptr); 21 | } 22 | 23 | sWordptr Analysis::static_analysis(iWordptr original_word) { 24 | 25 | // check to see if we have passed over this word already 26 | // if so, return a 
pointer to it 27 | auto cached = visited_words.find(original_word); 28 | if (cached != visited_words.end()) 29 | return cached->second; 30 | 31 | if (dynamic_cast(original_word)) 32 | { // is a primitive: return the singleton of the primitive 33 | sWordptr word_singleton = primitive_lookup.at(original_word->id); 34 | return word_singleton; 35 | } 36 | 37 | if (auto forth_word = dynamic_cast(original_word)) 38 | { 39 | 40 | dln(); 41 | dln("Translate ", original_word->name(), " to basic blocks"); 42 | 43 | auto converted = translate_to_basic_blocks(forth_word); 44 | 45 | dln("[", original_word->name(), "] START graphing all BBs"); 46 | indent(); 47 | 48 | word_stack_graph(converted); 49 | 50 | unindent(); 51 | dln("[", original_word->name(), "] FINSIHED graphing all BBs"); 52 | dln("pushes: ", converted->effects.num_pushed); 53 | dln("pops: ", converted->effects.num_popped); 54 | 55 | return converted; 56 | } 57 | 58 | println("Word provided for static analysis is null"); 59 | 60 | return nullptr; 61 | } 62 | 63 | sWordptr Analysis::show_word_info(sWordptr wordptr) { 64 | println("============[", wordptr->name, "]==========="); 65 | 66 | println("push:" , wordptr->effects.num_pushed, 67 | " pop:" , wordptr->effects.num_popped); 68 | 69 | println(); 70 | println("Basic Blocks:"); 71 | for(const auto& bbe : wordptr->blocks){ 72 | println("bbe " + std::to_string(bbe.index) + ":"); 73 | print(" "); 74 | for(auto instr : bbe.instructions) 75 | print(" " , instr->name()); 76 | println(); 77 | } 78 | println(); 79 | 80 | println(); 81 | println("Stack Graph:"); 82 | 83 | for(auto &bb : wordptr->blocks){ 84 | println("----------------------------------"); 85 | println("[" , bb.name() , "] stack graph"); 86 | indent(); 87 | 88 | for(auto &instr : bb.instructions){ 89 | // propagate the stack state 90 | println(); 91 | println("[", instr->name(), "]"); 92 | 93 | if(!instr->pop_nodes.empty()) { 94 | println("pops:"); 95 | for (auto node: instr->pop_nodes) 96 | print(" ", 
node->backward_edge_register.to_string()); 97 | } 98 | 99 | if(!instr->push_nodes.empty()) { 100 | println("pushes:"); 101 | for (auto thing: instr->push_nodes) 102 | print(" ", thing->forward_edge_register.to_string()); 103 | } 104 | } 105 | 106 | unindent(); 107 | 108 | print("next BBs: "); 109 | for(auto next : bb.nextBBs()) 110 | print(next.get().index , " "); 111 | println(); 112 | } 113 | 114 | return wordptr; 115 | } 116 | 117 | 118 | /** 119 | * Replace the element @it of the host list with the provided list 120 | * Side effect of linked lists: this function modifies the iterator 121 | * since the provided iterator is no longer valid 122 | * @param host 123 | * @param it 124 | * @param to_insert 125 | * @return iterator pointing to the last inserted element 126 | */ 127 | std::list::iterator replace_with(std::list host, std::list::const_iterator it, const std::list& to_insert){ 128 | // inline it 129 | for(const auto& sub_word : to_insert) 130 | host.insert(it, sub_word); 131 | return std::prev(host.erase(it)); 132 | } 133 | 134 | bool can_inline(std::list::iterator it, ForthWord *host, int maximum_definition_size){ 135 | //dln("Consider the inlining of ", it->as_word()->name()); 136 | 137 | if (it->as_word()->id == primitive_words::OTHER) { 138 | 139 | auto *sub_fw = dynamic_cast(it->as_word()); 140 | auto &sub_def = sub_fw->def(); 141 | uint sub_def_size = sub_fw->def_size(); 142 | 143 | if (host->def_size() + sub_def_size < maximum_definition_size) { 144 | dln("Inlining word ", sub_fw->name(), " into ", host->name()); 145 | return true; 146 | } else { 147 | return false; 148 | } 149 | } 150 | 151 | return false; 152 | } 153 | 154 | const static uint INLINE_WORD_MAX_XTS = 50; 155 | void dfs(iWordptr word, std::set& visited){ 156 | 157 | if(visited.find(word) != visited.end()) { 158 | // this word has already been visited 159 | return; 160 | }else { 161 | // this word has not been visited 162 | visited.insert(word); 163 | } 164 | 165 | 
/**
 * Applies the stack effect of one instruction to the symbolic stack and
 * records which nodes the instruction pops and pushes.
 *
 * Steps, in order:
 *  1. take as many inputs as are available from the top of @stack
 *  2. for inputs the stack cannot supply, create a parameter node in @params
 *     and link it to a new pop node with a register from @param_gen
 *  3. create one push node per value the word pushes
 *  4. link each (output, input) pair of the word's effects, reusing the
 *     input's register for the output
 *  5. put every push node on @stack, keeping its register when it was linked
 *     in step 4 and drawing a new one from @register_gen otherwise
 *
 * @param stack        symbolic stack, modified in place
 * @param instruction  instruction whose pop_nodes / push_nodes are filled
 * @param params       parameter nodes of the enclosing word, extended when
 *                     the stack runs dry
 * @param register_gen source of registers for newly produced values
 * @param param_gen    source of registers for newly discovered parameters
 */
void Analysis::propagate_stack(NodeList &stack, Instruction *instruction, NodeList &params, RegisterGen &register_gen,
                               RegisterGen &param_gen) {

    const auto effects = instruction->linked_word->effects;
    instruction->pop_nodes.clear(); // not sure why it sometimes already has stuff

    // if the stack does not have at least (effects.num_popped) items for us,
    // then we will create param nodes until there are enough items
    unsigned int nodes_from_stack = std::min(effects.num_popped, stack.size());
    // we must make this comparison in signed integers
    unsigned int nodes_from_params = std::max((int)effects.num_popped - (int)stack.size(), 0);

    // pop input nodes from stack to current instruction
    NodeList::move_top_elements(stack, instruction->pop_nodes,
                                (int) nodes_from_stack);

    // one new parameter node per input the stack could not supply
    while (nodes_from_params -- > 0)
        Node::link_bidirection(params.new_top(),
                               instruction->pop_nodes.new_top(),
                               param_gen.get_param());

    // make empty output nodes
    for (int i = 0; i < effects.num_pushed; i++)
        instruction->push_nodes.new_top();

    // link output and param nodes that represent the same data
    for (auto out_in_pair : effects.out_in_pairs)
    {
        auto pop_node = instruction->pop_nodes[out_in_pair.second];
        auto push_node = instruction->push_nodes[out_in_pair.first];
        Node::link(pop_node, push_node, pop_node->backward_edge_register);
    }

    // link output nodes to stack
    for (auto push_node : instruction->push_nodes)
        if(push_node->prev_node != nullptr)
            // linked to an input above: the value keeps that input's register
            Node::link_bidirection(push_node, stack.new_top(), push_node->backward_edge_register);
        else
            // brand-new value: give it a fresh register
            Node::link_bidirection(push_node, stack.new_top(), register_gen.get());

    dln("pops: first -> ");
    for (auto node : instruction->pop_nodes)
        d(" ", node->backward_edge_register.to_string());

    dln("pushes: first -> ");
    for (auto thing : instruction->push_nodes)
        d(" ", thing->forward_edge_register.to_string());
}
¶ms, sWordptr parent) { 56 | 57 | instruction->pop_nodes.clear(); // not sure why it sometimes already has stuff 58 | 59 | Register locals_register = parent->locals.at(instruction->data.as_local()); 60 | 61 | Node::link_bidirection(instruction->push_nodes.new_top(), stack.new_top(), locals_register); 62 | 63 | dln("fromLocal takes data from register ", locals_register.to_string()); 64 | } 65 | 66 | 67 | void Analysis::propagate_stack_tolocal(NodeList &stack, Instruction *instruction, 68 | NodeList ¶ms, RegisterGen ¶m_gen, 69 | RegisterGen ®ister_gen, sWordptr parent) { 70 | 71 | instruction->pop_nodes.clear(); // not sure why it sometimes already has stuff 72 | 73 | bool node_from_param = stack.empty(); 74 | bool node_from_stack = !stack.empty(); 75 | 76 | // pop input nodes from stack to current instruction 77 | if(node_from_stack) 78 | NodeList::move_top_elements(stack, instruction->pop_nodes, 1); 79 | 80 | if(node_from_param) 81 | Node::link_bidirection(params.new_top(), 82 | instruction->pop_nodes.new_top(), 83 | param_gen.get_param()); 84 | 85 | const auto &curr_local = instruction->data.as_local(); 86 | const auto &local_search = parent->locals.find(curr_local); 87 | if(local_search == parent->locals.end()) { 88 | // we haven't already allocated register space for this local 89 | const Register& local_reg = register_gen.get(); 90 | parent->locals.insert(std::make_pair(curr_local, local_reg)); 91 | 92 | dln("toLocal gets data from register ", instruction->pop_nodes[0]->backward_edge_register.to_string()); 93 | dln("toLocal sends data to special register ", local_reg.to_string()); 94 | }else{ 95 | // we already encountered this local, its register is stored 96 | dln("toLocal gets data from register ", instruction->pop_nodes[0]->backward_edge_register.to_string()); 97 | dln("toLocal sends data to special register ", local_search->second.to_string()); 98 | } 99 | 100 | 101 | 102 | } 103 | 104 | 105 | NodeList Analysis::basic_block_stack_graph(NodeList 
&running_stack, Block &bb, NodeList ¶ms, 106 | RegisterGen ®ister_gen, RegisterGen ¶m_gen) { 107 | 108 | dln(); 109 | 110 | // a copy of the stack 111 | bb.inputs = running_stack; 112 | 113 | // set inputs to pregenerated register_names during rgister alignment 114 | dln("Loading initial registers"); 115 | indent(); 116 | int i = 0; 117 | for(auto *node : bb.inputs){ 118 | dln("Load ", bb.initial_registers[i].to_string()); 119 | node->backward_edge_register = bb.initial_registers[i]; 120 | i++; 121 | } 122 | unindent(); 123 | 124 | for (auto instruction : bb.instructions) 125 | { 126 | auto definee = instruction->linked_word; 127 | 128 | // update the bb's total Effects 129 | bb.effects_without_push_pop.acquire_side_effects_ignore_push_pop(definee->effects); 130 | 131 | // propagate the stack state 132 | dln(); 133 | dln("[", definee->name, "]"); 134 | 135 | if(definee->id == primitive_words::TOLOCAL) 136 | Analysis::propagate_stack_tolocal(running_stack, instruction, 137 | params, param_gen, register_gen, bb.parent); 138 | else if (definee->id == primitive_words::FROMLOCAL) 139 | Analysis::propagate_stack_fromlocal(running_stack, instruction, params, bb.parent); 140 | else 141 | Analysis::propagate_stack(running_stack, instruction, 142 | params, register_gen, param_gen); 143 | 144 | dln(); 145 | dln("[stack:]"); 146 | for (auto thing : running_stack) 147 | dln( thing->backward_edge_register.to_string()); 148 | dln("^ top ^"); 149 | } 150 | 151 | // a copy of the stack 152 | bb.outputs = running_stack; 153 | 154 | return running_stack; 155 | } 156 | -------------------------------------------------------------------------------- /sources/Symbolic/Pass_MatchingPairs.cpp: -------------------------------------------------------------------------------- 1 | #include "../../headers/Symbolic/Pass.h" 2 | 3 | using namespace mov; 4 | 5 | /** 6 | * Check if any stack elments pass through bb without changes 7 | * If so, registers a matching pair to bb, which can be used 8 | 
* for optimizing register generation 9 | * @param bb 10 | */ 11 | void Analysis::compute_matching_pairs(Block &bb) { 12 | 13 | // matching pairs 14 | for (int i = 0; i < bb.outputs.size(); i++) 15 | { 16 | Node *output = bb.outputs[i]; 17 | 18 | Node *next = output->prev_node; 19 | while (true) 20 | { 21 | if (next->prev_node == nullptr) 22 | break; 23 | if (next->backward_edge_register.register_type == Register::PARAM) 24 | { 25 | bb.effects_without_push_pop.out_in_pairs.emplace_back(i, next->backward_edge_register.index); 26 | break; 27 | } 28 | next = next->prev_node; 29 | } 30 | } 31 | } 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /sources/Symbolic/Pass_ToBasicBlocks.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "../../headers/Symbolic/Pass.h" 3 | #include "../../headers/Interpretation/iData.h" 4 | #include "../../headers/Interpretation/Primitive.h" 5 | 6 | using namespace mov; 7 | 8 | // This might be more complicated than it needs 9 | // to be since I tried to keep everything O(n) 10 | 11 | struct BasicBlockBuilder{ 12 | private: 13 | short* bbe_lookup; 14 | short* jump_lookup; 15 | const short bbe_lookup_size; 16 | 17 | sWordptr new_word; 18 | BBgen gen; 19 | 20 | public: 21 | using it_type = std::vector::iterator; 22 | 23 | BasicBlockBuilder(sWordptr new_word, short instructions) 24 | : new_word(new_word), bbe_lookup_size(instructions + 1) 25 | { 26 | bbe_lookup = new short[bbe_lookup_size]; 27 | std::fill_n(bbe_lookup, bbe_lookup_size, -1); 28 | 29 | // ensure there is always entry block 30 | bbe_lookup[0] = 0; 31 | 32 | const short jump_lookup_size = instructions; 33 | jump_lookup = new short[jump_lookup_size]; 34 | std::fill_n(jump_lookup, jump_lookup_size, -1); 35 | } 36 | 37 | void make_bb_for_jump(int from, int delta){ 38 | short dest_bb_entry = from + delta + 1; 39 | jump_lookup[from] = dest_bb_entry; 40 | 41 | 
if(bbe_lookup[dest_bb_entry] == -1){ 42 | bbe_lookup[dest_bb_entry] = 1; 43 | //println("at index ", dest_bb_entry, " is a bb"); 44 | } 45 | } 46 | 47 | it_type get_bb_at_index(int index){ 48 | return new_word->blocks.begin() + bbe_lookup[index] - 1; 49 | } 50 | 51 | it_type get_bb_for_branch_at(int index){ 52 | return get_bb_at_index(jump_lookup[index]); 53 | } 54 | 55 | bool is_index_bb(int index){ 56 | return bbe_lookup[index] != -1; 57 | } 58 | 59 | int index_of_bb_at(int index) { 60 | return bbe_lookup[index]; 61 | } 62 | int get_bb_index_for_branch_at(int index) { 63 | return index_of_bb_at(jump_lookup[index]); 64 | } 65 | 66 | void createBBs(){ 67 | for(int index = 0; index < bbe_lookup_size; index++){ 68 | if(bbe_lookup[index] != -1) { 69 | new_word->blocks.emplace_back(gen, new_word); 70 | bbe_lookup[index] = new_word->blocks.size(); 71 | } 72 | } 73 | } 74 | }; 75 | 76 | // why is this happening 77 | uint sizeOf(const std::list& li){ 78 | uint size = 0; 79 | for(auto thing : li) 80 | size++; 81 | return size; 82 | } 83 | 84 | sWordptr Analysis::translate_to_basic_blocks(ForthWord *template_word){ 85 | auto new_word = new sWord(template_word->name(), primitive_words::OTHER); 86 | 87 | //dln("compute basic blocks for [" , template_word->name() , "]"); 88 | 89 | // cache this word for the future 90 | visited_words[template_word] = new_word; 91 | 92 | //dln("Populating Basic Blocks\n"); 93 | BasicBlockBuilder bb_builder(new_word, (short) sizeOf(template_word->def())); 94 | 95 | // precompute BB entry points 96 | int i = 0; 97 | std::list template_def = template_word->def(); 98 | for(auto it = template_def.begin(); it != template_def.end(); it++, i++){ 99 | auto template_sub_def = it; 100 | 101 | if(!template_sub_def->is_word()) 102 | continue; 103 | 104 | if(template_sub_def->as_word()->id == primitive_words::BRANCH){ 105 | auto next_number = std::next(it)->as_number(); 106 | bb_builder.make_bb_for_jump(i, next_number); 107 | } 108 | 109 | 
if(template_sub_def->as_word()->id == primitive_words::BRANCHIF){ 110 | bb_builder.make_bb_for_jump(i, 1); 111 | // must come after //TODO 112 | auto next_number = std::next(it)->as_number(); 113 | bb_builder.make_bb_for_jump(i, next_number); 114 | } 115 | } 116 | 117 | bb_builder.createBBs(); 118 | println(); 119 | 120 | // fill BBs with instructions derived from template word 121 | auto curr_bb = bb_builder.get_bb_at_index(0); 122 | //println("switch to bb#", bb_builder.index_of_bb_at(0)); 123 | 124 | i = 0; 125 | template_def = template_word->def(); 126 | for(auto it = template_def.begin(); it != template_def.end(); it++, i++){ 127 | 128 | auto template_sub_def = it->as_word(); 129 | 130 | auto new_sub_def = static_analysis(template_sub_def); 131 | 132 | sData next_data = sData(nullptr); 133 | if(template_sub_def->stateful){ 134 | next_data = symbolize_data(*std::next(it)); 135 | } 136 | 137 | switch (template_sub_def->id) { 138 | case BRANCH: 139 | curr_bb->instructions.push_back(new BranchInstruction( 140 | new_sub_def, next_data, 141 | bb_builder.get_bb_for_branch_at(i).base())); 142 | break; 143 | 144 | case BRANCHIF: 145 | curr_bb->instructions.push_back(new BranchIfInstruction( 146 | new_sub_def, next_data, 147 | bb_builder.get_bb_at_index(i + 2).base(), 148 | bb_builder.get_bb_for_branch_at(i).base())); 149 | break; 150 | 151 | case TOLOCAL: { 152 | auto *toLocal = dynamic_cast(template_sub_def); 153 | curr_bb->instructions.push_back( 154 | new Instruction(new_sub_def,sData{toLocal->local})); 155 | break; 156 | } 157 | 158 | case FROMLOCAL: { 159 | auto *fromLocal = dynamic_cast(template_sub_def); 160 | curr_bb->instructions.push_back( 161 | new Instruction(new_sub_def,sData{fromLocal->local})); 162 | break; 163 | } 164 | 165 | default: 166 | curr_bb->instructions.push_back(new Instruction(new_sub_def, next_data)); 167 | } 168 | 169 | // the next word will be consumed, so skip 170 | if(template_sub_def->stateful) { 171 | it++; 172 | i++; 173 | } 174 | 175 
| // reached the end of a BB, go to next 176 | if(bb_builder.is_index_bb(i + 1)){ 177 | //println("Next index is bb entry: ", i+1); 178 | auto next_bb = bb_builder.get_bb_at_index(i + 1); 179 | 180 | if(!curr_bb->instructions.back()->branchy()) 181 | curr_bb->instructions.push_back(new BranchInstruction(new sWord("branch", primitive_words::BRANCH), sData(nullptr), next_bb.base())); 182 | 183 | curr_bb = next_bb; 184 | } 185 | } 186 | unindent(); 187 | 188 | // ensure last instr of last BB is `return` 189 | auto &last_bb = new_word->blocks.back(); 190 | auto &last_instr = last_bb.instructions.back(); 191 | 192 | if(last_bb.instructions.empty() || last_instr->name() != "is_exit") 193 | last_bb.instructions.push_back(new ReturnInstruction); 194 | 195 | return new_word; 196 | } 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /sources/Symbolic/Pass_WordStackGraph.cpp: -------------------------------------------------------------------------------- 1 | #include "../../headers/Symbolic/Pass.h" 2 | 3 | using namespace mov; 4 | 5 | /** 6 | * Generate stack graph for every basic block within wordptr 7 | * Also make sure register names match between basic block edges 8 | * Ensure that merging Bbs have the same stack size 9 | * Set the new effects_without_push_pop of wordptr 10 | * @param wordptr the word whose definition will be graphed 11 | */ 12 | void Analysis::word_stack_graph(sWordptr wordptr) { 13 | dln(); 14 | 15 | // build stack graph 16 | NodeList stack; 17 | 18 | // This function does all the heavy lifting 19 | explore_graph_dfs(stack, wordptr->blocks.front(), wordptr->param_gen); 20 | 21 | // mark remaining registers as return 22 | NodeList &return_nodes = wordptr->blocks.back().outputs; 23 | wordptr->my_graphs_returns = return_nodes; 24 | for(auto &node : return_nodes){ 25 | Node::redefine_preceding_type(node, Register::registerType::RETURN); 26 | } 27 | 28 | // compute push/pop effects for this word 29 | 
auto &lastBB = wordptr->blocks.back(); 30 | 31 | uint total_accumulated_params = lastBB.initial_accumulated_params + lastBB.params.size(); 32 | uint total_accumulated_stack_size = lastBB.outputs.size(); 33 | 34 | wordptr->effects.num_popped = total_accumulated_params; 35 | wordptr->effects.num_pushed = total_accumulated_stack_size; 36 | 37 | } 38 | 39 | 40 | void Analysis::explore_graph_dfs(NodeList stack, Block &bb, RegisterGen param_gen) { 41 | if(bb.outputs_aligned) 42 | return; 43 | else 44 | bb.initial_accumulated_stack_size = (int) stack.size(); 45 | 46 | dln(); 47 | d("[" , bb.name() , "] START making stack graph"); 48 | indent(); 49 | 50 | // transformed stack == stack, but I want to make 51 | // it clear that stack has been modified 52 | 53 | // param_gen passed by reference, we want to know if params 54 | // got used in there 55 | NodeList transformed_stack = Analysis::basic_block_stack_graph(stack, bb, bb.params, 56 | bb.register_gen, param_gen); 57 | 58 | Analysis::compute_matching_pairs(bb); 59 | 60 | d("next BBs:"); 61 | for(auto next : bb.nextBBs()) 62 | d(" " , next.get().index); 63 | dln(); 64 | 65 | unindent(); 66 | dln("[" , bb.name() , "] FINSIHED making stack graph"); 67 | 68 | uint final_accumulated_params = bb.initial_accumulated_params + bb.params.size(); 69 | uint final_accumulated_stack_size = transformed_stack.size(); 70 | 71 | dln("accumulated total params: ", final_accumulated_params); 72 | 73 | for(auto next : bb.nextBBs()){ 74 | if(next.get().outputs_aligned){ 75 | if(final_accumulated_params != next.get().initial_accumulated_params){ 76 | println("[FATAL] input size mismatch on edge from " , bb.name() , " to " , next.get().name()); 77 | println("Past input num: " , next.get().initial_accumulated_params , " current: " , bb.initial_accumulated_params); 78 | } 79 | if(final_accumulated_stack_size != next.get().initial_accumulated_stack_size){ 80 | println("[FATAL] Control flow edge mismatch on edge from " , bb.name() , " to " , 
next.get().name()); 81 | println("Past stack size: " , next.get().initial_accumulated_stack_size , " current: " , transformed_stack.size()); 82 | } 83 | }else{ 84 | next.get().initial_accumulated_params = final_accumulated_params; 85 | next.get().initial_accumulated_stack_size = final_accumulated_stack_size; 86 | } 87 | } 88 | 89 | // aka align registers between control flow edges 90 | bb.align_registers(); 91 | 92 | for(auto next : bb.nextBBs()) 93 | // param gen is passed by value becasue each control flow 94 | // edge tracks the accumulated params at that point 95 | explore_graph_dfs(transformed_stack, next, param_gen); 96 | } 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /sources/Symbolic/sWord.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "../../headers/Symbolic/sWord.h" 3 | 4 | #include 5 | 6 | using namespace mov; 7 | 8 | Effects Effects::neutral{}; 9 | 10 | sWord::sWord(std::string name, primitive_words id) 11 | : sWord(std::move(name), id, Effects()) {} 12 | 13 | sWord::sWord(std::string name, primitive_words id, Effects effects) 14 | : name(std::move(name)), id(id), effects(std::move(effects)) { 15 | locals.reserve(10); 16 | } 17 | 18 | bool sWord::branchy() { 19 | return 20 | id == primitive_words::BRANCH || 21 | id == primitive_words::BRANCHIF || 22 | id == primitive_words::EXIT; 23 | } 24 | -------------------------------------------------------------------------------- /sources/SystemExec.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "../headers/SystemExec.h" 12 | #include "../headers/Print.h" 13 | 14 | std::string exec(const std::string& command) { 15 | 16 | println("$ ", command); 17 | 18 | std::array buffer{}; 19 | std::string result; 20 | std::unique_ptr pipe(popen(command.c_str(), "r"), 
pclose); 21 | if (!pipe) { 22 | throw std::runtime_error("popen() failed!"); 23 | } 24 | while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { 25 | result += buffer.data(); 26 | } 27 | return result; 28 | } 29 | 30 | --------------------------------------------------------------------------------