├── .gitignore ├── LICENSE ├── README.md ├── examples └── hello.bla └── src ├── code_gen.odin ├── main.odin ├── parse.odin ├── pe.odin ├── pe_types.odin ├── tokenizer.odin └── util.odin /.gitignore: -------------------------------------------------------------------------------- 1 | /*.bat 2 | /*.exe 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2022 by Ginger Bill. 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any purpose 4 | with or without fee is hereby granted. 5 | 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 7 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 8 | FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 9 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 10 | OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 11 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 12 | THIS SOFTWARE. 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blaise 2 | 3 | Blaise (pronounced blɛz, after [Blaise Pascal](https://wikipedia.org/wiki/Blaise_Pascal)) is an educational language influenced by [Niklaus Wirth](https://wikipedia.org/wiki/Niklaus_Wirth)'s language [PL/0](https://en.wikipedia.org/wiki/PL/0) in his famous book [Algorithms + Data Structures = Programs](https://wikipedia.org/wiki/Algorithms_%2B_Data_Structures_%3D_Programs). 4 | 5 | This language is meant as an educational tool to teach people how to make a compiler from scratch, and all of the minimal stages needed to produce an executable. 
6 | 7 | * [Tokenization/Lexical Analysis](https://wikipedia.org/wiki/Lexical_analysis#Tokenization) 8 | * [Recursive Descent Parsing](https://wikipedia.org/wiki/Recursive_descent_parser) (without an AST) 9 | * [x86](https://wikipedia.org/wiki/X86) 32-bit Code Generation 10 | * [PE File Format](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format) 11 | -------------------------------------------------------------------------------- /examples/hello.bla: -------------------------------------------------------------------------------- 1 | var x, y, z, r, q, a, b, c, n, f := 1 // global variables 2 | 3 | proc multiply { 4 | var a, b 5 | a := x 6 | b := y 7 | z := 0 8 | while b > 0 { 9 | if odd b { 10 | z := z + a 11 | } 12 | a := 2 * a 13 | b := b / 2 14 | } 15 | } 16 | 17 | proc divide { 18 | var w 19 | r := x 20 | q := 0 21 | w := y 22 | while w <= r { 23 | w := 2 * w 24 | } 25 | while w > y { 26 | q := 2 * q 27 | w := w / 2 28 | if w <= r { 29 | r := r - w 30 | q := q + 1 31 | } 32 | } 33 | } 34 | 35 | proc gcd { 36 | var f, g 37 | f := a 38 | g := b 39 | while f != g { 40 | if f < g { g := g - f } 41 | if g < f { f := f - g } 42 | } 43 | c := f 44 | } 45 | 46 | proc fact { 47 | if n > 1 { 48 | f := n * f 49 | n := n - 1 50 | call fact 51 | } 52 | } 53 | 54 | 55 | x := 1003 // input x 56 | y := 7 // input y 57 | call multiply; print z 58 | call divide; print q; print r 59 | a := 13656 // input a 60 | b := 6 // input b 61 | call gcd; print c 62 | n := 1 // input n 63 | call fact; print f 64 | 65 | // n := 0 66 | // repeat { 67 | // print n 68 | // n := n + 1 69 | // } while n < 10 70 | 71 | // n := 0 72 | 73 | -------------------------------------------------------------------------------- /src/code_gen.odin: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "core:bytes" 4 | 5 | regs := []Register{.eax, .ebx, .ecx, .edx, .ebp, .esi, .edi} 6 | Register :: enum u8 { 7 | eax = 0, 8 | ecx = 1, 9 | 
edx = 2, 10 | ebx = 3, 11 | esp = 4, 12 | ebp = 5, 13 | esi = 6, 14 | edi = 7, 15 | 16 | // r8 = 8, 17 | // r9 = 9, 18 | // r10 = 10, 19 | // r11 = 11, 20 | // r12 = 12, 21 | // r13 = 13, 22 | // r14 = 14, 23 | // r15 = 15, 24 | } 25 | 26 | gen: struct { 27 | code: bytes.Buffer, 28 | reg: i32, 29 | pc: u32, 30 | ep: u32, 31 | data_p: u32, 32 | 33 | labels: ^Label, 34 | } 35 | 36 | Label :: struct { 37 | next: ^Label, 38 | pc: u32, 39 | jmp: u32, 40 | } 41 | 42 | data_base: u32 = IMAGE_BASE + TEXT_BASE + IMPORT_SIZE 43 | code_base: u32 = IMAGE_BASE + TEXT_BASE + IMPORT_SIZE 44 | 45 | 46 | INT_SIZE :: 4 47 | 48 | splat_u32 :: #force_inline proc(x: u32) -> (a, b, c, d: u8) { 49 | return u8(x), u8(x>>8), u8(x>>16), u8(x>>24) 50 | } 51 | splat_i32 :: #force_inline proc(x: i32) -> (a, b, c, d: u8) { 52 | return splat_u32(u32(x)) 53 | } 54 | splat :: proc{ 55 | splat_u32, 56 | splat_i32, 57 | } 58 | 59 | emit :: proc(args: ..u8) { 60 | bytes.buffer_write(&gen.code, args) 61 | gen.pc += u32(len(args)) 62 | } 63 | 64 | gen_global_var :: proc(value: i32) { 65 | emit(splat(value)) 66 | code_base += INT_SIZE 67 | } 68 | 69 | gen_proc :: proc(p: ^Procedure, part: enum{Head, Tail}) { 70 | local_count := p.local_count 71 | if part == .Head { 72 | if local_count > 0 { 73 | // sub esp, local_count*INT_SIZE 74 | emit(0x81, 0xec) 75 | emit(splat(local_count * INT_SIZE)) 76 | } 77 | } else { 78 | if local_count > 0 { 79 | // add esp, local_count*INT_SIZE 80 | emit(0x81, 0xc4) 81 | emit(splat(local_count * INT_SIZE)) 82 | } 83 | // ret 84 | emit(0xc3) 85 | } 86 | } 87 | 88 | gen_inc_reg :: proc(c: ^Checker) { 89 | if int(gen.reg) >= len(regs) { 90 | c->fatalf(next(c).pos, "expression nested level has reached maximum") 91 | } 92 | gen.reg += 1 93 | } 94 | 95 | 96 | gen_imm :: proc(c: ^Checker, val: i32) { 97 | // mov REG, val 98 | emit(0xb8+u8(regs[gen.reg])) 99 | emit(splat(val)) 100 | gen_inc_reg(c) 101 | } 102 | 103 | gen_neg :: proc() { 104 | // neg REG 105 | emit(0xf7) 106 | 
emit(0xd8+u8(regs[gen.reg-1])) 107 | } 108 | 109 | @(require_results) 110 | gen_placeholder_label :: proc() -> ^Label { 111 | label := new_clone(Label{nil, gen.pc, 0}) 112 | emit(splat_u32(0x00)) 113 | return label 114 | } 115 | 116 | gen_odd :: proc() -> ^Label { 117 | // test REG, 0x1 118 | // je LOC 119 | gen.reg -= 1 120 | emit(u8(0xf7), u8(0xc0)+u8(regs[gen.reg]), splat_u32(0x01)) 121 | emit(u8(0x0f), u8(0x84)); return gen_placeholder_label() 122 | } 123 | 124 | gen_exit :: proc() { 125 | // push 0x0 126 | // call DWORD PTR ds:imp_exit 127 | // add ESP, 0x4 128 | emit(0x6a, 0x00) 129 | emit(0xff, 0x15, splat(imp_exit)) 130 | emit(u8(0x83), u8(0xc4), u8(0x04)) 131 | } 132 | 133 | gen_push :: proc(reg: Register) { 134 | // push REG 135 | emit(u8(0x50)+u8(reg)) 136 | } 137 | 138 | gen_pop :: proc(reg: Register) { 139 | // pop REG 140 | emit(u8(0x58)+u8(reg)) 141 | } 142 | 143 | gen_ident_mem :: proc(e: Entity) { 144 | assert(e.kind == .Var) 145 | if e.is_global { 146 | // add REG, data_base+addr 147 | emit(0x05+u8(regs[gen.reg])*8, splat(data_base + e.addr)) 148 | } else { 149 | emit(0x84+u8(regs[gen.reg])*8) 150 | emit(0x24, splat(e.addr)) 151 | } 152 | } 153 | 154 | gen_ident :: proc(c: ^Checker, e: Entity) { 155 | if e.kind == .Const { 156 | // mov REG, val 157 | gen_imm(c, e.value) 158 | return 159 | } 160 | // mov REG, MEM 161 | emit(u8(0x8b)) 162 | gen_ident_mem(e) 163 | gen_inc_reg(c) 164 | } 165 | 166 | gen_assignment :: proc(e: Entity) { 167 | gen.reg -= 1 168 | // mov MEM, REG 169 | emit(u8(0x89)) 170 | gen_ident_mem(e) 171 | } 172 | 173 | @(require_results) 174 | gen_cond :: proc(cond: Token_Kind) -> ^Label { 175 | r0 := u8(regs[gen.reg-2]) 176 | r1 := u8(regs[gen.reg-1]) 177 | // cmp REG0, REG1 178 | emit(0x39, 0xc0 + r1*8 + r0) 179 | gen.reg -= 2 180 | 181 | op_code: u8 182 | #partial switch cond { 183 | case .Eq: op_code = 0x85 // jne 184 | case .NotEq: op_code = 0x84 // je 185 | case .Lt: op_code = 0x8d // jnl 186 | case .LtEq: op_code = 0x8f // jg 
187 | case .Gt: op_code = 0x8e // jng 188 | case .GtEq: op_code = 0x8c // jl 189 | case: 190 | unreachable() 191 | } 192 | // OP_CODE LOC 193 | emit(0x0f, op_code) 194 | return gen_placeholder_label() 195 | } 196 | 197 | gen_begin_label :: proc(label: ^Label) { 198 | label.jmp = gen.pc - (label.pc + 4) 199 | label.next = gen.labels 200 | gen.labels = label 201 | } 202 | 203 | gen_jmp :: proc(jpc: u32) { 204 | // jmp LOC 205 | jmp := jpc - (gen.pc + 5) 206 | emit(0xe9, splat(jmp)) 207 | } 208 | 209 | 210 | gen_add :: proc() { 211 | gen.reg -= 1 212 | r0 := u8(regs[gen.reg-1]) 213 | r1 := u8(regs[gen.reg]) 214 | // add REG0, REG1 215 | emit(0x01, 0xc0 + r1*8 + r0) 216 | } 217 | gen_sub :: proc() { 218 | gen.reg -= 1 219 | r0 := u8(regs[gen.reg-1]) 220 | r1 := u8(regs[gen.reg]) 221 | // sub REG0, REG1 222 | emit(0x29, 0xc0 + r1*8 + r0) 223 | } 224 | gen_mul :: proc() { 225 | gen.reg -= 1 226 | r0 := u8(regs[gen.reg-1]) 227 | r1 := u8(regs[gen.reg]) 228 | // imul REG0, REG1 229 | emit(0x0f, 0xaf, 0xc0+r0*8+r1) 230 | } 231 | gen_div :: proc() { 232 | if gen.reg > 2 { 233 | for i in 0.. 2 { 244 | // remainder result is stored EAX 245 | gen_push(.eax) 246 | for i := gen.reg - 2; i >= 0; i -= 1 { 247 | gen_pop(regs[i]) 248 | } 249 | } 250 | gen.reg -= 1 251 | } 252 | gen_mod :: proc() { 253 | if gen.reg > 2 { 254 | for i in 0.. 
2 { 266 | // remainder result is stored EDX 267 | gen_push(.edx) 268 | for i := gen.reg - 2; i >= 0; i -= 1 { 269 | gen_pop(regs[i]) 270 | } 271 | } 272 | gen.reg -= 1 273 | } 274 | 275 | gen_call :: proc(p: ^Procedure) { 276 | call := p.addr - (gen.pc + 5) 277 | // call LOC 278 | emit(0xe8, splat(call)) 279 | 280 | } 281 | 282 | gen_input :: proc(e: Entity) { 283 | if e.is_global { 284 | // push ADDR 285 | emit(0x68, splat(data_base + e.addr)) 286 | } else { 287 | // lea REG, [esp+0x12] 288 | emit(0x8d, 0x84+u8(regs[gen.reg])*8, 0x24, splat(e.addr+12)) 289 | gen_push(regs[gen.reg]) 290 | } 291 | // push p_fmt_addr 292 | // call DWORD PTR ds:imp_scanf 293 | // add esp, 0x8 294 | emit(0x68, splat(p_fmt_addr)) 295 | emit(0xff, 0x15, splat(imp_scanf)) 296 | emit(0x83, 0xc4, 0x08) 297 | } 298 | 299 | gen_print :: proc() { 300 | gen.reg -= 1 301 | gen_push(regs[gen.reg]) 302 | // push p_fmt_addr 303 | // call DWORD PTR ds:imp_printf 304 | // add esp, 0x8 305 | emit(0x68, splat(p_fmt_addr)) 306 | emit(0xff, 0x15, splat(imp_printf)) 307 | emit(0x83, 0xc4, 0x08) 308 | } 309 | -------------------------------------------------------------------------------- /src/main.odin: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "core:os" 4 | import "core:fmt" 5 | 6 | main :: proc() { 7 | exe_name := "blaise" if len(os.args) == 0 else os.args[0] 8 | if len(os.args) < 2 { 9 | fmt.eprintln("%s usage: ", exe_name) 10 | } 11 | filename := os.args[1] 12 | compile(filename) 13 | 14 | // { 15 | // data, ok := os.read_entire_file("blaise.exe") 16 | // defer delete(data) 17 | // assert(ok) 18 | // pe_offset := (^u32)(&data[60])^ 19 | // pe := (^PEFileHeader)(&data[pe_offset]) 20 | // optional_header := (^OptionalHeader64)(&data[pe_offset + size_of(PEFileHeader)]) 21 | // fmt.printf("%v\n", pe) 22 | // fmt.printf("%#v\n", optional_header) 23 | // } 24 | } 
--------------------------------------------------------------------------------
/src/parse.odin:
--------------------------------------------------------------------------------
package main

import "core:fmt"
import "core:os"
import "core:bytes"
import "core:strconv"
import "core:path/filepath"

// A named constant or variable declared in a procedure (or at global scope).
Entity :: struct {
	kind:      Token_Kind, // .Const or .Var
	name:      string,
	value:     i32,        // constant value / initial value of a variable
	addr:      u32,        // data offset for globals, stack offset for locals
	is_global: bool,
	procedure: ^Procedure, // scope that owns this entity
}

Entity_Table :: distinct map[string]Entity

// A scope. The global scope is the procedure named "".
Procedure :: struct {
	parent:      ^Procedure,
	name:        string,
	addr:        u32, // gen.pc at which the procedure's code starts
	local_count: u32, // number of entities declared in a non-global scope
	is_global:   bool,
	entities:    Entity_Table,
}

// Parser state: the scope table, a one-token lookahead and the error sink.
Checker :: struct {
	scopes:         map[string]^Procedure,
	curr_procedure: ^Procedure,
	tokenizer:      Tokenizer,
	prev_token:     Token,
	curr_token:     Token,

	filename: string,
	fatalf:   proc(c: ^Checker, pos: Pos, format: string, args: ..any) -> !,
}

// Prints "file(line:column) message" to stderr and terminates the process.
default_fatalf :: proc(c: ^Checker, pos: Pos, format: string, args: ..any) -> ! {
	fmt.eprintf("%s(%d:%d) ", c.filename, pos.line, pos.column)
	fmt.eprintf(format, ..args)
	fmt.eprintln()
	os.exit(1)
}

// Compiles the source file `filename` and writes "<stem>.exe".
compile :: proc(filename: string) {
	imports := create_imports()
	bytes.buffer_write(&gen.code, imports)

	data, ok := os.read_entire_file(filename)
	if !ok {
		// A missing/unreadable input is a user error, not an internal invariant.
		fmt.eprintf("unable to read file '%s'\n", filename)
		os.exit(1)
	}
	defer delete(data)

	c := &Checker{}
	// The very first `next` below can already report an invalid token, so the
	// error handler must be installed before it runs (parse_program only
	// installs the default later).
	c.fatalf = default_fatalf
	abs_path, abs_ok := filepath.abs(filename)
	c.filename = abs_path if abs_ok else filename

	tokenizer_init(&c.tokenizer, string(data))
	next(c)
	parse_program(c)

	gen_exit()

	buf := bytes.buffer_to_bytes(&gen.code)

	// fixup labels: label.pc is relative to the start of the generated code,
	// which follows the import table in the buffer.
	{
		b := buf[len(imports):]
		for label := gen.labels; label != nil; label = label.next {
			put_i32(b[label.pc:], i32(label.jmp))
		}
	}

	res_path := fmt.aprintf("%s.exe", filepath.stem(filename))
	defer delete(res_path)

	write_exe(res_path, buf, imports)
}


// Grammar related procedures


// Advances the lookahead by one token and returns the token that was current
// before the call.
next :: proc(c: ^Checker) -> (res: Token) {
	token, err := get_token(&c.tokenizer)
	if err != nil && token.kind != .EOF {
		c->fatalf(token.pos, "found invalid token: %v", err)
	}
	c.prev_token, c.curr_token = c.curr_token, token
	return c.prev_token
}

// Consumes the current token and fails unless it is of kind `kind`.
expect :: proc(c: ^Checker, kind: Token_Kind) -> Token {
	token := next(c)
	if token.kind != kind {
		c->fatalf(token.pos, "expected %q, got %s", token_string_table[kind], token_string(token))
	}
	return token
}

// Consumes the current token only if it is of kind `kind`.
allow :: proc(c: ^Checker, kind: Token_Kind) -> bool {
	if c.curr_token.kind == kind {
		next(c)
		return true
	}
	return false
}

// Returns the kind of the current (not yet consumed) token.
peek :: proc(c: ^Checker) -> Token_Kind {
	return c.curr_token.kind
}

// Converts an integer token to its i32 value (wider values are truncated).
// Reports a diagnostic instead of asserting when the literal is malformed,
// e.g. "0x" with no digits or a value that does not fit in 64 bits.
parse_integer_literal :: proc(c: ^Checker, tok: Token) -> i32 {
	val, ok := strconv.parse_i64(tok.text)
	if !ok {
		c->fatalf(tok.pos, "invalid integer literal '%s'", tok.text)
	}
	return i32(val)
}

// Adds a new entity to the current scope. Every entity is given a slot:
// globals get 4 bytes emitted via gen_global_var, locals get the next
// 4-byte stack slot.
declare :: proc(c: ^Checker, pos: Pos, name: string, kind: Token_Kind, value: i32) {
	p := c.curr_procedure
	if name in p.entities {
		c->fatalf(pos, "redeclaration of '%s'", name)
	}
	addr: u32
	if p.is_global {
		addr = gen.data_p
		gen.data_p += 4
		gen_global_var(value)
	} else {
		addr = p.local_count * 4
		p.local_count += 1
	}

	p.entities[name] = Entity{
		kind      = kind,
		name      = name,
		value     = value,
		addr      = addr,
		procedure = p,
		is_global = p.is_global,
	}
}


/*
	const_decl = "const" ident ":=" number {"," ident ":=" number} ";" ;
*/
const_decl :: proc(c: ^Checker) {
	expect(c, .Const)
	for {
		name := expect(c, .Ident)
		expect(c, .Assign)
		value := parse_integer_literal(c, expect(c, .Integer))
		declare(c, name.pos, name.text, .Const, value)
		if allow(c, .Semicolon) {
			break
		}
		expect(c, .Comma)
	}
}

/*
	var_decl = "var" ident [":=" number] {"," ident [":=" number]} ";" ;
*/
var_decl :: proc(c: ^Checker) {
	expect(c, .Var)
	for {
		value := i32(0)
		name := expect(c, .Ident)
		if allow(c, .Assign) {
			value = parse_integer_literal(c, expect(c, .Integer))
		}

		declare(c, name.pos, name.text, .Var, value)
		if allow(c, .Semicolon) {
			break
		}
		expect(c, .Comma)
	}
}

/*
	value_decls = { [const_decl] [var_decl] } ;
*/
value_decls :: proc(c: ^Checker) {
	for {
		#partial switch peek(c) {
		case .Const:
			const_decl(c)
		case .Var:
			var_decl(c)
		case:
			return
		}
	}
}


// Creates a scope named `scope` ("" is the global scope), registers it and
// makes it the current one.
push_procedure :: proc(c: ^Checker, scope: string) -> ^Procedure {
	p := new_clone(Procedure{
		entities  = make(Entity_Table),
		name      = scope,
		is_global = scope == "",
		parent    = c.curr_procedure,
	})
	c.scopes[scope] = p
	c.curr_procedure = p
	return p
}

// Returns to the scope that was current before the matching push_procedure.
pop_procedure :: proc(c: ^Checker) {
	c.curr_procedure = c.curr_procedure.parent
}

// Resolves an identifier in the current scope first and the global scope
// second. When `is_assignment` is set the entity must be a variable.
check_ident :: proc(c: ^Checker, tok: Token, is_assignment: bool) -> Entity {
	name := tok.text
	e, ok := c.curr_procedure.entities[name]
	if !ok {
		e, ok = c.scopes[""].entities[name]
	}
	if !ok {
		c->fatalf(tok.pos, "undeclared name '%s'", name)
	}
	if is_assignment && e.kind != .Var {
		c->fatalf(tok.pos, "expected a variable, got '%s'", name)
	}
	return e
}

/*
	factor = ident | number | "(" expression ")";
*/
factor :: proc(c: ^Checker) {
	tok := next(c)
	#partial switch tok.kind {
	case .Ident:
		e := check_ident(c, tok, false)
		gen_ident(c, e)
	case .Integer:
		gen_imm(c, parse_integer_literal(c, tok))
	case .Open_Paren:
		expression(c)
		expect(c, .Close_Paren)
	case:
		c->fatalf(tok.pos, "invalid factor, got %s", token_string(tok))
	}
}

/*
	term = factor {("*"|"/"|"%") factor};
*/
term :: proc(c: ^Checker) {
	factor(c)
	for {
		op := peek(c)
		#partial switch op {
		case .Mul: next(c); factor(c); gen_mul()
		case .Div: next(c); factor(c); gen_div()
		case .Mod: next(c); factor(c); gen_mod()
		case: return
		}
	}
}

/*
	expression = ["+"|"-"] term {["+"|"-"] term};
*/
expression :: proc(c: ^Checker) {
	neg := false
	if allow(c, .Sub) {
		neg = true
	} else if allow(c, .Add) {
		// unary plus is a no-op
	}
	term(c)
	if neg {
		gen_neg()
	}
	for {
		op := peek(c)
		if op != .Add && op != .Sub {
			break
		}
		next(c)
		term(c)
		if op == .Add {
			gen_add()
		} else {
			gen_sub()
		}
	}
}

/*
	condition = "odd" expression |
	            expression ("="|"!="|"<"|"<="|">"|">=") expression;
*/
// Returns the label of the conditional jump that is taken when the condition
// is false; the caller binds it with gen_begin_label.
@(require_results)
condition :: proc(c: ^Checker) -> ^Label {
	if allow(c, .Odd) {
		expression(c)
		return gen_odd()
	}

	expression(c)
	cond := next(c)

	// Validate the operator before parsing the right-hand side so that the
	// diagnostic points at the real problem instead of an "invalid factor".
	#partial switch cond.kind {
	case .Eq, .NotEq, .Lt, .LtEq, .Gt, .GtEq:
		// okay
	case:
		c->fatalf(cond.pos, "comparison operator expected, got %s", token_string(cond))
	}

	expression(c)
	return gen_cond(cond.kind)
}

/*
	procedure_body = "{" value_decls statement_list "}";
*/
procedure_body :: proc(c: ^Checker) {
	expect(c, .Open_Brace)

	value_decls(c)
	// The procedure's entry point is after its declarations.
	c.curr_procedure.addr = gen.pc
	gen_proc(c.curr_procedure, .Head)

	statement_list(c, .Close_Brace)
	expect(c, .Close_Brace)
	gen_proc(c.curr_procedure, .Tail)
}

/*
	statement_list = statement {";" statement} [";"] ;
*/
statement_list :: proc(c: ^Checker, end: Token_Kind) {
	for peek(c) != end {
		statement(c)
		if !allow(c, .Semicolon) {
			break
		}
	}
}

/*
	block = "{" statement {";" statement} [";"] "}";
*/
// `ignore_begin` is set when the caller has already consumed the "{".
block :: proc(c: ^Checker, ignore_begin := false) {
	if !ignore_begin {
		expect(c, .Open_Brace)
	}
	statement_list(c, .Close_Brace)
	expect(c, .Close_Brace)
}

/*
	statement = [ ident ":=" expression
	            | "call" ident
	            | "input" ident
	            | "print" expression
	            | "if" condition block [ "else" statement ]
	            | "while" condition block
	            | "repeat" block "while" condition
	            | block ];
*/
statement :: proc(c: ^Checker) {
	check_call :: proc(c: ^Checker, tok: Token) -> ^Procedure {
		p, ok := c.scopes[tok.text]
		if !ok {
			c->fatalf(tok.pos, "undeclared procedure '%s'", tok.text)
		}
		return p
	}

	tok := next(c)
	#partial switch tok.kind {
	case .Ident:
		// ident ":=" expression
		e := check_ident(c, tok, true)
		expect(c, .Assign)
		expression(c)
		gen_assignment(e)

	case .Call:
		// "call" ident
		procedure := expect(c, .Ident)
		p := check_call(c, procedure)
		gen_call(p)
	case .Input:
		// "input" ident
		e := check_ident(c, expect(c, .Ident), true)
		gen_input(e)
	case .Print:
		// "print" expression
		expression(c)
		gen_print()
	case .If:
		// "if" condition block [ "else" statement ]
		label := condition(c)
		block(c)
		if allow(c, .Else) {
			// The then-block must skip the else-branch, otherwise a true
			// condition would execute both branches.
			// jmp END (rel32, patched during label fixup)
			emit(0xe9)
			end_label := gen_placeholder_label()

			// a false condition lands at the start of the else-branch
			gen_begin_label(label)

			#partial switch peek(c) {
			case .If, .Open_Brace:
				statement(c)
			case:
				c->fatalf(tok.pos, "expected an if statement or block after 'else'")
			}
			gen_begin_label(end_label)
		} else {
			gen_begin_label(label)
		}
	case .While:
		// "while" condition block
		wpc := gen.pc
		label := condition(c)
		block(c)
		gen_jmp(wpc)
		gen_begin_label(label)
	case .Repeat:
		// "repeat" block "while" condition
		rpc := gen.pc
		block(c)
		expect(c, .While)
		label := condition(c)
		gen_jmp(rpc)
		gen_begin_label(label)

	case .Var, .Const, .Proc:
		if c.curr_procedure.is_global {
			c->fatalf(tok.pos, "'%s' declarations must be at the top of the file", tok.text)
		} else {
			c->fatalf(tok.pos, "'%s' declarations must be at the top of the procedure's block", tok.text)
		}
	case .Open_Brace:
		// block
		block(c, true)
	case:
		c->fatalf(tok.pos, "invalid statement, got %s", token_string(tok))
	}
}

/*
	procedure = "proc" ident procedure_body ";" ;
*/
procedure :: proc(c: ^Checker) {
	expect(c, .Proc)
	name := expect(c, .Ident)
	scope := name.text
	if scope in c.scopes {
		c->fatalf(name.pos, "%s redeclared", scope)
	}
	push_procedure(c, scope)
	procedure_body(c)
	pop_procedure(c)
	expect(c, .Semicolon)
}

/*
	program = { [const_decl] [var_decl] [procedure] }
	          statement {";" statement} [";"] EOF ;
*/
parse_program :: proc(c: ^Checker) {
	if c.fatalf == nil {
		c.fatalf = default_fatalf
	}

	global := push_procedure(c, "")
	defer pop_procedure(c)

	decls: for {
		#partial switch peek(c) {
		case .Const:
			const_decl(c)
		case .Var:
			var_decl(c)
		case .Proc:
			procedure(c)
		case:
			break decls
		}
	}

	// Execution starts at the first top-level statement, after all global
	// data and procedure bodies.
	gen.ep = TEXT_BASE + IMPORT_SIZE + gen.pc

	assert(c.curr_procedure == global)

	statement_list(c, .EOF)

	allow(c, .Semicolon)
	expect(c, .EOF)
}


/*

program = { [const_decl] [var_decl] [procedure] }
          statement_list EOF ;

statement_list = statement {";" statement} [";"] ;

const_decl = "const" ident ":=" number {"," ident ":=" number} ";" ;

var_decl = "var" ident [":=" number] {"," ident [":=" number]} ";" ;

value_decls = { [const_decl] [var_decl] } ;

procedure = "proc" ident "{" value_decls statement_list "}" ";" ;

statement = [ ident ":=" expression
            | "call" ident
            | "input" ident
            | "print" expression
            | "if" condition block [ "else" statement ]
            | "while" condition block
            | "repeat" block "while" condition
            | block ];

expression = ["+"|"-"] term {["+"|"-"] term};

term = factor {("*"|"/"|"%") factor};

factor = ident | number | "(" expression ")";

condition = "odd" expression |
            expression ("="|"!="|"<"|"<="|">"|">=") expression;

*/
-------------------------------------------------------------------------------- /src/pe.odin: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "core:os" 4 | import "core:bytes" 5 | 6 | imp_exit: u32 7 | imp_scanf: u32 8 | imp_printf: u32 9 | 10 | s_fmt_addr: u32 11 | p_fmt_addr: u32 12 | 13 | IMPORT_SIZE :: 100 14 | TEXT_BASE :: 0x1000 15 | IMAGE_BASE :: 0x400000 16 | 17 | imports_buf: [512]byte 18 | 19 | create_imports :: proc() -> (imports: []byte) { 20 | imports = imports_buf[:] 21 | 22 | put_i32(imports[12:], TEXT_BASE+56) // name rva 23 | put_i32(imports[16:], TEXT_BASE+40) // first thunk 24 | // thunk array 25 | put_i32(imports[40:], TEXT_BASE+67) 26 | put_i32(imports[44:], TEXT_BASE+74) 27 | put_i32(imports[48:], TEXT_BASE+82) 28 | 29 | copy(imports[56:], "msvcrt.dll\x00") 30 | copy(imports[67:], "\x00\x00exit\x00") 31 | copy(imports[74:], "\x00\x00scanf\x00") 32 | copy(imports[82:], "\x00\x00printf\x00") 33 | copy(imports[91:], "%d\x00") 34 | copy(imports[94:], "%d\n\x00") 35 | 36 | imports = imports[:IMPORT_SIZE] 37 | 38 | imp_exit = IMAGE_BASE + TEXT_BASE + 40 39 | imp_scanf = IMAGE_BASE + TEXT_BASE + 44 40 | imp_printf = IMAGE_BASE + TEXT_BASE + 48 41 | 42 | s_fmt_addr = IMAGE_BASE + TEXT_BASE + 91 43 | p_fmt_addr = IMAGE_BASE + TEXT_BASE + 94 44 | 45 | return 46 | } 47 | 48 | write_exe :: proc(filename: string, code: []byte, imports: []byte) { 49 | write_padding :: proc(b: ^bytes.Buffer, n: int) { 50 | for _ in 0.. 
string { 133 | if tok.kind == .Semicolon && tok.text == "\n" { 134 | return "newline" 135 | } 136 | return token_string_table[tok.kind] 137 | } 138 | 139 | 140 | tokenizer_init :: proc(t: ^Tokenizer, data: string) { 141 | t^ = Tokenizer{pos = {line=1}, data = data} 142 | next_rune(t) 143 | if t.r == utf8.RUNE_BOM { 144 | next_rune(t) 145 | } 146 | } 147 | 148 | next_rune :: proc(t: ^Tokenizer) -> rune #no_bounds_check { 149 | if t.offset >= len(t.data) { 150 | t.r = utf8.RUNE_EOF 151 | } else { 152 | t.offset += t.w 153 | t.r, t.w = utf8.decode_rune_in_string(t.data[t.offset:]) 154 | t.pos.column = t.offset - t.curr_line_offset 155 | if t.offset >= len(t.data) { 156 | t.r = utf8.RUNE_EOF 157 | } 158 | } 159 | return t.r 160 | } 161 | 162 | 163 | get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { 164 | skip_whitespace :: proc(t: ^Tokenizer, on_newline: bool) { 165 | loop: for t.offset < len(t.data) { 166 | switch t.r { 167 | case ' ', '\t', '\v', '\f', '\r': 168 | next_rune(t) 169 | case '\n': 170 | if on_newline { 171 | break loop 172 | } 173 | t.line += 1 174 | t.curr_line_offset = t.offset 175 | t.pos.column = 1 176 | next_rune(t) 177 | case: 178 | switch t.r { 179 | case 0x2028, 0x2029, 0xFEFF: 180 | next_rune(t) 181 | continue loop 182 | } 183 | break loop 184 | } 185 | } 186 | } 187 | 188 | skip_whitespace(t, t.insert_semicolon) 189 | 190 | token.pos = t.pos 191 | 192 | token.kind = .Invalid 193 | 194 | curr_rune := t.r 195 | next_rune(t) 196 | 197 | block: switch curr_rune { 198 | case utf8.RUNE_ERROR: 199 | err = .Illegal_Character 200 | 201 | case utf8.RUNE_EOF, '\x00': 202 | token.kind = .EOF 203 | err = .EOF 204 | 205 | case '\n': 206 | // If this is reached, treat a newline as if it is a semicolon 207 | t.insert_semicolon = false 208 | token.text = "\n" 209 | token.kind = .Semicolon 210 | t.line += 1 211 | t.curr_line_offset = t.offset 212 | t.pos.column = 1 213 | return 214 | 215 | case 'A'..='Z', 'a'..='z', '_': 216 | token.kind = .Ident 
217 | 218 | for t.offset < len(t.data) { 219 | switch t.r { 220 | case 'A'..='Z', 'a'..='z', '0'..='9', '_': 221 | next_rune(t) 222 | continue 223 | } 224 | break 225 | } 226 | 227 | // This could easily be a `map[string]Token_Kind` 228 | switch str := string(t.data[token.offset:t.offset]); str { 229 | case "var": token.kind = .Var 230 | case "const": token.kind = .Const 231 | case "proc": token.kind = .Proc 232 | 233 | case "input": token.kind = .Input 234 | case "print": token.kind = .Print 235 | case "call": token.kind = .Call 236 | case "odd": token.kind = .Odd 237 | 238 | case "if": token.kind = .If 239 | case "else": token.kind = .Else 240 | case "while": token.kind = .While 241 | case "repeat": token.kind = .Repeat 242 | } 243 | 244 | case '0'..='9': 245 | token.kind = .Integer 246 | if curr_rune == '0' && (t.r == 'x' || t.r == 'X') { 247 | next_rune(t) 248 | for t.offset < len(t.data) { 249 | switch t.r { 250 | case '0'..='9', 'a'..='f', 'A'..='F': 251 | next_rune(t) 252 | continue 253 | } 254 | break 255 | } 256 | break 257 | } 258 | 259 | for t.offset < len(t.data) && '0' <= t.r && t.r <= '9' { 260 | next_rune(t) 261 | } 262 | 263 | 264 | case ':': 265 | token.kind = .Colon 266 | if t.r == '=' { 267 | next_rune(t) 268 | token.kind = .Assign 269 | } 270 | 271 | case '+': token.kind = .Add 272 | case '-': token.kind = .Sub 273 | case '*': token.kind = .Mul 274 | case '%': token.kind = .Mod 275 | 276 | case '.': token.kind = .Period 277 | case ',': token.kind = .Comma 278 | case ';': token.kind = .Semicolon 279 | case '{': token.kind = .Open_Brace 280 | case '}': token.kind = .Close_Brace 281 | case '(': token.kind = .Open_Paren 282 | case ')': token.kind = .Close_Paren 283 | 284 | case '=': token.kind = .Eq 285 | case '<': 286 | token.kind = .Lt 287 | if t.r == '=' { 288 | next_rune(t) 289 | token.kind = .LtEq 290 | } 291 | case '>': 292 | token.kind = .Gt 293 | if t.r == '=' { 294 | next_rune(t) 295 | token.kind = .GtEq 296 | } 297 | case '!': 298 | 
token.kind = .Invalid 299 | if t.r == '=' { 300 | next_rune(t) 301 | token.kind = .NotEq 302 | } 303 | 304 | case '/': 305 | token.kind = .Div 306 | 307 | switch t.r { 308 | case '/': 309 | // Single-line comments 310 | for t.offset < len(t.data) { 311 | r := next_rune(t) 312 | if r == '\n' { 313 | break 314 | } 315 | } 316 | return get_token(t) 317 | case '*': 318 | // None-nested multi-line comments 319 | for t.offset < len(t.data) { 320 | next_rune(t) 321 | if t.r == '*' { 322 | next_rune(t) 323 | if t.r == '/' { 324 | next_rune(t) 325 | return get_token(t) 326 | } 327 | } 328 | } 329 | err = .EOF 330 | } 331 | 332 | case: 333 | err = .Illegal_Character 334 | } 335 | 336 | #partial switch token.kind { 337 | case .Invalid: 338 | // preserve insert_semicolon info 339 | 340 | case .EOF, .Semicolon: 341 | t.insert_semicolon = false 342 | 343 | case .Ident, .Integer, 344 | .Close_Brace, .Close_Paren: 345 | t.insert_semicolon = true 346 | 347 | case: 348 | t.insert_semicolon = false 349 | } 350 | 351 | token.text = string(t.data[token.offset : t.offset]) 352 | return 353 | } 354 | -------------------------------------------------------------------------------- /src/util.odin: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "core:encoding/endian" 4 | 5 | put_i32 :: #force_inline proc(b: []byte, v: i32) { 6 | endian.put_i32(b, .Little, v) 7 | } 8 | 9 | padding :: proc(num, pad: int) -> int { 10 | if num%pad == 0 { 11 | return 0 12 | } 13 | return pad - num%pad 14 | } 15 | --------------------------------------------------------------------------------