├── .gitignore ├── .inferconfig ├── LICENSE.txt ├── README.md ├── build.in.ninja ├── build.sh ├── config.sh ├── dev.sh ├── docs ├── calling-convention.md ├── elf-spec-v1.1.txt ├── elf-spec-v1.2.pdf ├── elf │ ├── header.html │ ├── index.html │ ├── intro.html │ ├── lddynamic.html │ ├── ldheader.html │ ├── ldintro.html │ ├── relocation.html │ ├── sections.html │ ├── string-table.html │ └── symbol-table.html ├── guiding-principles.md ├── ir-if-cond-gen-elseb.txt ├── link-thoughts.md ├── syntax │ ├── if.w │ ├── structname.w │ └── ttype.w ├── the-zen-of-python.txt ├── typeid.md └── x86-64-register-encodings.txt ├── example ├── consts.w ├── factorial.w ├── future-borrow-move.w ├── future-lambda.w ├── future-ownership.w ├── future-type-functions-generics.w ├── future-type-variants.w ├── future-where.w └── mem.w ├── experimental └── x86_64-backend │ ├── asm.c │ ├── asm.h │ ├── elf │ ├── builder.c │ ├── builder.h │ ├── builder_asm.c │ ├── elf.h │ ├── file.c │ └── file.h │ ├── elf64.c │ ├── elf64.h │ └── x86-64.c ├── misc ├── asmlab │ ├── hello-c.c │ ├── hello-c.elf.dis.txt │ ├── hello-c.s │ ├── hello1.elf.dis.txt │ ├── hello1.s │ ├── hello1.sh │ ├── mini1-32.elf.dis.txt │ ├── mini1-32.s │ ├── mini1.elf.dis.txt │ ├── mini1.s │ ├── mini2.elf.dis.txt │ ├── mini2.s │ └── start-linux-vm.sh ├── etc │ └── mini2.s ├── filter-compdb.py ├── gen_ops.py ├── gen_parselet_map.py └── test-asm-out.sh ├── src ├── build │ ├── build.h │ ├── buildctx.c │ ├── source.c │ └── source.h ├── common │ ├── array.c │ ├── array.h │ ├── array_test.c │ ├── assert.c │ ├── assert.h │ ├── buf.c │ ├── buf.h │ ├── defs.h │ ├── dlmalloc.c │ ├── dlmalloc.h │ ├── hash.c │ ├── hash.h │ ├── hashmap.c.h │ ├── hashmap.h │ ├── memory.c │ ├── memory.h │ ├── os.c │ ├── os.h │ ├── ptrmap.c │ ├── ptrmap.h │ ├── rbtree.c.h │ ├── sds.c │ ├── sds.h │ ├── sds_test.c │ ├── str.c │ ├── str.h │ ├── test.c │ ├── test.h │ ├── thread.c │ ├── thread.h │ ├── thread_pthread.c.h │ ├── thread_pthread.h │ ├── tstyle.c │ ├── tstyle.h │ ├── 
unicode.c │ └── unicode.h ├── convlit.c ├── convlit.h ├── ir │ ├── arch_base.lisp │ ├── block.c │ ├── builder.c │ ├── builder.h │ ├── constcache.c │ ├── fun.c │ ├── ir.h │ ├── op.c │ ├── op.h │ ├── pkg.c │ ├── repr.c │ └── value.c ├── main.c ├── parse │ ├── ast.c │ ├── ast.h │ ├── parse.c │ ├── parse.h │ ├── parseint.c │ ├── parseint.h │ ├── resolve_sym.c │ ├── resolve_type.c │ ├── scan.c │ ├── scan.h │ ├── token.c │ └── token.h ├── sym.c ├── sym.h ├── typeid.c ├── typeid.h ├── types.c └── types.h └── test ├── emptyfile └── file-no-final-line-break /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.o 3 | *.a 4 | *.d 5 | *.gz 6 | *.elf 7 | *.elf.hex 8 | /*.sublime-* 9 | /_local 10 | /build 11 | /build.ninja 12 | /builddeps 13 | * copy 14 | -------------------------------------------------------------------------------- /.inferconfig: -------------------------------------------------------------------------------- 1 | // Config file for infer (https://fbinfer.com/) 2 | // - for switches options, the value is a JSON boolean (true or false, without quotes) 3 | // - for integers, the value is a JSON integer (without quotes) 4 | // - string options have string values 5 | // - path options have string values, and are interpreted relative to the location of the 6 | // .inferconfig file 7 | // - cumulative options are JSON arrays of the appropriate type 8 | // 9 | // Note: This JSON flavor supports comments but not extra trailing commas. 
10 | { 11 | "cxx": false, // don't analyze C++ 12 | "results-dir": "build/infer", 13 | "skip-analysis-in-path": [ "src/dlmalloc.c" ], 14 | "disable-issue-type": ["DEAD_STORE"] 15 | } 16 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2019-2020, Rasmus Andersson 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wlang 2 | 3 | Programming language in development 4 | 5 | ```txt 6 | ./dev.sh example/factorial.w 7 | ``` 8 | 9 | Notes: 10 | 11 | - Implemented in C11. 12 | - Uses dlmalloc instead of libc malloc, making it portable to wasm etc. 
13 | - `./dev.sh []` — build and run product (incremental) 14 | - `./dev.sh -lldb []` — build and run product in debugger (incremental) 15 | - `./dev.sh -analyze` — run incremental code analyzer on uncommitted changes (incremental) 16 | - `./build.sh` — build release product and exit 17 | - `./build.sh -g` — build debug product and exit 18 | - `./build.sh -analyze` — analyze entire project using [Infer](https://fbinfer.com/) 19 | - `./build.sh -test` — build & run all tests and generate code coverage reports. 20 | - Debug products are built with Clang address sanitizer by default. 21 | To disable asan/msan, edit the `build.in.ninja` file. 22 | 23 | Requirements for building: 24 | 25 | - [clang](https://clang.llvm.org/) version >=7 26 | - [Ninja](https://ninja-build.org/) version >=1.3 27 | - Bash or a bash-compatible shell, for running the build scripts 28 | - [Python 3](https://www.python.org/) used for code generation 29 | - [Infer](https://fbinfer.com/) used for code analysis (optional) 30 | 31 | If you're on macOS, install everything you need with `brew install clang python ninja infer`.
32 | -------------------------------------------------------------------------------- /build.in.ninja: -------------------------------------------------------------------------------- 1 | ninja_required_version = 1.3 2 | 3 | builddir = build 4 | 5 | cflags = $ 6 | -std=c11 $ 7 | -Wall -Wuninitialized -Wmissing-field-initializers -Wconditional-uninitialized $ 8 | -Wno-nullability-completeness -Wno-unused-function $ 9 | -fcolor-diagnostics 10 | 11 | lflags = 12 | 13 | cflags_opt = $cflags -Oz -DNDEBUG 14 | lflags_opt = $lflags -O3 -flto 15 | 16 | cflags_dev = $cflags -O0 -g -DDEBUG 17 | lflags_dev = $lflags -g -O0 -fno-lto 18 | 19 | cflags_test = $cflags_dev -DW_TEST_BUILD -fprofile-instr-generate -fcoverage-mapping 20 | lflags_test = $lflags_dev -fprofile-instr-generate -fcoverage-mapping 21 | 22 | # https://clang.llvm.org/docs/AddressSanitizer.html 23 | # 24 | # -fno-omit-frame-pointer 25 | # Leave frame pointers. Allows the fast unwinder to function properly. 26 | # 27 | # -fno-optimize-sibling-calls 28 | # Disable tail calls to improve stack traces 29 | # 30 | cflags_dev_asan = $cflags_dev $ 31 | -fsanitize=address $ 32 | -fsanitize-address-use-after-scope $ 33 | -fno-omit-frame-pointer $ 34 | -fno-optimize-sibling-calls 35 | lflags_dev_asan = $lflags_dev -fsanitize=address 36 | 37 | rule compile_obj 38 | command = clang -MMD -MF $out.d $cflags -c -o $out $in 39 | depfile = $out.d 40 | 41 | rule link 42 | command = clang $lflags -o $out $in 43 | 44 | rule gen_ops 45 | command = python3 misc/gen_ops.py 46 | 47 | rule gen_parselet_map 48 | command = python3 misc/gen_parselet_map.py $out 49 | 50 | 51 | CONFIG_REPLACE_BUILDS 52 | 53 | build src/ir/op.c: gen_ops src/ir/arch_base.lisp src/types.h src/parse/token.h 54 | build $builddir/gen_parselet_map.marker: gen_parselet_map src/parse/parse.c 55 | 56 | build release: phony | $builddir/gen_parselet_map.marker $builddir/wp 57 | build debug: phony | $builddir/gen_parselet_map.marker $builddir/wp.g 58 | build test: phony 
| $builddir/gen_parselet_map.marker $builddir/wp.test 59 | 60 | default debug 61 | -------------------------------------------------------------------------------- /config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | cd "$(dirname "$0")" 3 | 4 | sources=() 5 | for f in $(find src -type f -name '*.c' | sort); do 6 | sources+=( $f ) 7 | done 8 | 9 | # : 10 | # Each name should have corresponding $cflags_ and $lflags_ defined in build.in.ninja 11 | products=( \ 12 | opt:wp \ 13 | dev:wp.g \ 14 | test:wp.test \ 15 | ) 16 | 17 | builddir=build 18 | TF=$builddir/.build.ninja 19 | mkdir -p "$builddir" 20 | rm -f "$TF" 21 | touch "$TF" 22 | 23 | for product in ${products[@]}; do 24 | TUPLE=(${product//:/ }) 25 | name=${TUPLE[0]} 26 | exe=${TUPLE[1]} 27 | objects=() 28 | 29 | echo "# --------------------------------------------------" >> "$TF" 30 | echo "# ${name} (\$builddir/${exe})" >> "$TF" 31 | 32 | for srcfile in ${sources[@]}; do 33 | 34 | # only include *_test.c files in the "test" target 35 | if [[ "$srcfile" == *"_test.c" ]] && [[ "$name" != "test" ]]; then 36 | # echo "skip test $srcfile for target $name" 37 | continue 38 | fi 39 | 40 | objfile=$(dirname "$srcfile")/$(basename "$srcfile" .c).o 41 | objfile=\$builddir/obj/${name}/${objfile//src\//} 42 | objects+=( "$\n ${objfile}" ) 43 | 44 | echo "build ${objfile}: compile_obj $srcfile" >> "$TF" 45 | echo " cflags = \$cflags_${name}" >> "$TF" 46 | done 47 | 48 | echo -e "build \$builddir/${exe}: link ${objects[@]}" >> "$TF" 49 | echo " lflags = \$lflags_${name}" >> "$TF" 50 | echo "" >> "$TF" 51 | done 52 | 53 | sed -E "/CONFIG_REPLACE_BUILDS/r $TF" "build.in.ninja" \ 54 | | sed -E "/CONFIG_REPLACE_BUILDS/d" \ 55 | > build.ninja 56 | 57 | rm "$TF" 58 | -------------------------------------------------------------------------------- /docs/calling-convention.md: -------------------------------------------------------------------------------- 1 | # 
Calling convention 2 | 3 | Considerations: 4 | 5 | - Where are arguments stored? (Stack, registers, which registers?) 6 | - Where are return values stored? 7 | - Are registers caller-save, callee-save or a hybrid? 8 | 9 | ## Where are arguments & return values stored? 10 | 11 | Go stores all arguments and return values on the stack and does not use registers at all. 12 | The Go team has discussed changing this to make use of registers. 13 | They estimate a 5-10%[^1] performance gain. 14 | However, the Go authors recognize that there are some considerable downsides to passing 15 | arguments in registers: It is more complex and has a higher implementation and maintenance cost 16 | for the compiler. It also makes Go's stack traces—which include argument values—really 17 | tricky to implement (since past frames' argument values would be over-written.) 18 | [There's a discussion on github/golang/go.](https://github.com/golang/go/issues/18597) 19 | 20 | It's worth noting that Go decided to not make this change and stick with pure stack use. 21 | Apart from increased complexity, the reasons were grounded in legacy. 22 | 23 | Most programming languages and VMs make use of registers for arguments and return values 24 | because in practice, even with pure stack calling, registers are clobbered and need to be 25 | saved anyway.
An example: 26 | 27 | fun foo(a, b, c int) int { a + c * d } 28 | fun bar(x, y int) int { 29 | foo(x + y, x * y, 2) + 10 30 | } 31 | fun main -> bar(1, 2) 32 | 33 | SSA IR: 34 | 35 | fun foo (int,int,int)->int 36 | v0 = arg 0 # a 37 | v1 = arg 1 # b 38 | v2 = arg 2 # c 39 | v3 = mul v2 v1 # c * d 40 | v4 = add v0 v3 # tmp' = a + v3 41 | ret v4 42 | 43 | fun bar (int,int)->int 44 | v0 = arg 0 # x 45 | v1 = arg 1 # y 46 | v2 = add v0 v1 # x + y 47 | v3 = mul v0 v1 # x * y 48 | v4 = const 2 49 | v5 = params v2, v3, v4 50 | v6 = call foo v5 51 | v7 = result 0 52 | ret v7 53 | 54 | fun main 55 | v0 = const 1 56 | v1 = const 2 57 | v2 = params v0, v1 58 | v3 = call bar v2 59 | ret 60 | 61 | Looking at the main function calling to `bar`, here's what it looks like with arguments and 62 | return values on the stack: 63 | 64 | bar: # (int,int)->int 65 | | stack now looks like this: 66 | | 100-96 1 | arg "x", 4 bytes 67 | | 96-92 2 | arg "y", 4 bytes 68 | | 92-84 return address | 8 bytes (64-bit) 69 | | -- 84 ---- ---------------- 70 | | 71 | push %rbp | save stack pointer; store value of rbp to stack 72 | mov %rsp, %rbp | make stack pointer the base pointer 73 | mov 16(%rbp), %rax | load "x" argument into rax (84+16 = 100) 74 | mov 12(%rbp), %rbx | load "y" argument into rbx (84+12 = 96) 75 | mov %rbx, %rcx | copy "y" so that "add" does not over-write it 76 | add %rax, %rcx | x + y -> rcx 77 | mul %rax, %rbx | x * y -> rbx (over-writes "y") 78 | mov $2 %rax | store constant 2 in rax (over-writes "x") 79 | | At this point: rcx=v2, rbx=v3, rax=v4 80 | mov %rcx, -4(%rbp) | store v2 on stack 81 | mov %rbx, -8(%rbp) | store v3 on stack 82 | mov %rax, -12(%rbp) | store v4 on stack 83 | call foo 84 | mov -12(%rbp), %rax | load result from stack into rax 85 | add $10, %rax | + 10 -> rax 86 | mov %rax, 4(%rbp) | store result value to stack 87 | ret 88 | 89 | main: 90 | mov $1, -4(%rbp) # store "x" argument on stack 91 | mov $2, -8(%rbp) # store "y" argument on stack 92 | | 93 | | 
stack now looks like this: 94 | | -- 100 ---- ---------------- 95 | | 100-96 1 | arg "x", 4 bytes 96 | | 96-92 2 | arg "y", 4 bytes 97 | | 98 | call bar 99 | | ignore return value 100 | ret 101 | 102 | 103 | Pros & cons, pure stack vs registers: 104 | - (+stack) Simple implementation 105 | - (+stack) Portable 106 | - In practice assembly and lowered IR are not portable for other reasons. 107 | - "Portable" here means that the strategy and code generation do not need 108 | to be customized for different machine targets. 109 | - (+regs) Performance 110 | - (+regs) Uses less memory 111 | 112 | main: 113 | push $1 # decrement rsp, store 1 at (rsp) 114 | push $2 # decrement rsp, store 2 at (rsp) 115 | call main 116 | add $8, %rsp # move stack pointer back (2*4 = 2*sizeof(int)) 117 | ret 118 | 119 | [^1]: [Proposal: Passing Go arguments and results in registers](https://gist.github.com/dr2chase/5a1107998024c76de22e122ed836562d), also referenced in [go review tracker (stale)](https://go-review.googlesource.com/c/proposal/+/35054/) 120 | -------------------------------------------------------------------------------- /docs/elf-spec-v1.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsms/wlang/03eb8e72eaacfa451be06cf398762a70cd7b30c8/docs/elf-spec-v1.2.pdf -------------------------------------------------------------------------------- /docs/elf/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ELF 5 | 6 | 7 | 8 |

Object files

9 | 10 | 15 | 20 | 24 | 27 | 31 | 35 | 36 | 37 |

Program loading and dynamic linking

38 | 39 | 42 | 49 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /docs/elf/ldintro.html: -------------------------------------------------------------------------------- 1 | 2 | Program loading and dynamic linking

3 |

Introduction to Program loading and dynamic linking

4 | This section describes the object file 5 | information and system actions that create running programs. 6 | Some information here applies to all systems; 7 | information specific to one processor resides in 8 | sections marked accordingly. 9 |

10 | Executable and shared object files statically represent programs. 11 | To execute such programs, the system uses the files to create 12 | dynamic program representations, or process images. 13 | As section ''Virtual Address Space'' in Chapter 3 of the 14 | processor supplement describes, a process image has segments that 15 | hold its text, data, stack, and so on. This chapter's major sections 16 | discuss the following: 17 |

    18 |
  • 19 | Program Header. 20 | This section complements Chapter 4, describing 21 | object file structures that relate directly to program execution. 22 | The primary data structure, a program header table, locates 23 | segment images within the file and contains other information 24 | necessary to create the memory image for the program. 25 |
  • 26 | Program Loading. 27 | Given an object file, the system must load 28 | it into memory for the program to run. 29 |
  • 30 | Dynamic linking. 31 | After the system loads the program it must complete 32 | the process image by resolving symbolic references among the object 33 | files that compose the process. 34 |
35 |
36 | NOTE: 37 | The processor supplement defines a naming convention for ELF constants 38 | that have processor ranges specified. Names such as DT_, 39 | PT_, 40 | for processor specific extensions, incorporate the name of the 41 | processor: DT_M32_SPECIAL, for example. 42 | Pre-existing processor 43 | extensions not using this convention will be supported. 44 | 45 | 46 | 47 | 48 | 49 |
Pre-Existing Extensions
DT_JUMP_REL
50 |
51 | Previous 52 | Contents 53 | Next 54 |
55 | 56 | 57 | © 1997, 1998, 1999, 2000 The Santa Cruz Operation, Inc. All rights reserved. 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /docs/elf/string-table.html: -------------------------------------------------------------------------------- 1 | 2 | String Table

3 |

String Table

4 | String table sections hold null-terminated character sequences, 5 | commonly called strings. 6 | The object file uses these strings to represent symbol and section names. 7 | One references a string as an index into the 8 | string table section. 9 | The first byte, which is index zero, is defined to hold 10 | a null character. 11 | Likewise, a string table's last byte is defined to hold 12 | a null character, ensuring null termination for all strings. 13 | A string whose index is zero specifies 14 | either no name or a null name, depending on the context. 15 | An empty string table section is permitted; its section header's sh_size 16 | member would contain zero. 17 | Non-zero indexes are invalid for an empty string table. 18 |

19 | A section header's sh_name 20 | member holds an index into the section header string table 21 | section, as designated by the e_shstrndx 22 | member of the ELF header. 23 | The following figures show a string table with 25 bytes 24 | and the strings associated with various indexes. 25 |

26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 |
Index+0+1+2+3+4+5+6+7+8+9
0\0name.\0Var
10iable\0able
20\0\0xx\0 
74 |


75 | Figure 4-15: String Table Indexes 76 |

77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 |
IndexString
0none
1name.
7Variable
11able
16able
24null string
106 |


107 |

108 | As the example shows, a string table index may refer 109 | to any byte in the section. 110 | A string may appear more than once; 111 | references to substrings may exist; 112 | and a single string may be referenced multiple times. 113 | Unreferenced strings also are allowed. 114 |


115 | Previous 116 | Contents 117 | Next 118 |
119 | 120 | 121 | © 1997, 1998, 1999, 2000 The Santa Cruz Operation, Inc. All rights reserved. 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /docs/ir-if-cond-gen-elseb.txt: -------------------------------------------------------------------------------- 1 | fun main { 2 | z = if true { 3 | a = 4 # avoid block elimination while working on ir builder 4 | y = a + 1 5 | } else { 6 | 0 7 | } 8 | } 9 | 10 | package foo 11 | fun main () 0x10bb82370 12 | b0: 13 | v0 bool = ConstBool [0x1] # 1 use 14 | v1 int = ConstI32 [0x4] # 1 use ; a 15 | v2 int = ConstI32 [0x1] # 1 use 16 | v4 int = ConstI32 [0x0] # 1 use 17 | if v0 -> b1 b3 18 | 19 | b1: <- b0 # b0.then 20 | v3 int = AddI32 v1 v2 # 1 use ; y 21 | cont -> b3 22 | 23 | b3: <- b1 b0 # b0.end 24 | v5 int = Phi v3 v4 # 1 use ; z 25 | ret v5 26 | 27 | 28 | ———————————————————————————————————————————————————————————————————————————————————— 29 | 30 | fun main { 31 | z = if true { 32 | a = 4 # avoid block elimination while working on ir builder 33 | y = a + 1 34 | } 35 | } 36 | 37 | package foo 38 | fun main () 0x10897d370 39 | b0: 40 | v0 bool = ConstBool [0x1] # 1 use 41 | v1 int = ConstI32 [0x4] # 1 use ; a 42 | v2 int = ConstI32 [0x1] # 1 use 43 | v4 int = ConstI32 [0x0] # 1 use 44 | if v0 -> b1 b2 45 | 46 | b1: <- b0 # b0.then 47 | v3 int = AddI32 v1 v2 # 1 use ; y 48 | cont -> b2 49 | 50 | b2: <- b0 b1 # b0.end 51 | v5 int = Phi v3 v4 # 1 use ; z 52 | ret v5 53 | -------------------------------------------------------------------------------- /docs/link-thoughts.md: -------------------------------------------------------------------------------- 1 | Thoughts on linking code 2 | 3 | What if instead of object files, we were to maintain a graph database of all 4 | assembled code? 
5 | 6 | Traditionally a C-like compiler will parse, compile and assemble each source file into 7 | an ELF/Mach-O/etc object file and 8 | finally—when all object files required for a program are available—link them all together 9 | by reading & parsing all these object files just to build a new object (exe) file. 10 | Here's an example of a simple program with five source files: 11 | 12 | main 13 | / | \ 14 | foo bar baz 15 | \ / 16 | util 17 | 18 | main requires foo, bar and baz. foo and bar both require util. 19 | In practice this is not a tree but a list: 20 | 21 | - main -> main.o 22 | - foo -> foo.o 23 | - bar -> bar.o 24 | - baz -> baz.o 25 | - util -> util.o 26 | 27 | A C-like compiler would link foo, bar, baz, util and main objects every time any part changes. 28 | Say we only change baz; we still take the cost of re-linking the tree of foo, bar & util. 29 | 30 | Imagine if these were represented as a tree even as linked objects, not just temporarily 31 | inside the compiler. Then we could link subtrees together: 32 | 33 | main = [main.o, foo+util+bar.o, baz.o] 34 | foo+util+bar.o = [foo.o, bar.o, util.o] 35 | foo+util.o = [foo.o, util.o] # Can be skipped; unused 36 | bar+util.o = [bar.o, util.o] # Can be skipped; unused 37 | 38 | If `baz` changes, we can reuse the subtree object `foo+util+bar.o` 39 | -------------------------------------------------------------------------------- /docs/syntax/if.w: -------------------------------------------------------------------------------- 1 | def foo(x): 2 | return false if !authenticated(x) 3 | true 4 | 5 | print(foo(0)) 6 | print(foo(8)) 7 | -------------------------------------------------------------------------------- /docs/syntax/structname.w: -------------------------------------------------------------------------------- 1 | type Account { id, flag int } #T {flag:iid:i} 2 | fun foo(a Account) { #T ^({flag:iid:i})i 3 | return 0 if a.flag == 0 4 | # ^ stmt if expr 5 | a.id 6 | } 7 | id, flag = 1, 0 8 | x = { id, flag }
#T {flag:iid:i} 9 | foo(x) # ok 10 | y = Account { id } # flag is zero init 11 | foo(y) # ok 12 | foo({ id }) # ok. flag is zero init 13 | # inline type def: 14 | fun bar(a { id int }) {...} 15 | bar({ id }) # block expr or struct init? 16 | type User { 17 | account { id, flag int } # inline type def 18 | name str 19 | } 20 | u = { name: "Sam", account: { id: 3 } } 21 | foo(u.account) # ok 22 | 23 | compile(callback, config) where 24 | config = Config{ 25 | infile: "foo.w", 26 | debug: true, 27 | }, 28 | callback = fun (ev Event) -> log(ev) 29 | 30 | 31 | #---------------------- 32 | 33 | # struct exprs must be prefixed by type to 34 | # disambiguate from block expr 35 | a = { 3 } # block 36 | b = Account { id: 3 } # struct 37 | c = (type { id int }) { id: 3 } # struct 38 | type Account { id int } 39 | 40 | struct Account { id int } 41 | type Foo = struct { id int } 42 | c = (struct { id int }) { id: 3 } # struct 43 | 44 | type Account = { id int } 45 | c = (type _ = { id int }) { id: 3 } 46 | 47 | 48 | -------------------------------------------------------------------------------- /docs/syntax/ttype.w: -------------------------------------------------------------------------------- 1 | type Account = Twitter(handle str) 2 | | Google(email str, id int) 3 | | Local 4 | | Test 5 | # really just compiled to tuples 6 | def signIn(a Account): 7 | switch a 8 | Twitter(h): #... 9 | Google(email, id): #... 10 | Local | Test: #... 11 | # becomes 12 | def signIn(a (int,str)|(int,str,int),(int)): 13 | switch a[0] 14 | case 0: #... 15 | case 1: #... 16 | case 2: case 3: #... 
17 | #---- generics w required type Name: 18 | type Vec3(T) = (T,T,T) 19 | a Vec3(int) = (1,1,0) 20 | b Bec3(float) = (1.0,1.0,0.0) 21 | c = (1,1,0) # == a 22 | -------------------------------------------------------------------------------- /docs/the-zen-of-python.txt: -------------------------------------------------------------------------------- 1 | > PEP 20 -- The Zen of Python 2 | > Aug 19, 2004 by Tim Peters 3 | 4 | Beautiful is better than ugly. 5 | Explicit is better than implicit. 6 | Simple is better than complex. 7 | Complex is better than complicated. 8 | Flat is better than nested. 9 | Sparse is better than dense. 10 | Readability counts. 11 | Special cases aren't special enough to break the rules. 12 | Although practicality beats purity. 13 | Errors should never pass silently. 14 | Unless explicitly silenced. 15 | In the face of ambiguity, refuse the temptation to guess. 16 | There should be one-- and preferably only one --obvious way to do it. 17 | Although that way may not be obvious at first unless you're Dutch. 18 | Now is better than never. 19 | Although never is often better than *right* now. 20 | If the implementation is hard to explain, it's a bad idea. 21 | If the implementation is easy to explain, it may be a good idea. 22 | Namespaces are one honking great idea -- let's do more of those! 
23 | -------------------------------------------------------------------------------- /docs/x86-64-register-encodings.txt: -------------------------------------------------------------------------------- 1 | Enc 8-bit GP 16-bit GP 32-bit GP 64-bit GP 80-bit x87 64-bit MMX 128-bit XMM 256-bit YMM 16-bit Segment 32-bit Control 32-bit Debug 2 | 0.000 (0) AL AX EAX RAX ST0 MMX0 XMM0 YMM0 ES CR0 DR0 3 | 0.001 (1) CL CX ECX RCX ST1 MMX1 XMM1 YMM1 CS CR1 DR1 4 | 0.010 (2) DL DX EDX RDX ST2 MMX2 XMM2 YMM2 SS CR2 DR2 5 | 0.011 (3) BL BX EBX RBX ST3 MMX3 XMM3 YMM3 DS CR3 DR3 6 | 0.100 (4) AH, SPL1 SP ESP RSP ST4 MMX4 XMM4 YMM4 FS CR4 DR4 7 | 0.101 (5) CH, BPL1 BP EBP RBP ST5 MMX5 XMM5 YMM5 GS CR5 DR5 8 | 0.110 (6) DH, SIL1 SI ESI RSI ST6 MMX6 XMM6 YMM6 - CR6 DR6 9 | 0.111 (7) BH, DIL1 DI EDI RDI ST7 MMX7 XMM7 YMM7 - CR7 DR7 10 | 1.000 (8) R8L R8W R8D R8 - MMX0 XMM8 YMM8 ES CR8 DR8 11 | 1.001 (9) R9L R9W R9D R9 - MMX1 XMM9 YMM9 CS CR9 DR9 12 | 1.010 (10) R10L R10W R10D R10 - MMX2 XMM10 YMM10 SS CR10 DR10 13 | 1.011 (11) R11L R11W R11D R11 - MMX3 XMM11 YMM11 DS CR11 DR11 14 | 1.100 (12) R12L R12W R12D R12 - MMX4 XMM12 YMM12 FS CR12 DR12 15 | 1.101 (13) R13L R13W R13D R13 - MMX5 XMM13 YMM13 GS CR13 DR13 16 | 1.110 (14) R14L R14W R14D R14 - MMX6 XMM14 YMM14 - CR14 DR14 17 | 1.111 (15) R15L R15W R15D R15 - MMX7 XMM15 YMM15 - CR15 DR15 18 | -------------------------------------------------------------------------------- /example/consts.w: -------------------------------------------------------------------------------- 1 | const start = 5 2 | const one = 1 3 | 4 | var foo int 5 | const no = false 6 | const t = int 7 | const f = fun -> 1 8 | 9 | fun main { 10 | var x = true 11 | factorial(start) 12 | } 13 | 14 | # Factorial function 15 | fun factorial(n int) t { 16 | const zero = 0 17 | if n == zero { 18 | one 19 | } else { 20 | n * factorial(n - 1) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /example/factorial.w: 
-------------------------------------------------------------------------------- 1 | # < 2 | # <= 3 | # << 4 | # <<= 5 | # > 6 | # >= 7 | # >> 8 | # >>= 9 | 10 | fun main { 11 | # a, b = 1, 2 + 1 12 | # z = 20 as int8 13 | # a = z as int16 14 | # a = int16(20) 15 | # b = int64(arg0) 16 | # k = x / y * z # oops! Right-associate but should be left-associative 17 | 18 | # a = 1 + 2 # 1 left & right are untyped 19 | # a = 2 + (1 as uint32) # 2 left is untyped, right is typed 20 | # a = (1 as uint32) + 2 # 3 left is typed, right is untyped 21 | # a = (1 as uint32) + (2 as uint32) # 4 left & right are typed 22 | 23 | # a = 4 24 | # b = a 25 | # y = b + 1 26 | 27 | z = if true { 28 | a = 4 # avoid block elimination while working on ir builder 29 | y = a + 1 30 | } else { 31 | 0 32 | } 33 | 34 | z 35 | 36 | # factorial(start) 37 | } 38 | 39 | # fun foo(i int) -> i 40 | 41 | # # Factorial function 42 | # fun factorial(n int) int { 43 | # if n <= 0 { 44 | # 1 45 | # } else { 46 | # n * factorial(n - 1) 47 | # } 48 | # } 49 | 50 | # fun factorial(n float32) float32 { 51 | # if n <= 0.0 { 52 | # 1.0 53 | # } else { 54 | # n * factorial(n - 1.0) 55 | # } 56 | # } 57 | 58 | # fun factorial(n int) int { 59 | # # y = 3 60 | # # x, y, _ = 1, 2, 3 61 | # # t = (1, 2, 3) 62 | # # xs = for x in [1,2,3] { x * 2 } 63 | # # if n <= 0 1 else n * factorial(n - 1) 64 | # if n <= 0 { 65 | # 1 66 | # } else { 67 | # n * factorial(n - 1) 68 | # } 69 | # } 70 | -------------------------------------------------------------------------------- /example/future-borrow-move.w: -------------------------------------------------------------------------------- 1 | # 2 | # -- This is just an idea-- 3 | # 4 | # Rust-like borrowing and moving, with a twist. 
5 | # - Things are borrowed by default 6 | # - Moving can be explicit 7 | # 8 | 9 | type Thing { 10 | x int 11 | } 12 | 13 | fun helperTakes(t Thing) { 14 | t.x = 2 # ok to mutate since we own t 15 | } 16 | 17 | fun f1 { 18 | t = Thing(1) 19 | t.x = 0 # ok to mutate since we own t 20 | helperTakesOver(t) # t moves 21 | } 22 | 23 | fun f2 { 24 | t = Thing(1) 25 | helperTakesOver(t) 26 | y = t # no longer alive; error: t moved to helperTakesOver 27 | } 28 | 29 | fun helperBorrows(t &Thing) { 30 | print(t.x) # reading is okay, but... 31 | # t.x = 2 # ...mutation is not, since we are just borrowing t 32 | } 33 | 34 | fun f3 { 35 | t = Thing(1) 36 | helperBorrows(t) 37 | t.x = 0 # ok to mutate since we still own t 38 | } 39 | 40 | fun f4 { 41 | # borrowing prevents moving 42 | t = Thing(1) 43 | y = &t # y borrows t 44 | helperTakes(t) # error: cannot move t; borrowed by y 45 | } 46 | 47 | fun f5 { 48 | # scope is important 49 | t = Thing(1) 50 | { 51 | y = &t # y borrows t 52 | } 53 | helperTakes(t) # ok; no borrowed refs in scope 54 | } 55 | -------------------------------------------------------------------------------- /example/future-ownership.w: -------------------------------------------------------------------------------- 1 | # Exploring a variant of the Rust model 2 | # 3 | # - Each value in Rust has a variable that’s called its owner. 4 | # - There can only be one owner at a time. 5 | # - When the owner goes out of scope, the value will be dropped. 
6 | # 7 | # https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html 8 | 9 | type User { 10 | id int 11 | name str 12 | emails [str] 13 | } 14 | 15 | fun print(u &User) # borrows u 16 | fun addEmail(u User, email &str) User # takes ownership of u, borrows email, returns u 17 | fun store(u User) # takes ownership of u 18 | 19 | fun example1 { 20 | u = User(id=0, name="sam") # heap-alloc + assign pointer 21 | u = addEmail(u, "sam@hawtmail.com") 22 | print(u) # print borrows u 23 | store(u) # u moves to store(); local u is invalid 24 | print(u) # error! u has moved to store 25 | } 26 | 27 | fun example2 { 28 | u = User(id=0, name="sam") 29 | { 30 | t = timer(fun { 31 | # u in here is an immutable borrowed reference 32 | print(u) # ok; print just reads 33 | store(u) # error! can't move reference u to store 34 | }) 35 | # u is immutable here as a reference has been borrowed 36 | print(u) # ok; print just reads 37 | store(u) # error! can't move borrowed u to store 38 | } 39 | # t is gone thus nothing borrows u anymore; u is mutable and can be moved 40 | store(u) # ok; u moved to store 41 | print(u) # error! 
u has moved to store 42 | } 43 | -------------------------------------------------------------------------------- /example/future-type-functions-generics.w: -------------------------------------------------------------------------------- 1 | # generics w required type Name 2 | type Vec3(T) = (T, T, T) 3 | type Tup3(Y) = (Y, Y, Y) 4 | a Vec3(int) = (1,1,0) 5 | b Vec3(float) = (1.0,1.0,0.0) 6 | c = (1,1,0) # == a 7 | d Tup3(float) = (1.0,1.0,0.0) # == b 8 | -------------------------------------------------------------------------------- /example/future-type-variants.w: -------------------------------------------------------------------------------- 1 | # OCaml / ReasonML style types 2 | type Account = Twitter(str) 3 | | Google(str, int) 4 | | Local 5 | | Test 6 | 7 | fun signIn(a Account) { 8 | switch a { 9 | Twitter(handle) -> print "Sign in to twitter as @$handle" 10 | Google(email, id) -> print "Sign in to Google with #$id $email" 11 | Local | Test -> print "Use local computer user" 12 | } 13 | } 14 | 15 | # really just compiles to tuples. 
The above becomes: 16 | fun signIn(a (int,str)|(int,str,int)|(int)) { 17 | switch a[0] { 18 | case 0: 19 | handle = a[1] 20 | print("Sign in to twitter as @$handle") 21 | case 1: 22 | email, id = a[1:] 23 | print("Sign in to Google with #$id $email") 24 | case 2: case 3: 25 | print("Use local computer user") 26 | } 27 | } 28 | 29 | # ReasonML syntax: 30 | # 31 | # type account = Twitter(string) 32 | # | Google(string, int) 33 | # | Local 34 | # | Test 35 | # 36 | # let a = Twitter("bobby99") 37 | # let b = Google("bob@gmail.com", 123556) 38 | # 39 | # let signIn = switch (a) { 40 | # | Twitter(handle) => "Sign in to twitter as @$handle" 41 | # | Google(email, id) => "Sign in to Google with #$id $email" 42 | # | Local | Test => "Use local computer user" 43 | # }; 44 | # 45 | -------------------------------------------------------------------------------- /example/future-where.w: -------------------------------------------------------------------------------- 1 | # where 2 | # 3 | # Similar to Haskell's "where" 4 | # Similar to Rust's "where" 5 | # 6 | # Note: This may be a bad idea. 
7 | # 8 | 9 | fun fmtSyntaxErrors(errors [Error]) { 10 | errors.map(e -> 11 | logger.warn("$severity in $file:$line:$col: $error$snippet") where { 12 | severity = if e.severity == nil "error" else e.severity 13 | line, col, snippet = switch e.loc { 14 | nil -> (0,0,"") 15 | Location(source, line, col) -> { 16 | line, col, switch source.IndexOfNth('\n', line - 1) { 17 | nil -> "" 18 | i -> "\n" + source[i:i+1] 19 | } 20 | } 21 | } 22 | } 23 | ) 24 | } 25 | 26 | fun fmtSyntaxErrors(errors [Error]) { 27 | errors.map(e -> { 28 | severity = if e.severity == nil "error" else e.severity 29 | line, col, snippet = switch e.loc { 30 | nil -> (0,0,"") 31 | Location(source, line, col) -> { 32 | line, col, switch source.IndexOfNth('\n', line - 1) { 33 | nil -> "" 34 | i -> "\n" + source[i:i+1] 35 | } 36 | } 37 | } 38 | logger.warn("$severity in $file:$line:$col: $error$snippet") 39 | }) 40 | } 41 | 42 | # Python-esque 43 | 44 | fun fmtSyntaxErrors(errors [Error]): 45 | errors.map(e -> 46 | logger.warn("$severity in $file:$line:$col: $error$snippet") where: 47 | severity = if e.severity == nil "error" else e.severity 48 | line, col, snippet = switch e.loc: 49 | nil -> (0,0,"") 50 | Location(source, line, col) -> 51 | line, col, switch source.IndexOfNth('\n', line - 1): 52 | nil -> "" 53 | i -> "\n" + source[i:i+1] ) 54 | 55 | 56 | fun fmtSyntaxErrors(errors [Error]): 57 | errors.map(e -> { 58 | severity = if e.severity == nil "error" else e.severity 59 | line, col, snippet = switch e.loc: 60 | nil -> (0,0,"") 61 | Location(source, line, col) -> 62 | line, col, switch source.IndexOfNth('\n', line - 1): 63 | nil -> "" 64 | i -> "\n" + source[i:i+1] 65 | logger.warn("$severity in $file:$line:$col: $error$snippet") 66 | }) 67 | -------------------------------------------------------------------------------- /example/mem.w: -------------------------------------------------------------------------------- 1 | # comment 2 | 3 | 4 | # const lol int = 5 5 | # var foo Foo 6 | # var red, 
green int = 4, 5 7 | # var x int = 8 8 | # var A, B, C int 9 | # var a, b int = 1, 2 10 | # var r, g, b = 255, 128, 5 11 | # r, g, b = 255, 128, (g = 5) 12 | 13 | # fun lol(int, int32, Foo) int64 14 | # var f fun(int, int32) int64 15 | 16 | const start = 5 17 | 18 | fun main { 19 | # var x = 1 20 | factorial(start) 21 | } 22 | 23 | fun factorial(n int) int { 24 | if n == 0 { 25 | 1 26 | } else { 27 | n * factorial(n - 1) 28 | } 29 | } 30 | 31 | # fun multiply(x, y int, z int32) int { 32 | # if x > y { 33 | # x * y * z 34 | # } else if x == 0 { 35 | # return 8 36 | # } else { 37 | # x / y * z 38 | # } 39 | # } 40 | 41 | 42 | 43 | # z = { x = 6; 5 * x } 44 | 45 | # # r, g, b = 255, 128, g = 5 # invalid: 46 | # # (Assign = 47 | # # (ExprList 48 | # # (Ident r) 49 | # # (Ident g) 50 | # # (Ident b)) 51 | # # (Assign = 52 | # # (ExprList 53 | # # (Int 255) 54 | # # (Int 128) 55 | # # (Ident g)) 56 | # # (Int 5))) 57 | 58 | # 4 + # let's add four 59 | # 5 + # and five to 60 | # 6 # six 61 | # foo + bar * baz 62 | 63 | # oändlig # C3 A4 64 | # 😀 = 1337 65 | 66 | # # !$lol; int # another comment 67 | # # foo * bar + 8 68 | # # const lol, foo, bar = 9, 7, 0 69 | # # var cat = 6 70 | # # x ++ 71 | # # y -- 72 | # # 3 * 9 73 | # # -1 + 5 74 | 75 | # fun multiply (x, y int, z i32) int { 76 | # x * y * z 77 | # } 78 | 79 | # # multiply = (x, y int, z i32) -> { 80 | # # x * y * z 81 | # # } 82 | 83 | # # fun map(c Collection, f (T,int)->str) str 84 | # # fun map(c Collection, f fun(T,int)str) str 85 | 86 | # # names = map(entries, (entry, index) -> entry.name) 87 | -------------------------------------------------------------------------------- /experimental/x86_64-backend/asm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | void AsmELF(); 4 | -------------------------------------------------------------------------------- /experimental/x86_64-backend/elf/builder.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../../array.h" 3 | #include "../../buf.h" 4 | 5 | typedef struct ELFData ELFData; 6 | typedef struct ELFSec ELFSec; 7 | typedef struct ELFProg ELFProg; 8 | typedef struct ELFBuilder ELFBuilder; 9 | 10 | typedef enum ELFErr { 11 | ELF_OK = 0, 12 | ELF_E_UNSPECIFIED, 13 | } ELFErr; 14 | 15 | typedef enum ELFMode { 16 | ELFMode32, 17 | ELFMode64, 18 | } ELFMode; 19 | 20 | // Section header 21 | typedef struct ELFSec { 22 | ELFBuilder* builder; // owning builder 23 | ELFData* data; // section data pointer. May be NULL. 24 | u16 index; // section index (offset in b.shv initially, index after assembly) 25 | // ELF type-agnostic members of Elf32_Shdr & Elf64_Shdr 26 | u32 type; // Type of section (sh_type) 27 | u32 name; // Section name, index in shstrtab (sh_name) 28 | u32 flags; // Bitflags ELF_SHF_* (sh_flags) 29 | ELFSec* link; // Index of another section (like sh_link) 30 | // Data used during assembly 31 | union { 32 | Elf32_Shdr sh32; 33 | Elf64_Shdr sh64; 34 | }; 35 | } ELFSec; 36 | 37 | // Program header 38 | typedef struct ELFProg { 39 | ELFBuilder* builder; // owning builder 40 | ELFData* data; // segment data pointer. May be NULL. 41 | // ELF type-agnostic members of Elf32_Phdr & Elf64_Phdr 42 | u32 type; // (p_type) 43 | u32 flags; // (p_flags) 44 | // Data used during assembly 45 | union { 46 | u32 align32; 47 | u64 align64; 48 | }; 49 | } ELFProg; 50 | 51 | // ELFData represents a segment and/or section data. 52 | // Referenced by at least one of either a section header or a program header (or both.) 
53 | typedef struct ELFData { 54 | ELFBuilder* builder; 55 | Array secv; // [ELFSec*] section headers referencing this data 56 | void* _secv_storage[1]; 57 | Array progv; // [ELFProg*] program headers referencing this data 58 | void* _progv_storage[1]; 59 | Buf buf; // the data 60 | // Data used during assembly 61 | union { 62 | u32 offs32; 63 | u64 offs64; 64 | }; 65 | } ELFData; 66 | 67 | // Builder 68 | typedef struct ELFBuilder { 69 | Memory mem; // allocator (NULL = global allocator) 70 | ELFMode mode; 71 | ELFMachine machine; 72 | u8 encoding; // ELF_DATA_* constant. Set to best-guess at init based on machine. 73 | Array dv; // data segments [ELFData*] 74 | Array shv; // section headers [ELFSec*] 75 | Array phv; // program headers [ELFProg*] 76 | // special sections (pointers into shv) 77 | ELFSec* shstrtab; // ".shstrtab" Section Header string table section 78 | ELFSec* strtab; // ".strtab" General string table section 79 | ELFSec* symtab; // ".symtab" General symbol table section 80 | } ELFBuilder; 81 | 82 | 83 | // Initialize a builder for use. 84 | void ELFBuilderInit(ELFBuilder* b, ELFMachine m, Memory nullable mem); 85 | 86 | // Free all memory used by the builder (does not free memory for b itself.) 87 | void ELFBuilderFree(ELFBuilder* b); 88 | 89 | // Allocate a new data to be linked with a section and/or program header. 90 | ELFData* ELFBuilderNewData(ELFBuilder* b); 91 | 92 | // Add a new section header of type with name which optionally references data. 93 | ELFSec* ELFBuilderNewSec(ELFBuilder* b, const char* name, u32 type, ELFData* data); 94 | 95 | // Add a new program header of type with flags which optionally references data. 96 | ELFProg* ELFBuilderNewProg(ELFBuilder* b, u32 type, u32 flags, ELFData* data); 97 | 98 | // Add a new SYMTAB section named name, which stores its names in strtab. 
99 | ELFSec* ELFBuilderNewSymtab(ELFBuilder* b, const ELFSec* strtab, const char* name); 100 | 101 | // Retrieves the null-terminated name of the section, as provided to ELFBuilderNewSec. 102 | const char* ELFSecName(const ELFSec* sec); 103 | 104 | // Append a name to a string table. Return its index. strtab->type must be STRTAB. 105 | u32 ELFStrtabAppend(ELFSec* strtab, const char* name); 106 | 107 | // Look up a name in a string table. nameindex is a byte offset. 108 | const char* ELFStrtabLookup(const ELFSec* sec, u32 nameindex); 109 | 110 | // Add a symbol with name to symtab, originating in section with index shndx. 111 | // Returns a pointer to the symbol. 112 | // The returned pointer is only valid until the next call to ELFSymtabAdd* as it 113 | // references memory that might change during a call. 114 | Elf32_Sym* ELFSymtabAdd32(ELFSec* symtab, ELFSec* sec, const char* name, u8 bind, u8 typ, u32 val); 115 | Elf64_Sym* ELFSymtabAdd64(ELFSec* symtab, ELFSec* sec, const char* name, u8 bind, u8 typ, u64 val); 116 | 117 | // Assemble ELF file 118 | ELFErr ELFBuilderAssemble(ELFBuilder* b, Buf* buf); 119 | -------------------------------------------------------------------------------- /experimental/x86_64-backend/elf/file.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // An ELF file has two views into its data: 4 | // 1. the program header shows the segments used at run time, whereas 5 | // 2. the section header lists the set of sections of the binary. 6 | 7 | typedef struct ELFFile { 8 | const char* nullable name; 9 | const u8* buf; 10 | size_t len; 11 | const char* shstrtab; // pointer into buf of shstrtab. NULL if none. 
12 | } ELFFile; 13 | 14 | void ELFFileInit(ELFFile* f, const char* nullable name, const u8* data, size_t len); 15 | bool ELFFileValidate(const ELFFile* f, FILE* nullable errlogfp); 16 | static const char* ELFFileName(const ELFFile* f, const char* defaultname); 17 | 18 | // Access basic information 19 | static u8 ELFFileClass(const ELFFile* f); // ELF_CLASS_{NONE,32,64} 20 | 21 | // Access headers 22 | static const Elf32_Ehdr* ELFFileEH32(const ELFFile* f); 23 | static const Elf64_Ehdr* ELFFileEH64(const ELFFile* f); 24 | static const Elf32_Phdr* ELFFilePH32(const ELFFile* f, u32 index); 25 | static const Elf64_Phdr* ELFFilePH64(const ELFFile* f, u32 index); 26 | static const Elf32_Shdr* ELFFileSH32(const ELFFile* f, u32 index); 27 | static const Elf64_Shdr* ELFFileSH64(const ELFFile* f, u32 index); 28 | 29 | // Print human-readable information 30 | void ELFFilePrint(const ELFFile* f, FILE* fp); 31 | 32 | 33 | // ---------------------------------------------------------- 34 | // inline implementations 35 | 36 | inline static const char* ELFFileName(const ELFFile* f, const char* defaultname) { 37 | return f->name == NULL ? 
defaultname : f->name; 38 | } 39 | 40 | inline static u8 ELFFileClass(const ELFFile* f) { 41 | return f->buf[ELF_EI_CLASS]; 42 | } 43 | 44 | inline static const Elf32_Ehdr* ELFFileEH32(const ELFFile* f) { 45 | return (const Elf32_Ehdr*)f->buf; 46 | } 47 | inline static const Elf64_Ehdr* ELFFileEH64(const ELFFile* f) { 48 | return (const Elf64_Ehdr*)f->buf; 49 | } 50 | 51 | inline static const Elf32_Phdr* ELFFilePH32(const ELFFile* f, u32 index) { 52 | auto eh = ELFFileEH32(f); 53 | return (const Elf32_Phdr*)&f->buf[eh->e_phoff + (sizeof(Elf32_Phdr) * index)]; 54 | } 55 | inline static const Elf64_Phdr* ELFFilePH64(const ELFFile* f, u32 index) { 56 | auto eh = ELFFileEH64(f); 57 | return (const Elf64_Phdr*)&f->buf[eh->e_phoff + (sizeof(Elf64_Phdr) * index)]; 58 | } 59 | 60 | inline static const Elf32_Shdr* ELFFileSH32(const ELFFile* f, u32 index) { 61 | auto eh = ELFFileEH32(f); 62 | return (const Elf32_Shdr*)&f->buf[eh->e_shoff + (sizeof(Elf32_Shdr) * index)]; 63 | } 64 | inline static const Elf64_Shdr* ELFFileSH64(const ELFFile* f, u32 index) { 65 | auto eh = ELFFileEH64(f); 66 | return (const Elf64_Shdr*)&f->buf[eh->e_shoff + (sizeof(Elf64_Shdr) * index)]; 67 | } 68 | -------------------------------------------------------------------------------- /experimental/x86_64-backend/elf64.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../defs.h" 3 | #include "../buf.h" 4 | 5 | typedef struct ELF64 { 6 | Buf buf; // Main buffer (ELF header + program headers + data segments) 7 | u16 phnum; // number of program headers (in buf + sizeof(ELF header)) 8 | Buf shbuf; // section headers 9 | Buf strtab; // string table 10 | Buf shstrtab; // section header string table 11 | Buf symtab; // symbol table 12 | } ELF64; 13 | 14 | void ELF64Init(ELF64* e, Memory nullable mem); 15 | void ELF64Free(ELF64* e); 16 | 17 | inline static Memory* ELF64Memory(ELF64* e) { 18 | return e->buf.mem; 19 | } 20 | 21 | inline static 
Elf64_Ehdr* ELF64GetEH(ELF64* e) { 22 | return (Elf64_Ehdr*)e->buf.ptr; 23 | } 24 | -------------------------------------------------------------------------------- /misc/asmlab/hello-c.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Build: 4 | clang -O0 -S -o hello-c.s hello-c.c 5 | clang -O0 -o hello-c.elf hello-c.c 6 | 7 | Dump ELF & disassembly: 8 | llvm-objdump -D --syms --full-contents --all-headers hello-c.elf > hello-c.elf.dis.txt 9 | 10 | */ 11 | 12 | #include 13 | 14 | int main() { 15 | char *str = "Hello World\n"; 16 | for (int i = 0; i < 3; i++) { 17 | write(1, str, strlen(str)); 18 | } 19 | return 9; 20 | } 21 | -------------------------------------------------------------------------------- /misc/asmlab/hello-c.s: -------------------------------------------------------------------------------- 1 | .text 2 | .file "hello-c.c" 3 | .globl main # -- Begin function main 4 | .p2align 4, 0x90 5 | .type main,@function 6 | main: # @main 7 | .cfi_startproc 8 | # %bb.0: # %entry 9 | pushq %rbp 10 | .cfi_def_cfa_offset 16 11 | .cfi_offset %rbp, -16 12 | movq %rsp, %rbp 13 | .cfi_def_cfa_register %rbp 14 | subq $32, %rsp 15 | movl $0, -4(%rbp) 16 | movabsq $.L.str, %rax 17 | movq %rax, -16(%rbp) 18 | movl $0, -20(%rbp) 19 | .LBB0_1: # %for.cond 20 | # =>This Inner Loop Header: Depth=1 21 | cmpl $3, -20(%rbp) 22 | jge .LBB0_4 23 | # %bb.2: # %for.body 24 | # in Loop: Header=BB0_1 Depth=1 25 | movq -16(%rbp), %rsi 26 | movq -16(%rbp), %rdi 27 | movq %rsi, -32(%rbp) # 8-byte Spill 28 | callq strlen 29 | movl $1, %edi 30 | movq -32(%rbp), %rsi # 8-byte Reload 31 | movq %rax, %rdx 32 | callq write 33 | # %bb.3: # %for.inc 34 | # in Loop: Header=BB0_1 Depth=1 35 | movl -20(%rbp), %eax 36 | addl $1, %eax 37 | movl %eax, -20(%rbp) 38 | jmp .LBB0_1 39 | .LBB0_4: # %for.end 40 | movl $9, %eax 41 | addq $32, %rsp 42 | popq %rbp 43 | .cfi_def_cfa %rsp, 8 44 | retq 45 | .Lfunc_end0: 46 | .size main, .Lfunc_end0-main 47 | 
.cfi_endproc 48 | # -- End function 49 | .type .L.str,@object # @.str 50 | .section .rodata.str1.1,"aMS",@progbits,1 51 | .L.str: 52 | .asciz "Hello World\n" 53 | .size .L.str, 13 54 | 55 | .ident "clang version 10.0.0 (/b/s/w/ir/cache/git/chromium.googlesource.com-external-github.com-llvm-llvm--project cb47b8783017a76c5f2e4b974cfd6b22c9f1d5ff)" 56 | .section ".note.GNU-stack","",@progbits 57 | .addrsig 58 | .addrsig_sym write 59 | .addrsig_sym strlen 60 | -------------------------------------------------------------------------------- /misc/asmlab/hello1.s: -------------------------------------------------------------------------------- 1 | /* 2 | Minimal program with rodata 3 | 4 | VM with llvm preinstalled: 5 | docker run --rm -it -v "$PWD:/src" rsms/emsdk 6 | 7 | Build & Run 8 | clang -nostdlib -O0 -o hello1.elf hello1.s && ./hello1.elf ; echo $? 9 | 10 | See exact contents: 11 | llvm-objdump -D --syms --full-contents --all-headers hello1.elf > hello1.elf.dis.txt 12 | hexdump -v -C hello1.elf > hello1.elf.hex 13 | 14 | GP regs: eax, ebx, ecx, edx, edi, esi 15 | Special regs: 16 | - ebp base pointer (end address of current stack frame) 17 | - esp current top of the stack (end address of stack) 18 | - eip instruction pointer 19 | - eflags 20 | 21 | Stack notes: 22 | movl (%esp), %eax # indirect addressing. copy top of stack to eax 23 | movl 4(%esp), %eax # base pointer addressing. copy second item on stack to eax 24 | 25 | */ 26 | // constants, system calls 27 | .equ SYS_OPEN, 5 28 | .equ SYS_WRITE, 4 29 | .equ SYS_READ, 3 30 | .equ SYS_CLOSE, 6 31 | .equ SYS_EXIT, 1 32 | // 33 | // options for open (look at usr/include/asm/fcntl.h for various values. 
You can combine them 34 | // by adding them or OR-ing them) 35 | .equ O_RDONLY, 0 36 | .equ O_CREAT_WRONLY_TRUNC, 03101 37 | // 38 | // standard file descriptors 39 | .equ STDIN, 0 40 | .equ STDOUT, 1 41 | .equ STDERR, 2 42 | // 43 | // misc 44 | .equ SYSCALL, 0x80 # Linux syscall interrupt code 45 | .equ EOF, 0 # End of file code 46 | 47 | .section .bss 48 | // Buffer - this is where the data is loaded into from the data file and written from 49 | // into the output file. This should never exceed 16,000 for various reasons. 50 | .equ BUFFER_SIZE, 500 51 | .lcomm BUFFER_DATA, BUFFER_SIZE 52 | 53 | // 4000ff 54 | 55 | // // .data contains mutable constant data 56 | // .section .data 57 | // data_items: # Array of naturally-wide integers 58 | // .long 3,67,34,222,45,75,54,34,44,33,22,11,66,0 59 | // .size data_items, 112 # 14 * 8 (sizeof long) 60 | // .type data_items, @object # mark as "object" in ELF symbol table 61 | 62 | // .text contains immutable executable data 63 | .section .text 64 | .globl _start 65 | 66 | write_hello_to_stdout: // ()->() 67 | pushq %rbp # save stack pointer on stack 68 | movq %rsp, %rbp # make stack pointer the base pointer 69 | movq $SYS_WRITE, %rax # syscall msg id "write to fd" 70 | movq $STDOUT, %rbx # fd = STDOUT 71 | movabsq $hellomsg, %rcx # buffer pointer (becomes VMA addr into .rodata) 72 | movq $12, %rdx # buffer size 73 | int $SYSCALL 74 | movq %rbp, %rsp # restore stack pointer 75 | popq %rbp # restore base pointer 76 | ret 77 | 78 | // main: 79 | // pushq %rbp # save stack pointer on stack 80 | // movq %rsp, %rbp # make stack pointer the base pointer 81 | // subq $8, %rsp # reserve 2xi32 on stack 82 | // call write_hello_to_stdout 83 | // call write_hello_to_stdout 84 | // call write_hello_to_stdout 85 | // movq %rbp, %rsp # restore stack pointer 86 | // popq %rbp # restore base pointer 87 | // movl $9, %eax # return value 9 88 | // ret # jump to address at 8(%rbp) & increment %rbp 89 | 90 | main: 91 | pushq %rbp # save stack 
pointer on stack 92 | movq %rsp, %rbp # make stack pointer the base pointer 93 | subq $4, %rsp # reserve 1xi32 on stack (else call would mess up stack) 94 | movl $0, -4(%rbp) # put 0 in stack slot 1 (local0) 95 | for_cond: 96 | cmpl $3, -4(%rbp) # compare local0 with 3 (how many times we print) 97 | jge for_end # if local0 >= 3 then jump to end 98 | call write_hello_to_stdout 99 | movl -4(%rbp), %eax # load local0 into rAX 100 | addl $1, %eax # add 1 to rAX 101 | movl %eax, -4(%rbp) # store rAX to local0 102 | jmp for_cond # loop 103 | for_end: 104 | movl -4(%rbp), %eax # return the number of times we printed "Hello" 105 | // movl $9, %eax # return value 9 106 | movq %rbp, %rsp # restore stack pointer 107 | popq %rbp # restore base pointer 108 | ret # pop return address from (%rsp) and jump to it 109 | 110 | _start: 111 | call main # call main; return value is in %eax 112 | // movl $42, %ebx # exit status in ebx 113 | movl %eax, %ebx # %ebx holds the return status 114 | movl $SYS_EXIT, %eax # %eax holds the syscall message ID (1="exit") 115 | int $SYSCALL 116 | 117 | 118 | .section .rodata 119 | hellomsg: 120 | .asciz "Hello world\n" 121 | .size hellomsg, 13 122 | .type hellomsg, @object 123 | 124 | .section .data 125 | hellomsg2: 126 | .asciz "O hai world\n" 127 | .size hellomsg2, 13 128 | .type hellomsg2, @object 129 | 130 | 131 | // // example that exits with the CLI argument count 132 | // _start: 133 | // movq %rsp, %rbp 134 | // movl 0(%rbp), %ebx # argc is at stack pointer 135 | // # Note: 8(%rbp)=argv[0], 16(%rbp)=argv[1], ... 
(64-bit) 136 | // movl $SYS_EXIT, %eax # %eax holds the syscall message ID (1="exit") 137 | // int $IN_LINUX_SYSCALL # interrupt "syscall" 138 | 139 | 140 | // // minimal program that writes "hello world\n" to stdout 141 | // _start: 142 | // movq $SYS_WRITE, %rax # syscall msg id "write to fd" 143 | // movq $STDOUT, %rbx # fd = STDOUT 144 | // movabsq $hellomsg, %rcx # buffer pointer (becomes VMA addr into .rodata) 145 | // movq $13, %rdx # buffer size 146 | // int $SYSCALL 147 | // movl $0, %ebx # %ebx holds the return status 148 | // movl $SYS_EXIT, %eax # %eax holds the syscall message ID (1="exit") 149 | // int $SYSCALL 150 | 151 | 152 | // // the most minimal program; just exit (using classic interrupt syscall) 153 | // _start: 154 | // movl $42, %ebx # exit status 155 | // movl $1, %eax # syscall message "exit" 156 | // int $0x80 # interrupt "syscall" 157 | 158 | // // the most minimal program; just exit (using modern syscall op) 159 | // // See: https://github.com/torvalds/linux/blob/v3.13/arch/x86/syscalls/syscall_64.tbl#L69 160 | // // See: https://github.com/torvalds/linux/blob/v3.13/arch/x86/kernel/entry_64.S#L569-L591 161 | // _start: 162 | // movq $60, %rax # syscall no "exit" 163 | // movq $42, %rdi # exit status 164 | // syscall 165 | -------------------------------------------------------------------------------- /misc/asmlab/hello1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | cd "$(dirname "$0")" 3 | 4 | echo "Building hello1.s -> hello1.elf" 5 | clang -nostdlib -O0 -o hello1.elf hello1.s 6 | 7 | echo "Dumping disassemly hello1.elf -> hello1.elf.dis.txt" 8 | llvm-objdump -D --syms --full-contents --all-headers hello1.elf > hello1.elf.dis.txt 9 | 10 | echo "Running ./hello1.elf" 11 | ./hello1.elf 12 | echo $? 
13 | -------------------------------------------------------------------------------- /misc/asmlab/mini1-32.elf.dis.txt: -------------------------------------------------------------------------------- 1 | 2 | mini1-32.elf: file format ELF32-i386 3 | 4 | architecture: i386 5 | start address: 0x08048054 6 | 7 | Program Header: 8 | LOAD off 0x00000000 vaddr 0x08048000 paddr 0x08048000 align 2**12 9 | filesz 0x00000062 memsz 0x00000062 flags r-x 10 | 11 | Dynamic Section: 12 | Sections: 13 | Idx Name Size VMA Type 14 | 0 00000000 00000000 15 | 1 .text 0000000e 08048054 TEXT 16 | 2 .symtab 00000060 00000000 17 | 3 .strtab 00000019 00000000 18 | 4 .shstrtab 00000021 00000000 19 | 20 | SYMBOL TABLE: 21 | 08048054 l d .text 00000000 .text 22 | 08048054 .text 00000000 _start 23 | 08049062 .text 00000000 __bss_start 24 | 08049062 .text 00000000 _edata 25 | 08049064 .text 00000000 _end 26 | Contents of section .text: 27 | 8048054 b8010000 00bb2a00 000089e5 0f34 ......*......4 28 | Contents of section .symtab: 29 | 0000 00000000 00000000 00000000 00000000 ................ 30 | 0010 00000000 54800408 00000000 03000100 ....T........... 31 | 0020 06000000 54800408 00000000 10000100 ....T........... 32 | 0030 01000000 62900408 00000000 10000100 ....b........... 33 | 0040 0d000000 62900408 00000000 10000100 ....b........... 34 | 0050 14000000 64900408 00000000 10000100 ....d........... 35 | Contents of section .strtab: 36 | 0000 005f5f62 73735f73 74617274 005f6564 .__bss_start._ed 37 | 0010 61746100 5f656e64 00 ata._end. 38 | Contents of section .shstrtab: 39 | 0000 002e7379 6d746162 002e7374 72746162 ..symtab..strtab 40 | 0010 002e7368 73747274 6162002e 74657874 ..shstrtab..text 41 | 0020 00 . 
42 | 43 | Disassembly of section .text: 44 | 45 | 08048054 _start: 46 | 8048054: b8 01 00 00 00 movl $1, %eax 47 | 8048059: bb 2a 00 00 00 movl $42, %ebx 48 | 804805e: 89 e5 movl %esp, %ebp 49 | 8048060: 0f 34 sysenter 50 | 51 | Disassembly of section .symtab: 52 | 53 | 00000000 .symtab: 54 | ... 55 | 14: 54 pushl %esp 56 | 15: 80 04 08 00 addb $0, (%eax,%ecx) 57 | 19: 00 00 addb %al, (%eax) 58 | 1b: 00 03 addb %al, (%ebx) 59 | 1d: 00 01 addb %al, (%ecx) 60 | 1f: 00 06 addb %al, (%esi) 61 | 21: 00 00 addb %al, (%eax) 62 | 23: 00 54 80 04 addb %dl, 4(%eax,%eax,4) 63 | 27: 08 00 orb %al, (%eax) 64 | 29: 00 00 addb %al, (%eax) 65 | 2b: 00 10 addb %dl, (%eax) 66 | 2d: 00 01 addb %al, (%ecx) 67 | 2f: 00 01 addb %al, (%ecx) 68 | 31: 00 00 addb %al, (%eax) 69 | 33: 00 62 90 addb %ah, -112(%edx) 70 | 36: 04 08 addb $8, %al 71 | 38: 00 00 addb %al, (%eax) 72 | 3a: 00 00 addb %al, (%eax) 73 | 3c: 10 00 adcb %al, (%eax) 74 | 3e: 01 00 addl %eax, (%eax) 75 | 40: 0d 00 00 00 62 orl $1644167168, %eax 76 | 45: 90 nop 77 | 46: 04 08 addb $8, %al 78 | 48: 00 00 addb %al, (%eax) 79 | 4a: 00 00 addb %al, (%eax) 80 | 4c: 10 00 adcb %al, (%eax) 81 | 4e: 01 00 addl %eax, (%eax) 82 | 50: 14 00 adcb $0, %al 83 | 52: 00 00 addb %al, (%eax) 84 | 54: 64 90 nop 85 | 56: 04 08 addb $8, %al 86 | 58: 00 00 addb %al, (%eax) 87 | 5a: 00 00 addb %al, (%eax) 88 | 5c: 10 00 adcb %al, (%eax) 89 | 5e: 01 00 addl %eax, (%eax) 90 | 91 | Disassembly of section .strtab: 92 | 93 | 00000000 .strtab: 94 | 0: 00 5f 5f addb %bl, 95(%edi) 95 | 3: 62 73 73 bound %esi, 115(%ebx) 96 | 6: 5f popl %edi 97 | 7: 73 74 jae 116 <.symtab+0x7d> 98 | 9: 61 popal 99 | a: 72 74 jb 116 <.symtab+0x80> 100 | c: 00 5f 65 addb %bl, 101(%edi) 101 | f: 64 61 popal 102 | 11: 74 61 je 97 <.symtab+0x74> 103 | 13: 00 5f 65 addb %bl, 101(%edi) 104 | 16: 6e outsb (%esi), %dx 105 | 17: 64 00 106 | 107 | Disassembly of section .shstrtab: 108 | 109 | 00000000 .shstrtab: 110 | 0: 00 2e addb %ch, (%esi) 111 | 2: 73 79 jae 121 <.symtab+0x7d> 
112 | 4: 6d insl %dx, %es:(%edi) 113 | 5: 74 61 je 97 <.symtab+0x68> 114 | 7: 62 00 bound %eax, (%eax) 115 | 9: 2e 73 74 jae 116 <.symtab+0x80> 116 | c: 72 74 jb 116 <.symtab+0x82> 117 | e: 61 popal 118 | f: 62 00 bound %eax, (%eax) 119 | 11: 2e 73 68 jae 104 <.symtab+0x7c> 120 | 14: 73 74 jae 116 <.symtab+0x8a> 121 | 16: 72 74 jb 116 <.symtab+0x8c> 122 | 18: 61 popal 123 | 19: 62 00 bound %eax, (%eax) 124 | 1b: 2e 74 65 je 101 <.symtab+0x83> 125 | 1e: 78 74 js 116 <.symtab+0x94> 126 | 20: 00 127 | -------------------------------------------------------------------------------- /misc/asmlab/mini1-32.s: -------------------------------------------------------------------------------- 1 | # 2 | # Minimal Linux program, using modern sysenter instruction (32-bit) 3 | # Note that 32-bit x86 does not have the syscall instruction of AMD-origin, but instead 4 | # a sysenter instruction. 5 | # 6 | # For info on Linux syscalls, see: 7 | # https://github.com/torvalds/linux/blob/v3.13/arch/x86/syscalls/syscall_32.tbl 8 | # https://github.com/torvalds/linux/blob/v3.13/arch/x86/kernel/entry_32.S 9 | # 10 | # Build & Run: 11 | # clang -nostdlib -O0 -m32 -o mini1-32.elf mini1-32.s && ./mini1-32.elf ; echo $? 
12 | # 13 | # Disassemble: 14 | # llvm-objdump -D --syms --full-contents --all-headers mini1-32.elf > mini1-32.elf.dis.txt 15 | # 16 | # Dump to view exact contents: 17 | # hexdump -v -C mini1-32.elf > mini1-32.elf.hex 18 | # 19 | .text 20 | .globl _start 21 | 22 | _start: 23 | movl $1, %eax # sysenter number for "exit" 24 | movl $42, %ebx # exit status 25 | movl %esp, %ebp 26 | sysenter 27 | -------------------------------------------------------------------------------- /misc/asmlab/mini1.elf.dis.txt: -------------------------------------------------------------------------------- 1 | 2 | mini1.elf: file format ELF64-x86-64 3 | 4 | architecture: x86_64 5 | start address: 0x0000000000400078 6 | 7 | Program Header: 8 | LOAD off 0x0000000000000000 vaddr 0x0000000000400000 paddr 0x0000000000400000 align 2**21 9 | filesz 0x0000000000000088 memsz 0x0000000000000088 flags r-x 10 | 11 | Dynamic Section: 12 | Sections: 13 | Idx Name Size VMA Type 14 | 0 00000000 0000000000000000 15 | 1 .text 00000010 0000000000400078 TEXT 16 | 2 .symtab 00000090 0000000000000000 17 | 3 .strtab 00000019 0000000000000000 18 | 4 .shstrtab 00000021 0000000000000000 19 | 20 | SYMBOL TABLE: 21 | 0000000000400078 l d .text 00000000 .text 22 | 0000000000400078 .text 00000000 _start 23 | 0000000000601000 .text 00000000 __bss_start 24 | 0000000000601000 .text 00000000 _edata 25 | 0000000000601000 .text 00000000 _end 26 | Contents of section .text: 27 | 400078 48c7c03c 00000048 c7c72a00 00000f05 H..<...H..*..... 28 | Contents of section .symtab: 29 | 0000 00000000 00000000 00000000 00000000 ................ 30 | 0010 00000000 00000000 00000000 03000100 ................ 31 | 0020 78004000 00000000 00000000 00000000 x.@............. 32 | 0030 06000000 10000100 78004000 00000000 ........x.@..... 33 | 0040 00000000 00000000 01000000 10000100 ................ 34 | 0050 00106000 00000000 00000000 00000000 ..`............. 35 | 0060 0d000000 10000100 00106000 00000000 ..........`..... 
36 | 0070 00000000 00000000 14000000 10000100 ................ 37 | 0080 00106000 00000000 00000000 00000000 ..`............. 38 | Contents of section .strtab: 39 | 0000 005f5f62 73735f73 74617274 005f6564 .__bss_start._ed 40 | 0010 61746100 5f656e64 00 ata._end. 41 | Contents of section .shstrtab: 42 | 0000 002e7379 6d746162 002e7374 72746162 ..symtab..strtab 43 | 0010 002e7368 73747274 6162002e 74657874 ..shstrtab..text 44 | 0020 00 . 45 | 46 | Disassembly of section .text: 47 | 48 | 0000000000400078 _start: 49 | 400078: 48 c7 c0 3c 00 00 00 movq $60, %rax 50 | 40007f: 48 c7 c7 2a 00 00 00 movq $42, %rdi 51 | 400086: 0f 05 syscall 52 | 53 | Disassembly of section .symtab: 54 | 55 | 0000000000000000 .symtab: 56 | ... 57 | 1c: 03 00 addl (%rax), %eax 58 | 1e: 01 00 addl %eax, (%rax) 59 | 20: 78 00 js 0 60 | 22: 40 00 00 addb %al, (%rax) 61 | ... 62 | 2d: 00 00 addb %al, (%rax) 63 | 2f: 00 06 addb %al, (%rsi) 64 | 31: 00 00 addb %al, (%rax) 65 | 33: 00 10 addb %dl, (%rax) 66 | 35: 00 01 addb %al, (%rcx) 67 | 37: 00 78 00 addb %bh, (%rax) 68 | 3a: 40 00 00 addb %al, (%rax) 69 | ... 70 | 45: 00 00 addb %al, (%rax) 71 | 47: 00 01 addb %al, (%rcx) 72 | 49: 00 00 addb %al, (%rax) 73 | 4b: 00 10 addb %dl, (%rax) 74 | 4d: 00 01 addb %al, (%rcx) 75 | 4f: 00 00 addb %al, (%rax) 76 | 51: 10 60 00 adcb %ah, (%rax) 77 | ... 78 | 60: 0d 00 00 00 10 orl $268435456, %eax 79 | 65: 00 01 addb %al, (%rcx) 80 | 67: 00 00 addb %al, (%rax) 81 | 69: 10 60 00 adcb %ah, (%rax) 82 | ... 83 | 78: 14 00 adcb $0, %al 84 | 7a: 00 00 addb %al, (%rax) 85 | 7c: 10 00 adcb %al, (%rax) 86 | 7e: 01 00 addl %eax, (%rax) 87 | 80: 00 10 addb %dl, (%rax) 88 | 82: 60 89 | ... 
90 | 8f: 00 91 | 92 | Disassembly of section .strtab: 93 | 94 | 0000000000000000 .strtab: 95 | 0: 00 5f 5f addb %bl, 95(%rdi) 96 | 3: 62 97 | 4: 73 73 jae 115 98 | 6: 5f popq %rdi 99 | 7: 73 74 jae 116 100 | 9: 61 101 | a: 72 74 jb 116 102 | c: 00 5f 65 addb %bl, 101(%rdi) 103 | f: 64 61 104 | 11: 74 61 je 97 105 | 13: 00 5f 65 addb %bl, 101(%rdi) 106 | 16: 6e outsb (%rsi), %dx 107 | 17: 64 00 108 | 109 | Disassembly of section .shstrtab: 110 | 111 | 0000000000000000 .shstrtab: 112 | 0: 00 2e addb %ch, (%rsi) 113 | 2: 73 79 jae 121 <.shstrtab+0x7d> 114 | 4: 6d insl %dx, %es:(%rdi) 115 | 5: 74 61 je 97 <.shstrtab+0x68> 116 | 7: 62 00 2e 73 117 | b: 74 72 je 114 <.shstrtab+0x7f> 118 | d: 74 61 je 97 <.shstrtab+0x70> 119 | f: 62 00 2e 73 120 | 13: 68 73 74 72 74 pushq $1953657971 121 | 18: 61 122 | 19: 62 00 2e 74 123 | 1d: 65 78 74 js 116 <.shstrtab+0x94> 124 | 20: 00 125 | -------------------------------------------------------------------------------- /misc/asmlab/mini1.s: -------------------------------------------------------------------------------- 1 | # 2 | # Minimal Linux program, using modern syscall instruction 3 | # 4 | # For info on Linux syscalls, see: 5 | # https://github.com/torvalds/linux/blob/v3.13/arch/x86/syscalls/syscall_64.tbl#L69 6 | # https://github.com/torvalds/linux/blob/v3.13/arch/x86/kernel/entry_64.S#L569-L591 7 | # 8 | # Build & Run: 9 | # clang -nostdlib -O0 -o mini1.elf mini1.s && ./mini1.elf ; echo $? 
10 | # 11 | # Disassemble: 12 | # llvm-objdump -D --syms --full-contents --all-headers mini1.elf > mini1.elf.dis.txt 13 | # 14 | # Dump to view exact contents: 15 | # hexdump -v -C mini1.elf > mini1.hex 16 | # 17 | .text 18 | .globl _start 19 | 20 | _start: 21 | movq $60, %rax # syscall no "exit" 22 | movq $42, %rdi # exit status 23 | syscall 24 | -------------------------------------------------------------------------------- /misc/asmlab/mini2.elf.dis.txt: -------------------------------------------------------------------------------- 1 | 2 | mini2.elf: file format ELF64-x86-64 3 | 4 | architecture: x86_64 5 | start address: 0x0000000000400078 6 | 7 | Program Header: 8 | LOAD off 0x0000000000000000 vaddr 0x0000000000400000 paddr 0x0000000000400000 align 2**21 9 | filesz 0x0000000000000084 memsz 0x0000000000000084 flags r-x 10 | 11 | Dynamic Section: 12 | Sections: 13 | Idx Name Size VMA Type 14 | 0 00000000 0000000000000000 15 | 1 .text 0000000c 0000000000400078 TEXT 16 | 2 .symtab 00000090 0000000000000000 17 | 3 .strtab 00000019 0000000000000000 18 | 4 .shstrtab 00000021 0000000000000000 19 | 20 | SYMBOL TABLE: 21 | 0000000000400078 l d .text 00000000 .text 22 | 0000000000400078 .text 00000000 _start 23 | 0000000000600084 .text 00000000 __bss_start 24 | 0000000000600084 .text 00000000 _edata 25 | 0000000000600088 .text 00000000 _end 26 | Contents of section .text: 27 | 400078 b8010000 00bb2a00 0000cd80 ......*..... 28 | Contents of section .symtab: 29 | 0000 00000000 00000000 00000000 00000000 ................ 30 | 0010 00000000 00000000 00000000 03000100 ................ 31 | 0020 78004000 00000000 00000000 00000000 x.@............. 32 | 0030 06000000 10000100 78004000 00000000 ........x.@..... 33 | 0040 00000000 00000000 01000000 10000100 ................ 34 | 0050 84006000 00000000 00000000 00000000 ..`............. 35 | 0060 0d000000 10000100 84006000 00000000 ..........`..... 36 | 0070 00000000 00000000 14000000 10000100 ................ 
37 | 0080 88006000 00000000 00000000 00000000 ..`............. 38 | Contents of section .strtab: 39 | 0000 005f5f62 73735f73 74617274 005f6564 .__bss_start._ed 40 | 0010 61746100 5f656e64 00 ata._end. 41 | Contents of section .shstrtab: 42 | 0000 002e7379 6d746162 002e7374 72746162 ..symtab..strtab 43 | 0010 002e7368 73747274 6162002e 74657874 ..shstrtab..text 44 | 0020 00 . 45 | 46 | Disassembly of section .text: 47 | 48 | 0000000000400078 _start: 49 | 400078: b8 01 00 00 00 movl $1, %eax 50 | 40007d: bb 2a 00 00 00 movl $42, %ebx 51 | 400082: cd 80 int $128 52 | 53 | Disassembly of section .symtab: 54 | 55 | 0000000000000000 .symtab: 56 | ... 57 | 1c: 03 00 addl (%rax), %eax 58 | 1e: 01 00 addl %eax, (%rax) 59 | 20: 78 00 js 0 60 | 22: 40 00 00 addb %al, (%rax) 61 | ... 62 | 2d: 00 00 addb %al, (%rax) 63 | 2f: 00 06 addb %al, (%rsi) 64 | 31: 00 00 addb %al, (%rax) 65 | 33: 00 10 addb %dl, (%rax) 66 | 35: 00 01 addb %al, (%rcx) 67 | 37: 00 78 00 addb %bh, (%rax) 68 | 3a: 40 00 00 addb %al, (%rax) 69 | ... 70 | 45: 00 00 addb %al, (%rax) 71 | 47: 00 01 addb %al, (%rcx) 72 | 49: 00 00 addb %al, (%rax) 73 | 4b: 00 10 addb %dl, (%rax) 74 | 4d: 00 01 addb %al, (%rcx) 75 | 4f: 00 84 00 60 00 00 00 addb %al, 96(%rax,%rax) 76 | ... 77 | 5e: 00 00 addb %al, (%rax) 78 | 60: 0d 00 00 00 10 orl $268435456, %eax 79 | 65: 00 01 addb %al, (%rcx) 80 | 67: 00 84 00 60 00 00 00 addb %al, 96(%rax,%rax) 81 | ... 82 | 76: 00 00 addb %al, (%rax) 83 | 78: 14 00 adcb $0, %al 84 | 7a: 00 00 addb %al, (%rax) 85 | 7c: 10 00 adcb %al, (%rax) 86 | 7e: 01 00 addl %eax, (%rax) 87 | 80: 88 00 movb %al, (%rax) 88 | 82: 60 89 | ... 
90 | 8f: 00 91 | 92 | Disassembly of section .strtab: 93 | 94 | 0000000000000000 .strtab: 95 | 0: 00 5f 5f addb %bl, 95(%rdi) 96 | 3: 62 97 | 4: 73 73 jae 115 98 | 6: 5f popq %rdi 99 | 7: 73 74 jae 116 100 | 9: 61 101 | a: 72 74 jb 116 102 | c: 00 5f 65 addb %bl, 101(%rdi) 103 | f: 64 61 104 | 11: 74 61 je 97 105 | 13: 00 5f 65 addb %bl, 101(%rdi) 106 | 16: 6e outsb (%rsi), %dx 107 | 17: 64 00 108 | 109 | Disassembly of section .shstrtab: 110 | 111 | 0000000000000000 .shstrtab: 112 | 0: 00 2e addb %ch, (%rsi) 113 | 2: 73 79 jae 121 <.shstrtab+0x7d> 114 | 4: 6d insl %dx, %es:(%rdi) 115 | 5: 74 61 je 97 <.shstrtab+0x68> 116 | 7: 62 00 2e 73 117 | b: 74 72 je 114 <.shstrtab+0x7f> 118 | d: 74 61 je 97 <.shstrtab+0x70> 119 | f: 62 00 2e 73 120 | 13: 68 73 74 72 74 pushq $1953657971 121 | 18: 61 122 | 19: 62 00 2e 74 123 | 1d: 65 78 74 js 116 <.shstrtab+0x94> 124 | 20: 00 125 | -------------------------------------------------------------------------------- /misc/asmlab/mini2.s: -------------------------------------------------------------------------------- 1 | # 2 | # Minimal Linux program, using legacy interrupt 3 | # 4 | # For info on Linux syscalls, see: 5 | # https://github.com/torvalds/linux/blob/v3.13/arch/x86/syscalls/syscall_64.tbl#L69 6 | # https://github.com/torvalds/linux/blob/v3.13/arch/x86/kernel/entry_64.S#L569-L591 7 | # 8 | # Build & Run: 9 | # clang -nostdlib -O0 -o mini2.elf mini2.s && ./mini2.elf ; echo $? 
10 | # 11 | # Disassemble: 12 | # llvm-objdump -D --syms --full-contents --all-headers mini2.elf > mini2.elf.dis.txt 13 | # 14 | # Dump to view exact contents: 15 | # hexdump -v -C mini2.elf > mini2.hex 16 | # 17 | .text 18 | .globl _start 19 | 20 | _start: 21 | movl $1, %eax # syscall message "exit" 22 | movl $42, %ebx # exit status 23 | int $0x80 # interrupt "syscall" 24 | -------------------------------------------------------------------------------- /misc/asmlab/start-linux-vm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | cd "$(dirname "$0")" 3 | echo "Running docker:rsms/emsdk in $PWD" 4 | docker run --rm -it -v "$PWD:/src" rsms/emsdk 5 | -------------------------------------------------------------------------------- /misc/etc/mini2.s: -------------------------------------------------------------------------------- 1 | # 2 | # Minimal Linux program 3 | # 4 | # Build & Run: 5 | # docker run --rm -it -v "$PWD:/src" rsms/emsdk \ 6 | # bash -c "clang -nostdlib -O0 -o mini2 mini2.s && ./mini2" ; echo $? 7 | # 8 | # Build in 32-bit mode: (interesting for seeing differences to 64-bit) 9 | # docker run --rm -it -v "$PWD:/src" rsms/emsdk \ 10 | # bash -c "clang -nostdlib -O0 -m32 -o mini2_32 mini2.s && ./mini2_32" ; echo $? 
11 | # 12 | # Build with debugging info: 13 | # docker run --rm -it -v "$PWD:/src" rsms/emsdk \ 14 | # clang -nostdlib -O0 -g -o mini2.g mini2.s 15 | # 16 | # Disassemble to see LLVMs view on the binary: 17 | # docker run --rm -it -v "$PWD:/src" rsms/emsdk \ 18 | # llvm-objdump -D --syms --full-contents --all-headers mini2 > mini2.dis.txt 19 | # 20 | # Disassemble with objdump for annotated assembly and opcodes: 21 | # objdump -d -S mini2 22 | # objdump -d -S hello-c 23 | # 24 | # Dump to view exact contents: 25 | # hexdump -v -C mini2 > mini2.hex 26 | # 27 | # View ELF details: 28 | # readelf -a mini2 > mini2.readelf.txt 29 | # 30 | .text 31 | .globl _start 32 | _start: 33 | movl $42, %ebx # exit status 34 | movl $1, %eax # syscall message "exit" 35 | int $0x80 # interrupt "syscall" 36 | -------------------------------------------------------------------------------- /misc/filter-compdb.py: -------------------------------------------------------------------------------- 1 | import sys, json 2 | 3 | INFILE = sys.argv[1] 4 | PREFIX = sys.argv[2] # e.g "build/obj/dev/" 5 | 6 | with open(INFILE, "r") as f: 7 | compdb = json.load(f) 8 | 9 | compdb = [e for e in compdb if e["output"].startswith(PREFIX)] 10 | 11 | 12 | # json.dumps([1, 2, 3, {'4': 5, '6': 7}], separators=(',', ':')) 13 | print(json.dumps(compdb, sort_keys=True, indent=2)) 14 | -------------------------------------------------------------------------------- /misc/gen_parselet_map.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script reads and updates the parselet map in src/parse.c 3 | # 4 | import re, sys, os, os.path 5 | 6 | def err(msg): 7 | print(msg) 8 | sys.exit(1) 9 | 10 | srcdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 11 | os.chdir(srcdir) 12 | 13 | sourcefilename = "src/parse/parse.c" 14 | 15 | with open(sourcefilename, "r") as f: 16 | source = f.read() 17 | 18 | # //!Parselet (TPlusPlus UNARY_POSTFIX) (TMinusMinus 
# Annotations recognized in the C source, written on the line(s) before a function:
#   //!Parselet (TPlusPlus UNARY_POSTFIX) (TMinusMinus UNARY_POSTFIX)
#   //!PrefixParselet TPlus TMinus TStar TSlash
#
# parseletp captures the run of "(Token PRECEDENCE)" specs as group "m" and the name of
# the function that follows as group "fun". Both names are read through groupdict()
# below, so they must be present in the pattern.
parseletp = re.compile(
    r'\n//\s*\!Parselet\s+(?P<m>(?:\([^\)]+\)[\s\r\n\/\/]*)+)\n\s*(?:static|)\s*Node\*\s*(?P<fun>\w+)')
# prefixparseletp: group 1 is the token list, group 2 is the function name.
prefixparseletp = re.compile(
    r'\n//\s*\!PrefixParselet\s+([^\n]+)\n\s*(?:static|)\s*Node\*\s*(\w+)')
splitspecs = re.compile(r'\)[\s\r\n\/\/]*\(')
splitsep = re.compile(r'[\s,]+')

# Keyed by token, e.g. "TPlus". Each value is a 3-item list:
#   [function from //!PrefixParselet, function from //!Parselet, precedence suffix]
parselets = dict()

for m in prefixparseletp.finditer(source):
    fun = m.group(2)
    # strip first: trailing whitespace would otherwise produce an empty token
    for tok in splitsep.split(m.group(1).strip()):
        if not tok:
            continue
        struct_init = parselets.get(tok)
        if struct_init:
            err("duplicate parselet %s for token %s" % (fun, tok))
        parselets[tok] = [fun, "NULL", "MEMBER"]

for m in parseletp.finditer(source):
    md = m.groupdict()
    fun = md["fun"]
    for s in splitspecs.split(md["m"]):
        # each spec is "Token PRECEDENCE", possibly still wrapped in parentheses
        tok, prec = splitsep.split(s.strip("()"), 1)
        struct_init = parselets.get(tok)
        if not struct_init:
            parselets[tok] = ["NULL", fun, prec]
        else:
            # a prefix parselet already exists for tok; fill in the second slot
            if struct_init[1] != "NULL":
                err("duplicate parselet %s for token %s" % (fun, tok))
            struct_init[1] = fun
            struct_init[2] = prec

# Generated text has this shape:
#   static const Parselet parselets[TMax] = {
#     [TComment] = { PLComment, NULL, PREC_LOWEST },
#   };
output = [
    '// automatically generated by %s; do not edit' % __file__,
]
output.append("static const Parselet parselets[TMax] = {")
for tok, struct_init in parselets.items():
    output.append(" [%s] = {%s, %s, PREC_%s}," % (tok, *struct_init))
output.append("};")
output = "\n".join(output)

# The generated map replaces whatever sits between these two marker comments.
startstr = '//PARSELET_MAP_BEGIN\n'
endstr = '\n//PARSELET_MAP_END'
start = source.find(startstr)
end = source.find(endstr, start)
if start == -1:
    err("can not find %r in %s" % (startstr, sourcefilename))
if end == -1:
    err("can not find %r in %s" % (endstr, sourcefilename))
71 | 72 | source2 = source[:start + len(startstr)] + output + source[end:] 73 | 74 | # write changes only if we modified the source 75 | if source2 != source: 76 | print("write", sourcefilename) 77 | with open(sourcefilename, "w") as f: 78 | f.write(source2) 79 | # write "marker" file for ninja/make 80 | with open("build/gen_parselet_map.marker", "w") as f: 81 | f.write("x") 82 | -------------------------------------------------------------------------------- /misc/test-asm-out.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | PROG=$1 3 | 4 | echo "hexdump -> $PROG.hex" 5 | hexdump -v -C "$PROG" | tee "$PROG".hex 6 | 7 | echo "./$PROG" 8 | 9 | PROGNAME=$(basename "$PROG") 10 | pushd "$(dirname "$PROG")" >/dev/null 11 | PROGDIR=$PWD 12 | popd >/dev/null 13 | 14 | docker run --rm -v "$PROGDIR:/mnt1" debian:latest "/mnt1/$PROGNAME" 15 | EXIT_STATUS=$? 16 | echo "Exit status: $EXIT_STATUS" 17 | -------------------------------------------------------------------------------- /src/build/build.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../common/defs.h" 3 | #include "../common/memory.h" 4 | #include "source.h" 5 | 6 | // ErrorHandler callback type 7 | typedef void(ErrorHandler)(const Source*, SrcPos, ConstStr msg, void* userdata); 8 | 9 | // CCtx compilation context 10 | // 11 | // TODO: Rename to "Build" ("the build") 12 | // 13 | typedef struct { 14 | ErrorHandler* errh; 15 | void* userdata; // passed to errh 16 | Source src; 17 | Memory mem; // memory used only during compilation, like AST nodes 18 | } CCtx; 19 | 20 | // initialize and/or recycle a CCtx 21 | void CCtxInit( 22 | CCtx*, 23 | ErrorHandler* errh, 24 | void* userdata, 25 | Str srcname, 26 | const u8* srcbuf, // caller owns 27 | size_t srclen 28 | ); 29 | void CCtxFree(CCtx*); 30 | void CCtxErrorf(const CCtx* cc, SrcPos pos, const char* format, ...); 31 | 
-------------------------------------------------------------------------------- /src/build/buildctx.c: -------------------------------------------------------------------------------- 1 | // CCtx compilation context 2 | #include "build.h" 3 | 4 | // reset and/or initialize a compilation context 5 | void CCtxInit( 6 | CCtx* cc, 7 | ErrorHandler* errh, 8 | void* userdata, 9 | Str srcname, 10 | const u8* srcbuf, 11 | size_t srclen 12 | ) { 13 | // Disabled since srcbuf is owned by caller 14 | // if (cc->src.buf != NULL) { 15 | // free((void*)cc->src.buf); 16 | // cc->src.buf = NULL; 17 | // } 18 | if (cc->src.name != NULL) { 19 | SourceFree(&cc->src); 20 | } 21 | SourceInit(&cc->src, srcname, srcbuf, srclen); 22 | cc->mem = MemoryNew(0); 23 | cc->errh = errh; 24 | cc->userdata = userdata; 25 | } 26 | 27 | 28 | void CCtxFree(CCtx* cc) { 29 | // if (cc->src.buf != NULL) { 30 | // free((void*)cc->src.buf); 31 | // cc->src.buf = NULL; 32 | // } 33 | SourceFree(&cc->src); 34 | MemoryFree(cc->mem); 35 | } 36 | 37 | 38 | void CCtxErrorf(const CCtx* cc, SrcPos pos, const char* format, ...) 
{ 39 | if (cc->errh == NULL) { 40 | return; 41 | } 42 | va_list ap; 43 | va_start(ap, format); 44 | auto msg = sdsempty(); 45 | if (strlen(format) > 0) { 46 | msg = sdscatvprintf(msg, format, ap); 47 | assert(sdslen(msg) > 0); // format may contain %S which is not supported by sdscatvprintf 48 | } 49 | va_end(ap); 50 | cc->errh(&cc->src, pos, msg, cc->userdata); 51 | sdsfree(msg); 52 | } 53 | -------------------------------------------------------------------------------- /src/build/source.c: -------------------------------------------------------------------------------- 1 | #include "build.h" 2 | #include "../common/tstyle.h" 3 | 4 | 5 | void SourceInit(Source* s, Str name, const u8* buf, size_t len) { 6 | s->name = sdsdup(name); 7 | s->buf = buf; 8 | s->len = len; 9 | s->_lineoffsets = NULL; 10 | s->_linecount = 0; 11 | } 12 | 13 | 14 | void SourceFree(Source* s) { 15 | sdsfree(s->name); 16 | s->name = NULL; 17 | if (s->_lineoffsets) { 18 | memfree(NULL, s->_lineoffsets); 19 | s->_lineoffsets = NULL; 20 | } 21 | } 22 | 23 | 24 | static void computeLineOffsets(Source* s) { 25 | assert(s->_lineoffsets == NULL); 26 | 27 | size_t cap = 256; // best guess for common line numbers, to allocate up-front 28 | s->_lineoffsets = (u32*)memalloc(NULL, sizeof(u32) * cap); 29 | s->_lineoffsets[0] = 0; 30 | 31 | u32 linecount = 1; 32 | u32 i = 0; 33 | while (i < s->len) { 34 | if (s->buf[i++] == '\n') { 35 | if (linecount == cap) { 36 | // more lines 37 | cap = cap * 2; 38 | s->_lineoffsets = (u32*)memrealloc(NULL, s->_lineoffsets, sizeof(u32) * cap); 39 | } 40 | s->_lineoffsets[linecount] = i; 41 | linecount++; 42 | } 43 | } 44 | 45 | s->_linecount = linecount; 46 | } 47 | 48 | 49 | LineCol SrcPosLineCol(SrcPos pos) { 50 | Source* s = pos.src; 51 | if (s == NULL) { 52 | // NoSrcPos 53 | LineCol lico = { 0, 0 }; 54 | return lico; 55 | } 56 | 57 | if (!s->_lineoffsets) { 58 | computeLineOffsets(s); 59 | } 60 | 61 | if (pos.offs >= s->len) { dlog("pos.offs=%u >= s->len=%zu", 
pos.offs, s->len); } 62 | assert(pos.offs < s->len); 63 | 64 | u32 count = s->_linecount; 65 | u32 line = 0; 66 | u32 debug1 = 10; 67 | while (count > 0 && debug1--) { 68 | u32 step = count / 2; 69 | u32 i = line + step; 70 | if (s->_lineoffsets[i] <= pos.offs) { 71 | line = i + 1; 72 | count = count - step - 1; 73 | } else { 74 | count = step; 75 | } 76 | } 77 | LineCol lico = { line - 1, line > 0 ? pos.offs - s->_lineoffsets[line - 1] : pos.offs }; 78 | return lico; 79 | } 80 | 81 | 82 | static const u8* lineContents(Source* s, u32 line, u32* out_len) { 83 | if (!s->_lineoffsets) { 84 | computeLineOffsets(s); 85 | } 86 | if (line >= s->_linecount) { 87 | return NULL; 88 | } 89 | auto start = s->_lineoffsets[line]; 90 | const u8* lineptr = s->buf + start; 91 | if (out_len) { 92 | if (line + 1 < s->_linecount) { 93 | *out_len = (s->_lineoffsets[line + 1] - 1) - start; 94 | } else { 95 | *out_len = (s->buf + s->len) - lineptr; 96 | } 97 | } 98 | return lineptr; 99 | } 100 | 101 | 102 | Str SrcPosFmt(Str s, SrcPos pos) { 103 | auto l = SrcPosLineCol(pos); 104 | return sdscatfmt(s, "%s:%u:%u", 105 | pos.src ? pos.src->name : sdsnew(""), l.line + 1, l.col + 1); 106 | } 107 | 108 | 109 | Str SrcPosMsg(Str s, SrcPos pos, ConstStr message) { 110 | auto l = SrcPosLineCol(pos); 111 | s = sdscatfmt(s, "%s%s:%u:%u: %S%s\n", 112 | TStyleTable[TStyle_bold], 113 | pos.src ? 
pos.src->name : sdsnew(""), l.line + 1, l.col + 1, 114 | message, 115 | TStyle_none 116 | ); 117 | s = TStyleNone(s); 118 | 119 | // include line contents 120 | if (pos.src) { 121 | u32 linelen; 122 | auto lineptr = lineContents(pos.src, l.line, &linelen); 123 | if (lineptr != null) { 124 | s = sdscatlen(s, lineptr, linelen); 125 | } 126 | s = sdscatlen(s, "\n", 1); 127 | 128 | // draw a squiggle (or caret when span is unknown) decorating the interesting range 129 | if (l.col > 0) { 130 | // indentation 131 | s = sdsgrow(s, sdslen(s) + l.col, ' '); 132 | } 133 | if (pos.span > 0) { 134 | s = sdsgrow(s, sdslen(s) + pos.span + 1, '~'); 135 | s[sdslen(s)-1] = '\n'; 136 | } else { 137 | s = sdscatlen(s, "^\n", 2); 138 | } 139 | } 140 | 141 | return s; 142 | } 143 | 144 | -------------------------------------------------------------------------------- /src/build/source.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../common/defs.h" 3 | #include "../common/str.h" 4 | 5 | // Source 6 | typedef struct { 7 | Str name; 8 | const u8* buf; // owned by caller 9 | size_t len; // length of buf 10 | u32* _lineoffsets; 11 | u32 _linecount; 12 | } Source; 13 | 14 | // SrcPos 15 | // TODO: considering implementing something like lico and Pos/XPos from go 16 | // https://golang.org/src/cmd/internal/src/pos.go 17 | // https://golang.org/src/cmd/internal/src/xpos.go 18 | typedef struct { 19 | Source* src; // source 20 | u32 offs; // offset into src->buf 21 | u32 span; // span length. 0 = unknown or does no apply. 
22 | } SrcPos; 23 | 24 | // NoSrcPos is the "null" of SrcPos 25 | #define NoSrcPos (({ SrcPos p = {NULL,0,0}; p; })) 26 | 27 | // LineCol 28 | typedef struct { u32 line; u32 col; } LineCol; 29 | 30 | void SourceInit(Source*, Str name, const u8* buf, size_t len); 31 | void SourceFree(Source*); 32 | Str SrcPosMsg(Str s, SrcPos, ConstStr message); 33 | Str SrcPosFmt(Str s, SrcPos pos); // "::" 34 | LineCol SrcPosLineCol(SrcPos); 35 | -------------------------------------------------------------------------------- /src/common/array.c: -------------------------------------------------------------------------------- 1 | #include "array.h" 2 | #include // for qsort_r 3 | 4 | // ARRAY_CAP_STEP defines a power-of-two which the cap must be aligned to. 5 | // This is used to round up growth. I.e. grow by 60 with a cap of 32 would increase the cap 6 | // to 96 (= 32 + (align2(60, ARRAY_CAP_STEP=32) = 64)). 7 | #define ARRAY_CAP_STEP 32 8 | 9 | typedef struct SortCtx { 10 | ArraySortFun* f; 11 | void* userdata; 12 | } SortCtx; 13 | 14 | 15 | static int _sort(void* ctx, const void* s1p, const void* s2p) { 16 | return ((SortCtx*)ctx)->f( 17 | *((const void**)s1p), 18 | *((const void**)s2p), 19 | ((SortCtx*)ctx)->userdata 20 | ); 21 | } 22 | 23 | void ArraySort(Array* a, ArraySortFun* f, void* userdata) { 24 | SortCtx ctx = { f, userdata }; 25 | qsort_r(a->v, a->len, sizeof(void*), &ctx, &_sort); 26 | } 27 | 28 | 29 | void ArrayGrow(Array* a, size_t addl, Memory mem) { 30 | u32 reqcap = a->cap + addl; 31 | u32 cap = align2(reqcap, ARRAY_CAP_STEP); 32 | if (a->onheap || a->v == NULL) { 33 | a->v = memrealloc(mem, a->v, sizeof(void*) * cap); 34 | } else { 35 | // moving array from stack to heap 36 | void** v = (void**)memalloc(mem, sizeof(void*) * cap); 37 | memcpy(v, a->v, sizeof(void*) * a->len); 38 | a->v = v; 39 | a->onheap = true; 40 | } 41 | a->cap = cap; 42 | } 43 | 44 | int ArrayIndexOf(Array* nonull a, void* nullable entry) { 45 | for (u32 i = 0; i < a->len; i++) { 46 | if 
(a->v[i] == entry) { 47 | return (int)i; 48 | } 49 | } 50 | return -1; 51 | } 52 | 53 | void ArrayRemove(Array* a, u32 start, u32 count) { 54 | assert(start + count <= a->len); 55 | // ArrayRemove( [0 1 2 3 4 5 6 7] start=2 count=3 ) => [0 1 5 6 7] 56 | // 57 | for (u32 i = start + count; i < a->len; i++) { 58 | a->v[i - count] = a->v[i]; 59 | } 60 | // [0 1 2 3 4 5 6 7] a->v[5-3] = a->v[5] => [0 1 5 3 4 5 6 7] 61 | // ^ i 62 | // 63 | // [0 1 2 3 4 5 6 7] a->v[6-3] = a->v[6] => [0 1 5 6 4 5 6 7] 64 | // ^ i 65 | // 66 | // [0 1 2 3 4 5 6 7] a->v[7-3] = a->v[7] => [0 1 5 6 7 5 6 7] 67 | // ^ i 68 | // 69 | // len -= count => [0 1 5 6 7] 70 | a->len -= count; 71 | } 72 | 73 | 74 | // ArrayCopy copies src of srclen to a, starting at a.v[start], growing a if needed using m. 75 | void ArrayCopy(Array* nonull a, u32 start, const void* src, u32 srclen, Memory nullable mem) { 76 | u32 capNeeded = start + srclen; 77 | if (capNeeded > a->cap) { 78 | if (a->v == NULL) { 79 | // initial allocation to exactly the size needed 80 | a->v = (void*)memalloc(mem, sizeof(void*) * capNeeded); 81 | a->cap = capNeeded; 82 | a->onheap = true; 83 | } else { 84 | ArrayGrow(a, capNeeded - a->cap, mem); 85 | } 86 | } 87 | memcpy(&a->v[start], src, srclen * sizeof(void*)); 88 | a->len = max(a->len, start + srclen); 89 | } 90 | -------------------------------------------------------------------------------- /src/common/array.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "defs.h" 3 | #include "memory.h" 4 | 5 | // very simple array type 6 | typedef struct { 7 | void** v; 8 | u32 cap; 9 | u32 len; 10 | bool onheap; // false if v is space on stack 11 | } Array; 12 | 13 | #define Array_INIT { NULL, 0, 0, true } 14 | 15 | static void ArrayInit(Array* nonull a); 16 | static void ArrayInitWithStorage(Array* nonull a, void* nonull storage, u32 storagecap); 17 | static void ArrayFree(Array* nonull a, Memory nullable mem); 18 | void 
ArrayGrow(Array* nonull a, size_t addl, nullable Memory mem); // cap=align2(len+addl) 19 | static void ArrayPush(Array* nonull a, void* nullable v, Memory nullable mem); 20 | static void* ArrayPop(Array* nonull a); 21 | void ArrayRemove(Array* nonull a, u32 start, u32 count); 22 | int ArrayIndexOf(Array* nonull a, void* nullable entry); // -1 on failure 23 | 24 | // ArrayCopy copies src of srclen to a, starting at a.v[start], growing a if needed using m. 25 | void ArrayCopy(Array* nonull a, u32 start, const void* src, u32 srclen, Memory nullable m); 26 | 27 | // The comparison function must return an integer less than, equal to, or greater than zero if 28 | // the first argument is considered to be respectively less than, equal to, or greater than the 29 | // second. 30 | typedef int (ArraySortFun)(const void* elem1, const void* elem2, void* userdata); 31 | 32 | // ArraySort sorts the array in place using comparator to rank entries 33 | void ArraySort(Array* a, ArraySortFun* comparator, void* userdata); 34 | 35 | // Macros: 36 | // ArrayForEach(Array* nonull a, TYPE elemtype, NAME elemname) 37 | // 38 | 39 | // ------------------------------------------------------------------------------------------------ 40 | // inline implementations 41 | 42 | inline static void ArrayInit(Array* nonull a) { 43 | a->v = 0; 44 | a->cap = 0; 45 | a->len = 0; 46 | a->onheap = true; 47 | } 48 | 49 | inline static void ArrayInitWithStorage(Array* nonull a, void* nonull ptr, u32 cap){ 50 | a->v = ptr; 51 | a->cap = cap; 52 | a->len = 0; 53 | a->onheap = false; 54 | } 55 | 56 | inline static void ArrayFree(Array* a, Memory mem) { 57 | if (a->onheap) { 58 | memfree(mem, a->v); 59 | 60 | #if DEBUG 61 | a->v = NULL; 62 | a->cap = 0; 63 | #endif 64 | } 65 | } 66 | 67 | inline static void ArrayPush(Array* a, void* v, Memory mem) { 68 | if (a->len == a->cap) { 69 | ArrayGrow(a, 1, mem); 70 | } 71 | a->v[a->len++] = v; 72 | } 73 | 74 | inline static void* ArrayPop(Array* a) { 75 | return 
// ArrayForEach(Array* nonull a, TYPE elemtype, NAME elemname)
//
// Iterates over the entries of a in index order. In the statement that follows the
// macro, LOCALNAME is a variable of type ELEMTYPE* holding the current entry:
//
//   ArrayForEach(&nodes, Node, n) { visit(n); }
//
// - An entry is only read after its index has been checked against the length, so an
//   empty array (including one with v == NULL) is never dereferenced.
// - The length is sampled once, before the first iteration.
// - `break` and `continue` in the body act on the iteration, as in a plain for loop.
// - `a` is evaluated more than once and must therefore be free of side effects.
//
#define ArrayForEach(a, ELEMTYPE, LOCALNAME)                                    \
  /* runs exactly once; exists only to introduce LOCALNAME */                   \
  for (ELEMTYPE* LOCALNAME = NULL, **LOCALNAME##__once = &LOCALNAME;            \
       LOCALNAME##__once != NULL;                                               \
       LOCALNAME##__once = NULL)                                                \
    /* actual for loop */                                                       \
    for (                                                                       \
      u32 LOCALNAME##__i = 0,                                                   \
          LOCALNAME##__end = (a)->len;                                          \
      LOCALNAME##__i < LOCALNAME##__end &&                                      \
        ((LOCALNAME = (ELEMTYPE*)(a)->v[LOCALNAME##__i]), 1);                   \
      LOCALNAME##__i++                                                          \
    ) /* <body> */
(void*)2), 1); 21 | asserteq(ArrayIndexOf(&a, (void*)4), -1); 22 | 23 | asserteq((int)ArrayPop(&a), 3); 24 | asserteq((int)ArrayPop(&a), 2); 25 | asserteq((int)ArrayPop(&a), 1); 26 | 27 | asserteq(a.len, 0); 28 | asserteq(a.cap, ARRAY_CAP_STEP); 29 | ArrayFree(&a, NULL); 30 | } 31 | 32 | { // initially stack allocated, then moves to heap 33 | Array a; void* storage[2]; 34 | ArrayInitWithStorage(&a, storage, 2); 35 | asserteq(a.onheap, false); 36 | ArrayPush(&a, (void*)1, NULL); 37 | asserteq(a.onheap, false); 38 | ArrayPush(&a, (void*)2, NULL); 39 | asserteq(a.onheap, false); 40 | ArrayPush(&a, (void*)3, NULL); // visits ArrayGrow's "move stack to heap" branch 41 | asserteq(a.onheap, true); // should have moved to heap 42 | 43 | asserteq(a.len, 3); 44 | asserteq(a.cap, ARRAY_CAP_STEP); 45 | asserteq((int)a.v[0], 1); 46 | asserteq((int)a.v[1], 2); 47 | asserteq((int)a.v[2], 3); 48 | asserteq((int)ArrayPop(&a), 3); 49 | asserteq((int)ArrayPop(&a), 2); 50 | asserteq((int)ArrayPop(&a), 1); 51 | asserteq(a.len, 0); 52 | asserteq(a.cap, ARRAY_CAP_STEP); 53 | ArrayFree(&a, NULL); 54 | } 55 | 56 | { // ArrayCopy 57 | Array a = Array_INIT; 58 | for (intptr_t i = 0; i < 10; i++) { 59 | ArrayPush(&a, (void*)i, NULL); 60 | } 61 | // copy to an empty array. Causes initial, exact allocation 62 | Array a2 = Array_INIT; 63 | ArrayCopy(&a2, 0, a.v, a.len, NULL); 64 | asserteq(a2.len, 10); 65 | asserteq(a2.cap, 10); // should be exact after copy into empty array, not ARRAY_CAP_STEP 66 | ArrayPush(&a2, (void*)10, NULL); 67 | asserteq(a2.cap, align2(11, ARRAY_CAP_STEP)); // should have grown 68 | 69 | // copy to a non-empty array. 
Causes growth 70 | u32 nitems = (a2.cap - a2.len) + 1; 71 | auto items = (void**)memalloc(NULL, nitems * sizeof(void*)); 72 | auto len1 = a2.len; 73 | ArrayCopy(&a2, len1, items, nitems, NULL); 74 | asserteq(a2.len, len1 + nitems); 75 | memfree(NULL, items); 76 | 77 | ArrayFree(&a2, NULL); 78 | ArrayFree(&a, NULL); 79 | } 80 | 81 | { // ArrayRemove 82 | Array a = Array_INIT; 83 | // a.v = [0 1 2 3 4 5 6 7 8 9] 84 | for (intptr_t i = 0; i < 10; i++) { 85 | ArrayPush(&a, (void*)i, NULL); 86 | } 87 | for (intptr_t i = 0; i < 10; i++) { 88 | asserteq(a.v[i], (void*)i); 89 | } 90 | asserteq(a.len, 10); 91 | 92 | // delete in middle 93 | // [0 1 2 3 4 5 6 7 8 9] => [0 1 6 7 8 9] 94 | // ~~~~~~~ 95 | Array a2 = Array_INIT; 96 | ArrayCopy(&a2, 0, a.v, a.len, NULL); 97 | asserteq(a2.len, 10); 98 | ArrayRemove(&a2, 2, 4); 99 | asserteq(a2.len, 6); 100 | asserteq(a2.v[0], (void*)0); 101 | asserteq(a2.v[1], (void*)1); 102 | asserteq(a2.v[2], (void*)6); 103 | asserteq(a2.v[3], (void*)7); 104 | asserteq(a2.v[4], (void*)8); 105 | asserteq(a2.v[5], (void*)9); 106 | 107 | // delete at beginning 108 | // [0 1 2 3 4 5 6 7 8 9] => [4 5 6 7 8 9] 109 | // ~~~~~~~ 110 | a2.len = 0; 111 | ArrayCopy(&a2, 0, a.v, a.len, NULL); 112 | asserteq(a2.len, 10); 113 | ArrayRemove(&a2, 0, 4); 114 | asserteq(a2.len, 6); 115 | asserteq(a2.v[0], (void*)4); 116 | asserteq(a2.v[1], (void*)5); 117 | asserteq(a2.v[2], (void*)6); 118 | asserteq(a2.v[3], (void*)7); 119 | asserteq(a2.v[4], (void*)8); 120 | asserteq(a2.v[5], (void*)9); 121 | 122 | // delete at end 123 | // [0 1 2 3 4 5 6 7 8 9] => [0 1 2 3 4 5] 124 | // ~~~~~~~ 125 | a2.len = 0; 126 | ArrayCopy(&a2, 0, a.v, a.len, NULL); 127 | asserteq(a2.len, 10); 128 | ArrayRemove(&a2, 6, 4); 129 | asserteq(a2.len, 6); 130 | asserteq(a2.v[0], (void*)0); 131 | asserteq(a2.v[1], (void*)1); 132 | asserteq(a2.v[2], (void*)2); 133 | asserteq(a2.v[3], (void*)3); 134 | asserteq(a2.v[4], (void*)4); 135 | asserteq(a2.v[5], (void*)5); 136 | 137 | ArrayFree(&a2, NULL); 
138 | ArrayFree(&a, NULL); 139 | } 140 | 141 | }) 142 | -------------------------------------------------------------------------------- /src/common/assert.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "assert.h" 4 | #include "os.h" 5 | #include "tstyle.h" 6 | 7 | #include "test.h" 8 | 9 | 10 | static bool fprintSourceFile( 11 | FILE* nonull fp, 12 | const char* nonull file, 13 | u32 line, 14 | u32 contextLines, 15 | bool colors 16 | ) { 17 | // try to read source file 18 | size_t srclen = 1024*1024; // read limit 19 | auto srcbuf = os_readfile(file, &srclen, NULL); 20 | if (srcbuf == NULL) { 21 | return false; 22 | } 23 | int len = (int)srclen; 24 | int lineno = 1; 25 | int linemin = max(0, line - contextLines); 26 | int linemax = line + contextLines; 27 | int start = -1; 28 | int end = -1; 29 | int linestart = 0; 30 | bool tail = true; 31 | 32 | for (int i = 0; i < len; i++) { 33 | if (srcbuf[i] == '\n') { 34 | if (lineno == linemin) { 35 | start = linestart; 36 | } 37 | if (lineno == line) { 38 | fprintf(fp, "%s%-4d >%s %.*s\n", 39 | colors ? TStyleTable[TStyle_inverse] : "", 40 | lineno, 41 | colors ? 
TStyle_none : "", 42 | i - linestart, 43 | &srcbuf[linestart] 44 | ); 45 | } else if (linemin <= lineno && lineno <= linemax) { 46 | fprintf(fp, "%-4d %.*s\n", lineno, i - linestart, &srcbuf[linestart]); 47 | } 48 | if (lineno == linemax) { 49 | end = i; 50 | tail = false; 51 | break; 52 | } 53 | lineno++; 54 | linestart = i + 1; 55 | } 56 | } 57 | if (tail) { // no linebreak at end of file 58 | fprintf(fp, "% 4d %.*s\n", lineno, len - linestart, &srcbuf[linestart]); 59 | } 60 | return true; 61 | } 62 | 63 | 64 | static void fprintStackTrace(FILE* nonull fp, int offsetFrames) { 65 | // try to show stack trace 66 | void* callstack[200]; 67 | int framecount = backtrace(callstack, countof(callstack)); 68 | if (framecount > 0) { 69 | char** strs = backtrace_symbols(callstack, framecount); 70 | if (strs != NULL) { 71 | fprintf(fp, "Call stack:\n"); 72 | for (int i = offsetFrames + 1; i < framecount; ++i) { 73 | fprintf(fp, " %s\n", strs[i]); 74 | } 75 | free(strs); 76 | } 77 | } 78 | } 79 | 80 | 81 | void WAssertf(const char* srcfile, int srcline, const char* nonull format, ...) { 82 | bool colors = TSTyleStderrIsTTY(); 83 | va_list ap; 84 | va_start(ap, format); 85 | vfprintf(stderr, format, ap); 86 | va_end(ap); 87 | fputc('\n', stderr); 88 | if (srcfile != NULL) { 89 | fprintSourceFile(stderr, srcfile, srcline, /* contextLines */ 3, colors); 90 | } 91 | fprintStackTrace(stderr, /* offsetFrames = */ 1); 92 | } 93 | 94 | 95 | const char* _assert_joinstr(const char* s1, ... 
/* NULL terminated */) { 96 | static char buf[256] = {0}; 97 | char* p = buf; 98 | 99 | size_t len = strlen(s1); 100 | memcpy(p, s1, len); 101 | p += len; 102 | 103 | va_list ap; 104 | va_start(ap, s1); 105 | while (1) { 106 | const char* s = va_arg(ap, const char*); 107 | if (s == NULL) { 108 | break; 109 | } 110 | len = strlen(s); 111 | memcpy(p, s, len); 112 | p += len; 113 | *p = '\0'; 114 | } 115 | va_end(ap); 116 | assertf((buf + sizeof(buf)) > p, "overflow"); 117 | *p = '\0'; 118 | return buf; 119 | } 120 | 121 | 122 | // Note: Since this prints to stderr, only enable this in the "test" product 123 | #ifdef W_TEST_BUILD 124 | W_UNIT_TEST(Assert, { 125 | const char* pch = _assert_joinstr("aa", "bb", "cc", NULL); 126 | assert(memcmp(pch, "aabbcc", 6) == 0); 127 | asserteq(pch[6], 0); 128 | 129 | // non-existant file 130 | fprintf(stdout, "----- START TEST OUTPUT -----\n"); 131 | fprintSourceFile(stdout, __FILE__ ".xxx", 1, /* contextLines */ 3, /*colors*/ true); 132 | // no colors, no linebreak at end 133 | fprintSourceFile(stdout, "test/file-no-final-line-break", 134 | 2, /* contextLines */ 3, /*colors*/ false); 135 | 136 | fprintf(stdout, "----- THE BELOW ASSERTION IS EXPECTED TO FAIL -----\n"); 137 | WAssertf(__FILE__, __LINE__, "%s:%d: test %d", __FILE__, __LINE__, 123); 138 | fprintf(stdout, "----- THE ABOVE ASSERTION IS EXPECTED TO FAIL -----\n"); 139 | fprintf(stdout, "----- END TEST OUTPUT -----\n"); 140 | }) 141 | #endif 142 | -------------------------------------------------------------------------------- /src/common/assert.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "defs.h" 3 | #include 4 | // 5 | // assertion testing 6 | // 7 | // assert((bool)cond) 8 | // prints error and calls abort() if cond is false 9 | // debug builds: active 10 | // opt build: eliminated 11 | // 12 | // assertf((bool)cond, const char* format, ...) 
13 | // prints format with arguments and calls abort() if cond is false. 14 | // debug builds: active 15 | // opt build: eliminated 16 | // 17 | // checknull(T expr) -> T 18 | // evaluates expr and if the result is null, calls assert(result). Returns result. 19 | // debug builds: active 20 | // opt build: pass-through 21 | // 22 | 23 | // WAssertf prints message with format, including a stack trace if available. 24 | // If srcfile != NULL, attempts to print source code around srcline. 25 | void WAssertf(const char* nullable srcfile, int srcline, const char* nonull format, ...); 26 | 27 | #ifdef assert 28 | #undef assert 29 | #endif 30 | 31 | #ifdef DEBUG 32 | #define assertf(cond, format, ...) \ 33 | ({ if (!(cond)) { \ 34 | WAssertf(__FILE__, __LINE__, "%s:%d: " format, __FILE__, __LINE__, ##__VA_ARGS__); \ 35 | abort(); \ 36 | } }) 37 | 38 | #define assert(cond) \ 39 | ({ if (!(cond)) { \ 40 | WAssertf(__FILE__, __LINE__, "%s:%d: %s", __FILE__, __LINE__, #cond); \ 41 | abort(); \ 42 | } }) 43 | 44 | const char* _assert_joinstr(const char* s1, ... /* NULL terminated */); 45 | 46 | #define asserteq(expr, expect) \ 47 | ({ auto actual = (expr); \ 48 | auto expected = (expect); \ 49 | if (actual != expected) { \ 50 | WAssertf(__FILE__, __LINE__, \ 51 | _assert_joinstr("%s:%d: %s ; got ", WFormatForValue(actual), \ 52 | ", expected ", WFormatForValue(expected), NULL), \ 53 | __FILE__, __LINE__, #expr, actual, expected); \ 54 | abort(); \ 55 | } \ 56 | }) 57 | 58 | #define checknull(expr) \ 59 | ({ auto v = (expr); assert(v != NULL); v; }) 60 | 61 | #else 62 | #define assertf(cond, format, ...) do{}while(0) 63 | #define assert(cond, ...) 
do{}while(0) 64 | #define asserteq(expr, expect) do{}while(0) 65 | #define checknull(expr) expr 66 | #endif 67 | 68 | -------------------------------------------------------------------------------- /src/common/buf.c: -------------------------------------------------------------------------------- 1 | #include "defs.h" 2 | #include "memory.h" 3 | #include "buf.h" 4 | 5 | // Do not allocate more than this much extra memory in a call to _BufMakeRoomFor 6 | #define BUF_MAX_PREALLOC (1024*1024) 7 | 8 | void BufInit(Buf* b, Memory mem, size_t cap) { 9 | b->mem = mem; 10 | if (cap > 0) { 11 | b->ptr = (u8*)memalloc(mem, cap); 12 | } else { 13 | b->ptr = NULL; 14 | } 15 | b->cap = cap; 16 | b->len = 0; 17 | } 18 | 19 | void BufFree(Buf* b) { 20 | if (b->ptr != NULL) { 21 | memfree(b->mem, b->ptr); 22 | } 23 | #if DEBUG 24 | memset(b, 0, sizeof(Buf)); 25 | #endif 26 | } 27 | 28 | void _BufMakeRoomFor(Buf* b, size_t size) { 29 | size_t cap = align2(b->len + size, 32); 30 | // Anticipate growing more; allocate some extra space beyond what is needed: 31 | if (cap < BUF_MAX_PREALLOC) { 32 | cap *= 2; 33 | } else { 34 | // Reached the limit of preallocation size. 35 | // Instead of doubling the allocating, add on a constant. 36 | cap += BUF_MAX_PREALLOC; 37 | } 38 | b->ptr = memrealloc(b->mem, b->ptr, cap); 39 | b->cap = cap; 40 | } 41 | 42 | // Adds a string to the string table. Returns the strtab offset. 
43 | void BufAppend(Buf* b, const void* ptr, size_t size) { 44 | BufMakeRoomFor(b, size); 45 | memcpy(&b->ptr[b->len], ptr, size); 46 | b->len += size; 47 | } 48 | 49 | u8* BufAlloc(Buf* b, size_t size) { 50 | BufMakeRoomFor(b, size); 51 | u8* ptr = &b->ptr[b->len]; 52 | b->len += size; 53 | return ptr; 54 | } 55 | 56 | u8* BufAllocz(Buf* b, size_t size) { 57 | BufMakeRoomFor(b, size); 58 | u8* ptr = &b->ptr[b->len]; 59 | memset(&b->ptr[b->len], 0, size); 60 | b->len += size; 61 | return ptr; 62 | } 63 | 64 | void BufAppendFill(Buf* b, u8 v, size_t size) { 65 | BufMakeRoomFor(b, size); 66 | memset(&b->ptr[b->len], v, size); 67 | b->len += size; 68 | } -------------------------------------------------------------------------------- /src/common/buf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | typedef struct Buf { 4 | Memory mem; 5 | u8* ptr; 6 | size_t cap; 7 | size_t len; 8 | } Buf; 9 | 10 | void BufInit(Buf* nonull b, Memory nullable mem, size_t cap); 11 | void BufFree(Buf* nonull b); 12 | static void BufMakeRoomFor(Buf* nonull b, size_t size); // ensures free space for at least size 13 | void BufAppend(Buf* nonull b, const void* nonull ptr, size_t size); 14 | void BufAppendFill(Buf* nonull b, u8 v, size_t size); // append size bytes of value v 15 | static void BufAppendc(Buf* b, char c); // append one byte 16 | u8* BufAlloc(Buf* nonull b, size_t size); // like BufAppend but leaves allocated data untouched. 
17 | u8* BufAllocz(Buf* nonull b, size_t size); // zeroes segment 18 | 19 | void _BufMakeRoomFor(Buf* b, size_t size); 20 | 21 | inline static void BufAppendc(Buf* b, char c) { 22 | if (b->cap <= b->len) { _BufMakeRoomFor(b, 1); } 23 | b->ptr[b->len++] = (u8)c; 24 | } 25 | 26 | inline static void BufMakeRoomFor(Buf* b, size_t size) { 27 | if (b->cap - b->len < size) { 28 | _BufMakeRoomFor(b, size); 29 | } 30 | } 31 | 32 | 33 | // DefArrayBuffer allows defining a type and a set of porcelain functions around Buf 34 | // to make it act as an array holding elements of any type (ElemT). 35 | // 36 | // Prototypes: 37 | // 38 | // void ArrayT##Init(ArrayT* nonull a, Memory nullable mem, size_t cap); 39 | // void ArrayT##Free(ArrayT* nonull a); 40 | // ElemT* ArrayT##At(ArrayT* nonull a, size_t index); 41 | // void ArrayT##Push(ArrayT* nonull a, ElemT v); 42 | // ElemT ArrayT##Pop(ArrayT* nonull a); 43 | // ElemT* ArrayT##Alloc(ArrayT* nonull a, size_t count); 44 | // void ArrayT##MakeRoomFor(ArrayT* nonull a, size_t count); 45 | // 46 | #define DefArrayBuffer(ArrayT, ElemT) \ 47 | typedef Buf ArrayT; \ 48 | inline static void ArrayT##Init(ArrayT* nonull a, Memory nullable mem, size_t cap) { \ 49 | BufInit(a, mem, cap * sizeof(ElemT)); \ 50 | } \ 51 | inline static void ArrayT##Free(ArrayT* nonull a) { BufFree(a); } \ 52 | inline static ElemT* ArrayT##At(ArrayT* nonull a, size_t i) { \ 53 | return (ElemT*)&a->ptr[i * sizeof(ElemT)]; \ 54 | } \ 55 | inline static void ArrayT##Push(ArrayT* nonull a, ElemT v) { BufAppend(a, &v, sizeof(ElemT)); }\ 56 | inline static ElemT ArrayT##Pop(ArrayT* nonull a) { \ 57 | a->len -= sizeof(ElemT); \ 58 | return *(ElemT*)&a->ptr[a->len]; \ 59 | } \ 60 | inline static ElemT* ArrayT##Alloc(ArrayT* nonull a, size_t count) { \ 61 | return (ElemT*)BufAlloc(a, count * sizeof(ElemT)); \ 62 | } \ 63 | inline static void ArrayT##MakeRoomFor(ArrayT* nonull a, size_t count) { \ 64 | BufMakeRoomFor(a, count * sizeof(ElemT)); \ 65 | } \ 66 | /* 
DefArrayBuffer */ 67 | 68 | 69 | #define ArrayBufferForEach(b, ELEMTYPE, LOCALNAME) \ 70 | /* this "for" introduces LOCALNAME */ \ 71 | for (auto LOCALNAME = (ELEMTYPE*)&((b)->ptr[0]), \ 72 | LOCALNAME##__guard = (ELEMTYPE*)NULL; \ 73 | LOCALNAME##__guard == NULL; \ 74 | LOCALNAME##__guard++) \ 75 | /* actual for loop */ \ 76 | for ( \ 77 | u32 LOCALNAME##__i = 0, \ 78 | LOCALNAME##__end = (b)->len; \ 79 | LOCALNAME = (ELEMTYPE*)&((b)->ptr[LOCALNAME##__i]), \ 80 | LOCALNAME##__i < LOCALNAME##__end; \ 81 | LOCALNAME##__i += sizeof(ELEMTYPE) \ 82 | ) /* */ 83 | 84 | -------------------------------------------------------------------------------- /src/common/defs.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // target endianess 12 | #if !defined(W_BYTE_ORDER_LE) && !defined(W_BYTE_ORDER_BE) 13 | #if (defined(__BIG_ENDIAN__) && !defined(__LITTLE_ENDIAN__)) || \ 14 | (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \ 15 | defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \ 16 | defined(_MIPSEB) || defined(__MIPSEB) || defined(__MIPSEB__) 17 | #define W_BYTE_ORDER_BE 1 18 | #elif (defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \ 19 | (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \ 20 | defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \ 21 | defined(_MIPSEL) || defined(__MIPSEL) || defined(__MIPSEL__) 22 | #define W_BYTE_ORDER_LE 1 23 | #else 24 | #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ 25 | defined(__x86_64__) || defined(__x86_64) || \ 26 | defined(__arm__) || defined(__arm) || defined(__ARM__) || \ 27 | defined(__ARM) || defined(__arm64__) 28 | #define W_BYTE_ORDER_LE 1 29 | #else 30 | #error "can't infer endianess. Define W_BYTE_ORDER_LE or W_BYTE_ORDER_BE manually." 
31 | #endif 32 | #endif 33 | #endif 34 | 35 | typedef _Bool bool; 36 | typedef signed char i8; 37 | typedef unsigned char u8; 38 | typedef signed short int i16; 39 | typedef unsigned short int u16; 40 | typedef signed int i32; 41 | typedef unsigned int u32; 42 | typedef signed long long int i64; 43 | typedef unsigned long long int u64; 44 | typedef float f32; 45 | typedef double f64; 46 | 47 | #ifndef true 48 | #define true ((bool)(1)) 49 | #define false ((bool)(0)) 50 | #endif 51 | 52 | #ifndef null 53 | #define null NULL 54 | #endif 55 | 56 | #define nonull _Nonnull /* note: nonull conflicts with attribute name */ 57 | #define nullable _Nullable 58 | 59 | #ifndef W_ASSUME_NONNULL_BEGIN 60 | #define W_ASSUME_NONNULL_BEGIN _Pragma("clang assume_nonnull begin") 61 | #endif 62 | #ifndef W_ASSUME_NONNULL_END 63 | #define W_ASSUME_NONNULL_END _Pragma("clang assume_nonnull end") 64 | #endif 65 | 66 | #define auto __auto_type 67 | 68 | #if __has_c_attribute(returns_nonnull) 69 | #define nonull_return __attribute__((returns_nonnull)) 70 | #else 71 | #define nonull_return 72 | #endif 73 | 74 | #if __has_c_attribute(fallthrough) 75 | #define FALLTHROUGH [[fallthrough]] 76 | #else 77 | #define FALLTHROUGH 78 | #endif 79 | 80 | #ifdef DEBUG 81 | // so that we can simply do: "#if DEBUG" 82 | #undef DEBUG 83 | #define DEBUG 1 84 | #else 85 | #define DEBUG 0 86 | #endif 87 | 88 | // WFormatForValue returns a printf formatting pattern for the type of x 89 | #define WFormatForValue(x) _Generic((x), \ 90 | unsigned long long: "%llu", \ 91 | unsigned long: "%lu", \ 92 | unsigned int: "%u", \ 93 | long long: "%lld", \ 94 | long: "%ld", \ 95 | int: "%d", \ 96 | char: "%c", \ 97 | unsigned char: "%C", \ 98 | const char*: "%s", \ 99 | char*: "%s", \ 100 | void*: "%p", \ 101 | const void*: "%p", \ 102 | default: "%p" \ 103 | ) 104 | 105 | #include "assert.h" 106 | 107 | #if DEBUG 108 | #include 109 | #define dlog(format, ...) 
\ 110 | fprintf(stdout, "D " format "\t(%s:%d)\n", ##__VA_ARGS__, __FILE__, __LINE__) 111 | #define logerr(format, ...) \ 112 | fprintf(stderr, format " (%s:%d)\n", ##__VA_ARGS__, __FILE__, __LINE__) 113 | #else 114 | #define dlog(...) do{}while(0) 115 | #define logerr(format, ...) \ 116 | fprintf(stderr, format "\n", ##__VA_ARGS__) 117 | #endif 118 | 119 | #define max(a,b) \ 120 | ({__typeof__ (a) _a = (a); \ 121 | __typeof__ (b) _b = (b); \ 122 | _a > _b ? _a : _b; }) 123 | 124 | #define min(a,b) \ 125 | ({__typeof__ (a) _a = (a); \ 126 | __typeof__ (b) _b = (b); \ 127 | _a < _b ? _a : _b; }) 128 | 129 | #ifndef offsetof 130 | #define offsetof(st, m) ((size_t)&(((st*)0)->m)) 131 | #endif 132 | 133 | #define countof(x) \ 134 | ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x]))))) 135 | 136 | // popcount 137 | #define popcount(x) _Generic((x), \ 138 | unsigned long long: __builtin_popcountll, \ 139 | unsigned long: __builtin_popcountl, \ 140 | default: __builtin_popcount \ 141 | )(x) 142 | 143 | // division of integer, rounding up 144 | #define W_IDIV_CEIL(x, y) (1 + (((x) - 1) / (y))) 145 | 146 | #define die(format, ...) do { \ 147 | logerr(format, ##__VA_ARGS__); \ 148 | exit(1); \ 149 | } while(0) 150 | 151 | // T align2(T x, T y) rounds up n to closest boundary w (w must be a power of two) 152 | // 153 | // E.g. 154 | // align(0, 4) => 0 155 | // align(1, 4) => 4 156 | // align(2, 4) => 4 157 | // align(3, 4) => 4 158 | // align(4, 4) => 4 159 | // align(5, 4) => 8 160 | // ... 161 | // 162 | #define align2(n,w) ({ \ 163 | assert(((w) & ((w) - 1)) == 0); /* alignment w is not a power of two */ \ 164 | ((n) + ((w) - 1)) & ~((w) - 1); \ 165 | }) 166 | 167 | 168 | // // Attribute for opting out of address sanitation. 169 | // // Needed for realloc() with a null pointer. 170 | // // e.g. 
171 | // // W_NO_SANITIZE_ADDRESS 172 | // // void ThisFunctionWillNotBeInstrumented() { return realloc(NULL, 1); } 173 | // #if defined(__clang__) || defined (__GNUC__) 174 | // #define W_NO_SANITIZE_ADDRESS __attribute__((no_sanitize("address"))) 175 | // #else 176 | // #define W_NO_SANITIZE_ADDRESS 177 | // #endif 178 | -------------------------------------------------------------------------------- /src/common/hash.c: -------------------------------------------------------------------------------- 1 | #include "hash.h" 2 | 3 | u32 hashFNV1a(const u8* buf, size_t len) { 4 | const u32 prime = 0x01000193; // pow(2,24) + pow(2,8) + 0x93 5 | u32 hash = 0x811C9DC5; // seed 6 | const u8* end = buf + len; 7 | while (buf < end) { 8 | hash = (*buf++ ^ hash) * prime; 9 | } 10 | return hash; 11 | } 12 | 13 | u64 hashFNV1a64(const u8* buf, size_t len) { 14 | const u64 prime = 0x100000001B3; // pow(2,40) + pow(2,8) + 0xb3 15 | u64 hash = 0xCBF29CE484222325; // seed 16 | const u8* end = buf + len; 17 | while (buf < end) { 18 | hash = (*buf++ ^ hash) * prime; 19 | } 20 | return hash; 21 | } 22 | -------------------------------------------------------------------------------- /src/common/hash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "defs.h" 3 | 4 | u32 hashFNV1a(const u8* buf, size_t len); 5 | u64 hashFNV1a64(const u8* buf, size_t len); 6 | -------------------------------------------------------------------------------- /src/common/hashmap.c.h: -------------------------------------------------------------------------------- 1 | // example: 2 | // #define HASHMAP_NAME FooMap 3 | // #define HASHMAP_KEY Foo 4 | // #define HASHMAP_KEY_HASH FooHash // should return an unsigned integer 5 | // #define HASHMAP_VALUE char* 6 | #ifndef HASHMAP_NAME 7 | #error "please define HASHMAP_NAME" 8 | #endif 9 | #ifndef HASHMAP_KEY 10 | #error "please define HASHMAP_KEY" 11 | #endif 12 | #ifndef HASHMAP_KEY_HASH 13 | #error 
"please define HASHMAP_KEY_HASH" 14 | #endif 15 | #ifndef HASHMAP_VALUE 16 | #error "please define HASHMAP_VALUE" 17 | #endif 18 | 19 | #define _HM_MAKE_FN_NAME(a, b) a ## b 20 | #define _HM_FUN(prefix, name) _HM_MAKE_FN_NAME(prefix, name) 21 | #define HM_FUN(name) _HM_FUN(HASHMAP_NAME, name) 22 | 23 | typedef enum HMFlag { 24 | HMFlagNone = 0, 25 | HMFlagBucketMemoryDense = 1 << 0, // bucket memory is inside map memory. used by Free 26 | } HMFlag; 27 | 28 | static const u32 bucketSize = 6; // entries per bucket 29 | 30 | typedef struct { 31 | struct { 32 | HASHMAP_KEY key; 33 | HASHMAP_VALUE value; 34 | } entries[bucketSize]; 35 | } Bucket; 36 | 37 | 38 | void HM_FUN(Init)(HASHMAP_NAME* m, u32 initbuckets, Memory mem) { 39 | m->cap = initbuckets; 40 | m->len = 0; 41 | m->mem = mem; 42 | m->buckets = memalloc(mem, m->cap * sizeof(Bucket)); 43 | } 44 | 45 | HASHMAP_NAME* HM_FUN(New)(u32 initbuckets, Memory mem) { 46 | // new differs from Init in that it allocates space for itself and the initial 47 | // buckets in one go. This is usually a little bit faster and reduces memory 48 | // fragmentation in cases where many hashmaps are created. 
49 | size_t bucketSize = initbuckets * sizeof(Bucket); 50 | char* ptr = memalloc(mem, sizeof(HASHMAP_NAME) + bucketSize); 51 | auto m = (HASHMAP_NAME*)ptr; 52 | m->cap = initbuckets; 53 | m->mem = mem; 54 | m->flags = HMFlagBucketMemoryDense; 55 | if (bucketSize > 0) { 56 | m->buckets = ptr + sizeof(HASHMAP_NAME); 57 | } 58 | return m; 59 | } 60 | 61 | void HM_FUN(Dealloc)(HASHMAP_NAME* m) { 62 | // should never call Dealloc on a map created with New 63 | assert(!(m->flags & HMFlagBucketMemoryDense)); 64 | 65 | memfree(m->mem, m->buckets); 66 | #if DEBUG 67 | m->buckets = NULL; 68 | m->len = 0; 69 | m->cap = 0; 70 | #endif 71 | } 72 | 73 | void HM_FUN(Free)(HASHMAP_NAME* m) { 74 | if (!(m->flags & HMFlagBucketMemoryDense)) { 75 | memfree(m->mem, m->buckets); 76 | } 77 | memfree(m->mem, m); 78 | #if DEBUG 79 | m->buckets = NULL; 80 | m->len = 0; 81 | m->cap = 0; 82 | #endif 83 | } 84 | 85 | 86 | static void mapGrow(HASHMAP_NAME* m) { 87 | u32 cap = m->cap * 2; 88 | rehash: { 89 | auto newbuckets = (Bucket*)memalloc(m->mem, cap * sizeof(Bucket)); 90 | for (u32 bi = 0; bi < m->cap; bi++) { 91 | auto b = &((Bucket*)m->buckets)[bi]; 92 | for (u32 i = 0; i < bucketSize; i++) { 93 | auto e = &b->entries[i]; 94 | if (e->key == NULL) { 95 | break; 96 | } 97 | if (e->value == NULL) { 98 | // skip deleted entry (compactation) 99 | continue; 100 | } 101 | u32 index = ((u32)HASHMAP_KEY_HASH(e->key)) % cap; 102 | auto newb = &newbuckets[index]; 103 | bool fit = false; 104 | for (u32 i2 = 0; i2 < bucketSize; i2++) { 105 | auto e2 = &newb->entries[i2]; 106 | if (e2->key == NULL) { 107 | // found a free slot in newb 108 | *e2 = *e; 109 | fit = true; 110 | break; 111 | } 112 | } 113 | if (!fit) { 114 | // no free slot found in newb; need to grow further. 
115 | memfree(m->mem, newbuckets); 116 | cap = cap * 2; 117 | goto rehash; 118 | } 119 | } 120 | } 121 | if (!(m->flags & HMFlagBucketMemoryDense)) { 122 | memfree(m->mem, m->buckets); 123 | } 124 | m->buckets = newbuckets; 125 | m->cap = cap; 126 | m->flags &= ~HMFlagBucketMemoryDense; 127 | } 128 | } 129 | 130 | 131 | // HM_FUN(Set) inserts key=value into m. 132 | // Returns replaced value or NULL if key did not exist in map. 133 | HASHMAP_VALUE HM_FUN(Set)(HASHMAP_NAME* m, HASHMAP_KEY key, HASHMAP_VALUE value) { 134 | assert(value != NULL); 135 | while (1) { // grow loop 136 | u32 index = ((u32)HASHMAP_KEY_HASH(key)) % m->cap; 137 | auto b = &((Bucket*)m->buckets)[index]; 138 | // dlog("bucket(key=\"%s\") #%u b=%p e=%p", key, index, b, &b->entries[0]); 139 | for (u32 i = 0; i < bucketSize; i++) { 140 | auto e = &b->entries[i]; 141 | if (e->value == NULL) { 142 | // free slot 143 | e->key = key; 144 | e->value = value; 145 | m->len++; 146 | return NULL; 147 | } 148 | if (e->key == key) { 149 | // key already in map -- replace value 150 | auto oldval = e->value; 151 | e->value = value; 152 | return oldval; 153 | } 154 | // dlog("collision key=\"%s\" <> e->key=\"%s\"", key, e->key); 155 | } 156 | // overloaded -- grow buckets 157 | // dlog("grow & rehash"); 158 | mapGrow(m); 159 | } 160 | } 161 | 162 | 163 | HASHMAP_VALUE HM_FUN(Del)(HASHMAP_NAME* m, HASHMAP_KEY key) { 164 | u32 index = ((u32)HASHMAP_KEY_HASH(key)) % m->cap; 165 | auto b = &((Bucket*)m->buckets)[index]; 166 | for (u32 i = 0; i < bucketSize; i++) { 167 | auto e = &b->entries[i]; 168 | if (e->key == key) { 169 | if (!e->value) { 170 | break; 171 | } 172 | // mark as deleted 173 | auto value = e->value; 174 | e->value = NULL; 175 | m->len--; 176 | return value; 177 | } 178 | } 179 | return NULL; 180 | } 181 | 182 | 183 | HASHMAP_VALUE HM_FUN(Get)(const HASHMAP_NAME* m, HASHMAP_KEY key) { 184 | u32 index = ((u32)HASHMAP_KEY_HASH(key)) % m->cap; 185 | auto b = &((Bucket*)m->buckets)[index]; 186 | for 
(u32 i = 0; i < bucketSize; i++) { 187 | auto e = &b->entries[i]; 188 | if (e->key == key) { 189 | return e->value; 190 | } 191 | if (e->key == NULL) { 192 | break; 193 | } 194 | } 195 | return NULL; 196 | } 197 | 198 | 199 | void HM_FUN(Clear)(HASHMAP_NAME* m) { 200 | memset(m->buckets, 0, sizeof(Bucket) * m->cap); 201 | m->len = 0; 202 | } 203 | 204 | 205 | void HM_FUN(Iter)(const HASHMAP_NAME* m, HM_FUN(Iterator)* it, void* userdata) { 206 | bool stop = false; 207 | for (u32 bi = 0; bi < m->cap; bi++) { 208 | auto b = &((Bucket*)m->buckets)[bi]; 209 | for (u32 i = 0; i < bucketSize; i++) { 210 | auto e = &b->entries[i]; 211 | if (e->key == NULL) { 212 | break; 213 | } 214 | if (e->value != NULL) { 215 | it(e->key, e->value, &stop, userdata); 216 | if (stop) { 217 | return; 218 | } 219 | } 220 | } 221 | } 222 | } 223 | 224 | // static u32* hashmapDebugDistr(const HASHMAP_NAME* m) { 225 | // u32 valindex = 0; 226 | // u32* vals = (u32*)memalloc(m->mem, m->cap * sizeof(u32)); 227 | // for (u32 bi = 0; bi < m->cap; bi++) { 228 | // auto b = &((Bucket*)m->buckets)[bi]; 229 | // u32 depth = 0; 230 | // for (u32 i = 0; i < bucketSize; i++) { 231 | // auto e = &b->entries[i]; 232 | // if (e->key == NULL) { 233 | // break; 234 | // } 235 | // if (e->value != NULL) { 236 | // depth++; 237 | // } 238 | // } 239 | // vals[valindex++] = depth; 240 | // } 241 | // return vals; 242 | // } 243 | 244 | #undef _HM_MAKE_FN_NAME 245 | #undef _HM_FUN 246 | #undef HM_FUN 247 | -------------------------------------------------------------------------------- /src/common/hashmap.h: -------------------------------------------------------------------------------- 1 | // Note: intentionally not "#pragma once" 2 | #include "memory.h" 3 | // example: 4 | // #define HASHMAP_NAME FooMap 5 | // #define HASHMAP_KEY Foo 6 | // #define HASHMAP_VALUE char* 7 | #ifndef HASHMAP_NAME 8 | #error "please define HASHMAP_NAME" 9 | #endif 10 | #ifndef HASHMAP_KEY 11 | #error "please define HASHMAP_KEY" 12 
| #endif 13 | #ifndef HASHMAP_VALUE 14 | #error "please define HASHMAP_VALUE" 15 | #endif 16 | 17 | #define _HM_MAKE_FN_NAME(a, b) a ## b 18 | #define _HM_FUN(prefix, name) _HM_MAKE_FN_NAME(prefix, name) 19 | #define HM_FUN(name) _HM_FUN(HASHMAP_NAME, name) 20 | #define HASHMAP_IS_INIT(m) ((m)->buckets != NULL) 21 | 22 | typedef struct { 23 | u32 cap; // number of buckets 24 | u32 len; // number of key-value entries 25 | u32 flags; // 26 | Memory mem; // memory allocator. NULL = use global allocator 27 | void* buckets; // internal 28 | } HASHMAP_NAME; 29 | 30 | #ifdef HASHMAP_INCLUDE_DECLARATIONS 31 | // Include declarations. 32 | // Normally these are copy-pasted and hand-converted in the user-level header. 33 | 34 | // New creates a new map with initbuckets intial buckets. 35 | HASHMAP_NAME* HM_FUN(New)(u32 initbuckets, Memory) 36 | 37 | // Free frees all memory of a map, including the map's memory. 38 | // Use Free when you created a map with New. 39 | // Use Dealloc when you manage the memory of the map yourself and used Init. 40 | void HM_FUN(Free)(HASHMAP_NAME*); 41 | 42 | // Init initializes a map structure. initbuckets is the number of initial buckets. 43 | void HM_FUN(Init)(HASHMAP_NAME*, u32 initbuckets, Memory); 44 | 45 | // Dealloc frees buckets data (but not the hashmap itself.) 46 | // The hashmap is invalid after this call. Call Init to reuse. 47 | void HM_FUN(Dealloc)(HASHMAP_NAME*); 48 | 49 | // Get searches for key. Returns value, or NULL if not found. 50 | HASHMAP_VALUE HM_FUN(Get)(const HASHMAP_NAME*, HASHMAP_KEY key); 51 | 52 | // Set inserts key=value into m. Returns the replaced value or NULL if not found. 53 | HASHMAP_VALUE HM_FUN(Set)(HASHMAP_NAME*, HASHMAP_KEY key, HASHMAP_VALUE value); 54 | 55 | // Del removes value for key. Returns the removed value or NULL if not found. 56 | HASHMAP_VALUE HM_FUN(Del)(HASHMAP_NAME*, HASHMAP_KEY key); 57 | 58 | // Clear removes all entries. In contrast to Free, map remains valid. 
59 | void HM_FUN(Clear)(HASHMAP_NAME*); 60 | 61 | // Iterator function type. Set stop=true to stop iteration. 62 | typedef void(HM_FUN(Iterator))(HASHMAP_KEY key, HASHMAP_VALUE value, bool* stop, void* userdata); 63 | 64 | // Iter iterates over entries of the map. 65 | void HM_FUN(Iter)(const HASHMAP_NAME*, HM_FUN(Iterator)*, void* userdata); 66 | 67 | #endif 68 | 69 | #undef _HM_MAKE_FN_NAME 70 | #undef _HM_FUN 71 | #undef HM_FUN 72 | -------------------------------------------------------------------------------- /src/common/memory.c: -------------------------------------------------------------------------------- 1 | #include "memory.h" 2 | #include "array.h" 3 | #include "os.h" 4 | #include "test.h" 5 | 6 | 7 | static size_t memPageSize = 0; 8 | 9 | static void __attribute__((constructor)) init() { 10 | memPageSize = os_mempagesize(); 11 | } 12 | 13 | Memory MemoryNew(size_t initHint) { 14 | if (initHint == 0) { 15 | initHint = memPageSize; 16 | } 17 | return create_mspace(/*capacity*/initHint, /*locked*/0); 18 | } 19 | 20 | void MemoryRecycle(Memory* memptr) { 21 | // TODO: see if there is a way to make dlmalloc reuse msp 22 | destroy_mspace(*memptr); 23 | *memptr = create_mspace(/*capacity*/memPageSize, /*locked*/0); 24 | } 25 | 26 | void MemoryFree(Memory mem) { 27 | destroy_mspace(mem); 28 | } 29 | 30 | 31 | char* memallocCStr(Memory mem, const char* pch, size_t len) { 32 | auto s = (char*)memalloc(mem, len + 1); 33 | memcpy(s, pch, len); 34 | s[len] = 0; 35 | return s; 36 | } 37 | 38 | char* memallocCStrConcat(Memory mem, const char* s1, ...) { 39 | va_list ap; 40 | 41 | size_t len1 = strlen(s1); 42 | size_t len = len1; 43 | u32 count = 0; 44 | va_start(ap, s1); 45 | while (1) { 46 | const char* s = va_arg(ap,const char*); 47 | if (s == NULL || count == 20) { // TODO: warn about limit somehow? 
48 | break; 49 | } 50 | len += strlen(s); 51 | } 52 | va_end(ap); 53 | 54 | char* newstr = (char*)memalloc(mem, len + 1); 55 | char* dstptr = newstr; 56 | memcpy(dstptr, s1, len1); 57 | dstptr += len1; 58 | 59 | va_start(ap, s1); 60 | for (u32 i = 0; i < count; i++) { 61 | const char* s = va_arg(ap,const char*); 62 | auto len = strlen(s); 63 | memcpy(dstptr, s, len); 64 | dstptr += len; 65 | } 66 | va_end(ap); 67 | 68 | *dstptr = 0; 69 | 70 | return newstr; 71 | } 72 | 73 | 74 | // memsprintf is like sprintf but uses memory from mem. 75 | char* memsprintf(Memory mem, const char* format, ...) { 76 | va_list ap; 77 | va_start(ap, format); 78 | size_t bufsize = (strlen(format) * 2) + 1; 79 | char* buf = memalloc(mem, bufsize); 80 | size_t idealsize = (size_t)vsnprintf(buf, bufsize, format, ap); 81 | if (idealsize >= bufsize) { 82 | // buf is too small 83 | buf = mspace_realloc(mem, buf, idealsize + 1); 84 | idealsize = (size_t)vsnprintf(buf, bufsize, format, ap); 85 | assert(idealsize < bufsize); // according to libc docs, this should be true 86 | } 87 | va_end(ap); 88 | return buf; 89 | } 90 | 91 | 92 | typedef struct GC { 93 | Array gen1, gen2; 94 | } GC; 95 | 96 | 97 | /*__thread*/ Memory _gmem = NULL; 98 | /*__thread*/ GC tlsGC = { Array_INIT, Array_INIT }; 99 | 100 | 101 | Memory _GlobalMemory() { 102 | return (_gmem == NULL) ? (_gmem = create_mspace(0, 0)) : _gmem; 103 | } 104 | 105 | 106 | void* memgcalloc(size_t size) { 107 | void* ptr = mspace_calloc(_GlobalMemory(), 1, size); 108 | _memgc(ptr); 109 | return ptr; 110 | } 111 | 112 | 113 | void memgc_collect() { 114 | auto gc = &tlsGC; 115 | // dlog("memgc_collect gen1 %u, gen2 %u", gc->gen1.len, gc->gen2.len); 116 | 117 | // free anything in gen2 118 | if (gc->gen2.len > 0) { 119 | assert(_gmem != NULL); 120 | 121 | // Node: dlmalloc mentions that for large bulk_free sets, sorting the pointers first may 122 | // increases locality and may increase performance. 
#if DEBUG
// Unit test for the two-generation collector (memgc, memgcalloc, memgc_collect).
// Note: the length assertions assume that gen1 and gen2 are empty when the test starts.
static void test() {
  // printf("-------------------------Memory-------------------------\n");

  u32 allocCount1 = 5;

  for (u32 i = 0; i < allocCount1; i++) {
    void* ptr = memalloc(NULL, 16);
    memgc(ptr);
  }

  auto gc = &tlsGC;

  assert(gc->gen1.len == allocCount1);

  memgc_collect();
  assert(gc->gen1.len == 0); // gen1 should always be 0 after call to memgc_collect

  assert(gc->gen2.len == allocCount1);

  u32 allocCount2 = 8;

  // u32 index: allocCount2 is unsigned, avoid a signed/unsigned comparison
  for (u32 i = 0; i < allocCount2; i++) {
    memgcalloc(16);
  }

  assert(gc->gen1.len == allocCount2);
  assert(gc->gen2.len == allocCount1);

  memgc_collect();
  assert(gc->gen1.len == 0);

  assert(gc->gen2.len == allocCount2);

  memgc_collect();

  assert(gc->gen1.len == 0);
  assert(gc->gen2.len == 0);

  // test bulk_free to ensure that we get an error message logged in case foreign pointers
  // are added to the GC.
  // Caution: This depends on FOOTER=1 being defined for dlmalloc, which is only the case
  // for DEBUG builds.
  {
    Memory mem = MemoryNew(0);
    memgc(memalloc(mem, 16));   // add pointer from unrelated mspace to gc
    memgc(memalloc(_gmem, 16)); // add pointer from the correct mspace to gc
    assert(gc->gen1.len == 2);
    size_t unfreed = mspace_bulk_free(_gmem, gc->gen1.v, gc->gen1.len);
    assert(unfreed == 1);

    // Empty gen1 before releasing mem. The list was freed by hand above and still holds
    // the pointer that belongs to mem; leaving it registered would make a later
    // memgc_collect() examine memory of an mspace that no longer exists.
    gc->gen1.len = 0;

    MemoryFree(mem);
  }

  // printf("------------------------/Memory-------------------------\n");
  // exit(0);
}
W_UNIT_TEST(Memory, { test(); }) // W_UNIT_TEST
#endif
// memgcalloct is a convenience for: (MyStructType*)memgcalloc(sizeof(MyStructType))
//
// The result is a TYPE* to zeroed, garbage-collected memory.
// The mem argument is kept so existing call sites keep the same shape as memalloct(mem, TYPE),
// but it does not select an allocator: memgcalloc takes only a size. mem is evaluated and
// its value discarded.
#define memgcalloct(mem, TYPE) ((void)(mem), (TYPE*)memgcalloc(sizeof(TYPE)))
62 | // Always uses the global allocator. 63 | void memgc_collect(); 64 | 65 | 66 | // ----------------------------------------------------------------------------------------------- 67 | // Memory spaces 68 | 69 | // Create a new memory space 70 | Memory MemoryNew(size_t initHint/*=0*/); 71 | void MemoryRecycle(Memory* memptr); // recycle for reuse 72 | void MemoryFree(Memory mem); // free all memory allocated by mem 73 | 74 | // ----------------------------------------------------------------------------------------------- 75 | // inline and internal implementations 76 | 77 | void _memgc(void* nonull ptr); 78 | Memory _GlobalMemory() nonull_return; 79 | 80 | inline static void* memalloc(Memory mem, size_t size) { 81 | return mspace_calloc(mem == NULL ? _GlobalMemory() : mem, 1, size); 82 | } 83 | 84 | inline static void* memrealloc(Memory mem, void* ptr, size_t newsize) { 85 | return mspace_realloc(mem == NULL ? _GlobalMemory() : mem, ptr, newsize); 86 | } 87 | 88 | inline static void memfree(Memory mem, void* ptr) { 89 | mspace_free(mem == NULL ? 
_GlobalMemory() : mem, ptr); 90 | } 91 | 92 | inline static sds memgcsds(sds s) { 93 | _memgc( ((char*)s) - sdsHdrSize(s[-1]) ); 94 | return s; 95 | } 96 | -------------------------------------------------------------------------------- /src/common/os.c: -------------------------------------------------------------------------------- 1 | #include // sysconf 2 | #include 3 | 4 | #include "defs.h" 5 | #include "os.h" 6 | 7 | static size_t _mempagesize = 0; 8 | 9 | size_t os_mempagesize() { 10 | if (_mempagesize == 0) { 11 | auto z = sysconf(_SC_PAGESIZE); 12 | if (z <= 0) { 13 | _mempagesize = 1024 * 4; // usually 4kB 14 | } else { 15 | _mempagesize = (size_t)z; 16 | } 17 | } 18 | return _mempagesize; 19 | } 20 | 21 | 22 | u8* os_readfile(const char* filename, size_t* size_inout, Memory mem) { 23 | assert(size_inout != NULL); 24 | 25 | int fd = open(filename, O_RDONLY); 26 | if (fd < 0) { 27 | return NULL; 28 | } 29 | 30 | struct stat st; 31 | if (fstat(fd, &st) != 0) { 32 | close(fd); 33 | return NULL; 34 | } 35 | 36 | size_t bufsize = (size_t)st.st_size; 37 | size_t limit = *size_inout; 38 | if (limit > 0 && limit < bufsize) { 39 | bufsize = limit; 40 | } 41 | 42 | u8* buf = (u8*)memalloc(mem, bufsize); 43 | 44 | auto nread = read(fd, buf, bufsize); 45 | close(fd); 46 | if (nread < 0) { 47 | memfree(mem, buf); 48 | *size_inout = 0; 49 | return NULL; 50 | } 51 | 52 | assert(nread == bufsize); 53 | 54 | *size_inout = bufsize; 55 | return buf; 56 | } 57 | 58 | 59 | bool os_writefile(const char* filename, const void* ptr, size_t size) { 60 | FILE* fp = fopen(filename, "w"); 61 | if (fp == NULL) { 62 | return false; 63 | } 64 | auto z = fwrite(ptr, size, 1, fp); 65 | fclose(fp); 66 | return size == 0 ? 
z == 0 : z == 1; 67 | } 68 | 69 | -------------------------------------------------------------------------------- /src/common/os.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "memory.h" 3 | 4 | // os 5 | size_t os_mempagesize(); // always returns a suitable number 6 | 7 | // Read entire file into a heap-allocated buffer. 8 | // If *size_inout is >0 then it is used as a limit of how much to read from the file. 9 | // If size_inout is not null, it is set to the size of the returned byte array. 10 | u8* os_readfile(const char* nonull filename, size_t* nonull size_inout, Memory nullable mem); 11 | 12 | // Write data at ptr of bytes size to file at filename. 13 | bool os_writefile(const char* nonull filename, const void* nonull ptr, size_t size); 14 | -------------------------------------------------------------------------------- /src/common/ptrmap.c: -------------------------------------------------------------------------------- 1 | #include "ptrmap.h" 2 | #include "hash.h" 3 | #include "test.h" 4 | 5 | #include /* log2 */ 6 | #include /* *_MAX */ 7 | 8 | #if ((ULONG_MAX) > (UINT_MAX)) 9 | // 64-bit address 10 | #define ptrhash(ptr) ((size_t)hashFNV1a64((const u8*)&(ptr), 8)) 11 | #else 12 | // 32-bit address 13 | #define ptrhash(ptr) ((size_t)hashFNV1a((const u8*)&(ptr), 4)) 14 | #endif 15 | 16 | // This is a good and very fast hash function for small sets of sequential pointers, 17 | // but as the address space grows the distribution worsens quickly compared to FNV1a. 18 | // static size_t ptrhash2(void* p) { 19 | // // Note: the log2 call is eliminated and replaced by a constant when compiling 20 | // // with optimizations. 
21 | // const size_t shift = (size_t)log2(1 + sizeof(void*)); 22 | // return (size_t)(p) >> shift; 23 | // } 24 | 25 | // hashmap implementation 26 | #define HASHMAP_NAME PtrMap 27 | #define HASHMAP_KEY const void* 28 | #define HASHMAP_VALUE void* 29 | #define HASHMAP_KEY_HASH ptrhash 30 | #include "hashmap.c.h" 31 | #undef HASHMAP_NAME 32 | #undef HASHMAP_KEY 33 | #undef HASHMAP_VALUE 34 | #undef HASHMAP_KEY_HASH 35 | 36 | 37 | #if DEBUG 38 | static void testMapIterator(const void* key, void* value, bool* stop, void* userdata) { 39 | // dlog("\"%s\" => %zu", key, (size_t)value); 40 | size_t* n = (size_t*)userdata; 41 | (*n)++; 42 | } 43 | #endif 44 | 45 | 46 | W_UNIT_TEST(PtrMap, { 47 | auto mem = MemoryNew(0); 48 | auto m = PtrMapNew(8, mem); 49 | 50 | assert(m->len == 0); 51 | 52 | #define SYM(cstr) symgeth((const u8*)(cstr), strlen(cstr)) 53 | void* oldval; 54 | 55 | oldval = PtrMapSet(m, "hello", (void*)1); 56 | // dlog("PtrMapSet(hello) => %zu", (size_t)oldval); 57 | assert(m->len == 1); 58 | 59 | oldval = PtrMapSet(m, "hello", (void*)2); 60 | // dlog("PtrMapSet(hello) => %zu", (size_t)oldval); 61 | assert(m->len == 1); 62 | 63 | assert(PtrMapDel(m, "hello") == (void*)2); 64 | assert(m->len == 0); 65 | 66 | size_t n = 100; 67 | PtrMapSet(m, "break", (void*)n++); assert(m->len == n - 100); 68 | PtrMapSet(m, "case", (void*)n++); assert(m->len == n - 100); 69 | PtrMapSet(m, "const", (void*)n++); assert(m->len == n - 100); 70 | PtrMapSet(m, "continue", (void*)n++); assert(m->len == n - 100); 71 | PtrMapSet(m, "default", (void*)n++); assert(m->len == n - 100); 72 | PtrMapSet(m, "defer", (void*)n++); assert(m->len == n - 100); 73 | PtrMapSet(m, "else", (void*)n++); assert(m->len == n - 100); 74 | PtrMapSet(m, "enum", (void*)n++); assert(m->len == n - 100); 75 | PtrMapSet(m, "fallthrough", (void*)n++); assert(m->len == n - 100); 76 | PtrMapSet(m, "for", (void*)n++); assert(m->len == n - 100); 77 | PtrMapSet(m, "fun", (void*)n++); assert(m->len == n - 100); 78 | 
PtrMapSet(m, "go", (void*)n++); assert(m->len == n - 100); 79 | PtrMapSet(m, "if", (void*)n++); assert(m->len == n - 100); 80 | PtrMapSet(m, "import", (void*)n++); assert(m->len == n - 100); 81 | PtrMapSet(m, "in", (void*)n++); assert(m->len == n - 100); 82 | PtrMapSet(m, "interface", (void*)n++); assert(m->len == n - 100); 83 | PtrMapSet(m, "is", (void*)n++); assert(m->len == n - 100); 84 | PtrMapSet(m, "return", (void*)n++); assert(m->len == n - 100); 85 | PtrMapSet(m, "select", (void*)n++); assert(m->len == n - 100); 86 | PtrMapSet(m, "struct", (void*)n++); assert(m->len == n - 100); 87 | PtrMapSet(m, "switch", (void*)n++); assert(m->len == n - 100); 88 | PtrMapSet(m, "symbol", (void*)n++); assert(m->len == n - 100); 89 | PtrMapSet(m, "type", (void*)n++); assert(m->len == n - 100); 90 | PtrMapSet(m, "var", (void*)n++); assert(m->len == n - 100); 91 | PtrMapSet(m, "while", (void*)n++); assert(m->len == n - 100); 92 | PtrMapSet(m, "_", (void*)n++); assert(m->len == n - 100); 93 | PtrMapSet(m, "int", (void*)n++); assert(m->len == n - 100); 94 | 95 | // // print distribution of load on each bucket 96 | // printf("bucket,load\n"); 97 | // u32* vals = hashmapDebugDistr(m); 98 | // for (u32 i = 0; i < m.cap; i++) { 99 | // printf("%u,%u\n", i+1, vals[i]); 100 | // } 101 | // free(vals); 102 | 103 | // counts 104 | n = 0; 105 | PtrMapIter(m, testMapIterator, &n); 106 | assert(n == 27); 107 | 108 | // del 109 | assert(PtrMapSet(m, "hello", (void*)2) == NULL); 110 | assert(PtrMapGet(m, "hello") == (void*)2); 111 | assert(PtrMapDel(m, "hello") == (void*)2); 112 | assert(PtrMapGet(m, "hello") == NULL); 113 | assert(PtrMapSet(m, "hello", (void*)2) == NULL); 114 | assert(PtrMapGet(m, "hello") == (void*)2); 115 | 116 | PtrMapFree(m); 117 | MemoryFree(mem); 118 | }) // W_UNIT_TEST 119 | -------------------------------------------------------------------------------- /src/common/ptrmap.h: -------------------------------------------------------------------------------- 1 | 
#pragma once 2 | #include "defs.h" 3 | 4 | // PtrMap maps void* to void*. sizeof(PtrMap) == 3*sizeof(void*) 5 | #define HASHMAP_NAME PtrMap 6 | #define HASHMAP_KEY const void* 7 | #define HASHMAP_VALUE void* 8 | #include "hashmap.h" 9 | #undef HASHMAP_NAME 10 | #undef HASHMAP_KEY 11 | #undef HASHMAP_VALUE 12 | 13 | // PtrMapInit initializes a map structure. initbuckets is the number of initial buckets. 14 | void PtrMapInit(PtrMap*, u32 initbuckets, Memory mem/*nullable*/); 15 | 16 | // bool PtrMapIsInit(PtrMap*) 17 | #define PtrMapIsInit HASHMAP_IS_INIT 18 | 19 | // PtrMapDealloc frees heap memory used by a map, but leaves PtrMap untouched. 20 | void PtrMapDealloc(PtrMap*); 21 | 22 | // Creates and initializes a new PtrMap in mem, or global memory if mem is NULL. 23 | PtrMap* PtrMapNew(u32 initbuckets, Memory mem/*null*/); 24 | 25 | // PtrMapFree frees PtrMap along with its data. 26 | void PtrMapFree(PtrMap*); 27 | 28 | // PtrMapGet searches for key. Returns value, or NULL if not found. 29 | void* PtrMapGet(const PtrMap*, const void* key); 30 | 31 | // PtrMapSet inserts key=value into m. Returns the replaced value or NULL if not found. 32 | void* PtrMapSet(PtrMap*, const void* key, void* value); 33 | 34 | // PtrMapDel removes value for key. Returns the removed value or NULL if not found. 35 | void* PtrMapDel(PtrMap*, const void* key); 36 | 37 | // PtrMapClear removes all entries. In contrast to PtrMapFree, map remains valid. 38 | void PtrMapClear(PtrMap*); 39 | 40 | // Iterator function type. Set stop=true to stop iteration. 41 | typedef void(PtrMapIterator)(const void* key, void* value, bool* stop, void* userdata); 42 | 43 | // PtrMapIter iterates over entries of the map. 
// strhasprefix reports whether s starts with the bytes of the C-string prefix.
// The length of s is taken from sdslen and the length of prefix from strlen, so prefix
// must be NUL-terminated. An empty prefix matches any s.
bool strhasprefix(ConstStr s, const char* prefix) {
  size_t plen = strlen(prefix);
  // A prefix longer than s can never match; testing the lengths first also keeps
  // memcmp from reading past the end of s.
  return sdslen(s) < plen ? false : memcmp(s, prefix, plen) == 0;
}
// getTestMode returns the test mode selected by the environment variable W_TEST_MODE.
//   "on"        => WTestModeOn
//   "exclusive" => WTestModeExclusive
//   unset or any other value => WTestModeNone
// The environment is read on the first call only; the result is cached in _testMode,
// where (WTestMode)-1 means "not resolved yet".
WTestMode getTestMode() {
  if (_testMode == (WTestMode)-1) {
    // default, used when W_TEST_MODE is unset or has an unrecognized value
    _testMode = WTestModeNone;
    char* testmode = getenv("W_TEST_MODE");
    if (testmode != NULL) {
      if (strcmp(testmode, "on") == 0) {
        _testMode = WTestModeOn;
      } else if (strcmp(testmode, "exclusive") == 0) {
        _testMode = WTestModeExclusive;
      }
    }
  }
  return _testMode;
}
9 | // W_UNIT_TEST(name, body) defines a unit test to be run before main() 10 | // 11 | 12 | #if DEBUG 13 | #define W_UNIT_TEST_ENABLED 1 14 | #define W_UNIT_TEST(name, body) \ 15 | __attribute__((constructor)) static void unit_test_##name() { \ 16 | if (getTestMode() != WTestModeNone) { \ 17 | printf("TEST " #name " %s\n", __FILE__); \ 18 | body \ 19 | } \ 20 | } 21 | #else 22 | #define W_UNIT_TEST(name, body) 23 | #define W_UNIT_TEST_ENABLED 0 24 | #endif 25 | 26 | typedef enum WTestMode { 27 | // W_TEST_MODE Description 28 | WTestModeNone = 0, // "" testing disabled 29 | WTestModeOn, // "on" testing enabled 30 | WTestModeExclusive, // "exclusive" only test; don't run main function 31 | } WTestMode; 32 | 33 | // getTestMode retrieves the effective WTestMode parsed from environment W_TEST_MODE 34 | WTestMode getTestMode(); 35 | -------------------------------------------------------------------------------- /src/common/thread.c: -------------------------------------------------------------------------------- 1 | #include "defs.h" 2 | #include "thread.h" 3 | 4 | #if defined(__STDC_NO_THREADS__) && __STDC_NO_THREADS__ 5 | // pthread layer 6 | #include "thread_pthread.c.h" 7 | #endif 8 | 9 | 10 | ThreadStatus ThreadStart(Thread* nonull t, thrd_start_t nonull fn, void* nullable arg) { 11 | return (ThreadStatus)thrd_create(t, fn, arg); 12 | } 13 | 14 | 15 | int ThreadAwait(Thread t) { 16 | int result = 0; 17 | thrd_join(t, &result); // ignore ThreadStatus 18 | return result; 19 | } 20 | 21 | 22 | Thread ThreadSpawn(thrd_start_t nonull fn, void* nullable arg) nonull_return { 23 | Thread t; 24 | if (ThreadStart(&t, fn, arg) != ThreadSuccess) { 25 | return NULL; 26 | } 27 | return t; 28 | } 29 | -------------------------------------------------------------------------------- /src/common/thread.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #if defined(__STDC_NO_THREADS__) && __STDC_NO_THREADS__ 3 | #include 
"thread_pthread.h" 4 | #else 5 | #include 6 | #endif 7 | 8 | typedef enum ThreadStatus { 9 | ThreadSuccess = thrd_success, 10 | ThreadNomem = thrd_nomem, 11 | ThreadTimedout = thrd_timedout, 12 | ThreadBusy = thrd_busy, 13 | ThreadError = thrd_error, 14 | } ThreadStatus; 15 | 16 | typedef thrd_t Thread; 17 | 18 | ThreadStatus ThreadStart(Thread* nonull t, thrd_start_t nonull fn, void* nullable arg); 19 | Thread nullable ThreadSpawn(thrd_start_t nonull fn, void* nullable arg); // null on error 20 | int ThreadAwait(Thread t); 21 | -------------------------------------------------------------------------------- /src/common/thread_pthread.c.h: -------------------------------------------------------------------------------- 1 | /* 2 | Author: John Tsiombikas 3 | 4 | I place this piece of code in the public domain. Feel free to use as you see 5 | fit. I'd appreciate it if you keep my name at the top of the code somehwere, 6 | but whatever. 7 | 8 | Main project site: https://github.com/jtsiomb/c11threads 9 | */ 10 | 11 | // note: assumes c11threads.h has been included 12 | 13 | #include 14 | #include 15 | #include /* for sched_yield */ 16 | #include 17 | 18 | #ifdef __APPLE__ 19 | /* Darwin doesn't implement timed mutexes currently */ 20 | #define C11THREADS_NO_TIMED_MUTEX 21 | #endif 22 | 23 | #ifdef C11THREADS_NO_TIMED_MUTEX 24 | #define PTHREAD_MUTEX_TIMED_NP PTHREAD_MUTEX_NORMAL 25 | #define C11THREADS_TIMEDLOCK_POLL_INTERVAL 5000000 /* 5 ms */ 26 | #endif 27 | 28 | 29 | static inline int thrd_create(thrd_t *thr, thrd_start_t func, void *arg) 30 | { 31 | int res = pthread_create(thr, 0, (void*(*)(void*))func, arg); 32 | if(res == 0) { 33 | return thrd_success; 34 | } 35 | return res == ENOMEM ? 
thrd_nomem : thrd_error; 36 | } 37 | 38 | static inline void thrd_exit(int res) 39 | { 40 | pthread_exit((void*)(long)res); 41 | } 42 | 43 | static inline int thrd_join(thrd_t thr, int *res) 44 | { 45 | void *retval; 46 | 47 | if(pthread_join(thr, &retval) != 0) { 48 | return thrd_error; 49 | } 50 | if(res) { 51 | *res = (int)(long)retval; 52 | } 53 | return thrd_success; 54 | } 55 | 56 | static inline int thrd_detach(thrd_t thr) 57 | { 58 | return pthread_detach(thr) == 0 ? thrd_success : thrd_error; 59 | } 60 | 61 | static inline thrd_t thrd_current(void) 62 | { 63 | return pthread_self(); 64 | } 65 | 66 | static inline int thrd_equal(thrd_t a, thrd_t b) 67 | { 68 | return pthread_equal(a, b); 69 | } 70 | 71 | static inline int thrd_sleep(const struct timespec *ts_in, struct timespec *rem_out) 72 | { 73 | if(nanosleep(ts_in, rem_out) < 0) { 74 | if(errno == EINTR) return -1; 75 | return -2; 76 | } 77 | return 0; 78 | } 79 | 80 | static inline void thrd_yield(void) 81 | { 82 | sched_yield(); 83 | } 84 | 85 | /* ---- mutexes ---- */ 86 | 87 | static inline int mtx_init(mtx_t *mtx, int type) 88 | { 89 | int res; 90 | pthread_mutexattr_t attr; 91 | 92 | pthread_mutexattr_init(&attr); 93 | 94 | if(type & mtx_timed) { 95 | pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_TIMED_NP); 96 | } 97 | if(type & mtx_recursive) { 98 | pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); 99 | } 100 | 101 | res = pthread_mutex_init(mtx, &attr) == 0 ? thrd_success : thrd_error; 102 | pthread_mutexattr_destroy(&attr); 103 | return res; 104 | } 105 | 106 | static inline void mtx_destroy(mtx_t *mtx) 107 | { 108 | pthread_mutex_destroy(mtx); 109 | } 110 | 111 | static inline int mtx_lock(mtx_t *mtx) 112 | { 113 | int res = pthread_mutex_lock(mtx); 114 | if(res == EDEADLK) { 115 | return thrd_busy; 116 | } 117 | return res == 0 ? 
/* mtx_timedlock tries to lock mtx, giving up once the point in time ts has passed.
 * Returns thrd_success when the lock was taken, thrd_timedout when ts was reached first,
 * and thrd_error for any other failure.
 */
static inline int mtx_timedlock(mtx_t *mtx, const struct timespec *ts)
{
	int res;
#ifdef C11THREADS_NO_TIMED_MUTEX
	/* fake a timedlock by polling trylock in a loop and waiting for a bit */
	struct timeval now;
	struct timespec sleeptime;

	sleeptime.tv_sec = 0;
	sleeptime.tv_nsec = C11THREADS_TIMEDLOCK_POLL_INTERVAL;

	while((res = pthread_mutex_trylock(mtx)) == EBUSY) {
		/* ts is compared against the current gettimeofday() time, i.e. it is treated
		 * as an absolute deadline, not as a duration */
		gettimeofday(&now, NULL);

		/* now.tv_usec is in microseconds; scale to nanoseconds to compare with tv_nsec */
		if(now.tv_sec > ts->tv_sec || (now.tv_sec == ts->tv_sec &&
					(now.tv_usec * 1000) >= ts->tv_nsec)) {
			return thrd_timedout;
		}

		/* still busy and deadline not reached: wait one poll interval, then retry */
		nanosleep(&sleeptime, NULL);
	}
#else
	if((res = pthread_mutex_timedlock(mtx, ts)) == ETIMEDOUT) {
		return thrd_timedout;
	}
#endif
	/* res holds the result of the last trylock/timedlock call */
	return res == 0 ? thrd_success : thrd_error;
}
thrd_success : thrd_error; 188 | } 189 | 190 | static inline int cnd_timedwait(cnd_t *cond, mtx_t *mtx, const struct timespec *ts) 191 | { 192 | int res; 193 | 194 | if((res = pthread_cond_timedwait(cond, mtx, ts)) != 0) { 195 | return res == ETIMEDOUT ? thrd_timedout : thrd_error; 196 | } 197 | return thrd_success; 198 | } 199 | 200 | /* ---- thread-specific data ---- */ 201 | 202 | static inline int tss_create(tss_t *key, tss_dtor_t dtor) 203 | { 204 | return pthread_key_create(key, dtor) == 0 ? thrd_success : thrd_error; 205 | } 206 | 207 | static inline void tss_delete(tss_t key) 208 | { 209 | pthread_key_delete(key); 210 | } 211 | 212 | static inline int tss_set(tss_t key, void *val) 213 | { 214 | return pthread_setspecific(key, val) == 0 ? thrd_success : thrd_error; 215 | } 216 | 217 | static inline void *tss_get(tss_t key) 218 | { 219 | return pthread_getspecific(key); 220 | } 221 | 222 | /* ---- misc ---- */ 223 | 224 | static inline void call_once(once_flag *flag, void (*func)(void)) 225 | { 226 | pthread_once(flag, func); 227 | } 228 | 229 | #if __STDC_VERSION__ < 201112L || defined(C11THREADS_NO_TIMED_MUTEX) 230 | /* TODO take base into account */ 231 | inline int timespec_get(struct timespec *ts, int base) 232 | { 233 | struct timeval tv; 234 | 235 | gettimeofday(&tv, 0); 236 | 237 | ts->tv_sec = tv.tv_sec; 238 | ts->tv_nsec = tv.tv_usec * 1000; 239 | return base; 240 | } 241 | #endif /* not C11 */ 242 | -------------------------------------------------------------------------------- /src/common/thread_pthread.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #define ONCE_FLAG_INIT PTHREAD_ONCE_INIT 5 | 6 | typedef pthread_t thrd_t; 7 | typedef pthread_mutex_t mtx_t; 8 | typedef pthread_cond_t cnd_t; 9 | typedef pthread_key_t tss_t; 10 | typedef pthread_once_t once_flag; 11 | 12 | typedef int (*thrd_start_t)(void*); 13 | typedef void (*tss_dtor_t)(void*); 14 | 15 | enum { 16 | 
// Cached isatty results: -1 = not checked yet, 0 = not a TTY, 1 = TTY.
static int _TSTyleStdoutIsTTY = -1;
static int _TSTyleStderrIsTTY = -1;

// STDIN  = 0
// STDOUT = 1
// STDERR = 2

// TSTyleStdoutIsTTY returns true if standard output is connected to a terminal.
// The check is performed on the first call and the answer is cached.
bool TSTyleStdoutIsTTY() {
  if (_TSTyleStdoutIsTTY == -1) {
    _TSTyleStdoutIsTTY = isatty(STDOUT_FILENO) ? 1 : 0;
  }
  return !!_TSTyleStdoutIsTTY;
}

// TSTyleStderrIsTTY returns true if standard error is connected to a terminal.
// The check is performed on the first call and the answer is cached.
bool TSTyleStderrIsTTY() {
  if (_TSTyleStderrIsTTY == -1) {
    // must probe stderr (fd 2), not stdout: the two can be redirected independently
    _TSTyleStderrIsTTY = isatty(STDERR_FILENO) ? 1 : 0;
  }
  return !!_TSTyleStderrIsTTY;
}
| static inline Str TStyleGrey(Str s) { return sdscat(s, TStyleTable[TStyle_grey]); } 36 | static inline Str TStyleBlack(Str s) { return sdscat(s, TStyleTable[TStyle_black]); } 37 | static inline Str TStyleBlue(Str s) { return sdscat(s, TStyleTable[TStyle_blue]); } 38 | static inline Str TStyleCyan(Str s) { return sdscat(s, TStyleTable[TStyle_cyan]); } 39 | static inline Str TStyleGreen(Str s) { return sdscat(s, TStyleTable[TStyle_green]); } 40 | static inline Str TStyleMagenta(Str s) { return sdscat(s, TStyleTable[TStyle_magenta]); } 41 | static inline Str TStylePurple(Str s) { return sdscat(s, TStyleTable[TStyle_purple]); } 42 | static inline Str TStylePink(Str s) { return sdscat(s, TStyleTable[TStyle_pink]); } 43 | static inline Str TStyleRed(Str s) { return sdscat(s, TStyleTable[TStyle_red]); } 44 | static inline Str TStyleYellow(Str s) { return sdscat(s, TStyleTable[TStyle_yellow]); } 45 | static inline Str TStyleLightyellow(Str s) { return sdscat(s, TStyleTable[TStyle_lightyellow]); } 46 | static inline Str TStyleOrange(Str s) { return sdscat(s, TStyleTable[TStyle_orange]); } 47 | 48 | static inline Str TStyleNone(Str s) { return sdscat(s, TStyle_none); } 49 | static inline Str TStyleNoColor(Str s) { return sdscat(s, TStyle_noColor); } 50 | 51 | bool TSTyleStdoutIsTTY(); 52 | bool TSTyleStderrIsTTY(); 53 | -------------------------------------------------------------------------------- /src/common/unicode.c: -------------------------------------------------------------------------------- 1 | #include "unicode.h" 2 | 3 | Rune utf8decode(const u8* buf, size_t len, u32* out_width) { 4 | u8 b = *buf; 5 | if (b < RuneSelf) { 6 | *out_width = 1; 7 | return b; 8 | } 9 | if ((b >> 5) == 0x6) { 10 | *out_width = 2; 11 | return len < 2 ? RuneErr 12 | : ((b << 6) & 0x7ff) + 13 | ((buf[1]) & 0x3f); 14 | } else if ((b >> 4) == 0xE) { 15 | *out_width = 3; 16 | return len < 3 ? 
RuneErr 17 | : ((b << 12) & 0xffff) + 18 | ((buf[1] << 6) & 0xfff) + 19 | ((buf[2]) & 0x3f); 20 | } else if ((b >> 3) == 0x1E) { 21 | *out_width = 4; 22 | return len < 4 ? RuneErr 23 | : ((b << 18) & 0x1fffff) + 24 | ((buf[1] << 12) & 0x3ffff) + 25 | ((buf[2] << 6) & 0xfff) + 26 | ((buf[3]) & 0x3f); 27 | } 28 | *out_width = 1; 29 | return RuneErr; 30 | } 31 | -------------------------------------------------------------------------------- /src/common/unicode.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "defs.h" 4 | 5 | typedef i32 Rune; 6 | 7 | static const Rune RuneErr = 0xFFFD; // Unicode replacement character 8 | static const Rune RuneSelf = 0x80; 9 | // characters below RuneSelf are represented as themselves in a single byte. 10 | static const u32 UTF8Max = 4; // Maximum number of bytes of a UTF8-encoded char. 11 | 12 | Rune utf8decode(const u8* buf, size_t len, u32* out_width); 13 | -------------------------------------------------------------------------------- /src/convlit.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common/defs.h" 3 | #include "build/build.h" 4 | 5 | // convlit converts an expression to type t. 6 | // If n is already of type t, n is simply returned. 7 | // CCtx is used for error reporting. 8 | 9 | typedef struct Node Node; 10 | 11 | // For explicit conversions, which allows a greater range of conversions. 12 | static Node* ConvlitExplicit(CCtx* cc, Node* n, Node* t); 13 | 14 | // For implicit conversions (e.g. 
operands) 15 | static Node* ConvlitImplicit(CCtx* cc, Node* n, Node* t); 16 | 17 | 18 | Node* convlit(CCtx* cc, Node* n, Node* t, bool explicit); 19 | inline static Node* ConvlitExplicit(CCtx* cc, Node* n, Node* t) { 20 | return convlit(cc, n, t, /*explicit*/ true); 21 | } 22 | inline static Node* ConvlitImplicit(CCtx* cc, Node* n, Node* t) { 23 | return convlit(cc, n, t, /*explicit*/ false); 24 | } 25 | -------------------------------------------------------------------------------- /src/ir/block.c: -------------------------------------------------------------------------------- 1 | #include "ir.h" 2 | 3 | 4 | IRBlock* IRBlockNew(IRFun* f, IRBlockKind kind, const SrcPos* pos/*?*/) { 5 | assert(f->bid < 0xFFFFFFFF); // too many block IDs generated 6 | auto b = memalloct(f->mem, IRBlock); 7 | b->f = f; 8 | b->id = f->bid++; 9 | b->kind = kind; 10 | if (pos != NULL) { 11 | b->pos = *pos; 12 | } 13 | ArrayInitWithStorage(&b->values, b->valuesStorage, sizeof(b->valuesStorage)/sizeof(void*)); 14 | ArrayPush(&f->blocks, b, b->f->mem); 15 | return b; 16 | } 17 | 18 | 19 | void IRBlockDiscard(IRBlock* b) { 20 | assert(b->f != NULL); 21 | auto blocks = &b->f->blocks; 22 | 23 | #if DEBUG 24 | // make sure no other block refers to this block 25 | for (int i = 0; i < blocks->len; i++) { 26 | auto b2 = (IRBlock*)blocks->v[i]; 27 | if (b2 == b) { 28 | continue; 29 | } 30 | assertf(b2->preds[0] != b, "b%u holds a reference to b%u (preds[0])", b2->id, b->id); 31 | assertf(b2->preds[1] != b, "b%u holds a reference to b%u (preds[1])", b2->id, b->id); 32 | assertf(b2->succs[0] != b, "b%u holds a reference to b%u (succs[0])", b2->id, b->id); 33 | assertf(b2->succs[1] != b, "b%u holds a reference to b%u (succs[1])", b2->id, b->id); 34 | } 35 | #endif 36 | 37 | if (blocks->v[blocks->len - 1] == b) { 38 | blocks->len--; 39 | } else { 40 | auto i = ArrayIndexOf(blocks, b); 41 | assert(i > -1); 42 | ArrayRemove(blocks, i, 1); 43 | } 44 | memfree(b->f->mem, b); 45 | } 46 | 47 | 48 | void 
// IRBlockAddPred stores pred in the first empty (NULL) slot of b->preds.
// b must not be sealed. If every slot is already occupied the trailing
// assertion fires.
static void IRBlockAddPred(IRBlock* b, IRBlock* pred) {
  assert(!b->sealed); // cannot modify preds after block is sealed

  // walk the fixed-size array and claim the first hole
  IRBlock** slot = b->preds;
  IRBlock** const stop = b->preds + countof(b->preds);
  while (slot != stop) {
    if (*slot == NULL) {
      *slot = pred;
      return;
    }
    slot++;
  }

  assert(0 && "trying to add more than countof(IRBlock.preds) blocks");
}
117 | b->succs[index] = succ; 118 | assert(b->f != NULL); 119 | IRFunInvalidateCFG(b->f); 120 | } 121 | 122 | void IRBlockDelSucc(IRBlock* b, u32 index) { 123 | assert(index < countof(b->succs)); 124 | if (b->succs[index] != NULL) { 125 | b->succs[index] = NULL; 126 | assert(b->f != NULL); 127 | IRFunInvalidateCFG(b->f); 128 | } 129 | } 130 | 131 | -------------------------------------------------------------------------------- /src/ir/builder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "ir.h" 3 | #include "../common/array.h" 4 | #include "../common/ptrmap.h" 5 | #include "../build/build.h" 6 | 7 | 8 | typedef enum IRBuilderFlags { 9 | IRBuilderDefault = 0, 10 | IRBuilderComments = 1 << 1, // include comments in some values, for formatting 11 | IRBuilderOpt = 1 << 2, // apply construction-pass [optimization]s 12 | } IRBuilderFlags; 13 | 14 | 15 | typedef struct IRBuilder { 16 | Memory mem; // houses all IR data constructed by this builder 17 | PtrMap funs; // Node* => IRFun* -- generated functions 18 | IRBuilderFlags flags; 19 | IRPkg* pkg; 20 | 21 | // state used during building 22 | const CCtx* cc; // current source context (source-file specific) 23 | IRBlock* b; // current block 24 | IRFun* f; // current function 25 | 26 | SymMap* vars; // Sym => IRValue* 27 | // variable assignments in the current block (map from variable symbol to ssa value) 28 | // this PtrMap is moved into defvars when a block ends (internal call to endBlock.) 29 | 30 | Array defvars; void* defvarsStorage[512]; // PtrMap*[] (from vars) 31 | // all defined variables at the end of each block. Indexed by block id. 32 | // null indicates there are no variables in that block. 33 | 34 | // incompletePhis :Map>|null 35 | // tracks pending, incomplete phis that are completed by sealBlock for 36 | // blocks that are sealed after they have started. 
This happens when preds 37 | // are not known at the time a block starts, but is known and registered 38 | // before the block ends. 39 | 40 | } IRBuilder; 41 | 42 | // start a new IRPkg. 43 | // b must be zeroed memory or a reused builder. 44 | void IRBuilderInit(IRBuilder* b, IRBuilderFlags flags, const char* pkgname/*null*/); 45 | void IRBuilderFree(IRBuilder* b); 46 | 47 | // add ast to top-level of the current IRPkg. Returns false if any errors occured. 48 | bool IRBuilderAdd(IRBuilder* b, const CCtx* cc, Node* ast); 49 | -------------------------------------------------------------------------------- /src/ir/fun.c: -------------------------------------------------------------------------------- 1 | #include "ir.h" 2 | 3 | 4 | IRFun* IRFunNew(Memory mem, Node* n) { 5 | assert(n->type != NULL); 6 | assert(n->type->kind == NFunType); 7 | auto f = (IRFun*)memalloc(mem, sizeof(IRFun)); 8 | f->mem = mem; 9 | ArrayInitWithStorage(&f->blocks, f->blocksStorage, sizeof(f->blocksStorage)/sizeof(void*)); 10 | f->typeid = n->type->t.id; 11 | f->name = n->fun.name; // may be NULL 12 | f->pos = n->pos; // copy 13 | auto params = n->type->fun.params; 14 | f->nargs = params == NULL ? 0 : params->kind == NTuple ? params->array.a.len : 1; 15 | return f; 16 | } 17 | 18 | 19 | static IRValue* getConst64(IRFun* f, TypeCode t, u64 value) { 20 | 21 | // TODO: simplify const cache to just hold int32 and int64 since we can store all 22 | // values in these. 
23 | 24 | // dlog("getConst64 t=%s value=%llX", TypeCodeName(t), value); 25 | int addHint = 0; 26 | auto v = IRConstCacheGet(f->consts, f->mem, t, value, &addHint); 27 | if (v == NULL) { 28 | auto op = IROpConstFromAST(t); 29 | assert(IROpInfo(op)->aux != IRAuxNone); 30 | // Create const operation and add it to the entry block of function f 31 | v = IRValueNew(f, f->blocks.v[0], op, t, /*SrcPos*/NULL); 32 | v->auxInt = value; 33 | f->consts = IRConstCacheAdd(f->consts, f->mem, t, value, v, addHint); 34 | // dlog("getConst64 add new const op=%s value=%llX => v%u", IROpNames[op], value, v->id); 35 | } else { 36 | // dlog("getConst64 use cached const op=%s value=%llX => v%u", IROpNames[v->op], value, v->id); 37 | } 38 | return v; 39 | } 40 | 41 | // returns a constant IRValue representing n for type t 42 | IRValue* IRFunGetConstBool(IRFun* f, bool value) { 43 | // TODO: as there are just two values; avoid using the const cache. 44 | return getConst64(f, TypeCode_bool, value ? 1 : 0); 45 | } 46 | 47 | // returns a constant IRValue representing n for type t 48 | IRValue* IRFunGetConstInt(IRFun* f, TypeCode t, u64 value) { 49 | assert(TypeCodeIsInt(t)); 50 | return getConst64(f, t, value); 51 | } 52 | 53 | IRValue* IRFunGetConstFloat(IRFun* f, TypeCode t, double value) { 54 | assert(TypeCodeIsFloat(t)); 55 | // reintrepret bits (double is IEEE 754 in C11) 56 | u64 ivalue = *(u64*)(&value); 57 | return getConst64(f, t, ivalue); 58 | } 59 | 60 | void IRFunMoveBlockToEnd(IRFun* f, u32 blockIndex) { 61 | // moves block at index to end of f->blocks 62 | assert(f->blocks.len > blockIndex); 63 | if (f->blocks.len > blockIndex + 1) { 64 | // not last 65 | auto b = checknull(f->blocks.v[blockIndex]); 66 | 67 | // shift all blocks after blockIndex one step to the left 68 | // e.g. 
// IRBlockKind describes how control leaves a basic block. It determines how
// IRBlock.succs and IRBlock.control are interpreted.
typedef enum IRBlockKind {
  IRBlockInvalid = 0, // not a usable kind; the IR printer formats it as "?"
  IRBlockCont,        // plain block with a single successor
  IRBlockFirst,       // 2 successors, always takes the first one (second is dead)
  IRBlockIf,          // 2 successors, if control goto succs[0] else goto succs[1]
  IRBlockRet,         // no successors, control value is memory result
} IRBlockKind;
// Block represents a basic block
typedef struct IRBlock {
  IRFun*      f;        // owning function
  u32         id;       // block ID
  IRBlockKind kind;     // kind of block
  bool        sealed;   // true if no further predecessors will be added
  SrcPos      pos;      // source position
  const char* comment;  // short comment for IR formatting. May be NULL.
  IRBlock*    succs[2]; // Successor/subsequent blocks (CFG). Unused slots are NULL.
  IRBlock*    preds[2]; // Predecessors (CFG). Unused slots are NULL.

  // three-address code values
  Array values; void* valuesStorage[8]; // IRValue*[]

  // control is a value that determines how the block is exited.
  // Its value depends on the kind of the block. For instance, an IRBlockIf has a boolean
  // control value and an IRBlockRet has a memory control value.
  // May be NULL; set it with IRBlockSetControl so the value's use count stays correct.
  IRValue* control;

} IRBlock;
99 | // TODO: Move the PtrMap funs from builder here. Need to make PtrMap use Memory. 100 | Array funs; void* funsStorage[4]; // IRFun*[] 101 | } IRPkg; 102 | 103 | 104 | IRValue* IRValueNew(IRFun* f, IRBlock* b/*null*/, IROp op, TypeCode type, const SrcPos*/*null*/); 105 | void IRValueAddComment(IRValue* v, Memory, ConstStr comment); 106 | void IRValueAddArg(IRValue* v, IRValue* arg); 107 | 108 | 109 | IRBlock* IRBlockNew(IRFun* f, IRBlockKind, const SrcPos*/*nullable*/); 110 | void IRBlockDiscard(IRBlock* b); // removes it from b->f and frees memory of b. 111 | void IRBlockAddValue(IRBlock* b, IRValue* v); 112 | void IRBlockSetControl(IRBlock* b, IRValue* v/*pass null to clear*/); 113 | void IRBlockAddEdgeTo(IRBlock* b1, IRBlock* b2); // add an edge from b1 to successor block b2 114 | void IRBlockSetPred(IRBlock* b, u32 index, IRBlock* pred); 115 | void IRBlockDelPred(IRBlock* b, u32 index); 116 | void IRBlockSetSucc(IRBlock* b, u32 index, IRBlock* succ); 117 | void IRBlockDelSucc(IRBlock* b, u32 index); 118 | 119 | 120 | IRFun* IRFunNew(Memory, Node* n); 121 | IRValue* IRFunGetConstBool(IRFun* f, bool value); 122 | IRValue* IRFunGetConstInt(IRFun* f, TypeCode t, u64 n); 123 | IRValue* IRFunGetConstFloat(IRFun* f, TypeCode t, double n); 124 | void IRFunInvalidateCFG(IRFun*); 125 | void IRFunMoveBlockToEnd(IRFun*, u32 blockIndex); // moves block at index to end of f->blocks 126 | 127 | 128 | IRPkg* IRPkgNew(Memory, const char* name/*null*/); 129 | void IRPkgAddFun(IRPkg* pkg, IRFun* f); 130 | 131 | 132 | Str IRReprPkgStr(const IRPkg* f, Str init/*null*/); 133 | 134 | 135 | // Note: Must use the same Memory for all calls to the same IRConstCache. 136 | // Note: addHint is only valid until the next call to a mutating function like Add. 
137 | IRValue* IRConstCacheGet( 138 | const IRConstCache* c, Memory, TypeCode t, u64 value, int* out_addHint); 139 | IRConstCache* IRConstCacheAdd( 140 | IRConstCache* c, Memory, TypeCode t, u64 value, IRValue* v, int addHint); 141 | -------------------------------------------------------------------------------- /src/ir/pkg.c: -------------------------------------------------------------------------------- 1 | #include "ir.h" 2 | 3 | 4 | IRPkg* IRPkgNew(Memory mem, const char* name) { 5 | size_t namelen = name == NULL ? 0 : (strlen(name) + 1); 6 | auto pkg = (IRPkg*)memalloc(mem, sizeof(IRPkg) + namelen); 7 | 8 | pkg->mem = mem; 9 | 10 | ArrayInitWithStorage(&pkg->funs, pkg->funsStorage, sizeof(pkg->funsStorage)/sizeof(void*)); 11 | 12 | if (name == NULL) { 13 | pkg->name = "_"; 14 | } else { 15 | char* name2 = ((char*)pkg) + namelen; 16 | memcpy(name2, name, namelen); 17 | name2[namelen] = 0; 18 | pkg->name = name2; 19 | } 20 | 21 | return pkg; 22 | } 23 | 24 | 25 | void IRPkgAddFun(IRPkg* pkg, IRFun* f) { 26 | ArrayPush(&pkg->funs, f, pkg->mem); 27 | } 28 | -------------------------------------------------------------------------------- /src/ir/repr.c: -------------------------------------------------------------------------------- 1 | #include "ir.h" 2 | 3 | typedef struct { 4 | Str buf; 5 | bool includeTypes; 6 | } IRRepr; 7 | 8 | 9 | 10 | static void reprValue(IRRepr* r, const IRValue* v) { 11 | assert(v->op < Op_MAX); 12 | 13 | // vN type = Op 14 | r->buf = sdscatprintf(r->buf, 15 | " v%-2u %-7s = %-*s", 16 | v->id, 17 | TypeCodeName(v->type), 18 | IROpNamesMaxLen, 19 | IROpNames[v->op] 20 | ); 21 | 22 | // arg arg 23 | for (u8 i = 0; i < v->argslen; i++) { 24 | r->buf = sdscatprintf(r->buf, i+1 < v->argslen ? 
" v%-2u " : " v%u", v->args[i]->id); 25 | } 26 | 27 | // [auxInt] 28 | auto opinfo = IROpInfo(v->op); 29 | switch (opinfo->aux) { 30 | case IRAuxNone: 31 | break; 32 | case IRAuxBool: 33 | case IRAuxI8: 34 | case IRAuxI16: 35 | case IRAuxI32: 36 | r->buf = sdscatprintf(r->buf, " [0x%X]", (u32)v->auxInt); 37 | break; 38 | case IRAuxF32: 39 | r->buf = sdscatprintf(r->buf, " [%f]", *(f32*)(&v->auxInt)); 40 | break; 41 | case IRAuxI64: 42 | r->buf = sdscatprintf(r->buf, " [0x%llX]", v->auxInt); 43 | break; 44 | case IRAuxF64: 45 | r->buf = sdscatprintf(r->buf, " [%f]", *(f64*)(&v->auxInt)); 46 | break; 47 | } 48 | 49 | // {aux} 50 | // TODO non-numeric aux 51 | 52 | // comment 53 | if (v->comment != NULL) { 54 | r->buf = sdscatfmt(r->buf, "\t# %u use ; %s", v->uses, v->comment); 55 | } else { 56 | r->buf = sdscatfmt(r->buf, "\t# %u use", v->uses); 57 | } 58 | 59 | r->buf = sdscatlen(r->buf, "\n", 1); 60 | } 61 | 62 | 63 | 64 | static void reprBlock(IRRepr* r, const IRBlock* b) { 65 | // start of block header 66 | r->buf = sdscatfmt(r->buf, " b%u:", b->id); 67 | 68 | // predecessors 69 | if (b->preds[0] != NULL) { 70 | if (b->preds[1] != NULL) { 71 | r->buf = sdscatfmt(r->buf, " <- b%u b%u", b->preds[0]->id, b->preds[1]->id); 72 | } else { 73 | r->buf = sdscatfmt(r->buf, " <- b%u", b->preds[0]->id); 74 | } 75 | } else { 76 | assertf(b->preds[1] == NULL, "preds are not dense"); 77 | } 78 | 79 | // end block header 80 | if (b->comment != NULL) { 81 | r->buf = sdscatfmt(r->buf, "\t # %s", b->comment); 82 | } 83 | r->buf = sdscatc(r->buf, '\n'); 84 | 85 | // values 86 | ArrayForEach(&b->values, IRValue, v) { 87 | reprValue(r, v); 88 | } 89 | 90 | // successors 91 | switch (b->kind) { 92 | case IRBlockInvalid: 93 | r->buf = sdscat(r->buf, " ?\n"); 94 | break; 95 | 96 | case IRBlockCont: { 97 | auto contb = b->succs[0]; 98 | if (contb != NULL) { 99 | r->buf = sdscatfmt(r->buf, " cont -> b%u\n", contb->id); 100 | } else { 101 | r->buf = sdscatfmt(r->buf, " cont -> ?\n"); 102 | 
} 103 | break; 104 | } 105 | 106 | case IRBlockFirst: 107 | case IRBlockIf: { 108 | auto thenb = b->succs[0]; 109 | auto elseb = b->succs[1]; 110 | assert(thenb != NULL && elseb != NULL); 111 | assertf(b->control != NULL, "missing control value"); 112 | r->buf = sdscatfmt(r->buf, 113 | " %s v%u -> b%u b%u\n", 114 | b->kind == IRBlockIf ? "if" : "first", 115 | b->control->id, 116 | thenb->id, 117 | elseb->id 118 | ); 119 | break; 120 | } 121 | 122 | case IRBlockRet: 123 | assert(b->control != NULL); 124 | r->buf = sdscatfmt(r->buf, " ret v%u\n", b->control->id); 125 | break; 126 | 127 | } 128 | 129 | r->buf = sdscatc(r->buf, '\n'); 130 | } 131 | 132 | 133 | static void reprFun(IRRepr* r, const IRFun* f) { 134 | r->buf = sdscatprintf(r->buf, 135 | "fun %s %s %p\n", 136 | f->name == NULL ? "_" : f->name, 137 | f->typeid == NULL ? "()" : f->typeid, 138 | f 139 | ); 140 | ArrayForEach(&f->blocks, IRBlock, b) { 141 | reprBlock(r, b); 142 | } 143 | } 144 | 145 | 146 | static void reprPkg(IRRepr* r, const IRPkg* pkg) { 147 | r->buf = sdscatfmt(r->buf, "package %s\n", pkg->name); 148 | ArrayForEach(&pkg->funs, IRFun, f) { 149 | reprFun(r, f); 150 | } 151 | } 152 | 153 | 154 | Str IRReprPkgStr(const IRPkg* pkg, Str init) { 155 | IRRepr r = { .buf=init, .includeTypes=true }; 156 | if (r.buf == NULL) { 157 | r.buf = sdsempty(); 158 | } 159 | reprPkg(&r, pkg); 160 | return r.buf; 161 | } 162 | -------------------------------------------------------------------------------- /src/ir/value.c: -------------------------------------------------------------------------------- 1 | #include "ir.h" 2 | 3 | 4 | IRValue* IRValueNew(IRFun* f, IRBlock* b, IROp op, TypeCode type, const SrcPos* pos) { 5 | assert(f->vid < 0xFFFFFFFF); // too many block IDs generated 6 | auto v = (IRValue*)memalloc(f->mem, sizeof(IRValue)); 7 | v->id = f->vid++; 8 | v->op = op; 9 | v->type = type; 10 | if (pos != NULL) { 11 | v->pos = *pos; 12 | } 13 | if (b != NULL) { 14 | ArrayPush(&b->values, v, b->f->mem); 
15 | } else { 16 | dlog("WARN IRValueNew b=NULL"); 17 | } 18 | return v; 19 | } 20 | 21 | void IRValueAddComment(IRValue* v, Memory mem, ConstStr comment) { 22 | if (comment != NULL) { // allow passing NULL to do nothing 23 | auto commentLen = sdslen(comment); 24 | if (commentLen > 0) { 25 | if (v->comment == NULL) { 26 | v->comment = memallocCStr(mem, comment, commentLen); 27 | } else { 28 | v->comment = memallocCStrConcat(mem, v->comment, "; ", comment, NULL); 29 | } 30 | } 31 | } 32 | } 33 | 34 | void IRValueAddArg(IRValue* v, IRValue* arg) { 35 | assert(v->argslen < countof(v->args)); 36 | v->args[v->argslen++] = arg; 37 | arg->uses ++; 38 | } 39 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #include "build/build.h" 2 | #include "parse/parse.h" 3 | #include "ir/builder.h" 4 | #include "common/os.h" 5 | #include "common/test.h" 6 | 7 | static void errorHandler(const Source* src, SrcPos pos, ConstStr msg, void* userdata) { 8 | u32* errcount = (u32*)userdata; 9 | (*errcount)++; 10 | auto s = SrcPosMsg(sdsempty(), pos, msg); 11 | s[sdslen(s)-1] = '\n'; // repurpose NUL 12 | fwrite(s, sdslen(s), 1, stderr); 13 | sdsfree(s); 14 | } 15 | 16 | 17 | static void printAst(const Node* n) { 18 | auto s = NodeRepr(n, sdsempty()); 19 | s = sdscatlen(s, "\n", 1); 20 | fwrite(s, sdslen(s), 1, stdout); 21 | sdsfree(s); 22 | } 23 | 24 | 25 | static void printIR(const IRPkg* pkg) { 26 | auto s = IRReprPkgStr(pkg, sdsempty()); 27 | s = sdscatlen(s, "\n", 1); 28 | fwrite(s, sdslen(s), 1, stdout); 29 | sdsfree(s); 30 | } 31 | 32 | 33 | void parsefile(Str filename, Scope* pkgscope) { 34 | 35 | // load file contents 36 | size_t len = 0; 37 | auto buf = os_readfile(filename, &len, NULL); 38 | if (!buf) { 39 | die("%s: %s", filename, strerror(errno)); 40 | } 41 | 42 | // our userdata is number of errors encountered (incremented by errorHandler) 43 | u32 
errcount = 0; 44 | 45 | // compilation context 46 | CCtx cc = {0}; // TODO: share across individual, non-overlapping compile sessions 47 | CCtxInit(&cc, errorHandler, &errcount, filename, buf, len); 48 | 49 | printf("————————————————————————————————————————————————————————————————\n"); 50 | printf("PARSE\n"); 51 | // parse input 52 | static P parser; // shared parser (zero-initialized since it's static) 53 | auto file = Parse(&parser, &cc, ParseComments /*| ParseOpt*/, pkgscope); 54 | printAst(file); 55 | if (errcount != 0) { goto end; } 56 | 57 | // resolve symbols and types 58 | if (parser.unresolved == 0) { 59 | dlog("(no unresolved names; not running sym resolver)"); 60 | } else { 61 | printf("————————————————————————————————————————————————————————————————\n"); 62 | printf("RESOLVE NAMES\n"); 63 | ResolveSym(&cc, parser.s.flags, file, pkgscope); 64 | printAst(file); 65 | if (errcount != 0) { goto end; } 66 | } 67 | 68 | printf("————————————————————————————————————————————————————————————————\n"); 69 | printf("RESOLVE TYPES\n"); 70 | ResolveType(&cc, file); 71 | printAst(file); 72 | if (errcount != 0) { goto end; } 73 | 74 | printf("————————————————————————————————————————————————————————————————\n"); 75 | printf("BUILD IR\n"); 76 | // build some IR 77 | IRBuilder irbuilder = {}; 78 | IRBuilderInit(&irbuilder, IRBuilderComments /*| IRBuilderOpt*/, "foo"); // start a new package 79 | IRBuilderAdd(&irbuilder, &cc, file); // add ast to current package 80 | 81 | printf("————————————————————————————————————————————————————————————————\n"); 82 | // print IR SLC 83 | printIR(irbuilder.pkg); 84 | IRBuilderFree(&irbuilder); 85 | 86 | // // assemble 87 | // AsmELF(); 88 | 89 | end: 90 | CCtxFree(&cc); 91 | memgc_collect(); 92 | } 93 | 94 | 95 | int main(int argc, char **argv) { 96 | if (getTestMode() == WTestModeExclusive) { 97 | return 0; 98 | } 99 | 100 | if (argc < 2) { 101 | fprintf(stderr, "usage: %s ...\n", argv[0]); 102 | exit(1); 103 | } 104 | 105 | // int out = 
1; // stdout 106 | // TODO: support -o CLI flag. 107 | // int out = open(argv[2], O_WRONLY | O_CREAT, 0660); 108 | // if (out < 0) { 109 | // fprintf(stderr, "error opening output %s: %s\n", argv[2], strerror(errno)); 110 | // exit(1); 111 | // } 112 | 113 | auto pkgscope = ScopeNew(GetGlobalScope(), NULL); 114 | parsefile(sdsnew(argv[1]), pkgscope); 115 | 116 | return 0; 117 | } 118 | 119 | -------------------------------------------------------------------------------- /src/parse/parse.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "scan.h" 3 | #include "../common/array.h" 4 | #include "ast.h" 5 | // #include "common/assert.h" 6 | // #include "common/test.h" 7 | // #include "common/memory.h" 8 | // #include "common/str.h" 9 | // #include "common/os.h" 10 | // #include "sym.h" 11 | 12 | // parser 13 | typedef struct P { 14 | S s; // scanner 15 | u32 fnest; // function nesting level (for error handling) 16 | u32 unresolved; // number of unresolved identifiers 17 | Scope* scope; // current scope 18 | CCtx* cc; // compilation context 19 | } P; 20 | Node* Parse(P*, CCtx*, ParseFlags, Scope* pkgscope); 21 | Node* NodeOptIfCond(Node* n); // TODO: move this and parser into a parse.h file 22 | 23 | // Symbol resolver 24 | Node* ResolveSym(CCtx*, ParseFlags, Node*, Scope*); 25 | 26 | // Type resolver 27 | void ResolveType(CCtx*, Node*); 28 | -------------------------------------------------------------------------------- /src/parse/parseint.c: -------------------------------------------------------------------------------- 1 | #include "parseint.h" 2 | #include 3 | 4 | #define GEN_STRTO_X(BITS, MAXVAL) \ 5 | bool parseint##BITS(const char* pch, size_t size, int base, u##BITS* result) { \ 6 | assert(base >= 2 && base <= 36); \ 7 | const char* s = pch; \ 8 | const char* end = pch + size; \ 9 | u##BITS acc = 0; \ 10 | u##BITS cutoff = MAXVAL; \ 11 | u##BITS cutlim = cutoff % base; \ 12 | cutoff /= base; \ 13 | 
int any = 0; \ 14 | for (char c = *s; s != end; c = *++s) { \ 15 | if (c >= '0' && c <= '9') { \ 16 | c -= '0'; \ 17 | } else if (c >= 'A' && c <= 'Z') { \ 18 | c -= 'A' - 10; \ 19 | } else if (c >= 'a' && c <= 'z') { \ 20 | c -= 'a' - 10; \ 21 | } else { \ 22 | return false; \ 23 | } \ 24 | if (c >= base) { \ 25 | return false; \ 26 | } \ 27 | if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) { \ 28 | any = -1; \ 29 | } else { \ 30 | any = 1; \ 31 | acc *= base; \ 32 | acc += c; \ 33 | } \ 34 | } \ 35 | if (any < 0 || /* more digits than what fits in acc */ \ 36 | any == 0) \ 37 | { \ 38 | return false; \ 39 | } \ 40 | *result = acc; \ 41 | return true; \ 42 | } 43 | 44 | GEN_STRTO_X(32, 0xFFFFFFFFu) 45 | GEN_STRTO_X(64, 0xFFFFFFFFFFFFFFFFull) 46 | 47 | #ifndef NDEBUG 48 | __attribute__((constructor)) static void test() { 49 | #define T32(cstr, base, expectnum) (({ \ 50 | u32 result = 0; \ 51 | bool ok = parseint32(cstr, strlen(cstr), base, &result); \ 52 | assert(ok || !cstr); \ 53 | if (result != expectnum) { fprintf(stderr, "result: 0x%X\n", result); } \ 54 | assert(result == expectnum || !"got: "&& result); \ 55 | })) 56 | 57 | #define T64(cstr, base, expectnum) (({ \ 58 | u64 result = 0; \ 59 | bool ok = parseint64(cstr, strlen(cstr), base, &result); \ 60 | assert(ok || !cstr); \ 61 | if (result != expectnum) { fprintf(stderr, "result: 0x%llX\n", result); } \ 62 | assert(result == expectnum || !"got: "&& result); \ 63 | })) 64 | 65 | T32("FFAA3191", 16, 0xFFAA3191); 66 | T32("0", 16, 0); 67 | T32("000000", 16, 0); 68 | T32("7FFFFFFF", 16, 0x7FFFFFFF); 69 | T32("EFFFFFFF", 16, 0xEFFFFFFF); 70 | T32("FFFFFFFF", 16, 0xFFFFFFFF); 71 | 72 | // fits in s64 73 | T64("7fffffffffffffff", 16, 0x7FFFFFFFFFFFFFFF); 74 | T64("9223372036854775807", 10, 0x7FFFFFFFFFFFFFFF); 75 | T64("777777777777777777777", 8, 0x7FFFFFFFFFFFFFFF); 76 | T64("1y2p0ij32e8e7", 36, 0x7FFFFFFFFFFFFFFF); 77 | 78 | T64("efffffffffffffff", 16, 0xEFFFFFFFFFFFFFFF); // this caught a bug 
once 79 | 80 | T64("ffffffffffffffff", 16, 0xFFFFFFFFFFFFFFFF); 81 | T64("18446744073709551615", 10, 0xFFFFFFFFFFFFFFFF); 82 | T64("1777777777777777777777", 8, 0xFFFFFFFFFFFFFFFF); 83 | T64("3w5e11264sgsf", 36, 0xFFFFFFFFFFFFFFFF); 84 | } 85 | #endif 86 | -------------------------------------------------------------------------------- /src/parse/parseint.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../common/defs.h" 3 | 4 | bool parseint32(const char* ptr, size_t len, int base, u32* result); 5 | bool parseint64(const char* ptr, size_t len, int base, u64* result); 6 | -------------------------------------------------------------------------------- /src/parse/scan.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../common/defs.h" 3 | #include "../build/build.h" 4 | #include "../sym.h" 5 | #include "token.h" 6 | 7 | // parser & scanner flags 8 | typedef enum { 9 | ParseFlagsDefault = 0, 10 | ParseComments = 1 << 1, // parse comments, populating S.comments 11 | ParseOpt = 1 << 2, // apply optimizations. 
might produce a non-1:1 AST/token stream 12 | } ParseFlags; 13 | 14 | // scanned comment 15 | typedef struct Comment { 16 | struct Comment* next; // next comment in linked list 17 | Source* src; // source 18 | const u8* ptr; // ptr into source 19 | size_t len; // byte length 20 | } Comment; 21 | 22 | // scanner 23 | typedef struct S { 24 | Memory mem; 25 | Source* src; // input source 26 | const u8* inp; // input buffer current pointer 27 | const u8* inp0; // input buffer previous pointer 28 | const u8* inend; // input buffer end 29 | ParseFlags flags; 30 | 31 | Tok tok; // current token 32 | const u8* tokstart; // start of current token 33 | const u8* tokend; // end of current token 34 | Sym name; // Current name (valid for TIdent and keywords) 35 | bool insertSemi; // insert a semicolon before next newline 36 | Comment* comments; // linked list head of comments scanned so far 37 | Comment* comments_tail; // linked list tail of comments scanned so far 38 | 39 | u32 lineno; // source position line 40 | const u8* linestart; // source position line start pointer (for column) 41 | 42 | ErrorHandler* errh; 43 | void* userdata; 44 | } S; 45 | 46 | // SInit initializes a scanner 47 | void SInit(S*, Memory, Source*, ParseFlags, ErrorHandler*, void* userdata); 48 | 49 | // SNext scans the next token 50 | Tok SNext(S*); 51 | 52 | // SSrcPos returns the source position of current token 53 | inline static SrcPos SSrcPos(S* s) { 54 | assert(s->tokstart >= s->src->buf); 55 | assert(s->tokstart < (s->src->buf + s->src->len)); 56 | assert(s->tokend >= s->tokstart); 57 | assert(s->tokend <= (s->src->buf + s->src->len)); 58 | size_t offs = s->tokstart - s->src->buf; 59 | SrcPos p = { s->src, offs, s->tokend - s->tokstart }; 60 | return p; 61 | } 62 | -------------------------------------------------------------------------------- /src/parse/token.c: -------------------------------------------------------------------------------- 1 | #include "../common/defs.h" 2 | #include 
"token.h" 3 | 4 | const char* TokName(Tok t) { 5 | switch (t) { 6 | #define I_ENUM(name, str) case name: return str; 7 | TOKENS(I_ENUM) 8 | #undef I_ENUM 9 | 10 | case TKeywordsStart: return "TKeywordsStart"; 11 | 12 | #define I_ENUM(str, name) case name: return "keyword " #str; 13 | TOKEN_KEYWORDS(I_ENUM) 14 | #undef I_ENUM 15 | 16 | case TKeywordsEnd: return "TKeywordsEnd"; 17 | 18 | case TMax: return "TMax"; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/parse/token.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // Defines the Tok enum 3 | 4 | // scanner tokens 5 | #define TOKENS(_) \ 6 | _( TNone , "TNone" ) \ 7 | _( TComma , ",") \ 8 | _( TSemi , ";") \ 9 | \ 10 | _( T_PRIM_OPS_START , "") \ 11 | /* primary "intrinsic" operator tokens, most of them mapping directly to IR ops */ \ 12 | _( TStar , "*") \ 13 | _( TSlash , "/") \ 14 | _( TPercent , "%") \ 15 | _( TShl , "<<") \ 16 | _( TShr , ">>") \ 17 | _( TAnd , "&") \ 18 | _( TPlus , "+") \ 19 | _( TMinus , "-") \ 20 | _( TPipe , "|") \ 21 | _( THat , "^") \ 22 | _( TTilde , "~") \ 23 | _( TExcalm , "!") \ 24 | _( TEq , "==") \ 25 | _( TNEq , "!=") \ 26 | _( TLt , "<") \ 27 | _( TLEq , "<=") \ 28 | _( TGt , ">") \ 29 | _( TGEq , ">=") \ 30 | _( TPlusPlus , "++") \ 31 | _( TMinusMinus , "--") \ 32 | \ 33 | _( T_PRIM_OPS_END , "") /* end of operator tokens */ \ 34 | \ 35 | _( TAssign , "=") \ 36 | _( TShlAssign , "<<=") \ 37 | _( TShrAssign , ">>=") \ 38 | _( TPlusAssign , "+=") \ 39 | _( TMinusAssign , "-=") \ 40 | _( TStarAssign , "*=") \ 41 | _( TSlashAssign , "/=") \ 42 | _( TPercentAssign , "%=") \ 43 | _( TAndAssign , "&=") \ 44 | _( TPipeAssign , "|=") \ 45 | _( TTildeAssign , "~=") \ 46 | _( THatAssign , "^=") \ 47 | _( TLParen , "(") \ 48 | _( TRParen , ")") \ 49 | _( TLBrace , "{") \ 50 | _( TRBrace , "}") \ 51 | _( TLBrack , "[") \ 52 | _( TRBrack , "]") \ 53 | _( TAndAnd , "&&") \ 54 | 
_( TPipePipe , "||") \ 55 | _( TRArr , "->") \ 56 | _( TIdent , "identifier") \ 57 | _( TIntLit , "int") \ 58 | _( TFloatLit , "float") \ 59 | _( TComment , "comment") \ 60 | /*END TOKENS*/ 61 | #define TOKEN_KEYWORDS(_) \ 62 | _( as, TAs) \ 63 | _( break, TBreak) \ 64 | _( case, TCase) \ 65 | _( continue, TContinue) \ 66 | _( default, TDefault) \ 67 | _( defer, TDefer) \ 68 | _( else, TElse) \ 69 | _( enum, TEnum) \ 70 | _( for, TFor) \ 71 | _( fun, TFun) \ 72 | _( if, TIf) \ 73 | _( import, TImport) \ 74 | _( in, TIn) \ 75 | _( interface, TInterface) \ 76 | _( is, TIs) \ 77 | _( mutable, TMutable) \ 78 | _( nil, TNil) \ 79 | _( return, TReturn) \ 80 | _( select, TSelect) \ 81 | _( struct, TStruct) \ 82 | _( switch, TSwitch) \ 83 | _( symbol, TSymbol) \ 84 | _( type, TType) \ 85 | _( while, TWhile) \ 86 | // Limited to a total of 31 keywords. See scan.c 87 | //END TOKEN_KEYWORDS 88 | 89 | typedef enum { 90 | #define I_ENUM(name, str) name, 91 | TOKENS(I_ENUM) 92 | #undef I_ENUM 93 | 94 | // TKeywordsStart is used for 0-based keyword indexing. 95 | // Its explicit value is used by sym.c to avoid having to regenerate keyword symbols 96 | // whenever a non-keyword token is added. I.e. this number can be changed freely but will 97 | // require regeneration of the code in sym.c. 
98 | TKeywordsStart = 0x100, 99 | #define I_ENUM(_str, name) name, 100 | TOKEN_KEYWORDS(I_ENUM) 101 | #undef I_ENUM 102 | TKeywordsEnd, 103 | 104 | TMax 105 | } Tok; 106 | 107 | static_assert(TKeywordsEnd - TKeywordsStart <= 32, "too many keywords"); 108 | 109 | // Get printable name 110 | const char* TokName(Tok); 111 | -------------------------------------------------------------------------------- /src/sym.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common/str.h" 3 | #include "parse/token.h" 4 | #include "types.h" 5 | 6 | // Sym is a type of sds string, compatible with sds functions, with an additional header 7 | // containing a precomputed FNV1a hash. Sym is immutable. 8 | typedef const char* Sym; 9 | 10 | // Predefinition of Node 11 | typedef struct Node Node; 12 | 13 | // Get a symbol (retrieves or interns) 14 | Sym symget(const u8* data, size_t len, u32 hash); 15 | 16 | // Hashes data and then calls symget 17 | Sym symgeth(const u8* data, size_t len); 18 | 19 | // Compare two Sym's string values. 20 | inline static int symcmp(Sym a, Sym b) { return a == b ? 0 : strcmp(a, b); } 21 | 22 | typedef struct __attribute__((__packed__)) SymHeader { 23 | u32 hash; 24 | struct sdshdr16 sh; 25 | } SymHeader; 26 | 27 | // access SymHeader from Sym/sds/const char* 28 | #define SYM_HDR(s) ((const SymHeader*)((s) - (sizeof(SymHeader)))) 29 | 30 | // access FNV1a hash of s 31 | inline static u32 symhash(Sym s) { return SYM_HDR(s)->hash; } 32 | 33 | // faster alternative to sdslen, without type lookup 34 | inline static u16 symlen(Sym s) { return SYM_HDR(s)->sh.len; } 35 | 36 | // Returns the Tok representing this sym in the language syntax. 37 | // Either returns a keyword token or TIdent if s is not a keyword. 38 | inline static Tok symLangTok(Sym s) { 39 | // Bits 4-8 represents offset into Tok enum when s is a language keyword. 
40 | u32 kwindex = SYM_HDR(s)->sh.flags >> SDS_TYPE_BITS; 41 | return kwindex == 0 ? TIdent : kwindex + TKeywordsStart; 42 | } 43 | 44 | // SymMap maps Sym to pointers 45 | #define HASHMAP_NAME SymMap 46 | #define HASHMAP_KEY Sym 47 | #define HASHMAP_VALUE void* 48 | #include "common/hashmap.h" 49 | #undef HASHMAP_NAME 50 | #undef HASHMAP_KEY 51 | #undef HASHMAP_VALUE 52 | 53 | // Creates and initializes a new SymMap in mem, or global memory if mem is NULL. 54 | SymMap* SymMapNew(u32 initbuckets, Memory mem/*null*/); 55 | 56 | // SymMapInit initializes a map structure. initbuckets is the number of initial buckets. 57 | void SymMapInit(SymMap*, u32 initbuckets, Memory mem/*null*/); 58 | 59 | // SymMapFree frees SymMap along with its data. 60 | void SymMapFree(SymMap*); 61 | 62 | // SymMapDealloc frees heap memory used by a map, but leaves SymMap untouched. 63 | void SymMapDealloc(SymMap*); 64 | 65 | // SymMapGet searches for key. Returns value, or NULL if not found. 66 | void* SymMapGet(const SymMap*, Sym key); 67 | 68 | // SymMapSet inserts key=value into m. Returns the replaced value or NULL if not found. 69 | void* SymMapSet(SymMap*, Sym key, void* value); 70 | 71 | // SymMapDel removes value for key. Returns the removed value or NULL if not found. 72 | void* SymMapDel(SymMap*, Sym key); 73 | 74 | // SymMapClear removes all entries. In contrast to SymMapFree, map remains valid. 75 | void SymMapClear(SymMap*); 76 | 77 | // Iterator function type. Set stop=true to stop iteration. 78 | typedef void(SymMapIterator)(Sym key, void* value, bool* stop, void* userdata); 79 | 80 | // SymMapIter iterates over entries of the map. 
81 | void SymMapIter(const SymMap*, SymMapIterator*, void* userdata); 82 | 83 | 84 | // symbols for language keywords (defined in token.h) 85 | #define SYM_DEF(str, _) \ 86 | const Sym sym_##str; 87 | TOKEN_KEYWORDS(SYM_DEF) 88 | #undef SYM_DEF 89 | 90 | 91 | // symbols and AST nodes for predefined types (defined in types.h) 92 | #define SYM_DEF(name) \ 93 | const Sym sym_##name; \ 94 | Node* Type_##name; 95 | TYPE_SYMS(SYM_DEF) 96 | #undef SYM_DEF 97 | 98 | // nil is special and implemented without macros since its sym is defined by TOKEN_KEYWORDS 99 | Node* Type_nil; 100 | Node* Const_nil; 101 | 102 | // ideal is the type of untyped constants 103 | Node* Type_ideal; 104 | 105 | // TypeCodeToTypeNode returns the type Node for TypeCode t. 106 | static Node* TypeCodeToTypeNode(TypeCode t); 107 | Node* const _TypeCodeToTypeNodeMap[TypeCode_CONCRETE_END]; 108 | inline static Node* TypeCodeToTypeNode(TypeCode t) { 109 | assert(t >= 0 && t < TypeCode_CONCRETE_END); 110 | return _TypeCodeToTypeNodeMap[t]; 111 | } 112 | 113 | // symbols and AST nodes for predefined constants 114 | #define PREDEFINED_CONSTANTS(_) \ 115 | _( true, bool, 1 ) \ 116 | _( false, bool, 0 ) \ 117 | /*END PREDEFINED_CONSTANTS*/ 118 | #define SYM_DEF(name, _type, _val) \ 119 | const Sym sym_##name; \ 120 | Node* Const_##name; 121 | PREDEFINED_CONSTANTS(SYM_DEF) 122 | #undef SYM_DEF 123 | 124 | 125 | // symbols for predefined common identifiers 126 | // predefined common identifiers (excluding types) 127 | #define PREDEFINED_IDENTS(ID) \ 128 | ID( _ ) \ 129 | /*END PREDEFINED_IDENTS*/ 130 | #define SYM_DEF(name) \ 131 | const Sym sym_##name; 132 | PREDEFINED_IDENTS(SYM_DEF) 133 | #undef SYM_DEF 134 | -------------------------------------------------------------------------------- /src/typeid.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common/defs.h" 3 | #include "types.h" 4 | #include "sym.h" 5 | 6 | typedef struct Node Node; 7 | 
8 | // GetTypeID retrieves the TypeID for the type node n. 9 | // This function may mutate n by computing and storing id to n.t.id. 10 | Sym GetTypeID(Node* n); 11 | 12 | // TypeEquals returns true if a and b are equivalent types (i.e. identical). 13 | bool TypeEquals(Node* a, Node* b); 14 | 15 | // TypeConv describes the effect of converting one type to another 16 | typedef enum TypeConv { 17 | TypeConvLossless = 0, // conversion is "perfect". e.g. int32 -> int64 18 | TypeConvLossy, // conversion may be lossy. e.g. int32 -> float32 19 | TypeConvImpossible, // conversion is not possible. e.g. (int,int) -> bool 20 | } TypeConv; 21 | 22 | // // TypeConversion returns the effect of converting fromType -> toType. 23 | // // intsize is the size in bytes of the "int" and "uint" types. E.g. 4 for 32-bit. 24 | // TypeConv CheckTypeConversion(Node* fromType, Node* toType, u32 intsize); 25 | -------------------------------------------------------------------------------- /src/types.c: -------------------------------------------------------------------------------- 1 | #include "types.h" 2 | 3 | // Lookup table TypeCode => string encoding char 4 | const char TypeCodeEncoding[TypeCode_MAX] = { 5 | #define I_ENUM(name, encoding, _flags) encoding, 6 | TYPE_CODES(I_ENUM) 7 | #undef I_ENUM 8 | }; 9 | 10 | 11 | // #if DEBUG 12 | const char* _TypeCodeName[TypeCode_MAX] = { 13 | #define I_ENUM(name, _encoding, _flags) #name, 14 | TYPE_CODES(I_ENUM) 15 | #undef I_ENUM 16 | }; 17 | 18 | 19 | const TypeCodeFlag TypeCodeFlagMap[TypeCode_MAX] = { 20 | #define I_ENUM(_name, _encoding, flags) flags, 21 | TYPE_CODES(I_ENUM) 22 | #undef I_ENUM 23 | }; 24 | 25 | const char* CTypeName(CType ct) { 26 | switch (ct) { 27 | case CType_INVALID: return "INVALID"; 28 | case CType_int: return "int"; 29 | case CType_rune: return "rune"; 30 | case CType_float: return "float"; 31 | case CType_str: return "str"; 32 | case CType_bool: return "bool"; 33 | case CType_nil: return "nil"; 34 | } 35 | return "?"; 
36 | } 37 | 38 | 39 | // const char* TypeCodeName(TypeCode tc) { 40 | // assert(tc > 0 && tc < TypeCode_MAX); 41 | // return _TypeCodeName[tc]; 42 | // } 43 | // #else 44 | // // compact names where a string is formed from encoding chars + sentinels bytes. 45 | // // E.g. "b\01\02\03\04\05\06\07\08\0f\0F\0..." Index is *2 that of TypeCode. 46 | // static const char _TypeCodeName[TypeCode_MAX * 2] = { 47 | // #define I_ENUM(_, enc) enc, 0, 48 | // TYPE_CODES(I_ENUM) 49 | // #undef I_ENUM 50 | // }; 51 | // const char* TypeCodeName(TypeCode tc) { 52 | // assert(tc > 0 && tc < TypeCode_MAX); 53 | // return &_TypeCodeName[tc * 2]; 54 | // } 55 | // #endif 56 | -------------------------------------------------------------------------------- /src/types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common/defs.h" 3 | 4 | typedef enum TypeCodeFlag { 5 | TypeCodeFlagNone = 0, 6 | TypeCodeFlagSizeMask = 0b0000000000001111, // bitmask for extracting SizeN flag 7 | TypeCodeFlagSize1 = 1 << 0, // = 1 = 1 byte (8 bits) wide 8 | TypeCodeFlagSize2 = 1 << 1, // = 2 = 2 bytes (16 bits) wide 9 | TypeCodeFlagSize4 = 1 << 2, // = 4 = 4 bytes (32 bits) wide 10 | TypeCodeFlagSize8 = 1 << 3, // = 8 = 8 bytes (64 bits) wide 11 | TypeCodeFlagInt = 1 << 4, // is integer 12 | TypeCodeFlagFloat = 1 << 5, // is float 13 | TypeCodeFlagSigned = 1 << 6, // [integers only]: is signed 14 | } TypeCodeFlag; 15 | 16 | // TypeCode with their string encoding. 17 | // Note: misc/gen_ops.py relies on "#define TYPE_CODES" and "NUM_END". 18 | #define TYPE_CODES(_) \ 19 | /* named types exported in the global scope. Names must match those of TYPE_SYMS. */ \ 20 | /* Note: numeric types are listed first as their enum value is used as dense indices. */ \ 21 | /* Note: order of intrinsic integer types must be signed,unsigned,signed,unsigned... */ \ 22 | /* Reordering these requires updating TypeCodeIsInt() below. 
*/ \ 23 | /* name encoding */ \ 24 | _( bool , 'b', 0 ) \ 25 | _( int8 , '1', TypeCodeFlagSize1 | TypeCodeFlagInt | TypeCodeFlagSigned ) \ 26 | _( uint8 , '2', TypeCodeFlagSize1 | TypeCodeFlagInt ) \ 27 | _( int16 , '3', TypeCodeFlagSize2 | TypeCodeFlagInt | TypeCodeFlagSigned ) \ 28 | _( uint16 , '4', TypeCodeFlagSize2 | TypeCodeFlagInt ) \ 29 | _( int32 , '5', TypeCodeFlagSize4 | TypeCodeFlagInt | TypeCodeFlagSigned ) \ 30 | _( uint32 , '6', TypeCodeFlagSize4 | TypeCodeFlagInt ) \ 31 | _( int64 , '7', TypeCodeFlagSize8 | TypeCodeFlagInt | TypeCodeFlagSigned ) \ 32 | _( uint64 , '8', TypeCodeFlagSize8 | TypeCodeFlagInt ) \ 33 | _( float32 , 'f', TypeCodeFlagSize4 | TypeCodeFlagFloat ) \ 34 | _( float64 , 'F', TypeCodeFlagSize8 | TypeCodeFlagFloat ) \ 35 | _( int , 'i', TypeCodeFlagInt | TypeCodeFlagSigned ) \ 36 | _( uint , 'u', TypeCodeFlagInt ) \ 37 | _( NUM_END, 0, 0 ) /* sentinel; not a TypeCode */ \ 38 | _( str , 's', 0 ) \ 39 | _( nil , '0', 0 ) \ 40 | _( CONCRETE_END, 0, 0 ) /* sentinel; not a TypeCode */ \ 41 | /* internal types not directly reachable by names in the language */ \ 42 | _( fun , '^', 0 ) \ 43 | _( tuple , '(', 0 ) _( tupleEnd , ')', 0 ) \ 44 | _( list , '[', 0 ) _( listEnd , ']', 0 ) \ 45 | _( struct , '{', 0 ) _( structEnd , '}', 0 ) \ 46 | /* special type codes used in IR */ \ 47 | _( ideal , 0 , 0 ) /* untyped numeric constants */ \ 48 | _( param1 , 'P', 0 ) /* parameteric. For IR, matches other type, e.g. output == input */ \ 49 | _( param2 , 'P', 0 ) 50 | /*END TYPE_CODES*/ 51 | 52 | // TypeCode identifies all basic types 53 | typedef enum { 54 | #define I_ENUM(name, _encoding, _flags) TypeCode_##name, 55 | TYPE_CODES(I_ENUM) 56 | #undef I_ENUM 57 | 58 | TypeCode_MAX 59 | } TypeCode; 60 | 61 | // order of intrinsic integer types must be signed,unsigned,signed,unsigned... 
62 | static_assert(TypeCode_int8+1 == TypeCode_uint8, "integer order incorrect"); 63 | static_assert(TypeCode_int16+1 == TypeCode_uint16, "integer order incorrect"); 64 | static_assert(TypeCode_int32+1 == TypeCode_uint32, "integer order incorrect"); 65 | static_assert(TypeCode_int64+1 == TypeCode_uint64, "integer order incorrect"); 66 | // must be less than 32 numeric types 67 | static_assert(TypeCode_NUM_END <= 32, "there must be no more than 32 numeric types"); 68 | 69 | // CType describes the constant kind of an "ideal" (untyped) constant. 70 | // These are ordered from less dominant to more dominant -- a CType with a higher value 71 | // takes precedence over a CType with a lower value in cases like untyped binary operations. 72 | typedef enum CType { 73 | CType_INVALID, 74 | CType_int, 75 | CType_rune, 76 | CType_float, 77 | CType_str, 78 | CType_bool, 79 | CType_nil, 80 | } CType; 81 | const char* CTypeName(CType ct); 82 | 83 | // named types exported in the global scope. 84 | // IMPORTANT: These must match the list of TypeCodes up until CONCRETE_END. 85 | // Looking for all type defs? sym.h puts it all together. 86 | #define TYPE_SYMS(_) \ 87 | _( bool ) \ 88 | _( int8 ) \ 89 | _( uint8 ) \ 90 | _( int16 ) \ 91 | _( uint16 ) \ 92 | _( int32 ) \ 93 | _( uint32 ) \ 94 | _( int64 ) \ 95 | _( uint64 ) \ 96 | _( float32 ) \ 97 | _( float64 ) \ 98 | _( int ) \ 99 | _( uint ) \ 100 | _( str ) \ 101 | /*END TYPE_SYMS*/ 102 | 103 | // Note: The following function is provided by sym.h 104 | // static Node* TypeCodeToTypeNode(TypeCode t); 105 | 106 | // Lookup table TypeCode => string encoding char 107 | const char TypeCodeEncoding[TypeCode_MAX]; 108 | 109 | // Symbolic name of type code. 
Eg "int32" 110 | static const char* TypeCodeName(TypeCode); 111 | const char* _TypeCodeName[TypeCode_MAX]; 112 | inline static const char* TypeCodeName(TypeCode tc) { 113 | assert(tc >= 0 && tc < TypeCode_MAX); 114 | return _TypeCodeName[tc]; 115 | } 116 | 117 | // access TypeCodeFlag 118 | const TypeCodeFlag TypeCodeFlagMap[TypeCode_MAX]; 119 | 120 | inline static bool TypeCodeIsInt(TypeCode t) { return TypeCodeFlagMap[t] & TypeCodeFlagInt; } 121 | inline static bool TypeCodeIsFloat(TypeCode t) { return TypeCodeFlagMap[t] & TypeCodeFlagFloat; } 122 | -------------------------------------------------------------------------------- /test/emptyfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsms/wlang/03eb8e72eaacfa451be06cf398762a70cd7b30c8/test/emptyfile -------------------------------------------------------------------------------- /test/file-no-final-line-break: -------------------------------------------------------------------------------- 1 | A 2 | B 3 | C --------------------------------------------------------------------------------