├── .gitignore
├── .inferconfig
├── LICENSE.txt
├── README.md
├── build.in.ninja
├── build.sh
├── config.sh
├── dev.sh
├── docs
    ├── calling-convention.md
    ├── elf-spec-v1.1.txt
    ├── elf-spec-v1.2.pdf
    ├── elf
    │   ├── header.html
    │   ├── index.html
    │   ├── intro.html
    │   ├── lddynamic.html
    │   ├── ldheader.html
    │   ├── ldintro.html
    │   ├── relocation.html
    │   ├── sections.html
    │   ├── string-table.html
    │   └── symbol-table.html
    ├── guiding-principles.md
    ├── ir-if-cond-gen-elseb.txt
    ├── link-thoughts.md
    ├── syntax
    │   ├── if.w
    │   ├── structname.w
    │   └── ttype.w
    ├── the-zen-of-python.txt
    ├── typeid.md
    └── x86-64-register-encodings.txt
├── example
    ├── consts.w
    ├── factorial.w
    ├── future-borrow-move.w
    ├── future-lambda.w
    ├── future-ownership.w
    ├── future-type-functions-generics.w
    ├── future-type-variants.w
    ├── future-where.w
    └── mem.w
├── experimental
    └── x86_64-backend
    │   ├── asm.c
    │   ├── asm.h
    │   ├── elf
    │       ├── builder.c
    │       ├── builder.h
    │       ├── builder_asm.c
    │       ├── elf.h
    │       ├── file.c
    │       └── file.h
    │   ├── elf64.c
    │   ├── elf64.h
    │   └── x86-64.c
├── misc
    ├── asmlab
    │   ├── hello-c.c
    │   ├── hello-c.elf.dis.txt
    │   ├── hello-c.s
    │   ├── hello1.elf.dis.txt
    │   ├── hello1.s
    │   ├── hello1.sh
    │   ├── mini1-32.elf.dis.txt
    │   ├── mini1-32.s
    │   ├── mini1.elf.dis.txt
    │   ├── mini1.s
    │   ├── mini2.elf.dis.txt
    │   ├── mini2.s
    │   └── start-linux-vm.sh
    ├── etc
    │   └── mini2.s
    ├── filter-compdb.py
    ├── gen_ops.py
    ├── gen_parselet_map.py
    └── test-asm-out.sh
├── src
    ├── build
    │   ├── build.h
    │   ├── buildctx.c
    │   ├── source.c
    │   └── source.h
    ├── common
    │   ├── array.c
    │   ├── array.h
    │   ├── array_test.c
    │   ├── assert.c
    │   ├── assert.h
    │   ├── buf.c
    │   ├── buf.h
    │   ├── defs.h
    │   ├── dlmalloc.c
    │   ├── dlmalloc.h
    │   ├── hash.c
    │   ├── hash.h
    │   ├── hashmap.c.h
    │   ├── hashmap.h
    │   ├── memory.c
    │   ├── memory.h
    │   ├── os.c
    │   ├── os.h
    │   ├── ptrmap.c
    │   ├── ptrmap.h
    │   ├── rbtree.c.h
    │   ├── sds.c
    │   ├── sds.h
    │   ├── sds_test.c
    │   ├── str.c
    │   ├── str.h
    │   ├── test.c
    │   ├── test.h
    │   ├── thread.c
    │   ├── thread.h
    │   ├── thread_pthread.c.h
    │   ├── thread_pthread.h
    │   ├── tstyle.c
    │   ├── tstyle.h
    │   ├── unicode.c
    │   └── unicode.h
    ├── convlit.c
    ├── convlit.h
    ├── ir
    │   ├── arch_base.lisp
    │   ├── block.c
    │   ├── builder.c
    │   ├── builder.h
    │   ├── constcache.c
    │   ├── fun.c
    │   ├── ir.h
    │   ├── op.c
    │   ├── op.h
    │   ├── pkg.c
    │   ├── repr.c
    │   └── value.c
    ├── main.c
    ├── parse
    │   ├── ast.c
    │   ├── ast.h
    │   ├── parse.c
    │   ├── parse.h
    │   ├── parseint.c
    │   ├── parseint.h
    │   ├── resolve_sym.c
    │   ├── resolve_type.c
    │   ├── scan.c
    │   ├── scan.h
    │   ├── token.c
    │   └── token.h
    ├── sym.c
    ├── sym.h
    ├── typeid.c
    ├── typeid.h
    ├── types.c
    └── types.h
└── test
    ├── emptyfile
    └── file-no-final-line-break


/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | *.o
 3 | *.a
 4 | *.d
 5 | *.gz
 6 | *.elf
 7 | *.elf.hex
 8 | /*.sublime-*
 9 | /_local
10 | /build
11 | /build.ninja
12 | /builddeps
13 | * copy
14 | 


--------------------------------------------------------------------------------
/.inferconfig:
--------------------------------------------------------------------------------
 1 | // Config file for infer (https://fbinfer.com/)
 2 | // - for switches options, the value is a JSON boolean (true or false, without quotes)
 3 | // - for integers, the value is a JSON integer (without quotes)
 4 | // - string options have string values
 5 | // - path options have string values, and are interpreted relative to the location of the
 6 | //   .inferconfig file
 7 | // - cumulative options are JSON arrays of the appropriate type
 8 | //
 9 | // Note: This JSON flavor supports comments but not extra trailing commas.
10 | {
11 |   "cxx": false, // don't analyze C++
12 |   "results-dir": "build/infer",
13 |   "skip-analysis-in-path": [ "src/common/dlmalloc.c" ], // dlmalloc.c lives in src/common/
14 |   "disable-issue-type": ["DEAD_STORE"]
15 | }
16 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright (c) 2019-2020, Rasmus Andersson <rsms.me>
 4 | 
 5 | Permission to use, copy, modify, and/or distribute this software for any
 6 | purpose with or without fee is hereby granted, provided that the above
 7 | copyright notice and this permission notice appear in all copies.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # wlang
 2 | 
 3 | Programming language in development
 4 | 
 5 | ```txt
 6 | ./dev.sh example/factorial.w
 7 | ```
 8 | 
 9 | Notes:
10 | 
11 | - Implemented in C11.
12 | - Uses dlmalloc instead of libc malloc, making it portable to wasm etc.
13 | - `./dev.sh [<srcfile>]`       — build and run product (incremental)
14 | - `./dev.sh -lldb [<srcfile>]` — build and run product in debugger (incremental)
15 | - `./dev.sh -analyze`          — run incremental code analyzer on uncommitted changes (incremental)
16 | - `./build.sh`                 — build release product and exit
17 | - `./build.sh -g`              — build debug product and exit
18 | - `./build.sh -analyze`        — analyze entire project using ([Infer](https://fbinfer.com/))
19 | - `./build.sh -test`           — build & run all tests and generate code coverage reports.
20 | - Debug products are built with Clang address sanitizer by default.
21 |   To disable asan/msan, edit the `build.in.ninja` file.
22 | 
23 | Requirements for building:
24 | 
25 | - [clang](https://clang.llvm.org/) version >=7
26 | - [Ninja](https://ninja-build.org/) version >=1.3
27 | - Bash or a bash-compatible shell, for running the build scripts
28 | - [Python 3](https://www.python.org/) used for code generation
29 | - [Infer](https://fbinfer.com/) used for code analysis (optional)
30 | 
31 | If you're on macOS, install everything you need with `brew install clang python ninja infer`.
32 | 


--------------------------------------------------------------------------------
/build.in.ninja:
--------------------------------------------------------------------------------
 1 | # Build template. config.sh copies this file to build.ninja, replacing the
 2 | # marker line further down with generated compile and link statements.
 3 | ninja_required_version = 1.3
 4 | 
 5 | # Directory for build outputs
 6 | builddir = build
 7 | 
 8 | # Compiler flags shared by all products
 9 | cflags = $
10 |   -std=c11 $
11 |   -Wall -Wuninitialized -Wmissing-field-initializers -Wconditional-uninitialized $
12 |   -Wno-nullability-completeness -Wno-unused-function $
13 |   -fcolor-diagnostics
14 | 
15 | # Linker flags shared by all products
16 | lflags =
17 | 
18 | # Per-product flags. config.sh selects $cflags_<name> and $lflags_<name> for each product.
19 | # opt: optimized build with NDEBUG defined
20 | cflags_opt = $cflags -Oz -DNDEBUG
21 | lflags_opt  = $lflags -O3 -flto
22 | 
23 | # dev: unoptimized build with debug info and DEBUG defined
24 | cflags_dev = $cflags -O0 -g -DDEBUG
25 | lflags_dev  = $lflags -g -O0 -fno-lto
26 | 
27 | # test: dev flags plus W_TEST_BUILD and clang coverage instrumentation
28 | cflags_test = $cflags_dev -DW_TEST_BUILD -fprofile-instr-generate -fcoverage-mapping
29 | lflags_test = $lflags_dev -fprofile-instr-generate -fcoverage-mapping
30 | 
31 | # https://clang.llvm.org/docs/AddressSanitizer.html
32 | #
33 | # -fno-omit-frame-pointer
34 | #   Leave frame pointers. Allows the fast unwinder to function properly.
35 | #
36 | # -fno-optimize-sibling-calls
37 | #   Disable tail calls to improve stack traces
38 | #
39 | cflags_dev_asan = $cflags_dev $
40 |   -fsanitize=address $
41 |   -fsanitize-address-use-after-scope $
42 |   -fno-omit-frame-pointer $
43 |   -fno-optimize-sibling-calls
44 | lflags_dev_asan = $lflags_dev -fsanitize=address
45 | 
46 | # Compiles one source file to an object file. -MMD -MF writes the header
47 | # dependencies to $out.d, which ninja reads back through "depfile".
48 | rule compile_obj
49 |   command = clang -MMD -MF $out.d $cflags -c -o $out $in
50 |   depfile = $out.d
51 | 
52 | # Links the input object files into an executable
53 | rule link
54 |   command = clang $lflags -o $out $in
55 | 
56 | # Runs misc/gen_ops.py without arguments
57 | rule gen_ops
58 |   command = python3 misc/gen_ops.py
59 | 
60 | # Runs misc/gen_parselet_map.py, passing the output path as its only argument
61 | rule gen_parselet_map
62 |   command = python3 misc/gen_parselet_map.py $out
63 | 
64 | 
65 | # Marker line: config.sh substitutes the generated build statements here
66 | CONFIG_REPLACE_BUILDS
67 | 
68 | # Generated outputs; each is rebuilt when one of its listed inputs changes
69 | build src/ir/op.c: gen_ops src/ir/arch_base.lisp src/types.h src/parse/token.h
70 | build $builddir/gen_parselet_map.marker: gen_parselet_map src/parse/parse.c
71 | 
72 | # Convenience targets, one per product executable
73 | build release: phony | $builddir/gen_parselet_map.marker $builddir/wp
74 | build debug:   phony | $builddir/gen_parselet_map.marker $builddir/wp.g
75 | build test:    phony | $builddir/gen_parselet_map.marker $builddir/wp.test
76 | 
77 | default debug
78 | 


--------------------------------------------------------------------------------
/config.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | #
 3 | # Generates build.ninja from build.in.ninja.
 4 | #
 5 | # For each product listed below, one compile_obj build statement is emitted per
 6 | # C source file found under src/, followed by one link statement. The generated
 7 | # text is then spliced into build.in.ninja at its CONFIG_REPLACE_BUILDS line.
 8 | #
 9 | # Paths are written to the ninja file unescaped, so source paths must not
10 | # contain whitespace or characters that are special to ninja.
11 | cd "$(dirname "$0")"
12 | 
13 | # Collect all C sources in a stable (sorted) order, one path per line.
14 | sources=()
15 | while IFS= read -r f; do
16 |   sources+=( "$f" )
17 | done < <(find src -type f -name '*.c' | sort)
18 | 
19 | # <name>:<executable>
20 | # Each name should have corresponding $cflags_<name> and $lflags_<name> defined in build.in.ninja
21 | products=( \
22 |   opt:wp       \
23 |   dev:wp.g     \
24 |   test:wp.test \
25 | )
26 | 
27 | builddir=build
28 | # Scratch file that accumulates the generated build statements.
29 | TF=$builddir/.build.ninja
30 | mkdir -p "$builddir"
31 | rm -f "$TF"
32 | touch "$TF"
33 | 
34 | for product in "${products[@]}"; do
35 |   name=${product%%:*}  # text before the ":"
36 |   exe=${product#*:}    # text after the ":"
37 |   objects=()
38 | 
39 |   echo "# --------------------------------------------------" >> "$TF"
40 |   echo "# ${name} (\$builddir/${exe})" >> "$TF"
41 | 
42 |   for srcfile in "${sources[@]}"; do
43 | 
44 |     # only include *_test.c files in the "test" target
45 |     if [[ "$srcfile" == *"_test.c" ]] && [[ "$name" != "test" ]]; then
46 |       # echo "skip test $srcfile for target $name"
47 |       continue
48 |     fi
49 | 
50 |     # Map src/<path>.c to $builddir/obj/<name>/<path>.o. Only the leading "src/"
51 |     # is stripped; a global substitution would also remove "src/" occurring
52 |     # later in the path (e.g. src/libsrc/a.c would collapse to liba.o).
53 |     objfile=$(dirname "$srcfile")/$(basename "$srcfile" .c).o
54 |     objfile=\$builddir/obj/${name}/${objfile#src/}
55 |     # "$\n" turns into a ninja line continuation when echo -e expands it below.
56 |     objects+=( "$\n  ${objfile}" )
57 | 
58 |     echo "build ${objfile}: compile_obj $srcfile" >> "$TF"
59 |     echo "  cflags = \$cflags_${name}" >> "$TF"
60 |   done
61 | 
62 |   echo -e "build \$builddir/${exe}: link ${objects[@]}" >> "$TF"
63 |   echo "  lflags = \$lflags_${name}" >> "$TF"
64 |   echo "" >> "$TF"
65 | done
66 | 
67 | # Insert the generated statements after the marker line of build.in.ninja,
68 | # then drop the marker line itself.
69 | sed -E "/CONFIG_REPLACE_BUILDS/r $TF" "build.in.ninja" \
70 | | sed -E "/CONFIG_REPLACE_BUILDS/d" \
71 | > build.ninja
72 | 
73 | rm "$TF"
74 | 


--------------------------------------------------------------------------------
/docs/calling-convention.md:
--------------------------------------------------------------------------------
  1 | # Calling convention
  2 | 
  3 | Considerations:
  4 | 
  5 | - Where are arguments stored? (Stack, registers, which registers?)
  6 | - Where are return values stored?
  7 | - Are registers caller-save, callee-save or a hybrid?
  8 | 
  9 | ## Where are arguments & return values stored?
 10 | 
 11 | Go stores all arguments and return values on the stack and does not use registers at all.
 12 | The Go team has discussed changing this to make use of registers.
 13 | They estimate a 5-10%[^1] performance gain.
 14 | However, the Go authors recognize that there are some considerable downsides to passing
 15 | arguments in registers: It is more complex and has a higher implementation and maintenance
 16 | cost for the compiler. It also makes Go's stack traces—which include argument values—really
 17 | tricky to implement (since past frames' argument values would be overwritten.)
 18 | [There's a discussion on github/golang/go.](https://github.com/golang/go/issues/18597)
 19 | 
 20 | It's worth noting that Go decided to not make this change and stick with pure stack use.
 21 | Apart from the increased complexity, the reasons were grounded in legacy.
 22 | 
 23 | Most programming languages and VMs make use of registers for arguments and return values
 24 | because in practice, even with pure stack calling, registers are clobbered and need to be
 25 | saved anyway. An example:
 26 | 
 27 | 	fun foo(a, b, c int) int { a + c * b }
 28 | 	fun bar(x, y int) int {
 29 | 		foo(x + y, x * y, 2) + 10
 30 | 	}
 31 | 	fun main -> bar(1, 2)
 32 | 
 33 | SSA IR:
 34 | 
 35 | 	fun foo (int,int,int)->int
 36 | 		v0 = arg 0      # a
 37 | 		v1 = arg 1      # b
 38 | 		v2 = arg 2      # c
 39 | 		v3 = mul v2 v1  # c * b
 40 | 		v4 = add v0 v3  # tmp' = a + v3
 41 | 	ret v4
 42 | 
 43 | 	fun bar (int,int)->int
 44 | 		v0 = arg   0      # x
 45 | 		v1 = arg   1      # y
 46 | 		v2 = add   v0 v1  # x + y
 47 | 		v3 = mul   v0 v1  # x * y
 48 | 		v4 = const 2
 49 | 		v5 = params v2, v3, v4
 50 | 		v6 = call foo v5
 51 | 		v7 = result 0
 52 | 	ret v7
 53 | 
 54 | 	fun main
 55 | 		v0 = const 1
 56 | 		v1 = const 2
 57 | 		v2 = params v0, v1
 58 | 		v3 = call bar v2
 59 | 	ret
 60 | 
 61 | Looking at the main function calling to `bar`, here's what it looks like with arguments and
 62 | return values on the stack:
 63 | 
 64 | 	bar:  # (int,int)->int
 65 | 		| stack now looks like this:
 66 | 		|    100-96  1               |  arg "x", 4 bytes
 67 | 		|    96-92   2               |  arg "y", 4 bytes
 68 | 		|    92-84   return address  |  8 bytes (64-bit)
 69 | 		| -- 84 ---- <base pointer> ----------------
 70 | 		|
 71 | 		push %rbp            | save base pointer; store value of rbp to stack
 72 | 		mov  %rsp, %rbp      | make stack pointer the base pointer
 73 | 		mov  16(%rbp), %rax  | load "x" argument into rax (84+16 = 100)
 74 | 		mov  12(%rbp), %rbx  | load "y" argument into rbx (84+12 = 96)
 75 | 		mov  %rbx, %rcx      | copy "y" so that "add" does not over-write it
 76 | 		add  %rax, %rcx      | x + y -> rcx
 77 | 		mul  %rax, %rbx      | x * y -> rbx  (over-writes "y")
 78 | 		mov  $2,   %rax      | store constant 2 in rax (over-writes "x")
 79 | 		                     | At this point: rcx=v2, rbx=v3, rax=v4
 80 | 		mov  %rcx, -4(%rbp)  | store v2 on stack
 81 | 		mov  %rbx, -8(%rbp)  | store v3 on stack
 82 | 		mov  %rax, -12(%rbp) | store v4 on stack
 83 | 		call foo
 84 | 		mov  -12(%rbp), %rax | load result from stack into rax
 85 | 		add  $10, %rax       | <result> + 10 -> rax
 86 | 		mov  %rax, 4(%rbp)   | store result value to stack
 87 | 		ret
 88 | 
 89 | 	main:
 90 | 		mov  $1, -4(%rbp)  # store "x" argument on stack
 91 | 		mov  $2, -8(%rbp)  # store "y" argument on stack
 92 | 		|
 93 | 		| stack now looks like this:
 94 | 		| -- 100 ---- <base pointer> ----------------
 95 | 		|    100-96  1              |  arg "x", 4 bytes
 96 | 		|    96-92   2              |  arg "y", 4 bytes
 97 | 		|
 98 | 		call bar
 99 | 		| ignore return value
100 | 		ret
101 | 
102 | 
103 | Pros & cons, pure stack vs registers:
104 | - (+stack) Simple implementation
105 | - (+stack) Portable
106 | 	- In practice assembly and lowered IR are not portable for other reasons.
107 | 	- "Portable" here means that the strategy and code generation do not need
108 | 	  to be customized for different machine targets.
109 | - (+regs) Performance
110 | - (+regs) Uses less memory
111 | 
112 | 	main:
113 | 		push $1  # decrement rsp, store 1 at (rsp)
114 | 		push $2  # decrement rsp, store 2 at (rsp)
115 | 		call bar
116 | 		add  $8, %rsp  # move stack pointer back (2*4 = 2*sizeof(int))
117 | 		ret
118 | 
119 | [^1]: [Proposal: Passing Go arguments and results in registers](https://gist.github.com/dr2chase/5a1107998024c76de22e122ed836562d), also referenced in [go review tracker (stale)](https://go-review.googlesource.com/c/proposal/+/35054/)
120 | 


--------------------------------------------------------------------------------
/docs/elf-spec-v1.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsms/wlang/03eb8e72eaacfa451be06cf398762a70cd7b30c8/docs/elf-spec-v1.2.pdf


--------------------------------------------------------------------------------
/docs/elf/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | <title>ELF</title>
 5 | </head>
 6 | <body>
 7 | 
 8 | <h2>Object files</h2>
 9 | 
10 | <ul>
11 | 	<li><a href=intro.html><b>Introduction</b></a></li>
12 | 	<li><a href=intro.html#file_format>File Format</a></li>
13 | 	<li><a href=intro.html#data_representation>Data Representation</a></li>
14 | </ul>
15 | <ul>
16 | 	<li><a href=header.html><b>ELF Header</b></a></li>
17 | 	<li><a href=header.html#elfid>ELF Identification</a></li>
18 | 	<li><a href=header.html#machine>Machine Information (Processor-Specific)</a></li>
19 | </ul>
20 | <ul>
21 | 	<li><a href=sections.html><b>Sections</b></a></li>
22 | 	<li><a href=sections.html#special_sections>Special Sections</a></li>
23 | </ul>
24 | <ul>
25 | 	<li><a href=string-table.html><b>String Table</b></a></li>
26 | </ul>
27 | <ul>
28 | 	<li><a href=symbol-table.html><b>Symbol Table</b></a></li>
29 | 	<li><a href=symbol-table.html#symbol_value>Symbol Values</a></li>
30 | </ul>
31 | <ul>
32 | 	<li><a href=relocation.html><b>Relocation</b></a></li>
33 | 	<li><a href=relocation.html#reloc_types>Relocation Types (Processor-Specific)</a></li>
34 | </ul>
35 | 
36 | 
37 | <h2>Program loading and dynamic linking</h2>
38 | 
39 | <ul>
40 | 	<li><a href=ldintro.html><b>Introduction</b></a></li>
41 | </ul>
42 | <ul>
43 | 	<li><a href=ldheader.html><b>Program Header</b></a></li>
44 | 	<li><a href=ldheader.html#base_address>Base Address</a></li>
45 | 	<li><a href=ldheader.html#segment_permissions>Segment Permissions</a></li>
46 | 	<li><a href=ldheader.html#segment_contents>Segment Contents</a></li>
47 | 	<li><a href=ldheader.html#note_section>Note Section</a></li>
48 | </ul>
49 | <ul>
50 | 	<li><a href=lddynamic.html><b>Dynamic Linking</b></a></li>
51 | 	<li><a href=lddynamic.html#interpreter>Program Interpreter</a></li>
52 | 	<li><a href=lddynamic.html#dynamic_linker>Dynamic Linker</a></li>
53 | 	<li><a href=lddynamic.html#dynamic_section>Dynamic Section</a></li>
54 | 	<li><a href=lddynamic.html#shobj_dependencies>Shared Object Dependencies</a></li>
55 | 	<li><a href=lddynamic.html#substitution>Substitution Sequences</a></li>
56 | 	<li><a href=lddynamic.html#got>Global Offset Table (Processor-Specific)</a></li>
57 | 	<li><a href=lddynamic.html#plt>Procedure Linkage Table (Processor-Specific)</a></li>
58 | 	<li><a href=lddynamic.html#hash>Hash Table</a></li>
59 | 	<li><a href=lddynamic.html#init_fini>Initialization and Termination Functions</a></li>
60 | </ul>
61 | 
62 | </body>
63 | </html>
64 | 


--------------------------------------------------------------------------------
/docs/elf/ldintro.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <title>Program loading and dynamic linking</title><p>
 3 | <h1>Introduction to Program loading and dynamic linking</h1><p>
 4 | This section describes the object file
 5 | information and system actions that create running programs.
 6 | Some information here applies to all systems;
 7 | information specific to one processor resides in
 8 | sections marked accordingly.
 9 | <p>
10 | Executable and shared object files statically represent programs.
11 | To execute such programs, the system uses the files to create
12 | dynamic program representations, or process images.
13 | As section ''Virtual Address Space'' in Chapter 3 of the
14 | processor supplement describes, a process image has segments that
15 | hold its text, data, stack, and so on.  This chapter's major sections
16 | discuss the following:
17 | <ul>
18 | <li>
19 | <a href=ldheader.html>Program Header.</a>
20 | This section complements Chapter 4, describing
21 | object file structures that relate directly to program execution.
22 | The primary data structure, a program header table, locates
23 | segment images within the file and contains other information
24 | necessary to create the memory image for the program.
25 | <li>
26 | Program Loading.
27 | Given an object file, the system must load
28 | it into memory for the program to run.
29 | <li>
30 | <a href=lddynamic.html>Dynamic linking.</a>
31 | After the system loads the program it must complete
32 | the process image by resolving symbolic references among the object
33 | files that compose the process.
34 | </ul>
35 | <hr>
36 | <img src=warning.gif alt="NOTE:">
37 | The processor supplement defines a naming convention for ELF constants
38 | that have processor ranges specified.  Names such as <code>DT_</code>,
39 | <code>PT_</code>,
40 | for processor specific extensions, incorporate the name of the
41 | processor: <code>DT_M32_SPECIAL</code>, for example.
42 | Pre-existing processor
43 | extensions not using this convention will be supported.
44 | <table>
45 | <th>Pre-Existing Extensions</th>
46 | <tr>
47 | <td><code>DT_JUMP_REL</code></td>
48 | </tr>
49 | </table>
50 | <hr>
51 | <a href=relocation.html><img src=previous.gif alt="Previous"></a>
52 | <a href=index.html><img src=contents.gif alt="Contents"></a>
53 | <a href=ldheader.html><img src=next.gif alt="Next"></a>
54 | <hr>
55 | <i>
56 | <small>
57 | &#169; 1997, 1998, 1999, 2000 The Santa Cruz Operation, Inc.  All rights reserved.
58 | </small>
59 | </i>
60 | </html>
61 | 


--------------------------------------------------------------------------------
/docs/elf/string-table.html:
--------------------------------------------------------------------------------
  1 | <html>
  2 | <title>String Table</title><p>
  3 | <h1>String Table</h1><p>
  4 | String table sections hold null-terminated character sequences,
  5 | commonly called strings.
  6 | The object file uses these strings to represent symbol and section names.
  7 | One references a string as an index into the
  8 | string table section.
  9 | The first byte, which is index zero, is defined to hold
 10 | a null character.
 11 | Likewise, a string table's last byte is defined to hold
 12 | a null character, ensuring null termination for all strings.
 13 | A string whose index is zero specifies
 14 | either no name or a null name, depending on the context.
 15 | An empty string table section is permitted; its section header's <code>sh_size</code>
 16 | member would contain zero.
 17 | Non-zero indexes are invalid for an empty string table.
 18 | <p>
 19 | A section header's <code>sh_name</code>
 20 | member holds an index into the section header string table
 21 | section, as designated by the <code>e_shstrndx</code>
 22 | member of the ELF header.
 23 | The following figures show a string table with 25 bytes
 24 | and the strings associated with various indexes.
 25 | <p>
 26 | <table border cellspacing=0>
 27 | <th>Index</th>
 28 | <th>+0</th>
 29 | <th>+1</th>
 30 | <th>+2</th>
 31 | <th>+3</th>
 32 | <th>+4</th>
 33 | <th>+5</th>
 34 | <th>+6</th>
 35 | <th>+7</th>
 36 | <th>+8</th>
 37 | <th>+9</th>
 38 | <tr>
 39 | <td align=right><b>0</b></td>
 40 | <td align=center width="50"><code>\0</code></td>
 41 | <td align=center width="50"><code>n</code></td>
 42 | <td align=center width="50"><code>a</code></td>
 43 | <td align=center width="50"><code>m</code></td>
 44 | <td align=center width="50"><code>e</code></td>
 45 | <td align=center width="50"><code>.</code></td>
 46 | <td align=center width="50"><code>\0</code></td>
 47 | <td align=center width="50"><code>V</code></td>
 48 | <td align=center width="50"><code>a</code></td>
 49 | <td align=center width="50"><code>r</code></td>
 50 | </tr>
 51 | <tr>
 52 | <td align=right><b>10</b></td>
 53 | <td align=center width="50"><code>i</code></td>
 54 | <td align=center width="50"><code>a</code></td>
 55 | <td align=center width="50"><code>b</code></td>
 56 | <td align=center width="50"><code>l</code></td>
 57 | <td align=center width="50"><code>e</code></td>
 58 | <td align=center width="50"><code>\0</code></td>
 59 | <td align=center width="50"><code>a</code></td>
 60 | <td align=center width="50"><code>b</code></td>
 61 | <td align=center width="50"><code>l</code></td>
 62 | <td align=center width="50"><code>e</code></td>
 63 | </tr>
 64 | <tr>
 65 | <td align=right><b>20</b></td>
 66 | <td align=center width="50"><code>\0</code></td>
 67 | <td align=center width="50"><code>\0</code></td>
 68 | <td align=center width="50"><code>x</code></td>
 69 | <td align=center width="50"><code>x</code></td>
 70 | <td align=center width="50"><code>\0</code></td>
 71 | <td colspan=5><code>&nbsp;</code></td>
 72 | </tr>
 73 | </table>
 74 | <hr>
 75 | <b>Figure 4-15: String Table Indexes</b>
 76 | <p>
 77 | <table border cellspacing=0>
 78 | <th>Index</th>
 79 | <th>String</th>
 80 | <tr>
 81 | <td align=right>0</td>
 82 | <td><i>none</i></td>
 83 | </tr>
 84 | <tr>
 85 | <td align=right>1</td>
 86 | <td>name.</td>
 87 | </tr>
 88 | <tr>
 89 | <td align=right>7</td>
 90 | <td>Variable</td>
 91 | </tr>
 92 | <tr>
 93 | <td align=right>11</td>
 94 | <td>able</td>
 95 | </tr>
 96 | <tr>
 97 | <td align=right>16</td>
 98 | <td>able</td>
 99 | </tr>
100 | <tr>
101 | <td align=right>24</td>
102 | <td><i>null string</i></td>
103 | </tr>
104 | </tr>
105 | </table>
106 | <hr>
107 | <p>
108 | As the example shows, a string table index may refer
109 | to any byte in the section.
110 | A string may appear more than once;
111 | references to substrings may exist;
112 | and a single string may be referenced multiple times.
113 | Unreferenced strings also are allowed.
114 | <hr>
115 | <a href=header.html><img src=previous.gif alt="Previous"></a>
116 | <a href=index.html><img src=contents.gif alt="Contents"></a>
117 | <a href=symbol-table.html><img src=next.gif alt="Next"></a>
118 | <hr>
119 | <i>
120 | <small>
121 | &#169; 1997, 1998, 1999, 2000 The Santa Cruz Operation, Inc.  All rights reserved.
122 | </small>
123 | </i>
124 | </html>
125 | 


--------------------------------------------------------------------------------
/docs/ir-if-cond-gen-elseb.txt:
--------------------------------------------------------------------------------
 1 | fun main {
 2 |   z = if true {
 3 |     a = 4  # avoid block elimination while working on ir builder
 4 |     y = a + 1
 5 |   } else {
 6 |     0
 7 |   }
 8 | }
 9 | 
10 | package foo
11 | fun main () 0x10bb82370
12 |   b0:
13 |     v0  bool    = ConstBool    [0x1]  # 1 use
14 |     v1  int     = ConstI32     [0x4]  # 1 use ; a
15 |     v2  int     = ConstI32     [0x1]  # 1 use
16 |     v4  int     = ConstI32     [0x0]  # 1 use
17 |   if v0 -> b1 b3
18 | 
19 |   b1: <- b0  # b0.then
20 |     v3  int     = AddI32       v1   v2  # 1 use ; y
21 |   cont -> b3
22 | 
23 |   b3: <- b1 b0   # b0.end
24 |     v5  int     = Phi          v3   v4  # 1 use ; z
25 |   ret v5
26 | 
27 | 
28 | ————————————————————————————————————————————————————————————————————————————————————
29 | 
30 | fun main {
31 |   z = if true {
32 |     a = 4  # avoid block elimination while working on ir builder
33 |     y = a + 1
34 |   }
35 | }
36 | 
37 | package foo
38 | fun main () 0x10897d370
39 |   b0:
40 |     v0  bool    = ConstBool    [0x1]  # 1 use
41 |     v1  int     = ConstI32     [0x4]  # 1 use ; a
42 |     v2  int     = ConstI32     [0x1]  # 1 use
43 |     v4  int     = ConstI32     [0x0]  # 1 use
44 |   if v0 -> b1 b2
45 | 
46 |   b1: <- b0  # b0.then
47 |     v3  int     = AddI32       v1   v2  # 1 use ; y
48 |   cont -> b2
49 | 
50 |   b2: <- b0 b1   # b0.end
51 |     v5  int     = Phi          v3   v4  # 1 use ; z
52 |   ret v5
53 | 


--------------------------------------------------------------------------------
/docs/link-thoughts.md:
--------------------------------------------------------------------------------
 1 | Thoughts on linking code
 2 | 
 3 | What if instead of object files, we were to maintain a graph database of all
 4 | assembled code?
 5 | 
 6 | Traditionally a C-like compiler will parse, compile and assemble each source file into
 7 | an ELF/Mach-O/etc object file and
 8 | finally—when all object files required for a program are available—link them all together
 9 | by reading & parsing all these object files just to build a new object (exe) file.
10 | Here's an example of a simple program with four source files:
11 | 
12 | 	    main
13 | 	  /  |   \
14 | 	foo  bar  baz
15 | 	  \  /
16 | 	  util
17 | 
18 | main requires foo, bar and baz. foo and bar both require util.
19 | In practice this is not a tree but a list:
20 | 
21 | - main -> main.o
22 | - foo -> foo.o
23 | - bar -> bar.o
24 | - baz -> baz.o
25 | - util -> util.o
26 | 
27 | A C-like compiler would link foo, bar, baz, util and main objects every time any part changes.
28 | Say we only change baz, we take the cost of re-linking the tree of foo, bar & util.
29 | 
30 | Imagine if these were represented as a tree even as linked objects, not just temporarily
31 | inside the compiler. Then we could link subtrees together:
32 | 
33 | 	main           = [main.o, foo+util+bar.o, baz.o]
34 | 	foo+util+bar.o = [foo.o, bar.o, util.o]
35 | 	foo+util.o     = [foo.o, util.o]         # Can be skipped; unused
36 | 	bar+util.o     = [bar.o, util.o]         # Can be skipped; unused
37 | 
38 | If `baz` changes, we can reuse the subtree object `foo+util+bar.o`
39 | 


--------------------------------------------------------------------------------
/docs/syntax/if.w:
--------------------------------------------------------------------------------
1 | def foo(x):
2 | 	return false if !authenticated(x)
3 | 	true
4 | 	
5 | print(foo(0))
6 | print(foo(8))
7 | 


--------------------------------------------------------------------------------
/docs/syntax/structname.w:
--------------------------------------------------------------------------------
 1 | type Account { id, flag int } #T {flag:iid:i}
 2 | fun foo(a Account) { #T ^({flag:iid:i})i
 3 | 	return 0 if a.flag == 0
 4 | 	# ^ stmt if expr
 5 | 	a.id
 6 | }
 7 | id, flag = 1, 0
 8 | x = { id, flag } #T {flag:iid:i}
 9 | foo(x) # ok
10 | y = Account { id } # flag is zero init
11 | foo(y) # ok
12 | foo({ id }) # ok. flag is zero init
13 | # inline type def:
14 | fun bar(a { id int }) {...}
15 | bar({ id }) # block expr or struct init?
16 | type User {
17 | 	account { id, flag int } # inline type def
18 | 	name str
19 | }
20 | u = { name: "Sam", account: { id: 3 } }
21 | foo(u.account) # ok
22 | 
23 | compile(callback, config) where
24 |   config = Config{
25 | 	  infile: "foo.w",
26 | 	  debug: true,
27 |   },
28 |   callback = fun (ev Event) -> log(ev)
29 | 
30 | 
31 | #----------------------
32 | 
33 | # struct exprs must be prefixed by type to
34 | # disambiguate from block expr
35 | a = { 3 } # block
36 | b = Account { id: 3 } # struct
37 | c = (type { id int }) { id: 3 } # struct
38 | type Account { id int }
39 | 
40 | struct Account { id int }
41 | type Foo = struct { id int }
42 | c = (struct { id int }) { id: 3 } # struct
43 | 
44 | type Account = { id int }
45 | c = (type _ = { id int }) { id: 3 }
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/docs/syntax/ttype.w:
--------------------------------------------------------------------------------
 1 | type Account = Twitter(handle str)
 2 |              | Google(email str, id int)
 3 |              | Local
 4 |              | Test
 5 | # really just compiled to tuples
 6 | def signIn(a Account):
 7 | 	switch a
 8 | 	Twitter(h): #...
 9 | 	Google(email, id): #...
10 | 	Local | Test: #...
11 | # becomes
12 | def signIn(a (int,str)|(int,str,int)|(int)): # union of tagged tuples; a[0] is the variant tag
13 | 	switch a[0]
14 | 	case 0: #... Twitter(handle)
15 | 	case 1: #... Google(email, id)
16 | 	case 2: case 3: #... Local | Test
17 | #---- generics w required type Name:
18 | type Vec3(T) = (T,T,T) # generic type: a 3-tuple of T
19 | a Vec3(int) = (1,1,0)
20 | b Vec3(float) = (1.0,1.0,0.0)
21 | c = (1,1,0) # == a
22 | 


--------------------------------------------------------------------------------
/docs/the-zen-of-python.txt:
--------------------------------------------------------------------------------
 1 | > PEP 20 -- The Zen of Python
 2 | > Aug 19, 2004 by Tim Peters
 3 | 
 4 | Beautiful is better than ugly.
 5 | Explicit is better than implicit.
 6 | Simple is better than complex.
 7 | Complex is better than complicated.
 8 | Flat is better than nested.
 9 | Sparse is better than dense.
10 | Readability counts.
11 | Special cases aren't special enough to break the rules.
12 | Although practicality beats purity.
13 | Errors should never pass silently.
14 | Unless explicitly silenced.
15 | In the face of ambiguity, refuse the temptation to guess.
16 | There should be one-- and preferably only one --obvious way to do it.
17 | Although that way may not be obvious at first unless you're Dutch.
18 | Now is better than never.
19 | Although never is often better than *right* now.
20 | If the implementation is hard to explain, it's a bad idea.
21 | If the implementation is easy to explain, it may be a good idea.
22 | Namespaces are one honking great idea -- let's do more of those!
23 | 


--------------------------------------------------------------------------------
/docs/x86-64-register-encodings.txt:
--------------------------------------------------------------------------------
 1 | Enc         8-bit GP  16-bit GP  32-bit GP  64-bit GP  80-bit x87  64-bit MMX  128-bit XMM  256-bit YMM  16-bit Segment  32-bit Control  32-bit Debug
 2 | 0.000 (0)   AL        AX         EAX        RAX        ST0         MMX0        XMM0         YMM0         ES              CR0             DR0
 3 | 0.001 (1)   CL        CX         ECX        RCX        ST1         MMX1        XMM1         YMM1         CS              CR1             DR1
 4 | 0.010 (2)   DL        DX         EDX        RDX        ST2         MMX2        XMM2         YMM2         SS              CR2             DR2
 5 | 0.011 (3)   BL        BX         EBX        RBX        ST3         MMX3        XMM3         YMM3         DS              CR3             DR3
 6 | 0.100 (4)   AH, SPL1  SP         ESP        RSP        ST4         MMX4        XMM4         YMM4         FS              CR4             DR4
 7 | 0.101 (5)   CH, BPL1  BP         EBP        RBP        ST5         MMX5        XMM5         YMM5         GS              CR5             DR5
 8 | 0.110 (6)   DH, SIL1  SI         ESI        RSI        ST6         MMX6        XMM6         YMM6         -               CR6             DR6
 9 | 0.111 (7)   BH, DIL1  DI         EDI        RDI        ST7         MMX7        XMM7         YMM7         -               CR7             DR7
10 | 1.000 (8)   R8L       R8W        R8D        R8         -           MMX0        XMM8         YMM8         ES              CR8             DR8
11 | 1.001 (9)   R9L       R9W        R9D        R9         -           MMX1        XMM9         YMM9         CS              CR9             DR9
12 | 1.010 (10)  R10L      R10W       R10D       R10        -           MMX2        XMM10        YMM10        SS              CR10            DR10
13 | 1.011 (11)  R11L      R11W       R11D       R11        -           MMX3        XMM11        YMM11        DS              CR11            DR11
14 | 1.100 (12)  R12L      R12W       R12D       R12        -           MMX4        XMM12        YMM12        FS              CR12            DR12
15 | 1.101 (13)  R13L      R13W       R13D       R13        -           MMX5        XMM13        YMM13        GS              CR13            DR13
16 | 1.110 (14)  R14L      R14W       R14D       R14        -           MMX6        XMM14        YMM14        -               CR14            DR14
17 | 1.111 (15)  R15L      R15W       R15D       R15        -           MMX7        XMM15        YMM15        -               CR15            DR15
18 | 


--------------------------------------------------------------------------------
/example/consts.w:
--------------------------------------------------------------------------------
 1 | const start = 5
 2 | const one = 1
 3 | 
 4 | var foo int
 5 | const no = false
 6 | const t = int
 7 | const f = fun -> 1
 8 | 
 9 | fun main {
10 |   var x = true
11 |   factorial(start)
12 | }
13 | 
14 | # Factorial function
15 | fun factorial(n int) t {
16 |   const zero = 0
17 |   if n == zero {
18 |     one
19 |   } else {
20 |     n * factorial(n - 1)
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/example/factorial.w:
--------------------------------------------------------------------------------
 1 | # <
 2 | # <=
 3 | # <<
 4 | # <<=
 5 | # >
 6 | # >=
 7 | # >>
 8 | # >>=
 9 | 
10 | fun main {
11 |   # a, b = 1, 2 + 1
12 |   # z = 20 as int8
13 |   # a = z as int16
14 |   # a = int16(20)
15 |   # b = int64(arg0)
16 |   # k = x / y * z # oops! Parsed as right-associative but should be left-associative
17 | 
18 |   # a = 1 + 2                         # 1  left & right are untyped
19 |   # a = 2 + (1 as uint32)             # 2  left is untyped, right is typed
20 |   # a = (1 as uint32) + 2             # 3  left is typed, right is untyped
21 |   # a = (1 as uint32) + (2 as uint32) # 4  left & right are typed
22 | 
23 |   # a = 4
24 |   # b = a
25 |   # y = b + 1
26 | 
27 |   z = if true {
28 |     a = 4  # avoid block elimination while working on ir builder
29 |     y = a + 1
30 |   } else {
31 |     0
32 |   }
33 | 
34 |   z
35 | 
36 |   # factorial(start)
37 | }
38 | 
39 | # fun foo(i int) -> i
40 | 
41 | # # Factorial function
42 | # fun factorial(n int) int {
43 | #   if n <= 0 {
44 | #     1
45 | #   } else {
46 | #     n * factorial(n - 1)
47 | #   }
48 | # }
49 | 
50 | # fun factorial(n float32) float32 {
51 | #   if n <= 0.0 {
52 | #     1.0
53 | #   } else {
54 | #     n * factorial(n - 1.0)
55 | #   }
56 | # }
57 | 
58 | # fun factorial(n int) int {
59 | #   # y = 3
60 | #   # x, y, _ = 1, 2, 3
61 | #   # t = (1, 2, 3)
62 | #   # xs = for x in [1,2,3] { x * 2 }
63 | #   # if n <= 0 1 else n * factorial(n - 1)
64 | #   if n <= 0 {
65 | #     1
66 | #   } else {
67 | #     n * factorial(n - 1)
68 | #   }
69 | # }
70 | 


--------------------------------------------------------------------------------
/example/future-borrow-move.w:
--------------------------------------------------------------------------------
 1 | #
 2 | # -- This is just an idea--
 3 | #
 4 | # Rust-like borrowing and moving, with a twist.
 5 | # - Things are borrowed by default
 6 | # - Moving can be explicit
 7 | #
 8 | 
 9 | type Thing {
10 |   x int
11 | }
12 | 
13 | fun helperTakes(t Thing) {
14 |   t.x = 2 # ok to mutate since we own t
15 | }
16 | 
17 | fun f1 {
18 |   t = Thing(1)
19 |   t.x = 0 # ok to mutate since we own t
20 |   helperTakes(t) # t moves into helperTakes
21 | }
22 | 
23 | fun f2 {
24 |   t = Thing(1)
25 |   helperTakes(t) # t moves into helperTakes
26 |   y = t # no longer alive; error: t moved to helperTakes
27 | }
28 | 
29 | fun helperBorrows(t &Thing) {
30 |   print(t.x) # reading is okay, but...
31 |   # t.x = 2 # ...mutation is not, since we are just borrowing t
32 | }
33 | 
34 | fun f3 {
35 |   t = Thing(1)
36 |   helperBorrows(t)
37 |   t.x = 0 # ok to mutate since we still own t
38 | }
39 | 
40 | fun f4 {
41 |   # borrowing prevents moving
42 |   t = Thing(1)
43 |   y = &t # y borrows t
44 |   helperTakes(t) # error: cannot move t; borrowed by y
45 | }
46 | 
47 | fun f5 {
48 |   # scope is important
49 |   t = Thing(1)
50 |   {
51 |     y = &t # y borrows t
52 |   }
53 |   helperTakes(t) # ok; no borrowed refs in scope
54 | }
55 | 


--------------------------------------------------------------------------------
/example/future-ownership.w:
--------------------------------------------------------------------------------
 1 | # Exploring a variant of the Rust model
 2 | #
 3 | # - Each value in Rust has a variable that’s called its owner.
 4 | # - There can only be one owner at a time.
 5 | # - When the owner goes out of scope, the value will be dropped.
 6 | #
 7 | # https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html
 8 | 
 9 | type User {
10 |   id     int
11 |   name   str
12 |   emails [str]
13 | }
14 | 
15 | fun print(u &User)  # borrows u
16 | fun addEmail(u User, email &str) User  # takes ownership of u, borrows email, returns u
17 | fun store(u User)  # takes ownership of u
18 | 
19 | fun example1 {
20 |   u = User(id=0, name="sam")  # heap-alloc + assign pointer
21 |   u = addEmail(u, "sam@hawtmail.com")
22 |   print(u)  # print borrows u
23 |   store(u)  # u moves to store(); local u is invalid
24 |   print(u)  # error! u has moved to store
25 | }
26 | 
27 | fun example2 {
28 |   u = User(id=0, name="sam")
29 |   {
30 |     t = timer(fun {
31 |       # u in here is an immutable borrowed reference
32 |       print(u)  # ok; print just reads
33 |       store(u)  # error! can't move reference u to store
34 |     })
35 |     # u is immutable here as a reference has been borrowed
36 |     print(u)  # ok; print just reads
37 |     store(u)  # error! can't move borrowed u to store
38 |   }
39 |   # t is gone thus nothing borrows u anymore; u is mutable and can be moved
40 |   store(u)  # ok; u moved to store
41 |   print(u)  # error! u has moved to store
42 | }
43 | 


--------------------------------------------------------------------------------
/example/future-type-functions-generics.w:
--------------------------------------------------------------------------------
1 | # generics w required type Name
2 | type Vec3(T) = (T, T, T)
3 | type Tup3(Y) = (Y, Y, Y)
4 | a Vec3(int) = (1,1,0)
5 | b Vec3(float) = (1.0,1.0,0.0)
6 | c = (1,1,0) # == a
7 | d Tup3(float) = (1.0,1.0,0.0) # == b
8 | 


--------------------------------------------------------------------------------
/example/future-type-variants.w:
--------------------------------------------------------------------------------
 1 | # OCaml / ReasonML style types
 2 | type Account = Twitter(str)
 3 |              | Google(str, int)
 4 |              | Local
 5 |              | Test
 6 | 
 7 | fun signIn(a Account) {
 8 |   switch a {
 9 |     Twitter(handle)   -> print "Sign in to twitter as @$handle"
10 |     Google(email, id) -> print "Sign in to Google with #$id $email"
11 |     Local | Test      -> print "Use local computer user"
12 |   }
13 | }
14 | 
15 | # really just compiles to tuples. The above becomes:
16 | fun signIn(a (int,str)|(int,str,int)|(int)) {
17 |   switch a[0] {
18 |     case 0:
19 |       handle = a[1]
20 |       print("Sign in to twitter as @$handle")
21 |     case 1:
22 |       email, id = a[1:]
23 |       print("Sign in to Google with #$id $email")
24 |     case 2: case 3:
25 |       print("Use local computer user")
26 |   }
27 | }
28 | 
29 | # ReasonML syntax:
30 | #
31 | # type account = Twitter(string)
32 | #              | Google(string, int)
33 | #              | Local
34 | #              | Test
35 | #
36 | # let a = Twitter("bobby99")
37 | # let b = Google("bob@gmail.com", 123556)
38 | #
39 | # let signIn = switch (a) {
40 | #   | Twitter(handle)   => "Sign in to twitter as @$handle"
41 | #   | Google(email, id) => "Sign in to Google with #$id $email"
42 | #   | Local | Test      => "Use local computer user"
43 | # };
44 | #
45 | 


--------------------------------------------------------------------------------
/example/future-where.w:
--------------------------------------------------------------------------------
 1 | # <expr> where <bindings>
 2 | #
 3 | # Similar to Haskell's "where"
 4 | # Similar to Rust's "where"
 5 | #
 6 | # Note: This may be a bad idea.
 7 | #
 8 | 
 9 | fun fmtSyntaxErrors(errors [Error]) {
10 |   errors.map(e ->
11 |     logger.warn("$severity in $file:$line:$col: $error$snippet") where {
12 |       severity = if e.severity == nil "error" else e.severity
13 |       line, col, snippet = switch e.loc {
14 |         nil -> (0,0,"")
15 |         Location(source, line, col) -> {
16 |           line, col, switch source.IndexOfNth('\n', line - 1) {
17 |             nil -> ""
18 |             i   -> "\n" + source[i:i+1]
19 |           }
20 |         }
21 |       }
22 |     }
23 |   )
24 | }
25 | 
26 | fun fmtSyntaxErrors(errors [Error]) {
27 |   errors.map(e -> {
28 |     severity = if e.severity == nil "error" else e.severity
29 |     line, col, snippet = switch e.loc {
30 |       nil -> (0,0,"")
31 |       Location(source, line, col) -> {
32 |         line, col, switch source.IndexOfNth('\n', line - 1) {
33 |           nil -> ""
34 |           i   -> "\n" + source[i:i+1]
35 |         }
36 |       }
37 |     }
38 |     logger.warn("$severity in $file:$line:$col: $error$snippet")
39 |   })
40 | }
41 | 
42 | # Python-esque
43 | 
44 | fun fmtSyntaxErrors(errors [Error]):
45 |   errors.map(e ->
46 |     logger.warn("$severity in $file:$line:$col: $error$snippet") where:
47 |       severity = if e.severity == nil "error" else e.severity
48 |       line, col, snippet = switch e.loc:
49 |         nil -> (0,0,"")
50 |         Location(source, line, col) ->
51 |           line, col, switch source.IndexOfNth('\n', line - 1):
52 |             nil -> ""
53 |             i   -> "\n" + source[i:i+1] )
54 | 
55 | 
56 | fun fmtSyntaxErrors(errors [Error]):
57 |   errors.map(e -> {
58 |     severity = if e.severity == nil "error" else e.severity
59 |     line, col, snippet = switch e.loc:
60 |       nil -> (0,0,"")
61 |       Location(source, line, col) ->
62 |         line, col, switch source.IndexOfNth('\n', line - 1):
63 |           nil -> ""
64 |           i   -> "\n" + source[i:i+1]
65 |     logger.warn("$severity in $file:$line:$col: $error$snippet")
66 |   })
67 | 


--------------------------------------------------------------------------------
/example/mem.w:
--------------------------------------------------------------------------------
 1 | # comment
 2 | 
 3 | 
 4 | # const lol int = 5
 5 | # var foo Foo
 6 | # var red, green int = 4, 5
 7 | # var x int = 8
 8 | # var A, B, C int
 9 | # var a, b int = 1, 2
10 | # var r, g, b = 255, 128, 5
11 | # r, g, b = 255, 128, (g = 5)
12 | 
13 | # fun lol(int, int32, Foo) int64
14 | # var f fun(int, int32) int64
15 | 
16 | const start = 5
17 | 
18 | fun main {
19 |   # var x = 1
20 |   factorial(start)
21 | }
22 | 
23 | fun factorial(n int) int {
24 |   if n == 0 {
25 |     1
26 |   } else {
27 |     n * factorial(n - 1)
28 |   }
29 | }
30 | 
31 | # fun multiply(x, y int, z int32) int {
32 | #   if x > y {
33 | #     x * y * z
34 | #   } else if x == 0 {
35 | #     return 8
36 | #   } else {
37 | #     x / y * z
38 | #   }
39 | # }
40 | 
41 | 
42 | 
43 | # z = { x = 6; 5 * x }
44 | 
45 | # # r, g, b = 255, 128, g = 5  # invalid:
46 | # # (Assign =
47 | # #   (ExprList
48 | # #     (Ident r)
49 | # #     (Ident g)
50 | # #     (Ident b))
51 | # #   (Assign =
52 | # #     (ExprList
53 | # #       (Int 255)
54 | # #       (Int 128)
55 | # #       (Ident g))
56 | # #     (Int 5)))
57 | 
58 | # 4 + # let's add four
59 | # 5 + # and five to
60 | # 6   # six
61 | # foo + bar * baz
62 | 
63 | # oändlig  # C3 A4
64 | # 😀 = 1337
65 | 
66 | # # !$lol; int # another comment
67 | # # foo * bar + 8
68 | # # const lol, foo, bar = 9, 7, 0
69 | # # var cat = 6
70 | # # x ++
71 | # # y --
72 | # # 3 * 9
73 | # # -1 + 5
74 | 
75 | # fun multiply (x, y int, z i32) int {
76 | #   x * y * z
77 | # }
78 | 
79 | # # multiply = (x, y int, z i32) -> {
80 | # #   x * y * z
81 | # # }
82 | 
83 | # # fun map<T>(c Collection<T>, f (T,int)->str) str
84 | # # fun map<T>(c Collection<T>, f fun(T,int)str) str
85 | 
86 | # # names = map(entries, (entry, index) -> entry.name)
87 | 


--------------------------------------------------------------------------------
/experimental/x86_64-backend/asm.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | void AsmELF();
4 | 


--------------------------------------------------------------------------------
/experimental/x86_64-backend/elf/builder.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "../../array.h"
  3 | #include "../../buf.h"
  4 | 
  5 | typedef struct ELFData    ELFData;
  6 | typedef struct ELFSec     ELFSec;
  7 | typedef struct ELFProg    ELFProg;
  8 | typedef struct ELFBuilder ELFBuilder;
  9 | 
 10 | typedef enum ELFErr {
 11 |   ELF_OK = 0,
 12 |   ELF_E_UNSPECIFIED,
 13 | } ELFErr;
 14 | 
 15 | typedef enum ELFMode {
 16 |   ELFMode32,
 17 |   ELFMode64,
 18 | } ELFMode;
 19 | 
 20 | // Section header as tracked by the builder before assembly.
 21 | typedef struct ELFSec {
 22 |   ELFBuilder* builder; // owning builder
 23 |   ELFData*    data;    // section data pointer. May be NULL.
 24 |   u16         index;   // section index (offset in b.shv initially, index after assembly)
 25 |   // ELF type-agnostic members of Elf32_Shdr & Elf64_Shdr
 26 |   u32         type;    // Type of section (sh_type)
 27 |   u32         name;    // Section name, index in shstrtab (sh_name)
 28 |   u32         flags;   // Bitflags ELF_SHF_* (sh_flags)
 29 |   ELFSec*     link;    // Pointer to another section (counterpart of the sh_link index)
 30 |   // Data used during assembly
 31 |   union {
 32 |     Elf32_Shdr sh32;
 33 |     Elf64_Shdr sh64;
 34 |   };
 35 | } ELFSec;
 36 | 
 37 | // Program header
 38 | typedef struct ELFProg {
 39 |   ELFBuilder* builder; // owning builder
 40 |   ELFData*    data;    // segment data pointer. May be NULL.
 41 |   // ELF type-agnostic members of Elf32_Phdr & Elf64_Phdr
 42 |   u32         type;    // (p_type)
 43 |   u32         flags;   // (p_flags)
 44 |   // Data used during assembly
 45 |   union {
 46 |     u32 align32;
 47 |     u64 align64;
 48 |   };
 49 | } ELFProg;
 50 | 
 51 | // ELFData represents a segment and/or section data.
 52 | // Referenced by at least one of either a section header or a program header (or both.)
 53 | typedef struct ELFData {
 54 |   ELFBuilder* builder;
 55 |   Array       secv;  // [ELFSec*] section headers referencing this data
 56 |   void*       _secv_storage[1];
 57 |   Array       progv; // [ELFProg*] program headers referencing this data
 58 |   void*       _progv_storage[1];
 59 |   Buf         buf;   // the data
 60 |   // Data used during assembly
 61 |   union {
 62 |     u32 offs32;
 63 |     u64 offs64;
 64 |   };
 65 | } ELFData;
 66 | 
 67 | // Builder
 68 | typedef struct ELFBuilder {
 69 |   Memory       mem;      // allocator (NULL = global allocator)
 70 |   ELFMode      mode;
 71 |   ELFMachine   machine;
 72 |   u8           encoding; // ELF_DATA_* constant. Set to best-guess at init based on machine.
 73 |   Array        dv;       // data segments [ELFData*]
 74 |   Array        shv;      // section headers [ELFSec*]
 75 |   Array        phv;      // program headers [ELFProg*]
 76 |   // special sections (pointers into shv)
 77 |   ELFSec*      shstrtab; // ".shstrtab" Section Header string table section
 78 |   ELFSec*      strtab;   // ".strtab" General string table section
 79 |   ELFSec*      symtab;   // ".symtab" General symbol table section
 80 | } ELFBuilder;
 81 | 
 82 | 
 83 | // Initialize a builder for use.
 84 | void ELFBuilderInit(ELFBuilder* b, ELFMachine m, Memory nullable mem);
 85 | 
 86 | // Free all memory used by the builder (does not free memory for b itself.)
 87 | void ELFBuilderFree(ELFBuilder* b);
 88 | 
 89 | // Allocate a new data to be linked with a section and/or program header.
 90 | ELFData* ELFBuilderNewData(ELFBuilder* b);
 91 | 
 92 | // Add a new section header of type with name which optionally references data.
 93 | ELFSec* ELFBuilderNewSec(ELFBuilder* b, const char* name, u32 type, ELFData* data);
 94 | 
 95 | // Add a new program header of type with flags which optionally references data.
 96 | ELFProg* ELFBuilderNewProg(ELFBuilder* b, u32 type, u32 flags, ELFData* data);
 97 | 
 98 | // Add a new SYMTAB section named name, which stores its names in strtab.
 99 | ELFSec* ELFBuilderNewSymtab(ELFBuilder* b, const ELFSec* strtab, const char* name);
100 | 
101 | // Retrieves the null-terminated name of the section, as provided to ELFBuilderNewSec.
102 | const char* ELFSecName(const ELFSec* sec);
103 | 
104 | // Append a name to a string table. Return its index. strtab->type must be STRTAB.
105 | u32 ELFStrtabAppend(ELFSec* strtab, const char* name);
106 | 
107 | // Look up a name in a string table. nameindex is a byte offset.
108 | const char* ELFStrtabLookup(const ELFSec* sec, u32 nameindex);
109 | 
110 | // Add a symbol with name to symtab, originating in section sec.
111 | // Returns a pointer to the symbol.
112 | // The returned pointer is only valid until the next call to ELFSymtabAdd* as it
113 | // references memory that might change during a call.
114 | Elf32_Sym* ELFSymtabAdd32(ELFSec* symtab, ELFSec* sec, const char* name, u8 bind, u8 typ, u32 val);
115 | Elf64_Sym* ELFSymtabAdd64(ELFSec* symtab, ELFSec* sec, const char* name, u8 bind, u8 typ, u64 val);
116 | 
117 | // Assemble ELF file
118 | ELFErr ELFBuilderAssemble(ELFBuilder* b, Buf* buf);
119 | 


--------------------------------------------------------------------------------
/experimental/x86_64-backend/elf/file.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // An ELF file has two views into its data:
 4 | // 1. the program header shows the segments used at run time, whereas
 5 | // 2. the section header lists the set of sections of the binary.
 6 | 
 7 | typedef struct ELFFile {
 8 |   const char* nullable name;
 9 |   const u8*            buf;
10 |   size_t               len;
11 |   const char*          shstrtab; // pointer into buf of shstrtab. NULL if none.
12 | } ELFFile;
13 | 
14 | void ELFFileInit(ELFFile* f, const char* nullable name, const u8* data, size_t len);
15 | bool ELFFileValidate(const ELFFile* f, FILE* nullable errlogfp);
16 | static const char* ELFFileName(const ELFFile* f, const char* defaultname);
17 | 
18 | // Access basic information
19 | static u8 ELFFileClass(const ELFFile* f); // ELF_CLASS_{NONE,32,64}
20 | 
21 | // Access headers
22 | static const Elf32_Ehdr* ELFFileEH32(const ELFFile* f);
23 | static const Elf64_Ehdr* ELFFileEH64(const ELFFile* f);
24 | static const Elf32_Phdr* ELFFilePH32(const ELFFile* f, u32 index);
25 | static const Elf64_Phdr* ELFFilePH64(const ELFFile* f, u32 index);
26 | static const Elf32_Shdr* ELFFileSH32(const ELFFile* f, u32 index);
27 | static const Elf64_Shdr* ELFFileSH64(const ELFFile* f, u32 index);
28 | 
29 | // Print human-readable information
30 | void ELFFilePrint(const ELFFile* f, FILE* fp);
31 | 
32 | 
33 | // ----------------------------------------------------------
34 | // inline implementations
35 | 
36 | inline static const char* ELFFileName(const ELFFile* f, const char* defaultname) {
37 |   return f->name == NULL ? defaultname : f->name;
38 | }
39 | 
40 | inline static u8 ELFFileClass(const ELFFile* f) {
41 |   return f->buf[ELF_EI_CLASS];
42 | }
43 | 
44 | inline static const Elf32_Ehdr* ELFFileEH32(const ELFFile* f) {
45 |   return (const Elf32_Ehdr*)f->buf;
46 | }
47 | inline static const Elf64_Ehdr* ELFFileEH64(const ELFFile* f) {
48 |   return (const Elf64_Ehdr*)f->buf;
49 | }
50 | 
51 | inline static const Elf32_Phdr* ELFFilePH32(const ELFFile* f, u32 index) {
52 |   auto eh = ELFFileEH32(f);
53 |   return (const Elf32_Phdr*)&f->buf[eh->e_phoff + (sizeof(Elf32_Phdr) * index)];
54 | }
55 | inline static const Elf64_Phdr* ELFFilePH64(const ELFFile* f, u32 index) {
56 |   auto eh = ELFFileEH64(f);
57 |   return (const Elf64_Phdr*)&f->buf[eh->e_phoff + (sizeof(Elf64_Phdr) * index)];
58 | }
59 | 
60 | inline static const Elf32_Shdr* ELFFileSH32(const ELFFile* f, u32 index) {
61 |   auto eh = ELFFileEH32(f);
62 |   return (const Elf32_Shdr*)&f->buf[eh->e_shoff + (sizeof(Elf32_Shdr) * index)];
63 | }
64 | inline static const Elf64_Shdr* ELFFileSH64(const ELFFile* f, u32 index) {
65 |   auto eh = ELFFileEH64(f);
66 |   return (const Elf64_Shdr*)&f->buf[eh->e_shoff + (sizeof(Elf64_Shdr) * index)];
67 | }
68 | 


--------------------------------------------------------------------------------
/experimental/x86_64-backend/elf64.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "../defs.h"
 3 | #include "../buf.h"
 4 | 
 5 | typedef struct ELF64 {
 6 |   Buf    buf;        // Main buffer (ELF header + program headers + data segments)
 7 |   u16    phnum;      // number of program headers (located at buf + sizeof(ELF header))
 8 |   Buf    shbuf;      // section headers
 9 |   Buf    strtab;     // string table
10 |   Buf    shstrtab;   // section header string table
11 |   Buf    symtab;     // symbol table
12 | } ELF64;
13 | 
14 | void ELF64Init(ELF64* e, Memory nullable mem);
15 | void ELF64Free(ELF64* e);
16 | 
17 | inline static Memory* ELF64Memory(ELF64* e) { // returns the mem member of the main buffer
18 |   return e->buf.mem; // NOTE(review): ELF64Init takes "Memory nullable mem" by value; confirm Memory* (rather than Memory) is the intended return type
19 | }
20 | 
21 | inline static Elf64_Ehdr* ELF64GetEH(ELF64* e) {
22 |   return (Elf64_Ehdr*)e->buf.ptr;
23 | }
24 | 


--------------------------------------------------------------------------------
/misc/asmlab/hello-c.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | Build:
 4 |   clang -O0 -S -o hello-c.s hello-c.c
 5 |   clang -O0 -o hello-c.elf hello-c.c
 6 | 
 7 | Dump ELF & disassembly:
 8 |   llvm-objdump -D --syms --full-contents --all-headers hello-c.elf > hello-c.elf.dis.txt
 9 | 
10 | */
11 | 
12 | #include <string.h>  // strlen
13 | #include <unistd.h>  // write
14 | 
15 | // Writes "Hello World\n" to stdout three times and exits with status 9.
16 | int main() {
17 |   const char *str = "Hello World\n";
18 |   for (int i = 0; i < 3; i++) {
19 |     write(1, str, strlen(str));  // fd 1 = stdout
20 |   }
21 |   return 9;
22 | }
23 | 


--------------------------------------------------------------------------------
/misc/asmlab/hello-c.s:
--------------------------------------------------------------------------------
 1 | 	.text
 2 | 	.file	"hello-c.c"
 3 | 	.globl	main                    # -- Begin function main
 4 | 	.p2align	4, 0x90
 5 | 	.type	main,@function
 6 | main:                                   # @main
 7 | 	.cfi_startproc
 8 | # %bb.0:                                # %entry
 9 | 	pushq	%rbp
10 | 	.cfi_def_cfa_offset 16
11 | 	.cfi_offset %rbp, -16
12 | 	movq	%rsp, %rbp
13 | 	.cfi_def_cfa_register %rbp
14 | 	subq	$32, %rsp
15 | 	movl	$0, -4(%rbp)
16 | 	movabsq	$.L.str, %rax
17 | 	movq	%rax, -16(%rbp)
18 | 	movl	$0, -20(%rbp)
19 | .LBB0_1:                                # %for.cond
20 |                                         # =>This Inner Loop Header: Depth=1
21 | 	cmpl	$3, -20(%rbp)
22 | 	jge	.LBB0_4
23 | # %bb.2:                                # %for.body
24 |                                         #   in Loop: Header=BB0_1 Depth=1
25 | 	movq	-16(%rbp), %rsi
26 | 	movq	-16(%rbp), %rdi
27 | 	movq	%rsi, -32(%rbp)         # 8-byte Spill
28 | 	callq	strlen
29 | 	movl	$1, %edi
30 | 	movq	-32(%rbp), %rsi         # 8-byte Reload
31 | 	movq	%rax, %rdx
32 | 	callq	write
33 | # %bb.3:                                # %for.inc
34 |                                         #   in Loop: Header=BB0_1 Depth=1
35 | 	movl	-20(%rbp), %eax
36 | 	addl	$1, %eax
37 | 	movl	%eax, -20(%rbp)
38 | 	jmp	.LBB0_1
39 | .LBB0_4:                                # %for.end
40 | 	movl	$9, %eax
41 | 	addq	$32, %rsp
42 | 	popq	%rbp
43 | 	.cfi_def_cfa %rsp, 8
44 | 	retq
45 | .Lfunc_end0:
46 | 	.size	main, .Lfunc_end0-main
47 | 	.cfi_endproc
48 |                                         # -- End function
49 | 	.type	.L.str,@object          # @.str
50 | 	.section	.rodata.str1.1,"aMS",@progbits,1
51 | .L.str:
52 | 	.asciz	"Hello World\n"
53 | 	.size	.L.str, 13
54 | 
55 | 	.ident	"clang version 10.0.0 (/b/s/w/ir/cache/git/chromium.googlesource.com-external-github.com-llvm-llvm--project cb47b8783017a76c5f2e4b974cfd6b22c9f1d5ff)"
56 | 	.section	".note.GNU-stack","",@progbits
57 | 	.addrsig
58 | 	.addrsig_sym write
59 | 	.addrsig_sym strlen
60 | 


--------------------------------------------------------------------------------
/misc/asmlab/hello1.s:
--------------------------------------------------------------------------------
  1 | /*
  2 | Minimal program with rodata
  3 | 
  4 | VM with llvm preinstalled:
  5 | 	docker run --rm -it -v "$PWD:/src" rsms/emsdk
  6 | 
  7 | Build & Run
  8 | 	clang -nostdlib -O0 -o hello1.elf hello1.s && ./hello1.elf ; echo $?
  9 | 
 10 | See exact contents:
 11 | 	llvm-objdump -D --syms --full-contents --all-headers hello1.elf > hello1.elf.dis.txt
 12 | 	hexdump -v -C hello1.elf > hello1.elf.hex
 13 | 
 14 | GP regs: eax, ebx, ecx, edx, edi, esi
 15 | Special regs:
 16 | - ebp    base pointer (end address of current stack frame)
 17 | - esp    current top of the stack (end address of stack)
 18 | - eip    instruction pointer
 19 | - eflags
 20 | 
 21 | Stack notes:
 22 | 	movl (%esp), %eax   # indirect addressing. copy top of stack to eax
 23 | 	movl 4(%esp), %eax  # base pointer addressing. copy second item on stack to eax
 24 | 
 25 | */
 26 | // constants, system calls
 27 | .equ SYS_OPEN,  5
 28 | .equ SYS_WRITE, 4
 29 | .equ SYS_READ,  3
 30 | .equ SYS_CLOSE, 6
 31 | .equ SYS_EXIT,  1
 32 | //
 33 | // options for open (look at usr/include/asm/fcntl.h for various values. You can combine them
 34 | // by adding them or OR-ing them)
 35 | .equ O_RDONLY, 		    0
 36 | .equ O_CREAT_WRONLY_TRUNC, 03101
 37 | //
 38 | // standard file descriptors
 39 | .equ STDIN,  0
 40 | .equ STDOUT, 1
 41 | .equ STDERR, 2
 42 | //
 43 | // misc
 44 | .equ SYSCALL,	0x80		# Linux syscall interrupt code
 45 | .equ EOF,		0			# End of file code
 46 | 
 47 | .section .bss
 48 |  // Buffer - this is where the data is loaded into from the data file and written from
 49 |  // into the output file. This should never exceed 16,000 for various reasons.
 50 | .equ   BUFFER_SIZE, 500
 51 | .lcomm BUFFER_DATA, BUFFER_SIZE
 52 | 
 53 | // 4000ff
 54 | 
 55 | // // .data contains mutable constant data
 56 | // .section .data
 57 | // data_items:								# Array of naturally-wide integers
 58 | // 	.long	3,67,34,222,45,75,54,34,44,33,22,11,66,0
 59 | // 	.size	data_items, 112				# 14 * 8 (sizeof long)
 60 | // 	.type	data_items, @object			# mark as "object" in ELF symbol table
 61 | 
 62 | // .text contains immutable executable data
 63 | .section .text
 64 | .globl _start
 65 | 
 66 | write_hello_to_stdout: // ()->()  writes hellomsg to STDOUT via the SYS_WRITE syscall
 67 | 	pushq		%rbp					# save caller's base pointer on stack
 68 | 	movq		%rsp,		%rbp		# make stack pointer the base pointer
 69 | 	movq		$SYS_WRITE,	%rax		# syscall msg id "write to fd"
 70 | 	movq  		$STDOUT,	%rbx		# fd = STDOUT
 71 | 	movabsq  	$hellomsg,	%rcx		# buffer pointer (becomes VMA addr into .rodata)
 72 | 	movq		$12,		%rdx		# buffer size ("Hello world\n" without the trailing NUL)
 73 | 	int			$SYSCALL				# raise the Linux syscall interrupt (0x80)
 74 | 	movq 		%rbp,		%rsp		# restore stack pointer
 75 | 	popq 		%rbp					# restore base pointer
 76 | 	ret
 77 | 
 78 | // main:
 79 | // 	pushq		%rbp					# save stack pointer on stack
 80 | // 	movq		%rsp,		%rbp		# make stack pointer the base pointer
 81 | // 	subq		$8,			%rsp		# reserve 2xi32 on stack
 82 | // 	call		write_hello_to_stdout
 83 | // 	call		write_hello_to_stdout
 84 | // 	call		write_hello_to_stdout
 85 | // 	movq 		%rbp,		%rsp		# restore stack pointer
 86 | // 	popq 		%rbp					# restore base pointer
 87 | // 	movl		$9,			%eax		# return value 9
 88 | // 	ret									# jump to address at 8(%rbp) & increment %rbp
 89 | 
 90 | main:
 91 | 	pushq		%rbp					# save caller's base pointer on stack
 92 | 	movq		%rsp,		%rbp		# make stack pointer the base pointer
 93 | 	subq		$4,			%rsp		# reserve 1xi32 on stack (else call would mess up stack)
 94 | 	movl		$0,			-4(%rbp)	# put 0 in stack slot 1 (local0)
 95 |   for_cond:
 96 | 	cmpl		$3,			-4(%rbp)	# compare local0 with 3 (how many times we print)
 97 | 	jge			for_end					# if local0 >= 3 then jump to end
 98 | 	call		write_hello_to_stdout
 99 | 	movl		-4(%rbp),	%eax		# load local0 into rAX
100 | 	addl		$1,			%eax		# add 1 to rAX
101 | 	movl		%eax,		-4(%rbp)	# store rAX to local0
102 | 	jmp			for_cond				# loop
103 |   for_end:
104 | 	movl		-4(%rbp),	%eax		# return the number of times we printed "Hello"
105 | 	// movl		$9,			%eax		# return value 9
106 | 	movq 		%rbp,		%rsp		# restore stack pointer
107 | 	popq 		%rbp					# restore base pointer
108 | 	ret									# pop return address from (%rsp) and jump to it
109 | 
110 | _start:									# global symbol (see .globl _start); calls main, then exits with its result
111 | 	call		main					# call main; return value is in %eax
112 | 	// movl		$42,		%ebx		# exit status in ebx
113 | 	movl		%eax,		%ebx		# %ebx holds the return status
114 | 	movl		$SYS_EXIT,	%eax		# %eax holds the syscall message ID (1="exit")
115 | 	int			$SYSCALL				# interrupt "syscall"; no code follows the exit request
116 | 
117 | 
118 | .section .rodata
119 | hellomsg:
120 | 	.asciz	"Hello world\n"
121 | 	.size	hellomsg, 13
122 | 	.type	hellomsg, @object
123 | 
124 | .section .data
125 | hellomsg2:
126 | 	.asciz	"O hai world\n"
127 | 	.size	hellomsg2, 13
128 | 	.type	hellomsg2, @object
129 | 
130 | 
131 | // // example that exits with the CLI argument count
132 | // _start:
133 | // 	movq  		%rsp, 		%rbp
134 | // 	movl		0(%rbp),	%ebx		# argc is at stack pointer
135 | // 	# Note: 8(%rbp)=argv[0], 16(%rbp)=argv[1], ... (64-bit)
136 | // 	movl		$SYS_EXIT,	%eax		# %eax holds the syscall message ID (1="exit")
137 | // 	int			$IN_LINUX_SYSCALL		# interrupt "syscall"
138 | 
139 | 
140 | // // minimal program that writes "hello world\n" to stdout
141 | // _start:
142 | // 	movq		$SYS_WRITE,	%rax		# syscall msg id "write to fd"
143 | // 	movq  		$STDOUT,	%rbx		# fd = STDOUT
144 | // 	movabsq  	$hellomsg,	%rcx		# buffer pointer (becomes VMA addr into .rodata)
145 | // 	movq		$13,		%rdx		# buffer size
146 | // 	int			$SYSCALL
147 | // 	movl		$0,			%ebx		# %ebx holds the return status
148 | // 	movl		$SYS_EXIT,	%eax		# %eax holds the syscall message ID (1="exit")
149 | // 	int			$SYSCALL
150 | 
151 | 
152 | // // the most minimal program; just exit (using classic interrupt syscall)
153 | // _start:
154 | //   movl  $42, %ebx    # exit status
155 | //   movl  $1,  %eax    # syscall message "exit"
156 | //   int   $0x80        # interrupt "syscall"
157 | 
158 | // // the most minimal program; just exit (using modern syscall op)
159 | // // See: https://github.com/torvalds/linux/blob/v3.13/arch/x86/syscalls/syscall_64.tbl#L69
160 | // // See: https://github.com/torvalds/linux/blob/v3.13/arch/x86/kernel/entry_64.S#L569-L591
161 | // _start:
162 | //   movq  $60, %rax    # syscall no "exit"
163 | //   movq  $42, %rdi    # exit status
164 | //   syscall
165 | 


--------------------------------------------------------------------------------
/misc/asmlab/hello1.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | cd "$(dirname "$0")"
 3 | 
 4 | echo "Building hello1.s -> hello1.elf"
 5 | clang -nostdlib -O0 -o hello1.elf hello1.s
 6 | 
 7 | echo "Dumping disassemly hello1.elf -> hello1.elf.dis.txt"
 8 | llvm-objdump -D --syms --full-contents --all-headers hello1.elf > hello1.elf.dis.txt
 9 | 
10 | echo "Running ./hello1.elf"
11 | ./hello1.elf
12 | echo $?
13 | 


--------------------------------------------------------------------------------
/misc/asmlab/mini1-32.elf.dis.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | mini1-32.elf:	file format ELF32-i386
  3 | 
  4 | architecture: i386
  5 | start address: 0x08048054
  6 | 
  7 | Program Header:
  8 |     LOAD off    0x00000000 vaddr 0x08048000 paddr 0x08048000 align 2**12
  9 |          filesz 0x00000062 memsz 0x00000062 flags r-x
 10 | 
 11 | Dynamic Section:
 12 | Sections:
 13 | Idx Name          Size     VMA      Type
 14 |   0               00000000 00000000 
 15 |   1 .text         0000000e 08048054 TEXT
 16 |   2 .symtab       00000060 00000000 
 17 |   3 .strtab       00000019 00000000 
 18 |   4 .shstrtab     00000021 00000000 
 19 | 
 20 | SYMBOL TABLE:
 21 | 08048054 l    d  .text	00000000 .text
 22 | 08048054         .text	00000000 _start
 23 | 08049062         .text	00000000 __bss_start
 24 | 08049062         .text	00000000 _edata
 25 | 08049064         .text	00000000 _end
 26 | Contents of section .text:
 27 |  8048054 b8010000 00bb2a00 000089e5 0f34      ......*......4
 28 | Contents of section .symtab:
 29 |  0000 00000000 00000000 00000000 00000000  ................
 30 |  0010 00000000 54800408 00000000 03000100  ....T...........
 31 |  0020 06000000 54800408 00000000 10000100  ....T...........
 32 |  0030 01000000 62900408 00000000 10000100  ....b...........
 33 |  0040 0d000000 62900408 00000000 10000100  ....b...........
 34 |  0050 14000000 64900408 00000000 10000100  ....d...........
 35 | Contents of section .strtab:
 36 |  0000 005f5f62 73735f73 74617274 005f6564  .__bss_start._ed
 37 |  0010 61746100 5f656e64 00                 ata._end.
 38 | Contents of section .shstrtab:
 39 |  0000 002e7379 6d746162 002e7374 72746162  ..symtab..strtab
 40 |  0010 002e7368 73747274 6162002e 74657874  ..shstrtab..text
 41 |  0020 00                                   .
 42 | 
 43 | Disassembly of section .text:
 44 | 
 45 | 08048054 _start:
 46 |  8048054: b8 01 00 00 00               	movl	$1, %eax
 47 |  8048059: bb 2a 00 00 00               	movl	$42, %ebx
 48 |  804805e: 89 e5                        	movl	%esp, %ebp
 49 |  8048060: 0f 34                        	sysenter
 50 | 
 51 | Disassembly of section .symtab:
 52 | 
 53 | 00000000 .symtab:
 54 | 		...
 55 |       14: 54                           	pushl	%esp
 56 |       15: 80 04 08 00                  	addb	$0, (%eax,%ecx)
 57 |       19: 00 00                        	addb	%al, (%eax)
 58 |       1b: 00 03                        	addb	%al, (%ebx)
 59 |       1d: 00 01                        	addb	%al, (%ecx)
 60 |       1f: 00 06                        	addb	%al, (%esi)
 61 |       21: 00 00                        	addb	%al, (%eax)
 62 |       23: 00 54 80 04                  	addb	%dl, 4(%eax,%eax,4)
 63 |       27: 08 00                        	orb	%al, (%eax)
 64 |       29: 00 00                        	addb	%al, (%eax)
 65 |       2b: 00 10                        	addb	%dl, (%eax)
 66 |       2d: 00 01                        	addb	%al, (%ecx)
 67 |       2f: 00 01                        	addb	%al, (%ecx)
 68 |       31: 00 00                        	addb	%al, (%eax)
 69 |       33: 00 62 90                     	addb	%ah, -112(%edx)
 70 |       36: 04 08                        	addb	$8, %al
 71 |       38: 00 00                        	addb	%al, (%eax)
 72 |       3a: 00 00                        	addb	%al, (%eax)
 73 |       3c: 10 00                        	adcb	%al, (%eax)
 74 |       3e: 01 00                        	addl	%eax, (%eax)
 75 |       40: 0d 00 00 00 62               	orl	$1644167168, %eax
 76 |       45: 90                           	nop
 77 |       46: 04 08                        	addb	$8, %al
 78 |       48: 00 00                        	addb	%al, (%eax)
 79 |       4a: 00 00                        	addb	%al, (%eax)
 80 |       4c: 10 00                        	adcb	%al, (%eax)
 81 |       4e: 01 00                        	addl	%eax, (%eax)
 82 |       50: 14 00                        	adcb	$0, %al
 83 |       52: 00 00                        	addb	%al, (%eax)
 84 |       54: 64 90                        	nop
 85 |       56: 04 08                        	addb	$8, %al
 86 |       58: 00 00                        	addb	%al, (%eax)
 87 |       5a: 00 00                        	addb	%al, (%eax)
 88 |       5c: 10 00                        	adcb	%al, (%eax)
 89 |       5e: 01 00                        	addl	%eax, (%eax)
 90 | 
 91 | Disassembly of section .strtab:
 92 | 
 93 | 00000000 .strtab:
 94 |        0: 00 5f 5f                     	addb	%bl, 95(%edi)
 95 |        3: 62 73 73                     	bound	%esi, 115(%ebx)
 96 |        6: 5f                           	popl	%edi
 97 |        7: 73 74                        	jae	116 <.symtab+0x7d>
 98 |        9: 61                           	popal
 99 |        a: 72 74                        	jb	116 <.symtab+0x80>
100 |        c: 00 5f 65                     	addb	%bl, 101(%edi)
101 |        f: 64 61                        	popal
102 |       11: 74 61                        	je	97 <.symtab+0x74>
103 |       13: 00 5f 65                     	addb	%bl, 101(%edi)
104 |       16: 6e                           	outsb	(%esi), %dx
105 |       17: 64 00                        	<unknown>
106 | 
107 | Disassembly of section .shstrtab:
108 | 
109 | 00000000 .shstrtab:
110 |        0: 00 2e                        	addb	%ch, (%esi)
111 |        2: 73 79                        	jae	121 <.symtab+0x7d>
112 |        4: 6d                           	insl	%dx, %es:(%edi)
113 |        5: 74 61                        	je	97 <.symtab+0x68>
114 |        7: 62 00                        	bound	%eax, (%eax)
115 |        9: 2e 73 74                     	jae	116 <.symtab+0x80>
116 |        c: 72 74                        	jb	116 <.symtab+0x82>
117 |        e: 61                           	popal
118 |        f: 62 00                        	bound	%eax, (%eax)
119 |       11: 2e 73 68                     	jae	104 <.symtab+0x7c>
120 |       14: 73 74                        	jae	116 <.symtab+0x8a>
121 |       16: 72 74                        	jb	116 <.symtab+0x8c>
122 |       18: 61                           	popal
123 |       19: 62 00                        	bound	%eax, (%eax)
124 |       1b: 2e 74 65                     	je	101 <.symtab+0x83>
125 |       1e: 78 74                        	js	116 <.symtab+0x94>
126 |       20: 00                           	<unknown>
127 | 


--------------------------------------------------------------------------------
/misc/asmlab/mini1-32.s:
--------------------------------------------------------------------------------
 1 | #
 2 | # Minimal Linux program, using modern sysenter instruction (32-bit)
 3 | # Note that 32-bit x86 does not have the syscall instruction of AMD-origin, but instead
 4 | # a sysenter instruction.
 5 | #
 6 | # For info on Linux syscalls, see:
 7 | #   https://github.com/torvalds/linux/blob/v3.13/arch/x86/syscalls/syscall_32.tbl
 8 | #   https://github.com/torvalds/linux/blob/v3.13/arch/x86/kernel/entry_32.S
 9 | #
10 | # Build & Run:
11 | #   clang -nostdlib -O0 -m32 -o mini1-32.elf mini1-32.s && ./mini1-32.elf ; echo $?
12 | #
13 | # Disassemble:
14 | #   llvm-objdump -D --syms --full-contents --all-headers mini1-32.elf > mini1-32.elf.dis.txt
15 | #
16 | # Dump to view exact contents:
17 | #   hexdump -v -C mini1-32.elf > mini1-32.elf.hex
18 | #
19 | .text
20 | .globl  _start
21 | 
22 | _start:
23 |   movl  $1,  %eax    # sysenter number for "exit"
24 |   movl  $42, %ebx    # exit status
25 |   movl   %esp, %ebp
26 |   sysenter
27 | 


--------------------------------------------------------------------------------
/misc/asmlab/mini1.elf.dis.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | mini1.elf:	file format ELF64-x86-64
  3 | 
  4 | architecture: x86_64
  5 | start address: 0x0000000000400078
  6 | 
  7 | Program Header:
  8 |     LOAD off    0x0000000000000000 vaddr 0x0000000000400000 paddr 0x0000000000400000 align 2**21
  9 |          filesz 0x0000000000000088 memsz 0x0000000000000088 flags r-x
 10 | 
 11 | Dynamic Section:
 12 | Sections:
 13 | Idx Name          Size     VMA              Type
 14 |   0               00000000 0000000000000000 
 15 |   1 .text         00000010 0000000000400078 TEXT
 16 |   2 .symtab       00000090 0000000000000000 
 17 |   3 .strtab       00000019 0000000000000000 
 18 |   4 .shstrtab     00000021 0000000000000000 
 19 | 
 20 | SYMBOL TABLE:
 21 | 0000000000400078 l    d  .text	00000000 .text
 22 | 0000000000400078         .text	00000000 _start
 23 | 0000000000601000         .text	00000000 __bss_start
 24 | 0000000000601000         .text	00000000 _edata
 25 | 0000000000601000         .text	00000000 _end
 26 | Contents of section .text:
 27 |  400078 48c7c03c 00000048 c7c72a00 00000f05  H..<...H..*.....
 28 | Contents of section .symtab:
 29 |  0000 00000000 00000000 00000000 00000000  ................
 30 |  0010 00000000 00000000 00000000 03000100  ................
 31 |  0020 78004000 00000000 00000000 00000000  x.@.............
 32 |  0030 06000000 10000100 78004000 00000000  ........x.@.....
 33 |  0040 00000000 00000000 01000000 10000100  ................
 34 |  0050 00106000 00000000 00000000 00000000  ..`.............
 35 |  0060 0d000000 10000100 00106000 00000000  ..........`.....
 36 |  0070 00000000 00000000 14000000 10000100  ................
 37 |  0080 00106000 00000000 00000000 00000000  ..`.............
 38 | Contents of section .strtab:
 39 |  0000 005f5f62 73735f73 74617274 005f6564  .__bss_start._ed
 40 |  0010 61746100 5f656e64 00                 ata._end.
 41 | Contents of section .shstrtab:
 42 |  0000 002e7379 6d746162 002e7374 72746162  ..symtab..strtab
 43 |  0010 002e7368 73747274 6162002e 74657874  ..shstrtab..text
 44 |  0020 00                                   .
 45 | 
 46 | Disassembly of section .text:
 47 | 
 48 | 0000000000400078 _start:
 49 |   400078: 48 c7 c0 3c 00 00 00         	movq	$60, %rax
 50 |   40007f: 48 c7 c7 2a 00 00 00         	movq	$42, %rdi
 51 |   400086: 0f 05                        	syscall
 52 | 
 53 | Disassembly of section .symtab:
 54 | 
 55 | 0000000000000000 .symtab:
 56 | 		...
 57 |       1c: 03 00                        	addl	(%rax), %eax
 58 |       1e: 01 00                        	addl	%eax, (%rax)
 59 |       20: 78 00                        	js	0
 60 |       22: 40 00 00                     	addb	%al, (%rax)
 61 | 		...
 62 |       2d: 00 00                        	addb	%al, (%rax)
 63 |       2f: 00 06                        	addb	%al, (%rsi)
 64 |       31: 00 00                        	addb	%al, (%rax)
 65 |       33: 00 10                        	addb	%dl, (%rax)
 66 |       35: 00 01                        	addb	%al, (%rcx)
 67 |       37: 00 78 00                     	addb	%bh, (%rax)
 68 |       3a: 40 00 00                     	addb	%al, (%rax)
 69 | 		...
 70 |       45: 00 00                        	addb	%al, (%rax)
 71 |       47: 00 01                        	addb	%al, (%rcx)
 72 |       49: 00 00                        	addb	%al, (%rax)
 73 |       4b: 00 10                        	addb	%dl, (%rax)
 74 |       4d: 00 01                        	addb	%al, (%rcx)
 75 |       4f: 00 00                        	addb	%al, (%rax)
 76 |       51: 10 60 00                     	adcb	%ah, (%rax)
 77 | 		...
 78 |       60: 0d 00 00 00 10               	orl	$268435456, %eax
 79 |       65: 00 01                        	addb	%al, (%rcx)
 80 |       67: 00 00                        	addb	%al, (%rax)
 81 |       69: 10 60 00                     	adcb	%ah, (%rax)
 82 | 		...
 83 |       78: 14 00                        	adcb	$0, %al
 84 |       7a: 00 00                        	addb	%al, (%rax)
 85 |       7c: 10 00                        	adcb	%al, (%rax)
 86 |       7e: 01 00                        	addl	%eax, (%rax)
 87 |       80: 00 10                        	addb	%dl, (%rax)
 88 |       82: 60                           	<unknown>
 89 | 		...
 90 |       8f: 00                           	<unknown>
 91 | 
 92 | Disassembly of section .strtab:
 93 | 
 94 | 0000000000000000 .strtab:
 95 |        0: 00 5f 5f                     	addb	%bl, 95(%rdi)
 96 |        3: 62                           	<unknown>
 97 |        4: 73 73                        	jae	115
 98 |        6: 5f                           	popq	%rdi
 99 |        7: 73 74                        	jae	116
100 |        9: 61                           	<unknown>
101 |        a: 72 74                        	jb	116
102 |        c: 00 5f 65                     	addb	%bl, 101(%rdi)
103 |        f: 64 61                        	<unknown>
104 |       11: 74 61                        	je	97
105 |       13: 00 5f 65                     	addb	%bl, 101(%rdi)
106 |       16: 6e                           	outsb	(%rsi), %dx
107 |       17: 64 00                        	<unknown>
108 | 
109 | Disassembly of section .shstrtab:
110 | 
111 | 0000000000000000 .shstrtab:
112 |        0: 00 2e                        	addb	%ch, (%rsi)
113 |        2: 73 79                        	jae	121 <.shstrtab+0x7d>
114 |        4: 6d                           	insl	%dx, %es:(%rdi)
115 |        5: 74 61                        	je	97 <.shstrtab+0x68>
116 |        7: 62 00 2e 73                  	<unknown>
117 |        b: 74 72                        	je	114 <.shstrtab+0x7f>
118 |        d: 74 61                        	je	97 <.shstrtab+0x70>
119 |        f: 62 00 2e 73                  	<unknown>
120 |       13: 68 73 74 72 74               	pushq	$1953657971
121 |       18: 61                           	<unknown>
122 |       19: 62 00 2e 74                  	<unknown>
123 |       1d: 65 78 74                     	js	116 <.shstrtab+0x94>
124 |       20: 00                           	<unknown>
125 | 


--------------------------------------------------------------------------------
/misc/asmlab/mini1.s:
--------------------------------------------------------------------------------
 1 | #
 2 | # Minimal Linux program, using modern syscall instruction
 3 | #
 4 | # For info on Linux syscalls, see:
 5 | #   https://github.com/torvalds/linux/blob/v3.13/arch/x86/syscalls/syscall_64.tbl#L69
 6 | #   https://github.com/torvalds/linux/blob/v3.13/arch/x86/kernel/entry_64.S#L569-L591
 7 | #
 8 | # Build & Run:
 9 | #   clang -nostdlib -O0 -o mini1.elf mini1.s && ./mini1.elf ; echo $?
10 | #
11 | # Disassemble:
12 | #   llvm-objdump -D --syms --full-contents --all-headers mini1.elf > mini1.elf.dis.txt
13 | #
14 | # Dump to view exact contents:
15 | #   hexdump -v -C mini1.elf > mini1.hex
16 | #
17 | .text
18 | .globl  _start
19 | 
20 | _start:
21 |   movq  $60, %rax    # syscall no "exit"
22 |   movq  $42, %rdi    # exit status
23 |   syscall
24 | 


--------------------------------------------------------------------------------
/misc/asmlab/mini2.elf.dis.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | mini2.elf:	file format ELF64-x86-64
  3 | 
  4 | architecture: x86_64
  5 | start address: 0x0000000000400078
  6 | 
  7 | Program Header:
  8 |     LOAD off    0x0000000000000000 vaddr 0x0000000000400000 paddr 0x0000000000400000 align 2**21
  9 |          filesz 0x0000000000000084 memsz 0x0000000000000084 flags r-x
 10 | 
 11 | Dynamic Section:
 12 | Sections:
 13 | Idx Name          Size     VMA              Type
 14 |   0               00000000 0000000000000000 
 15 |   1 .text         0000000c 0000000000400078 TEXT
 16 |   2 .symtab       00000090 0000000000000000 
 17 |   3 .strtab       00000019 0000000000000000 
 18 |   4 .shstrtab     00000021 0000000000000000 
 19 | 
 20 | SYMBOL TABLE:
 21 | 0000000000400078 l    d  .text	00000000 .text
 22 | 0000000000400078         .text	00000000 _start
 23 | 0000000000600084         .text	00000000 __bss_start
 24 | 0000000000600084         .text	00000000 _edata
 25 | 0000000000600088         .text	00000000 _end
 26 | Contents of section .text:
 27 |  400078 b8010000 00bb2a00 0000cd80           ......*.....
 28 | Contents of section .symtab:
 29 |  0000 00000000 00000000 00000000 00000000  ................
 30 |  0010 00000000 00000000 00000000 03000100  ................
 31 |  0020 78004000 00000000 00000000 00000000  x.@.............
 32 |  0030 06000000 10000100 78004000 00000000  ........x.@.....
 33 |  0040 00000000 00000000 01000000 10000100  ................
 34 |  0050 84006000 00000000 00000000 00000000  ..`.............
 35 |  0060 0d000000 10000100 84006000 00000000  ..........`.....
 36 |  0070 00000000 00000000 14000000 10000100  ................
 37 |  0080 88006000 00000000 00000000 00000000  ..`.............
 38 | Contents of section .strtab:
 39 |  0000 005f5f62 73735f73 74617274 005f6564  .__bss_start._ed
 40 |  0010 61746100 5f656e64 00                 ata._end.
 41 | Contents of section .shstrtab:
 42 |  0000 002e7379 6d746162 002e7374 72746162  ..symtab..strtab
 43 |  0010 002e7368 73747274 6162002e 74657874  ..shstrtab..text
 44 |  0020 00                                   .
 45 | 
 46 | Disassembly of section .text:
 47 | 
 48 | 0000000000400078 _start:
 49 |   400078: b8 01 00 00 00               	movl	$1, %eax
 50 |   40007d: bb 2a 00 00 00               	movl	$42, %ebx
 51 |   400082: cd 80                        	int	$128
 52 | 
 53 | Disassembly of section .symtab:
 54 | 
 55 | 0000000000000000 .symtab:
 56 | 		...
 57 |       1c: 03 00                        	addl	(%rax), %eax
 58 |       1e: 01 00                        	addl	%eax, (%rax)
 59 |       20: 78 00                        	js	0
 60 |       22: 40 00 00                     	addb	%al, (%rax)
 61 | 		...
 62 |       2d: 00 00                        	addb	%al, (%rax)
 63 |       2f: 00 06                        	addb	%al, (%rsi)
 64 |       31: 00 00                        	addb	%al, (%rax)
 65 |       33: 00 10                        	addb	%dl, (%rax)
 66 |       35: 00 01                        	addb	%al, (%rcx)
 67 |       37: 00 78 00                     	addb	%bh, (%rax)
 68 |       3a: 40 00 00                     	addb	%al, (%rax)
 69 | 		...
 70 |       45: 00 00                        	addb	%al, (%rax)
 71 |       47: 00 01                        	addb	%al, (%rcx)
 72 |       49: 00 00                        	addb	%al, (%rax)
 73 |       4b: 00 10                        	addb	%dl, (%rax)
 74 |       4d: 00 01                        	addb	%al, (%rcx)
 75 |       4f: 00 84 00 60 00 00 00         	addb	%al, 96(%rax,%rax)
 76 | 		...
 77 |       5e: 00 00                        	addb	%al, (%rax)
 78 |       60: 0d 00 00 00 10               	orl	$268435456, %eax
 79 |       65: 00 01                        	addb	%al, (%rcx)
 80 |       67: 00 84 00 60 00 00 00         	addb	%al, 96(%rax,%rax)
 81 | 		...
 82 |       76: 00 00                        	addb	%al, (%rax)
 83 |       78: 14 00                        	adcb	$0, %al
 84 |       7a: 00 00                        	addb	%al, (%rax)
 85 |       7c: 10 00                        	adcb	%al, (%rax)
 86 |       7e: 01 00                        	addl	%eax, (%rax)
 87 |       80: 88 00                        	movb	%al, (%rax)
 88 |       82: 60                           	<unknown>
 89 | 		...
 90 |       8f: 00                           	<unknown>
 91 | 
 92 | Disassembly of section .strtab:
 93 | 
 94 | 0000000000000000 .strtab:
 95 |        0: 00 5f 5f                     	addb	%bl, 95(%rdi)
 96 |        3: 62                           	<unknown>
 97 |        4: 73 73                        	jae	115
 98 |        6: 5f                           	popq	%rdi
 99 |        7: 73 74                        	jae	116
100 |        9: 61                           	<unknown>
101 |        a: 72 74                        	jb	116
102 |        c: 00 5f 65                     	addb	%bl, 101(%rdi)
103 |        f: 64 61                        	<unknown>
104 |       11: 74 61                        	je	97
105 |       13: 00 5f 65                     	addb	%bl, 101(%rdi)
106 |       16: 6e                           	outsb	(%rsi), %dx
107 |       17: 64 00                        	<unknown>
108 | 
109 | Disassembly of section .shstrtab:
110 | 
111 | 0000000000000000 .shstrtab:
112 |        0: 00 2e                        	addb	%ch, (%rsi)
113 |        2: 73 79                        	jae	121 <.shstrtab+0x7d>
114 |        4: 6d                           	insl	%dx, %es:(%rdi)
115 |        5: 74 61                        	je	97 <.shstrtab+0x68>
116 |        7: 62 00 2e 73                  	<unknown>
117 |        b: 74 72                        	je	114 <.shstrtab+0x7f>
118 |        d: 74 61                        	je	97 <.shstrtab+0x70>
119 |        f: 62 00 2e 73                  	<unknown>
120 |       13: 68 73 74 72 74               	pushq	$1953657971
121 |       18: 61                           	<unknown>
122 |       19: 62 00 2e 74                  	<unknown>
123 |       1d: 65 78 74                     	js	116 <.shstrtab+0x94>
124 |       20: 00                           	<unknown>
125 | 


--------------------------------------------------------------------------------
/misc/asmlab/mini2.s:
--------------------------------------------------------------------------------
 1 | #
 2 | # Minimal Linux program, using legacy interrupt
 3 | #
 4 | # For info on Linux syscalls, see:
 5 | #   https://github.com/torvalds/linux/blob/v3.13/arch/x86/syscalls/syscall_32.tbl  (int $0x80 uses the 32-bit syscall numbers; 1 = exit)
 6 | #   https://github.com/torvalds/linux/blob/v3.13/arch/x86/kernel/entry_64.S#L569-L591
 7 | #
 8 | # Build & Run:
 9 | #   clang -nostdlib -O0 -o mini2.elf mini2.s && ./mini2.elf ; echo $?
10 | #
11 | # Disassemble:
12 | #   llvm-objdump -D --syms --full-contents --all-headers mini2.elf > mini2.elf.dis.txt
13 | #
14 | # Dump to view exact contents:
15 | #   hexdump -v -C mini2.elf > mini2.hex
16 | #
17 | .text
18 | .globl  _start
19 | 
20 | _start:
21 |   movl  $1,  %eax    # syscall message "exit"
22 |   movl  $42, %ebx    # exit status
23 |   int   $0x80        # interrupt "syscall"
24 | 


--------------------------------------------------------------------------------
/misc/asmlab/start-linux-vm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | cd "$(dirname "$0")"
3 | echo "Running docker:rsms/emsdk in $PWD"
4 | docker run --rm -it -v "$PWD:/src" rsms/emsdk
5 | 


--------------------------------------------------------------------------------
/misc/etc/mini2.s:
--------------------------------------------------------------------------------
 1 | #
 2 | # Minimal Linux program
 3 | #
 4 | # Build & Run:
 5 | #   docker run --rm -it -v "$PWD:/src" rsms/emsdk \
 6 | #     bash -c "clang -nostdlib -O0 -o mini2 mini2.s && ./mini2" ; echo $?
 7 | #
 8 | # Build in 32-bit mode: (interesting for seeing differences to 64-bit)
 9 | #   docker run --rm -it -v "$PWD:/src" rsms/emsdk \
10 | #     bash -c "clang -nostdlib -O0 -m32 -o mini2_32 mini2.s && ./mini2_32" ; echo $?
11 | #
12 | # Build with debugging info:
13 | #   docker run --rm -it -v "$PWD:/src" rsms/emsdk \
14 | #     clang -nostdlib -O0 -g -o mini2.g mini2.s
15 | #
16 | # Disassemble to see LLVMs view on the binary:
17 | #   docker run --rm -it -v "$PWD:/src" rsms/emsdk \
18 | #     llvm-objdump -D --syms --full-contents --all-headers mini2 > mini2.dis.txt
19 | #
20 | # Disassemble with objdump for annotated assembly and opcodes:
21 | #   objdump -d -S mini2
22 | #   objdump -d -S hello-c
23 | #
24 | # Dump to view exact contents:
25 | #   hexdump -v -C mini2 > mini2.hex
26 | #
27 | # View ELF details:
28 | #   readelf -a mini2 > mini2.readelf.txt
29 | #
30 | .text
31 | .globl  _start
32 | _start:
33 |   movl  $42, %ebx    # exit status
34 |   movl  $1,  %eax    # syscall message "exit"
35 |   int   $0x80        # interrupt "syscall"
36 | 


--------------------------------------------------------------------------------
/misc/filter-compdb.py:
--------------------------------------------------------------------------------
 1 | import sys, json
 2 | 
 3 | INFILE = sys.argv[1]
 4 | PREFIX = sys.argv[2]  # e.g "build/obj/dev/"
 5 | 
 6 | with open(INFILE, "r") as f:
 7 |   compdb = json.load(f)
 8 | 
 9 | compdb = [e for e in compdb if e["output"].startswith(PREFIX)]
10 | 
11 | 
12 | # json.dumps([1, 2, 3, {'4': 5, '6': 7}], separators=(',', ':'))
13 | print(json.dumps(compdb, sort_keys=True, indent=2))
14 | 


--------------------------------------------------------------------------------
/misc/gen_parselet_map.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # This script reads and updates the parselet map in src/parse/parse.c
 3 | #
 4 | import re, sys, os, os.path
 5 | 
 6 | def err(msg):
 7 |   print(msg)
 8 |   sys.exit(1)
 9 | 
10 | srcdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11 | os.chdir(srcdir)
12 | 
13 | sourcefilename = "src/parse/parse.c"
14 | 
15 | with open(sourcefilename, "r") as f:
16 |   source = f.read()
17 | 
18 | # //!Parselet (TPlusPlus UNARY_POSTFIX) (TMinusMinus UNARY_POSTFIX)
19 | # //!PrefixParselet TPlus TMinus TStar TSlash
20 | parseletp = re.compile(
21 |   r'\n//\s*\!Parselet\s+(?P<m>(?:\([^\)]+\)[\s\r\n\/\/]*)+)\n\s*(?:static|)\s*Node\*\s*(?P<fun>\w+)')
22 | prefixparseletp = re.compile(
23 |   r'\n//\s*\!PrefixParselet\s+([^\n]+)\n\s*(?:static|)\s*Node\*\s*(\w+)')
24 | splitspecs = re.compile(r'\)[\s\r\n\/\/]*\(')
25 | splitsep = re.compile(r'[\s,]+')
26 | parselets = dict()  # keyed by token, e.g. "TPlus"
27 | 
28 | for m in prefixparseletp.finditer(source):
29 |   fun = m.group(2)
30 |   for tok in splitsep.split(m.group(1)):
31 |     struct_init = parselets.get(tok)
32 |     if struct_init:
33 |       err("duplicate parselet %s for token %s" % (fun, tok))
34 |     parselets[tok] = [fun, "NULL", "MEMBER"]
35 | 
36 | for m in parseletp.finditer(source):
37 |   md = m.groupdict()
38 |   for s in splitspecs.split(md["m"]):
39 |     tok, prec = splitsep.split(s.strip("()"), 1)
40 |     fun = md["fun"]
41 |     # print({ "tok": tok, "prec": prec, "fun": md["fun"] })
42 |     struct_init = parselets.get(tok)
43 |     if not struct_init:
44 |       parselets[tok] = ["NULL", fun, prec]
45 |     else:
46 |       if struct_init[1] != "NULL":
47 |         err("duplicate parselet %s for token %s" % (fun, tok))
48 |       struct_init[1] = fun
49 |       struct_init[2] = prec
50 | 
51 | # const Parselet parselets[TMax] = {
52 | #   [TComment] = { PLComment, NULL, PREC_LOWEST },
53 | # };
54 | output = [
55 |   '// automatically generated by %s; do not edit' % __file__,
56 | ]
57 | output.append("static const Parselet parselets[TMax] = {")
58 | for tok, struct_init in parselets.items():
59 |   output.append("  [%s] = {%s, %s, PREC_%s}," % (tok, *struct_init))
60 | output.append("};")
61 | output = "\n".join(output)
62 | 
63 | startstr = '//PARSELET_MAP_BEGIN\n'
64 | endstr   = '\n//PARSELET_MAP_END'
65 | start = source.find(startstr)
66 | end   = source.find(endstr, start)
67 | if start == -1:
68 |   err("can not find %r in %s" % (startstr, sourcefilename))
69 | if end == -1:
70 |   err("can not find %r in %s" % (endstr, sourcefilename))
71 | 
72 | source2 = source[:start + len(startstr)] + output + source[end:]
73 | 
74 | # write changes only if we modified the source
75 | if source2 != source:
76 |   print("write", sourcefilename)
77 |   with open(sourcefilename, "w") as f:
78 |     f.write(source2)
79 | # write "marker" file for ninja/make
80 | with open("build/gen_parselet_map.marker", "w") as f:
81 |   f.write("x")
82 | 


--------------------------------------------------------------------------------
/misc/test-asm-out.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | PROG=$1
 3 | 
 4 | echo "hexdump -> $PROG.hex"
 5 | hexdump -v -C "$PROG" | tee "$PROG".hex
 6 | 
 7 | echo "./$PROG"
 8 | 
 9 | PROGNAME=$(basename "$PROG")
10 | pushd "$(dirname "$PROG")" >/dev/null
11 | PROGDIR=$PWD
12 | popd >/dev/null
13 | 
14 | docker run --rm -v "$PROGDIR:/mnt1" debian:latest "/mnt1/$PROGNAME"
15 | EXIT_STATUS=$?
16 | echo "Exit status: $EXIT_STATUS"
17 | 


--------------------------------------------------------------------------------
/src/build/build.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "../common/defs.h"
 3 | #include "../common/memory.h"
 4 | #include "source.h"
 5 | 
 6 | // ErrorHandler is the callback type used to report a message for a source position
 7 | typedef void(ErrorHandler)(const Source*, SrcPos, ConstStr msg, void* userdata);
 8 | 
 9 | // CCtx is a compilation context: the source being compiled, memory used during the
10 | // compilation and the error handler which CCtxErrorf reports to.
11 | // TODO: Rename to "Build" ("the build")
12 | //
13 | typedef struct {
14 |   ErrorHandler* errh;     // may be NULL, in which case CCtxErrorf does nothing
15 |   void*         userdata; // passed to errh
16 |   Source        src;      // set up by CCtxInit from srcname, srcbuf and srclen
17 |   Memory        mem; // memory used only during compilation, like AST nodes
18 | } CCtx;
19 | 
20 | // CCtxInit initializes and/or recycles a CCtx
21 | void CCtxInit(
22 |   CCtx*,
23 |   ErrorHandler* errh,
24 |   void*         userdata,
25 |   Str           srcname,
26 |   const u8*     srcbuf,  // caller owns
27 |   size_t        srclen
28 | );
29 | void CCtxFree(CCtx*); // frees the Source and the compilation memory (but not srcbuf)
30 | void CCtxErrorf(const CCtx* cc, SrcPos pos, const char* format, ...); // reports to errh
31 | 


--------------------------------------------------------------------------------
/src/build/buildctx.c:
--------------------------------------------------------------------------------
 1 | // CCtx compilation context
 2 | #include "build.h"
 3 | 
 4 | // CCtxInit initializes cc, or recycles a cc which has been initialized before.
 5 | void CCtxInit(
 6 |   CCtx*         cc,
 7 |   ErrorHandler* errh,
 8 |   void*         userdata,
 9 |   Str           srcname,
10 |   const u8*     srcbuf,
11 |   size_t        srclen
12 | ) {
13 |   cc->errh = errh;
14 |   cc->userdata = userdata;
15 | 
16 |   // A non-NULL name means that cc holds a Source from an earlier CCtxInit call.
17 |   // The previous srcbuf is never freed here since it is owned by the caller.
18 |   bool isRecycled = cc->src.name != NULL;
19 |   if (isRecycled) {
20 |     SourceFree(&cc->src);
21 |   }
22 |   SourceInit(&cc->src, srcname, srcbuf, srclen);
23 | 
24 |   cc->mem = MemoryNew(0); // NOTE(review): a previous cc->mem is not freed; confirm intended
25 | }
26 | 
27 | 
28 | void CCtxFree(CCtx* cc) {
29 |   // Releases what cc owns: its Source and its compilation memory.
30 |   // The source contents (cc->src.buf) are not freed here; that buffer is owned by the
31 |   // caller of CCtxInit. SourceFree resets cc->src.name to NULL, so cc can be passed to
32 |   // CCtxInit again afterwards.
33 |   SourceFree(&cc->src);
34 |   MemoryFree(cc->mem);
35 | }
36 | 
37 | 
38 | void CCtxErrorf(const CCtx* cc, SrcPos pos, const char* format, ...) {
39 |   // Formats a printf-style message and reports it to cc->errh. No-op without a handler.
40 |   if (!cc->errh) { return; }
41 |   auto text = sdsempty();
42 |   va_list args;
43 |   va_start(args, format);
44 |   if (format[0] != '\0') {
45 |     text = sdscatvprintf(text, format, args);
46 |     // an empty result hints at a format using %S, which sdscatvprintf does not support
47 |     assert(sdslen(text) > 0);
48 |   }
49 |   va_end(args);
50 |   cc->errh(&cc->src, pos, text, cc->userdata);
51 |   sdsfree(text);
52 | }
53 | 


--------------------------------------------------------------------------------
/src/build/source.c:
--------------------------------------------------------------------------------
  1 | #include "build.h"
  2 | #include "../common/tstyle.h"
  3 | 
  4 | // SourceInit sets up s with its own copy of name; buf is only referenced, not copied.
  5 | void SourceInit(Source* s, Str name, const u8* buf, size_t len) {
  6 |   s->buf = buf;
  7 |   s->len = len;
  8 |   s->_linecount = 0;
  9 |   s->_lineoffsets = NULL; // computed on demand
 10 |   s->name = sdsdup(name);
 11 | }
 12 | 
 13 | // SourceFree releases the name and any computed line offsets. s->buf is left untouched.
 14 | void SourceFree(Source* s) {
 15 |   if (s->_lineoffsets != NULL) {
 16 |     memfree(NULL, s->_lineoffsets);
 17 |     s->_lineoffsets = NULL;
 18 |   }
 19 |   sdsfree(s->name);
 20 |   s->name = NULL;
 21 | }
 22 | 
 23 | 
 24 | static void computeLineOffsets(Source* s) {
 25 |   // builds s->_lineoffsets (offset of the start of each line) and sets s->_linecount
 26 |   assert(s->_lineoffsets == NULL);
 27 |   size_t cap = 256; // initial capacity guess; doubled whenever it runs out
 28 |   u32* offsets = (u32*)memalloc(NULL, sizeof(u32) * cap);
 29 |   offsets[0] = 0; // the first line always starts at offset 0
 30 | 
 31 |   u32 nlines = 1;
 32 |   for (u32 i = 0; i < s->len; ) {
 33 |     if (s->buf[i++] != '\n') {
 34 |       continue;
 35 |     }
 36 |     if (nlines == cap) {
 37 |       // out of space; double the capacity
 38 |       cap = cap * 2;
 39 |       offsets = (u32*)memrealloc(NULL, offsets, sizeof(u32) * cap);
 40 |     }
 41 |     offsets[nlines++] = i; // i is now the offset just past the line break
 42 |   }
 43 | 
 44 |   s->_lineoffsets = offsets;
 45 |   s->_linecount = nlines;
 46 | }
 47 | 
 48 | // SrcPosLineCol returns the zero-based line and column of pos, or {0,0} for NoSrcPos.
 49 | LineCol SrcPosLineCol(SrcPos pos) {
 50 |   Source* s = pos.src;
 51 |   if (s == NULL) {
 52 |     // NoSrcPos
 53 |     LineCol lico = { 0, 0 };
 54 |     return lico;
 55 |   }
 56 | 
 57 |   if (!s->_lineoffsets) {
 58 |     computeLineOffsets(s);
 59 |   }
 60 | 
 61 |   if (pos.offs >= s->len) { dlog("pos.offs=%u >= s->len=%zu", pos.offs, s->len); }
 62 |   assert(pos.offs < s->len);
 63 | 
 64 |   // binary search for the first line starting after pos.offs; runs until count reaches 0
 65 |   u32 count = s->_linecount;
 66 |   u32 line = 0;
 67 |   while (count > 0) {
 68 |     u32 step = count / 2;
 69 |     u32 i = line + step;
 70 |     if (s->_lineoffsets[i] <= pos.offs) {
 71 |       line = i + 1;
 72 |       count = count - step - 1;
 73 |     } else {
 74 |       count = step;
 75 |     }
 76 |   }
 77 |   LineCol lico = { line - 1, line > 0 ? pos.offs - s->_lineoffsets[line - 1] : pos.offs };
 78 |   return lico;
 79 | }
 80 | 
 81 | 
 82 | static const u8* lineContents(Source* s, u32 line, u32* out_len) {
 83 |   // returns a pointer to the start of the zero-based line, or NULL if there is no such
 84 |   // line. If out_len is not NULL, it receives the length of the line.
 85 |   if (s->_lineoffsets == NULL) { computeLineOffsets(s); }
 86 |   if (line >= s->_linecount) { return NULL; }
 87 |   u32 offs = s->_lineoffsets[line];
 88 |   const u8* ptr = s->buf + offs;
 89 |   if (out_len == NULL) {
 90 |     return ptr;
 91 |   }
 92 |   bool islast = line + 1 >= s->_linecount;
 93 |   if (islast) {
 94 |     *out_len = (s->buf + s->len) - ptr; // last line: extends to the end of the buffer
 95 |   } else {
 96 |     *out_len = (s->_lineoffsets[line + 1] - 1) - offs; // -1 excludes the line break
 97 |   }
 98 |   return ptr;
 99 | }
100 | 
101 | // SrcPosFmt appends "<file>:<line>:<col>" (one-based line and column) to s.
102 | Str SrcPosFmt(Str s, SrcPos pos) {
103 |   auto l = SrcPosLineCol(pos);
104 |   // the fallback name is a plain string literal; an sdsnew() copy here would never be freed
105 |   return sdscatfmt(s, "%s:%u:%u", pos.src ? pos.src->name : "<input>", l.line + 1, l.col + 1);
106 | }
107 | 
108 | // SrcPosMsg appends message, prefixed by the position, and the marked source line to s.
109 | Str SrcPosMsg(Str s, SrcPos pos, ConstStr message) {
110 |   auto l = SrcPosLineCol(pos);
111 |   s = sdscatfmt(s, "%s%s:%u:%u: %S%s\n",
112 |     TStyleTable[TStyle_bold],
113 |     pos.src ? pos.src->name : "<input>", l.line + 1, l.col + 1, // literal: nothing to free
114 |     message,
115 |     TStyle_none
116 |   );
117 |   s = TStyleNone(s);
118 | 
119 |   // include line contents
120 |   if (pos.src) {
121 |     u32 linelen;
122 |     auto lineptr = lineContents(pos.src, l.line, &linelen);
123 |     if (lineptr != null) {
124 |       s = sdscatlen(s, lineptr, linelen);
125 |     }
126 |     s = sdscatlen(s, "\n", 1);
127 | 
128 |     // draw a squiggle (or caret when span is unknown) decorating the interesting range
129 |     if (l.col > 0) {
130 |       // indentation
131 |       s = sdsgrow(s, sdslen(s) + l.col, ' ');
132 |     }
133 |     if (pos.span > 0) {
134 |       s = sdsgrow(s, sdslen(s) + pos.span + 1, '~');
135 |       s[sdslen(s)-1] = '\n';
136 |     } else {
137 |       s = sdscatlen(s, "^\n", 2);
138 |     }
139 |   }
140 | 
141 |   return s;
142 | }
143 | 
144 | 


--------------------------------------------------------------------------------
/src/build/source.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "../common/defs.h"
 3 | #include "../common/str.h"
 4 | 
 5 | // Source describes a named input buffer
 6 | typedef struct {
 7 |   Str       name;      // copy made by SourceInit; freed by SourceFree
 8 |   const u8* buf;       // owned by caller
 9 |   size_t    len;       // length of buf
10 |   u32*      _lineoffsets; // offset of the start of each line; NULL until computed
11 |   u32       _linecount;   // number of entries in _lineoffsets
12 | } Source;
13 | 
14 | // SrcPos is a position, with an optional span, in a Source
15 | // TODO: consider implementing something like lico and Pos/XPos from go
16 | //       https://golang.org/src/cmd/internal/src/pos.go
17 | //       https://golang.org/src/cmd/internal/src/xpos.go
18 | typedef struct {
19 |   Source* src;   // source
20 |   u32     offs;  // offset into src->buf
21 |   u32     span;  // span length. 0 = unknown or does not apply.
22 | } SrcPos;
23 | 
24 | // NoSrcPos is the "null" of SrcPos
25 | #define NoSrcPos (({ SrcPos p = {NULL,0,0}; p; }))
26 | 
27 | // LineCol is a zero-based line and column pair
28 | typedef struct { u32 line; u32 col; } LineCol;
29 | 
30 | void SourceInit(Source*, Str name, const u8* buf, size_t len);
31 | void SourceFree(Source*);
32 | Str SrcPosMsg(Str s, SrcPos, ConstStr message); // appends message and the source line to s
33 | Str SrcPosFmt(Str s, SrcPos pos); // "<file>:<line>:<col>"
34 | LineCol SrcPosLineCol(SrcPos);
35 | 


--------------------------------------------------------------------------------
/src/common/array.c:
--------------------------------------------------------------------------------
 1 | #include "array.h"
 2 | #include <stdlib.h> // for qsort_r
 3 | 
 4 | // ARRAY_CAP_STEP defines a power-of-two which the cap must be aligned to.
 5 | // This is used to round up growth. I.e. grow by 60 with a cap of 32 would increase the cap
 6 | // to 96 (= 32 + (align2(60, ARRAY_CAP_STEP=32) = 64)).
 7 | #define ARRAY_CAP_STEP 32
 8 | 
 9 | // SortCtx carries the comparator and its userdata through qsort_r
10 | typedef struct SortCtx {
11 |   ArraySortFun* f;
12 |   void*         userdata;
13 | } SortCtx;
14 | 
15 | static int _sort(void* ctx, const void* s1p, const void* s2p) {
16 |   // s1p and s2p point at array slots; dereference them to get the stored entries
17 |   const SortCtx* sc = (const SortCtx*)ctx;
18 |   const void* elem1 = *((const void**)s1p);
19 |   const void* elem2 = *((const void**)s2p);
20 |   return sc->f(elem1, elem2, sc->userdata);
21 | }
22 | 
23 | void ArraySort(Array* a, ArraySortFun* f, void* userdata) {
24 |   SortCtx ctx = { .f = f, .userdata = userdata };
25 |   qsort_r(a->v, a->len, sizeof(void*), &ctx, &_sort);
26 | }
27 | 
28 | // ArrayGrow increases the capacity of a by at least addl entries, allocating from mem.
29 | void ArrayGrow(Array* a, size_t addl, Memory mem) {
30 |   u32 wanted = a->cap + addl;
31 |   u32 newcap = align2(wanted, ARRAY_CAP_STEP); // round up to the capacity step
32 |   if (!a->onheap && a->v != NULL) {
33 |     // storage is not on the heap (e.g. space on the stack): move the entries to the heap
34 |     void** heapv = (void**)memalloc(mem, sizeof(void*) * newcap);
35 |     memcpy(heapv, a->v, sizeof(void*) * a->len);
36 |     a->v = heapv;
37 |     a->onheap = true;
38 |   } else {
39 |     a->v = memrealloc(mem, a->v, sizeof(void*) * newcap); // heap storage, or none yet
40 |   }
41 |   a->cap = newcap;
42 | }
43 | 
44 | int ArrayIndexOf(Array* nonull a, void* nullable entry) {
45 |   // linear scan comparing pointer values; returns the first matching index, or -1
46 |   u32 i = 0;
47 |   while (i < a->len && a->v[i] != entry) {
48 |     i++;
49 |   }
50 |   return i < a->len ? (int)i : -1;
51 | }
52 | 
53 | void ArrayRemove(Array* a, u32 start, u32 count) {
54 |   // Removes count entries, beginning at index start, by moving the tail of the array left.
55 |   // Example: ArrayRemove( [0 1 2 3 4 5 6 7] start=2 count=3 ) => [0 1 5 6 7]
56 |   //
57 |   //   dst=2 src=5  =>  [0 1 5 3 4 5 6 7]
58 |   //   dst=3 src=6  =>  [0 1 5 6 4 5 6 7]
59 |   //   dst=4 src=7  =>  [0 1 5 6 7 5 6 7]
60 |   //   len -= count =>  [0 1 5 6 7]
61 |   //
62 |   assert(start + count <= a->len);
63 |   u32 dst = start;
64 |   u32 src = start + count;
65 |   while (src < a->len) {
66 |     a->v[dst] = a->v[src];
67 |     dst++;
68 |     src++;
69 |   }
70 |   a->len -= count;
71 | }
72 | 
73 | 
74 | // ArrayCopy copies srclen entries from src to a, starting at a.v[start], growing a with mem.
75 | void ArrayCopy(Array* nonull a, u32 start, const void* src, u32 srclen, Memory nullable mem) {
76 |   u32 endidx = start + srclen; // one past the last index written
77 |   if (endidx > a->cap && a->v == NULL) {
78 |     // nothing allocated yet: allocate exactly the size needed
79 |     a->v = (void*)memalloc(mem, sizeof(void*) * endidx);
80 |     a->cap = endidx;
81 |     a->onheap = true;
82 |   } else if (endidx > a->cap) {
83 |     ArrayGrow(a, endidx - a->cap, mem);
84 |   }
85 |   memcpy(&a->v[start], src, srclen * sizeof(void*));
86 |   if (a->len < endidx) {
87 |     a->len = endidx;
88 |   }
89 | }
90 | 


--------------------------------------------------------------------------------
/src/common/array.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "defs.h"
  3 | #include "memory.h"
  4 | 
  5 | // Array is a very simple growable array of pointers
  6 | typedef struct {
  7 |   void** v;       // entries
  8 |   u32    cap;     // capacity of v, in number of entries
  9 |   u32    len;     // number of entries in use
 10 |   bool   onheap;  // false if v is space on stack
 11 | } Array;
 12 | 
 13 | #define Array_INIT { NULL, 0, 0, true }
 14 | 
 15 | static void  ArrayInit(Array* nonull a);
 16 | static void  ArrayInitWithStorage(Array* nonull a, void* nonull storage, u32 storagecap);
 17 | static void  ArrayFree(Array* nonull a, Memory nullable mem);
 18 | void         ArrayGrow(Array* nonull a, size_t addl, nullable Memory mem); // cap=align2(cap+addl)
 19 | static void  ArrayPush(Array* nonull a, void* nullable v, Memory nullable mem);
 20 | static void* ArrayPop(Array* nonull a);
 21 | void         ArrayRemove(Array* nonull a, u32 start, u32 count);
 22 | int          ArrayIndexOf(Array* nonull a, void* nullable entry); // -1 on failure
 23 | 
 24 | // ArrayCopy copies srclen entries from src to a, starting at a.v[start], growing a with m.
 25 | void ArrayCopy(Array* nonull a, u32 start, const void* src, u32 srclen, Memory nullable m);
 26 | 
 27 | // The comparison function must return an integer less than, equal to, or greater than zero if
 28 | // the first argument is considered to be respectively less than, equal to, or greater than the
 29 | // second.
 30 | typedef int (ArraySortFun)(const void* elem1, const void* elem2, void* userdata);
 31 | 
 32 | // ArraySort sorts the array in place using comparator to rank entries
 33 | void ArraySort(Array* a, ArraySortFun* comparator, void* userdata);
 34 | 
 35 | // Macros:
 36 | //   ArrayForEach(Array* nonull a, TYPE elemtype, NAME elemname) <body>
 37 | //
 38 | 
 39 | // ------------------------------------------------------------------------------------------------
 40 | // inline implementations
 41 | 
 42 | inline static void ArrayInit(Array* nonull a) {
 43 |   // empty array without storage, in the same state as Array_INIT. It is marked onheap,
 44 |   // so that storage added later is treated as heap memory.
 45 |   Array empty = Array_INIT;
 46 |   *a = empty;
 47 | }
 48 | 
 49 | inline static void ArrayInitWithStorage(Array* nonull a, void* nonull ptr, u32 cap){
 50 |   // a starts out empty, using caller-provided storage with room for cap entries.
 51 |   // onheap=false marks the storage as not being heap memory of the array.
 52 |   Array withStorage = { .v = ptr, .cap = cap, .len = 0, .onheap = false };
 53 |   *a = withStorage;
 54 | }
 55 | 
 56 | inline static void ArrayFree(Array* a, Memory mem) {
 57 |   // frees the storage only when it is on the heap; other storage is left alone
 58 |   if (!a->onheap) {
 59 |     return;
 60 |   }
 61 |   memfree(mem, a->v);
 62 |   #if DEBUG
 63 |   a->v = NULL; a->cap = 0;
 64 |   #endif
 65 | }
 66 | 
 67 | inline static void ArrayPush(Array* a, void* v, Memory mem) {
 68 |   // appends v, growing the array (using mem) when it is full
 69 |   if (a->len == a->cap) { ArrayGrow(a, 1, mem); }
 70 |   a->v[a->len] = v;
 71 |   a->len++;
 72 | }
 73 | 
 74 | inline static void* ArrayPop(Array* a) {
 75 |   return a->len == 0 ? NULL : a->v[--a->len]; // NULL when empty; cap is unchanged
 76 | }
 77 | 
 78 | #define ArrayForEach(a, ELEMTYPE, LOCALNAME)                                     \
 79 |   /* Iterates over Array* a; LOCALNAME is an ELEMTYPE* inside the body. */       \
 80 |   /* The outer "for" only introduces LOCALNAME and runs exactly once. It */      \
 81 |   /* never reads a->v, so empty arrays (where v may be NULL) are safe.   */      \
 82 |   for (ELEMTYPE* LOCALNAME = NULL, *LOCALNAME##__once = (ELEMTYPE*)(void*)(a);   \
 83 |        LOCALNAME##__once != NULL;                                                \
 84 |        LOCALNAME##__once = NULL)                                                 \
 85 |   /* actual loop; an entry is loaded only after its index passed the check */    \
 86 |   for (u32 LOCALNAME##__i = 0, LOCALNAME##__end = (a)->len;                      \
 87 |        LOCALNAME##__i < LOCALNAME##__end &&                                      \
 88 |          ((LOCALNAME = (ELEMTYPE*)(a)->v[LOCALNAME##__i]), true);                \
 89 |        LOCALNAME##__i++                                                          \
 90 |   ) /* <body should follow here> */
 91 | 
 92 | 
 93 | // static void ArrayInit(Array* a) {
 94 | //   a->v = NULL;
 95 | //   a->cap = a->len = 0;
 96 | //   a->onheap = true;
 97 | // }
 98 | 
 99 | // Better to use Array_INIT
100 | // Array_STACK_INIT(u32 capacity) => Array
101 | // #define Array_STACK_INIT(capacity) (({ \
102 | //   void* __ArrayStackStorage__##__LINE__[capacity]; \
103 | //   Array a = { __ArrayStackStorage__##__LINE__, 0, (capacity), false }; \
104 | //   a; \
105 | // }))
106 | 
107 | 
108 | // #define ArrayForEach1(a, ELEMTYPE, ELEMNAME, body)                                        \
109 | //   do { for (u32 __i_ArrayForEach = 0; __i_ArrayForEach < (a)->len; __i_ArrayForEach++) { \
110 | //     ELEMTYPE* ELEMNAME = (ELEMTYPE*)(a)->v[__i_ArrayForEach];                            \
111 | //     { body }                                                                             \
112 | //   } } while(0)
113 | 


--------------------------------------------------------------------------------
/src/common/array_test.c:
--------------------------------------------------------------------------------
  1 | #include "test.h"
  2 | #include "array.h"
  3 | 
  4 | #define ARRAY_CAP_STEP 32 /* copied from array.c */
  5 | 
  6 | W_UNIT_TEST(Array, {
  7 |   // Exercises the Array functions, using small integers cast to void* as entries.
  8 |   { // starts empty and immediately becomes fully heap allocated
  9 |     Array a = Array_INIT;
 10 |     ArrayPush(&a, (void*)1, NULL); // visits ArrayGrow's "onheap" branch
 11 |     ArrayPush(&a, (void*)2, NULL);
 12 |     ArrayPush(&a, (void*)3, NULL);
 13 | 
 14 |     asserteq(a.len, 3);
 15 |     asserteq(a.cap, ARRAY_CAP_STEP);
 16 |     asserteq((int)a.v[0], 1);
 17 |     asserteq((int)a.v[1], 2);
 18 |     asserteq((int)a.v[2], 3);
 19 | 
 20 |     asserteq(ArrayIndexOf(&a, (void*)2), 1);
 21 |     asserteq(ArrayIndexOf(&a, (void*)4), -1);
 22 | 
 23 |     asserteq((int)ArrayPop(&a), 3);
 24 |     asserteq((int)ArrayPop(&a), 2);
 25 |     asserteq((int)ArrayPop(&a), 1);
 26 | 
 27 |     asserteq(a.len, 0);
 28 |     asserteq(a.cap, ARRAY_CAP_STEP);
 29 |     ArrayFree(&a, NULL);
 30 |   }
 31 | 
 32 |   { // initially stack allocated, then moves to heap
 33 |     Array a; void* storage[2];
 34 |     ArrayInitWithStorage(&a, storage, 2);
 35 |     asserteq(a.onheap, false);
 36 |     ArrayPush(&a, (void*)1, NULL);
 37 |     asserteq(a.onheap, false);
 38 |     ArrayPush(&a, (void*)2, NULL);
 39 |     asserteq(a.onheap, false);
 40 |     ArrayPush(&a, (void*)3, NULL);  // visits ArrayGrow's "move stack to heap" branch
 41 |     asserteq(a.onheap, true); // should have moved to heap
 42 | 
 43 |     asserteq(a.len, 3);
 44 |     asserteq(a.cap, ARRAY_CAP_STEP);
 45 |     asserteq((int)a.v[0], 1);
 46 |     asserteq((int)a.v[1], 2);
 47 |     asserteq((int)a.v[2], 3);
 48 |     asserteq((int)ArrayPop(&a), 3);
 49 |     asserteq((int)ArrayPop(&a), 2);
 50 |     asserteq((int)ArrayPop(&a), 1);
 51 |     asserteq(a.len, 0);
 52 |     asserteq(a.cap, ARRAY_CAP_STEP);
 53 |     ArrayFree(&a, NULL);
 54 |   }
 55 | 
 56 |   { // ArrayCopy
 57 |     Array a = Array_INIT;
 58 |     for (intptr_t i = 0; i < 10; i++) {
 59 |       ArrayPush(&a, (void*)i, NULL);
 60 |     }
 61 |     // copy to an empty array. Causes initial, exact allocation
 62 |     Array a2 = Array_INIT;
 63 |     ArrayCopy(&a2, 0, a.v, a.len, NULL);
 64 |     asserteq(a2.len, 10);
 65 |     asserteq(a2.cap, 10); // should be exact after copy into empty array, not ARRAY_CAP_STEP
 66 |     ArrayPush(&a2, (void*)10, NULL);
 67 |     asserteq(a2.cap, align2(11, ARRAY_CAP_STEP)); // should have grown
 68 | 
 69 |     // copy to a non-empty array. Causes growth
 70 |     u32 nitems = (a2.cap - a2.len) + 1;
 71 |     auto items = (void**)memalloc(NULL, nitems * sizeof(void*));
 72 |     auto len1 = a2.len;
 73 |     ArrayCopy(&a2, len1, items, nitems, NULL);
 74 |     asserteq(a2.len, len1 + nitems);
 75 |     memfree(NULL, items);
 76 | 
 77 |     ArrayFree(&a2, NULL);
 78 |     ArrayFree(&a, NULL);
 79 |   }
 80 | 
 81 |   { // ArrayRemove
 82 |     Array a = Array_INIT;
 83 |     // a.v = [0 1 2 3 4 5 6 7 8 9]
 84 |     for (intptr_t i = 0; i < 10; i++) {
 85 |       ArrayPush(&a, (void*)i, NULL);
 86 |     }
 87 |     for (intptr_t i = 0; i < 10; i++) {
 88 |       asserteq(a.v[i], (void*)i);
 89 |     }
 90 |     asserteq(a.len, 10);
 91 | 
 92 |     // delete in middle
 93 |     // [0 1 2 3 4 5 6 7 8 9] => [0 1 6 7 8 9]
 94 |     //      ~~~~~~~
 95 |     Array a2 = Array_INIT;
 96 |     ArrayCopy(&a2, 0, a.v, a.len, NULL);
 97 |     asserteq(a2.len, 10);
 98 |     ArrayRemove(&a2, 2, 4);
 99 |     asserteq(a2.len, 6);
100 |     asserteq(a2.v[0], (void*)0);
101 |     asserteq(a2.v[1], (void*)1);
102 |     asserteq(a2.v[2], (void*)6);
103 |     asserteq(a2.v[3], (void*)7);
104 |     asserteq(a2.v[4], (void*)8);
105 |     asserteq(a2.v[5], (void*)9);
106 | 
107 |     // delete at beginning
108 |     // [0 1 2 3 4 5 6 7 8 9] => [4 5 6 7 8 9]
109 |     //  ~~~~~~~
110 |     a2.len = 0;
111 |     ArrayCopy(&a2, 0, a.v, a.len, NULL);
112 |     asserteq(a2.len, 10);
113 |     ArrayRemove(&a2, 0, 4);
114 |     asserteq(a2.len, 6);
115 |     asserteq(a2.v[0], (void*)4);
116 |     asserteq(a2.v[1], (void*)5);
117 |     asserteq(a2.v[2], (void*)6);
118 |     asserteq(a2.v[3], (void*)7);
119 |     asserteq(a2.v[4], (void*)8);
120 |     asserteq(a2.v[5], (void*)9);
121 | 
122 |     // delete at end
123 |     // [0 1 2 3 4 5 6 7 8 9] => [0 1 2 3 4 5]
124 |     //              ~~~~~~~
125 |     a2.len = 0;
126 |     ArrayCopy(&a2, 0, a.v, a.len, NULL);
127 |     asserteq(a2.len, 10);
128 |     ArrayRemove(&a2, 6, 4);
129 |     asserteq(a2.len, 6);
130 |     asserteq(a2.v[0], (void*)0);
131 |     asserteq(a2.v[1], (void*)1);
132 |     asserteq(a2.v[2], (void*)2);
133 |     asserteq(a2.v[3], (void*)3);
134 |     asserteq(a2.v[4], (void*)4);
135 |     asserteq(a2.v[5], (void*)5);
136 | 
137 |     ArrayFree(&a2, NULL);
138 |     ArrayFree(&a, NULL);
139 |   }
140 | 
141 | })
142 | 


--------------------------------------------------------------------------------
/src/common/assert.c:
--------------------------------------------------------------------------------
  1 | #include <execinfo.h>
  2 | 
  3 | #include "assert.h"
  4 | #include "os.h"
  5 | #include "tstyle.h"
  6 | 
  7 | #include "test.h"
  8 | 
  9 | 
 10 | // fprintSourceFile prints the lines of file around line (one-based), with up to
 11 | // contextLines lines before and after it, to fp. The line itself is marked with ">" and
 12 | // is highlighted if colors is true. Returns false if the file could not be read.
 13 | static bool fprintSourceFile(
 14 |   FILE* nonull fp,
 15 |   const char* nonull file,
 16 |   u32 line,
 17 |   u32 contextLines,
 18 |   bool colors
 19 | ) {
 20 |   // try to read source file
 21 |   size_t srclen = 1024*1024; // read limit
 22 |   auto srcbuf = os_readfile(file, &srclen, NULL);
 23 |   if (srcbuf == NULL) {
 24 |     return false;
 25 |   }
 26 |   // NOTE(review): srcbuf is never released here; confirm how os_readfile memory is freed
 27 |   int len = (int)srclen;
 28 |   int lineno = 1;
 29 |   int target = (int)line;
 30 |   // computed without unsigned wrap-around for the case line < contextLines
 31 |   int linemin = line > contextLines ? (int)(line - contextLines) : 0;
 32 |   int linemax = (int)(line + contextLines);
 33 |   int linestart = 0;
 34 |   bool tail = true;
 35 | 
 36 |   for (int i = 0; i < len; i++) {
 37 |     if (srcbuf[i] != '\n') {
 38 |       continue;
 39 |     }
 40 |     if (lineno == target) {
 41 |       fprintf(fp, "%s%-4d >%s %.*s\n",
 42 |         colors ? TStyleTable[TStyle_inverse] : "",
 43 |         lineno,
 44 |         colors ? TStyle_none : "",
 45 |         i - linestart,
 46 |         &srcbuf[linestart]
 47 |       );
 48 |     } else if (linemin <= lineno && lineno <= linemax) {
 49 |       fprintf(fp, "%-4d   %.*s\n", lineno, i - linestart, &srcbuf[linestart]);
 50 |     }
 51 |     if (lineno == linemax) {
 52 |       tail = false;
 53 |       break;
 54 |     }
 55 |     lineno++;
 56 |     linestart = i + 1;
 57 |   }
 58 |   if (tail) { // reached the end of the file before line linemax was printed
 59 |     fprintf(fp, "% 4d   %.*s\n", lineno, len - linestart, &srcbuf[linestart]);
 60 |   }
 61 |   return true;
 62 | }
 62 | 
 63 | 
 64 | static void fprintStackTrace(FILE* nonull fp, int offsetFrames) {
 65 |   // prints the symbolized call stack to fp, starting at frame offsetFrames + 1.
 66 |   // Prints nothing if no frames or no symbols could be retrieved.
 67 |   void* frames[200];
 68 |   int nframes = backtrace(frames, countof(frames));
 69 |   if (nframes <= 0) { return; }
 70 |   char** symbols = backtrace_symbols(frames, nframes);
 71 |   if (symbols == NULL) { return; }
 72 |   fprintf(fp, "Call stack:\n");
 73 |   int i = offsetFrames + 1;
 74 |   while (i < nframes) {
 75 |     fprintf(fp, "  %s\n", symbols[i++]);
 76 |   }
 77 |   free(symbols);
 78 | }
 79 | 
 80 | 
 81 | void WAssertf(const char* srcfile, int srcline, const char* nonull format, ...) {
 82 |   // writes to stderr: the formatted message, source code around srcline (if srcfile is
 83 |   // given), and finally a stack trace. This function does not call abort itself.
 84 |   bool useColors = TSTyleStderrIsTTY();
 85 |   va_list args;
 86 |   va_start(args, format);
 87 |   vfprintf(stderr, format, args);
 88 |   va_end(args);
 89 |   fputc('\n', stderr);
 90 |   if (srcfile) { fprintSourceFile(stderr, srcfile, srcline, /* contextLines */ 3, useColors); }
 91 |   fprintStackTrace(stderr, /* offsetFrames = */ 1);
 92 | }
 93 | 
 94 | 
 95 | // _assert_joinstr concatenates s1 and all following strings, up to a terminating NULL
 96 | // argument. The result lives in a static buffer which is overwritten by the next call.
 97 | // Input that does not fit in the buffer fails an assertion (or is truncated.)
 98 | const char* _assert_joinstr(const char* s1, ... /* NULL terminated */) {
 99 |   static char buf[256] = {0};
100 |   char* p = buf;
101 |   char* const end = buf + sizeof(buf) - 1; // the last byte is reserved for the terminator
102 | 
103 |   va_list ap;
104 |   va_start(ap, s1);
105 |   for (const char* s = s1; s != NULL; s = va_arg(ap, const char*)) {
106 |     size_t len = strlen(s);
107 |     size_t avail = (size_t)(end - p);
108 |     // check bounds before writing anything. In builds where assertf is compiled out, the
109 |     // input is truncated rather than written past the end of buf.
110 |     assertf(len <= avail, "overflow");
111 |     if (len > avail) { len = avail; }
112 |     memcpy(p, s, len);
113 |     p += len;
114 |   }
115 |   va_end(ap);
116 | 
117 |   *p = '\0';
118 |   return buf;
119 | }
120 | 
121 | 
122 | // Note: Since this prints to stdout and stderr, only enable this in the "test" product
123 | #ifdef W_TEST_BUILD
124 |   W_UNIT_TEST(Assert, {
125 |     const char* pch = _assert_joinstr("aa", "bb", "cc", NULL);
126 |     assert(memcmp(pch, "aabbcc", 6) == 0);
127 |     asserteq(pch[6], 0);
128 | 
129 |     // non-existent file
130 |     fprintf(stdout, "----- START TEST OUTPUT -----\n");
131 |     fprintSourceFile(stdout, __FILE__ ".xxx", 1, /* contextLines */ 3, /*colors*/ true);
132 |     // no colors, no linebreak at end
133 |     fprintSourceFile(stdout, "test/file-no-final-line-break",
134 |       2, /* contextLines */ 3, /*colors*/ false);
135 | 
136 |     fprintf(stdout, "----- THE BELOW ASSERTION IS EXPECTED TO FAIL -----\n");
137 |     WAssertf(__FILE__, __LINE__, "%s:%d: test %d", __FILE__, __LINE__, 123);
138 |     fprintf(stdout, "----- THE ABOVE ASSERTION IS EXPECTED TO FAIL -----\n");
139 |     fprintf(stdout, "----- END TEST OUTPUT -----\n");
140 |   })
141 | #endif
142 | 


--------------------------------------------------------------------------------
/src/common/assert.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "defs.h"
 3 | #include <assert.h>
 4 | //
 5 | // assertion testing
 6 | //
 7 | // assert((bool)cond)
 8 | //   prints error and calls abort() if cond is false
 9 | //   debug builds: active
10 | //   opt build:    eliminated
11 | //
12 | // assertf((bool)cond, const char* format, ...)
13 | //   prints format with arguments and calls abort() if cond is false.
14 | //   debug builds: active
15 | //   opt build:    eliminated
16 | //
17 | // checknull<T>(T expr) -> T
18 | //   evaluates expr and if the result is null, calls assert(result). Returns result.
19 | //   debug builds: active
20 | //   opt build:    pass-through
21 | //
22 | 
23 | // WAssertf prints message with format, including a stack trace if available.
24 | // If srcfile != NULL, attempts to print source code around srcline.
25 | void WAssertf(const char* nullable srcfile, int srcline, const char* nonull format, ...);
26 | 
27 | #ifdef assert
28 |   #undef assert
29 | #endif
30 | 
31 | #ifdef DEBUG
32 |   #define assertf(cond, format, ...)                                                     \
33 |     ({ if (!(cond)) {                                                                    \
34 |       WAssertf(__FILE__, __LINE__, "%s:%d: " format, __FILE__, __LINE__, ##__VA_ARGS__); \
35 |       abort();                                                                           \
36 |     } })
37 |   // assert(cond) reports the source text of cond via WAssertf and aborts if cond is false
38 |   #define assert(cond)                                                      \
39 |     ({ if (!(cond)) {                                                       \
40 |       WAssertf(__FILE__, __LINE__, "%s:%d: %s", __FILE__, __LINE__, #cond); \
41 |       abort();                                                              \
42 |     } })
43 | 
44 |   const char* _assert_joinstr(const char* s1, ... /* NULL terminated */);
45 |   // asserteq(expr, expect) reports both values via WAssertf and aborts if they differ
46 |   #define asserteq(expr, expect)                                           \
47 |     ({ auto actual = (expr);                                               \
48 |        auto expected = (expect);                                           \
49 |       if (actual != expected) {                                            \
50 |         WAssertf(__FILE__, __LINE__,                                       \
51 |           _assert_joinstr("%s:%d: %s ; got ", WFormatForValue(actual),     \
52 |                           ", expected ", WFormatForValue(expected), NULL), \
53 |           __FILE__, __LINE__, #expr, actual, expected);                    \
54 |         abort();                                                           \
55 |       }                                                                    \
56 |     })
57 |   // checknull(expr) evaluates expr once, asserts that it is not NULL and yields its value
58 |   #define checknull(expr) \
59 |     ({ auto v = (expr); assert(v != NULL); v; })
60 | 
61 | #else
62 |   #define assertf(cond, format, ...) do{}while(0)
63 |   #define assert(cond, ...)          do{}while(0)
64 |   #define asserteq(expr, expect)     do{}while(0)
65 |   #define checknull(expr)            (expr) /* parenthesized: safe inside any expression */
66 | #endif
67 | 
68 | 


--------------------------------------------------------------------------------
/src/common/buf.c:
--------------------------------------------------------------------------------
 1 | #include "defs.h"
 2 | #include "memory.h"
 3 | #include "buf.h"
 4 | 
 5 | // Do not allocate more than this much extra memory in a call to _BufMakeRoomFor
 6 | #define BUF_MAX_PREALLOC (1024*1024)
 7 | 
 8 | void BufInit(Buf* b, Memory mem, size_t cap) {
 9 |   // initializes b as an empty buffer which allocates from mem. If cap is not zero,
10 |   // storage for cap bytes is allocated right away.
11 |   u8* storage = NULL;
12 |   if (cap != 0) { storage = (u8*)memalloc(mem, cap); }
13 |   b->mem = mem;
14 |   b->ptr = storage;
15 |   b->cap = cap;
16 |   b->len = 0;
17 | }
18 | 
19 | void BufFree(Buf* b) {
20 |   // frees the storage of b, if there is any.
21 |   // When DEBUG is enabled, b is additionally zeroed.
22 |   if (b->ptr) { memfree(b->mem, b->ptr); }
23 |   #if DEBUG
24 |   memset(b, 0, sizeof(*b));
25 |   #endif
26 | }
27 | 
28 | void _BufMakeRoomFor(Buf* b, size_t size) {
29 |   // Grows b so that size more bytes fit after b->len, plus some extra space in
30 |   // anticipation of further growth.
31 |   size_t needed = align2(b->len + size, 32);
32 | 
33 |   // The extra space equals the needed capacity (i.e. the capacity is doubled) below the
34 |   // limit BUF_MAX_PREALLOC. At or above the limit, BUF_MAX_PREALLOC is added instead.
35 |   size_t extra = BUF_MAX_PREALLOC;
36 |   if (needed < BUF_MAX_PREALLOC) { extra = needed; }
37 | 
38 |   b->cap = needed + extra;
39 |   b->ptr = memrealloc(b->mem, b->ptr, b->cap);
40 | }
41 | 
42 | // BufAppend copies size bytes from ptr to the end of b, growing b when needed.
43 | void BufAppend(Buf* b, const void* ptr, size_t size) {
44 |   BufMakeRoomFor(b, size);
45 |   memcpy(b->ptr + b->len, ptr, size);
46 |   b->len = b->len + size;
47 | }
48 | // BufAlloc reserves size bytes at the end of b and returns a pointer to them.
49 | u8* BufAlloc(Buf* b, size_t size) {
50 |   BufMakeRoomFor(b, size);
51 |   size_t offs = b->len;
52 |   b->len = offs + size;
53 |   return b->ptr + offs;
54 | }
55 | 
56 | u8* BufAllocz(Buf* b, size_t size) {
57 |   // reserves size bytes at the end of b, sets them to zero and returns a pointer to them
58 |   BufMakeRoomFor(b, size);
59 |   u8* seg = b->ptr + b->len;
60 |   b->len += size;
61 |   return memset(seg, 0, size);
62 | }
63 | 
64 | void BufAppendFill(Buf* b, u8 v, size_t size) {
65 |   // appends size bytes to b, each of value v
66 |   u8* dst = BufAlloc(b, size);
67 |   memset(dst, v, size);
68 | }


--------------------------------------------------------------------------------
/src/common/buf.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | typedef struct Buf {
 4 |   Memory mem; // used for allocating and freeing ptr
 5 |   u8*    ptr; // storage; NULL when initialized with a capacity of 0
 6 |   size_t cap; // size of ptr, in bytes
 7 |   size_t len; // number of bytes in use
 8 | } Buf;
 9 | 
10 | void BufInit(Buf* nonull b, Memory nullable mem, size_t cap);
11 | void BufFree(Buf* nonull b);
12 | static void BufMakeRoomFor(Buf* nonull b, size_t size); // ensures free space for at least size
13 | void BufAppend(Buf* nonull b, const void* nonull ptr, size_t size);
14 | void BufAppendFill(Buf* nonull b, u8 v, size_t size); // append size bytes of value v
15 | static void BufAppendc(Buf* b, char c); // append one byte
16 | u8*  BufAlloc(Buf* nonull b, size_t size); // like BufAppend but leaves allocated data untouched.
17 | u8*  BufAllocz(Buf* nonull b, size_t size); // zeroes segment
18 | 
19 | void _BufMakeRoomFor(Buf* b, size_t size); // always reallocates; see BufMakeRoomFor
20 | 
21 | inline static void BufAppendc(Buf* b, char c) {
22 |   if (b->len >= b->cap) { _BufMakeRoomFor(b, 1); } // no free space: grow before writing
23 |   b->ptr[b->len++] = (u8)c;
24 | }
25 | 
26 | inline static void BufMakeRoomFor(Buf* b, size_t size) {
27 |   // grows b only when fewer than size bytes are free
28 |   size_t avail = b->cap - b->len;
29 |   if (avail < size) { _BufMakeRoomFor(b, size); }
30 | }
31 | 
32 | 
33 | // DefArrayBuffer allows defining a type and a set of porcelain functions around Buf
34 | // to make it act as an array holding elements of any type (ElemT).
35 | // Counts and indices are in elements; the underlying Buf keeps len and cap in bytes.
36 | // Prototypes:
37 | //
38 | //  void   ArrayT##Init(ArrayT* nonull a, Memory nullable mem, size_t cap);
39 | //  void   ArrayT##Free(ArrayT* nonull a);
40 | //  ElemT* ArrayT##At(ArrayT* nonull a, size_t index);       -- index is not bounds-checked
41 | //  void   ArrayT##Push(ArrayT* nonull a, ElemT v);
42 | //  ElemT  ArrayT##Pop(ArrayT* nonull a);                    -- a must not be empty (not checked)
43 | //  ElemT* ArrayT##Alloc(ArrayT* nonull a, size_t count);    -- new elements are left untouched
44 | //  void   ArrayT##MakeRoomFor(ArrayT* nonull a, size_t count);
45 | //
46 | #define DefArrayBuffer(ArrayT, ElemT) \
47 |   typedef Buf ArrayT; \
48 |   inline static void ArrayT##Init(ArrayT* nonull a, Memory nullable mem, size_t cap) { \
49 |     BufInit(a, mem, cap * sizeof(ElemT)); \
50 |   } \
51 |   inline static void ArrayT##Free(ArrayT* nonull a) { BufFree(a); } \
52 |   inline static ElemT* ArrayT##At(ArrayT* nonull a, size_t i) { \
53 |   	return (ElemT*)&a->ptr[i * sizeof(ElemT)]; \
54 |   } \
55 |   inline static void ArrayT##Push(ArrayT* nonull a, ElemT v) { BufAppend(a, &v, sizeof(ElemT)); }\
56 |   inline static ElemT ArrayT##Pop(ArrayT* nonull a) { \
57 |   	a->len -= sizeof(ElemT); \
58 |   	return *(ElemT*)&a->ptr[a->len]; \
59 |   } \
60 |   inline static ElemT* ArrayT##Alloc(ArrayT* nonull a, size_t count) { \
61 |     return (ElemT*)BufAlloc(a, count * sizeof(ElemT)); \
62 |   } \
63 |   inline static void ArrayT##MakeRoomFor(ArrayT* nonull a, size_t count) { \
64 |     BufMakeRoomFor(a, count * sizeof(ElemT)); \
65 |   } \
66 | /* DefArrayBuffer */
67 | 
68 | // ArrayBufferForEach loops over the ELEMTYPE elements stored in b; LOCALNAME points at the current one.
69 | #define ArrayBufferForEach(b, ELEMTYPE, LOCALNAME)      \
70 |   /* this "for" introduces LOCALNAME */                 \
71 |   for (auto LOCALNAME = (ELEMTYPE*)&((b)->ptr[0]),      \
72 |             LOCALNAME##__guard = (ELEMTYPE*)NULL;       \
73 |        LOCALNAME##__guard == NULL;                      \
74 |        LOCALNAME##__guard++)                            \
75 |   /* actual for loop; byte offsets are size_t so that a len above UINT32_MAX is not truncated */ \
76 |   for (                                                 \
77 |     size_t LOCALNAME##__i = 0,                          \
78 |            LOCALNAME##__end = (b)->len;                 \
79 |     LOCALNAME = (ELEMTYPE*)&((b)->ptr[LOCALNAME##__i]), \
80 |     LOCALNAME##__i < LOCALNAME##__end;                  \
81 |     LOCALNAME##__i += sizeof(ELEMTYPE)                  \
82 |   ) /* <body should follow here> */
83 | 
84 | 


--------------------------------------------------------------------------------
/src/common/defs.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <stdio.h>
  4 | #include <string.h>
  5 | #include <stdlib.h>
  6 | #include <unistd.h>
  7 | #include <sys/stat.h>
  8 | #include <fcntl.h>
  9 | #include <errno.h>
 10 | 
 11 | // target endianness: unless the build already defined one, pick W_BYTE_ORDER_LE or W_BYTE_ORDER_BE
 12 | #if !defined(W_BYTE_ORDER_LE) && !defined(W_BYTE_ORDER_BE)
 13 |   #if (defined(__BIG_ENDIAN__) && !defined(__LITTLE_ENDIAN__)) || \
 14 |        (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \
 15 |        defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \
 16 |        defined(_MIPSEB) || defined(__MIPSEB) || defined(__MIPSEB__)
 17 |     #define W_BYTE_ORDER_BE 1
 18 |   #elif (defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
 19 |          (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \
 20 |          defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \
 21 |          defined(_MIPSEL) || defined(__MIPSEL) || defined(__MIPSEL__)
 22 |     #define W_BYTE_ORDER_LE 1
 23 |   #else // no endianness macro found: infer little-endian from the CPU architecture macros
 24 |     #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
 25 |         defined(__x86_64__) || defined(__x86_64) || \
 26 |         defined(__arm__) || defined(__arm) || defined(__ARM__) || \
 27 |         defined(__ARM) || defined(__arm64__)
 28 |       #define W_BYTE_ORDER_LE 1
 29 |     #else
 30 |       #error "can't infer endianess. Define W_BYTE_ORDER_LE or W_BYTE_ORDER_BE manually."
 31 |     #endif
 32 |   #endif
 33 | #endif
 34 | 
 35 | typedef _Bool                  bool;
 36 | typedef signed char            i8;
 37 | typedef unsigned char          u8;
 38 | typedef signed short int       i16; // NOTE(review): assumes short is 16 bits on every target
 39 | typedef unsigned short int     u16;
 40 | typedef signed int             i32; // NOTE(review): assumes int is 32 bits on every target
 41 | typedef unsigned int           u32;
 42 | typedef signed long long int   i64;
 43 | typedef unsigned long long int u64;
 44 | typedef float                  f32;
 45 | typedef double                 f64;
 46 | 
 47 | #ifndef true // skipped when something else has already defined true
 48 | #define true  ((bool)(1))
 49 | #define false ((bool)(0))
 50 | #endif
 51 | 
 52 | #ifndef null
 53 | #define null NULL
 54 | #endif
 55 | 
 56 | #define nonull   _Nonnull  /* spelled "nonull", presumably because "nonnull" clashes with the attribute name */
 57 | #define nullable _Nullable
 58 | 
 59 | #ifndef W_ASSUME_NONNULL_BEGIN
 60 | #define W_ASSUME_NONNULL_BEGIN _Pragma("clang assume_nonnull begin")
 61 | #endif
 62 | #ifndef W_ASSUME_NONNULL_END
 63 | #define W_ASSUME_NONNULL_END   _Pragma("clang assume_nonnull end")
 64 | #endif
 65 | 
 66 | #define auto __auto_type // type inference; replaces the C keyword auto in every file that includes this header
 67 | 
 68 | #if __has_attribute(returns_nonnull) // a GNU __attribute__, so it is probed with __has_attribute
 69 |   #define nonull_return __attribute__((returns_nonnull))
 70 | #else
 71 |   #define nonull_return
 72 | #endif
 73 | 
 74 | #if __has_c_attribute(fallthrough) // the compiler reports support for [[fallthrough]]
 75 |   #define FALLTHROUGH [[fallthrough]]
 76 | #else
 77 |   #define FALLTHROUGH // expands to nothing
 78 | #endif
 79 | 
 80 | #ifdef DEBUG
 81 |   // normalize DEBUG to 1 or 0 so that we can simply do: "#if DEBUG"
 82 |   #undef DEBUG
 83 |   #define DEBUG 1
 84 | #else
 85 |   #define DEBUG 0
 86 | #endif
 87 | 
 88 | // WFormatForValue returns a printf pattern for the type of x; unlisted types fall back to "%p"
 89 | #define WFormatForValue(x) _Generic((x), \
 90 |   unsigned long long: "%llu", \
 91 |   unsigned long:      "%lu", \
 92 |   unsigned int:       "%u", \
 93 |   long long:          "%lld", \
 94 |   long:               "%ld", \
 95 |   int:                "%d", \
 96 |   char:               "%c", \
 97 |   unsigned char:      "%c", /* was "%C", which is the wide-character (wint_t) conversion */ \
 98 |   const char*:        "%s", \
 99 |   char*:              "%s", \
100 |   void*:              "%p", \
101 |   const void*:        "%p", \
102 |   default:            "%p" \
103 | )
105 | #include "assert.h"
106 | 
107 | #if DEBUG // debug builds: both macros append the (file:line) of the call site
108 |   #include <stdio.h>
109 |   #define dlog(format, ...) \
110 |     fprintf(stdout, "D " format "\t(%s:%d)\n", ##__VA_ARGS__, __FILE__, __LINE__)
111 |   #define logerr(format, ...) \
112 |     fprintf(stderr, format " (%s:%d)\n", ##__VA_ARGS__, __FILE__, __LINE__)
113 | #else // release builds: dlog expands to an empty statement, logerr prints without the location
114 |   #define dlog(...)  do{}while(0)
115 |   #define logerr(format, ...) \
116 |     fprintf(stderr, format "\n", ##__VA_ARGS__)
117 | #endif
118 | // max and min copy each argument into a local first, so each argument is evaluated exactly once
119 | #define max(a,b) \
120 |   ({__typeof__ (a) _a = (a); \
121 |     __typeof__ (b) _b = (b); \
122 |     _a > _b ? _a : _b; })
123 | 
124 | #define min(a,b) \
125 |   ({__typeof__ (a) _a = (a); \
126 |      __typeof__ (b) _b = (b); \
127 |      _a < _b ? _a : _b; })
128 | // fallback used only when no header has defined offsetof yet
129 | #ifndef offsetof
130 |   #define offsetof(st, m) ((size_t)&(((st*)0)->m))
131 | #endif
132 | // countof(x): element count of array x; divides by zero when sizeof(x) is not a multiple of the element size
133 | #define countof(x) \
134 |   ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
135 | 
136 | // popcount<T>: picks the popcount builtin that matches the type of x and applies it to x
137 | #define popcount(x) _Generic((x), \
138 |   unsigned long long: __builtin_popcountll, \
139 |   unsigned long:      __builtin_popcountl, \
140 |   default:            __builtin_popcount \
141 | )(x)
142 | 
143 | // division of integer, rounding up. x == 0 yields 0. Note: x is expanded twice.
144 | #define W_IDIV_CEIL(x, y) ((x) == 0 ? 0 : 1 + (((x) - 1) / (y)))
145 | 
146 | #define die(format, ...) do { /* report through logerr, then end the process with status 1 */ \
147 |   logerr(format, ##__VA_ARGS__); \
148 |   exit(1); \
149 | } while(0)
150 | 
151 | // T align2<T>(T n, T w) rounds up n to closest boundary w (w must be a power of two)
152 | //
153 | // E.g.
154 | //   align2(0, 4) => 0
155 | //   align2(1, 4) => 4
156 | //   align2(2, 4) => 4
157 | //   align2(3, 4) => 4
158 | //   align2(4, 4) => 4
159 | //   align2(5, 4) => 8
160 | //   ...
161 | // Note: n and w are each expanded more than once, so avoid arguments with side effects.
162 | #define align2(n,w) ({ \
163 |   assert(((w) & ((w) - 1)) == 0); /* alignment w is not a power of two */ \
164 |   ((n) + ((w) - 1)) & ~((w) - 1); \
165 | })
166 | 
167 | 
168 | // // Attribute for opting out of address sanitation.
169 | // // Needed for realloc() with a null pointer.
170 | // // e.g.
171 | // // W_NO_SANITIZE_ADDRESS
172 | // // void ThisFunctionWillNotBeInstrumented() { return realloc(NULL, 1); }
173 | // #if defined(__clang__) || defined (__GNUC__)
174 | //   #define W_NO_SANITIZE_ADDRESS __attribute__((no_sanitize("address")))
175 | // #else
176 | //   #define W_NO_SANITIZE_ADDRESS
177 | // #endif
178 | 


--------------------------------------------------------------------------------
/src/common/hash.c:
--------------------------------------------------------------------------------
 1 | #include "hash.h"
 2 | 
 3 | u32 hashFNV1a(const u8* buf, size_t len) { // 32-bit FNV-1a hash of the len bytes at buf
 4 |   const u32 prime = 0x01000193; // pow(2,24) + pow(2,8) + 0x93
 5 |   u32 hash = 0x811C9DC5; // seed
 6 |   for (size_t i = 0; i < len; i++) {
 7 |     hash ^= buf[i]; // FNV-1a: xor the byte in first ...
 8 |     hash *= prime;  // ... then multiply by the prime
 9 |   }
10 |   return hash;
11 | }
12 | 
13 | u64 hashFNV1a64(const u8* buf, size_t len) { // 64-bit FNV-1a hash of the len bytes at buf
14 |   const u64 prime = 0x100000001B3; // pow(2,40) + pow(2,8) + 0xb3
15 |   u64 hash = 0xCBF29CE484222325; // seed
16 |   for (size_t i = 0; i < len; i++) {
17 |     hash ^= buf[i]; // FNV-1a: xor the byte in first ...
18 |     hash *= prime;  // ... then multiply by the prime
19 |   }
20 |   return hash;
21 | }
22 | 


--------------------------------------------------------------------------------
/src/common/hash.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "defs.h"
3 | 
4 | u32 hashFNV1a(const u8* buf, size_t len);   // 32-bit FNV-1a hash of the len bytes at buf
5 | u64 hashFNV1a64(const u8* buf, size_t len); // 64-bit FNV-1a hash of the len bytes at buf
6 | 


--------------------------------------------------------------------------------
/src/common/hashmap.c.h:
--------------------------------------------------------------------------------
  1 | // example:
  2 | // #define HASHMAP_NAME     FooMap
  3 | // #define HASHMAP_KEY      Foo      // compared with ==; a NULL key marks a never-used slot
  4 | // #define HASHMAP_KEY_HASH FooHash  // should return an unsigned integer
  5 | // #define HASHMAP_VALUE    char*    // NULL marks a free or deleted slot, so NULL cannot be stored
  6 | #ifndef HASHMAP_NAME
  7 | #error "please define HASHMAP_NAME"
  8 | #endif
  9 | #ifndef HASHMAP_KEY
 10 | #error "please define HASHMAP_KEY"
 11 | #endif
 12 | #ifndef HASHMAP_KEY_HASH
 13 | #error "please define HASHMAP_KEY_HASH"
 14 | #endif
 15 | #ifndef HASHMAP_VALUE
 16 | #error "please define HASHMAP_VALUE"
 17 | #endif
 18 | 
 19 | #define _HM_MAKE_FN_NAME(a, b) a ## b
 20 | #define _HM_FUN(prefix, name) _HM_MAKE_FN_NAME(prefix, name)
 21 | #define HM_FUN(name) _HM_FUN(HASHMAP_NAME, name) // the extra level lets HASHMAP_NAME expand before ## pastes
 22 | 
 23 | typedef enum HMFlag {
 24 |   HMFlagNone = 0,
 25 |   HMFlagBucketMemoryDense = 1 << 0,  // bucket memory is inside map memory. used by Free
 26 | } HMFlag;
 27 | 
 28 | static const u32 bucketSize = 6; // entries per bucket
 29 | 
 30 | typedef struct {
 31 |   struct {
 32 |     HASHMAP_KEY   key;   // NULL = this slot was never used
 33 |     HASHMAP_VALUE value; // NULL = this slot is free (never used, or its entry was deleted)
 34 |   } entries[bucketSize];
 35 | } Bucket;
 36 | 
 37 | // Init prepares a caller-owned map with initbuckets zeroed buckets allocated from mem.
 38 | void HM_FUN(Init)(HASHMAP_NAME* m, u32 initbuckets, Memory mem) {
 39 |   m->cap = initbuckets;
 40 |   m->len = 0;
 41 |   m->flags = HMFlagNone; // must not stay uninitialized: Dealloc and mapGrow both test the dense bit
 42 |   m->mem = mem;
 43 |   m->buckets = memalloc(mem, m->cap * sizeof(Bucket));
 44 | }
 45 | HASHMAP_NAME* HM_FUN(New)(u32 initbuckets, Memory mem) {
 46 |   // Unlike Init, New puts the map header and its initial buckets into one allocation,
 47 |   // which is usually a little faster and fragments memory less when many maps are made.
 48 |   // The allocation comes back zeroed, so len starts out as 0.
 49 |   size_t bucketBytes = initbuckets * sizeof(Bucket);
 50 |   HASHMAP_NAME* m = memalloc(mem, sizeof(HASHMAP_NAME) + bucketBytes);
 51 |   m->flags = HMFlagBucketMemoryDense;
 52 |   m->mem = mem;
 53 |   m->cap = initbuckets;
 54 |   // with zero buckets there is nothing behind the header; buckets then stays NULL
 55 |   if (bucketBytes != 0) {
 56 |     m->buckets = (char*)m + sizeof(HASHMAP_NAME);
 57 |   }
 58 |   return m;
 59 | }
 60 | 
 61 | void HM_FUN(Dealloc)(HASHMAP_NAME* m) {
 62 |   // Only for maps set up with Init: a map made by New keeps its buckets inside its own block.
 63 |   assert(!(m->flags & HMFlagBucketMemoryDense));
 64 | 
 65 |   memfree(m->mem, m->buckets);
 66 |   #if DEBUG
 67 |   m->cap = 0;
 68 |   m->len = 0;
 69 |   m->buckets = NULL;
 70 |   #endif
 71 | }
 72 | 
 73 | void HM_FUN(Free)(HASHMAP_NAME* m) {
 74 |   // Frees a map created with New. Buckets that were moved out by a grow are freed first.
 75 |   Memory mem = m->mem;
 76 |   if (!(m->flags & HMFlagBucketMemoryDense)) {
 77 |     memfree(mem, m->buckets);
 78 |   }
 79 |   #if DEBUG
 80 |   // clear the header BEFORE releasing it; writing to m after memfree is a use-after-free
 81 |   m->buckets = NULL; m->len = 0; m->cap = 0;
 82 |   #endif
 83 |   memfree(mem, m);
 84 | }
 85 | 
 86 | static void mapGrow(HASHMAP_NAME* m) { // at least doubles the bucket count and re-inserts all live entries
 87 |   u32 cap = m->cap * 2;
 88 |   rehash: { // re-entered through the goto below, each time with a doubled cap
 89 |     auto newbuckets = (Bucket*)memalloc(m->mem, cap * sizeof(Bucket)); // zeroed, so every slot starts unused
 90 |     for (u32 bi = 0; bi < m->cap; bi++) {
 91 |       auto b = &((Bucket*)m->buckets)[bi];
 92 |       for (u32 i = 0; i < bucketSize; i++) {
 93 |         auto e = &b->entries[i];
 94 |         if (e->key == NULL) { // never-used slot: stop scanning this old bucket
 95 |           break;
 96 |         }
 97 |         if (e->value == NULL) {
 98 |           // skip deleted entry (compaction)
 99 |           continue;
100 |         }
101 |         u32 index = ((u32)HASHMAP_KEY_HASH(e->key)) % cap;
102 |         auto newb = &newbuckets[index];
103 |         bool fit = false;
104 |         for (u32 i2 = 0; i2 < bucketSize; i2++) {
105 |           auto e2 = &newb->entries[i2];
106 |           if (e2->key == NULL) {
107 |             // found a free slot in newb
108 |             *e2 = *e;
109 |             fit = true;
110 |             break;
111 |           }
112 |         }
113 |         if (!fit) {
114 |           // no free slot found in newb; need to grow further.
115 |           memfree(m->mem, newbuckets);
116 |           cap = cap * 2;
117 |           goto rehash;
118 |         }
119 |       }
120 |     }
121 |     if (!(m->flags & HMFlagBucketMemoryDense)) { // dense buckets are part of the map's own allocation (see New)
122 |       memfree(m->mem, m->buckets);
123 |     }
124 |     m->buckets = newbuckets;
125 |     m->cap = cap;
126 |     m->flags &= ~HMFlagBucketMemoryDense; // the buckets are a separate allocation from now on
127 |   }
128 | }
129 | 
130 | 
131 | // HM_FUN(Set) inserts key=value into m.
132 | // Returns replaced value or NULL if key did not exist in map.
133 | HASHMAP_VALUE HM_FUN(Set)(HASHMAP_NAME* m, HASHMAP_KEY key, HASHMAP_VALUE value) {
134 |   assert(value != NULL); // a NULL value is the marker for free and deleted slots
135 |   while (1) { // grow loop
136 |     u32 index = ((u32)HASHMAP_KEY_HASH(key)) % m->cap;
137 |     auto b = &((Bucket*)m->buckets)[index];
138 |     __typeof__(&b->entries[0]) slot = NULL; // first free slot; used only if key is not live
139 |     for (u32 i = 0; i < bucketSize; i++) {
140 |       auto e = &b->entries[i];
141 |       if (e->value == NULL) {
142 |         if (slot == NULL) { slot = e; }
143 |         if (e->key == NULL) { break; } // never-used slot: nothing is stored beyond it
144 |       } else if (e->key == key) {
145 |         // key already in map -- replace value
146 |         auto oldval = e->value;
147 |         e->value = value;
148 |         return oldval;
149 |       }
150 |     }
151 |     if (slot != NULL) {
152 |       // insert only now: taking a deleted slot early would duplicate a key that is live further on
153 |       slot->key = key;
154 |       slot->value = value;
155 |       m->len++;
156 |       return NULL;
157 |     }
158 |     // overloaded -- grow buckets
159 |     mapGrow(m);
160 |   }
161 | }
162 | 
163 | HASHMAP_VALUE HM_FUN(Del)(HASHMAP_NAME* m, HASHMAP_KEY key) {
164 |   // Deleting keeps the key in its slot and only clears the value.
165 |   auto b = &((Bucket*)m->buckets)[((u32)HASHMAP_KEY_HASH(key)) % m->cap];
166 |   for (u32 i = 0; i < bucketSize; i++) {
167 |     auto e = &b->entries[i];
168 |     if (e->key != key) {
169 |       continue;
170 |     }
171 |     auto removed = e->value;
172 |     if (removed != NULL) {
173 |       // mark as deleted
174 |       e->value = NULL;
175 |       m->len--;
176 |     }
177 |     return removed; // NULL when the first slot holding key was already deleted
178 |   }
179 |   return NULL;
180 | }
181 | 
182 | 
183 | HASHMAP_VALUE HM_FUN(Get)(const HASHMAP_NAME* m, HASHMAP_KEY key) {
184 |   auto b = &((Bucket*)m->buckets)[((u32)HASHMAP_KEY_HASH(key)) % m->cap];
185 |   u32 i = 0;
186 |   while (i < bucketSize) {
187 |     auto e = &b->entries[i++];
188 |     if (e->key == key) {
189 |       return e->value; // NULL if this entry was deleted
190 |     }
191 |     if (e->key == NULL) {
192 |       break; // never-used slot ends the search
193 |     }
194 |   }
195 |   return NULL;
196 | }
197 | 
198 | 
199 | void HM_FUN(Clear)(HASHMAP_NAME* m) { // empties the map; the bucket memory is kept and zeroed
200 |   m->len = 0;
201 |   memset(m->buckets, 0, m->cap * sizeof(Bucket));
202 | }
203 | 
204 | 
205 | void HM_FUN(Iter)(const HASHMAP_NAME* m, HM_FUN(Iterator)* it, void* userdata) {
206 |   // Calls it for every live entry, bucket by bucket, until it sets stop to true.
207 |   bool stop = false;
208 |   auto buckets = (Bucket*)m->buckets;
209 |   for (u32 bi = 0; bi < m->cap; bi++) {
210 |     for (u32 i = 0; i < bucketSize; i++) {
211 |       auto e = &buckets[bi].entries[i];
212 |       if (e->key == NULL) {
213 |         break; // never-used slot: continue with the next bucket
214 |       }
215 |       if (e->value == NULL) {
216 |         continue; // deleted entry
217 |       }
218 |       it(e->key, e->value, &stop, userdata);
219 |       if (stop) { return; }
220 |     }
221 |   }
222 | }
223 | 
224 | // static u32* hashmapDebugDistr(const HASHMAP_NAME* m) {
225 | //   u32 valindex = 0;
226 | //   u32* vals = (u32*)memalloc(m->mem, m->cap * sizeof(u32));
227 | //   for (u32 bi = 0; bi < m->cap; bi++) {
228 | //     auto b = &((Bucket*)m->buckets)[bi];
229 | //     u32 depth = 0;
230 | //     for (u32 i = 0; i < bucketSize; i++) {
231 | //       auto e = &b->entries[i];
232 | //       if (e->key == NULL) {
233 | //         break;
234 | //       }
235 | //       if (e->value != NULL) {
236 | //         depth++;
237 | //       }
238 | //     }
239 | //     vals[valindex++] = depth;
240 | //   }
241 | //   return vals;
242 | // }
243 | 
244 | #undef _HM_MAKE_FN_NAME
245 | #undef _HM_FUN
246 | #undef HM_FUN
247 | 


--------------------------------------------------------------------------------
/src/common/hashmap.h:
--------------------------------------------------------------------------------
 1 | // Note: intentionally not "#pragma once"
 2 | #include "memory.h"
 3 | // example:
 4 | // #define HASHMAP_NAME     FooMap
 5 | // #define HASHMAP_KEY      Foo
 6 | // #define HASHMAP_VALUE    char*
 7 | #ifndef HASHMAP_NAME
 8 | #error "please define HASHMAP_NAME"
 9 | #endif
10 | #ifndef HASHMAP_KEY
11 | #error "please define HASHMAP_KEY"
12 | #endif
13 | #ifndef HASHMAP_VALUE
14 | #error "please define HASHMAP_VALUE"
15 | #endif
16 | 
17 | #define _HM_MAKE_FN_NAME(a, b) a ## b
18 | #define _HM_FUN(prefix, name) _HM_MAKE_FN_NAME(prefix, name)
19 | #define HM_FUN(name) _HM_FUN(HASHMAP_NAME, name) // e.g. HM_FUN(Get) names FooMapGet when HASHMAP_NAME is FooMap
20 | #define HASHMAP_IS_INIT(m) ((m)->buckets != NULL) // true once buckets has been set
21 | 
22 | typedef struct {
23 |   u32    cap;     // number of buckets
24 |   u32    len;     // number of key-value entries
25 |   u32    flags;   // HMFlag bits -- NOTE(review): the enum is declared in hashmap.c.h, not in this header
26 |   Memory mem;     // memory allocator. NULL = use global allocator
27 |   void*  buckets; // internal
28 | } HASHMAP_NAME;
29 | 
30 | #ifdef HASHMAP_INCLUDE_DECLARATIONS
31 | // Include declarations.
32 | // Normally these are copy-pasted and hand-converted in the user-level header.
33 | 
34 | // New creates a new map with initbuckets initial buckets.
35 | HASHMAP_NAME* HM_FUN(New)(u32 initbuckets, Memory);
36 | 
37 | // Free frees all memory of a map, including the map's memory.
38 | // Use Free when you created a map with New.
39 | // Use Dealloc when you manage the memory of the map yourself and used Init.
40 | void HM_FUN(Free)(HASHMAP_NAME*);
41 | 
42 | // Init initializes a map structure. initbuckets is the number of initial buckets.
43 | void HM_FUN(Init)(HASHMAP_NAME*, u32 initbuckets, Memory);
44 | 
45 | // Dealloc frees buckets data (but not the hashmap itself.)
46 | // The hashmap is invalid after this call. Call Init to reuse.
47 | void HM_FUN(Dealloc)(HASHMAP_NAME*);
48 | 
49 | // Get searches for key. Returns value, or NULL if not found.
50 | HASHMAP_VALUE HM_FUN(Get)(const HASHMAP_NAME*, HASHMAP_KEY key);
51 | 
52 | // Set inserts key=value into m. Returns the replaced value or NULL if not found.
53 | HASHMAP_VALUE HM_FUN(Set)(HASHMAP_NAME*, HASHMAP_KEY key, HASHMAP_VALUE value);
54 | 
55 | // Del removes value for key. Returns the removed value or NULL if not found.
56 | HASHMAP_VALUE HM_FUN(Del)(HASHMAP_NAME*, HASHMAP_KEY key);
57 | 
58 | // Clear removes all entries. In contrast to Free, map remains valid.
59 | void HM_FUN(Clear)(HASHMAP_NAME*);
60 | 
61 | // Iterator function type. Set stop=true to stop iteration.
62 | typedef void(HM_FUN(Iterator))(HASHMAP_KEY key, HASHMAP_VALUE value, bool* stop, void* userdata);
63 | 
64 | // Iter iterates over entries of the map.
65 | void HM_FUN(Iter)(const HASHMAP_NAME*, HM_FUN(Iterator)*, void* userdata);
66 | 
67 | #endif
68 | 
69 | #undef _HM_MAKE_FN_NAME
70 | #undef _HM_FUN
71 | #undef HM_FUN
72 | 


--------------------------------------------------------------------------------
/src/common/memory.c:
--------------------------------------------------------------------------------
  1 | #include "memory.h"
  2 | #include "array.h"
  3 | #include "os.h"
  4 | #include "test.h"
  5 | 
  6 | 
  7 | static size_t memPageSize = 0; // page size cached from os_mempagesize(); set by init() below
  8 | 
  9 | static void __attribute__((constructor)) init() { // constructor attribute: meant to run automatically at program start
 10 |   memPageSize = os_mempagesize();
 11 | }
 12 | 
 13 | Memory MemoryNew(size_t initHint) {
 14 |   // initHint is the initial capacity passed to create_mspace;
 15 |   // a hint of 0 selects one memory page.
 16 |   size_t capacity = (initHint != 0) ? initHint : memPageSize;
 17 |   return create_mspace(capacity, /*locked*/0);
 18 | }
 19 | 
 20 | void MemoryRecycle(Memory* memptr) { // destroys the space in *memptr and stores a fresh one there
 21 |   // TODO: see if there is a way to make dlmalloc reuse msp
 22 |   destroy_mspace(memptr[0]);
 23 |   memptr[0] = create_mspace(/*capacity*/memPageSize, /*locked*/0);
 24 | }
 25 | 
 26 | void MemoryFree(Memory mem) { // destroys the space mem
 27 |   (void)destroy_mspace(mem); // its return value is not needed here
 28 | }
 29 | 
 30 | 
 31 | char* memallocCStr(Memory mem, const char* pch, size_t len) {
 32 |   // copies len bytes of pch into new memory from mem and NUL-terminates the copy
 33 |   char* copy = memalloc(mem, len + 1);
 34 |   copy[len] = 0;
 35 |   return memcpy(copy, pch, len);
 36 | }
 37 | 
 38 | char* memallocCStrConcat(Memory mem, const char* s1, ...) {
 39 |   va_list ap;
 40 |   size_t len1 = strlen(s1);
 41 |   size_t len = len1;
 42 |   u32 count = 0; // number of strings after s1 that will be appended
 43 |   // pass 1: count the strings up to the NULL terminator (at most 20) and add up their lengths
 44 |   va_start(ap, s1);
 45 |   while (count < 20) { // TODO: warn about limit somehow?
 46 |     const char* s = va_arg(ap,const char*);
 47 |     if (s == NULL) {
 48 |       break;
 49 |     }
 50 |     len += strlen(s);
 51 |     count++; // was missing: pass 2 then copied nothing and the limit of 20 never applied
 52 |   }
 53 |   va_end(ap);
 54 | 
 55 |   char* newstr = (char*)memalloc(mem, len + 1);
 56 |   char* dstptr = newstr;
 57 |   memcpy(dstptr, s1, len1);
 58 |   dstptr += len1;
 59 | 
 60 |   // pass 2: append exactly the strings that were measured in pass 1
 61 |   va_start(ap, s1);
 62 |   for (u32 i = 0; i < count; i++) {
 63 |     const char* s = va_arg(ap,const char*);
 64 |     auto slen = strlen(s);
 65 |     memcpy(dstptr, s, slen);
 66 |     dstptr += slen;
 67 |   }
 68 |   va_end(ap);
 69 |   *dstptr = 0;
 70 |   return newstr;
 71 | }
 72 | 
 73 | 
 74 | // memsprintf is like sprintf but uses memory from mem (NULL = global allocator).
 75 | char* memsprintf(Memory mem, const char* format, ...) {
 76 |   va_list ap, ap2;
 77 |   va_start(ap, format);
 78 |   va_copy(ap2, ap); // ap is indeterminate after vsnprintf, so the retry needs its own copy
 79 |   size_t bufsize = (strlen(format) * 2) + 1;
 80 |   char* buf = memalloc(mem, bufsize);
 81 |   int n = vsnprintf(buf, bufsize, format, ap);
 82 |   if (n >= 0 && (size_t)n >= bufsize) { // first guess too small; n is the exact length needed
 83 |     bufsize = (size_t)n + 1;
 84 |     buf = memrealloc(mem, buf, bufsize); // memrealloc maps a NULL mem to the global allocator
 85 |     n = vsnprintf(buf, bufsize, format, ap2);
 86 |     assert(n >= 0 && (size_t)n < bufsize);
 87 |   }
 88 |   va_end(ap2); va_end(ap);
 89 |   return buf;
 90 | }
 91 | 
 92 | typedef struct GC { // pointer lists of the two-generation collector (see memgc_collect)
 93 |   Array gen1, gen2; // gen1: pointers added by _memgc; gen2: pointers the next memgc_collect frees
 94 | } GC;
 95 | 
 96 | 
 97 | /*__thread*/ Memory _gmem = NULL; // global allocator; created on demand by _GlobalMemory()
 98 | /*__thread*/ GC tlsGC = { Array_INIT, Array_INIT }; // NOTE(review): named "tls" but __thread is commented out
 99 | 
100 | 
101 | Memory _GlobalMemory() { // returns the global allocator, creating it on first use
102 |   if (_gmem == NULL) { _gmem = create_mspace(0, 0); }
103 |   return _gmem;
104 | }
105 | 
106 | void* memgcalloc(size_t size) { // zeroed memory from the global allocator, registered with the gc
107 |   void* ptr = memalloc(NULL, size);
108 |   _memgc(ptr);
109 |   return ptr;
110 | }
111 | 
112 | 
113 | void memgc_collect() {
114 |   auto gc = &tlsGC;
115 |   // dlog("memgc_collect gen1 %u, gen2 %u", gc->gen1.len, gc->gen2.len);
116 | 
117 |   // free anything in gen2
118 |   if (gc->gen2.len > 0) {
119 |     assert(_gmem != NULL);
120 | 
121 |     // Note: dlmalloc mentions that for large bulk_free sets, sorting the pointers first may
122 |     // increase locality and may increase performance. If we ever decide to performance tune
123 |     // this code, it may be worth considering.
124 | 
125 |     #if DEBUG
126 |     size_t unfreed = // this declaration is completed by the mspace_bulk_free call below
127 |     #endif
128 |     mspace_bulk_free(_gmem, gc->gen2.v, gc->gen2.len);
129 | 
130 |     #if DEBUG
131 |     // unfreed is always zero in release builds as dlmalloc footers are only enabled in DEBUG.
132 |     if (unfreed > 0) {
133 |       dlog("[gc] warning: collector found %zu elements from a non-global allocator", unfreed);
134 |     }
135 |     #endif
136 | 
137 |     gc->gen2.len = 0;
138 |   }
139 | 
140 |   // swap gen1 with gen2; gen2.len is 0 at this point, so gen1 starts over empty
141 |   auto tmp = gc->gen2;
142 |   gc->gen2 = gc->gen1;
143 |   gc->gen1 = tmp;
144 | }
145 | 
146 | 
147 | // _memgc appends ptr to gen1. The next memgc_collect moves it to gen2 and the
148 | // one after that frees it.
149 | void _memgc(void* ptr) {
150 |   assert(_gmem != NULL); // ptr is allocated in the global allocator, so this should not be null
151 |   ArrayPush(&tlsGC.gen1, ptr, _gmem);
152 | }
153 | 
154 | 
155 | #if DEBUG
156 | static void test() {
157 |   // printf("-------------------------Memory-------------------------\n");
158 | 
159 |   u32 allocCount1 = 5;
160 | 
161 |   for (u32 i = 0; i < allocCount1; i++) {
162 |     void* ptr = memalloc(NULL, 16);
163 |     memgc(ptr);
164 |   }
165 | 
166 |   auto gc = &tlsGC;
167 | 
168 |   assert(gc->gen1.len == allocCount1);
169 | 
170 |   memgc_collect();
171 |   assert(gc->gen1.len == 0); // gen1 should always be 0 after call to memgc_collect
172 |   assert(gc->gen2.len == allocCount1);
173 | 
174 |   u32 allocCount2 = 8;
175 | 
176 |   for (u32 i = 0; i < allocCount2; i++) {
177 |     memgcalloc(16);
178 |   }
179 | 
180 |   assert(gc->gen1.len == allocCount2);
181 |   assert(gc->gen2.len == allocCount1);
182 | 
183 |   memgc_collect();
184 |   assert(gc->gen1.len == 0);
185 |   assert(gc->gen2.len == allocCount2);
186 | 
187 |   memgc_collect();
188 | 
189 |   assert(gc->gen1.len == 0);
190 |   assert(gc->gen2.len == 0);
191 | 
192 |   // test bulk_free to ensure that we get an error message logged in case foreign pointers
193 |   // are added to the GC.
194 |   // Caution: This depend on FOOTER=1 being defined for dlmalloc, which is only the case
195 |   // for DEBUG builds.
196 |   {
197 |     Memory mem = MemoryNew(0);
198 |     memgc(memalloc(mem, 16));    // add pointer from unrelated mspace to gc
199 |     memgc(memalloc(_gmem, 16)); // add pointer from the correct mspace to gc
200 |     assert(gc->gen1.len == 2);
201 |     size_t unfreed = mspace_bulk_free(_gmem, gc->gen1.v, gc->gen1.len);
202 |     assert(unfreed == 1);
203 |     // forget both entries: one was just freed, the other dies with mem below. Left in gen1,
204 |     // a later memgc_collect would pass the stale foreign pointer to mspace_bulk_free.
205 |     gc->gen1.len = 0;
206 |     MemoryFree(mem);
207 |   }
208 | 
209 |   // printf("------------------------/Memory-------------------------\n");
210 |   // exit(0);
211 | }
212 | W_UNIT_TEST(Memory, { test(); }) // W_UNIT_TEST
213 | #endif
213 | 


--------------------------------------------------------------------------------
/src/common/memory.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "defs.h"
 3 | #include "dlmalloc.h"
 4 | #include "sds.h"
 5 | 
 6 | // Memory is an isolated-space memory allocator, useful for allocating many small
 7 | // short-lived fragments of memory, like for example AST nodes.
 8 | //
 9 | // Passing NULL to management functions like memalloc uses a shared global allocator
10 | // and works the same way as libc malloc, free et al.
11 | //
12 | typedef mspace Memory;
13 | 
14 | // memalloc allocates memory. Returned memory is zeroed.
15 | static void* memalloc(Memory nullable mem, size_t size) nonull_return;
16 | 
17 | // memalloct is a convenience for: (MyStructType*)memalloc(m, sizeof(MyStructType))
18 | #define memalloct(mem, TYPE) ((TYPE*)memalloc(mem, sizeof(TYPE)))
19 | 
20 | // memrealloc reallocates some memory. Additional memory is NOT zeroed.
21 | static void* memrealloc(Memory nullable mem, void* nullable ptr, size_t newsize) nonull_return;
22 | 
23 | // memfree frees memory. NOTE(review): ptr presumably has to come from the same mem; confirm.
24 | static void memfree(Memory nullable mem, void* nonull ptr);
25 | 
26 | // memallocCStr copies len bytes of pch into new memory and NUL-terminates the copy (cf. strdup)
27 | char* memallocCStr(Memory nullable mem, const char* nonull pch, size_t len);
28 | 
29 | // memallocCStrConcat concatenates up to 20 c-strings together.
30 | // Arguments must be terminated with NULL.
31 | char* memallocCStrConcat(Memory nullable mem, const char* nonull s1, ...);
32 | 
33 | // memsprintf is like sprintf but uses memory from mem.
34 | char* memsprintf(Memory mem, const char* format, ...);
35 | 
36 | // -----------------------------------------------------------------------------------------------
37 | // Rudimentary garbage collector for short-lived data.
38 | 
39 | // memgcalloc allocates memory that will be free'd automatically.
40 | // This is equivalent to: memgc(memalloc(NULL, size))
41 | void* memgcalloc(size_t size) nonull_return;
42 | 
43 | // memgcalloct is a convenience for: (MyStructType*)memgcalloc(sizeof(MyStructType)). mem is unused.
44 | #define memgcalloct(mem, TYPE) ((TYPE*)memgcalloc(sizeof(TYPE)))
45 | 
46 | // memgc marks ptr for garbage collection and evaluates to ptr. ptr is evaluated exactly once.
47 | // Memory which has been marked for garbage collection must not be freed manually.
48 | // Note: If we ever have the need, add a memgc_remove function for explicitly removing a pointer.
49 | // T memgc<T extends void*>(T ptr)
50 | #define memgc(ptr) ({ __typeof__(ptr) _memgc_ptr = (ptr); _memgc(_memgc_ptr); _memgc_ptr; })
51 | 
52 | // memgcsds marks an sds string for garbage collection. (Does not work with Sym.)
53 | static sds memgcsds(sds nonull s) nonull_return;
54 | 
55 | // memgc_collect performs very basic garbage collection.
56 | // The collector maintains two global lists for gc: gen1 and gen2. memgc(ptr) adds to gen1.
57 | // When memgc_collect is called:
58 | // 1. every pointer in gen2 is free'd; gen2 list is emptied.
59 | // 2. every pointer in gen1 is moved to gen2.
60 | // Thus, this is NOT a generic "smart" garbage collector.
61 | // Caution: Calling memgc_collect twice in a row causes all gc objects to be free'd immediately.
62 | // Always uses the global allocator.
63 | void memgc_collect();
64 | 
65 | 
66 | // -----------------------------------------------------------------------------------------------
67 | // Memory spaces
68 | 
69 | // Create a new memory space. initHint is the initial capacity; 0 selects one memory page.
70 | Memory MemoryNew(size_t initHint/*=0*/);
71 | void MemoryRecycle(Memory* memptr); // recycle for reuse: destroys *memptr and stores a fresh space in it
72 | void MemoryFree(Memory mem);        // free all memory allocated by mem
73 | 
74 | // -----------------------------------------------------------------------------------------------
75 | // inline and internal implementations
76 | 
77 | void _memgc(void* nonull ptr); // implementation behind the memgc macro; adds ptr to gen1
78 | Memory _GlobalMemory() nonull_return; // returns the global allocator, creating it on first use
79 | 
80 | inline static void* memalloc(Memory mem, size_t size) { // zeroed allocation; NULL mem = global allocator
81 |   return mspace_calloc(mem ? mem : _GlobalMemory(), 1, size);
82 | }
83 | 
84 | inline static void* memrealloc(Memory mem, void* ptr, size_t newsize) { // NULL mem = global allocator
85 |   return mspace_realloc(mem ? mem : _GlobalMemory(), ptr, newsize);
86 | }
87 | 
88 | inline static void memfree(Memory mem, void* ptr) { // NULL mem = global allocator
89 |   mspace_free(mem ? mem : _GlobalMemory(), ptr);
90 | }
91 | 
92 | inline static sds memgcsds(sds s) { // registers the pointer sdsHdrSize(s[-1]) bytes before s, returns s
93 |   char* base = (char*)s - sdsHdrSize(s[-1]);
94 |   return (_memgc(base), s);
95 | }
96 | 


--------------------------------------------------------------------------------
/src/common/os.c:
--------------------------------------------------------------------------------
 1 | #include <unistd.h> // sysconf
 2 | #include <sys/errno.h>
 3 | 
 4 | #include "defs.h"
 5 | #include "os.h"
 6 | 
 7 | static size_t _mempagesize = 0;
 8 | 
 9 | size_t os_mempagesize() {
10 |   if (_mempagesize == 0) {
11 |     auto z = sysconf(_SC_PAGESIZE);
12 |     if (z <= 0) {
13 |       _mempagesize = 1024 * 4; // usually 4kB
14 |     } else {
15 |       _mempagesize = (size_t)z;
16 |     }
17 |   }
18 |   return _mempagesize;
19 | }
20 | 
21 | 
22 | u8* os_readfile(const char* filename, size_t* size_inout, Memory mem) {
23 |   assert(size_inout != NULL);
24 | 
25 |   int fd = open(filename, O_RDONLY);
26 |   if (fd < 0) {
27 |     return NULL;
28 |   }
29 | 
30 |   struct stat st;
31 |   if (fstat(fd, &st) != 0) {
32 |     close(fd);
33 |     return NULL;
34 |   }
35 | 
36 |   size_t bufsize = (size_t)st.st_size;
37 |   size_t limit = *size_inout;
38 |   if (limit > 0 && limit < bufsize) {
39 |     bufsize = limit;
40 |   }
41 | 
42 |   u8* buf = (u8*)memalloc(mem, bufsize);
43 | 
44 |   auto nread = read(fd, buf, bufsize);
45 |   close(fd);
46 |   if (nread < 0) {
47 |     memfree(mem, buf);
48 |     *size_inout = 0;
49 |     return NULL;
50 |   }
51 | 
52 |   assert(nread == bufsize);
53 | 
54 |   *size_inout = bufsize;
55 |   return buf;
56 | }
57 | 
58 | 
// os_writefile creates or truncates the file at filename and writes size bytes
// from ptr to it. Returns true only if the file could be opened, all bytes were
// written, and the stream was flushed and closed without error.
// A size of 0 produces an empty file and returns true.
bool os_writefile(const char* filename, const void* ptr, size_t size) {
  // "b": the payload is arbitrary bytes, so never apply text-mode translation
  FILE* fp = fopen(filename, "wb");
  if (fp == NULL) {
    return false;
  }
  bool ok = true;
  if (size > 0) {
    ok = fwrite(ptr, size, 1, fp) == 1;
  }
  // fclose flushes buffered data; a failure here means the data may not be on disk
  if (fclose(fp) != 0) {
    ok = false;
  }
  return ok;
}
68 | 
69 | 


--------------------------------------------------------------------------------
/src/common/os.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "memory.h"
 3 | 
 4 | // os
 5 | size_t os_mempagesize();  // always returns a suitable number
 6 | 
 7 | // Read entire file into a heap-allocated buffer.
 8 | // If *size_inout is >0 then it is used as a limit of how much to read from the file.
 9 | // If size_inout is not null, it is set to the size of the returned byte array.
10 | u8* os_readfile(const char* nonull filename, size_t* nonull size_inout, Memory nullable mem);
11 | 
12 | // Write data at ptr of bytes size to file at filename.
13 | bool os_writefile(const char* nonull filename, const void* nonull ptr, size_t size);
14 | 


--------------------------------------------------------------------------------
/src/common/ptrmap.c:
--------------------------------------------------------------------------------
  1 | #include "ptrmap.h"
  2 | #include "hash.h"
  3 | #include "test.h"
  4 | 
  5 | #include <math.h> /* log2 */
  6 | #include <limits.h> /* *_MAX */
  7 | 
  8 | #if ((ULONG_MAX) > (UINT_MAX))
  9 |   // 64-bit address
 10 |   #define ptrhash(ptr) ((size_t)hashFNV1a64((const u8*)&(ptr), 8))
 11 | #else
 12 |   // 32-bit address
 13 |   #define ptrhash(ptr) ((size_t)hashFNV1a((const u8*)&(ptr), 4))
 14 | #endif
 15 | 
 16 | // This is a good and very fast hash function for small sets of sequential pointers,
 17 | // but as the address space grows the distribution worsens quickly compared to FNV1a.
 18 | // static size_t ptrhash2(void* p) {
 19 | //   // Note: the log2 call is eliminated and replaced by a constant when compiling
 20 | //   // with optimizations.
 21 | //   const size_t shift = (size_t)log2(1 + sizeof(void*));
 22 | //   return (size_t)(p) >> shift;
 23 | // }
 24 | 
 25 | // hashmap implementation
 26 | #define HASHMAP_NAME     PtrMap
 27 | #define HASHMAP_KEY      const void*
 28 | #define HASHMAP_VALUE    void*
 29 | #define HASHMAP_KEY_HASH ptrhash
 30 | #include "hashmap.c.h"
 31 | #undef HASHMAP_NAME
 32 | #undef HASHMAP_KEY
 33 | #undef HASHMAP_VALUE
 34 | #undef HASHMAP_KEY_HASH
 35 | 
 36 | 
 37 | #if DEBUG
 38 | static void testMapIterator(const void* key, void* value, bool* stop, void* userdata) {
 39 |   // dlog("\"%s\" => %zu", key, (size_t)value);
 40 |   size_t* n = (size_t*)userdata;
 41 |   (*n)++;
 42 | }
 43 | #endif
 44 | 
 45 | 
 46 | W_UNIT_TEST(PtrMap, {
 47 |   auto mem = MemoryNew(0);
 48 |   auto m = PtrMapNew(8, mem);
 49 | 
 50 |   assert(m->len == 0);
 51 | 
 52 |   #define SYM(cstr) symgeth((const u8*)(cstr), strlen(cstr))
 53 |   void* oldval;
 54 | 
 55 |   oldval = PtrMapSet(m, "hello", (void*)1);
 56 |   // dlog("PtrMapSet(hello) => %zu", (size_t)oldval);
 57 |   assert(m->len == 1);
 58 | 
 59 |   oldval = PtrMapSet(m, "hello", (void*)2);
 60 |   // dlog("PtrMapSet(hello) => %zu", (size_t)oldval);
 61 |   assert(m->len == 1);
 62 | 
 63 |   assert(PtrMapDel(m, "hello") == (void*)2);
 64 |   assert(m->len == 0);
 65 | 
 66 |   size_t n = 100;
 67 |   PtrMapSet(m, "break",       (void*)n++); assert(m->len == n - 100);
 68 |   PtrMapSet(m, "case",        (void*)n++); assert(m->len == n - 100);
 69 |   PtrMapSet(m, "const",       (void*)n++); assert(m->len == n - 100);
 70 |   PtrMapSet(m, "continue",    (void*)n++); assert(m->len == n - 100);
 71 |   PtrMapSet(m, "default",     (void*)n++); assert(m->len == n - 100);
 72 |   PtrMapSet(m, "defer",       (void*)n++); assert(m->len == n - 100);
 73 |   PtrMapSet(m, "else",        (void*)n++); assert(m->len == n - 100);
 74 |   PtrMapSet(m, "enum",        (void*)n++); assert(m->len == n - 100);
 75 |   PtrMapSet(m, "fallthrough", (void*)n++); assert(m->len == n - 100);
 76 |   PtrMapSet(m, "for",         (void*)n++); assert(m->len == n - 100);
 77 |   PtrMapSet(m, "fun",         (void*)n++); assert(m->len == n - 100);
 78 |   PtrMapSet(m, "go",          (void*)n++); assert(m->len == n - 100);
 79 |   PtrMapSet(m, "if",          (void*)n++); assert(m->len == n - 100);
 80 |   PtrMapSet(m, "import",      (void*)n++); assert(m->len == n - 100);
 81 |   PtrMapSet(m, "in",          (void*)n++); assert(m->len == n - 100);
 82 |   PtrMapSet(m, "interface",   (void*)n++); assert(m->len == n - 100);
 83 |   PtrMapSet(m, "is",          (void*)n++); assert(m->len == n - 100);
 84 |   PtrMapSet(m, "return",      (void*)n++); assert(m->len == n - 100);
 85 |   PtrMapSet(m, "select",      (void*)n++); assert(m->len == n - 100);
 86 |   PtrMapSet(m, "struct",      (void*)n++); assert(m->len == n - 100);
 87 |   PtrMapSet(m, "switch",      (void*)n++); assert(m->len == n - 100);
 88 |   PtrMapSet(m, "symbol",      (void*)n++); assert(m->len == n - 100);
 89 |   PtrMapSet(m, "type",        (void*)n++); assert(m->len == n - 100);
 90 |   PtrMapSet(m, "var",         (void*)n++); assert(m->len == n - 100);
 91 |   PtrMapSet(m, "while",       (void*)n++); assert(m->len == n - 100);
 92 |   PtrMapSet(m, "_",           (void*)n++); assert(m->len == n - 100);
 93 |   PtrMapSet(m, "int",         (void*)n++); assert(m->len == n - 100);
 94 | 
 95 |   // // print distribution of load on each bucket
 96 |   // printf("bucket,load\n");
 97 |   // u32* vals = hashmapDebugDistr(m);
 98 |   // for (u32 i = 0; i < m.cap; i++) {
 99 |   //   printf("%u,%u\n", i+1, vals[i]);
100 |   // }
101 |   // free(vals);
102 | 
103 |   // counts
104 |   n = 0;
105 |   PtrMapIter(m, testMapIterator, &n);
106 |   assert(n == 27);
107 | 
108 |   // del
109 |   assert(PtrMapSet(m, "hello", (void*)2) == NULL);
110 |   assert(PtrMapGet(m, "hello") == (void*)2);
111 |   assert(PtrMapDel(m, "hello") == (void*)2);
112 |   assert(PtrMapGet(m, "hello") == NULL);
113 |   assert(PtrMapSet(m, "hello", (void*)2) == NULL);
114 |   assert(PtrMapGet(m, "hello") == (void*)2);
115 | 
116 |   PtrMapFree(m);
117 |   MemoryFree(mem);
118 | }) // W_UNIT_TEST
119 | 


--------------------------------------------------------------------------------
/src/common/ptrmap.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "defs.h"
 3 | 
 4 | // PtrMap maps void* to void*. sizeof(PtrMap) == 3*sizeof(void*)
 5 | #define HASHMAP_NAME     PtrMap
 6 | #define HASHMAP_KEY      const void*
 7 | #define HASHMAP_VALUE    void*
 8 | #include "hashmap.h"
 9 | #undef HASHMAP_NAME
10 | #undef HASHMAP_KEY
11 | #undef HASHMAP_VALUE
12 | 
13 | // PtrMapInit initializes a map structure. initbuckets is the number of initial buckets.
14 | void PtrMapInit(PtrMap*, u32 initbuckets, Memory mem/*nullable*/);
15 | 
16 | // bool PtrMapIsInit(PtrMap*)
17 | #define PtrMapIsInit HASHMAP_IS_INIT
18 | 
19 | // PtrMapDealloc frees heap memory used by a map, but leaves PtrMap untouched.
20 | void PtrMapDealloc(PtrMap*);
21 | 
22 | // Creates and initializes a new PtrMap in mem, or global memory if mem is NULL.
23 | PtrMap* PtrMapNew(u32 initbuckets, Memory mem/*null*/);
24 | 
25 | // PtrMapFree frees PtrMap along with its data.
26 | void PtrMapFree(PtrMap*);
27 | 
28 | // PtrMapGet searches for key. Returns value, or NULL if not found.
29 | void* PtrMapGet(const PtrMap*, const void* key);
30 | 
31 | // PtrMapSet inserts key=value into m. Returns the replaced value or NULL if not found.
32 | void* PtrMapSet(PtrMap*, const void* key, void* value);
33 | 
34 | // PtrMapDel removes value for key. Returns the removed value or NULL if not found.
35 | void* PtrMapDel(PtrMap*, const void* key);
36 | 
37 | // PtrMapClear removes all entries. In contrast to PtrMapFree, map remains valid.
38 | void PtrMapClear(PtrMap*);
39 | 
40 | // Iterator function type. Set stop=true to stop iteration.
41 | typedef void(PtrMapIterator)(const void* key, void* value, bool* stop, void* userdata);
42 | 
43 | // PtrMapIter iterates over entries of the map.
44 | void PtrMapIter(const PtrMap*, PtrMapIterator*, void* userdata);
45 | 
46 | 


--------------------------------------------------------------------------------
/src/common/str.c:
--------------------------------------------------------------------------------
 1 | #include "str.h"
 2 | #include "memory.h"
 3 | #include "test.h"
 4 | 
 5 | // bytesrepr and strrepr returns a printable representation of an sds string (sds, Sym, etc.)
 6 | // using sdscatrepr which encodes non-printable ASCII chars for safe printing.
 7 | ConstStr bytesrepr(const u8* s, size_t len) {
 8 |   return memgcsds(sdscatrepr(sdsnewcap(len + 2), (const char*)s, len));
 9 | }
10 | 
11 | bool strhasprefix(ConstStr s, const char* prefix) {
12 |   size_t plen = strlen(prefix);
13 |   return sdslen(s) < plen ? false : memcmp(s, prefix, plen) == 0;
14 | }
15 | 
16 | 
17 | W_UNIT_TEST(Str, {
18 |   assert(strcmp( strrepr(sdsnew("lolcat")), "\"lolcat\"" ) == 0);
19 |   assert(strcmp( strrepr(sdsnew("lol\"cat")), "\"lol\\\"cat\"" ) == 0);
20 |   assert(strcmp( strrepr(sdsnew("lol\ncat")), "\"lol\\ncat\"" ) == 0);
21 |   assert(strcmp( strrepr(sdsnew("lol\x01 cat")), "\"lol\\x01 cat\"" ) == 0);
22 | 
23 |   assert(strhasprefix(sdsnew("lolcat"), "lol") == true);
24 |   assert(strhasprefix(sdsnew("lol"),    "lol") == true);
25 |   assert(strhasprefix(sdsnew("lo"),     "lol") == false);
26 | })
27 | 


--------------------------------------------------------------------------------
/src/common/str.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "sds.h"
 3 | #include "defs.h"
 4 | 
 5 | #define Str      sds
 6 | #define ConstStr constsds
 7 | 
 8 | inline static Str strgrow(Str nonull s, size_t addlSize) {
 9 |   return sdsMakeRoomFor(s, align2(addlSize, 128));
10 | }
11 | 
12 | // true if s starts with C-string prefix
13 | bool strhasprefix(ConstStr nonull s, const char* nonull prefix);
14 | 
15 | // strrepr returns a printable representation of an sds string (sds, Sym, etc.)
16 | // using sdscatrepr which encodes non-printable ASCII chars for safe printing.
17 | // E.g. "foo\x00bar" if the string contains a zero byte.
18 | // Returns a garbage-collected string.
19 | ConstStr bytesrepr(const u8* s, size_t len);
20 | inline static ConstStr strrepr(ConstStr s) {
21 |   return bytesrepr((const u8*)s, sdslen(s));
22 | }
23 | 


--------------------------------------------------------------------------------
/src/common/test.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <string.h>
 3 | // #include <execinfo.h>
 4 | 
 5 | #include "defs.h"
 6 | #include "test.h"
 7 | 
 8 | static WTestMode _testMode = (WTestMode)-1;
 9 | 
10 | WTestMode getTestMode() {
11 |   if (_testMode == (WTestMode)-1) {
12 |     _testMode = WTestModeNone;
13 |     char* testmode = getenv("W_TEST_MODE");
14 |     if (testmode != NULL) {
15 |       if (strcmp(testmode, "on") == 0) {
16 |         _testMode = WTestModeOn;
17 |       } else if (strcmp(testmode, "exclusive") == 0) {
18 |         _testMode = WTestModeExclusive;
19 |       }
20 |     }
21 |   }
22 |   return _testMode;
23 | }
24 | 


--------------------------------------------------------------------------------
/src/common/test.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "assert.h"
 3 | //
 4 | // testing
 5 | //
 6 | // Preprocessor macros:
 7 | //   W_TEST_BUILD is defined for the "test" target product (but not for "debug".)
 8 | //   W_UNIT_TEST_ENABLED is defined for "test" and "debug" targets (since DEBUG is.)
 9 | //   W_UNIT_TEST(name, body) defines a unit test to be run before main()
10 | //
11 | 
12 | #if DEBUG
13 |   #define W_UNIT_TEST_ENABLED 1
14 |   #define W_UNIT_TEST(name, body) \
15 |     __attribute__((constructor)) static void unit_test_##name() { \
16 |       if (getTestMode() != WTestModeNone) {                       \
17 |       printf("TEST " #name " %s\n", __FILE__);                    \
18 |       body                                                        \
19 |       }                                                           \
20 |     }
21 | #else
22 |   #define W_UNIT_TEST(name, body)
23 |   #define W_UNIT_TEST_ENABLED 0
24 | #endif
25 | 
26 | typedef enum WTestMode {
27 |                       // W_TEST_MODE  Description
28 |   WTestModeNone = 0,  // ""           testing disabled
29 |   WTestModeOn,        // "on"         testing enabled
30 |   WTestModeExclusive, // "exclusive"  only test; don't run main function
31 | } WTestMode;
32 | 
33 | // getTestMode retrieves the effective WTestMode parsed from environment W_TEST_MODE
34 | WTestMode getTestMode();
35 | 


--------------------------------------------------------------------------------
/src/common/thread.c:
--------------------------------------------------------------------------------
 1 | #include "defs.h"
 2 | #include "thread.h"
 3 | 
 4 | #if defined(__STDC_NO_THREADS__) && __STDC_NO_THREADS__
 5 |   // pthread layer
 6 |   #include "thread_pthread.c.h"
 7 | #endif
 8 | 
 9 | 
10 | ThreadStatus ThreadStart(Thread* nonull t, thrd_start_t nonull fn, void* nullable arg) {
11 |   return (ThreadStatus)thrd_create(t, fn, arg);
12 | }
13 | 
14 | 
15 | int ThreadAwait(Thread t) {
16 |   int result = 0;
17 |   thrd_join(t, &result); // ignore ThreadStatus
18 |   return result;
19 | }
20 | 
21 | 
22 | Thread ThreadSpawn(thrd_start_t nonull fn, void* nullable arg) nonull_return {
23 |   Thread t;
24 |   if (ThreadStart(&t, fn, arg) != ThreadSuccess) {
25 |     return NULL;
26 |   }
27 |   return t;
28 | }
29 | 


--------------------------------------------------------------------------------
/src/common/thread.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #if defined(__STDC_NO_THREADS__) && __STDC_NO_THREADS__
 3 |   #include "thread_pthread.h"
 4 | #else
 5 |   #include <threads.h>
 6 | #endif
 7 | 
 8 | typedef enum ThreadStatus {
 9 |   ThreadSuccess  = thrd_success,
10 |   ThreadNomem    = thrd_nomem,
11 |   ThreadTimedout = thrd_timedout,
12 |   ThreadBusy     = thrd_busy,
13 |   ThreadError    = thrd_error,
14 | } ThreadStatus;
15 | 
16 | typedef thrd_t Thread;
17 | 
18 | ThreadStatus    ThreadStart(Thread* nonull t, thrd_start_t nonull fn, void* nullable arg);
19 | Thread nullable ThreadSpawn(thrd_start_t nonull fn, void* nullable arg); // null on error
20 | int             ThreadAwait(Thread t);
21 | 


--------------------------------------------------------------------------------
/src/common/thread_pthread.c.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Author: John Tsiombikas <nuclear@member.fsf.org>
  3 | 
  4 | I place this piece of code in the public domain. Feel free to use as you see
  5 | fit.  I'd appreciate it if you keep my name at the top of the code somehwere,
  6 | but whatever.
  7 | 
  8 | Main project site: https://github.com/jtsiomb/c11threads
  9 | */
 10 | 
 11 | // note: assumes c11threads.h has been included
 12 | 
 13 | #include <time.h>
 14 | #include <errno.h>
 15 | #include <sched.h>	/* for sched_yield */
 16 | #include <sys/time.h>
 17 | 
 18 | #ifdef __APPLE__
 19 | /* Darwin doesn't implement timed mutexes currently */
 20 | #define C11THREADS_NO_TIMED_MUTEX
 21 | #endif
 22 | 
 23 | #ifdef C11THREADS_NO_TIMED_MUTEX
 24 | #define PTHREAD_MUTEX_TIMED_NP PTHREAD_MUTEX_NORMAL
 25 | #define C11THREADS_TIMEDLOCK_POLL_INTERVAL 5000000	/* 5 ms */
 26 | #endif
 27 | 
 28 | 
 29 | static inline int thrd_create(thrd_t *thr, thrd_start_t func, void *arg)
 30 | {
 31 | 	int res = pthread_create(thr, 0, (void*(*)(void*))func, arg);
 32 | 	if(res == 0) {
 33 | 		return thrd_success;
 34 | 	}
 35 | 	return res == ENOMEM ? thrd_nomem : thrd_error;
 36 | }
 37 | 
 38 | static inline void thrd_exit(int res)
 39 | {
 40 | 	pthread_exit((void*)(long)res);
 41 | }
 42 | 
 43 | static inline int thrd_join(thrd_t thr, int *res)
 44 | {
 45 | 	void *retval;
 46 | 
 47 | 	if(pthread_join(thr, &retval) != 0) {
 48 | 		return thrd_error;
 49 | 	}
 50 | 	if(res) {
 51 | 		*res = (int)(long)retval;
 52 | 	}
 53 | 	return thrd_success;
 54 | }
 55 | 
 56 | static inline int thrd_detach(thrd_t thr)
 57 | {
 58 | 	return pthread_detach(thr) == 0 ? thrd_success : thrd_error;
 59 | }
 60 | 
 61 | static inline thrd_t thrd_current(void)
 62 | {
 63 | 	return pthread_self();
 64 | }
 65 | 
 66 | static inline int thrd_equal(thrd_t a, thrd_t b)
 67 | {
 68 | 	return pthread_equal(a, b);
 69 | }
 70 | 
 71 | static inline int thrd_sleep(const struct timespec *ts_in, struct timespec *rem_out)
 72 | {
 73 | 	if(nanosleep(ts_in, rem_out) < 0) {
 74 | 		if(errno == EINTR) return -1;
 75 | 		return -2;
 76 | 	}
 77 | 	return 0;
 78 | }
 79 | 
 80 | static inline void thrd_yield(void)
 81 | {
 82 | 	sched_yield();
 83 | }
 84 | 
 85 | /* ---- mutexes ---- */
 86 | 
 87 | static inline int mtx_init(mtx_t *mtx, int type)
 88 | {
 89 | 	int res;
 90 | 	pthread_mutexattr_t attr;
 91 | 
 92 | 	pthread_mutexattr_init(&attr);
 93 | 
 94 | 	if(type & mtx_timed) {
 95 | 		pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_TIMED_NP);
 96 | 	}
 97 | 	if(type & mtx_recursive) {
 98 | 		pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
 99 | 	}
100 | 
101 | 	res = pthread_mutex_init(mtx, &attr) == 0 ? thrd_success : thrd_error;
102 | 	pthread_mutexattr_destroy(&attr);
103 | 	return res;
104 | }
105 | 
106 | static inline void mtx_destroy(mtx_t *mtx)
107 | {
108 | 	pthread_mutex_destroy(mtx);
109 | }
110 | 
111 | static inline int mtx_lock(mtx_t *mtx)
112 | {
113 | 	int res = pthread_mutex_lock(mtx);
114 | 	if(res == EDEADLK) {
115 | 		return thrd_busy;
116 | 	}
117 | 	return res == 0 ? thrd_success : thrd_error;
118 | }
119 | 
120 | static inline int mtx_trylock(mtx_t *mtx)
121 | {
122 | 	int res = pthread_mutex_trylock(mtx);
123 | 	if(res == EBUSY) {
124 | 		return thrd_busy;
125 | 	}
126 | 	return res == 0 ? thrd_success : thrd_error;
127 | }
128 | 
129 | static inline int mtx_timedlock(mtx_t *mtx, const struct timespec *ts)
130 | {
131 | 	int res;
132 | #ifdef C11THREADS_NO_TIMED_MUTEX
133 | 	/* fake a timedlock by polling trylock in a loop and waiting for a bit */
134 | 	struct timeval now;
135 | 	struct timespec sleeptime;
136 | 
137 | 	sleeptime.tv_sec = 0;
138 | 	sleeptime.tv_nsec = C11THREADS_TIMEDLOCK_POLL_INTERVAL;
139 | 
140 | 	while((res = pthread_mutex_trylock(mtx)) == EBUSY) {
141 | 		gettimeofday(&now, NULL);
142 | 
143 | 		if(now.tv_sec > ts->tv_sec || (now.tv_sec == ts->tv_sec &&
144 | 					(now.tv_usec * 1000) >= ts->tv_nsec)) {
145 | 			return thrd_timedout;
146 | 		}
147 | 
148 | 		nanosleep(&sleeptime, NULL);
149 | 	}
150 | #else
151 | 	if((res = pthread_mutex_timedlock(mtx, ts)) == ETIMEDOUT) {
152 | 		return thrd_timedout;
153 | 	}
154 | #endif
155 | 	return res == 0 ? thrd_success : thrd_error;
156 | }
157 | 
158 | static inline int mtx_unlock(mtx_t *mtx)
159 | {
160 | 	return pthread_mutex_unlock(mtx) == 0 ? thrd_success : thrd_error;
161 | }
162 | 
163 | /* ---- condition variables ---- */
164 | 
165 | static inline int cnd_init(cnd_t *cond)
166 | {
167 | 	return pthread_cond_init(cond, 0) == 0 ? thrd_success : thrd_error;
168 | }
169 | 
170 | static inline void cnd_destroy(cnd_t *cond)
171 | {
172 | 	pthread_cond_destroy(cond);
173 | }
174 | 
175 | static inline int cnd_signal(cnd_t *cond)
176 | {
177 | 	return pthread_cond_signal(cond) == 0 ? thrd_success : thrd_error;
178 | }
179 | 
180 | static inline int cnd_broadcast(cnd_t *cond)
181 | {
182 | 	return pthread_cond_broadcast(cond) == 0 ? thrd_success : thrd_error;
183 | }
184 | 
185 | static inline int cnd_wait(cnd_t *cond, mtx_t *mtx)
186 | {
187 | 	return pthread_cond_wait(cond, mtx) == 0 ? thrd_success : thrd_error;
188 | }
189 | 
190 | static inline int cnd_timedwait(cnd_t *cond, mtx_t *mtx, const struct timespec *ts)
191 | {
192 | 	int res;
193 | 
194 | 	if((res = pthread_cond_timedwait(cond, mtx, ts)) != 0) {
195 | 		return res == ETIMEDOUT ? thrd_timedout : thrd_error;
196 | 	}
197 | 	return thrd_success;
198 | }
199 | 
200 | /* ---- thread-specific data ---- */
201 | 
202 | static inline int tss_create(tss_t *key, tss_dtor_t dtor)
203 | {
204 | 	return pthread_key_create(key, dtor) == 0 ? thrd_success : thrd_error;
205 | }
206 | 
207 | static inline void tss_delete(tss_t key)
208 | {
209 | 	pthread_key_delete(key);
210 | }
211 | 
212 | static inline int tss_set(tss_t key, void *val)
213 | {
214 | 	return pthread_setspecific(key, val) == 0 ? thrd_success : thrd_error;
215 | }
216 | 
217 | static inline void *tss_get(tss_t key)
218 | {
219 | 	return pthread_getspecific(key);
220 | }
221 | 
222 | /* ---- misc ---- */
223 | 
224 | static inline void call_once(once_flag *flag, void (*func)(void))
225 | {
226 | 	pthread_once(flag, func);
227 | }
228 | 
#if __STDC_VERSION__ < 201112L || defined(C11THREADS_NO_TIMED_MUTEX)
/* Fallback timespec_get built on gettimeofday: fills *ts with the current time
 * (microseconds scaled to nanoseconds) and returns base unchanged.
 * TODO take base into account */
inline int timespec_get(struct timespec *ts, int base)
{
	struct timeval now;

	gettimeofday(&now, 0);

	ts->tv_nsec = now.tv_usec * 1000;
	ts->tv_sec = now.tv_sec;
	return base;
}
#endif	/* not C11 */
242 | 


--------------------------------------------------------------------------------
/src/common/thread_pthread.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <pthread.h>
 3 | 
 4 | #define ONCE_FLAG_INIT  PTHREAD_ONCE_INIT
 5 | 
 6 | typedef pthread_t       thrd_t;
 7 | typedef pthread_mutex_t mtx_t;
 8 | typedef pthread_cond_t  cnd_t;
 9 | typedef pthread_key_t   tss_t;
10 | typedef pthread_once_t  once_flag;
11 | 
12 | typedef int  (*thrd_start_t)(void*);
13 | typedef void (*tss_dtor_t)(void*);
14 | 
15 | enum {
16 |   mtx_plain     = 0,
17 |   mtx_recursive = 1,
18 |   mtx_timed     = 2,
19 | };
20 | 
21 | enum {
22 |   thrd_success,
23 |   thrd_timedout,
24 |   thrd_busy,
25 |   thrd_error,
26 |   thrd_nomem
27 | };
28 | 


--------------------------------------------------------------------------------
/src/common/tstyle.c:
--------------------------------------------------------------------------------
 1 | #include "tstyle.h"
 2 | #include <unistd.h>  // for isatty()
 3 | 
 4 | 
 5 | const char* TStyleTable[_TStyle_MAX] = {
 6 |   "\x1b[1m",  // TStyle_bold         // : sfn('1',  '1', '22'),
 7 |   "\x1b[3m",  // TStyle_italic       // : sfn('3',  '3', '23'),
 8 |   "\x1b[4m",  // TStyle_underline    // : sfn('4',  '4', '24'),
 9 |   "\x1b[7m",  // TStyle_inverse      // : sfn('7',  '7', '27'),
10 |   "\x1b[37m", // TStyle_white        // : sfn('37', '38;2;255;255;255', '39'),
11 |   "\x1b[90m", // TStyle_grey         // : sfn('90', '38;5;244', '39'),
12 |   "\x1b[30m", // TStyle_black        // : sfn('30', '38;5;16', '39'),
13 |   "\x1b[94m", // TStyle_blue         // : sfn('34', '38;5;75', '39'),
14 |   "\x1b[96m", // TStyle_cyan         // : sfn('36', '38;5;87', '39'),
15 |   "\x1b[92m", // TStyle_green        // : sfn('32', '38;5;84', '39'),
16 |   "\x1b[95m", // TStyle_magenta      // : sfn('35', '38;5;213', '39'),
17 |   "\x1b[35m", // TStyle_purple       // : sfn('35', '38;5;141', '39'),
18 |   "\x1b[35m", // TStyle_pink         // : sfn('35', '38;5;211', '39'),
19 |   "\x1b[91m", // TStyle_red          // : sfn('31', '38;2;255;110;80', '39'),
20 |   "\x1b[33m", // TStyle_yellow       // : sfn('33', '38;5;227', '39'),
21 |   "\x1b[93m", // TStyle_lightyellow  // : sfn('93', '38;5;229', '39'),
22 |   "\x1b[33m", // TStyle_orange       // : sfn('33', '38;5;215', '39'),
23 | };
24 | 
25 | const char* TStyle_none = "\x1b[0m";
26 | const char* TStyle_noColor = "\x1b[39m";
27 | 
28 | 
// Cached isatty() results: -1 = not queried yet, 0 = not a TTY, 1 = TTY.
static int _TSTyleStdoutIsTTY = -1;
static int _TSTyleStderrIsTTY = -1;

// TSTyleStdoutIsTTY reports whether standard output (fd 1) is a terminal.
// The isatty() result is cached after the first call.
bool TSTyleStdoutIsTTY() {
  if (_TSTyleStdoutIsTTY == -1) {
    _TSTyleStdoutIsTTY = isatty(STDOUT_FILENO) ? 1 : 0;
  }
  return !!_TSTyleStdoutIsTTY;
}

// TSTyleStderrIsTTY reports whether standard error (fd 2) is a terminal.
// The isatty() result is cached after the first call.
bool TSTyleStderrIsTTY() {
  if (_TSTyleStderrIsTTY == -1) {
    // must query stderr, not stdout: the two can be redirected independently
    _TSTyleStderrIsTTY = isatty(STDERR_FILENO) ? 1 : 0;
  }
  return !!_TSTyleStderrIsTTY;
}
49 | 


--------------------------------------------------------------------------------
/src/common/tstyle.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "defs.h"
 3 | #include "str.h"
 4 | 
 5 | typedef enum {
 6 |   TStyle_bold,         // : sfn('1', '1', '22'),
 7 |   TStyle_italic,       // : sfn('3', '3', '23'),
 8 |   TStyle_underline,    // : sfn('4', '4', '24'),
 9 |   TStyle_inverse,      // : sfn('7', '7', '27'),
10 |   TStyle_white,        // : sfn('37', '38;2;255;255;255', '39'),
11 |   TStyle_grey,         // : sfn('90', '38;5;244', '39'),
12 |   TStyle_black,        // : sfn('30', '38;5;16', '39'),
13 |   TStyle_blue,         // : sfn('34', '38;5;75', '39'),
14 |   TStyle_cyan,         // : sfn('36', '38;5;87', '39'),
15 |   TStyle_green,        // : sfn('32', '38;5;84', '39'),
16 |   TStyle_magenta,      // : sfn('35', '38;5;213', '39'),
17 |   TStyle_purple,       // : sfn('35', '38;5;141', '39'),
18 |   TStyle_pink,         // : sfn('35', '38;5;211', '39'),
19 |   TStyle_red,          // : sfn('31', '38;2;255;110;80', '39'),
20 |   TStyle_yellow,       // : sfn('33', '38;5;227', '39'),
21 |   TStyle_lightyellow,  // : sfn('93', '38;5;229', '39'),
22 |   TStyle_orange,       // : sfn('33', '38;5;215', '39'),
23 |   _TStyle_MAX,
24 | } TStyle;
25 | 
26 | const char* TStyleTable[_TStyle_MAX];
27 | const char* TStyle_none;
28 | const char* TStyle_noColor;
29 | 
30 | static inline Str TStyleBold(Str s) { return sdscat(s, TStyleTable[TStyle_bold]); }
31 | static inline Str TStyleItalic(Str s) { return sdscat(s, TStyleTable[TStyle_italic]); }
32 | static inline Str TStyleUnderline(Str s) { return sdscat(s, TStyleTable[TStyle_underline]); }
33 | static inline Str TStyleInverse(Str s) { return sdscat(s, TStyleTable[TStyle_inverse]); }
34 | static inline Str TStyleWhite(Str s) { return sdscat(s, TStyleTable[TStyle_white]); }
35 | static inline Str TStyleGrey(Str s) { return sdscat(s, TStyleTable[TStyle_grey]); }
36 | static inline Str TStyleBlack(Str s) { return sdscat(s, TStyleTable[TStyle_black]); }
37 | static inline Str TStyleBlue(Str s) { return sdscat(s, TStyleTable[TStyle_blue]); }
38 | static inline Str TStyleCyan(Str s) { return sdscat(s, TStyleTable[TStyle_cyan]); }
39 | static inline Str TStyleGreen(Str s) { return sdscat(s, TStyleTable[TStyle_green]); }
40 | static inline Str TStyleMagenta(Str s) { return sdscat(s, TStyleTable[TStyle_magenta]); }
41 | static inline Str TStylePurple(Str s) { return sdscat(s, TStyleTable[TStyle_purple]); }
42 | static inline Str TStylePink(Str s) { return sdscat(s, TStyleTable[TStyle_pink]); }
43 | static inline Str TStyleRed(Str s) { return sdscat(s, TStyleTable[TStyle_red]); }
44 | static inline Str TStyleYellow(Str s) { return sdscat(s, TStyleTable[TStyle_yellow]); }
45 | static inline Str TStyleLightyellow(Str s) { return sdscat(s, TStyleTable[TStyle_lightyellow]); }
46 | static inline Str TStyleOrange(Str s) { return sdscat(s, TStyleTable[TStyle_orange]); }
47 | 
48 | static inline Str TStyleNone(Str s) { return sdscat(s, TStyle_none); }
49 | static inline Str TStyleNoColor(Str s) { return sdscat(s, TStyle_noColor); }
50 | 
51 | bool TSTyleStdoutIsTTY();
52 | bool TSTyleStderrIsTTY();
53 | 


--------------------------------------------------------------------------------
/src/common/unicode.c:
--------------------------------------------------------------------------------
 1 | #include "unicode.h"
 2 | 
 3 | Rune utf8decode(const u8* buf, size_t len, u32* out_width) {
 4 |   u8 b = *buf;
 5 |   if (b < RuneSelf) {
 6 |     *out_width = 1;
 7 |     return b;
 8 |   }
 9 |   if ((b >> 5) == 0x6) {
10 |     *out_width = 2;
11 |     return len < 2 ? RuneErr
12 |                    : ((b << 6) & 0x7ff) +
13 |                      ((buf[1]) & 0x3f);
14 |   } else if ((b >> 4) == 0xE) {
15 |     *out_width = 3;
16 |     return len < 3 ? RuneErr
17 |                   : ((b << 12) & 0xffff) +
18 |                     ((buf[1] << 6) & 0xfff) +
19 |                     ((buf[2]) & 0x3f);
20 |   } else if ((b >> 3) == 0x1E) {
21 |     *out_width = 4;
22 |     return len < 4 ? RuneErr
23 |                    : ((b << 18) & 0x1fffff) +
24 |                      ((buf[1] << 12) & 0x3ffff) +
25 |                      ((buf[2] << 6) & 0xfff) +
26 |                      ((buf[3]) & 0x3f);
27 |   }
28 |   *out_width = 1;
29 |   return RuneErr;
30 | }
31 | 


--------------------------------------------------------------------------------
/src/common/unicode.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "defs.h"
 4 | 
 5 | typedef i32 Rune;
 6 | 
 7 | static const Rune RuneErr  = 0xFFFD; // Unicode replacement character
 8 | static const Rune RuneSelf = 0x80;
 9 |   // characters below RuneSelf are represented as themselves in a single byte.
10 | static const u32 UTF8Max = 4; // Maximum number of bytes of a UTF8-encoded char.
11 | 
12 | Rune utf8decode(const u8* buf, size_t len, u32* out_width);
13 | 


--------------------------------------------------------------------------------
/src/convlit.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "common/defs.h"
 3 | #include "build/build.h"
 4 | 
 5 | // convlit converts an expression to type t.
 6 | // If n is already of type t, n is simply returned.
 7 | // CCtx is used for error reporting.
 8 | 
 9 | typedef struct Node Node;
10 | 
11 | // For explicit conversions, which allows a greater range of conversions.
12 | static Node* ConvlitExplicit(CCtx* cc, Node* n, Node* t);
13 | 
14 | // For implicit conversions (e.g. operands)
15 | static Node* ConvlitImplicit(CCtx* cc, Node* n, Node* t);
16 | 
17 | 
18 | Node* convlit(CCtx* cc, Node* n, Node* t, bool explicit);
19 | inline static Node* ConvlitExplicit(CCtx* cc, Node* n, Node* t) {
20 |   return convlit(cc, n, t, /*explicit*/ true);
21 | }
22 | inline static Node* ConvlitImplicit(CCtx* cc, Node* n, Node* t) {
23 |   return convlit(cc, n, t, /*explicit*/ false);
24 | }
25 | 


--------------------------------------------------------------------------------
/src/ir/block.c:
--------------------------------------------------------------------------------
  1 | #include "ir.h"
  2 | 
  3 | 
  4 | IRBlock* IRBlockNew(IRFun* f, IRBlockKind kind, const SrcPos* pos/*?*/) {
  5 |   // Allocates a block from f->mem, assigns it the next block ID of f and
  6 |   // appends it to f->blocks. pos is optional (may be NULL).
  7 |   assert(f->bid < 0xFFFFFFFF); // too many block IDs generated
  8 |   IRBlock* blk = memalloct(f->mem, IRBlock);
  9 |   blk->kind = kind;
 10 |   blk->id = f->bid++;
 11 |   blk->f = f;
 12 |   if (pos != NULL) { blk->pos = *pos; }
 13 |   ArrayInitWithStorage(&blk->values, blk->valuesStorage, sizeof(blk->valuesStorage)/sizeof(void*));
 14 |   ArrayPush(&f->blocks, blk, f->mem);
 15 |   return blk;
 16 | }
 17 | 
 18 | 
 19 | void IRBlockDiscard(IRBlock* b) { // removes b from its function's block list and frees b
 20 |   assert(b->f != NULL);
 21 |   auto blocks = &b->f->blocks;
 22 |   // NOTE(review): the code below reads blocks->v[blocks->len - 1]; assumes the list is non-empty
 23 |   #if DEBUG
 24 |   // make sure no other block refers to this block
 25 |   for (int i = 0; i < blocks->len; i++) {
 26 |     auto b2 = (IRBlock*)blocks->v[i];
 27 |     if (b2 == b) {
 28 |       continue;
 29 |     }
 30 |     assertf(b2->preds[0] != b, "b%u holds a reference to b%u (preds[0])", b2->id, b->id);
 31 |     assertf(b2->preds[1] != b, "b%u holds a reference to b%u (preds[1])", b2->id, b->id);
 32 |     assertf(b2->succs[0] != b, "b%u holds a reference to b%u (succs[0])", b2->id, b->id);
 33 |     assertf(b2->succs[1] != b, "b%u holds a reference to b%u (succs[1])", b2->id, b->id);
 34 |   }
 35 |   #endif
 36 |   // fast path: when b is the last block, shrinking the length is enough
 37 |   if (blocks->v[blocks->len - 1] == b) {
 38 |     blocks->len--;
 39 |   } else {
 40 |     auto i = ArrayIndexOf(blocks, b);
 41 |     assert(i > -1); // b must be a member of its function's block list
 42 |     ArrayRemove(blocks, i, 1);
 43 |   }
 44 |   memfree(b->f->mem, b); // b must not be used after this call
 45 | }
 46 | 
 47 | 
 48 | void IRBlockAddValue(IRBlock* b, IRValue* v) { // appends v to the end of b->values
 49 |   ArrayPush(&b->values, v, b->f->mem); // the owning function's Memory is handed to ArrayPush
 50 | }
 51 | 
 52 | void IRBlockSetControl(IRBlock* b, IRValue* v) {
 53 |   // Replaces the block's control value while keeping use counts balanced.
 54 |   IRValue* old = b->control;
 55 |   if (old != NULL) { old->uses--; } // previous control loses this block's use
 56 |   b->control = v;
 57 |   if (v != NULL) {
 58 |     v->uses++; // passing NULL just clears the control
 59 |   }
 60 | }
 61 | 
 62 | 
 63 | static void IRBlockAddPred(IRBlock* b, IRBlock* pred) {
 64 |   assert(!b->sealed); // cannot modify preds after block is sealed
 65 |   // find the first empty slot in the fixed-size preds array
 66 |   u32 slot = 0;
 67 |   while (slot < countof(b->preds) && b->preds[slot] != NULL) {
 68 |     slot++;
 69 |   }
 70 |   if (slot < countof(b->preds)) { b->preds[slot] = pred; return; }
 71 |   // every slot is already occupied
 72 |   assert(0 && "trying to add more than countof(IRBlock.preds) blocks");
 73 | }
 74 | 
 75 | static void IRBlockAddSucc(IRBlock* b, IRBlock* succ) {
 76 |   // find the first empty slot in the fixed-size succs array
 77 |   u32 slot = 0;
 78 |   while (slot < countof(b->succs) && b->succs[slot] != NULL) {
 79 |     slot++;
 80 |   }
 81 |   if (slot < countof(b->succs)) { b->succs[slot] = succ; return; }
 82 |   // every slot is already occupied
 83 |   assert(0 && "trying to add more than countof(IRBlock.succs) blocks");
 84 | }
 85 | 
 86 | void IRBlockAddEdgeTo(IRBlock* b1, IRBlock* b2) { // records the CFG edge b1 -> b2 on both blocks
 87 |   assert(!b1->sealed); // NOTE(review): b1 only gains a successor here; b2's sealed flag is asserted inside IRBlockAddPred
 88 |   IRBlockAddSucc(b1, b2); // b1 -> b2
 89 |   IRBlockAddPred(b2, b1); // b2 <- b1
 90 |   assert(b1->f != NULL);
 91 |   assert(b1->f == b2->f); // blocks must be part of the same function
 92 |   IRFunInvalidateCFG(b1->f);
 93 | }
 94 | 
 95 | 
 96 | void IRBlockSetPred(IRBlock* b, u32 index, IRBlock* pred) { // overwrites preds[index] of an unsealed block
 97 |   assert(!b->sealed);
 98 |   assert(index < countof(b->preds));
 99 |   b->preds[index] = pred; // replaces any previous entry; pred's own succs are not touched here
100 |   assert(b->f != NULL);
101 |   IRFunInvalidateCFG(b->f);
102 | }
103 | 
104 | void IRBlockDelPred(IRBlock* b, u32 index) {
105 |   assert(!b->sealed);
106 |   assert(index < countof(b->preds));
107 |   // an already-empty slot is left alone and the CFG is not invalidated
108 |   if (b->preds[index] == NULL) { return; }
109 |   b->preds[index] = NULL;
110 |   assert(b->f != NULL);
111 |   IRFunInvalidateCFG(b->f);
112 | }
113 | 
114 | 
115 | void IRBlockSetSucc(IRBlock* b, u32 index, IRBlock* succ) { // overwrites succs[index]; no sealed check, unlike IRBlockSetPred
116 |   assert(index < countof(b->succs));
117 |   b->succs[index] = succ; // replaces any previous entry; succ's own preds are not touched here
118 |   assert(b->f != NULL);
119 |   IRFunInvalidateCFG(b->f);
120 | }
121 | 
122 | void IRBlockDelSucc(IRBlock* b, u32 index) {
123 |   assert(index < countof(b->succs));
124 |   // an already-empty slot is left alone and the CFG is not invalidated
125 |   if (b->succs[index] == NULL) { return; }
126 |   b->succs[index] = NULL;
127 |   assert(b->f != NULL);
128 |   IRFunInvalidateCFG(b->f);
129 | }
130 | 
131 | 


--------------------------------------------------------------------------------
/src/ir/builder.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "ir.h"
 3 | #include "../common/array.h"
 4 | #include "../common/ptrmap.h"
 5 | #include "../build/build.h"
 6 | 
 7 | 
 8 | typedef enum IRBuilderFlags {
 9 |   IRBuilderDefault  = 0,
10 |   IRBuilderComments = 1 << 1,  // include comments in some values, for formatting
11 |   IRBuilderOpt      = 1 << 2,  // apply construction-pass [optimization]s
12 | } IRBuilderFlags;
13 | 
14 | 
15 | typedef struct IRBuilder {
16 |   Memory         mem;  // houses all IR data constructed by this builder
17 |   PtrMap         funs; // Node* => IRFun* -- generated functions
18 |   IRBuilderFlags flags;
19 |   IRPkg*         pkg;
20 | 
21 |   // state used during building
22 |   const CCtx* cc; // current source context (source-file specific)
23 |   IRBlock* b;     // current block
24 |   IRFun*   f;     // current function
25 | 
26 |   SymMap* vars; // Sym => IRValue*
27 |     // variable assignments in the current block (map from variable symbol to ssa value)
28 |     // this PtrMap is moved into defvars when a block ends (internal call to endBlock.)
29 | 
30 |   Array defvars; void* defvarsStorage[512]; // PtrMap*[]  (from vars)
31 |     // all defined variables at the end of each block. Indexed by block id.
32 |     // null indicates there are no variables in that block.
33 | 
34 |   // incompletePhis :Map<Block,Map<ByteStr,Value>>|null
35 |     // tracks pending, incomplete phis that are completed by sealBlock for
36 |     // blocks that are sealed after they have started. This happens when preds
37 |     // are not known at the time a block starts, but is known and registered
38 |     // before the block ends.
39 | 
40 | } IRBuilder;
41 | 
42 | // start a new IRPkg.
43 | // b must be zeroed memory or a reused builder.
44 | void IRBuilderInit(IRBuilder* b, IRBuilderFlags flags, const char* pkgname/*null*/);
45 | void IRBuilderFree(IRBuilder* b);
46 | 
47 | // add ast to top-level of the current IRPkg. Returns false if any errors occurred.
48 | bool IRBuilderAdd(IRBuilder* b, const CCtx* cc, Node* ast);
49 | 


--------------------------------------------------------------------------------
/src/ir/fun.c:
--------------------------------------------------------------------------------
 1 | #include "ir.h"
 2 | 
 3 | 
 4 | IRFun* IRFunNew(Memory mem, Node* n) {
 5 |   assert(n->type != NULL);
 6 |   assert(n->type->kind == NFunType);
 7 |   IRFun* fn = (IRFun*)memalloc(mem, sizeof(IRFun));
 8 |   fn->mem = mem; fn->pos = n->pos; // pos is copied by value
 9 |   fn->name = n->fun.name; // may be NULL
10 |   fn->typeid = n->type->t.id;
11 |   ArrayInitWithStorage(&fn->blocks, fn->blocksStorage, sizeof(fn->blocksStorage)/sizeof(void*));
12 |   auto params = n->type->fun.params; // NULL: no args, tuple: one arg per element, else: one arg
13 |   if (params == NULL) { fn->nargs = 0; }
14 |   else { fn->nargs = params->kind == NTuple ? params->array.a.len : 1; }
15 |   return fn;
16 | }
17 | 
18 | 
19 | static IRValue* getConst64(IRFun* f, TypeCode t, u64 value) {
20 |   // Returns the per-function constant for (t, value), creating it on first use.
21 |   // TODO: simplify const cache to just hold int32 and int64 since we can store all
22 |   // values in these.
23 | 
24 |   // dlog("getConst64 t=%s value=%llX", TypeCodeName(t), value);
25 |   int hint = 0; // filled in by the lookup and passed on to IRConstCacheAdd below
26 |   IRValue* cached = IRConstCacheGet(f->consts, f->mem, t, value, &hint);
27 |   if (cached != NULL) {
28 |     // dlog("getConst64 use cached const op=%s value=%llX => v%u", IROpNames[v->op], value, v->id);
29 |     return cached;
30 |   }
31 |   // cache miss: create const operation and add it to the entry block of function f
32 |   auto constOp = IROpConstFromAST(t);
33 |   assert(IROpInfo(constOp)->aux != IRAuxNone);
34 |   IRValue* fresh = IRValueNew(f, f->blocks.v[0], constOp, t, /*SrcPos*/NULL);
35 |   fresh->auxInt = value;
36 |   f->consts = IRConstCacheAdd(f->consts, f->mem, t, value, fresh, hint);
37 |   // dlog("getConst64 add new const op=%s value=%llX => v%u", IROpNames[op], value, v->id);
38 |   return fresh;
39 | }
40 | 
41 | // returns a constant IRValue of type bool representing value (cached as 1 for true, 0 for false)
42 | IRValue* IRFunGetConstBool(IRFun* f, bool value) {
43 |   // TODO: as there are just two values; avoid using the const cache.
44 |   return getConst64(f, TypeCode_bool, value ? 1 : 0);
45 | }
46 | 
47 | // returns a constant IRValue representing the integer value for type t (t must satisfy TypeCodeIsInt)
48 | IRValue* IRFunGetConstInt(IRFun* f, TypeCode t, u64 value) {
49 |   assert(TypeCodeIsInt(t));
50 |   return getConst64(f, t, value);
51 | }
52 | // returns a constant IRValue for the float value; t must satisfy TypeCodeIsFloat.
53 | IRValue* IRFunGetConstFloat(IRFun* f, TypeCode t, double value) {
54 |   assert(TypeCodeIsFloat(t));
55 |   // reinterpret bits with memcpy; *(u64*)&value would break strict aliasing.
56 |   u64 ivalue = 0; memcpy(&ivalue, &value, sizeof(ivalue)); // NOTE(review): stores the 64-bit double pattern for every float type t
57 |   return getConst64(f, t, ivalue);
58 | }
59 | 
60 | void IRFunMoveBlockToEnd(IRFun* f, u32 blockIndex) {
61 |   // moves block at index to end of f->blocks
62 |   assert(f->blocks.len > blockIndex);
63 |   if (!(f->blocks.len > blockIndex + 1)) {
64 |     return; // already the last block
65 |   }
66 |   auto moved = checknull(f->blocks.v[blockIndex]);
67 |   // close the gap by sliding every later block one slot to the left,
68 |   // e.g. given blockIndex=2:
69 |   //  0 1 2 3 4
70 |   // [a,b,c,d,e]
71 |   // [a,b,d,d,e]
72 |   // [a,b,d,e,e]
73 |   // and finally store the moved block in the last slot.
74 |   u32 last = f->blocks.len - 1;
75 |   u32 slot = blockIndex;
76 |   while (slot < last) {
77 |     f->blocks.v[slot] = f->blocks.v[slot + 1]; slot++;
78 |   }
79 |   f->blocks.v[slot] = moved;
80 | }
81 | 
82 | void IRFunInvalidateCFG(IRFun* f) { // called by the IRBlock edge mutators after preds/succs change
83 |   // TODO: currently a no-op; the cached fields sketched below are not part of IRFun yet
84 |   // f->cachedPostorder = NULL;
85 |   // f->cachedLoopnest = NULL;
86 |   // f->cachedIdom = NULL;
87 |   // f->cachedSdom = NULL;
88 | }
89 | 


--------------------------------------------------------------------------------
/src/ir/ir.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "../common/defs.h"
  3 | #include "../common/memory.h"
  4 | #include "../common/array.h"
  5 | #include "../build/source.h"
  6 | #include "../parse/ast.h"
  7 | #include "../sym.h"
  8 | #include "op.h"
  9 | 
 10 | 
 11 | typedef enum IRBlockKind {
 12 |   IRBlockInvalid = 0,
 13 |   IRBlockCont,     // plain block with a single successor
 14 |   IRBlockFirst,    // 2 successors, always takes the first one (second is dead)
 15 |   IRBlockIf,       // 2 successors, if control goto succs[0] else goto succs[1]
 16 |   IRBlockRet,      // no successors, control value is memory result
 17 | } IRBlockKind;
 18 | 
 19 | typedef enum IRBranchPrediction {
 20 |   IRBranchUnlikely = -1,
 21 |   IRBranchUnknown  = 0,
 22 |   IRBranchLikely   = 1,
 23 | } IRBranchPrediction;
 24 | 
 25 | 
 26 | typedef struct IRPkg   IRPkg;
 27 | typedef struct IRFun   IRFun;
 28 | typedef struct IRBlock IRBlock;
 29 | typedef struct IRValue IRValue;
 30 | 
 31 | 
 32 | // Edge represents a CFG edge
 33 | typedef struct IREdge { int TODO; } IREdge;
 34 | 
 35 | 
 36 | // IRConstCache is used internally by IRFun (fun.c) and holds constants
 37 | typedef struct IRConstCache {
 38 |   u32   bmap;       // maps TypeCode => branch array index
 39 |   void* branches[]; // dense branch array
 40 | } IRConstCache;
 41 | 
 42 | 
 43 | typedef struct IRValue {
 44 |   u32      id;   // unique identifier
 45 |   IROp     op;   // operation that computes this value
 46 |   TypeCode type;
 47 |   SrcPos   pos;  // source position
 48 |   IRValue* args[3]; u8 argslen; // arguments
 49 |   union {
 50 |     i64 auxInt; // floats are stored as reinterpreted bits
 51 |   };
 52 |   u32 uses; // use count. Each appearance in args or IRBlock.control counts once.
 53 |   const char* comment; // short comment for IR formatting. Likely NULL. (memalloc)
 54 | } IRValue;
 55 | 
 56 | 
 57 | // Block represents a basic block
 58 | typedef struct IRBlock {
 59 |   IRFun*      f;        // owning function
 60 |   u32         id;       // block ID
 61 |   IRBlockKind kind;     // kind of block
 62 |   bool        sealed;   // true if no further predecessors will be added
 63 |   SrcPos      pos;      // source position
 64 |   const char* comment;  // short comment for IR formatting. May be NULL.
 65 |   IRBlock*    succs[2]; // Successor/subsequent blocks (CFG)
 66 |   IRBlock*    preds[2]; // Predecessors (CFG)
 67 | 
 68 |   // three-address code values
 69 |   Array values; void* valuesStorage[8]; // IRValue*[]
 70 | 
 71 |   // control is a value that determines how the block is exited.
 72 |   // Its value depends on the kind of the block. For instance, a IRBlockIf has a boolean
 73 |   // control value and IRBlockRet has a memory control value.
 74 |   IRValue* control;
 75 | 
 76 | } IRBlock;
 77 | 
 78 | 
 79 | // Fun represents a function
 80 | typedef struct IRFun {
 81 |   Memory   mem; // owning allocator
 82 |   Array    blocks; void* blocksStorage[4]; // IRBlock*[]
 83 |   Sym      name;   // may be NULL
 84 |   SrcPos   pos;    // source position
 85 |   u32      nargs;  // number of arguments
 86 |   Sym      typeid; // TypeCode encoding
 87 | 
 88 |   // internal; valid only during building
 89 |   u32    bid;    // block ID allocator
 90 |   u32    vid;    // value ID allocator
 91 |   IRConstCache* consts; // constants cache maps type+value => IRValue
 92 | } IRFun;
 93 | 
 94 | 
 95 | // Pkg represents a package with functions and data
 96 | typedef struct IRPkg {
 97 |   Memory      mem; // owning allocator
 98 |   const char* name; // c-string. "_" if NULL is passed for name to IRPkgNew. TODO use Sym?
 99 |   // TODO: Move the PtrMap funs from builder here. Need to make PtrMap use Memory.
100 |   Array funs; void* funsStorage[4]; // IRFun*[]
101 | } IRPkg;
102 | 
103 | 
104 | IRValue* IRValueNew(IRFun* f, IRBlock* b/*null*/, IROp op, TypeCode type, const SrcPos*/*null*/);
105 | void IRValueAddComment(IRValue* v, Memory, ConstStr comment);
106 | void IRValueAddArg(IRValue* v, IRValue* arg);
107 | 
108 | 
109 | IRBlock* IRBlockNew(IRFun* f, IRBlockKind, const SrcPos*/*nullable*/);
110 | void IRBlockDiscard(IRBlock* b); // removes it from b->f and frees memory of b.
111 | void IRBlockAddValue(IRBlock* b, IRValue* v);
112 | void IRBlockSetControl(IRBlock* b, IRValue* v/*pass null to clear*/);
113 | void IRBlockAddEdgeTo(IRBlock* b1, IRBlock* b2); // add an edge from b1 to successor block b2
114 | void IRBlockSetPred(IRBlock* b, u32 index, IRBlock* pred);
115 | void IRBlockDelPred(IRBlock* b, u32 index);
116 | void IRBlockSetSucc(IRBlock* b, u32 index, IRBlock* succ);
117 | void IRBlockDelSucc(IRBlock* b, u32 index);
118 | 
119 | 
120 | IRFun*   IRFunNew(Memory, Node* n);
121 | IRValue* IRFunGetConstBool(IRFun* f, bool value);
122 | IRValue* IRFunGetConstInt(IRFun* f, TypeCode t, u64 n);
123 | IRValue* IRFunGetConstFloat(IRFun* f, TypeCode t, double n);
124 | void     IRFunInvalidateCFG(IRFun*);
125 | void     IRFunMoveBlockToEnd(IRFun*, u32 blockIndex); // moves block at index to end of f->blocks
126 | 
127 | 
128 | IRPkg*   IRPkgNew(Memory, const char* name/*null*/);
129 | void     IRPkgAddFun(IRPkg* pkg, IRFun* f);
130 | 
131 | 
132 | Str IRReprPkgStr(const IRPkg* f, Str init/*null*/);
133 | 
134 | 
135 | // Note: Must use the same Memory for all calls to the same IRConstCache.
136 | // Note: addHint is only valid until the next call to a mutating function like Add.
137 | IRValue* IRConstCacheGet(
138 |   const IRConstCache* c, Memory, TypeCode t, u64 value, int* out_addHint);
139 | IRConstCache* IRConstCacheAdd(
140 |   IRConstCache* c, Memory, TypeCode t, u64 value, IRValue* v, int addHint);
141 | 


--------------------------------------------------------------------------------
/src/ir/pkg.c:
--------------------------------------------------------------------------------
 1 | #include "ir.h"
 2 | 
 3 | 
 4 | IRPkg* IRPkgNew(Memory mem, const char* name) {
 5 |   size_t namelen = name == NULL ? 0 : (strlen(name) + 1); // includes the NUL terminator
 6 |   auto pkg = (IRPkg*)memalloc(mem, sizeof(IRPkg) + namelen);
 7 |   // Creates a package in mem; a copy of name is stored right after the struct.
 8 |   pkg->mem = mem;
 9 | 
10 |   ArrayInitWithStorage(&pkg->funs, pkg->funsStorage, sizeof(pkg->funsStorage)/sizeof(void*));
11 | 
12 |   if (name == NULL) {
13 |     pkg->name = "_";
14 |   } else {
15 |     // the name lives in the extra bytes past the struct, not inside the struct itself
16 |     char* name2 = ((char*)pkg) + sizeof(IRPkg);
17 |     memcpy(name2, name, namelen); // namelen covers the terminating NUL, so no extra store is needed
18 |     pkg->name = name2;
19 |   }
20 | 
21 |   return pkg;
22 | }
23 | 
24 | 
25 | void IRPkgAddFun(IRPkg* pkg, IRFun* f) { // appends f to pkg->funs
26 |   ArrayPush(&pkg->funs, f, pkg->mem); // the package's own Memory is handed to ArrayPush
27 | }
28 | 


--------------------------------------------------------------------------------
/src/ir/repr.c:
--------------------------------------------------------------------------------
  1 | #include "ir.h"
  2 | 
  3 | typedef struct {
  4 |   Str  buf;
  5 |   bool includeTypes;
  6 | } IRRepr;
  7 | 
  8 | 
  9 | 
 10 | static void reprValue(IRRepr* r, const IRValue* v) {
 11 |   assert(v->op < Op_MAX);
 12 |   // Appends one line for v to r->buf: id, type, op, args, aux value, use count and comment.
 13 |   // vN type = Op
 14 |   r->buf = sdscatprintf(r->buf,
 15 |     "    v%-2u %-7s = %-*s",
 16 |     v->id,
 17 |     TypeCodeName(v->type),
 18 |     IROpNamesMaxLen,
 19 |     IROpNames[v->op]
 20 |   );
 21 | 
 22 |   // arg arg
 23 |   for (u8 i = 0; i < v->argslen; i++) {
 24 |     r->buf = sdscatprintf(r->buf, i+1 < v->argslen ? " v%-2u " : " v%u", v->args[i]->id);
 25 |   }
 26 | 
 27 |   // [auxInt]
 28 |   auto opinfo = IROpInfo(v->op);
 29 |   switch (opinfo->aux) {
 30 |     case IRAuxNone:
 31 |       break;
 32 |     case IRAuxBool:
 33 |     case IRAuxI8:
 34 |     case IRAuxI16:
 35 |     case IRAuxI32:
 36 |       r->buf = sdscatprintf(r->buf, " [0x%X]", (u32)v->auxInt);
 37 |       break;
 38 |     case IRAuxF32: { f32 fbits; memcpy(&fbits, &v->auxInt, sizeof(fbits)); // copy bits; a pointer cast would break strict aliasing
 39 |       r->buf = sdscatprintf(r->buf, " [%f]", fbits);
 40 |       break; }
 41 |     case IRAuxI64: // %llX expects an unsigned long long argument
 42 |       r->buf = sdscatprintf(r->buf, " [0x%llX]", (unsigned long long)v->auxInt);
 43 |       break;
 44 |     case IRAuxF64: { f64 dbits; memcpy(&dbits, &v->auxInt, sizeof(dbits)); // copy bits; a pointer cast would break strict aliasing
 45 |       r->buf = sdscatprintf(r->buf, " [%f]", dbits);
 46 |       break; }
 47 |   }
 48 | 
 49 |   // {aux}
 50 |   // TODO non-numeric aux
 51 | 
 52 |   // comment
 53 |   if (v->comment != NULL) {
 54 |     r->buf = sdscatfmt(r->buf, "\t# %u use ; %s", v->uses, v->comment);
 55 |   } else {
 56 |     r->buf = sdscatfmt(r->buf, "\t# %u use", v->uses);
 57 |   }
 58 | 
 59 |   r->buf = sdscatlen(r->buf, "\n", 1);
 60 | }
 61 | 
 62 | 
 63 | 
 64 | static void reprBlock(IRRepr* r, const IRBlock* b) { // appends header, values and exit line of b to r->buf
 65 |   // start of block header
 66 |   r->buf = sdscatfmt(r->buf, "  b%u:", b->id);
 67 | 
 68 |   // predecessors
 69 |   if (b->preds[0] != NULL) {
 70 |     if (b->preds[1] != NULL) {
 71 |       r->buf = sdscatfmt(r->buf, " <- b%u b%u", b->preds[0]->id, b->preds[1]->id);
 72 |     } else {
 73 |       r->buf = sdscatfmt(r->buf, " <- b%u", b->preds[0]->id);
 74 |     }
 75 |   } else {
 76 |     assertf(b->preds[1] == NULL, "preds are not dense"); // preds[1] may only be set when preds[0] is
 77 |   }
 78 | 
 79 |   // end block header
 80 |   if (b->comment != NULL) {
 81 |     r->buf = sdscatfmt(r->buf, "\t # %s", b->comment);
 82 |   }
 83 |   r->buf = sdscatc(r->buf, '\n');
 84 | 
 85 |   // values
 86 |   ArrayForEach(&b->values, IRValue, v) {
 87 |     reprValue(r, v);
 88 |   }
 89 | 
 90 |   // successors
 91 |   switch (b->kind) {
 92 |   case IRBlockInvalid:
 93 |     r->buf = sdscat(r->buf, "  ?\n");
 94 |     break;
 95 | 
 96 |   case IRBlockCont: {
 97 |     auto contb = b->succs[0];
 98 |     if (contb != NULL) {
 99 |       r->buf = sdscatfmt(r->buf, "  cont -> b%u\n", contb->id);
100 |     } else {
101 |       r->buf = sdscatfmt(r->buf, "  cont -> ?\n"); // successor not set
102 |     }
103 |     break;
104 |   }
105 | 
106 |   case IRBlockFirst: // formatted like IRBlockIf but labelled "first"
107 |   case IRBlockIf: {
108 |     auto thenb = b->succs[0];
109 |     auto elseb = b->succs[1];
110 |     assert(thenb != NULL && elseb != NULL);
111 |     assertf(b->control != NULL, "missing control value");
112 |     r->buf = sdscatfmt(r->buf,
113 |       "  %s v%u -> b%u b%u\n",
114 |       b->kind == IRBlockIf ? "if" : "first",
115 |       b->control->id,
116 |       thenb->id,
117 |       elseb->id
118 |     );
119 |     break;
120 |   }
121 | 
122 |   case IRBlockRet:
123 |     assert(b->control != NULL); // a ret block is printed with its control value
124 |     r->buf = sdscatfmt(r->buf, "  ret v%u\n", b->control->id);
125 |     break;
126 | 
127 |   }
128 | 
129 |   r->buf = sdscatc(r->buf, '\n');
130 | }
131 | 
132 | 
133 | static void reprFun(IRRepr* r, const IRFun* f) {
134 |   // header line "fun <name> <typeid> <address>", followed by every block of f
135 |   auto name = f->name == NULL ? "_" : f->name;
136 |   auto tid = f->typeid == NULL ? "()" : f->typeid;
137 |   r->buf = sdscatprintf(r->buf,
138 |     "fun %s %s %p\n", name, tid, f
139 |   );
140 |   ArrayForEach(&f->blocks, IRBlock, b) {
141 |     reprBlock(r, b);
142 |   }
143 | }
144 | 
145 | 
146 | static void reprPkg(IRRepr* r, const IRPkg* pkg) { // appends "package <name>" and every function of pkg to r->buf
147 |   r->buf = sdscatfmt(r->buf, "package %s\n", pkg->name);
148 |   ArrayForEach(&pkg->funs, IRFun, f) {
149 |     reprFun(r, f);
150 |   }
151 | }
152 | 
153 | 
154 | Str IRReprPkgStr(const IRPkg* pkg, Str init) {
155 |   // appends to init when one is given, otherwise starts from a new empty string
156 |   IRRepr r;
157 |   r.includeTypes = true;
158 |   r.buf = (init != NULL) ? init : sdsempty();
159 |   reprPkg(&r, pkg);
160 |   return r.buf;
161 | }
162 | 


--------------------------------------------------------------------------------
/src/ir/value.c:
--------------------------------------------------------------------------------
 1 | #include "ir.h"
 2 | 
 3 | 
 4 | IRValue* IRValueNew(IRFun* f, IRBlock* b, IROp op, TypeCode type, const SrcPos* pos) {
 5 |   assert(f->vid < 0xFFFFFFFF); // too many value IDs generated
 6 |   auto v = (IRValue*)memalloc(f->mem, sizeof(IRValue));
 7 |   v->id = f->vid++;
 8 |   v->op = op;
 9 |   v->type = type;
10 |   if (pos != NULL) { // pos is optional
11 |     v->pos = *pos;
12 |   }
13 |   if (b != NULL) {
14 |     ArrayPush(&b->values, v, b->f->mem); // the new value is appended to block b
15 |   } else {
16 |     dlog("WARN IRValueNew b=NULL"); // the value is still returned, just not attached to a block
17 |   }
18 |   return v;
19 | }
20 | 
21 | void IRValueAddComment(IRValue* v, Memory mem, ConstStr comment) {
22 |   // NULL and empty comments are accepted and ignored
23 |   if (comment == NULL) {
24 |     return;
25 |   }
26 |   auto n = sdslen(comment);
27 |   if (!(n > 0)) { return; }
28 |   // the first comment is copied; later ones are joined to the existing text with "; "
29 |   v->comment = (v->comment == NULL)
30 |     ? memallocCStr(mem, comment, n)
31 |     : memallocCStrConcat(mem, v->comment, "; ", comment, NULL);
32 | }
33 | 
34 | void IRValueAddArg(IRValue* v, IRValue* arg) {
35 |   assert(v->argslen < countof(v->args)); // args is a fixed-size array
36 |   v->args[v->argslen] = arg;
37 |   v->argslen++; arg->uses++; // each appearance in args counts as one use
38 | }
39 | 


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
  1 | #include "build/build.h"
  2 | #include "parse/parse.h"
  3 | #include "ir/builder.h"
  4 | #include "common/os.h"
  5 | #include "common/test.h"
  6 | 
  7 | static void errorHandler(const Source* src, SrcPos pos, ConstStr msg, void* userdata) { // counts the error and prints it to stderr
  8 |   u32* errcount = (u32*)userdata; // userdata points at the caller's u32 error counter
  9 |   (*errcount)++;
 10 |   auto s = SrcPosMsg(sdsempty(), pos, msg);
 11 |   s[sdslen(s)-1] = '\n'; // repurpose NUL -- NOTE(review): index sdslen(s)-1 is the last character, not the NUL terminator; confirm what SrcPosMsg ends with
 12 |   fwrite(s, sdslen(s), 1, stderr);
 13 |   sdsfree(s);
 14 | }
 15 | 
 16 | 
 17 | static void printAst(const Node* n) {
 18 |   // format the AST, terminate it with a newline and write it to stdout
 19 |   auto text = sdscatlen(NodeRepr(n, sdsempty()), "\n", 1);
 20 |   fwrite(text, sdslen(text), 1, stdout);
 21 |   sdsfree(text);
 22 | }
 23 | 
 24 | 
 25 | static void printIR(const IRPkg* pkg) {
 26 |   // format the IR package, terminate it with a newline and write it to stdout
 27 |   auto text = sdscatlen(IRReprPkgStr(pkg, sdsempty()), "\n", 1);
 28 |   fwrite(text, sdslen(text), 1, stdout);
 29 |   sdsfree(text);
 30 | }
 31 | 
 32 | 
 33 | void parsefile(Str filename, Scope* pkgscope) { // parse, resolve and build IR for one file, printing each stage to stdout
 34 | 
 35 |   // load file contents
 36 |   size_t len = 0;
 37 |   auto buf = os_readfile(filename, &len, NULL);
 38 |   if (!buf) {
 39 |     die("%s: %s", filename, strerror(errno));
 40 |   }
 41 | 
 42 |   // our userdata is number of errors encountered (incremented by errorHandler)
 43 |   u32 errcount = 0;
 44 | 
 45 |   // compilation context
 46 |   CCtx cc = {0}; // TODO: share across individual, non-overlapping compile sessions
 47 |   CCtxInit(&cc, errorHandler, &errcount, filename, buf, len);
 48 | 
 49 |   printf("————————————————————————————————————————————————————————————————\n");
 50 |   printf("PARSE\n");
 51 |   // parse input
 52 |   static P parser; // shared parser (zero-initialized since it's static)
 53 |   auto file = Parse(&parser, &cc, ParseComments /*| ParseOpt*/, pkgscope);
 54 |   printAst(file);
 55 |   if (errcount != 0) { goto end; } // stop after the first failing stage; cleanup at `end` still runs
 56 | 
 57 |   // resolve symbols and types
 58 |   if (parser.unresolved == 0) {
 59 |     dlog("(no unresolved names; not running sym resolver)");
 60 |   } else {
 61 |     printf("————————————————————————————————————————————————————————————————\n");
 62 |     printf("RESOLVE NAMES\n");
 63 |     ResolveSym(&cc, parser.s.flags, file, pkgscope);
 64 |     printAst(file);
 65 |     if (errcount != 0) { goto end; }
 66 |   }
 67 | 
 68 |   printf("————————————————————————————————————————————————————————————————\n");
 69 |   printf("RESOLVE TYPES\n");
 70 |   ResolveType(&cc, file);
 71 |   printAst(file);
 72 |   if (errcount != 0) { goto end; }
 73 | 
 74 |   printf("————————————————————————————————————————————————————————————————\n");
 75 |   printf("BUILD IR\n");
 76 |   // build some IR
 77 |   IRBuilder irbuilder = {};
 78 |   IRBuilderInit(&irbuilder, IRBuilderComments /*| IRBuilderOpt*/, "foo"); // start a new package
 79 |   IRBuilderAdd(&irbuilder, &cc, file); // add ast to current package -- NOTE(review): the bool result is ignored
 80 | 
 81 |   printf("————————————————————————————————————————————————————————————————\n");
 82 |   // print IR SLC
 83 |   printIR(irbuilder.pkg);
 84 |   IRBuilderFree(&irbuilder);
 85 | 
 86 |   // // assemble
 87 |   // AsmELF();
 88 | 
 89 |   end: // shared cleanup for both the success path and early exits
 90 |   CCtxFree(&cc);
 91 |   memgc_collect();
 92 | }
 93 | 
 94 | 
 95 | int main(int argc, char **argv) {
 96 |   if (getTestMode() == WTestModeExclusive) {
 97 |     return 0; // exclusive test mode: exit without compiling anything
 98 |   }
 99 | 
100 |   if (argc < 2) {
101 |     fprintf(stderr, "usage: %s <input>...\n", argv[0]);
102 |     exit(1);
103 |   }
104 | 
105 |   // int out = 1; // stdout
106 |   // TODO: support -o <file> CLI flag.
107 |   // int out = open(argv[2], O_WRONLY | O_CREAT, 0660);
108 |   // if (out < 0) {
109 |   //   fprintf(stderr, "error opening output %s: %s\n", argv[2], strerror(errno));
110 |   //   exit(1);
111 |   // }
112 | 
113 |   auto pkgscope = ScopeNew(GetGlobalScope(), NULL);
114 |   parsefile(sdsnew(argv[1]), pkgscope); // NOTE(review): only argv[1] is processed although the usage text advertises "<input>..."
115 | 
116 |   return 0;
117 | }
118 | 
119 | 


--------------------------------------------------------------------------------
/src/parse/parse.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "scan.h"
 3 | #include "../common/array.h"
 4 | #include "ast.h"
 5 | // #include "common/assert.h"
 6 | // #include "common/test.h"
 7 | // #include "common/memory.h"
 8 | // #include "common/str.h"
 9 | // #include "common/os.h"
10 | // #include "sym.h"
11 | 
12 | // parser
13 | typedef struct P {
14 |   S      s;          // scanner
15 |   u32    fnest;      // function nesting level (for error handling)
16 |   u32    unresolved; // number of unresolved identifiers
17 |   Scope* scope;      // current scope
18 |   CCtx*  cc;         // compilation context
19 | } P;
20 | Node* Parse(P*, CCtx*, ParseFlags, Scope* pkgscope);
21 | Node* NodeOptIfCond(Node* n); // TODO: move this and parser into a parse.h file
22 | 
23 | // Symbol resolver
24 | Node* ResolveSym(CCtx*, ParseFlags, Node*, Scope*);
25 | 
26 | // Type resolver
27 | void ResolveType(CCtx*, Node*);
28 | 


--------------------------------------------------------------------------------
/src/parse/parseint.c:
--------------------------------------------------------------------------------
 1 | #include "parseint.h"
 2 | #include <limits.h>
 3 | // GEN_STRTO_X defines parseint<BITS>: parses exactly size bytes at pch as an unsigned integer in the given base.
 4 | #define GEN_STRTO_X(BITS, MAXVAL) \
 5 | bool parseint##BITS(const char* pch, size_t size, int base, u##BITS* result) { \
 6 |   assert(base >= 2 && base <= 36);                                             \
 7 |   const char* s = pch;                                                         \
 8 |   const char* end = pch + size;                                                \
 9 |   u##BITS acc = 0;                                                             \
10 |   u##BITS cutoff = MAXVAL;                                                     \
11 |   u##BITS cutlim = cutoff % base;                                              \
12 |   cutoff /= base;                                                              \
13 |   int any = 0; /* 0: no digits yet, 1: digits consumed, -1: overflow */        \
14 |   for (; s != end; s++) { char c = *s; /* read only while s is inside input */ \
15 |     if (c >= '0' && c <= '9') {                                                \
16 |       c -= '0';                                                                \
17 |     } else if (c >= 'A' && c <= 'Z') {                                         \
18 |       c -= 'A' - 10;                                                           \
19 |     } else if (c >= 'a' && c <= 'z') {                                         \
20 |       c -= 'a' - 10;                                                           \
21 |     } else {                                                                   \
22 |       return false;                                                            \
23 |     }                                                                          \
24 |     if (c >= base) {                                                           \
25 |       return false;                                                            \
26 |     }                                                                          \
27 |     if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) {            \
28 |       any = -1;                                                                \
29 |     } else {                                                                   \
30 |       any = 1;                                                                 \
31 |       acc *= base;                                                             \
32 |       acc += c;                                                                \
33 |     }                                                                          \
34 |   }                                                                            \
35 |   if (any < 0 ||  /* more digits than what fits in acc */                      \
36 |       any == 0)   /* empty input */                                            \
37 |   {                                                                            \
38 |     return false;                                                              \
39 |   }                                                                            \
40 |   *result = acc;                                                               \
41 |   return true;                                                                 \
42 | }
43 | 
44 | GEN_STRTO_X(32, 0xFFFFFFFFu)
45 | GEN_STRTO_X(64, 0xFFFFFFFFFFFFFFFFull)
46 | 
47 | #ifndef NDEBUG
48 | __attribute__((constructor)) static void test() { // self-test; compiled only when NDEBUG is not defined
49 |   #define T32(cstr, base, expectnum) (({ \
50 |     u32 result = 0; \
51 |     bool ok = parseint32(cstr, strlen(cstr), base, &result); \
52 |     assert(ok || !cstr); \
53 |     if (result != expectnum) { fprintf(stderr, "result: 0x%X\n", result); } \
54 |     assert(result == expectnum || !"got: "&& result); \
55 |   }))
56 | 
57 |   #define T64(cstr, base, expectnum) (({ \
58 |     u64 result = 0; \
59 |     bool ok = parseint64(cstr, strlen(cstr), base, &result); \
60 |     assert(ok || !cstr); \
61 |     if (result != expectnum) { fprintf(stderr, "result: 0x%llX\n", result); } \
62 |     assert(result == expectnum || !"got: "&& result); \
63 |   }))
64 |   // 32-bit cases, including the maximum value 0xFFFFFFFF
65 |   T32("FFAA3191", 16, 0xFFAA3191);
66 |   T32("0", 16, 0);
67 |   T32("000000", 16, 0);
68 |   T32("7FFFFFFF", 16, 0x7FFFFFFF);
69 |   T32("EFFFFFFF", 16, 0xEFFFFFFF);
70 |   T32("FFFFFFFF", 16, 0xFFFFFFFF);
71 | 
72 |   // fits in s64
73 |   T64("7fffffffffffffff",       16, 0x7FFFFFFFFFFFFFFF);
74 |   T64("9223372036854775807",    10, 0x7FFFFFFFFFFFFFFF);
75 |   T64("777777777777777777777",  8,  0x7FFFFFFFFFFFFFFF);
76 |   T64("1y2p0ij32e8e7",          36, 0x7FFFFFFFFFFFFFFF);
77 | 
78 |   T64("efffffffffffffff",       16, 0xEFFFFFFFFFFFFFFF); // this caught a bug once
79 |   // maximum u64 value in bases 16, 10, 8 and 36
80 |   T64("ffffffffffffffff",       16, 0xFFFFFFFFFFFFFFFF);
81 |   T64("18446744073709551615",   10, 0xFFFFFFFFFFFFFFFF);
82 |   T64("1777777777777777777777", 8,  0xFFFFFFFFFFFFFFFF);
83 |   T64("3w5e11264sgsf",          36, 0xFFFFFFFFFFFFFFFF);
84 | }
85 | #endif
86 | 


--------------------------------------------------------------------------------
/src/parse/parseint.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "../common/defs.h"
3 | // Parse the len bytes at ptr as an unsigned integer in the given base, storing it in *result.
4 | bool parseint32(const char* ptr, size_t len, int base, u32* result); // returns false on failure, e.g. when the value overflows u32
5 | bool parseint64(const char* ptr, size_t len, int base, u64* result); // returns false on failure, e.g. when the value overflows u64
6 | 


--------------------------------------------------------------------------------
/src/parse/scan.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "../common/defs.h"
 3 | #include "../build/build.h"
 4 | #include "../sym.h"
 5 | #include "token.h"
 6 | 
 7 | // ParseFlags are option bits for the scanner and parser
 8 | typedef enum {
 9 |   ParseFlagsDefault = 0x0,
10 |   ParseComments     = 0x2, // scan comments and collect them in S.comments
11 |   ParseOpt          = 0x4, // enable optimizations; AST/token stream may not be 1:1 with source
12 | } ParseFlags;
13 | 
14 | // Comment describes one comment found by the scanner. Comments form a singly-linked list.
15 | typedef struct Comment {
16 |   struct Comment* next; // next comment in linked list
17 |   Source*         src;  // source
18 |   const u8*       ptr;  // pointer into the source where the comment is located
19 |   size_t          len;  // byte length
20 | } Comment;
21 | 
22 | // S is the scanner state: input cursor, current token, scanned comments and line tracking
23 | typedef struct S {
24 |   Memory     mem;          // NOTE(review): presumably the Memory passed to SInit — confirm
25 |   Source*    src;          // input source
26 |   const u8*  inp;          // input buffer current pointer
27 |   const u8*  inp0;         // input buffer previous pointer
28 |   const u8*  inend;        // input buffer end
29 |   ParseFlags flags;        // ParseFlags bits, e.g. ParseComments
30 | 
31 |   Tok       tok;           // current token
32 |   const u8* tokstart;      // start of current token
33 |   const u8* tokend;        // end of current token (SSrcPos uses tokend - tokstart as its length)
34 |   Sym       name;          // Current name (valid for TIdent and keywords)
35 |   bool      insertSemi;    // insert a semicolon before next newline
36 |   Comment*  comments;      // linked list head of comments scanned so far
37 |   Comment*  comments_tail; // linked list tail of comments scanned so far
38 | 
39 |   u32       lineno;     // source position line
40 |   const u8* linestart;  // source position line start pointer (for column)
41 | 
42 |   ErrorHandler* errh;      // NOTE(review): presumably the handler passed to SInit — confirm
43 |   void*         userdata;  // NOTE(review): presumably the userdata passed to SInit — confirm
44 | } S;
45 | 
46 | // SInit initializes a scanner
47 | void SInit(S*, Memory, Source*, ParseFlags, ErrorHandler*, void* userdata);
48 | 
49 | // SNext scans the next token
50 | Tok SNext(S*);
51 | 
52 | // SSrcPos builds the SrcPos (source, start offset, length) of the scanner's current token
53 | inline static SrcPos SSrcPos(S* s) {
54 |   const u8* const bufstart = s->src->buf;
55 |   const u8* const bufend   = bufstart + s->src->len;
56 |   assert(s->tokstart >= bufstart); assert(s->tokstart < bufend);
57 |   assert(s->tokend >= s->tokstart); assert(s->tokend <= bufend);
58 |   const size_t startoffs = s->tokstart - bufstart;
59 |   SrcPos pos = { s->src, startoffs, s->tokend - s->tokstart };
60 |   return pos;
61 | }
62 | 


--------------------------------------------------------------------------------
/src/parse/token.c:
--------------------------------------------------------------------------------
 1 | #include "../common/defs.h"
 2 | #include "token.h"
 3 | 
 4 | // TokName returns a printable name for token t: the token's string from TOKENS,
 5 | // "keyword <name>" for keywords, or the constant's own name for TKeywordsStart,
 6 | // TKeywordsEnd and TMax. Returns "?" if t is not a value of the Tok enum.
 7 | const char* TokName(Tok t) {
 8 |   switch (t) {
 9 |     #define I_ENUM(name, str) case name: return str;
10 |     TOKENS(I_ENUM)
11 |     #undef I_ENUM
12 | 
13 |     case TKeywordsStart: return "TKeywordsStart";
14 | 
15 |     #define I_ENUM(str, name) case name: return "keyword " #str;
16 |     TOKEN_KEYWORDS(I_ENUM)
17 |     #undef I_ENUM
18 | 
19 |     case TKeywordsEnd: return "TKeywordsEnd";
20 | 
21 |     case TMax: return "TMax";
22 |   }
23 |   // Not reached for valid tokens. Without this return, control would fall off the end of
24 |   // the function for a value outside the enum, which is undefined behavior.
25 |   return "?";
26 | }
27 | 


--------------------------------------------------------------------------------
/src/parse/token.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | // Defines the Tok enum
  3 | 
  4 | // TOKENS lists the non-keyword scanner tokens as _(EnumName, "printable string") entries
  5 | #define TOKENS(_)  \
  6 |   _( TNone  , "TNone" ) \
  7 |   _( TComma , ",")      \
  8 |   _( TSemi  , ";")      \
  9 |   \
 10 |   _( T_PRIM_OPS_START , "") \
 11 |   /* primary "intrinsic" operator tokens, most of them mapping directly to IR ops */ \
 12 |   _( TStar          , "*")  \
 13 |   _( TSlash         , "/")  \
 14 |   _( TPercent       , "%")  \
 15 |   _( TShl           , "<<") \
 16 |   _( TShr           , ">>") \
 17 |   _( TAnd           , "&")  \
 18 |   _( TPlus          , "+")  \
 19 |   _( TMinus         , "-")  \
 20 |   _( TPipe          , "|")  \
 21 |   _( THat           , "^")  \
 22 |   _( TTilde         , "~")  \
 23 |   _( TExcalm        , "!")  \
 24 |   _( TEq            , "==") \
 25 |   _( TNEq           , "!=") \
 26 |   _( TLt            , "<")  \
 27 |   _( TLEq           , "<=") \
 28 |   _( TGt            , ">")  \
 29 |   _( TGEq           , ">=") \
 30 |   _( TPlusPlus      , "++") \
 31 |   _( TMinusMinus    , "--") \
 32 |   \
 33 |   _( T_PRIM_OPS_END , "") /* end of operator tokens */ \
 34 |   \
 35 |   _( TAssign        , "=")   \
 36 |   _( TShlAssign     , "<<=") \
 37 |   _( TShrAssign     , ">>=") \
 38 |   _( TPlusAssign    , "+=")  \
 39 |   _( TMinusAssign   , "-=")  \
 40 |   _( TStarAssign    , "*=")  \
 41 |   _( TSlashAssign   , "/=")  \
 42 |   _( TPercentAssign , "%=")  \
 43 |   _( TAndAssign     , "&=")  \
 44 |   _( TPipeAssign    , "|=")  \
 45 |   _( TTildeAssign   , "~=")  \
 46 |   _( THatAssign     , "^=")  \
 47 |   _( TLParen        , "(")   \
 48 |   _( TRParen        , ")")   \
 49 |   _( TLBrace        , "{")   \
 50 |   _( TRBrace        , "}")   \
 51 |   _( TLBrack        , "[")   \
 52 |   _( TRBrack        , "]")   \
 53 |   _( TAndAnd        , "&&")  \
 54 |   _( TPipePipe      , "||")  \
 55 |   _( TRArr          , "->")  \
 56 |   _( TIdent         , "identifier") \
 57 |   _( TIntLit        , "int")        \
 58 |   _( TFloatLit      , "float")      \
 59 |   _( TComment       , "comment")    \
 60 | /*END TOKENS*/
 61 | #define TOKEN_KEYWORDS(_) \
 62 |   _( as,          TAs)          \
 63 |   _( break,       TBreak)       \
 64 |   _( case,        TCase)        \
 65 |   _( continue,    TContinue)    \
 66 |   _( default,     TDefault)     \
 67 |   _( defer,       TDefer)       \
 68 |   _( else,        TElse)        \
 69 |   _( enum,        TEnum)        \
 70 |   _( for,         TFor)         \
 71 |   _( fun,         TFun)         \
 72 |   _( if,          TIf)          \
 73 |   _( import,      TImport)      \
 74 |   _( in,          TIn)          \
 75 |   _( interface,   TInterface)   \
 76 |   _( is,          TIs)          \
 77 |   _( mutable,     TMutable)     \
 78 |   _( nil,         TNil)         \
 79 |   _( return,      TReturn)      \
 80 |   _( select,      TSelect)      \
 81 |   _( struct,      TStruct)      \
 82 |   _( switch,      TSwitch)      \
 83 |   _( symbol,      TSymbol)      \
 84 |   _( type,        TType)        \
 85 |   _( while,       TWhile)       \
 86 | // Entries are _(keyword, TokenName). Limited to a total of 31 keywords. See scan.c
 87 | //END TOKEN_KEYWORDS
 88 | 
 89 | typedef enum {
 90 |   #define TOK_ENTRY(name, _str) name,
 91 |   TOKENS(TOK_ENTRY)
 92 |   #undef TOK_ENTRY
 93 | 
 94 |   // Keyword tokens are numbered upward from TKeywordsStart, which serves as the base for
 95 |   // keyword indexing. The value is explicit so that the generated keyword symbols in sym.c
 96 |   // stay valid when non-keyword tokens are added. The number can be changed freely, but
 97 |   // doing so requires regenerating the code in sym.c.
 98 |   TKeywordsStart = 0x100,
 99 |   #define TOK_ENTRY(_str, name) name,
100 |   TOKEN_KEYWORDS(TOK_ENTRY)
101 |   #undef TOK_ENTRY
102 |   TKeywordsEnd,
103 | 
104 |   TMax
105 | } Tok;
106 | 
107 | static_assert(TKeywordsEnd - TKeywordsStart <= 32, "too many keywords");
108 | 
109 | // Get printable name
110 | const char* TokName(Tok);
111 | 


--------------------------------------------------------------------------------
/src/sym.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "common/str.h"
  3 | #include "parse/token.h"
  4 | #include "types.h"
  5 | 
  6 | // Sym is a type of sds string, compatible with sds functions, with an additional header
  7 | // containing a precomputed FNV1a hash. Sym is immutable.
  8 | typedef const char* Sym;
  9 | 
 10 | // Predefinition of Node
 11 | typedef struct Node Node;
 12 | 
 13 | // Get a symbol (retrieves or interns)
 14 | Sym symget(const u8* data, size_t len, u32 hash);
 15 | 
 16 | // Hashes data and then calls symget
 17 | Sym symgeth(const u8* data, size_t len);
 18 | 
 19 | // symcmp compares the string values of a and b; identical pointers are equal without strcmp.
 20 | inline static int symcmp(Sym a, Sym b) { if (a == b) { return 0; } return strcmp(a, b); }
 21 | // SymHeader is the header stored directly before a Sym's string bytes (see SYM_HDR)
 22 | typedef struct __attribute__((__packed__)) SymHeader {
 23 |   u32             hash; // precomputed FNV1a hash (read by symhash)
 24 |   struct sdshdr16 sh;   // sds header; its len and flags fields are read by symlen and symLangTok
 25 | } SymHeader;
 26 | 
 27 | // SYM_HDR(s) accesses the SymHeader of s (Sym/sds/const char*), sizeof(SymHeader) bytes before s
 28 | #define SYM_HDR(s) ((const SymHeader*)((s) - (sizeof(SymHeader))))
 29 | 
 30 | // symhash returns the FNV1a hash stored in the header of s
 31 | inline static u32 symhash(Sym s) { const SymHeader* hdr = SYM_HDR(s); return hdr->hash; }
 32 | 
 33 | // symlen reads the length of s directly from its header; faster than sdslen (no type lookup)
 34 | inline static u16 symlen(Sym s) { const SymHeader* hdr = SYM_HDR(s); return hdr->sh.len; }
 35 | 
 36 | // symLangTok returns the keyword token for s when s is a language keyword, else TIdent.
 37 | // Keyword syms store their offset from TKeywordsStart in the flag bits above SDS_TYPE_BITS.
 38 | inline static Tok symLangTok(Sym s) {
 39 |   const u32 kwoffs = SYM_HDR(s)->sh.flags >> SDS_TYPE_BITS;
 40 |   if (kwoffs == 0) { return TIdent; }
 41 |   return kwoffs + TKeywordsStart;
 42 | }
 43 | 
 44 | // SymMap maps Sym to pointers
 45 | #define HASHMAP_NAME     SymMap
 46 | #define HASHMAP_KEY      Sym
 47 | #define HASHMAP_VALUE    void*
 48 | #include "common/hashmap.h"
 49 | #undef HASHMAP_NAME
 50 | #undef HASHMAP_KEY
 51 | #undef HASHMAP_VALUE
 52 | 
 53 | // Creates and initializes a new SymMap in mem, or global memory if mem is NULL.
 54 | SymMap* SymMapNew(u32 initbuckets, Memory mem/*null*/);
 55 | 
 56 | // SymMapInit initializes a map structure. initbuckets is the number of initial buckets.
 57 | void SymMapInit(SymMap*, u32 initbuckets, Memory mem/*null*/);
 58 | 
 59 | // SymMapFree frees SymMap along with its data.
 60 | void SymMapFree(SymMap*);
 61 | 
 62 | // SymMapDealloc frees heap memory used by a map, but leaves SymMap untouched.
 63 | void SymMapDealloc(SymMap*);
 64 | 
 65 | // SymMapGet searches for key. Returns value, or NULL if not found.
 66 | void* SymMapGet(const SymMap*, Sym key);
 67 | 
 68 | // SymMapSet inserts key=value into m. Returns the replaced value or NULL if not found.
 69 | void* SymMapSet(SymMap*, Sym key, void* value);
 70 | 
 71 | // SymMapDel removes value for key. Returns the removed value or NULL if not found.
 72 | void* SymMapDel(SymMap*, Sym key);
 73 | 
 74 | // SymMapClear removes all entries. In contrast to SymMapFree, map remains valid.
 75 | void SymMapClear(SymMap*);
 76 | 
 77 | // Iterator function type. Set stop=true to stop iteration.
 78 | typedef void(SymMapIterator)(Sym key, void* value, bool* stop, void* userdata);
 79 | 
 80 | // SymMapIter iterates over entries of the map.
 81 | void SymMapIter(const SymMap*, SymMapIterator*, void* userdata);
 82 | 
 83 | // One sym_<keyword> symbol is declared per entry of TOKEN_KEYWORDS (defined in token.h).
 84 | // NOTE(review): declared without extern, i.e. as tentative definitions in a header — confirm.
 85 | #define SYM_DEF(str, _) \
 86 |   const Sym sym_##str;
 87 | TOKEN_KEYWORDS(SYM_DEF)
 88 | #undef SYM_DEF
 89 | 
 90 | // For each type in TYPE_SYMS (types.h): its symbol sym_<name> and its AST node Type_<name>.
 91 | // NOTE(review): declared without extern, i.e. as tentative definitions in a header — confirm.
 92 | #define SYM_DEF(name) \
 93 |   const Sym sym_##name; \
 94 |   Node* Type_##name;
 95 | TYPE_SYMS(SYM_DEF)
 96 | #undef SYM_DEF
 97 | 
 98 | // nil is special and implemented without macros since its sym is defined by TOKEN_KEYWORDS
 99 | Node* Type_nil;
100 | Node* Const_nil;
101 | 
102 | // ideal is the type of untyped constants
103 | Node* Type_ideal;
104 | // TypeCodeToTypeNode returns the type Node for TypeCode t; t must be < TypeCode_CONCRETE_END
105 | // (asserted). NOTE(review): the map below is declared without extern — confirm intent.
106 | static Node* TypeCodeToTypeNode(TypeCode t);
107 | Node* const _TypeCodeToTypeNodeMap[TypeCode_CONCRETE_END];
108 | inline static Node* TypeCodeToTypeNode(TypeCode t) {
109 |   assert(t >= 0 && t < TypeCode_CONCRETE_END);
110 |   return _TypeCodeToTypeNodeMap[t];
111 | }
112 | 
113 | // symbols and AST nodes for predefined constants
114 | #define PREDEFINED_CONSTANTS(_) \
115 |   _( true,  bool, 1 ) \
116 |   _( false, bool, 0 ) \
117 | /*END PREDEFINED_CONSTANTS*/
118 | #define SYM_DEF(name, _type, _val) \
119 |   const Sym sym_##name; \
120 |   Node* Const_##name;
121 | PREDEFINED_CONSTANTS(SYM_DEF)
122 | #undef SYM_DEF
123 | 
124 | 
125 | // symbols for predefined common identifiers
126 | // predefined common identifiers (excluding types)
127 | #define PREDEFINED_IDENTS(ID) \
128 |   ID( _ ) \
129 | /*END PREDEFINED_IDENTS*/
130 | #define SYM_DEF(name) \
131 |   const Sym sym_##name;
132 | PREDEFINED_IDENTS(SYM_DEF)
133 | #undef SYM_DEF
134 | 


--------------------------------------------------------------------------------
/src/typeid.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "common/defs.h"
 3 | #include "types.h"
 4 | #include "sym.h"
 5 | 
 6 | typedef struct Node Node;
 7 | 
 8 | // GetTypeID retrieves the TypeID for the type node n.
 9 | // This function may mutate n by computing and storing id to n.t.id.
10 | Sym GetTypeID(Node* n);
11 | 
12 | // TypeEquals returns true if a and b are equivalent types (i.e. identical).
13 | bool TypeEquals(Node* a, Node* b);
14 | 
15 | // TypeConv describes the effect of converting one type to another
16 | typedef enum TypeConv {
17 |   TypeConvLossless = 0,  // conversion is "perfect". e.g. int32 -> int64
18 |   TypeConvLossy,         // conversion may be lossy. e.g. int32 -> float32
19 |   TypeConvImpossible,    // conversion is not possible. e.g. (int,int) -> bool
20 | } TypeConv;
21 | 
22 | // // TypeConversion returns the effect of converting fromType -> toType.
23 | // // intsize is the size in bytes of the "int" and "uint" types. E.g. 4 for 32-bit.
24 | // TypeConv CheckTypeConversion(Node* fromType, Node* toType, u32 intsize);
25 | 


--------------------------------------------------------------------------------
/src/types.c:
--------------------------------------------------------------------------------
 1 | #include "types.h"
 2 | 
 3 | // Lookup table TypeCode => string encoding char (0 for the sentinel entries and for ideal)
 4 | const char TypeCodeEncoding[TypeCode_MAX] = {
 5 |   #define I_ENUM(name, encoding, _flags) encoding,
 6 |   TYPE_CODES(I_ENUM)
 7 |   #undef  I_ENUM
 8 | };
 9 | 
10 | // Lookup table TypeCode => symbolic name, e.g. "int32" (read by TypeCodeName in types.h)
11 | // #if DEBUG
12 | const char* _TypeCodeName[TypeCode_MAX] = {
13 |   #define I_ENUM(name, _encoding, _flags) #name,
14 |   TYPE_CODES(I_ENUM)
15 |   #undef  I_ENUM
16 | };
17 | 
18 | // Lookup table TypeCode => TypeCodeFlag bits, taken from the third column of TYPE_CODES
19 | const TypeCodeFlag TypeCodeFlagMap[TypeCode_MAX] = {
20 |   #define I_ENUM(_name, _encoding, flags) flags,
21 |   TYPE_CODES(I_ENUM)
22 |   #undef  I_ENUM
23 | };
24 | // CTypeName returns a printable name for ct, or "?" if ct is not a CType value.
25 | const char* CTypeName(CType ct) {
26 |   static const char* const names[] = {
27 |     [CType_INVALID] = "INVALID",
28 |     [CType_int]     = "int",
29 |     [CType_rune]    = "rune",
30 |     [CType_float]   = "float",
31 |     [CType_str]     = "str",
32 |     [CType_bool]    = "bool",
33 |     [CType_nil]     = "nil",
34 |   };
35 |   return (size_t)ct < sizeof(names) / sizeof(names[0]) ? names[ct] : "?";
36 | }
37 | 
38 | 
39 | // const char* TypeCodeName(TypeCode tc) {
40 | //   assert(tc > 0 && tc < TypeCode_MAX);
41 | //   return _TypeCodeName[tc];
42 | // }
43 | // #else
44 | //   // compact names where a string is formed from encoding chars + sentinels bytes.
45 | //   // E.g. "b\01\02\03\04\05\06\07\08\0f\0F\0..." Index is *2 that of TypeCode.
46 | //   static const char _TypeCodeName[TypeCode_MAX * 2] = {
47 | //     #define I_ENUM(_, enc) enc, 0,
48 | //     TYPE_CODES(I_ENUM)
49 | //     #undef  I_ENUM
50 | //   };
51 | //   const char* TypeCodeName(TypeCode tc) {
52 | //     assert(tc > 0 && tc < TypeCode_MAX);
53 | //     return &_TypeCodeName[tc * 2];
54 | //   }
55 | // #endif
56 | 


--------------------------------------------------------------------------------
/src/types.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "common/defs.h"
  3 | // TypeCodeFlag describes a TypeCode: byte size (low 4 bits), int/float kind and signedness
  4 | typedef enum TypeCodeFlag {
  5 |   TypeCodeFlagNone = 0,
  6 |   TypeCodeFlagSizeMask = 0b0000000000001111, // bitmask for extracting SizeN flag
  7 |   TypeCodeFlagSize1    = 1 << 0, // = 1 = 1 byte (8 bits) wide
  8 |   TypeCodeFlagSize2    = 1 << 1, // = 2 = 2 bytes (16 bits) wide
  9 |   TypeCodeFlagSize4    = 1 << 2, // = 4 = 4 bytes (32 bits) wide
 10 |   TypeCodeFlagSize8    = 1 << 3, // = 8 = 8 bytes (64 bits) wide
 11 |   TypeCodeFlagInt      = 1 << 4, // is integer
 12 |   TypeCodeFlagFloat    = 1 << 5, // is float
 13 |   TypeCodeFlagSigned   = 1 << 6, // [integers only]: is signed
 14 | } TypeCodeFlag;
 15 | 
 16 | // TYPE_CODES lists all type codes as _(name, string encoding char, TypeCodeFlag bits) entries.
 17 | // Note: misc/gen_ops.py relies on "#define TYPE_CODES" and "NUM_END".
 18 | #define TYPE_CODES(_) \
 19 |   /* named types exported in the global scope. Names must match those of TYPE_SYMS. */ \
 20 |   /* Note: numeric types are listed first as their enum value is used as dense indices. */ \
 21 |   /* Note: order of intrinsic integer types must be signed,unsigned,signed,unsigned... */ \
 22 |   /* NOTE(review): TypeCodeIsInt() below is flag-based; order is checked by static_asserts. */ \
 23 |   /* name       encoding  flags */ \
 24 |   _( bool      , 'b', 0 ) \
 25 |   _( int8      , '1', TypeCodeFlagSize1 | TypeCodeFlagInt | TypeCodeFlagSigned ) \
 26 |   _( uint8     , '2', TypeCodeFlagSize1 | TypeCodeFlagInt ) \
 27 |   _( int16     , '3', TypeCodeFlagSize2 | TypeCodeFlagInt | TypeCodeFlagSigned ) \
 28 |   _( uint16    , '4', TypeCodeFlagSize2 | TypeCodeFlagInt ) \
 29 |   _( int32     , '5', TypeCodeFlagSize4 | TypeCodeFlagInt | TypeCodeFlagSigned ) \
 30 |   _( uint32    , '6', TypeCodeFlagSize4 | TypeCodeFlagInt ) \
 31 |   _( int64     , '7', TypeCodeFlagSize8 | TypeCodeFlagInt | TypeCodeFlagSigned ) \
 32 |   _( uint64    , '8', TypeCodeFlagSize8 | TypeCodeFlagInt ) \
 33 |   _( float32   , 'f', TypeCodeFlagSize4 | TypeCodeFlagFloat ) \
 34 |   _( float64   , 'F', TypeCodeFlagSize8 | TypeCodeFlagFloat ) \
 35 |   _( int       , 'i', TypeCodeFlagInt | TypeCodeFlagSigned ) \
 36 |   _( uint      , 'u', TypeCodeFlagInt ) \
 37 |   _( NUM_END, 0, 0 ) /* sentinel; not a TypeCode */ \
 38 |   _( str       , 's', 0 ) \
 39 |   _( nil       , '0', 0 ) \
 40 |   _( CONCRETE_END, 0, 0 ) /* sentinel; not a TypeCode */ \
 41 |   /* internal types not directly reachable by names in the language */ \
 42 |   _( fun       , '^', 0 ) \
 43 |   _( tuple     , '(', 0 ) _( tupleEnd  , ')', 0 ) \
 44 |   _( list      , '[', 0 ) _( listEnd   , ']', 0 ) \
 45 |   _( struct    , '{', 0 ) _( structEnd , '}', 0 ) \
 46 |   /* special type codes used in IR */ \
 47 |   _( ideal     ,  0 , 0 ) /* untyped numeric constants */ \
 48 |   _( param1    , 'P', 0 ) /* parametric. For IR, matches other type, e.g. output == input */ \
 49 |   _( param2    , 'P', 0 )
 50 | /*END TYPE_CODES*/
 51 | 
 52 | // TypeCode enumerates the entries of TYPE_CODES as TypeCode_<name>; TypeCode_MAX is their count
 53 | typedef enum {
 54 |   #define TYPECODE_ENTRY(name, _encoding, _flags) TypeCode_##name,
 55 |   TYPE_CODES(TYPECODE_ENTRY)
 56 |   #undef  TYPECODE_ENTRY
 57 | 
 58 |   TypeCode_MAX
 59 | } TypeCode;
 60 | 
 61 | // order of intrinsic integer types must be signed,unsigned,signed,unsigned...
 62 | static_assert(TypeCode_int8+1  == TypeCode_uint8,  "integer order incorrect");
 63 | static_assert(TypeCode_int16+1 == TypeCode_uint16, "integer order incorrect");
 64 | static_assert(TypeCode_int32+1 == TypeCode_uint32, "integer order incorrect");
 65 | static_assert(TypeCode_int64+1 == TypeCode_uint64, "integer order incorrect");
 66 | // must be less than 32 numeric types
 67 | static_assert(TypeCode_NUM_END <= 32, "there must be no more than 32 numeric types");
 68 | 
 69 | // CType is the kind of an "ideal" (untyped) constant. Values are ordered by dominance:
 70 | // when two kinds meet, e.g. in an untyped binary operation, the one with the higher
 71 | // value takes precedence.
 72 | typedef enum CType {
 73 |   CType_INVALID = 0,
 74 |   CType_int     = 1,
 75 |   CType_rune    = 2,
 76 |   CType_float   = 3,
 77 |   CType_str     = 4,
 78 |   CType_bool    = 5,
 79 |   CType_nil     = 6,
 80 | } CType;
 81 | const char* CTypeName(CType ct);
 82 | 
 83 | // named types exported in the global scope.
 84 | // IMPORTANT: These must match the list of TypeCodes up until CONCRETE_END, except for nil
 85 | // which sym.h declares separately. Looking for all type defs? sym.h puts it all together.
 86 | #define TYPE_SYMS(_) \
 87 |   _( bool    ) \
 88 |   _( int8    ) \
 89 |   _( uint8   ) \
 90 |   _( int16   ) \
 91 |   _( uint16  ) \
 92 |   _( int32   ) \
 93 |   _( uint32  ) \
 94 |   _( int64   ) \
 95 |   _( uint64  ) \
 96 |   _( float32 ) \
 97 |   _( float64 ) \
 98 |   _( int     ) \
 99 |   _( uint    ) \
100 |   _( str     ) \
101 | /*END TYPE_SYMS*/
102 | 
103 | // Note: The following function is provided by sym.h
104 | // static Node* TypeCodeToTypeNode(TypeCode t);
105 | 
106 | // Lookup table TypeCode => string encoding char. extern: the definition lives in types.c
107 | extern const char TypeCodeEncoding[TypeCode_MAX];
108 | 
109 | // TypeCodeName returns the symbolic name of tc, e.g. "int32". tc must be < TypeCode_MAX.
110 | static const char* TypeCodeName(TypeCode);
111 | extern const char* _TypeCodeName[TypeCode_MAX]; // extern: the definition lives in types.c
112 | inline static const char* TypeCodeName(TypeCode tc) {
113 |   assert(tc >= 0 && tc < TypeCode_MAX);
114 |   return _TypeCodeName[tc];
115 | }
116 | 
117 | // TypeCodeFlagMap maps TypeCode => TypeCodeFlag bits. extern: the definition lives in types.c
118 | extern const TypeCodeFlag TypeCodeFlagMap[TypeCode_MAX];
119 | // Predicates testing a flag of t. Note that t is not range-checked here.
120 | inline static bool TypeCodeIsInt(TypeCode t) { return TypeCodeFlagMap[t] & TypeCodeFlagInt; }
121 | inline static bool TypeCodeIsFloat(TypeCode t) { return TypeCodeFlagMap[t] & TypeCodeFlagFloat; }
122 | 


--------------------------------------------------------------------------------
/test/emptyfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsms/wlang/03eb8e72eaacfa451be06cf398762a70cd7b30c8/test/emptyfile


--------------------------------------------------------------------------------
/test/file-no-final-line-break:
--------------------------------------------------------------------------------
1 | A
2 | B
3 | C


--------------------------------------------------------------------------------