├── .editorconfig ├── .gitattributes ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── code ├── functions.tsu ├── if_expression.tsu ├── printing.tsu ├── variables.tsu └── while_loop.tsu ├── docs ├── README.md ├── macros.md ├── mangling.md └── pragmas.md ├── rustfmt.toml ├── spec.md ├── src └── main.rs ├── std └── kernel.tsu ├── tsuki-backend-llvm ├── Cargo.toml └── src │ ├── codegen.rs │ ├── control_flow.rs │ ├── expressions.rs │ ├── functions.rs │ ├── lib.rs │ ├── libc.rs │ ├── types.rs │ └── variables.rs └── tsuki-frontend ├── Cargo.toml └── src ├── ast.rs ├── astdump.rs ├── backend.rs ├── common.rs ├── functions.rs ├── lexer.rs ├── lib.rs ├── parser.rs ├── scope.rs ├── sem.rs ├── sem_literals.rs ├── sem_types ├── control_flow.rs ├── conversions.rs ├── functions.rs ├── locations.rs ├── lookups.rs ├── mod.rs ├── operators.rs ├── pragmas.rs └── types.rs └── types.rs /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | 6 | [*.{rs,tsu}] 7 | indent_size = 3 8 | indent_style = space 9 | max_line_length = 100 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=lf 2 | 3 | *.rs text 4 | *.toml text 5 | .editorconfig text 6 | 7 | .lite_project.lua -linguist-detectable 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .lite_workspace.lua 2 | .vscode 3 | *.md.backup 4 | bin 5 | 6 | 7 | # Added by cargo 8 | 9 | /target 10 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tsuki" 3 | version = "0.1.0" 4 | authors = ["lqdev "] 5 | edition 
= "2018" 6 | 7 | [workspace] 8 | members = [ 9 | "tsuki-frontend", 10 | "tsuki-backend-llvm", 11 | ] 12 | 13 | [dependencies] 14 | tsuki-frontend = { path = "tsuki-frontend" } 15 | tsuki-backend-llvm = { path = "tsuki-backend-llvm" } 16 | 17 | structopt = "0.3.22" 18 | 19 | [profile.release] 20 | lto = true 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 liquidev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tsuki 2 | 3 | A programming language that focuses on being fun to program in, and aiding developers in writing more robust software, all while maintaining high performance. 4 | 5 | The compiler is still in its infancy, and it'll probably take me a while before it's actually usable. In the meantime, you can check out the [spec](spec.md), which lays out the general feature set and vision of the language. 6 | 7 | ## Compiling 8 | 9 | Right now compiling tsuki isn't exactly the most trivial of tasks, and Windows is not yet supported. 10 | 11 | **Step 0.** Install a C (and C++) compiler. 12 | 13 | tsuki depends on libc and uses whatever C compiler is available on the system as `cc` to link executables. This can be overridden using the `$TSUKI_CC` or `$CC` environment variables, in that order of priority. The C++ compiler is necessary to build LLVM. 14 | 15 | **Step 1.** Compile LLVM 12. 16 | 17 | The best way to get LLVM for tsuki is to build it manually. I had pretty bad experiences with using repository LLVM, with problems ranging from missing static libraries on Ubuntu, no `llvm-config` on Windows, to random SIGILLs after a month of hiatus on Arch. 18 | 19 | So here's uncle Liquid's method of obtaining LLVM: 20 | ```shell 21 | # This is where we're going to install LLVM, so change this to some sensible path. 22 | # bash - in this case you also need to add this to .bashrc 23 | export LLVM_SYS_120_PREFIX=$HOME/llvm 24 | # fish 25 | set -Ux LLVM_SYS_120_PREFIX ~/llvm 26 | 27 | # Now it's time to get LLVM. We'll use their GitHub releases for that. 28 | mkdir -p ~/llvm 29 | wget https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.1/llvm-12.0.1.src.tar.xz 30 | tar xJf llvm-12.0.1.src.tar.xz 31 | 32 | # Now let's get the build going. 
33 | cd llvm-12.0.1.src 34 | mkdir -p build 35 | cd build 36 | # If doing a release build, remove LLVM_ENABLE_ASSERTIONS, and set CMAKE_BUILD_TYPE to Release. 37 | # Also, if compiling for other platforms such as aarch64, change the target in LLVM_TARGETS_TO_BUILD. 38 | # You can find a list of all available targets, as well as some other build options, here: 39 | # https://llvm.org/docs/GettingStarted.html#local-llvm-configuration 40 | cmake .. \ 41 | -D CMAKE_INSTALL_PREFIX=$LLVM_SYS_120_PREFIX \ 42 | -D CMAKE_BUILD_TYPE=Debug \ 43 | -D LLVM_ENABLE_ASSERTIONS=1 \ 44 | -D LLVM_TARGETS_TO_BUILD=X86 \ 45 | -G Ninja 46 | # To reduce memory usage during the process of compiling LLVM, clang with the mold linker can be 47 | # used. Grab mold here: 48 | # https://github.com/rui314/mold 49 | # And add the flags: 50 | # -D CMAKE_C_COMPILER=clang 51 | # -D CMAKE_CXX_COMPILER=clang++ 52 | # -D CMAKE_CXX_LINK_FLAGS=-fuse-ld=mold 53 | # As far as I know it's not possible to use mold with gcc. 54 | 55 | # IMPORTANT: 56 | # When not using clang+mold, open a task manager or system monitor. You're going to want to look 57 | # after your memory usage. If it starts growing rapidly, cancel the build and use --parallel 1. 58 | # Linking with GNU ld uses up a lot of memory, so it's better to let it run a single linker at a 59 | # time. 60 | cmake --build . --target install --parallel 8 61 | ``` 62 | 63 | Maybe someday I'll make a dedicated script for this, but today is not that day. 64 | 65 | **Step 2.** Compile and run. 66 | 67 | With all that, running tsuki should be as simple as: 68 | ``` 69 | cargo run 70 | ``` 71 | 72 | ## Using the compiler 73 | 74 | While still in its early stages, the compiler is able to compile arbitrary user code into a working executable. 
The most basic usage of the compiler would be: 75 | ```sh 76 | $ tsuki --package-name main --package-root src --main-file src/main.tsu 77 | # or, abbreviated: 78 | $ tsuki -p main -r src -m src/main.tsu 79 | ``` 80 | `package_name` specifies the name of the output file, and is also used for mangling. 81 | 82 | Refer to the code examples in `code` to see what's currently implemented or being worked on. 83 | -------------------------------------------------------------------------------- /code/functions.tsu: -------------------------------------------------------------------------------- 1 | # test functions returning a value 2 | 3 | fun add_two(x: Int): Int 4 | x + 2 5 | 6 | __intrin_print_int32(add_two(1)) 7 | 8 | # test functions returning void. also calling other functions 9 | 10 | fun add_two_and_print(x: Int) 11 | val added = add_two(x) 12 | __intrin_print_int32(added) 13 | 14 | add_two_and_print(123) 15 | 16 | # test self-recursive functions 17 | 18 | fun fib(n: Int): Int 19 | if n == 0 -> 0 20 | elif n == 1 -> 1 21 | else -> fib(n - 1) + fib(n - 2) 22 | 23 | __intrin_print_int32(fib(10)) 24 | 25 | # test mutually recursive functions 26 | 27 | fun foo(x: Int) 28 | if x < 10 29 | bar(x + 2) 30 | 31 | fun bar(x: Int) 32 | if x < 20 33 | foo(x - 1) 34 | 35 | foo(0) 36 | 37 | # test return statement 38 | 39 | fun nop() 40 | return 41 | 42 | nop() 43 | 44 | fun fac(n: Int): Int 45 | var i = 0 46 | var x = 1 47 | while true 48 | if i >= n 49 | return x 50 | x = x * i 51 | i = i + 1 52 | x 53 | 54 | __intrin_print_int32(fac(10)) 55 | 56 | # Tests for implicit conversion from NoReturn to any other type. 
57 | fun straightforward_return(): Int 58 | return 1 59 | 60 | fun return_from_if(x: Int): Int 61 | if x == 1 62 | return 1 63 | else 64 | return 2 65 | 66 | fun assign_return_to_variable(x: Int) 67 | val x: Int = return 68 | -------------------------------------------------------------------------------- /code/if_expression.tsu: -------------------------------------------------------------------------------- 1 | # test if statements and conditions 2 | val a = 2 3 | if a == 1 4 | __intrin_print_int32(10) 5 | elif a == 2 6 | __intrin_print_int32(15) 7 | else 8 | __intrin_print_int32(20) 9 | 10 | # test if expressions 11 | val b = a + 1 12 | val cmp = 30_i16 13 | val c = 14 | if b == 1 -> 3 15 | elif b == 2 -> 4 16 | elif b == 3 -> 5 17 | elif b != cmp -> 10 18 | else -> 6 19 | __intrin_print_int32(c) 20 | 21 | # test nested statements 22 | if a > 10 == true 23 | if a + 2 == 12 24 | __intrin_print_int32(111) 25 | elif a + 6 == 17 26 | __intrin_print_int32(222) 27 | else 28 | __intrin_print_int32(123) 29 | else 30 | # also nested expressions because why not 31 | val x = 32 | if a == 5 -> 6 33 | else -> 7 34 | __intrin_print_int32(x) 35 | 36 | # test boolean operations 37 | val eq = true == true 38 | val ne = false != true 39 | val neg = not true 40 | val neg2 = not false 41 | -------------------------------------------------------------------------------- /code/printing.tsu: -------------------------------------------------------------------------------- 1 | __intrin_print_int32(42 / 2 * 4) 2 | __intrin_print_float32(10.0) 3 | -------------------------------------------------------------------------------- /code/variables.tsu: -------------------------------------------------------------------------------- 1 | val a = 2 2 | __intrin_print_int32(a * 2) 3 | 4 | var b = 3 5 | __intrin_print_int32(b) 6 | b = 5 7 | __intrin_print_int32(b) 8 | 9 | var c = b = 1 10 | __intrin_print_int32(b) 11 | __intrin_print_int32(c) 12 | 13 | __intrin_print_int32(-c) 14 | 15 | var d: 
Int64 = 1 16 | -------------------------------------------------------------------------------- /code/while_loop.tsu: -------------------------------------------------------------------------------- 1 | var i = 0 2 | while i < 10 3 | __intrin_print_int32(i) 4 | i = i + 1 5 | 6 | var a = 0 7 | var b = 1 8 | while true 9 | if a + b > 100 10 | break 11 | a = a + 2 12 | b = b + 1 13 | 14 | # Check that `break` can be used as an expression. 15 | __intrin_print_int32(123) 16 | while true 17 | val yes: Int = break 18 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | This directory houses liquidev's notes on the implementation. This includes plans for the future, as well as details of the existing implementation. 2 | -------------------------------------------------------------------------------- /docs/macros.md: -------------------------------------------------------------------------------- 1 | # El Macro 2 | 3 | > A Spanish guy, knowing all about how good macro systems work. 4 | 5 | As the compiler implementation progresses, I'll be noting here some things about the macro system. 6 | 7 | ## Macro declaration 8 | 9 | The declaration of a macro is similar to a function, except it only ever takes a single argument: 10 | 11 | ``` 12 | import @std.macros as _ 13 | 14 | macro my_macro(ctx: macros.Context[:call]): macros.Error!macros.AnyResolvedNode 15 | _ # do things 16 | ``` 17 | 18 | The task of the macro is to transform the nodes found in the environment into a final node. There are a few things to note here: 19 | - The `ctx` argument tells the macro about the context it's being called in, including the _full_ call site, and type system things like the expected return type. 20 | - The type of this argument is `macros.Context[P]`. The `P` parameter is a `const macros.CallPosition`, which specifies _where_ the macro can be called. 
21 | - For instance, `:call` signifies that the macro is called like a regular function, `my_macro(arg1, arg2)`. 22 | - Other positions include `:fun_pragma`, `:object_pragma`, `:union_pragma`, `:atom_pragma`, `:type_pragma`, and `:derive`. 23 | - While the `:*_pragma` kinds are self-explanatory, `:derive` is special, because it can be used together with the `derive` declaration inside `impl` blocks. It's used for deriving trait implementations automatically. 24 | 25 | The kind of the AST node output by the macro has to be valid for the call site, which is ensured by the API's strong typing, described later. 26 | 27 | ## The context 28 | 29 | The `macros.Context` object is responsible for holding information related to the callsite of the macro. It has a few functions: 30 | 31 | - `fun callsite(self): macros.CallNode` returns a handle to a node that represents the macro's callsite. 32 | - `fun callsite_env(self): macros.Environment[:call]` returns a handle to the semantic checking environment positioned at the call site. 33 | - `fun this_env(self): macros.Environment[:definition]` returns a handle to the semantic checking environment positioned at the macro's definition. 34 | - `fun expected_return_type(self): ?macros.Type` returns the type that is expected at the position of the callsite. This is non-nil only in expression macros. 35 | 36 | ## Nodes 37 | 38 | All operations on nodes within macros must always produce correct AST. This is ensured through type-safe handles to nodes, [inkwell](https://github.com/TheDan64/inkwell)-style. For instance, you can modify the first node of a `CallNode` (the called function), and push more nodes into it, but you cannot modify its second node. 39 | 40 | Additionally, the nodes you push into a `CallNode` must all satisfy `Expression`, which is a trait implemented by all nodes that are valid in expression position. 
To allow for runtime switching on expressions, the 41 | 42 | Unary and binary operators should probably be represented as unions internally (the variants shouldn't be publicly visible), and their type should be discriminated by an atom. For instance, to construct an addition, you can do `BinaryNode.new(:add, left, right)`. To construct a pointer dereference, you can do `UnaryNode.new(:deref, left)`. Note that these operators all desugar to the small subset of AST available to macros, as described below. 43 | 44 | ### The canonical representation 45 | 46 | Macros should only ever deal with a subset of the full AST, one that's mostly portable through compiler versions. We call that subset the _canonical representation_. 47 | Additionally, AST that is input into the macro is not semantically checked beforehand, and AST that is output by the macro must always be semantically checked using the macro's environment. 48 | 49 | I don't know what's the exact set of nodes that will be exposed to macros, but I can imagine all literals being included, like: 50 | ``` 51 | # before sem 52 | True 53 | False 54 | Integer 55 | Float 56 | Identifier 57 | 58 | # both before and after sem 59 | String 60 | Atom 61 | 62 | # after sem 63 | Bool 64 | Uint8 65 | Uint16 66 | Uint32 67 | Uint64 68 | Int8 69 | Int16 70 | Int32 71 | Int64 72 | Float32 73 | Float64 74 | Symbol 75 | ``` 76 | In addition to that, we need control flow: 77 | ``` 78 | # untyped 79 | Do 80 | If 81 | 82 | # shared 83 | IfBranch 84 | ElseBranch 85 | 86 | # typed 87 | 88 | # NB: the separation here is needed so that we can ensure that the last statement in an expression 89 | # block is an expression statement 90 | DoExpression 91 | DoStatement 92 | 93 | IfExpression 94 | IfStatement 95 | 96 | While 97 | For 98 | ``` 99 | Also, definitions: 100 | ``` 101 | Fun 102 | Object 103 | Union 104 | AtomSet 105 | Type 106 | ``` 107 | While we're on the topic of functions, let's talk _calls_. 
For API simplicity, untyped operators desugar to three different node kinds: `Nullary`, `Unary`, and `Binary`. 108 | ``` 109 | # these are macro-exclusive, untyped, generalized versions of operators. 110 | # their extra is an atom specifying what operator the node refers to 111 | Nullary 112 | Unary 113 | Binary 114 | 115 | # untyped 116 | Call 117 | 118 | # typed 119 | ResolvedCall 120 | ``` 121 | 122 | ### Type-level separation between unresolved and resolved nodes 123 | 124 | Certain types of nodes can be unresolved or resolved, and in these cases we want to separate between them at the type level. For that, we'll use atoms in generic parameters. 125 | 126 | ``` 127 | # I am aware that nobody likes Java-long names, but readability is important. 128 | # Users do not touch this atom anyways. 129 | atom ControlFlowResolutionState 130 | :unresolved 131 | :resolved_to_expression 132 | :resolved_to_statement 133 | 134 | object DoNode[S] 135 | where S: ControlFlowResolutionState 136 | # details omitted 137 | 138 | impl UnresolvedNode for DoNode[:unresolved] 139 | # details omitted 140 | 141 | impl[S] ResolvedNode for DoNode[S] 142 | where S: ControlFlowResolutionState 143 | # details omitted 144 | ``` 145 | 146 | This way we can avoid creating a lot of types for each state a node kind can be in; we use generic `impl`s to only implement specific sets of functionality for nodes that satisfy specific states. 147 | 148 | ### Resolving nodes 149 | 150 | An `UnresolvedNode` can become a `ResolvedNode` if it is passed through an `Environment` for semantic analysis. An important distinction to make is that `ResolvedNode` trees are **incompatible** with `UnresolvedNode` trees! A resolved tree must be resolved _fully_, it cannot contain any unresolved nodes inside. This is safe-guarded by the type system. 
151 | 152 | An environment exposes a few functions for resolving (semantically analyzing) nodes: 153 | ``` 154 | object Environment[K] 155 | where K: EnvironmentKind 156 | # details omitted 157 | 158 | atom EnvironmentKind 159 | :call # callsite 160 | :definition # definition site 161 | 162 | impl[K] Environment[K] 163 | fun resolve[T](self, node: T): T.Resolved 164 | where 165 | T: UnresolvedNode 166 | 167 | _ # details omitted 168 | 169 | impl Environment[:call] 170 | fun lookup(self, identifier: IdentifierNode): AnySymbolNode 171 | _ # details omitted 172 | 173 | impl Environment[:definition] 174 | fun get(self, identifier: String): AnySymbolNode 175 | _ # details omitted 176 | ``` 177 | 178 | Note that there are two separate kinds of environments that can be used for looking things up. As already mentioned in the section describing [the context](#the-context), an environment points either to the callsite, or the definition site. Any environment can be used for semantic checking, and its scope will be used for looking up identifiers. 179 | 180 | #### Symbol creation 181 | 182 | Macros cannot create bare identifiers. They can only process existing ones input into them, but all identifiers created by macros must already be resolved. In fact, all symbols created by macros come as part of full declarations. For instance, it's impossible to create a variable symbol that does not have a corresponding declaration, because then the scope of the variable is not clear. 183 | 184 | One thing that's _not_ compile-time checked with variables is their scope. The API will allow you to generate the following code: 185 | ``` 186 | print() 187 | val = 10 188 | ``` 189 | Note how `` is undeclared in the `print`; the `val` was created, then the `print` was added into the AST, and then the `val` was added afterwards. 190 | 191 | Unfortunately there is no way to model this in tsuki's type system, at least not that I know of. 
The compiler will reject this code before any code generation is performed, but the error message may be unclear, as the AST resulting from macros may not have proper span information. 192 | 193 | ## Symbols 194 | 195 | Symbol nodes represent identifiers that have meaning. This meaning may be type information, which is possessed by _all_ symbols, as well as extra metadata on what the symbol is more concretely. 196 | 197 | This information may be queried by using getters defined on `macros.AnySymbolNode`: 198 | ``` 199 | union AnySymbol 200 | :variable(VariableSymbol) 201 | :fun(FunSymbol) 202 | :object(ObjectSymbol) 203 | :union(UnionSymbol) 204 | :atom(AtomSymbol) 205 | 206 | object AnySymbolNode 207 | # details omitted 208 | 209 | impl AnySymbolNode 210 | ## Returns the symbol stored in the node. 211 | fun symbol(self): AnySymbol 212 | ``` 213 | 214 | The `AnySymbol` union encapsulates all the possible symbol kinds in a set that's easy to `match` over. Additionally, convenience methods are provided for converting to the inner values, for use with `if val` and the like: 215 | 216 | ``` 217 | impl AnySymbol 218 | fun as_variable(self): ?VariableSymbol 219 | _ # details omitted 220 | 221 | fun as_fun(self): ?FunSymbol 222 | _ # details omitted 223 | 224 | fun as_object(self): ?ObjectSymbol 225 | _ # details omitted 226 | 227 | fun as_union(self): ?UnionSymbol 228 | _ # details omitted 229 | 230 | fun as_atom(self): ?AtomSymbol 231 | _ # details omitted 232 | ``` 233 | 234 | ## Error handling 235 | 236 | Using `panic` in macros is forbidden, because it leads to a bad user experience. All options, results, and the like, must be unwrapped explicitly. 237 | 238 | Upon encountering invalid input, a macro can return an `:error` result with a `macros.Error` inside. This type stores information about the span the error covers, as well as the error message. 
239 | 240 | Because tsuki's control flow analysis is quite simple (at least in the early stages), it cannot infer what values are possible for a given variable in a given branch of the program. This is why `unreachable()` exists; it's to mark these spots as unreachable, and if a given spot is reached, the program panics. But panicking is strictly forbidden inside macros, so the `@std.macros` module provides replacements for these common tasks. 241 | 242 | `AnyNode` (which _all_ node kinds can convert to) provides a function `error`, whose sole purpose is to produce errors: 243 | ``` 244 | impl AnyNode 245 | fun error(self, message: String): macros.Error 246 | _ # details omitted 247 | ``` 248 | 249 | Additionally, a few standalone macros exist that create an error, whose span is the callsite: 250 | ``` 251 | # Accepts a single String argument with an error message, and produces an error whose span is 252 | # the callsite. 253 | macro macro_error(ctx: Context[:call]): Error!ResolvedNode 254 | _ # details omitted 255 | 256 | # Accepts no arguments, and produces an error with the message "unreachable code reached". 
257 | macro unreachable(ctx: Context[:call]): Error!ResolvedNode 258 | _ # details omitted 259 | ``` 260 | 261 | This means that usual control flow: 262 | ``` 263 | if some_cool_condition 264 | # some_cool_condition guarantees that my_based_value is not :cringe, but the compiler can't 265 | # figure that out 266 | match my_based_value 267 | :based -> _ # do stuff 268 | :cool -> _ # do more stuff 269 | :cringe -> panic("there is no cringe in this program") 270 | ``` 271 | turns to this: 272 | ``` 273 | import @std.macros for macro_error 274 | 275 | if some_cool_condition 276 | match my_based_value 277 | :based -> _ 278 | :cool -> _ 279 | :cringe -> return macro_error("there is no cringe in this program") 280 | ``` 281 | -------------------------------------------------------------------------------- /docs/mangling.md: -------------------------------------------------------------------------------- 1 | # Some notes on mangling 2 | 3 | This document outlines some considerations for implementing mangling inside the compiler. 4 | 5 | ## LLVM backend 6 | 7 | I'd prefer if mangled names were descriptive and human-readable, rather than overly mangled to the point where no human being is able to understand them (looking at you, C++). 8 | 9 | Specifics: 10 | - LLVM functions can be called whatever we want, there's no limit on which characters we can or cannot use. 11 | - Rust strings must be valid UTF-8. 12 | 13 | The following scheme shall be used in the LLVM backend: 14 | - `:.` 15 | - where `` is one of the following: 16 | - `` - eg. `function_name`, `blah1` - a valid function name (in `snake_case`) 17 | - `.` - eg. `MyObject.function` - an associated function 18 | - `.[].` - eg. `MyObject.[As[Int]].convert` - a function associated with a trait 19 | - `{}` - eg. `{0}` - anonymous functions, eg. closures and do-blocks 20 | - Paths may nest freely. 
`module.function.local_function` is a perfectly valid path, specifying a locally-scoped function `local_function` inside the function `function` inside the module `module`. 21 | 22 | Examples: 23 | - `std:panics.panic` - function `panic` in module `panics` of package `std` 24 | - `std:float32.Float32.sin` - function `sin` for the type `Float32` in module `float32` of package `std` 25 | - `std:int32.Int32.[Dup].dup` - function `dup` from the implementation of `Dup` for the type `Int32` in module `int32` of package `std` 26 | - `std_tests:results{12}` - 12th anonymous function in the module `results` of package `std_tests` 27 | 28 | Other backends may use mangling schemes different to this one; after all, different targets have different requirements. 29 | 30 | # Stack traces 31 | 32 | Function names mangled using this scheme should _never_ appear in stack traces, as it leads to a terrible user experience. They get overly long and hard to read, hence I propose to drop the module's name. After all, it's already obvious from the filename. 33 | 34 | For instance, if we have a stack trace for package `test`, module `hello`, this stack trace: 35 | ``` 36 | Stack traceback: 37 | std:panics.tsu 234:4 std:panics.panic 38 | test:hello.tsu 4:2 test:hello.my_fallible_function 39 | test:hello.tsu 6:1 test:hello 40 | ``` 41 | should get turned into: 42 | ``` 43 | Stack traceback: 44 | std:panics.tsu 234:4 panic 45 | test:hello.tsu 4:2 my_fallible_function 46 | test:hello.tsu 6:1 {module code} 47 | ``` 48 | 49 | Stack traces should be compact and readable at a glance. We don't need to list _every_ single path piece. This is something most compilers nowadays get wrong. 50 | 51 | Everything after the module name should remain as-is, and the empty string previously containing only the module name should be replaced with `{module code}`. 52 | 53 | **UX above all else**. 
54 | 55 | ## Some deets on the stack trace's formatting 56 | 57 | The paths shown in the stack trace should never be full paths to the `src` directory. Instead, they should be `:`. Not sure about this, but maybe the `.tsu` extension should also be dropped? 58 | 59 | The full format is the following: 60 | ``` 61 | Stack traceback: 62 | : 63 | ... 64 | ``` 65 | -------------------------------------------------------------------------------- /docs/pragmas.md: -------------------------------------------------------------------------------- 1 | # Pragmas specific to this implementation 2 | 3 | Different implementations may define their own sets of pragmas for implementing the standard library. This reference implementation of the compiler and standard library uses the following pragmas. 4 | 5 | #### `compiler_builtin_type(type_identifier: Atom)` 6 | 7 | The `compiler_builtin_type` pragma may be used on `type` definitions without an `=` sign after the name, to bind builtin types to names. The `type_identifier` atom may be one of the following values, corresponding to the following built-in types: 8 | 9 | | Atom value | Type | 10 | | --- | --- | 11 | | `:noreturn` | `NoReturn` | 12 | | `:bool` | `Bool` | 13 | | `:int8` | `Int8` | 14 | | `:int16` | `Int16` | 15 | | `:int32` | `Int32` | 16 | | `:int64` | `Int64` | 17 | | `:uint8` | `Uint8` | 18 | | `:uint16` | `Uint16` | 19 | | `:uint32` | `Uint32` | 20 | | `:uint64` | `Uint64` | 21 | | `:float32` | `Float32` | 22 | | `:float64` | `Float64` | 23 | | `:size` | `Size` | 24 | 25 | Examples: 26 | 27 | ``` 28 | type NoReturn :: compiler_builtin_type(:noreturn) 29 | type Size :: compiler_builtin_type(:size) 30 | ``` 31 | 32 | These type definitions can be found in `std/kernel.tsu`, which is loaded implicitly into each module. 
33 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | tab_spaces = 3 2 | match_arm_leading_pipes = "Preserve" 3 | chain_width = 100 4 | imports_granularity = "Module" 5 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | use std::path::PathBuf; 3 | 4 | use structopt::StructOpt; 5 | use tsuki_backend_llvm::{ExecutableFile, LlvmBackend, LlvmBackendConfig, OptimizationLevel}; 6 | use tsuki_frontend::backend::Backend; 7 | use tsuki_frontend::common::{Errors, SourceFile}; 8 | use tsuki_frontend::AnalyzeOptions; 9 | 10 | #[derive(StructOpt)] 11 | #[structopt(name = "tsuki")] 12 | struct Options { 13 | /// The directory for storing intermediary files. 14 | #[structopt(long, parse(from_os_str))] 15 | cache_dir: Option, 16 | 17 | /// The name of the package. This is used for controlling the object file's name. 18 | #[structopt(short = "p", long)] 19 | package_name: String, 20 | 21 | /// The `src` directory of the package. 22 | #[structopt(short = "r", long)] 23 | package_root: PathBuf, 24 | 25 | /// The root directory of the standard library. 26 | #[structopt(short = "s", long)] 27 | std_path: PathBuf, 28 | 29 | /// The root source file. Must be located in the package root. 30 | #[structopt(short = "m", long)] 31 | main_file: PathBuf, 32 | 33 | /// Only check the code for validity, without compiling it. 34 | #[structopt(long)] 35 | check: bool, 36 | 37 | /// The optimization level to use when compiling. 38 | #[structopt(long, name = "level", default_value = "essential")] 39 | optimize: OptimizationLevel, 40 | 41 | /// Dumps the source code before compiling. 42 | #[structopt(long)] 43 | dump_source: bool, 44 | 45 | /// Dumps the AST directly after parsing. 
46 | #[structopt(long)] 47 | dump_ast_pre_sem: bool, 48 | 49 | /// Dumps the AST after checking it semantically. 50 | #[structopt(long)] 51 | dump_ast_post_sem: bool, 52 | 53 | /// Dumps the generated LLVM IR. 54 | #[structopt(long)] 55 | dump_llvm_ir: bool, 56 | } 57 | 58 | const EXIT_COMPILE: i32 = 1; 59 | const EXIT_FATAL: i32 = 2; 60 | 61 | fn unwrap_error(r: Result) -> T 62 | where 63 | E: Display, 64 | { 65 | match r { 66 | Ok(ok) => ok, 67 | Err(error) => { 68 | eprintln!("error: {}", error); 69 | std::process::exit(EXIT_FATAL) 70 | } 71 | } 72 | } 73 | 74 | fn unwrap_errors(r: Result) -> T { 75 | match r { 76 | Ok(ok) => ok, 77 | Err(errors) => { 78 | errors.iter().for_each(|error| eprintln!("{:#}", error)); 79 | std::process::exit(EXIT_COMPILE) 80 | } 81 | } 82 | } 83 | 84 | fn main() -> Result<(), Box> { 85 | let options = Options::from_args(); 86 | let frontend_debug_options = tsuki_frontend::DebugOptions { 87 | dump_source: options.dump_source, 88 | dump_ast_pre_sem: options.dump_ast_pre_sem, 89 | dump_ast_post_sem: options.dump_ast_post_sem, 90 | }; 91 | let backend = LlvmBackend::new(LlvmBackendConfig { 92 | cache_dir: &options.cache_dir.unwrap_or(std::env::current_dir()?.join("bin")), 93 | std_path: &options.std_path, 94 | package_name: &options.package_name, 95 | // TODO: Cross-compilation. 
96 | target_triple: None, 97 | optimization_level: options.optimize, 98 | frontend_debug_options, 99 | backend_debug_options: tsuki_backend_llvm::DebugOptions { 100 | dump_ir: options.dump_llvm_ir, 101 | }, 102 | }); 103 | 104 | let source = unwrap_error(std::fs::read_to_string(&options.main_file)); 105 | 106 | let source_file = unwrap_error(SourceFile::new( 107 | options.package_name, 108 | options.package_root, 109 | options.main_file, 110 | source, 111 | )); 112 | 113 | if options.check { 114 | let _ = unwrap_errors(tsuki_frontend::analyze( 115 | AnalyzeOptions { 116 | file: &source_file, 117 | std_path: options.std_path, 118 | }, 119 | &frontend_debug_options, 120 | )); 121 | } else { 122 | let object = unwrap_errors(backend.compile(source_file)); 123 | let _executable = ExecutableFile::link(backend, &[object])?; 124 | } 125 | 126 | Ok(()) 127 | } 128 | -------------------------------------------------------------------------------- /std/kernel.tsu: -------------------------------------------------------------------------------- 1 | # tsuki standard library 2 | # Copyright (C) 2021 liquidev 3 | # Licensed under the MIT license. Check the LICENSE file in the repository root for details. 4 | 5 | # The kernel is arguably the most important piece of source code in the standard library. 6 | # It provides definitions for primitive types, as well as compiler-supported types such as optionals 7 | # and results. 8 | # The kernel is loaded by the compiler before any other code in a given file, and must not be 9 | # imported manually. 
10 | 11 | pub type NoReturn :: compiler_builtin_type(:noreturn) 12 | 13 | pub type Bool :: compiler_builtin_type(:bool) 14 | 15 | pub type Uint8 :: compiler_builtin_type(:uint8) 16 | pub type Uint16 :: compiler_builtin_type(:uint16) 17 | pub type Uint32 :: compiler_builtin_type(:uint32) 18 | pub type Uint64 :: compiler_builtin_type(:uint64) 19 | 20 | pub type Int8 :: compiler_builtin_type(:int8) 21 | pub type Int16 :: compiler_builtin_type(:int16) 22 | pub type Int32 :: compiler_builtin_type(:int32) 23 | pub type Int64 :: compiler_builtin_type(:int64) 24 | 25 | pub type Float32 :: compiler_builtin_type(:float32) 26 | pub type Float64 :: compiler_builtin_type(:float64) 27 | 28 | pub type Size :: compiler_builtin_type(:size) 29 | -------------------------------------------------------------------------------- /tsuki-backend-llvm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tsuki-backend-llvm" 3 | version = "0.1.0" 4 | edition = "2018" 5 | 6 | [dependencies] 7 | 8 | tsuki-frontend = { path = "../tsuki-frontend" } 9 | 10 | smallvec = "1.6.1" 11 | thiserror = "1.0.26" 12 | inkwell = { git = "https://github.com/TheDan64/inkwell", rev = "41857f9", features = ["llvm12-0"] } 13 | -------------------------------------------------------------------------------- /tsuki-backend-llvm/src/codegen.rs: -------------------------------------------------------------------------------- 1 | //! Common code generation state. 
2 | 3 | use std::collections::HashMap; 4 | use std::fmt; 5 | 6 | use inkwell::basic_block::BasicBlock; 7 | use inkwell::builder::Builder; 8 | use inkwell::context::Context; 9 | use inkwell::module::Module; 10 | use inkwell::passes::PassManager; 11 | use inkwell::types::StructType; 12 | use inkwell::values::{BasicValueEnum, FunctionValue}; 13 | use tsuki_frontend::ast::{NodeId, NodeKind}; 14 | use tsuki_frontend::common::SourceFile; 15 | use tsuki_frontend::scope::ScopeId; 16 | use tsuki_frontend::sem::Ir; 17 | 18 | use crate::functions::Function; 19 | use crate::variables::Variables; 20 | 21 | /// Code generation state shared across functions. 22 | pub struct CodeGen<'src, 'c, 'pm> { 23 | // This field may be unused as its primary purpose currently is debugging. Production code should 24 | // not contain any `astdump::dump_ast`s or such, so this field will remain mostly unused. 25 | // This will change once debug info generation is implemented. 26 | #[allow(unused)] 27 | pub(crate) source: &'src SourceFile, 28 | pub(crate) context: &'c Context, 29 | pub(crate) module: &'pm Module<'c>, 30 | pub(crate) builder: Builder<'c>, 31 | pub(crate) pass_manager: &'pm PassManager<FunctionValue<'c>>, 32 | 33 | pub(crate) function: Function<'c>, 34 | pub(crate) variables: Variables<'c>, 35 | 36 | /// This map stores a list of blocks to which unconditional jumps have to be appended, as a 37 | /// result of `break` expressions. 38 | /// 39 | /// The second `usize` in the tuple key is required to allow for multiple `break`s in one 40 | /// breaking scope.
41 | pub(crate) break_blocks: HashMap<(ScopeId, usize), BasicBlock<'c>>, 42 | 43 | pub(crate) unit_type: StructType<'c>, 44 | } 45 | 46 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> { 47 | pub fn new( 48 | source: &'src SourceFile, 49 | context: &'c Context, 50 | pass_manager: &'pm PassManager<FunctionValue<'c>>, 51 | module: &'pm Module<'c>, 52 | function: Function<'c>, 53 | ) -> Self { 54 | let mut state = Self { 55 | source, 56 | context, 57 | // TODO: import, module resolution and names. 58 | module, 59 | builder: context.create_builder(), 60 | pass_manager, 61 | 62 | function, 63 | variables: Variables::new(), 64 | 65 | break_blocks: HashMap::new(), 66 | 67 | unit_type: context.struct_type(&[], false), 68 | }; 69 | state.builder.position_at_end(state.function.entry_block); 70 | // Temporary: set up some libc functions. 71 | state.load_libc(); 72 | state 73 | } 74 | 75 | /// Creates a new code generator, with the same source file, context, pass manager, and module, 76 | /// but with a different function. 77 | pub fn for_function(&self, function: Function<'c>) -> Self { 78 | Self { 79 | builder: self.context.create_builder(), 80 | function, 81 | variables: Variables::new(), 82 | unit_type: self.unit_type, 83 | break_blocks: HashMap::new(), 84 | ..*self 85 | } 86 | } 87 | 88 | /// Generates code for an arbitrary node.
89 | pub fn generate_statement(&mut self, ir: &Ir, node: NodeId) { 90 | match ir.ast.kind(node) { 91 | // Control flow 92 | NodeKind::Pass => (), 93 | NodeKind::StatementList => self.generate_statements(ir, node), 94 | NodeKind::DoStatement => { 95 | let _ = self.generate_do(ir, node); 96 | } 97 | NodeKind::IfStatement => { 98 | let _ = self.generate_if(ir, node); 99 | } 100 | NodeKind::While => self.generate_while(ir, node), 101 | 102 | // Declarations 103 | NodeKind::Val | NodeKind::Var => self.generate_variable_declaration(ir, node), 104 | NodeKind::AssignDiscard => self.generate_discarding_assignment(ir, node), 105 | NodeKind::Fun => self.generate_function(ir, node), 106 | // TODO: Remove type aliases from the IR, as they do not serve any purpose for the code 107 | // generation stage. 108 | NodeKind::Type => (), 109 | 110 | // Expressions 111 | NodeKind::Assign => { 112 | let _ = self.generate_assignment(ir, node); 113 | } 114 | _ => { 115 | let _ = self.generate_expression(ir, node); 116 | } 117 | } 118 | } 119 | 120 | /// Finishes compiling a function, by inserting a `ret` instruction at the end, as well 121 | /// as running optimizations on it. 122 | pub fn finish_function(&self, return_value: Option<BasicValueEnum<'c>>) { 123 | // It seems like Rust can't really infer that I want to pass a &dyn when I .as_ref() 124 | // the option, so this requires some manual matching.
125 | match return_value { 126 | Some(v) => self.builder.build_return(Some(&v)), 127 | None => self.builder.build_return(None), 128 | }; 129 | self.pass_manager.run_on(&self.function.value); 130 | } 131 | } 132 | 133 | impl fmt::Debug for CodeGen<'_, '_, '_> { 134 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 135 | write!(f, "{}", &self.module.print_to_string().to_str().unwrap()) 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /tsuki-backend-llvm/src/control_flow.rs: -------------------------------------------------------------------------------- 1 | //! Code generation for statement lists and control flow structures. 2 | 3 | use inkwell::basic_block::BasicBlock; 4 | use inkwell::values::{BasicValueEnum, IntValue}; 5 | use smallvec::SmallVec; 6 | use tsuki_frontend::ast::{NodeId, NodeKind}; 7 | use tsuki_frontend::scope::ScopeId; 8 | use tsuki_frontend::sem::Ir; 9 | 10 | use crate::codegen::CodeGen; 11 | 12 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> { 13 | /// Generates code for a list of statements. 14 | pub(crate) fn generate_statements(&mut self, ir: &Ir, node: NodeId) { 15 | ir.ast.walk_node_list(node, |_ast, _index, node| { 16 | self.generate_statement(ir, node); 17 | }); 18 | } 19 | 20 | /// Generates code for a list of statements with a tail expression. 21 | pub(crate) fn generate_statements_with_tail_expression( 22 | &mut self, 23 | ir: &Ir, 24 | node: NodeId, 25 | ) -> BasicValueEnum<'c> { 26 | let mut tail = None; 27 | for (index, &child) in ir.ast.extra(node).as_node_list().unwrap().iter().enumerate() { 28 | if ir.ast.is_last_child(node, index) { 29 | tail = Some(self.generate_expression(ir, child)) 30 | } else { 31 | self.generate_statement(ir, child); 32 | } 33 | } 34 | if let Some(tail) = tail { 35 | tail 36 | } else { 37 | self.generate_unit_literal().into() 38 | } 39 | } 40 | 41 | /// Generates code for a `do` expression or a `do` statement. 
42 | /// 43 | /// If the node is a `DoExpression`, returns `Some` with the tail expression. Otherwise 44 | /// if the kind is `DoStatement`, returns `None`. 45 | pub(crate) fn generate_do(&mut self, ir: &Ir, node: NodeId) -> Option<BasicValueEnum<'c>> { 46 | match ir.ast.kind(node) { 47 | NodeKind::DoExpression => Some(self.generate_statements_with_tail_expression(ir, node)), 48 | NodeKind::DoStatement => { 49 | self.generate_statements(ir, node); 50 | None 51 | } 52 | _ => unreachable!(), 53 | } 54 | } 55 | 56 | /// Generates code for an `if` expression or an `if` statement. 57 | /// 58 | /// Return value behavior is similar to `generate_do`. 59 | pub(crate) fn generate_if(&mut self, ir: &Ir, node: NodeId) -> Option<BasicValueEnum<'c>> { 60 | /// This local struct stores information about the condition of an `if` branch. 61 | struct Condition<'c> { 62 | block: BasicBlock<'c>, 63 | value: IntValue<'c>, 64 | // We store the ending block of the condition, because it may be different than the 65 | // starting block. 66 | end_block: BasicBlock<'c>, 67 | } 68 | /// This local struct stores information about a single `if` branch: its condition, condition 69 | /// block, and body block. 70 | struct Branch<'c> { 71 | condition: Option<Condition<'c>>, 72 | body: BasicBlock<'c>, 73 | // Similarly to the condition, we store the ending block, because it may be different than 74 | // the starting block, and it's where we must emit the final `br` instructions. 75 | end_block: BasicBlock<'c>, 76 | result: Option<BasicValueEnum<'c>>, 77 | } 78 | let mut branches = SmallVec::<[Branch<'c>; 16]>::new(); 79 | let is_expression = ir.ast.kind(node) == NodeKind::IfExpression; 80 | // Unwrapping here is safe, because we are coming from an existing block 81 | // (eg. the function's %entry). 82 | let entry_block = self.builder.get_insert_block().unwrap(); 83 | 84 | // Generate code for each of the branches. The `br` instructions are added after the inner 85 | // code is generated, because all blocks have to be known beforehand.
86 | let branch_nodes = ir.ast.extra(node).as_node_list().unwrap(); 87 | for (index, &branch) in branch_nodes.iter().enumerate() { 88 | let body_block = self.context.append_basic_block( 89 | self.function.value, 90 | // For easier debugging of the IR, the block's name is determined by the branch type. 91 | match ir.ast.kind(branch) { 92 | NodeKind::IfBranch => "elif", 93 | NodeKind::ElseBranch => "else", 94 | _ => unreachable!(), 95 | }, 96 | ); 97 | // The condition is only generated for `IfBranch`es, because the `ElseBranch` does not 98 | // have a condition. 99 | let mut condition = None; 100 | if ir.ast.kind(branch) == NodeKind::IfBranch { 101 | let condition_block = if index == 0 { 102 | // We don't need a new block if this is the first branch; we can simply fall through 103 | // from the current one. 104 | entry_block 105 | } else { 106 | self.context.prepend_basic_block(body_block, "condition") 107 | }; 108 | self.builder.position_at_end(condition_block); 109 | let condition_value = self.generate_expression(ir, ir.ast.first_handle(branch)); 110 | let end_block = self.builder.get_insert_block().unwrap(); 111 | condition = Some(Condition { 112 | block: condition_block, 113 | value: condition_value.into_int_value(), 114 | end_block, 115 | }); 116 | } 117 | // Then we generate the body. 118 | self.builder.position_at_end(body_block); 119 | let result = if is_expression { 120 | Some(self.generate_statements_with_tail_expression(ir, branch)) 121 | } else { 122 | self.generate_statements(ir, branch); 123 | None 124 | }; 125 | let end_block = self.builder.get_insert_block().unwrap(); 126 | branches.push(Branch { 127 | condition, 128 | body: body_block, 129 | end_block, 130 | result, 131 | }); 132 | } 133 | // Generate the terminating %end block. After a successfully executed branch, this block is 134 | // branched to unconditionally, and is where control flow continues after the if statement 135 | // ends. 
136 | let end_block = self.context.append_basic_block(self.function.value, "end"); 137 | 138 | // Now that we have all the blocks, we're ready to backpatch some `br` instructions into 139 | // the blocks. 140 | for (index, branch) in branches.iter().enumerate() { 141 | let &Branch { 142 | condition, 143 | body: body_block, 144 | end_block: branch_end_block, 145 | .. 146 | } = &branch; 147 | if let Some(condition) = condition { 148 | // The block to execute if the branch fails is dependent on whether there's a branch 149 | // after this one, and also if the branch after this one is an `else` branch without 150 | // a condition. 151 | let else_block = if let Some(next_branch) = branches.get(index + 1) { 152 | if let Some(next_condition) = &next_branch.condition { 153 | next_condition.block 154 | } else { 155 | next_branch.body 156 | } 157 | } else { 158 | end_block 159 | }; 160 | self.builder.position_at_end(condition.end_block); 161 | self.builder.build_conditional_branch(condition.value, *body_block, else_block); 162 | } 163 | self.builder.position_at_end(*branch_end_block); 164 | self.builder.build_unconditional_branch(end_block); 165 | } 166 | 167 | // Compilation is resumed normally at the %end block. 168 | self.builder.position_at_end(end_block); 169 | // In case of an if expression, we have to generate a `phi` node at the end that's going to 170 | // contain our final value. 171 | if is_expression { 172 | let typ = branches[0].result.unwrap().get_type(); 173 | let phi = self.builder.build_phi(typ, "ifresult"); 174 | for Branch { 175 | end_block, result, .. 176 | } in branches 177 | { 178 | phi.add_incoming(&[(&result.unwrap(), end_block)]); 179 | } 180 | // It's a bit strange that `phi`'s function for this is not called `as_basic_value_enum`. 181 | Some(phi.as_basic_value()) 182 | } else { 183 | None 184 | } 185 | } 186 | 187 | /// Generates code for a `while` loop. 
188 | pub(crate) fn generate_while(&mut self, ir: &Ir, node: NodeId) { 189 | // Save the start block for generating the initial `br label %condition` instruction. 190 | let start_block = self.builder.get_insert_block().unwrap(); 191 | 192 | // Generate the condition block and value. 193 | let condition_block = self.context.append_basic_block(self.function.value, "while"); 194 | self.builder.position_at_end(condition_block); 195 | let condition_value = self.generate_expression(ir, ir.ast.first_handle(node)); 196 | // Save the end of the condition value, in case it generates some extra blocks. 197 | let condition_end_block = self.builder.get_insert_block().unwrap(); 198 | 199 | // Generate the loop body. 200 | let body_block = self.context.append_basic_block(self.function.value, "do"); 201 | self.builder.position_at_end(body_block); 202 | self.generate_statements(ir, node); 203 | let body_end_block = self.builder.get_insert_block().unwrap(); 204 | 205 | // Generate the final %end block. 206 | let end_block = self.context.append_basic_block(self.function.value, "end"); 207 | 208 | // Now, insert all the branch instructions. 209 | // First we start with the unconditional branch to the condition block. 210 | self.builder.position_at_end(start_block); 211 | self.builder.build_unconditional_branch(condition_block); 212 | // Then, we build the conditional branch at the end of the condition block. 213 | self.builder.position_at_end(condition_end_block); 214 | self.builder.build_conditional_branch( 215 | condition_value.into_int_value(), 216 | body_block, 217 | end_block, 218 | ); 219 | // Finally, we branch back to the condition at the end of the body. 220 | self.builder.position_at_end(body_end_block); 221 | self.builder.build_unconditional_branch(condition_block); 222 | 223 | let scope = ir.ast.scope(node).unwrap(); 224 | self.generate_break_jumps(scope, end_block); 225 | 226 | // Continue generating code at the end block. 
227 | self.builder.position_at_end(end_block); 228 | } 229 | 230 | /// Generates jumps at the ends of blocks, that are results of `break`s of the breaking scope 231 | /// with the given ID. 232 | fn generate_break_jumps(&mut self, scope: ScopeId, end_block: BasicBlock<'c>) { 233 | let builder = self.context.create_builder(); 234 | let keys: SmallVec<[(ScopeId, usize); 4]> = 235 | self.break_blocks.keys().filter(|(scope_id, _)| *scope_id == scope).copied().collect(); 236 | for key in keys { 237 | let block = self.break_blocks.remove(&key).unwrap(); 238 | builder.position_at_end(block); 239 | builder.build_unconditional_branch(end_block); 240 | } 241 | } 242 | 243 | /// Generates code for a `break` expression. 244 | pub(crate) fn generate_break(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 245 | // Save the current block in the break_blocks table, such that it can be later referred to 246 | // by the enclosing `while` loop. 247 | let target_scope = ir.ast.scope(node).expect("Break node with no scope in IR"); 248 | let id = self.break_blocks.len(); 249 | let break_block = self.builder.get_insert_block().unwrap(); 250 | self.break_blocks.insert((target_scope, id), break_block); 251 | 252 | // Continue generating in an unreachable block with no predecessors. 253 | let unreachable_block = self.context.append_basic_block(self.function.value, "unreachable"); 254 | self.builder.position_at_end(unreachable_block); 255 | 256 | // Return a dummy value as a result of `break` being an expression. 257 | let result_type = self.get_type(&ir.types, ir.ast.type_id(node)); 258 | result_type.const_zero() 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /tsuki-backend-llvm/src/expressions.rs: -------------------------------------------------------------------------------- 1 | //! Code generation for expressions. 
2 | 3 | use inkwell::values::{BasicValue, BasicValueEnum, FloatValue, IntValue}; 4 | use inkwell::IntPredicate; 5 | use tsuki_frontend::ast::{NodeId, NodeKind}; 6 | use tsuki_frontend::sem::Ir; 7 | 8 | use crate::codegen::CodeGen; 9 | use crate::libc; 10 | 11 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> { 12 | /// Generates code for a Bool literal. 13 | fn generate_boolean_literal(&self, ir: &Ir, node: NodeId) -> IntValue<'c> { 14 | let typ = self.context.bool_type(); 15 | let literal = (ir.ast.kind(node) == NodeKind::True) as u64; 16 | typ.const_int(literal, false) 17 | } 18 | 19 | /// Generates code for an integer literal. 20 | fn generate_integer_literal(&self, ir: &Ir, node: NodeId) -> IntValue<'c> { 21 | let typ = self.get_type(&ir.types, ir.ast.type_id(node)).into_int_type(); 22 | typ.const_int(ir.ast.extra(node).as_uint().unwrap(), false) 23 | } 24 | 25 | /// Generates code for a float literal. 26 | fn generate_float_literal(&self, ir: &Ir, node: NodeId) -> FloatValue<'c> { 27 | let typ = self.get_type(&ir.types, ir.ast.type_id(node)).into_float_type(); 28 | typ.const_float(ir.ast.extra(node).as_float().unwrap()) 29 | } 30 | 31 | /// Generates code for boolean negation. 32 | fn generate_boolean_negation(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 33 | let right = self.generate_expression(ir, ir.ast.first_handle(node)); 34 | self.builder.build_not(right.into_int_value(), "nottmp").as_basic_value_enum() 35 | } 36 | 37 | /// Generates code for integer or float negation. 
38 | fn generate_number_negation(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 39 | let right = self.generate_expression(ir, ir.ast.first_handle(node)); 40 | let typ = ir.ast.type_id(node); 41 | let kind = ir.types.kind(typ); 42 | if kind.is_integer() { 43 | let typ = self.get_type(&ir.types, typ).into_int_type(); 44 | let zero = typ.const_zero(); 45 | self.builder.build_int_sub(zero, right.into_int_value(), "negtmp").into() 46 | } else if kind.is_float() { 47 | self.builder.build_float_neg(right.into_float_value(), "fnegtmp").into() 48 | } else { 49 | unreachable!() 50 | } 51 | } 52 | 53 | /// Generates the LHS and RHS of a binary operator. 54 | fn generate_binary_operation( 55 | &mut self, 56 | ir: &Ir, 57 | node: NodeId, 58 | ) -> (BasicValueEnum<'c>, BasicValueEnum<'c>) { 59 | ( 60 | self.generate_expression(ir, ir.ast.first_handle(node)), 61 | self.generate_expression(ir, ir.ast.second_handle(node)), 62 | ) 63 | } 64 | 65 | /// Generates code for integer math. 66 | fn generate_integer_math(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 67 | // TODO: Panic on overflow. This can be done using LLVM's arithmetic intrinsics that return 68 | // an aggregate {T, i1}, where the second field is a flag signifying whether overflow occurred.
69 | let (left_value, right_value) = self.generate_binary_operation(ir, node); 70 | let (left, right) = (left_value.into_int_value(), right_value.into_int_value()); 71 | let math = match ir.ast.kind(node) { 72 | NodeKind::Plus => self.builder.build_int_add(left, right, "addtmp"), 73 | NodeKind::Minus => self.builder.build_int_sub(left, right, "subtmp"), 74 | NodeKind::Mul => self.builder.build_int_mul(left, right, "multmp"), 75 | NodeKind::Div => { 76 | let is_signed = ir.types.kind(ir.ast.type_id(node)).unwrap_integer().is_signed(); 77 | if is_signed { 78 | self.builder.build_int_signed_div(left, right, "sdivtmp") 79 | } else { 80 | self.builder.build_int_unsigned_div(left, right, "udivtmp") 81 | } 82 | } 83 | _ => unreachable!(), 84 | }; 85 | math.as_basic_value_enum() 86 | } 87 | 88 | fn generate_float_math(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 89 | let (left_value, right_value) = self.generate_binary_operation(ir, node); 90 | let (left, right) = ( 91 | left_value.into_float_value(), 92 | right_value.into_float_value(), 93 | ); 94 | let math = match ir.ast.kind(node) { 95 | NodeKind::Plus => self.builder.build_float_add(left, right, "faddtmp"), 96 | NodeKind::Minus => self.builder.build_float_sub(left, right, "fsubtmp"), 97 | NodeKind::Mul => self.builder.build_float_mul(left, right, "fmultmp"), 98 | NodeKind::Div => self.builder.build_float_div(left, right, "fdivtmp"), 99 | _ => unreachable!(), 100 | }; 101 | math.as_basic_value_enum() 102 | } 103 | 104 | /// Generates code for integer and floating-point math operations. 105 | fn generate_math(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 106 | let typ = ir.types.kind(ir.ast.type_id(node)); 107 | if typ.is_integer() { 108 | self.generate_integer_math(ir, node) 109 | } else if typ.is_float() { 110 | self.generate_float_math(ir, node) 111 | } else { 112 | unreachable!() 113 | } 114 | } 115 | 116 | /// Generates code for an integer type conversion (`WidenUint` or `WidenInt`). 
117 | fn generate_integer_conversion(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 118 | let inner = ir.ast.first_handle(node); 119 | let inner_value = self.generate_expression(ir, inner).into_int_value(); 120 | let dest_type = self.get_type(&ir.types, ir.ast.type_id(node)).into_int_type(); 121 | match ir.ast.kind(node) { 122 | NodeKind::WidenUint => self.builder.build_int_z_extend(inner_value, dest_type, "uwidened"), 123 | NodeKind::WidenInt => self.builder.build_int_s_extend(inner_value, dest_type, "swidened"), 124 | _ => unreachable!(), 125 | } 126 | .as_basic_value_enum() 127 | } 128 | 129 | /// Generates code for a boolean comparison. 130 | fn generate_boolean_comparison(&mut self, ir: &Ir, node: NodeId) -> IntValue<'c> { 131 | let (left_value, right_value) = self.generate_binary_operation(ir, node); 132 | let (left, right) = (left_value.into_int_value(), right_value.into_int_value()); 133 | let predicate = match ir.ast.kind(node) { 134 | NodeKind::Equal => IntPredicate::EQ, 135 | NodeKind::NotEqual => IntPredicate::NE, 136 | _ => unreachable!(), 137 | }; 138 | self.builder.build_int_compare(predicate, left, right, "boolcmp") 139 | } 140 | 141 | /// Generates code for an integer comparison. 
142 | fn generate_integer_comparison(&mut self, ir: &Ir, node: NodeId) -> IntValue<'c> { 143 | let (left_value, right_value) = self.generate_binary_operation(ir, node); 144 | let (left, right) = (left_value.into_int_value(), right_value.into_int_value()); 145 | let left_type = ir.ast.type_id(ir.ast.first_handle(node)); 146 | let is_signed = ir.types.kind(left_type).unwrap_integer().is_signed(); 147 | let predicate = match ir.ast.kind(node) { 148 | NodeKind::Equal => IntPredicate::EQ, 149 | NodeKind::NotEqual => IntPredicate::NE, 150 | NodeKind::Less if is_signed => IntPredicate::SLT, 151 | NodeKind::LessEqual if is_signed => IntPredicate::SLE, 152 | NodeKind::Greater if is_signed => IntPredicate::SGT, 153 | NodeKind::GreaterEqual if is_signed => IntPredicate::SGE, 154 | NodeKind::Less if !is_signed => IntPredicate::ULT, 155 | NodeKind::LessEqual if !is_signed => IntPredicate::ULE, 156 | NodeKind::Greater if !is_signed => IntPredicate::UGT, 157 | NodeKind::GreaterEqual if !is_signed => IntPredicate::UGE, 158 | _ => unreachable!(), 159 | }; 160 | self.builder.build_int_compare(predicate, left, right, "intcmp") 161 | } 162 | 163 | /// Generates code for a comparison, dispatching on the left operand's type: integer or Bool (float and other types are still `todo!()`). 164 | fn generate_comparison(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 165 | let left_node = ir.ast.first_handle(node); 166 | let typ = ir.types.kind(ir.ast.type_id(left_node)); 167 | if typ.is_integer() { 168 | self.generate_integer_comparison(ir, node) 169 | } else if typ.is_float() { 170 | todo!() 171 | } else if typ.is_bool() { 172 | self.generate_boolean_comparison(ir, node) 173 | } else { 174 | todo!() 175 | } 176 | .as_basic_value_enum() 177 | } 178 | 179 | /// Generates code for any expression node.
180 | pub(crate) fn generate_expression(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 181 | match ir.ast.kind(node) { 182 | // Literals 183 | NodeKind::True | NodeKind::False => self.generate_boolean_literal(ir, node).into(), 184 | | NodeKind::Uint8 185 | | NodeKind::Uint16 186 | | NodeKind::Uint32 187 | | NodeKind::Uint64 188 | | NodeKind::Int8 189 | | NodeKind::Int16 190 | | NodeKind::Int32 191 | | NodeKind::Int64 => self.generate_integer_literal(ir, node).into(), 192 | NodeKind::Float32 | NodeKind::Float64 => self.generate_float_literal(ir, node).into(), 193 | 194 | // Variables 195 | NodeKind::Variable => self.generate_variable_reference(ir, node), 196 | 197 | // Operators 198 | NodeKind::Not => self.generate_boolean_negation(ir, node), 199 | NodeKind::Neg => self.generate_number_negation(ir, node), 200 | NodeKind::Plus | NodeKind::Minus | NodeKind::Mul | NodeKind::Div => { 201 | self.generate_math(ir, node) 202 | } 203 | | NodeKind::Equal 204 | | NodeKind::NotEqual 205 | | NodeKind::Less 206 | | NodeKind::LessEqual 207 | | NodeKind::Greater 208 | | NodeKind::GreaterEqual => self.generate_comparison(ir, node), 209 | NodeKind::Assign => self.generate_assignment(ir, node).unwrap(), 210 | 211 | // Control flow 212 | NodeKind::DoExpression => self.generate_do(ir, node).unwrap(), 213 | NodeKind::IfExpression => self.generate_if(ir, node).unwrap(), 214 | NodeKind::CallFunction => self.generate_call(ir, node), 215 | NodeKind::Break => self.generate_break(ir, node), 216 | NodeKind::Return => self.generate_return(ir, node), 217 | 218 | // Intrinsics 219 | NodeKind::WidenUint | NodeKind::WidenInt => self.generate_integer_conversion(ir, node), 220 | NodeKind::PrintInt32 | NodeKind::PrintFloat32 => { 221 | self.generate_call_like_intrinsic(ir, node) 222 | } 223 | other => unreachable!("invalid expression node: {:?}", other), 224 | } 225 | } 226 | 227 | /// Generates code for a function call-like intrinsic. 
228 | fn generate_call_like_intrinsic(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> { 229 | let arguments = ir.ast.extra(node).as_node_list().unwrap(); 230 | match ir.ast.kind(node) { 231 | kind @ (NodeKind::PrintInt32 | NodeKind::PrintFloat32) => { 232 | // This is not the, um, cleanest... piece of code here, but it'll get replaced 233 | // anyway once c_import is implemented. 234 | let printf = self.module.get_function(libc::FUN_PRINTF).expect("libc must be loaded"); 235 | let zero = self.context.i32_type().const_zero(); 236 | let global_name = if kind == NodeKind::PrintInt32 { 237 | libc::GLOBAL_PRINTF_INT_FORMAT 238 | } else { 239 | libc::GLOBAL_PRINTF_FLOAT_FORMAT 240 | }; 241 | let format = self.module.get_global(global_name).unwrap(); 242 | let format_ptr = unsafe { 243 | self.builder.build_in_bounds_gep(format.as_pointer_value(), &[zero, zero], "fmt") 244 | }; 245 | let mut argument = self.generate_expression(ir, arguments[0]); 246 | // We need to convert `float` to `double` for passing to printf. 247 | if kind == NodeKind::PrintFloat32 { 248 | let f64_type = self.context.f64_type(); 249 | argument = self 250 | .builder 251 | .build_float_cast(argument.into_float_value(), f64_type, "printf_dbl") 252 | .as_basic_value_enum(); 253 | } 254 | self.builder.build_call(printf, &[format_ptr.into(), argument.into()], "_"); 255 | } 256 | _ => unreachable!(), 257 | } 258 | self.generate_unit_literal().into() 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /tsuki-backend-llvm/src/functions.rs: -------------------------------------------------------------------------------- 1 | //! Code generation for functions. 
2 | 3 | use inkwell::basic_block::BasicBlock; 4 | use inkwell::builder::Builder; 5 | use inkwell::context::Context; 6 | use inkwell::module::Module; 7 | use inkwell::types::{BasicType, BasicTypeEnum, FunctionType}; 8 | use inkwell::values::{BasicValue, BasicValueEnum, FunctionValue, PointerValue}; 9 | use smallvec::SmallVec; 10 | use tsuki_frontend::ast::{NodeId, NodeKind}; 11 | use tsuki_frontend::functions::FunctionId; 12 | use tsuki_frontend::sem::Ir; 13 | 14 | use crate::codegen::CodeGen; 15 | 16 | /// Data associated with a single function. 17 | pub struct Function<'c> { 18 | pub value: FunctionValue<'c>, 19 | pub entry_block: BasicBlock<'c>, 20 | } 21 | 22 | impl<'c> Function<'c> { 23 | /// Creates a new function from the given name and type. 24 | /// 25 | /// # Side effects 26 | /// 27 | /// This adds the new function to the module. 28 | pub fn add_to_module( 29 | context: &'c Context, 30 | module: &Module<'c>, 31 | name: &str, 32 | typ: FunctionType<'c>, 33 | ) -> Self { 34 | // Unfortunately this side effect of adding the function into the module is unavoidable, 35 | // as far as I know. I generally prefer code that is free of any side effects but I guess 36 | // some sacrifices have to be made. 37 | let value = module.add_function(name, typ, None); 38 | let entry_block = context.append_basic_block(value, "entry"); 39 | Self { value, entry_block } 40 | } 41 | 42 | /// Creates a `Function` from an existing `FunctionValue`. 43 | pub fn from_value(value: FunctionValue<'c>) -> Self { 44 | Self { 45 | value, 46 | entry_block: value.get_first_basic_block().expect("function did not have a basic block"), 47 | } 48 | } 49 | 50 | /// Positions the given builder at the start of the function. 
51 | pub fn position_at_entry_block(&self, builder: &Builder) { 52 | if let Some(instruction) = self.entry_block.get_first_instruction() { 53 | builder.position_before(&instruction); 54 | } else { 55 | builder.position_at_end(self.entry_block); 56 | } 57 | } 58 | 59 | /// Creates a builder that adds instructions to the top of the entry block. 60 | pub fn create_entry_block_builder(&self, context: &'c Context) -> Builder<'c> { 61 | let builder = context.create_builder(); 62 | self.position_at_entry_block(&builder); 63 | builder 64 | } 65 | } 66 | 67 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> { 68 | /// Returns the function type, for the given function ID. 69 | fn get_function_type(&self, ir: &Ir, function_id: FunctionId) -> FunctionType<'c> { 70 | let parameters = ir.functions.parameters(function_id); 71 | let parameter_types: SmallVec<[BasicTypeEnum<'c>; 8]> = parameters 72 | .formal 73 | .iter() 74 | .map(|&symbol_id| { 75 | let typ = ir.symbols.type_id(symbol_id); 76 | self.get_type(&ir.types, typ) 77 | }) 78 | .collect(); 79 | // Well, that kind of sucks. AnyValueEnum does not have an fn_type method, so we need to 80 | // branch here. 81 | if ir.types.kind(parameters.return_type).is_unit() { 82 | self.context.void_type().fn_type(¶meter_types, false) 83 | } else { 84 | self.get_type(&ir.types, parameters.return_type).fn_type(¶meter_types, false) 85 | } 86 | } 87 | 88 | /// Adds functions from the IR to the module. 89 | pub fn add_functions(&self, ir: &Ir) { 90 | for function_id in ir.functions.iter() { 91 | // Skip non-local functions in the process. 92 | if ir.functions.kind(function_id).is_local() { 93 | let function_type = self.get_function_type(ir, function_id); 94 | let _ = Function::add_to_module( 95 | self.context, 96 | self.module, 97 | ir.functions.mangled_name(function_id), 98 | function_type, 99 | ); 100 | } 101 | } 102 | } 103 | 104 | /// Generates code for a function. 
pub fn generate_function(&self, ir: &Ir, node: NodeId) {
   // Get the function ID from the AST.
   let name_node = ir.ast.first_handle(node);
   let symbol_id = ir.ast.symbol_id(name_node);
   let function_id = ir.symbols.kind(symbol_id).unwrap_function();

   // Obtain the function from the module.
   let function = self
      .module
      .get_function(ir.functions.mangled_name(function_id))
      .expect("function does not seem to exist");
   let function = Function::from_value(function);

   // Create a new CodeGen for generating the function's body.
   let mut code_gen = self.for_function(function);

   // Copy all the parameters into allocas.
   // I don't think this is _too_ terrible performance-wise, mem2reg will hopefully optimize
   // away most of the cases here.
   code_gen.function.position_at_entry_block(&code_gen.builder);
   let parameters = ir.functions.parameters(function_id);
   // The LLVM parameters were created from `parameters.formal` (see `get_function_type`), so
   // both sequences line up one to one.
   let llvm_parameters = code_gen.function.value.get_param_iter();
   for (parameter, &symbol_id) in llvm_parameters.zip(parameters.formal.iter()) {
      // While we're at it, we give all the parameters names for more readable IR.
      let name = ir.symbols.name(symbol_id);
      parameter.set_name(name);
      let alloca: PointerValue<'c> = code_gen.builder.build_alloca(parameter.get_type(), name);
      code_gen.builder.build_store(alloca, parameter);
      // Store the alloca in the code generator's variables list, so that references to the
      // parameter inside the body resolve to it.
      code_gen.variables.insert(symbol_id, alloca);
   }

   // Generate the function's body. Unit functions have no tail expression to return.
   let return_value = if ir.types.kind(parameters.return_type).is_unit() {
      code_gen.generate_statements(ir, node);
      None
   } else {
      Some(code_gen.generate_statements_with_tail_expression(ir, node))
   };

   // Finish the function up.
   code_gen.finish_function(return_value);
}

/// Generates code for a function call.
///
/// The callee is looked up in the module by its mangled name. Calls to functions returning
/// `void` evaluate to the unit literal.
pub(crate) fn generate_call(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
   // Get the function we want to call.
   let callee_node = ir.ast.first_handle(node);
   let symbol_id = ir.ast.symbol_id(callee_node);
   let function_id = ir.symbols.kind(symbol_id).unwrap_function();
   let function = self
      .module
      .get_function(ir.functions.mangled_name(function_id))
      .expect("function does not seem to exist");

   // Generate code for all the arguments.
   let mut arguments = SmallVec::<[BasicValueEnum<'c>; 8]>::new();
   for &argument in ir.ast.extra(node).as_node_list().unwrap() {
      arguments.push(self.generate_expression(ir, argument));
   }
   let call = self.builder.build_call(function, &arguments, "calltmp");

   call.try_as_basic_value().either(|value| value, |_void| self.generate_unit_literal().into())
}

/// Generates code for a `return` expression.
///
/// The value produced for the `return` expression itself is the zero constant of its type;
/// it only exists so that code following the `return` still has a value to work with.
pub(crate) fn generate_return(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
   // Finish off the current basic block with a `ret` instruction.
   let return_value = ir.ast.first_handle(node);
   let result_value = if ir.ast.kind(return_value) != NodeKind::Empty {
      let value = self.generate_expression(ir, return_value);
      self.builder.build_return(Some(&value));
      let result_type = ir.ast.type_id(node);
      self.get_type(&ir.types, result_type)
   } else {
      self.builder.build_return(None);
      self.unit_type.into()
   };
   // Then, begin a new basic block such that if there's any unreachable code past this block,
   // its terminator will be contained in this new block.
   let unreachable_block = self.context.append_basic_block(self.function.value, "unreachable");
   self.builder.position_at_end(unreachable_block);

   result_value.const_zero()
}
}

--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/lib.rs:
--------------------------------------------------------------------------------
//! The root of the LLVM backend. This implements high-level functionality - compiling code into
//! object files, and linking those object files into executables.

mod codegen;
mod control_flow;
mod expressions;
mod functions;
mod libc;
mod types;
mod variables;

use std::fmt::{self, Display, Formatter};
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use std::str::FromStr;

use inkwell::context::Context;
use inkwell::passes::PassManager;
use inkwell::targets::{
   CodeModel, FileType, InitializationConfig, RelocMode, Target, TargetMachine, TargetTriple,
};
use inkwell::values::BasicValue;
use thiserror::Error;
use tsuki_frontend::common::{self, Error, ErrorKind, Errors, SourceFile, Span};
use tsuki_frontend::{backend, AnalyzeOptions};

use codegen::CodeGen;

use crate::functions::Function;

/// Debug options for the backend. Everything is disabled by default.
#[derive(Default)]
pub struct DebugOptions {
   /// When set, the generated LLVM IR is printed to stderr during compilation.
   pub dump_ir: bool,
}

/// Struct representing the LLVM compilation backend and options passed to it.
43 | pub struct LlvmBackend { 44 | cache_dir: PathBuf, 45 | std_path: PathBuf, 46 | executable_name: String, 47 | target_triple: TargetTriple, 48 | optimization_level: OptimizationLevel, 49 | frontend_debug_options: tsuki_frontend::DebugOptions, 50 | backend_debug_options: DebugOptions, 51 | } 52 | 53 | /// Options for creating an LLVM backend instance. 54 | pub struct LlvmBackendConfig<'c, 'e, 't> { 55 | pub cache_dir: &'c Path, 56 | pub std_path: &'c Path, 57 | pub package_name: &'e str, 58 | pub target_triple: Option<&'t str>, 59 | pub optimization_level: OptimizationLevel, 60 | pub frontend_debug_options: tsuki_frontend::DebugOptions, 61 | pub backend_debug_options: DebugOptions, 62 | } 63 | 64 | impl LlvmBackend { 65 | /// Creates a new instance of the LLVM compilation backend, with the provided options. 66 | pub fn new(config: LlvmBackendConfig) -> Self { 67 | Self { 68 | cache_dir: config.cache_dir.to_owned(), 69 | std_path: config.std_path.to_owned(), 70 | executable_name: config.package_name.to_owned(), 71 | target_triple: match config.target_triple { 72 | Some(triple) => TargetTriple::create(triple), 73 | None => TargetMachine::get_default_triple(), 74 | }, 75 | optimization_level: config.optimization_level, 76 | frontend_debug_options: config.frontend_debug_options, 77 | backend_debug_options: config.backend_debug_options, 78 | } 79 | } 80 | 81 | fn to_errors(r: Result) -> Result 82 | where 83 | E: ToString, 84 | { 85 | r.map_err(|e| { 86 | common::single_error(Error { 87 | filename: "internal error".into(), 88 | span: Span::default(), 89 | kind: ErrorKind::CodeGen(e.to_string()), 90 | }) 91 | }) 92 | } 93 | } 94 | 95 | impl backend::Backend for LlvmBackend { 96 | type Target = ObjectFile; 97 | 98 | /// Compiles the given source file to an executable. 
99 | fn compile(&self, root: SourceFile) -> Result { 100 | let ir = tsuki_frontend::analyze( 101 | AnalyzeOptions { 102 | file: &root, 103 | std_path: self.std_path.clone(), 104 | }, 105 | &self.frontend_debug_options, 106 | )?; 107 | let context = Context::create(); 108 | let module = context.create_module(&root.module_name); 109 | 110 | // Set up the pass manager. 111 | let pm = PassManager::create(&module); 112 | pm.add_verifier_pass(); 113 | 114 | if self.optimization_level >= OptimizationLevel::Essential { 115 | // Constant folding passes run twice: once at startup, and once after CFG simplicifation 116 | // and mem2reg, such that constant folding is also performed after simplifying the IR to 117 | // use more SSA and less allocas. 118 | pm.add_instruction_combining_pass(); 119 | pm.add_reassociate_pass(); 120 | 121 | // TODO: Figure out what GVN (global value numbering) is. The LLVM docs for passes don't 122 | // really say much about it. 123 | // (https://llvm.org/docs/Passes.html) 124 | pm.add_gvn_pass(); 125 | 126 | // These passes simplify the control flow graph and turn memory operations into SSA form 127 | // wherever possible. 128 | pm.add_cfg_simplification_pass(); 129 | pm.add_basic_alias_analysis_pass(); 130 | pm.add_promote_memory_to_register_pass(); 131 | 132 | // As said before, constant folding is performed twice. 133 | pm.add_instruction_combining_pass(); 134 | pm.add_reassociate_pass(); 135 | } 136 | 137 | pm.initialize(); 138 | 139 | // Construct all the types. 140 | let i32_type = context.i32_type(); 141 | let main_fun_type = i32_type.fn_type(&[], false); 142 | 143 | // Create the function and the codegen state. 144 | let main_fun = Function::add_to_module(&context, &module, "main", main_fun_type); 145 | let mut state = CodeGen::new(&root, &context, &pm, &module, main_fun); 146 | state.add_functions(&ir); 147 | 148 | // Compile the module's code. 149 | state.generate_statement(&ir, ir.root_node); 150 | 151 | // Return the zero exit code. 
152 | state.finish_function(Some(i32_type.const_zero().as_basic_value_enum())); 153 | 154 | if self.backend_debug_options.dump_ir { 155 | eprintln!("## LLVM IR"); 156 | eprintln!("{:?}", state); 157 | eprintln!(); 158 | } 159 | 160 | // Cross-compilation support, anyone? 161 | // Right now we initialize the native target only. 162 | Self::to_errors(Target::initialize_native(&InitializationConfig { 163 | // Honestly, I'm not sure we need _all_ the features. 164 | // TODO: Check which ones can be disabled. 165 | asm_parser: true, 166 | asm_printer: true, 167 | base: true, 168 | disassembler: true, 169 | info: true, 170 | machine_code: true, 171 | }))?; 172 | 173 | Self::to_errors(state.module.verify())?; 174 | 175 | // Set up the target machine. We won't be enabling any special features here for now. 176 | let target = Self::to_errors(Target::from_triple(&self.target_triple))?; 177 | let machine = Self::to_errors( 178 | target 179 | .create_target_machine( 180 | &self.target_triple, 181 | "generic", 182 | "", 183 | inkwell::OptimizationLevel::Default, 184 | RelocMode::Default, 185 | CodeModel::Default, 186 | ) 187 | .ok_or("target triple is not supported"), 188 | )?; 189 | state.module.set_data_layout(&machine.get_target_data().get_data_layout()); 190 | state.module.set_triple(&self.target_triple); 191 | 192 | // Create all the needed directories. 193 | let object_dir = self.cache_dir.join("object"); 194 | Self::to_errors(std::fs::create_dir_all(&self.cache_dir))?; 195 | Self::to_errors(std::fs::create_dir_all(&object_dir))?; 196 | 197 | // Do some path manipulation to figure out where the object file should be placed. 198 | let object_name = format!("{}.o", &self.executable_name); 199 | let object_path = object_dir.join(&object_name); 200 | // Delete the old object file so that LLVM isn't going to complain when writing the new one. 201 | // The result is ignored because we don't care if the file exists or not. 
202 | // If we lack sufficient permissions, then LLVM will point that out anyways. 203 | let _ = std::fs::remove_file(&object_path); 204 | 205 | // Compile the object file. 206 | Self::to_errors(machine.write_to_file(&state.module, FileType::Object, &object_path))?; 207 | 208 | Ok(ObjectFile { path: object_path }) 209 | } 210 | } 211 | 212 | /// Specifies the amount of optimizations to apply when compiling. 213 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] 214 | pub enum OptimizationLevel { 215 | /// Perform no optimizations at all. 216 | None, 217 | Essential, 218 | Release, 219 | } 220 | 221 | /// Returned when an invalid optimization level is used. 222 | #[derive(Clone, Copy, Debug)] 223 | pub struct InvalidOptimizationLevel; 224 | 225 | impl Display for InvalidOptimizationLevel { 226 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 227 | write!(f, "invalid optimization level") 228 | } 229 | } 230 | 231 | impl FromStr for OptimizationLevel { 232 | type Err = InvalidOptimizationLevel; 233 | 234 | /// Converts a string to an optimization level. 235 | /// 236 | /// Valid values include: 237 | /// - `"none"` → `None` 238 | /// - `"essential"` → `Essential` 239 | /// - `"release"` → `Release` 240 | /// 241 | /// Any other levels result in an `InvalidOptimizationLevel` error. 242 | fn from_str(s: &str) -> Result { 243 | match s { 244 | "none" => Ok(Self::None), 245 | "essential" => Ok(Self::Essential), 246 | "release" => Ok(Self::Release), 247 | _ => Err(InvalidOptimizationLevel), 248 | } 249 | } 250 | } 251 | 252 | /// Struct representing an object file built using LLVM. 253 | pub struct ObjectFile { 254 | path: PathBuf, 255 | } 256 | 257 | /// Struct representing an executable file built from linked object files. 
pub struct ExecutableFile {
   path: PathBuf,
}

/// Errors that can occur while linking object files into an executable.
#[derive(Debug, Error)]
pub enum LinkError {
   /// No linker could be determined. `ExecutableFile::link` always falls back to `cc`, so it
   /// does not produce this variant itself.
   #[error("no linker found; check the $TSUKI_CC and $CC environment variables.")]
   NoLinker,
   /// The linker ran but did not succeed. Holds its exit code (-1 if the process did not
   /// report one, e.g. because it was terminated by a signal) and its captured stderr.
   #[error("the linker exited with an error (code {0}):\n{1}")]
   Failure(i32, String),
   /// The linker process could not be spawned or its output could not be collected.
   #[error("I/O error: {0}")]
   Io(#[from] std::io::Error),
}

impl ExecutableFile {
   /// Links the provided object files into an executable.
   ///
   /// This launches the standard compiler pointed to by the `$TSUKI_CC` or `$CC` environment
   /// variables (in that order), or the `cc` executable found in `$PATH`.
   ///
   /// The executable is placed in the backend's cache directory, under the executable name
   /// the backend was configured with.
   ///
   /// # Errors
   ///
   /// Returns `LinkError::Io` if the linker cannot be run, and `LinkError::Failure` if it
   /// does not exit successfully.
   pub fn link(backend: LlvmBackend, objects: &[ObjectFile]) -> Result<Self, LinkError> {
      use std::env;

      let linker = env::var_os("TSUKI_CC")
         .or_else(|| env::var_os("CC"))
         .unwrap_or_else(|| "cc".into());
      let output_path = backend.cache_dir.join(&backend.executable_name);

      let mut cmd = Command::new(linker);
      // Pass the output path to the linker.
      cmd.arg("-o");
      cmd.arg(&output_path);
      for object in objects {
         cmd.arg(&object.path);
      }

      let output = cmd.output()?;
      if !output.status.success() {
         // `code()` is `None` when the process did not exit normally (on Unix: it was killed
         // by a signal). That must not be mistaken for a successful link.
         let exit_code = output.status.code().unwrap_or(-1);
         // Keep whatever diagnostics the linker printed, even if they are not valid UTF-8.
         let errors = String::from_utf8_lossy(&output.stderr).into_owned();
         return Err(LinkError::Failure(exit_code, errors));
      }
      Ok(Self { path: output_path })
   }

   /// Runs the executable, passing the given arguments to it. The output contains captured
   /// stdout and stderr, as well as the exit code.
306 | pub fn run(&self, args: &[&str]) -> Result { 307 | Command::new(&self.path).args(args).output() 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /tsuki-backend-llvm/src/libc.rs: -------------------------------------------------------------------------------- 1 | use inkwell::AddressSpace; 2 | 3 | use crate::codegen::CodeGen; 4 | 5 | // Named constants for string values. This should simplify refactoring if they ever need to 6 | // be changed. 7 | pub const FUN_PRINTF: &str = "printf"; 8 | pub const GLOBAL_PRINTF_INT_FORMAT: &str = "printf_int_format"; 9 | pub const GLOBAL_PRINTF_FLOAT_FORMAT: &str = "printf_float_format"; 10 | 11 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> { 12 | fn add_const_string(&mut self, name: &str, string: &[u8]) { 13 | let s = self.context.const_string(string, true); 14 | let typ = s.get_type(); 15 | let global = self.module.add_global(typ, Some(AddressSpace::Generic), name); 16 | global.set_initializer(&s); 17 | } 18 | 19 | pub(crate) fn load_libc(&mut self) { 20 | // int printf(char *fmt, ...); 21 | let string_type = self.context.i8_type().ptr_type(AddressSpace::Generic); 22 | let i32_type = self.context.i32_type(); 23 | let printf_fn_type = i32_type.fn_type(&[string_type.into()], true); 24 | self.module.add_function(FUN_PRINTF, printf_fn_type, None); 25 | 26 | // printf format strings 27 | self.add_const_string(GLOBAL_PRINTF_INT_FORMAT, b"%i\n"); 28 | self.add_const_string(GLOBAL_PRINTF_FLOAT_FORMAT, b"%g\n"); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /tsuki-backend-llvm/src/types.rs: -------------------------------------------------------------------------------- 1 | //! Conversion from tsuki types into LLVM types. 

use inkwell::types::{BasicType, BasicTypeEnum, FloatType, IntType};
use inkwell::values::StructValue;
use tsuki_frontend::types::{FloatSize, IntegerSize, TypeId, TypeKind, Types};

use crate::codegen::CodeGen;

impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
   /// Builds the value of a unit literal, which is the zero constant of the unit struct type.
   pub(crate) fn generate_unit_literal(&self) -> StructValue<'c> {
      self.unit_type.const_zero()
   }

   /// Maps a tsuki integer type onto the LLVM integer type of the same bit width.
   ///
   /// Signed and unsigned variants of the same size share one LLVM type.
   ///
   /// # Panics
   ///
   /// Panics if `typ` is not an integer type.
   fn get_integer_type(&self, types: &Types, typ: TypeId) -> IntType<'c> {
      let size = match types.kind(typ) {
         TypeKind::Integer(size) => size,
         _ => panic!("type is not an integer type"),
      };
      match size {
         IntegerSize::U8 | IntegerSize::S8 => self.context.i8_type(),
         IntegerSize::U16 | IntegerSize::S16 => self.context.i16_type(),
         IntegerSize::U32 | IntegerSize::S32 => self.context.i32_type(),
         IntegerSize::U64 | IntegerSize::S64 => self.context.i64_type(),
      }
   }

   /// Returns the float type for the provided type, or panics if the type is not a float type.
fn get_float_type(&self, types: &Types, typ: TypeId) -> FloatType<'c> {
   if let TypeKind::Float(size) = types.kind(typ) {
      match size {
         FloatSize::S32 => self.context.f32_type(),
         FloatSize::S64 => self.context.f64_type(),
      }
   } else {
      panic!("type is not a float type")
   }
}

/// Converts a tsuki type into the LLVM type used to represent its values.
///
/// `Unit` and `NoReturn` both map to the unit struct type, and aliases are resolved to the
/// type they point to.
///
/// # Panics
///
/// Panics for types that do not describe values (missing, error, statement, declaration,
/// and `type` types). `Char` is not implemented yet.
pub(crate) fn get_type(&self, types: &Types, typ: TypeId) -> BasicTypeEnum<'c> {
   match types.kind(typ) {
      TypeKind::Missing => panic!("get_type called with missingtype"),
      TypeKind::Error => panic!("get_type called with errortype"),
      TypeKind::Statement => panic!("get_type called with statement type"),
      TypeKind::Declaration(id) => panic!("get_type called with declaration({:?}) type", id),
      TypeKind::Type => panic!("get_type called with type type"),
      TypeKind::Unit | TypeKind::NoReturn => self.unit_type.as_basic_type_enum(),
      TypeKind::Bool => self.context.bool_type().as_basic_type_enum(),
      TypeKind::Integer(_) => self.get_integer_type(types, typ).as_basic_type_enum(),
      TypeKind::Float(_) => self.get_float_type(types, typ).as_basic_type_enum(),
      TypeKind::Char => todo!(),
      TypeKind::Alias(alias) => self.get_type(types, *alias),
   }
}
}

--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/variables.rs:
--------------------------------------------------------------------------------
//! Code generation for variable declarations.
use inkwell::values::{BasicValueEnum, PointerValue};
use tsuki_frontend::ast::{NodeId, NodeKind};
use tsuki_frontend::scope::SymbolId;
use tsuki_frontend::sem::Ir;

use crate::CodeGen;

/// Storage for variable values. This maps symbol IDs to `Value`s from LLVM.
pub(crate) struct Variables<'c> {
   /// Indexed by `SymbolId::id()`. Slots of symbols without a stored value are `None`.
   variables: Vec<Option<PointerValue<'c>>>,
}

impl<'c> Variables<'c> {
   /// Creates and initializes a new variable value storage.
pub(crate) fn new() -> Self {
   Self {
      variables: Vec::new(),
   }
}

/// Inserts a new value under the given symbol ID.
///
/// The storage grows as needed, and a value previously stored under the same ID is replaced.
pub(crate) fn insert(&mut self, symbol: SymbolId, value: PointerValue<'c>) {
   if self.variables.len() <= symbol.id() {
      self.variables.resize(symbol.id() + 1, None);
   }
   self.variables[symbol.id()] = Some(value);
}

/// Retrieves the value under the given symbol ID, or `None` if nothing was stored for it.
pub(crate) fn get(&self, symbol: SymbolId) -> Option<PointerValue<'c>> {
   // The outer `Option` covers IDs past the end of the storage, the inner one covers slots
   // that were created by `insert` growing the storage but never filled.
   self.variables.get(symbol.id()).copied().flatten()
}
}

impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
   /// Generates code for a variable reference.
   ///
   /// This loads the current value from the variable's alloca.
   ///
   /// # Panics
   ///
   /// Panics if no alloca was registered for the referenced symbol.
   pub(crate) fn generate_variable_reference(&self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
      let symbol_node = ir.ast.first_handle(node);
      let symbol = ir.ast.symbol_id(symbol_node);

      let alloca = self.variables.get(symbol).expect("reference to undeclared variable in IR");
      self.builder.build_load(alloca, ir.symbols.name(symbol))
   }

   /// Generates code for variable declarations.
   pub(crate) fn generate_variable_declaration(&mut self, ir: &Ir, node: NodeId) {
      let symbol_node = ir.ast.first_handle(node);
      let symbol = ir.ast.symbol_id(symbol_node);

      let value_node = ir.ast.second_handle(node);
      let value = self.generate_expression(ir, value_node);

      // A variable declaration always performs an alloca for simplicity's sake.
      // These allocas, loads, and stores, are optimized by mem2reg later.
      // To make the allocas optimizable by mem2reg, they need to be placed in the entry block of
      // the function, and the most obvious place to put the allocas is the top, because then they
      // are available to every other instruction, including loads and stores.
      let builder = self.function.create_entry_block_builder(self.context);
      let alloca = builder.build_alloca(value.get_type(), ir.symbols.name(symbol));
      // The store uses the regular builder, so the initialization stays at the point of the
      // declaration.
      self.builder.build_store(alloca, value);

      self.variables.insert(symbol, alloca);
   }

   /// Generates code for `AssignDiscard`.
   ///
   /// The value expression is still generated, but its result is dropped.
   pub(crate) fn generate_discarding_assignment(&mut self, ir: &Ir, node: NodeId) {
      let value_node = ir.ast.first_handle(node);
      let _ = self.generate_expression(ir, value_node);
   }

   /// Generates code for assignments to variables.
   ///
   /// When the assignment is used as an expression, the result is the value the variable held
   /// before the store. When it is used as a statement, `None` is returned.
   pub(crate) fn generate_assignment(
      &mut self,
      ir: &Ir,
      node: NodeId,
   ) -> Option<BasicValueEnum<'c>> {
      // When the assignment is not an expression, we do a little optimization where we don't
      // generate the load
      let result_type = ir.ast.type_id(node);
      let is_expression = !ir.types.kind(result_type).is_statement();

      let target_node = ir.ast.first_handle(node);
      let target = match ir.ast.kind(target_node) {
         NodeKind::Variable => {
            let symbol_node = ir.ast.first_handle(target_node);
            let symbol = ir.ast.symbol_id(symbol_node);
            self.variables.get(symbol).expect("reference to undeclared variable in IR")
         }
         _ => unreachable!(),
      };
      let value_node = ir.ast.second_handle(node);
      let value = self.generate_expression(ir, value_node);

      // The load has to be emitted before the store, as it reads the previous value.
      let result = if is_expression {
         let old_value = self.builder.build_load(target, "old");
         Some(old_value)
      } else {
         None
      };
      // Note that this does not care about mutability; that part is handled by SemTypes in the
      // frontend.
      self.builder.build_store(target, value);
      result
   }
}

--------------------------------------------------------------------------------
/tsuki-frontend/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "tsuki-frontend"
version = "0.1.0"
edition = "2018"

[dependencies]
thiserror = "1.0.26"
phf = { version = "0.9.0", features = ["macros"] }
smallvec = "1.6.1"

--------------------------------------------------------------------------------
/tsuki-frontend/src/astdump.rs:
--------------------------------------------------------------------------------
//! AST pretty printer.

use crate::ast::*;
use crate::common::SourceFile;
use crate::types::Types;

/// Label printed in front of a child node, describing its role within the parent.
#[derive(Debug)]
enum Prefix {
   L,
   R,
   Fun,
   Cond,
   X,
   Name,
   Params,
   Generic,
   Formal,
   Return,
   Type,
   Constraint,
}

/// Prints the indentation for the given nesting depth to stderr.
fn print_indentation(depth: usize) {
   for _ in 0..depth * 2 {
      eprint!(" ");
   }
}

/// Prints the source code between the byte offsets `start..end` to stderr.
fn print_source_range(file: &SourceFile, start: usize, end: usize) {
   eprint!("{}", &file.source[start..end]);
}

/// Prints the string or comment between the byte offsets `start..end` to stderr.
/// Right now this does the same as `print_source_range`.
fn print_string_range(file: &SourceFile, start: usize, end: usize) {
   eprint!("{}", &file.source[start..end]);
}

/// Everything `dump_node` needs besides the node itself.
struct State<'s, 'a, 't> {
   file: &'s SourceFile,
   ast: &'a Ast,
   /// When present, the type of each node is printed next to it.
   types: Option<&'t Types>,
}

/// Prints a single node and, recursively, all of its children to stderr.
///
/// Each node takes up one line: the optional prefix, the node kind, the source text or
/// literal value where applicable, the type (if `s.types` is set), and the scope (if any).
/// Children are printed below their parent, one level deeper.
fn dump_node(s: &State, node: NodeId, depth: usize, prefix: Option<Prefix>) {
   let State { file, ast, types } = s;
   print_indentation(depth);

   let kind = ast.kind(node);
   let extra = ast.extra(node);

   // Optional prefix.
   if let Some(prefix) = prefix {
      eprint!("{:?}: ", prefix);
   }

   // Node header: the name, and optionally some source code.
   eprint!("{:?} ", kind);
   match kind {
      NodeKind::Integer | NodeKind::Float | NodeKind::Atom | NodeKind::Identifier => {
         let (start, end) = (ast.first(node), ast.second(node));
         print_source_range(file, start, end);
      }
      NodeKind::String | NodeKind::DocComment => {
         let (start, end) = (ast.first(node), ast.second(node));
         print_string_range(file, start, end);
      }
      NodeKind::Character => eprint!("{:?}", char::from_u32(ast.first(node) as u32)),
      NodeKind::Symbol => eprint!("{:?}", ast.first(node)),
      _ => (),
   }
   // Numeric literals carry their value in the node's extra data.
   match extra {
      number @ (NodeData::Uint8(..)
      | NodeData::Uint16(..)
      | NodeData::Uint32(..)
      | NodeData::Uint64(..)
      | NodeData::Int8(..)
      | NodeData::Int16(..)
      | NodeData::Int32(..)
      | NodeData::Int64(..)
      | NodeData::Float32(..)
      | NodeData::Float64(..)) => eprint!("{:?}", number),
      _ => (),
   }
   if let Some(types) = types {
      let typ = ast.type_id(node);
      eprint!(" : {}", types.name(typ));
   }
   if let Some(scope) = ast.scope(node) {
      eprint!(" +{:?}", scope);
   }
   eprintln!();

   // Children stored in the node's two handles.
   match kind {
      | NodeKind::Dot
      | NodeKind::Plus
      | NodeKind::Minus
      | NodeKind::Mul
      | NodeKind::Div
      | NodeKind::Pow
      | NodeKind::Concat
      | NodeKind::Lshift
      | NodeKind::Rshift
      | NodeKind::BitAnd
      | NodeKind::BitOr
      | NodeKind::BitXor
      | NodeKind::Equal
      | NodeKind::NotEqual
      | NodeKind::Less
      | NodeKind::Greater
      | NodeKind::LessEqual
      | NodeKind::GreaterEqual
      | NodeKind::UpTo
      | NodeKind::UpToInclusive
      | NodeKind::Assign
      | NodeKind::PlusAssign
      | NodeKind::MinusAssign
      | NodeKind::MulAssign
      | NodeKind::DivAssign
      | NodeKind::Push
      | NodeKind::Index
      | NodeKind::IndexAlt
      | NodeKind::Val
      | NodeKind::Var
      | NodeKind::Type => {
         let (left, right) = (ast.first_handle(node), ast.second_handle(node));
         dump_node(s, left, depth + 1, Some(Prefix::L));
         dump_node(s, right, depth + 1, Some(Prefix::R));
      }
      NodeKind::Fun => {
         let (left, right) = (ast.first_handle(node), ast.second_handle(node));
         dump_node(s, left, depth + 1, Some(Prefix::Name));
         dump_node(s, right, depth + 1, Some(Prefix::Params));
      }
      NodeKind::Parameters => {
         let (left, right) = (ast.first_handle(node), ast.second_handle(node));
         dump_node(s, left, depth + 1, Some(Prefix::Generic));
         dump_node(s, right, depth + 1, Some(Prefix::Formal));
      }
      NodeKind::ConstrainedType => {
         let (left, right) = (ast.first_handle(node), ast.second_handle(node));
         dump_node(s, left, depth + 1, Some(Prefix::Type));
         dump_node(s, right, depth + 1, Some(Prefix::Constraint));
      }
      // Node kinds for which only the first handle is printed.
      | NodeKind::Check
      | NodeKind::Unwrap
      | NodeKind::Deref
      | NodeKind::Call
      | NodeKind::CallFunction
      | NodeKind::Not
      | NodeKind::Neg
      | NodeKind::BitNot
      | NodeKind::Member
      | NodeKind::Ref
      | NodeKind::IfBranch
      | NodeKind::While
      | NodeKind::Return
      | NodeKind::FormalParameters
      | NodeKind::NamedParameters
      | NodeKind::Variable
      | NodeKind::Pragmas
      | NodeKind::TypeName
      | NodeKind::Pub
      | NodeKind::WidenInt
      | NodeKind::WidenUint
      | NodeKind::WidenFloat => {
         let left = ast.first_handle(node);
         dump_node(
            s,
            left,
            depth + 1,
            Some(match kind {
               NodeKind::Check | NodeKind::Unwrap | NodeKind::Deref | NodeKind::Pragmas => {
                  Prefix::L
               }
               | NodeKind::Not
               | NodeKind::Neg
               | NodeKind::BitNot
               | NodeKind::Member
               | NodeKind::Ref => Prefix::R,
               NodeKind::Call | NodeKind::CallFunction => Prefix::Fun,
               NodeKind::IfBranch | NodeKind::While => Prefix::Cond,
               NodeKind::FormalParameters => Prefix::Return,
               NodeKind::NamedParameters => Prefix::Type,
               NodeKind::TypeName => Prefix::Name,
               | NodeKind::Variable
               | NodeKind::Return
               | NodeKind::Pub
               | NodeKind::WidenInt
               | NodeKind::WidenUint
               | NodeKind::WidenFloat => Prefix::X,
               // The outer arm only lets the kinds listed above through.
               _ => unreachable!(),
            }),
         );
      }
      _ => (),
   }

   // Children stored in the node's extra data.
   match extra {
      NodeData::None => (),
      NodeData::NodeList(list) => {
         for &node in list {
            dump_node(s, node, depth + 1, None);
         }
      }
      _ => (),
   }
}

/// Prints the AST to stderr, starting from the given root node.
///
/// If `types` is provided, the type of each node is printed as well.
pub fn dump_ast(file: &SourceFile, ast: &Ast, types: Option<&Types>, root_node: NodeId) {
   dump_node(&State { file, ast, types }, root_node, 0, None);
}

--------------------------------------------------------------------------------
/tsuki-frontend/src/backend.rs:
--------------------------------------------------------------------------------
//! Module for common backend functionality.
//! This can be used by CLIs and other programs that need to compile tsuki source code.

use crate::common::{Errors, SourceFile};

/// Trait implemented by all backends that can compile and run tsuki source code.
pub trait Backend {
   /// The kind of code emitted by the backend. This can be an object file, an executable file,
   /// or even a JIT-compiled function; you name it.
   type Target;

   /// Compiles a source file to a target.
   ///
   /// # Errors
   /// `Err` should return an error of kind `CodeGen`, together with a diagnostic message.
   /// These errors should only be thrown in dire cases where something went wrong in earlier stages
   /// of compilation, and the backend can't make sense of what the frontend produced.
18 | fn compile(&self, root: SourceFile) -> Result; 19 | } 20 | -------------------------------------------------------------------------------- /tsuki-frontend/src/common.rs: -------------------------------------------------------------------------------- 1 | //! Common functionality. 2 | 3 | use std::fmt; 4 | use std::path::PathBuf; 5 | 6 | use smallvec::SmallVec; 7 | 8 | use crate::ast::NodeKind; 9 | use crate::lexer::{IndentLevel, Token, TokenKind}; 10 | 11 | /// Represents a source file. 12 | pub struct SourceFile { 13 | /// The package the source file resides in. 14 | pub package: String, 15 | /// The root of the package. 16 | pub package_root: PathBuf, 17 | /// The path to the source file, relative to the package's `src` folder. 18 | pub path: PathBuf, 19 | /// The module name; that is, the package name and file path concatenated together with a colon, 20 | /// with the extension stripped, and all path separators replaced with colons `:`. 21 | pub module_name: String, 22 | /// The source code itself. 23 | pub source: String, 24 | } 25 | 26 | impl SourceFile { 27 | pub fn new( 28 | package: String, 29 | package_root: PathBuf, 30 | path: PathBuf, 31 | source: String, 32 | ) -> Result { 33 | let module_name = { 34 | let package_root = package_root 35 | .canonicalize() 36 | .map_err(|err| Error::spanless(path.clone(), ErrorKind::Io(err)))?; 37 | let path = path 38 | // Normalize the path into something that makes sense. 39 | .canonicalize() 40 | .map_err(|err| Error::spanless(path.clone(), ErrorKind::Io(err)))? 41 | // Remove the package_root prefix. 42 | .strip_prefix(&package_root) 43 | .map_err(|_| Error::spanless(path.clone(), ErrorKind::InvalidPackageRoot))? 44 | // Remove the .tsu extension. 45 | .with_extension("") 46 | // Convert it to a string. 47 | .to_str() 48 | .ok_or_else(|| Error::spanless(path.clone(), ErrorKind::InvalidUtf8InPath))? 49 | // Replace path separators with dots. 
50 | .replace(std::path::MAIN_SEPARATOR, "."); 51 | // And pray to God it's correct. 52 | format!("{}:{}", package, path) 53 | }; 54 | Ok(Self { 55 | package, 56 | package_root, 57 | path, 58 | module_name, 59 | source, 60 | }) 61 | } 62 | } 63 | 64 | /// Represents a span of text in a source file. 65 | #[derive(Clone, Debug)] 66 | pub struct Span { 67 | pub byte_start: usize, 68 | pub line_start: usize, 69 | pub column_start: usize, 70 | pub byte_end: usize, 71 | pub line_end: usize, 72 | pub column_end: usize, 73 | } 74 | 75 | impl Span { 76 | pub const FIRST_BYTE: usize = 0; 77 | pub const FIRST_LINE: usize = 1; 78 | pub const FIRST_COLUMN: usize = 1; 79 | 80 | pub const INVALID_LINE: usize = 0; 81 | pub const INVALID_COLUMN: usize = 0; 82 | 83 | pub const INVALID: Self = Self { 84 | byte_start: 0, 85 | line_start: Self::INVALID_LINE, 86 | column_start: Self::INVALID_COLUMN, 87 | byte_end: 0, 88 | line_end: Self::INVALID_LINE, 89 | column_end: Self::INVALID_COLUMN, 90 | }; 91 | 92 | /// Creates and initializes a new span starting at the first possible position in a file. 93 | pub fn new() -> Self { 94 | // The first possible position is 1:1..1:1. 95 | Self { 96 | byte_start: Self::FIRST_BYTE, 97 | line_start: Self::FIRST_LINE, 98 | column_start: Self::FIRST_COLUMN, 99 | byte_end: Self::FIRST_BYTE, 100 | line_end: Self::FIRST_LINE, 101 | column_end: Self::FIRST_COLUMN, 102 | } 103 | } 104 | 105 | /// Sets the start of the span to its current end. 106 | pub fn start_over(&mut self) { 107 | self.line_start = self.line_end; 108 | self.column_start = self.column_end; 109 | } 110 | 111 | /// Increments the ending column by `n`. 112 | pub fn advance_column_by(&mut self, n: usize) { 113 | self.byte_end += n; 114 | self.column_end += n; 115 | } 116 | 117 | /// Increments the ending line and resets the ending column to the first column. 
118 | pub fn advance_line(&mut self) { 119 | self.byte_end += 1; 120 | self.line_end += 1; 121 | self.column_end = Self::FIRST_COLUMN; 122 | } 123 | 124 | /// Returns whether the span is an _invalid_ span, that is, its positions are `INVALID_LINE` and 125 | /// `INVALID_COLUMN`. 126 | pub fn is_invalid(&self) -> bool { 127 | self.line_start == Self::INVALID_LINE 128 | || self.column_start == Self::INVALID_COLUMN 129 | || self.line_end == Self::INVALID_LINE 130 | || self.column_end == Self::INVALID_COLUMN 131 | } 132 | 133 | /// Joins two spans into one. The span `a` must be placed earlier in the text than `b`. 134 | pub fn join(a: &Span, b: &Span) -> Span { 135 | // We want to find the minimal and maximal lines and columns. Note that `a` is always at an 136 | // earlier position than `b`. 137 | // There's probably a simpler way of doing this. 138 | 139 | let byte_start = a.byte_start.min(b.byte_start); 140 | let byte_end = a.byte_end.max(b.byte_end); 141 | 142 | // In the first check, we use <=, because if the starting lines are equal, we want to pick the 143 | // column number from `a`. 144 | let (line_start, column_start) = if a.line_start <= b.line_start { 145 | (a.line_start, a.column_start) 146 | } else { 147 | (b.line_start, b.column_start) 148 | }; 149 | 150 | // In the second check, we use <, because if the starting lines are equal, we want to pick the 151 | // column number from `b`. 152 | let (line_end, column_end) = if a.line_end < b.line_end { 153 | (a.line_end, a.column_end) 154 | } else { 155 | (b.line_end, b.column_end) 156 | }; 157 | 158 | // Then we just join those into a final span. 
159 | Span { 160 | byte_start, 161 | line_start, 162 | column_start, 163 | byte_end, 164 | line_end, 165 | column_end, 166 | } 167 | } 168 | } 169 | 170 | impl fmt::Display for Span { 171 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 172 | if f.alternate() { 173 | write!( 174 | f, 175 | "{}:{}..{}:{}", 176 | self.line_start, self.column_start, self.line_end, self.column_end 177 | )?; 178 | } else { 179 | write!(f, "{}:{}", self.line_start, self.column_start)?; 180 | } 181 | Ok(()) 182 | } 183 | } 184 | 185 | impl Default for Span { 186 | /// Initializes a span at a default, _invalid_ position. This is _not_ the same as [`Span::new`]! 187 | fn default() -> Self { 188 | Self { 189 | byte_start: Self::FIRST_BYTE, 190 | line_start: Self::INVALID_LINE, 191 | column_start: Self::INVALID_COLUMN, 192 | byte_end: Self::FIRST_BYTE, 193 | line_end: Self::INVALID_LINE, 194 | column_end: Self::INVALID_COLUMN, 195 | } 196 | } 197 | } 198 | 199 | #[derive(thiserror::Error, Debug)] 200 | pub enum ErrorKind { 201 | /* 202 | * Non-compilation errors 203 | */ 204 | #[error("invalid UTF-8 in path")] 205 | InvalidUtf8InPath, 206 | #[error("package root is not a prefix of the main file path")] 207 | InvalidPackageRoot, 208 | #[error("I/O error: {0}")] 209 | Io(#[from] std::io::Error), 210 | 211 | /* 212 | * Lexer errors 213 | */ 214 | #[error("unexpected character: {0:?}")] 215 | UnexpectedCharacter(char), 216 | #[error("indentation too deep; something's wrong with Your program.")] 217 | IndentTooDeep, 218 | #[error("CRLF line endings are not supported")] 219 | CrlfNotSupported, 220 | #[error("invalid integer literal kind: {0:?}")] 221 | InvalidIntegerLiteral(char), 222 | #[error("invalid escape sequence kind: '\\{0}'")] 223 | InvalidEscapeSequence(char), 224 | #[error("incomplete '\\{0}' escape sequence")] 225 | IncompleteEscapeSequence(char), 226 | #[error("unclosed character literal")] 227 | UnclosedCharacterLiteral, 228 | #[error("unicode character U+{0:X} is out of 
range")] 229 | UnicodeEscapeOutOfRange(u32), 230 | #[error("unicode escape out of range of 32-bit integers")] 231 | UnicodeEscapeOutOfRange32, 232 | #[error("unclosed string literal")] 233 | UnclosedStringLiteral, 234 | 235 | /* 236 | * Parser errors 237 | */ 238 | #[error("unexpected token in prefix position: '{0}'")] 239 | UnexpectedPrefixToken(TokenKind), 240 | #[error("unexpected token in infix position: '{0}'")] 241 | UnexpectedInfixToken(TokenKind), 242 | #[error("unexpected token in type position: '{0}'")] 243 | UnexpectedTypeToken(TokenKind), 244 | #[error("missing '{0}' to close {1}")] 245 | MissingClosingToken(TokenKind, Token), 246 | #[error("expected comma ',' or '{0}' to close {1}")] 247 | ExpectedCommaOrClosingToken(TokenKind, Token), 248 | #[error("statements must be separated by line breaks")] 249 | MissingLineBreakAfterStatement, 250 | #[error("module-level code must not be indented (got {0} spaces of indentation)")] 251 | NoIndentationExpectedAtModuleLevel(IndentLevel), 252 | #[error("indented block of level greater than {0} expected")] 253 | IndentedBlockExpected(IndentLevel), 254 | #[error("identifier expected, but got '{0}'")] 255 | IdentifierExpected(TokenKind), 256 | #[error("missing variable name (an identifier, or '_' to discard the value)")] 257 | VarNameExpected, 258 | #[error("expected '=' after variable name, but got '{0}'")] 259 | VarMissingEquals(TokenKind), 260 | #[error("expected comma ',' or colon ':' in parameter list, but got '{0}'")] 261 | ExpectedCommaOrColon(TokenKind), 262 | #[error("function parameter list expected, but got '{0}'")] 263 | FunctionParametersExpected(TokenKind), 264 | #[error("pragma argument list expected, but got '{0}'")] 265 | PragmaArgsExpected(TokenKind), 266 | #[error("`pub` must be followed by a declaration")] 267 | PubMustBeFollowedByDeclaration, 268 | 269 | /* 270 | * Sem'check errors 271 | */ 272 | // SemLiterals 273 | #[error("invalid number literal suffix: '{0}'")] 274 | 
InvalidNumberLiteralSuffix(String), 275 | #[error("integer {0} is too big to fit in 64 bits")] 276 | IntegerTooBig(String), 277 | #[error("integer {0} is too big to fit in {1}")] 278 | UnsignedIntegerOverflowForType(u64, String), 279 | #[error("integer {0} is too big (or too small) to fit in {1}")] 280 | SignedIntegerOverflowForType(i64, String), 281 | #[error("unsigned integers cannot be negative")] 282 | UintCannotBeNegative, 283 | #[error("integer suffixes cannot be used on float literals")] 284 | InvalidFloatSuffix, 285 | 286 | // SemTypes 287 | #[error("'{0}' is not declared in this scope")] 288 | UndeclaredSymbol(String), 289 | #[error("invalid unary operator for {0}")] 290 | InvalidUnaryOperator(String), 291 | #[error("type mismatch: expected {0}, but got {1}")] 292 | TypeMismatch(String, String), 293 | #[error("{0} arguments expected, but got {1}")] 294 | NArgumentsExpected(usize, usize), 295 | #[error("missing result value in expression")] 296 | MissingResult, 297 | #[error("result value of expression is unused; use `val _ = x` to discard it")] 298 | UnusedValue, 299 | #[error("invalid location (left hand side of assignment)")] 300 | InvalidLocation, 301 | #[error("'{0}' is not a type")] 302 | SymbolIsNotAType(String), 303 | #[error("the target is immutable and cannot be assigned to")] 304 | CannotAssignImmutableLocation, 305 | #[error("`if` condition must be a Bool")] 306 | IfConditionMustBeBool, 307 | #[error("`while` condition must be a Bool")] 308 | WhileConditionMustBeBool, 309 | #[error("expression cannot be called; make sure it is a function")] 310 | ExpressionCannotBeCalled, 311 | #[error("`return` cannot be used outside of a function")] 312 | ReturnOutsideOfFunction, 313 | #[error("`break` cannot be used outside of a loop")] 314 | BreakOutsideOfLoop, 315 | #[error("type alias does not alias any type. 
try adding `= YourType`")] 316 | EmptyTypeAlias, 317 | #[error("unknown pragma '{0}'")] 318 | UnknownPragma(String), 319 | #[error("invalid built-in type name")] 320 | InvalidBuiltinTypeName, 321 | 322 | /* 323 | * Internal errors 324 | * --- 325 | * Every internal error must be prefixed by " internal error:" to tell the user that 326 | * something went terribly wrong in the compiler, and that the error should be reported. 327 | */ 328 | #[error("SemTypes internal error: invalid AST node passed to annotate_node(): {0:?}")] 329 | SemTypesInvalidAstNode(NodeKind), 330 | 331 | #[error("backend internal error: code generation error: {0}")] 332 | CodeGen(String), 333 | #[error("backend internal error: execution error: {0}")] 334 | ExecutableError(String), 335 | } 336 | 337 | /// An error that can occur during lexing, parsing, semantic analysis, or code generation. 338 | #[derive(Debug)] 339 | pub struct Error { 340 | // The filename is owned because errors don't occur very often, so allocations are fine here, 341 | // and using an owned String here simplifies a bunch of code. 342 | pub filename: PathBuf, 343 | pub span: Span, 344 | pub kind: ErrorKind, 345 | } 346 | 347 | impl Error { 348 | /// Constructs an error with an invalid span, such that no span is displayed. 349 | pub fn spanless(filename: PathBuf, kind: ErrorKind) -> Self { 350 | Self { 351 | filename, 352 | span: Span::INVALID, 353 | kind, 354 | } 355 | } 356 | } 357 | 358 | impl fmt::Display for Error { 359 | /// The alternate format syntax `{:#}` can be used to display the full span of where the error 360 | /// occured, instead of its starting position only. 
361 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 362 | let filename = self.filename.to_str().unwrap(); 363 | write!(f, "{}:", filename)?; 364 | if !self.span.is_invalid() { 365 | if f.alternate() { 366 | write!(f, "{:#}:", self.span)?; 367 | } else { 368 | write!(f, "{}:", self.span)?; 369 | } 370 | } 371 | write!(f, " {}", self.kind)?; 372 | Ok(()) 373 | } 374 | } 375 | 376 | pub type Errors = SmallVec<[Error; 8]>; 377 | 378 | /// Creates an `Errors` from a single error. 379 | pub fn single_error(error: Error) -> Errors { 380 | let mut errs = Errors::new(); 381 | errs.push(error); 382 | errs 383 | } 384 | -------------------------------------------------------------------------------- /tsuki-frontend/src/functions.rs: -------------------------------------------------------------------------------- 1 | //! Function registry. 2 | 3 | use smallvec::SmallVec; 4 | 5 | use crate::ast::{NodeId, NodeKind}; 6 | use crate::scope::{Mutability, ScopeId, Scopes, SymbolId, SymbolKind, Symbols, Variable}; 7 | use crate::types::{BuiltinTypes, TypeId}; 8 | 9 | /// The unique ID of a function in the registry. 10 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 11 | pub struct FunctionId(usize); 12 | 13 | /// The kind of a function. 14 | pub enum FunctionKind { 15 | /// This function was declared in the current module. 16 | Local, 17 | /// This function was declared in a different module or package. 18 | External, 19 | /// This function is imported from C. 20 | ImportC { is_varargs: bool }, 21 | /// This function is a compiler intrinsic. 22 | Intrinsic(Intrinsic), 23 | } 24 | 25 | impl FunctionKind { 26 | /// Returns whether the function kind is for a local function. 27 | pub fn is_local(&self) -> bool { 28 | matches!(self, Self::Local) 29 | } 30 | 31 | /// Returns whether the function kind is for a C varargs function. 32 | pub fn is_varargs(&self) -> bool { 33 | matches!(self, Self::ImportC { is_varargs: true }) 34 | } 35 | } 36 | 37 | /// Function parameters. 
38 | pub struct Parameters { 39 | /// The names and types of formal parameters this function accepts. 40 | pub formal: SmallVec<[SymbolId; 8]>, 41 | /// The return type of the function. 42 | pub return_type: TypeId, 43 | } 44 | 45 | /// Data-oriented storage for functions. 46 | pub struct Functions { 47 | names: Vec, 48 | mangled_names: Vec, 49 | parameters: Vec, 50 | kinds: Vec, 51 | } 52 | 53 | impl Functions { 54 | /// Creates a new function registry. 55 | pub fn new() -> Self { 56 | Self { 57 | names: Vec::new(), 58 | mangled_names: Vec::new(), 59 | parameters: Vec::new(), 60 | kinds: Vec::new(), 61 | } 62 | } 63 | 64 | /// Adds a function into the registry. 65 | pub fn create( 66 | &mut self, 67 | name: String, 68 | mangled_name: String, 69 | parameters: Parameters, 70 | kind: FunctionKind, 71 | ) -> FunctionId { 72 | let id = self.names.len(); 73 | self.names.push(name); 74 | self.mangled_names.push(mangled_name); 75 | self.parameters.push(parameters); 76 | self.kinds.push(kind); 77 | FunctionId(id) 78 | } 79 | 80 | /// Returns the name of a function. 81 | pub fn name(&self, function: FunctionId) -> &str { 82 | &self.names[function.0] 83 | } 84 | 85 | /// Returns the mangled name of a function. 86 | pub fn mangled_name(&self, function: FunctionId) -> &str { 87 | &self.mangled_names[function.0] 88 | } 89 | 90 | /// Returns a reference to the function's parameters. 91 | pub fn parameters(&self, function: FunctionId) -> &Parameters { 92 | &self.parameters[function.0] 93 | } 94 | 95 | /// Returns the kind of the function. 96 | pub fn kind(&self, function: FunctionId) -> &FunctionKind { 97 | &self.kinds[function.0] 98 | } 99 | 100 | /// Returns an iterator over function IDs. 101 | pub fn iter(&self) -> FunctionsIter { 102 | FunctionsIter { 103 | current: 0, 104 | len: self.names.len(), 105 | } 106 | } 107 | } 108 | 109 | /// An intrinsic function. 
110 | /// 111 | /// Each kind of intrinsic has its own node kind; 112 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 113 | pub enum Intrinsic { 114 | // TODO: Remove these once `c_import` is implemented. 115 | /// Prints an `Int32` to stdout. 116 | PrintInt32, 117 | /// Prints a `Float32` to stdout. 118 | PrintFloat32, 119 | } 120 | 121 | impl From for NodeKind { 122 | /// Converts an intrinsic to its corresponding node kind. 123 | fn from(intrinsic: Intrinsic) -> Self { 124 | match intrinsic { 125 | Intrinsic::PrintInt32 => NodeKind::PrintInt32, 126 | Intrinsic::PrintFloat32 => NodeKind::PrintFloat32, 127 | } 128 | } 129 | } 130 | 131 | /// Registers intrinsic functions in the given scope, symbol, and function registries. 132 | pub fn register_intrinsics( 133 | builtin: &BuiltinTypes, 134 | scopes: &mut Scopes, 135 | symbols: &mut Symbols, 136 | scope: ScopeId, 137 | functions: &mut Functions, 138 | ) { 139 | // TODO: replace this with stdlib declarations. 140 | macro_rules! add_intrinsic { 141 | ($name:tt, $params:tt, $return_type:expr, $intrinsic:expr $(,)?) 
=> { 142 | let function_id = functions.create( 143 | $name.into(), 144 | String::new(), 145 | Parameters { 146 | formal: $params 147 | .iter() 148 | .map(|&(name, type_id)| { 149 | symbols.create( 150 | name, 151 | NodeId::null(), 152 | type_id, 153 | SymbolKind::Variable(Variable { 154 | mutability: Mutability::Val, 155 | }), 156 | ) 157 | }) 158 | .collect(), 159 | return_type: $return_type, 160 | }, 161 | FunctionKind::Intrinsic($intrinsic), 162 | ); 163 | let symbol_id = symbols.create( 164 | $name, 165 | NodeId::null(), 166 | builtin.t_statement, 167 | SymbolKind::Function(function_id), 168 | ); 169 | scopes.insert(scope, $name, symbol_id); 170 | }; 171 | } 172 | 173 | add_intrinsic!( 174 | "__intrin_print_int32", 175 | [("x", builtin.t_int32)], 176 | builtin.t_unit, 177 | Intrinsic::PrintInt32, 178 | ); 179 | add_intrinsic!( 180 | "__intrin_print_float32", 181 | [("x", builtin.t_float32)], 182 | builtin.t_unit, 183 | Intrinsic::PrintFloat32, 184 | ); 185 | } 186 | 187 | pub struct FunctionsIter { 188 | current: usize, 189 | len: usize, 190 | } 191 | 192 | impl Iterator for FunctionsIter { 193 | type Item = FunctionId; 194 | 195 | fn next(&mut self) -> Option { 196 | if self.current < self.len { 197 | let i = self.current; 198 | self.current += 1; 199 | Some(FunctionId(i)) 200 | } else { 201 | None 202 | } 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /tsuki-frontend/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod ast; 2 | pub mod astdump; 3 | pub mod backend; 4 | pub mod common; 5 | pub mod functions; 6 | pub mod lexer; 7 | pub mod parser; 8 | pub mod scope; 9 | pub mod sem; 10 | mod sem_literals; 11 | mod sem_types; 12 | pub mod types; 13 | 14 | use std::path::PathBuf; 15 | 16 | use common::{Errors, SourceFile}; 17 | use lexer::Lexer; 18 | use sem::{AnalyzeOptions as SemOptions, Ir}; 19 | use types::{DefaultTypes, FloatSize, IntegerSize}; 20 | 21 
| /// Options for loading source files. 22 | pub struct AnalyzeOptions<'s> { 23 | pub file: &'s SourceFile, 24 | pub std_path: PathBuf, 25 | } 26 | 27 | #[derive(Debug, Clone, Copy)] 28 | pub struct DebugOptions { 29 | pub dump_source: bool, 30 | pub dump_ast_pre_sem: bool, 31 | pub dump_ast_post_sem: bool, 32 | } 33 | 34 | impl Default for DebugOptions { 35 | fn default() -> Self { 36 | Self { 37 | dump_source: false, 38 | dump_ast_pre_sem: false, 39 | dump_ast_post_sem: false, 40 | } 41 | } 42 | } 43 | 44 | /// Parses and analyzes a source file. Returns the fully analyzed, typed IR. 45 | pub fn analyze(options: AnalyzeOptions, debug: &DebugOptions) -> Result { 46 | let AnalyzeOptions { file, .. } = options; 47 | let SourceFile { source, .. } = file; 48 | let lexer = Lexer::new(file); 49 | let (ast, root_node) = parser::parse(lexer)?; 50 | 51 | if debug.dump_source { 52 | eprintln!("## Source code"); 53 | eprintln!("{}", source); 54 | eprintln!(); 55 | } 56 | 57 | for handle in ast.node_handles() { 58 | if ast.span(handle).is_invalid() { 59 | eprintln!("warning: node with invalid span: {:?}\nAST dump:", handle); 60 | astdump::dump_ast(file, &ast, None, handle); 61 | } 62 | } 63 | 64 | if debug.dump_ast_pre_sem { 65 | eprintln!("## AST (pre-sem)"); 66 | astdump::dump_ast(file, &ast, None, root_node); 67 | eprintln!(); 68 | } 69 | 70 | let ir = sem::analyze(SemOptions { 71 | file, 72 | ast, 73 | root_node, 74 | default_types: DefaultTypes { 75 | int_width: IntegerSize::S32, 76 | float_width: FloatSize::S32, 77 | size_width: IntegerSize::U64, 78 | }, 79 | })?; 80 | 81 | if debug.dump_ast_post_sem { 82 | eprintln!("## AST (post-sem)"); 83 | astdump::dump_ast(file, &ir.ast, Some(&ir.types), root_node); 84 | eprintln!(); 85 | } 86 | 87 | Ok(ir) 88 | } 89 | -------------------------------------------------------------------------------- /tsuki-frontend/src/scope.rs: -------------------------------------------------------------------------------- 1 | //! 
Scoping and symbols. 2 | 3 | use std::borrow::Cow; 4 | use std::collections::{HashMap, HashSet}; 5 | use std::num::NonZeroUsize; 6 | 7 | use crate::ast::NodeId; 8 | use crate::functions::FunctionId; 9 | use crate::types::TypeId; 10 | 11 | /// An ID uniquely identifying a symbol. 12 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 13 | pub struct SymbolId(usize); 14 | 15 | impl SymbolId { 16 | /// Returns the internal ID of the symbol. 17 | pub fn id(self) -> usize { 18 | self.0 19 | } 20 | 21 | /// Creates a symbol from an internal ID. 22 | pub(crate) fn new(id: usize) -> Self { 23 | Self(id) 24 | } 25 | } 26 | 27 | /// The kind of a symbol, as well as extra metadata attached to it. 28 | pub enum SymbolKind { 29 | /// A symbol that represents a variable. 30 | Variable(Variable), 31 | /// A symbol that represents a type. 32 | Type(TypeId), 33 | /// A symbol that represents a function, be it free or associated. 34 | Function(FunctionId), 35 | } 36 | 37 | impl SymbolKind { 38 | /// Unwraps a variable symbol. 39 | pub fn unwrap_variable(&self) -> &Variable { 40 | if let SymbolKind::Variable(ref variable) = self { 41 | variable 42 | } else { 43 | panic!("unwrap_variable called on a non-variable symbol") 44 | } 45 | } 46 | 47 | /// Unwraps a function symbol. 48 | pub fn unwrap_function(&self) -> FunctionId { 49 | if let &SymbolKind::Function(function_id) = self { 50 | function_id 51 | } else { 52 | panic!("unwrap_function called on a non-function symbol") 53 | } 54 | } 55 | } 56 | 57 | /// The mutability of a variable. 58 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 59 | pub enum Mutability { 60 | /// A `val` (immutable) variable. 61 | Val, 62 | /// A `var` (mutable) variable. 63 | Var, 64 | } 65 | 66 | /// Symbol data for a variable declaration. 67 | pub struct Variable { 68 | pub mutability: Mutability, 69 | } 70 | 71 | /// Symbol storage. Symbols are looked up identifiers. 
///
/// Every property lives in its own vector; a [`SymbolId`] is an index shared by all of them.
pub struct Symbols {
   names: Vec<String>,
   nodes: Vec<NodeId>,
   types: Vec<TypeId>,
   kinds: Vec<SymbolKind>,
}

impl Symbols {
   /// Creates a new symbol storage.
   pub fn new() -> Symbols {
      Self {
         names: Vec::new(),
         nodes: Vec::new(),
         types: Vec::new(),
         kinds: Vec::new(),
      }
   }

   /// Creates a symbol from a name, handle, type, and kind.
   pub fn create(&mut self, name: &str, node: NodeId, typ: TypeId, kind: SymbolKind) -> SymbolId {
      // All vectors grow in lockstep, so the current length is the index of the new symbol.
      let id = self.nodes.len();
      self.names.push(name.to_owned());
      self.nodes.push(node);
      self.types.push(typ);
      self.kinds.push(kind);
      SymbolId::new(id)
   }

   /// Returns the name of the symbol.
   pub fn name(&self, symbol: SymbolId) -> &str {
      &self.names[symbol.0]
   }

   /// Returns the symbol's ancestor node.
   pub fn node(&self, symbol: SymbolId) -> NodeId {
      self.nodes[symbol.0]
   }

   /// Returns the symbol's type.
   pub fn type_id(&self, symbol: SymbolId) -> TypeId {
      self.types[symbol.0]
   }

   /// Returns the symbol's associated data.
   pub fn kind(&self, symbol: SymbolId) -> &SymbolKind {
      &self.kinds[symbol.0]
   }

   /// Returns a mutable reference to the symbol's associated data.
   pub fn kind_mut(&mut self, symbol: SymbolId) -> &mut SymbolKind {
      &mut self.kinds[symbol.0]
   }
}

/// Represents a local scope.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ScopeId(NonZeroUsize);

/// Struct for module and local scope management.
pub struct Scopes {
   /// Maps `(scope, identifier)` pairs to the symbol declared under that identifier.
   symbols: HashMap<(ScopeId, Cow<'static, str>), SymbolId>,
   /// The scopes marked with `set_breakable`.
   breakable_scopes: HashSet<ScopeId>,
   /// The symbols marked with `set_public`.
   public_symbols: HashSet<SymbolId>,
   /// The ID that will be given to the next created scope.
   scope_count: usize,
}

impl Scopes {
   /// Creates a new scope manager.
139 | pub fn new() -> Self { 140 | Self { 141 | symbols: HashMap::new(), 142 | breakable_scopes: HashSet::new(), 143 | public_symbols: HashSet::new(), 144 | scope_count: 1, 145 | } 146 | } 147 | 148 | /// Creates a new scope and returns its ID. 149 | pub fn create_scope(&mut self) -> ScopeId { 150 | let id = self.scope_count; 151 | self.scope_count += 1; 152 | ScopeId(NonZeroUsize::new(id).unwrap()) 153 | } 154 | 155 | /// Inserts a symbol to the scope, under the given identifier. If there already is a symbol with 156 | /// the given name, it's lost. 157 | pub fn insert(&mut self, scope: ScopeId, identifier: &str, symbol: SymbolId) { 158 | let _ = self.symbols.insert((scope, Cow::Owned(identifier.to_owned())), symbol); 159 | } 160 | 161 | /// Retrieves a reference to the provided identifier in the given scope, or `None` if the 162 | /// identifier is not in the given scope. 163 | pub fn get(&self, scope: ScopeId, identifier: &str) -> Option { 164 | self.symbols.get(&(scope, Cow::Borrowed(identifier))).map(|id| *id) 165 | } 166 | 167 | /// Returns whether a scope is breakable. 168 | pub fn is_breakable(&self, scope: ScopeId) -> bool { 169 | self.breakable_scopes.contains(&scope) 170 | } 171 | 172 | /// Marks a scope as breakable. 173 | pub fn set_breakable(&mut self, scope: ScopeId) { 174 | self.breakable_scopes.insert(scope); 175 | } 176 | 177 | /// Marks the given symbol as public. 178 | pub fn set_public(&mut self, symbol: SymbolId) { 179 | self.public_symbols.insert(symbol); 180 | } 181 | } 182 | 183 | /// A stack for tracking which local scopes are currently visible. 184 | /// This can also be used for identifier lookups. 185 | #[derive(Debug, Clone)] 186 | pub struct ScopeStack { 187 | scopes: Vec, 188 | } 189 | 190 | impl ScopeStack { 191 | /// Creates a new scope stack. 192 | pub fn new() -> Self { 193 | Self { scopes: Vec::new() } 194 | } 195 | 196 | /// Pushes a scope onto the top of the stack. Returns the scope ID that was pushed. 
197 | pub fn push(&mut self, scope: ScopeId) -> ScopeId { 198 | self.scopes.push(scope); 199 | scope 200 | } 201 | 202 | /// Returns the scope at the top of the stack. 203 | pub fn top(&self) -> ScopeId { 204 | *self.scopes.last().expect("the scope stack must not be empty") 205 | } 206 | 207 | /// Pops the topmost scope off the stack. 208 | pub fn pop(&mut self) { 209 | let _ = self.scopes.pop(); 210 | } 211 | 212 | /// Looks for symbols with the given name in scopes on the stack, and returns the innermost one. 213 | pub fn lookup(&self, scopes: &Scopes, name: &str) -> Option { 214 | for &scope in self.scopes.iter().rev() { 215 | if let Some(symbol) = scopes.get(scope, name) { 216 | return Some(symbol); 217 | } 218 | } 219 | None 220 | } 221 | 222 | /// Returns an iterator over all scopes on the stack, from bottom to top. 223 | pub fn iter(&self) -> impl DoubleEndedIterator + '_ { 224 | self.scopes.iter().copied() 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /tsuki-frontend/src/sem.rs: -------------------------------------------------------------------------------- 1 | //! The root of semantic analysis. 2 | 3 | use std::path::Path; 4 | 5 | use crate::ast::{Ast, NodeId}; 6 | use crate::common::{Error, ErrorKind, Errors, SourceFile, Span}; 7 | use crate::functions::Functions; 8 | use crate::scope::{Scopes, Symbols}; 9 | pub use crate::types::DefaultTypes; 10 | use crate::types::{BuiltinTypes, TypeLog, Types}; 11 | 12 | use crate::sem_literals::SemLiterals; 13 | use crate::sem_types::{SemTypes, SemTypesBorrows}; 14 | 15 | /// A semantic analyzer. 16 | pub(crate) trait SemPass { 17 | type Result; 18 | 19 | /// Analyzes the AST from the given root node, and returns the modified version of the AST. 20 | fn analyze(&mut self, ast: Ast, root_node: NodeId) -> Ast; 21 | 22 | /// Returns the filename string. 23 | fn filename(&self) -> &Path; 24 | /// Returns a reference to the list of errors. 
25 | fn errors(&self) -> &Errors; 26 | /// Returns a mutable reference to the list of errors. 27 | fn errors_mut(&mut self) -> &mut Errors; 28 | /// Consumes `self` to return the list of errors. 29 | fn into_errors(self) -> Errors; 30 | 31 | /// Emits an error of the given kind, at the given span. 32 | fn emit_error(&mut self, kind: ErrorKind, span: Span) { 33 | let filename = self.filename().to_owned(); 34 | self.errors_mut().push(Error { 35 | filename, 36 | kind, 37 | span, 38 | }); 39 | } 40 | } 41 | 42 | /// Semantic analysis state. 43 | struct Analyzer { 44 | ast: Ast, 45 | root_node: NodeId, 46 | } 47 | 48 | impl Analyzer { 49 | /// Performs the given semantic pass, and returns the errors, if any. 50 | fn perform(mut self, mut sem: impl SemPass) -> Result { 51 | self.ast = sem.analyze(self.ast, self.root_node); 52 | 53 | if sem.errors().len() > 0 { 54 | Err(sem.into_errors()) 55 | } else { 56 | Ok(self) 57 | } 58 | } 59 | } 60 | 61 | /// Common information shared by the semantic pass. 62 | pub(crate) struct SemCommon<'s> { 63 | pub file: &'s SourceFile, 64 | pub default_types: DefaultTypes, 65 | } 66 | 67 | impl<'s> SemCommon<'s> { 68 | /// Returns the source code substring pointed to by the node's `first..second`. 69 | pub fn get_source_range_from_node(&self, ast: &Ast, node: NodeId) -> &str { 70 | let source_range = ast.first(node)..ast.second(node); 71 | &self.file.source[source_range] 72 | } 73 | } 74 | 75 | /// The intermediate representation output by the analyzer. This contains the AST and type 76 | /// information. 77 | pub struct Ir { 78 | pub ast: Ast, 79 | pub root_node: NodeId, 80 | pub types: Types, 81 | pub symbols: Symbols, 82 | pub functions: Functions, 83 | } 84 | 85 | /// The options passed to `analyze`. 
86 | pub struct AnalyzeOptions<'s> { 87 | pub file: &'s SourceFile, 88 | pub ast: Ast, 89 | pub root_node: NodeId, 90 | pub default_types: DefaultTypes, 91 | } 92 | 93 | /// Analyzes and lowers the AST to a representation ready to be used by the backend. 94 | pub fn analyze(options: AnalyzeOptions) -> Result { 95 | let AnalyzeOptions { 96 | file, 97 | ast, 98 | root_node, 99 | default_types, 100 | } = options; 101 | let mut state = Analyzer { ast, root_node }; 102 | 103 | let common = SemCommon { 104 | file, 105 | default_types, 106 | }; 107 | let mut types = Types::new(); 108 | let mut type_log = TypeLog::new(); 109 | let builtin_types = BuiltinTypes::add_to(&mut types, &common.default_types); 110 | let mut scopes = Scopes::new(); 111 | let mut symbols = Symbols::new(); 112 | let mut functions = Functions::new(); 113 | 114 | // NOTE: Maybe split errors into normal and fatal? 115 | // Normal errors would be accumulated into the existing error list, but would not halt the 116 | // analysis completely. Fatal errors would halt the analysis, and would occur if something really 117 | // goes wrong inside of a phase, yielding AST that might break the phase after it. 118 | // Also, warnings anyone? 119 | state = state.perform(SemLiterals::new(&common))?; 120 | state = state.perform(SemTypes::new(SemTypesBorrows { 121 | common: &common, 122 | types: &mut types, 123 | log: &mut type_log, 124 | builtin: &builtin_types, 125 | scopes: &mut scopes, 126 | symbols: &mut symbols, 127 | functions: &mut functions, 128 | }))?; 129 | 130 | Ok(Ir { 131 | ast: state.ast, 132 | root_node, 133 | types, 134 | symbols, 135 | functions, 136 | }) 137 | } 138 | -------------------------------------------------------------------------------- /tsuki-frontend/src/sem_literals.rs: -------------------------------------------------------------------------------- 1 | //! Semantic analyzer for literal kinds. 2 | //! 3 | //! 
This performs some basic initial analysis to convert literal kinds from generic `Integer` and 4 | //! `Float` to concrete types `Int8`, `Int16`, etc., including negation. 5 | //! Note that `SemTypes` may perform additional conversions later down the line. 6 | 7 | use std::convert::{TryFrom, TryInto}; 8 | use std::path::Path; 9 | 10 | use smallvec::SmallVec; 11 | 12 | use crate::ast::{Ast, NodeData, NodeId, NodeKind}; 13 | use crate::common::{ErrorKind, Errors, Span}; 14 | use crate::sem::{SemCommon, SemPass}; 15 | use crate::types::{FloatSize, IntegerSize}; 16 | 17 | /// State for the `SemLiterals` analysis phase. 18 | pub(crate) struct SemLiterals<'c> { 19 | common: &'c SemCommon<'c>, 20 | errors: Errors, 21 | } 22 | 23 | /// Available suffixes for literals. 24 | #[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)] 25 | enum LiteralSuffix { 26 | None, 27 | I, 28 | I8, 29 | I16, 30 | I32, 31 | I64, 32 | U, 33 | U8, 34 | U16, 35 | U32, 36 | U64, 37 | F, 38 | F32, 39 | F64, 40 | } 41 | 42 | impl From for LiteralSuffix { 43 | fn from(size: IntegerSize) -> Self { 44 | match size { 45 | IntegerSize::S8 => LiteralSuffix::I8, 46 | IntegerSize::S16 => LiteralSuffix::I16, 47 | IntegerSize::S32 => LiteralSuffix::I32, 48 | IntegerSize::S64 => LiteralSuffix::I64, 49 | IntegerSize::U8 => LiteralSuffix::U8, 50 | IntegerSize::U16 => LiteralSuffix::U16, 51 | IntegerSize::U32 => LiteralSuffix::U32, 52 | IntegerSize::U64 => LiteralSuffix::U64, 53 | } 54 | } 55 | } 56 | 57 | impl From for LiteralSuffix { 58 | fn from(size: FloatSize) -> Self { 59 | match size { 60 | FloatSize::S32 => LiteralSuffix::F32, 61 | FloatSize::S64 => LiteralSuffix::F64, 62 | } 63 | } 64 | } 65 | 66 | static SUFFIXES: phf::Map<&'static str, LiteralSuffix> = phf::phf_map! 
{
   "i" => LiteralSuffix::I,
   "i8" => LiteralSuffix::I8,
   "i16" => LiteralSuffix::I16,
   "i32" => LiteralSuffix::I32,
   "i64" => LiteralSuffix::I64,
   "u" => LiteralSuffix::U,
   "u8" => LiteralSuffix::U8,
   "u16" => LiteralSuffix::U16,
   "u32" => LiteralSuffix::U32,
   "u64" => LiteralSuffix::U64,
   "f" => LiteralSuffix::F,
   "f32" => LiteralSuffix::F32,
   "f64" => LiteralSuffix::F64,
};

impl<'c> SemLiterals<'c> {
   /// Creates a new instance of the `SemLiterals` analysis phase.
   pub fn new(common: &'c SemCommon<'c>) -> Self {
      SemLiterals {
         common,
         errors: Errors::new(),
      }
   }

   /// Splits a number literal into its digits and its type suffix.
   ///
   /// A suffix is recognized when the last `_` in `source` is immediately followed by an ASCII
   /// letter. If the text after that `_` is not a known suffix, an
   /// `InvalidNumberLiteralSuffix` error is emitted at `span`, and the digits are returned with
   /// `LiteralSuffix::None`, so that the number parsers never see the invalid suffix text.
   fn split_number<'n>(&mut self, source: &'n str, span: &Span) -> (&'n str, LiteralSuffix) {
      if let Some(underscore) = source.rfind('_') {
         // `get` returns `None` when the underscore is the last character, which also avoids
         // any index arithmetic that could underflow on very short inputs.
         let starts_suffix = source
            .as_bytes()
            .get(underscore + 1)
            .map_or(false, |byte| byte.is_ascii_alphabetic());
         if starts_suffix {
            let digits = &source[..underscore];
            let suffix_string = &source[underscore + 1..];
            return match SUFFIXES.get(suffix_string) {
               Some(&suffix) => (digits, suffix),
               None => {
                  self.emit_error(
                     ErrorKind::InvalidNumberLiteralSuffix(source.into()),
                     span.clone(),
                  );
                  // The error is already recorded; strip the bad suffix so that the caller
                  // does not panic on the letters while parsing the digits.
                  (digits, LiteralSuffix::None)
               }
            };
         }
      }
      (source, LiteralSuffix::None)
   }

   /// Converts an ASCII digit to a u64.
   ///
   /// Panics if `digit` is not in `b'0'..=b'9'`.
   fn digit_to_u64(digit: u8) -> u64 {
      assert!(matches!(digit, b'0'..=b'9'));
      (digit - b'0') as u64
   }

   /// Emits an `IntegerTooBig` error for the literal `string` at `span`.
   fn overflow_error(&mut self, string: &str, span: Span) {
      self.emit_error(ErrorKind::IntegerTooBig(string.into()), span)
   }

   /// Parses the given string into a `u64`. The string must not be empty, otherwise an assertion is
   /// triggered.
125 | /// If an error is occured while parsing, `Err(())` is returned, and the error is added to the 126 | /// phase's error list. 127 | fn parse_integer(&mut self, string: &str, span: &Span) -> Result { 128 | assert!(!string.is_empty()); 129 | 130 | let bytes = string.as_bytes(); 131 | let mut result: u64 = Self::digit_to_u64(bytes[0]); 132 | for &c in &bytes[1..] { 133 | if matches!(c, b'0'..=b'9') { 134 | // We don't want to panic on overflow here, instead report a nice error to the user. 135 | // Hence the usage of `checked_mul` and `checked_add`. 136 | let digit = Self::digit_to_u64(c); 137 | result = 138 | result.checked_mul(10).ok_or_else(|| self.overflow_error(string, span.clone()))?; 139 | result = result 140 | .checked_add(digit) 141 | .ok_or_else(|| self.overflow_error(string, span.clone()))?; 142 | } else if c == b'_' { 143 | // Continue, because _ is a valid separating character. 144 | } else { 145 | // If a different character is found, panic! because the lexer should've already sorted 146 | // the appropriate characters out for us. 147 | panic!("unexpected character in integer literal: {}", c); 148 | } 149 | } 150 | 151 | Ok(result) 152 | } 153 | 154 | /// Converts a `u64` to a smaller unsigned integer. `type_name` and `span` are used for 155 | /// emitting errors in case of overflow. 156 | fn convert_unsigned(&mut self, x: u64, type_name: &str, span: &Span) -> R 157 | where 158 | R: Default + TryFrom, 159 | { 160 | match x.try_into() { 161 | Ok(ok) => ok, 162 | Err(..) => { 163 | let kind = ErrorKind::UnsignedIntegerOverflowForType(x.into(), type_name.into()); 164 | self.emit_error(kind, span.clone()); 165 | R::default() 166 | } 167 | } 168 | } 169 | 170 | /// Converts a `u64` to a signed integer. Emits an error using the given `type_name` and `span`, 171 | /// and returns `R::default()` in case of an overflow error. 
172 | fn convert_signed(&mut self, negative: bool, x: u64, type_name: &str, span: &Span) -> R 173 | where 174 | R: Default + TryFrom, 175 | { 176 | // i64 is the largest possible signed integer in tsuki, so we use that as the source for our 177 | // conversion. Note that if we converted straight from u64 to R, the minimum negative number 178 | // edge case -128_i8 would not work. The 128 would get converted into an i8, causing an 179 | // overflow, so instead we first need to convert to an i64, then apply the sign, and 180 | // afterwards convert the i64 to R. 181 | let mut signed: i64 = match x.try_into() { 182 | Ok(ok) => ok, 183 | Err(..) => { 184 | let kind = ErrorKind::UnsignedIntegerOverflowForType(x.into(), type_name.into()); 185 | self.emit_error(kind, span.clone()); 186 | return R::default(); 187 | } 188 | }; 189 | if negative { 190 | signed *= -1; 191 | } 192 | match signed.try_into() { 193 | Ok(ok) => ok, 194 | Err(..) => { 195 | let kind = ErrorKind::SignedIntegerOverflowForType(signed.into(), type_name.into()); 196 | self.emit_error(kind, span.clone()); 197 | R::default() 198 | } 199 | } 200 | } 201 | 202 | /// Converts a `u64` to an `f64`, optionally flipping its sign around. 203 | fn convert_to_float(&self, negative: bool, x: u64) -> f64 { 204 | if negative { 205 | -(x as f64) 206 | } else { 207 | x as f64 208 | } 209 | } 210 | 211 | /// Converts the abstract Integer `node` to a concretely typed node. 
212 | fn convert_integer_node( 213 | &mut self, 214 | ast: &mut Ast, 215 | node: NodeId, 216 | negative: bool, 217 | number: u64, 218 | mut suffix: LiteralSuffix, 219 | ) { 220 | match suffix { 221 | LiteralSuffix::None | LiteralSuffix::I => { 222 | suffix = LiteralSuffix::from(self.common.default_types.int_width); 223 | } 224 | LiteralSuffix::U => { 225 | suffix = LiteralSuffix::from(self.common.default_types.size_width); 226 | } 227 | LiteralSuffix::F => { 228 | suffix = LiteralSuffix::from(self.common.default_types.float_width); 229 | } 230 | _ => (), 231 | } 232 | if matches!( 233 | suffix, 234 | LiteralSuffix::U8 | LiteralSuffix::U16 | LiteralSuffix::U32 | LiteralSuffix::U64 235 | ) && negative 236 | { 237 | self.emit_error(ErrorKind::UintCannotBeNegative, ast.span(node).clone()); 238 | return; 239 | } 240 | let span = ast.span(node); 241 | let (kind, extra) = match suffix { 242 | LiteralSuffix::None | LiteralSuffix::I | LiteralSuffix::U | LiteralSuffix::F => { 243 | // These cases are canonicalized to types configured in `common.default_types`. 
244 | unreachable!() 245 | } 246 | LiteralSuffix::I8 => ( 247 | NodeKind::Int8, 248 | NodeData::Int8(self.convert_signed(negative, number, "Int8", span)), 249 | ), 250 | LiteralSuffix::I16 => ( 251 | NodeKind::Int16, 252 | NodeData::Int16(self.convert_signed(negative, number, "Int16", span)), 253 | ), 254 | LiteralSuffix::I32 => ( 255 | NodeKind::Int32, 256 | NodeData::Int32(self.convert_signed(negative, number, "Int32", span)), 257 | ), 258 | LiteralSuffix::I64 => ( 259 | NodeKind::Int64, 260 | NodeData::Int64(self.convert_signed(negative, number, "Int64", span)), 261 | ), 262 | LiteralSuffix::U8 => ( 263 | NodeKind::Uint8, 264 | NodeData::Uint8(self.convert_unsigned(number, "Uint8", span)), 265 | ), 266 | LiteralSuffix::U16 => ( 267 | NodeKind::Uint16, 268 | NodeData::Uint16(self.convert_unsigned(number, "Uint16", span)), 269 | ), 270 | LiteralSuffix::U32 => ( 271 | NodeKind::Uint32, 272 | NodeData::Uint32(self.convert_unsigned(number, "Uint32", span)), 273 | ), 274 | LiteralSuffix::U64 => (NodeKind::Uint64, NodeData::Uint64(number)), 275 | LiteralSuffix::F32 => ( 276 | NodeKind::Float32, 277 | NodeData::Float32(self.convert_to_float(negative, number) as f32), 278 | ), 279 | LiteralSuffix::F64 => ( 280 | NodeKind::Float64, 281 | NodeData::Float64(self.convert_to_float(negative, number)), 282 | ), 283 | }; 284 | ast.convert(node, kind); 285 | ast.set_extra(node, extra); 286 | } 287 | 288 | /// Extracts the sign and number node from a potentially `Neg` node. The first value returned 289 | /// specifies whether the number is negative, and the second value is the actual number. 290 | fn extract_neg_node(ast: &Ast, node: NodeId) -> (bool, NodeId) { 291 | let negative = ast.kind(node) == NodeKind::Neg; 292 | let number_node = if negative { 293 | ast.first_handle(node) 294 | } else { 295 | node 296 | }; 297 | (negative, number_node) 298 | } 299 | 300 | /// Parses an integer literal to one of the type-strict kinds `Int8`, `Int16`, etc. 
301 | fn analyze_integer(&mut self, ast: &mut Ast, node: NodeId) { 302 | let (negative, number_node) = Self::extract_neg_node(ast, node); 303 | let source = self.common.get_source_range_from_node(ast, number_node); 304 | assert!(!source.is_empty()); 305 | let (digits, suffix) = self.split_number(source, ast.span(node)); 306 | match self.parse_integer(digits, ast.span(node)) { 307 | Ok(number) => self.convert_integer_node(ast, node, negative, number, suffix), 308 | Err(..) => ast.convert(node, NodeKind::Error), 309 | } 310 | } 311 | 312 | /// Parses a floating point literal to an `f64`. If an error occurs, the function panics, as 313 | /// floats are not susceptible to overflow; only precision loss on large scales. 314 | fn parse_float(string: &str) -> f64 { 315 | // Parsing floats is hard. That's why we're using the Rust standard library for this purpose. 316 | // However, the standard library expects floats without underscores `_`, which tsuki 317 | // allows for. Thus, all the digits have to be first accumulated into a separate string 318 | // without these underscores. 319 | // We use a SmallVec for this purpose, so as to allocate memory on the stack for relatively 320 | // small literals. I don't think there are many cases where people use more than 32 characters 321 | // in a literal, but in these cases the SmallVec is simply going to move over to the heap. 322 | let mut digits = SmallVec::<[u8; 32]>::new(); 323 | for b in string.bytes() { 324 | if b != b'_' { 325 | digits.push(b); 326 | } 327 | } 328 | // Safety: Using `from_utf8_unchecked` is safe, as floating point literals cannot have any 329 | // UTF-8 characters in them. 330 | let filtered = unsafe { std::str::from_utf8_unchecked(&digits) }; 331 | filtered.parse::().expect("the lexer must provide only valid digits") 332 | // Idea: emit a warning when the literal suffers significant precision loss. 
}

   /// Converts the abstract `Float` node to a concrete node of kind `Float32` or `Float64`.
   ///
   /// A missing suffix or a bare `f` suffix resolves to `default_types.float_width`. Any
   /// non-float suffix emits `InvalidFloatSuffix` and converts the node to `NodeKind::Error`.
   fn convert_float_node(
      &mut self,
      ast: &mut Ast,
      node: NodeId,
      negative: bool,
      mut number: f64,
      mut suffix: LiteralSuffix,
   ) {
      match suffix {
         LiteralSuffix::None | LiteralSuffix::F => {
            suffix = LiteralSuffix::from(self.common.default_types.float_width);
         }
         _ => (),
      }
      if negative {
         number *= -1.0;
      }
      let (kind, extra) = match suffix {
         // Already canonicalized to a sized suffix above.
         LiteralSuffix::None | LiteralSuffix::F => unreachable!(),
         LiteralSuffix::F32 => (NodeKind::Float32, NodeData::Float32(number as f32)),
         LiteralSuffix::F64 => (NodeKind::Float64, NodeData::Float64(number)),
         _ => {
            self.emit_error(ErrorKind::InvalidFloatSuffix, ast.span(node).clone());
            ast.convert(node, NodeKind::Error);
            return;
         }
      };
      ast.convert(node, kind);
      ast.set_extra(node, extra);
   }

   /// Parses a float to a `Float32` or a `Float64`.
   fn analyze_float(&mut self, ast: &mut Ast, node: NodeId) {
      // `node` may be a `Neg` wrapping the literal; the source text is read from the literal
      // itself, while the conversion is applied to `node`.
      let (negative, number_node) = Self::extract_neg_node(ast, node);
      let source = self.common.get_source_range_from_node(ast, number_node);
      assert!(!source.is_empty());
      let (digits, suffix) = self.split_number(source, ast.span(node));
      let number = Self::parse_float(digits);
      self.convert_float_node(ast, node, negative, number, suffix);
   }

   /// Walks through the sub-nodes of a branch node.
   fn walk_branch(&mut self, ast: &mut Ast, node: NodeId) {
      let left = ast.first_handle(node);
      match ast.kind(node) {
         // The negation sign `-` is not included in the literal, so these extra cases ensure that
         // edge cases such as -128_i8 are handled correctly without causing an overflow error.
NodeKind::Neg if ast.kind(left) == NodeKind::Integer => {
            self.analyze_integer(ast, node);
         }
         NodeKind::Neg if ast.kind(left) == NodeKind::Float => {
            self.analyze_float(ast, node);
         }
         // Any other branch: analyze each child in turn.
         _ => {
            ast.walk_mut(node, |ast, child| {
               self.analyze_node(ast, child);
            });
         }
      }
   }

   /// Analyzes the given syntax tree node.
   ///
   /// `Integer` and `Float` literals are converted in place, branch nodes are walked, and every
   /// other node is left untouched.
   fn analyze_node(&mut self, ast: &mut Ast, node: NodeId) {
      match ast.kind(node) {
         NodeKind::Integer => self.analyze_integer(ast, node),
         NodeKind::Float => self.analyze_float(ast, node),
         kind if kind.is_branch() => self.walk_branch(ast, node),
         _ => (),
      }
   }
}

impl SemPass for SemLiterals<'_> {
   type Result = ();

   /// Performs literal resolution for the syntax tree.
   fn analyze(&mut self, mut ast: Ast, root_node: NodeId) -> Ast {
      self.analyze_node(&mut ast, root_node);
      ast
   }

   /// Returns the path of the source file being analyzed.
   fn filename(&self) -> &Path {
      &self.common.file.path
   }

   fn errors(&self) -> &Errors {
      &self.errors
   }

   fn errors_mut(&mut self) -> &mut Errors {
      &mut self.errors
   }

   fn into_errors(self) -> Errors {
      self.errors
   }
}
--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/control_flow.rs:
--------------------------------------------------------------------------------
//! Type analysis for control flow constructs.

use crate::ast::{Ast, NodeId, NodeKind};
use crate::common::ErrorKind;
use crate::sem::SemPass;
use crate::types::TypeLogEntry;

use super::{NodeContext, SemTypes};

impl<'s> SemTypes<'s> {
   /// Annotates a "pass" (`_`) statement.
12 | pub(super) fn annotate_pass(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry { 13 | self.annotate(ast, node, self.builtin.t_statement) 14 | } 15 | 16 | /// Annotates a prefix `do` block. 17 | pub(super) fn annotate_do( 18 | &mut self, 19 | ast: &mut Ast, 20 | node: NodeId, 21 | context: NodeContext, 22 | ) -> TypeLogEntry { 23 | let scope = self.scope_stack.push(self.scopes.create_scope()); 24 | ast.set_scope(node, Some(scope)); 25 | let log_entry = self.annotate_statement_list(ast, node, context); 26 | self.scope_stack.pop(); 27 | ast.convert_preserve( 28 | node, 29 | match context { 30 | NodeContext::Expression(_) => NodeKind::DoExpression, 31 | NodeContext::Statement => NodeKind::DoStatement, 32 | }, 33 | ); 34 | log_entry 35 | } 36 | 37 | /// Annotates an `if` expression or `if` statement. 38 | pub(super) fn annotate_if( 39 | &mut self, 40 | ast: &mut Ast, 41 | node: NodeId, 42 | context: NodeContext, 43 | ) -> TypeLogEntry { 44 | let mut typ = None; 45 | ast.walk_node_list_mut(node, |ast, _index, branch| { 46 | // The scope is introduced before the condition is analyzed to have proper scoping behavior 47 | // in `if val`. 48 | let scope = self.scope_stack.push(self.scopes.create_scope()); 49 | ast.set_scope(branch, Some(scope)); 50 | // Only check the condition if it's an `if` branch. `else` branches do not have 51 | // the condition. 
52 | if ast.kind(branch) == NodeKind::IfBranch { 53 | let condition = ast.first_handle(branch); 54 | let condition_entry = self.annotate_node(ast, condition, context); 55 | let condition_type = self.log.type_id(condition_entry); 56 | if !self.types.kind(condition_type).is_bool() { 57 | self.emit_error( 58 | ErrorKind::IfConditionMustBeBool, 59 | ast.span(condition).clone(), 60 | ); 61 | } 62 | } 63 | let body_entry = self.annotate_statement_list(ast, branch, context); 64 | let body_type = self.log.type_id(body_entry); 65 | if let NodeContext::Expression(_) = context { 66 | match typ { 67 | None => typ = Some(body_type), 68 | Some(typ) if body_type != typ => { 69 | // The type log entry is discarded here, because more mismatch errors may 70 | // arise later in the `if` statement. 71 | let _ = self.type_mismatch(ast, node, typ, body_type); 72 | } 73 | _ => (), 74 | } 75 | } 76 | self.scope_stack.pop(); 77 | }); 78 | ast.convert_preserve( 79 | node, 80 | match context { 81 | NodeContext::Expression(_) => NodeKind::IfExpression, 82 | NodeContext::Statement => NodeKind::IfStatement, 83 | }, 84 | ); 85 | self.annotate(ast, node, typ.unwrap_or(self.builtin.t_statement)) 86 | } 87 | 88 | /// Annotates a `while` loop. 
89 | pub(super) fn annotate_while(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry { 90 | let condition_node = ast.first_handle(node); 91 | let condition_entry = self.annotate_node( 92 | ast, 93 | condition_node, 94 | NodeContext::expression_of_type(self.builtin.t_bool), 95 | ); 96 | let condition_type = self.log.type_id(condition_entry); 97 | if !self.types.kind(condition_type).is_bool() { 98 | return self.error(ast, condition_node, ErrorKind::WhileConditionMustBeBool); 99 | } 100 | 101 | let scope = self.scope_stack.push(self.scopes.create_scope()); 102 | ast.set_scope(node, Some(scope)); 103 | self.scopes.set_breakable(scope); 104 | let _ = self.annotate_statement_list(ast, node, NodeContext::Statement); 105 | self.scope_stack.pop(); 106 | 107 | self.annotate(ast, node, self.builtin.t_statement) 108 | } 109 | 110 | /// Annotates a `break` statement. 111 | pub(super) fn annotate_break(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry { 112 | // Find out which scope the `break` is breaking. 113 | // This is later stored in the `break` node's second child. 114 | let mut break_scope = None; 115 | for scope in self.scope_stack.iter().rev() { 116 | if self.scopes.is_breakable(scope) { 117 | break_scope = Some(scope); 118 | } 119 | } 120 | if break_scope.is_none() { 121 | return self.error(ast, node, ErrorKind::BreakOutsideOfLoop); 122 | } 123 | 124 | let break_scope = break_scope.unwrap(); 125 | ast.set_scope(node, Some(break_scope)); 126 | 127 | self.annotate(ast, node, self.builtin.t_noreturn) 128 | } 129 | 130 | /// Annotates a `return` statement. 131 | pub(super) fn annotate_return(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry { 132 | // `return` can only be used in a function. 
if let Some(function_id) = self.current_function {
         let expected_return_type = self.functions.parameters(function_id).return_type;
         let value_node = ast.first_handle(node);
         let return_log = if value_node != NodeId::null() {
            // For `return`s that do actually return something, the path is straightforward.
            self.annotate_node(
               ast,
               value_node,
               NodeContext::expression_of_type(expected_return_type),
            )
         } else {
            // For `return`s that _don't_ return a value, we need to duplicate the empty node such
            // that it gets a unique ID that we can attach the unit type to.
            let value_node = ast.duplicate(value_node);
            ast.set_first_handle(node, value_node);
            self.annotate(ast, value_node, self.builtin.t_unit)
         };
         // The returned value must match the function's declared return type exactly.
         let provided_type = self.log.type_id(return_log);
         if provided_type != expected_return_type {
            return self.type_mismatch(ast, node, expected_return_type, provided_type);
         }
         self.annotate(ast, node, self.builtin.t_noreturn)
      } else {
         self.error(ast, node, ErrorKind::ReturnOutsideOfFunction)
      }
   }
}
--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/conversions.rs:
--------------------------------------------------------------------------------
//! Implicit conversions between types.

use crate::ast::{Ast, NodeData, NodeId, NodeKind};
use crate::types::{FloatSize, IntegerSize, TypeId, TypeLogEntry};

use super::SemTypes;

impl<'s> SemTypes<'s> {
   /// Widens the given integer node to the provided size.
   ///
   /// For literal nodes, this converts the literal directly. For other nodes, this wraps the node
   /// in a `WidenUint` or `WidenInt` with the type set to represent the new size.
fn widen_integer(&mut self, ast: &mut Ast, node: NodeId, new_size: IntegerSize) -> TypeLogEntry {
      if ast.kind(node).is_integer() {
         // Shortcut path for literals.
         let as_uint = ast.extra(node).as_uint().unwrap();
         // First switch the node over to the literal kind matching `new_size`...
         ast.convert(
            node,
            match new_size {
               IntegerSize::U8 => NodeKind::Uint8,
               IntegerSize::U16 => NodeKind::Uint16,
               IntegerSize::U32 => NodeKind::Uint32,
               IntegerSize::U64 => NodeKind::Uint64,
               IntegerSize::S8 => NodeKind::Int8,
               IntegerSize::S16 => NodeKind::Int16,
               IntegerSize::S32 => NodeKind::Int32,
               IntegerSize::S64 => NodeKind::Int64,
            },
         );
         // ...then store the literal's value again, cast to the new representation.
         ast.set_extra(
            node,
            match new_size {
               IntegerSize::U8 => NodeData::Uint8(as_uint as u8),
               IntegerSize::U16 => NodeData::Uint16(as_uint as u16),
               IntegerSize::U32 => NodeData::Uint32(as_uint as u32),
               IntegerSize::U64 => NodeData::Uint64(as_uint as u64),
               IntegerSize::S8 => NodeData::Int8(as_uint as i8),
               IntegerSize::S16 => NodeData::Int16(as_uint as i16),
               IntegerSize::S32 => NodeData::Int32(as_uint as i32),
               IntegerSize::S64 => NodeData::Int64(as_uint as i64),
            },
         );
      } else {
         // Backend path for other nodes.
         // NOTE(review): this branch is only reached when `ast.kind(node).is_integer()` is
         // false. If `is_unsigned_integer` only matches unsigned literal kinds, it can never be
         // true here and `WidenInt` is always chosen - confirm against `NodeKind`, and consider
         // deciding the signedness from `new_size` instead.
         if ast.kind(node).is_unsigned_integer() {
            ast.wrap(node, NodeKind::WidenUint);
         } else {
            ast.wrap(node, NodeKind::WidenInt);
         }
      }
      // In both paths the node ends up annotated with the builtin type for `new_size`.
      self.annotate(
         ast,
         node,
         match new_size {
            IntegerSize::U8 => self.builtin.t_uint8,
            IntegerSize::U16 => self.builtin.t_uint16,
            IntegerSize::U32 => self.builtin.t_uint32,
            IntegerSize::U64 => self.builtin.t_uint64,
            IntegerSize::S8 => self.builtin.t_int8,
            IntegerSize::S16 => self.builtin.t_int16,
            IntegerSize::S32 => self.builtin.t_int32,
            IntegerSize::S64 => self.builtin.t_int64,
         },
      )
   }

   /// Widens a float node to the given size.
   ///
   /// Behavior with literals is similar to `widen_integer`.
70 | fn widen_float(&mut self, ast: &mut Ast, node: NodeId, new_size: FloatSize) -> TypeLogEntry { 71 | if ast.kind(node).is_float() { 72 | let as_float = ast.extra(node).as_float().unwrap(); 73 | ast.convert( 74 | node, 75 | match new_size { 76 | FloatSize::S32 => NodeKind::Float32, 77 | FloatSize::S64 => NodeKind::Float64, 78 | }, 79 | ); 80 | ast.set_extra( 81 | node, 82 | match new_size { 83 | FloatSize::S32 => NodeData::Float32(as_float as f32), 84 | FloatSize::S64 => NodeData::Float64(as_float), 85 | }, 86 | ); 87 | } else { 88 | ast.wrap(node, NodeKind::WidenFloat); 89 | } 90 | self.annotate( 91 | ast, 92 | node, 93 | match new_size { 94 | FloatSize::S32 => self.builtin.t_float32, 95 | FloatSize::S64 => self.builtin.t_float64, 96 | }, 97 | ) 98 | } 99 | 100 | /// Attempts to convert the type `from` to type `to`. If an implicit conversion is not possible, 101 | /// returns `None`. Otherwise returns the converted type ID. 102 | pub(super) fn try_perform_implicit_conversion( 103 | &mut self, 104 | ast: &mut Ast, 105 | node: NodeId, 106 | from: TypeId, 107 | to: TypeId, 108 | ) -> Option { 109 | // If the two types are equal, there's need for conversion. 110 | if from == to { 111 | return Some(self.log.push(to, node)); 112 | } 113 | // Otherwise, compare their kinds for various traits. 114 | let from_kind = self.types.kind(from); 115 | let to_kind = self.types.kind(to); 116 | 117 | // NoReturn conversions 118 | if from_kind.is_noreturn() { 119 | return Some(self.annotate(ast, node, to)); 120 | } 121 | 122 | // Widening integer conversions 123 | if from_kind.is_integer() && to_kind.is_integer() { 124 | // Integers are only implicitly convertible to wider types of the same signedness, 125 | // eg. Int8 -> Int16, Int32 -> Int64, but not Int64 -> Int32, or Uint32 -> Int32. 
let from_size = from_kind.unwrap_integer();
         let to_size = to_kind.unwrap_integer();
         // NOTE(review): the signedness rule above relies on how `IntegerSize` values compare;
         // that ordering is not visible here - confirm it rejects mixed-signedness pairs.
         if to_size >= from_size {
            return Some(self.widen_integer(ast, node, to_size));
         }
      }

      // Widening float conversions
      if from_kind.is_float() && to_kind.is_float() {
         // Floats are only implicitly convertible if the destination type is wider than the
         // source type (Float32 -> Float64).
         let from_size = from_kind.unwrap_float();
         let to_size = to_kind.unwrap_float();
         if to_size >= from_size {
            return Some(self.widen_float(ast, node, to_size));
         }
      }

      None
   }

   /// Performs an implicit conversion without failing.
   ///
   /// If no implicit conversion from the type of `from_log` to `to` exists, `from_log` is
   /// returned unchanged.
   pub(super) fn perform_implicit_conversion(
      &mut self,
      ast: &mut Ast,
      node: NodeId,
      from_log: TypeLogEntry,
      to: TypeId,
   ) -> TypeLogEntry {
      let from = self.log.type_id(from_log);
      self.try_perform_implicit_conversion(ast, node, from, to).unwrap_or(from_log)
   }
}
--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/functions.rs:
--------------------------------------------------------------------------------
//! Annotating functions and introducing them into scope.

use smallvec::SmallVec;

use crate::ast::{Ast, NodeId, NodeKind};
use crate::common::ErrorKind;
use crate::functions::{FunctionKind, Intrinsic, Parameters};
use crate::scope::{Mutability, SymbolKind, Variable};
use crate::types::{TypeId, TypeLogResult};

use super::{NodeContext, SemTypes};

impl<'s> SemTypes<'s> {
   /// Builds the mangled name of a function: the file's module name and the function name,
   /// joined with a `.`.
   fn mangle_name(&self, function_name: &str) -> String {
      format!("{}.{}", &self.common.file.module_name, function_name)
   }

   /// Annotates a function declaration: registers its parameters, return type, and symbol, and
   /// then either checks the body right away or defers that when in module scope.
   pub(super) fn annotate_function_declaration(
      &mut self,
      ast: &mut Ast,
      node: NodeId,
   ) -> TypeLogResult {
      // Check if the name is sem'd or not.
If so, we are coming from a deferred sem'check,
      // so simply check the body and return.
      let name_node = ast.first_handle(node);
      if ast.kind(name_node) == NodeKind::Symbol {
         return self.annotate_function_body(ast, node);
      }

      // Get the function name.
      let name = self.common.get_source_range_from_node(ast, name_node);
      let mangled_name = self.mangle_name(&name);

      // Prepare all the nodes.
      let parameters_node = ast.second_handle(node);
      let formal_parameters_node = ast.second_handle(parameters_node);

      // Create a scope for the generic and formal parameters.
      // We save the declaration scope for later, as that's where we'll be adding the function
      // symbol itself.
      let declaration_scope = self.scope_stack.top();
      let scope = self.scope_stack.push(self.scopes.create_scope());
      ast.set_scope(node, Some(scope));

      // Slurp all the parameters up into a vector.
      let mut parameters = SmallVec::new();
      // The node list is re-fetched on every iteration rather than held across the loop body,
      // which passes `ast` on mutably.
      for i in 0..ast.extra(formal_parameters_node).as_node_list().unwrap().len() {
         let named_parameters = ast.extra(formal_parameters_node).as_node_list().unwrap()[i];
         // All names in one `named_parameters` group share the type found in its first handle.
         let type_node = ast.first_handle(named_parameters);
         let typ = self.lookup_type(ast, type_node)?;
         ast.walk_node_list_mut(named_parameters, |ast, _, name_node| {
            // Make each parameter have its own identifier in the function body.
            // Semantically, function parameters are just variables, introduced by some
            // external scope.
            let name = self.common.get_source_range_from_node(ast, name_node);
            let symbol = self.symbols.create(
               name,
               name_node,
               typ,
               SymbolKind::Variable(Variable {
                  mutability: Mutability::Val,
               }),
            );
            parameters.push(symbol);
            ast.convert_to_symbol(name_node, symbol);
            self.add_to_scope(name, symbol);
         });
      }

      // Look up what the return type should be.
let return_type_node = ast.first_handle(formal_parameters_node);
      let return_type = if ast.kind(return_type_node) != NodeKind::Empty {
         self.lookup_type(ast, return_type_node)?
      } else {
         // In case no return type is provided, default to the unit type `()`.
         self.builtin.t_unit
      };

      // Register the function in the registry and add it to scope.
      // Registering the function _here_ allows for the referring to the function inside its
      // body, enabling recursion.
      let function_id = self.functions.create(
         name.to_owned(),
         mangled_name,
         Parameters {
            formal: parameters,
            return_type,
         },
         FunctionKind::Local,
      );
      let symbol_kind = SymbolKind::Function(function_id);
      // TODO: Function/closure types. Right now we treat function symbols as having the
      // 'statement' type, which isn't exactly correct.
      let symbol = self.symbols.create(name, node, self.builtin.t_statement, symbol_kind);
      self.scopes.insert(declaration_scope, name, symbol);
      // Converting the name to a symbol is also what marks this declaration as already sem'd
      // when it is revisited by a deferred check.
      ast.convert_to_symbol(name_node, symbol);

      // After all is done, pop the function's scope off.
      self.scope_stack.pop();

      if self.is_in_module_scope() {
         // If we're at the top-level scope, defer sem until all items in scope have already been
         // declared.
         self.defer(node, NodeContext::Statement);
         Ok(self.annotate(ast, node, TypeId::null()))
      } else {
         // If we're not top-level, check the function's body.
         self.annotate_function_body(ast, node)
      }
   }

   /// Annotates a function's body.
   ///
   /// Expects the declaration's name node to already have been converted to a function symbol.
   pub(super) fn annotate_function_body(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogResult {
      let name_node = ast.first_handle(node);
      let symbol_id = ast.symbol_id(name_node);
      let function_id = self.symbols.kind(symbol_id).unwrap_function();

      // Before any analysis happens, set the current function to this one.
let previous_function = self.current_function;
      self.current_function = Some(function_id);

      let return_type = self.functions.parameters(function_id).return_type;

      // Functions returning unit have their body checked as a statement; all others have it
      // checked as an expression of the declared return type.
      let returns_unit = self.types.kind(return_type).is_unit();
      let body_log = self.annotate_statement_list(
         ast,
         node,
         if returns_unit {
            NodeContext::Statement
         } else {
            NodeContext::expression_of_type(return_type)
         },
      );

      // Restore the enclosing function, now that this function's body has been analyzed.
      self.current_function = previous_function;

      // Check that the body's return type is correct.
      let body_type = self.log.type_id(body_log);
      if !returns_unit && body_type != return_type {
         return Ok(self.type_mismatch(ast, node, return_type, body_type));
      }

      let declaration_type = self.create_declaration_type(symbol_id);
      Ok(self.annotate(ast, node, declaration_type))
   }

   /// Annotates a function call.
   pub(super) fn annotate_call(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogResult {
      // Extract what is being called.
      let callee_node = ast.first_handle(node);
      // TODO: Method calls.
      if ast.kind(callee_node) != NodeKind::Identifier {
         return Ok(self.error(ast, callee_node, ErrorKind::ExpressionCannotBeCalled));
      }
      let (symbol_id, function_id) = self.lookup_function(ast, callee_node)?;
      // Convert the callee to a symbol.
      ast.convert_to_symbol(callee_node, symbol_id);

      // Check if we have the right amount of arguments.
      let given_parameter_count = ast.extra(node).as_node_list().unwrap().len();
      let declared_parameter_count = self.functions.parameters(function_id).formal.len();
      if given_parameter_count != declared_parameter_count {
         return Ok(self.error(
            ast,
            node,
            ErrorKind::NArgumentsExpected(declared_parameter_count, given_parameter_count),
         ));
      }
      // Check if all the arguments are of the correct type.
170 | // We don't immediately return after we error, so as to collect as many type mismatch 171 | // messages as we can. 172 | let mut last_error = None; 173 | ast.walk_node_list_mut(node, |ast, index, argument| { 174 | let parameters = self.functions.parameters(function_id); 175 | let expected_type = self.symbols.type_id(parameters.formal[index]); 176 | 177 | let argument_log = self.annotate_node( 178 | ast, 179 | argument, 180 | NodeContext::expression_of_type(expected_type), 181 | ); 182 | let provided_type = self.log.type_id(argument_log); 183 | 184 | // Perform implicit conversions on arguments. 185 | let argument_log = self 186 | .try_perform_implicit_conversion(ast, node, provided_type, expected_type) 187 | .unwrap_or(argument_log); 188 | // If there's a mismatch after the conversion, error. 189 | let provided_type = self.log.type_id(argument_log); 190 | if provided_type != expected_type { 191 | last_error = Some(self.type_mismatch(ast, argument, expected_type, provided_type)); 192 | } 193 | }); 194 | if let Some(error) = last_error { 195 | return Ok(error); 196 | } 197 | 198 | if let &FunctionKind::Intrinsic(intrinsic) = self.functions.kind(function_id) { 199 | // Intrinsic calls have some transformation magic going on. 200 | self.annotate_intrinsic_call(ast, node, intrinsic); 201 | } else { 202 | // For other calls, we use the CallFunction node, which is a normalized version of `Call` 203 | // that takes the form of `function_name(params)`. Even for instance functions. 204 | ast.convert_preserve(node, NodeKind::CallFunction); 205 | } 206 | 207 | let return_type = self.functions.parameters(function_id).return_type; 208 | Ok(self.annotate(ast, node, return_type)) 209 | } 210 | 211 | /// Annotates an intrinsic function call. 
212 | fn annotate_intrinsic_call(&mut self, ast: &mut Ast, node: NodeId, intrinsic: Intrinsic) { 213 | ast.convert_preserve(node, NodeKind::from(intrinsic)); 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /tsuki-frontend/src/sem_types/locations.rs: -------------------------------------------------------------------------------- 1 | //! Type analysis for variables, object fields, pointers, etc. 2 | 3 | use crate::ast::{Ast, NodeId, NodeKind}; 4 | use crate::common::ErrorKind; 5 | use crate::scope::{Mutability, SymbolId, SymbolKind, Variable}; 6 | use crate::types::{TypeLogEntry, TypeLogResult}; 7 | 8 | use super::{NodeContext, SemTypes}; 9 | 10 | impl<'s> SemTypes<'s> { 11 | /// Annotates a location expression, ie. variables `a`, members `.x`. 12 | pub(super) fn annotate_location(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogResult { 13 | match ast.kind(node) { 14 | NodeKind::Identifier => { 15 | let symbol = self.lookup_variable(ast, node)?; 16 | Ok(self.annotate_location_symbol(ast, node, symbol)) 17 | } 18 | // TODO: Make this into a better error. This would require slicing the source string, 19 | // which we can't do because spans don't store direct byte indices to it at the moment. 20 | _ => Err(self.error(ast, node, ErrorKind::InvalidLocation)), 21 | } 22 | } 23 | 24 | /// Annotates a symbol that refers to a location. 25 | fn annotate_location_symbol( 26 | &mut self, 27 | ast: &mut Ast, 28 | node: NodeId, 29 | symbol: SymbolId, 30 | ) -> TypeLogEntry { 31 | match self.symbols.kind(symbol) { 32 | SymbolKind::Variable(_variable) => { 33 | let typ = self.symbols.type_id(symbol); 34 | ast.convert_to_symbol(node, symbol); 35 | let log = self.annotate(ast, node, typ); 36 | ast.wrap(node, NodeKind::Variable); 37 | log 38 | } 39 | _ => self.error(ast, node, ErrorKind::InvalidLocation), 40 | } 41 | } 42 | 43 | /// Annotates an assignment. 
44 | pub(super) fn annotate_assignment( 45 | &mut self, 46 | ast: &mut Ast, 47 | node: NodeId, 48 | context: NodeContext, 49 | ) -> TypeLogResult { 50 | // TODO: Pointers and assigning values to them. 51 | let (left, right) = (ast.first_handle(node), ast.second_handle(node)); 52 | let left_entry = self.annotate_location(ast, left)?; 53 | let left_type = self.log.type_id(left_entry); 54 | let right_entry = self.annotate_node(ast, right, NodeContext::expression_of_type(left_type)); 55 | let right_entry = self.perform_implicit_conversion(ast, node, right_entry, left_type); 56 | let right_type = self.log.type_id(right_entry); 57 | // Check types. 58 | if right_type != left_type { 59 | return Err(self.type_mismatch(ast, node, left_type, right_type)); 60 | } 61 | // Check mutability. 62 | // TODO: This could maybe be moved into a different check, shoving this logic into assignments 63 | // doesn't seem very clean. 64 | let target_is_mutable = match ast.kind(left) { 65 | NodeKind::Variable => { 66 | let symbol = ast.first_handle(left); 67 | let variable = self.symbols.kind(ast.symbol_id(symbol)).unwrap_variable(); 68 | variable.mutability == Mutability::Var 69 | } 70 | _ => false, 71 | }; 72 | if !target_is_mutable { 73 | return Err(self.error(ast, left, ErrorKind::CannotAssignImmutableLocation)); 74 | } 75 | Ok(match context { 76 | NodeContext::Expression(_) => self.annotate(ast, node, left_type), 77 | NodeContext::Statement => self.annotate(ast, node, self.builtin.t_statement), 78 | }) 79 | } 80 | 81 | /// Annotates a variable declaration. 82 | pub(super) fn annotate_variable_declaration( 83 | &mut self, 84 | ast: &mut Ast, 85 | node: NodeId, 86 | ) -> TypeLogResult { 87 | let kind = match ast.kind(node) { 88 | NodeKind::Val => Mutability::Val, 89 | NodeKind::Var => Mutability::Var, 90 | _ => unreachable!(), 91 | }; 92 | let variable = Variable { mutability: kind }; 93 | 94 | // Figure out the name and expected type. 
This expected type can be `None`, and in that case, 95 | // should be inferred from context. 96 | let left_node = ast.first_handle(node); 97 | let (name_node, expected_type) = match ast.kind(left_node) { 98 | NodeKind::VariableType => { 99 | let name_node = ast.first_handle(left_node); 100 | let type_node = ast.second_handle(left_node); 101 | let typ = self.lookup_type(ast, type_node)?; 102 | (name_node, Some(typ)) 103 | } 104 | _ => (left_node, None), 105 | }; 106 | // Normalize the LHS to the name only. 107 | ast.set_first_handle(node, name_node); 108 | 109 | // Annotate the value. 110 | let value_node = ast.second_handle(node); 111 | let value_log = self.annotate_node(ast, value_node, NodeContext::Expression(expected_type)); 112 | let value_type = self.log.type_id(value_log); 113 | 114 | // Check if the type matches if an explicit type was provided. 115 | let value_type = match expected_type { 116 | Some(typ) => { 117 | if let Some(log) = 118 | self.try_perform_implicit_conversion(ast, value_node, value_type, typ) 119 | { 120 | self.log.type_id(log) 121 | } else { 122 | let expected_name = self.types.name(typ).to_owned(); 123 | let value_name = self.types.name(value_type).to_owned(); 124 | return Err(self.error( 125 | ast, 126 | node, 127 | ErrorKind::TypeMismatch(expected_name, value_name), 128 | )); 129 | } 130 | } 131 | None => value_type, 132 | }; 133 | 134 | // Add to scope. 135 | match ast.kind(name_node) { 136 | NodeKind::Discard => { 137 | // A discarding assignment is converted to an AssignDiscard node containing 138 | // the original value. 139 | ast.convert(node, NodeKind::AssignDiscard); 140 | ast.set_first_handle(node, value_node); 141 | } 142 | NodeKind::Identifier => { 143 | // A simple symbol-binding assignment is converted into a Symbol node. 
144 | let name = self.common.get_source_range_from_node(ast, name_node); 145 | let symbol = 146 | self.symbols.create(name, node, value_type, SymbolKind::Variable(variable)); 147 | ast.convert_to_symbol(name_node, symbol); 148 | self.add_to_scope(name, symbol); 149 | // The variable type annotation is less relevant to error reporting than the fact that 150 | // it's a statement. This sounds counterintuitive at first, but note that we're 151 | // requested to annotate the Val/Var node, not the variable name node, so the calling 152 | // function likely expects a statement instead of an expression. 153 | let _ = self.annotate(ast, name_node, value_type); 154 | } 155 | _ => unreachable!(), 156 | } 157 | Ok(self.annotate(ast, node, self.builtin.t_statement)) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /tsuki-frontend/src/sem_types/lookups.rs: -------------------------------------------------------------------------------- 1 | //! Identifier lookups, generic instantiations, mutability queries. 2 | 3 | use crate::ast::{Ast, NodeId, NodeKind}; 4 | use crate::common::ErrorKind; 5 | use crate::functions::FunctionId; 6 | use crate::scope::{SymbolId, SymbolKind}; 7 | use crate::types::{TypeId, TypeLogEntry}; 8 | 9 | use super::{NodeContext, SemTypes}; 10 | 11 | impl<'s> SemTypes<'s> { 12 | // The difference between `find` and `lookup` is simple: `find` returns an Option, 13 | // `lookup` returns a Result. 14 | 15 | /// Performs a basic identifier search, looking for a symbol whose name matches the identifier 16 | /// stored in the node in the current scope. 17 | fn find_identifier(&self, ast: &Ast, node: NodeId) -> Option { 18 | assert_eq!(ast.kind(node), NodeKind::Identifier); 19 | let name = self.common.get_source_range_from_node(ast, node); 20 | self.scope_stack.lookup(&self.scopes, name) 21 | } 22 | 23 | /// Performs an error-reporting identifier lookup. 
24 | fn lookup_identifier(&mut self, ast: &Ast, node: NodeId) -> Result { 25 | self.find_identifier(ast, node).ok_or_else(|| { 26 | let name = self.common.get_source_range_from_node(ast, node); 27 | self.error(ast, node, ErrorKind::UndeclaredSymbol(name.into())) 28 | }) 29 | } 30 | 31 | /// Finds the variable symbol referred to by the given identifier node. 32 | pub(super) fn lookup_variable( 33 | &mut self, 34 | ast: &Ast, 35 | node: NodeId, 36 | ) -> Result { 37 | let symbol = self.lookup_identifier(ast, node)?; 38 | if let SymbolKind::Variable(..) = self.symbols.kind(symbol) { 39 | Ok(symbol) 40 | } else { 41 | // TODO: Make this error not suck. 42 | Err(self.error(ast, node, ErrorKind::InvalidLocation)) 43 | } 44 | } 45 | 46 | /// Finds the function referred to by the given identifier node. 47 | pub(super) fn lookup_function( 48 | &mut self, 49 | ast: &Ast, 50 | node: NodeId, 51 | ) -> Result<(SymbolId, FunctionId), TypeLogEntry> { 52 | let symbol = self.lookup_identifier(ast, node)?; 53 | if let &SymbolKind::Function(id) = self.symbols.kind(symbol) { 54 | Ok((symbol, id)) 55 | } else { 56 | Err(self.error(ast, node, ErrorKind::ExpressionCannotBeCalled)) 57 | } 58 | } 59 | 60 | /// Finds the type symbol referred to by the given node. 61 | /// 62 | /// The node can be any valid type as parsed by the parser. If the type is a generic type, 63 | /// then instantiations will be performed. 64 | pub(super) fn lookup_type(&mut self, ast: &Ast, node: NodeId) -> Result { 65 | match ast.kind(node) { 66 | NodeKind::Identifier => { 67 | let symbol = self.lookup_identifier(ast, node)?; 68 | if let SymbolKind::Type(id) = self.symbols.kind(symbol) { 69 | Ok(*id) 70 | } else { 71 | let name = self.symbols.name(symbol).to_owned(); 72 | Err(self.error(ast, node, ErrorKind::SymbolIsNotAType(name))) 73 | } 74 | } 75 | _ => { 76 | unreachable!("invalid node kind for type") 77 | } 78 | } 79 | } 80 | 81 | /// Adds a symbol to the current scope. 
82 | pub(super) fn add_to_scope(&mut self, name: &str, symbol: SymbolId) { 83 | let scope = self.scope_stack.top(); 84 | self.scopes.insert(scope, name, symbol); 85 | } 86 | 87 | /// Annotates a `pub` declaration. 88 | pub(super) fn annotate_pub(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry { 89 | let inner = ast.first_handle(node); 90 | let inner = self.annotate_node(ast, inner, NodeContext::Statement); 91 | let typ = self.log.type_id(inner); 92 | let symbol_id = self.types.kind(typ).as_declaration().unwrap(); 93 | self.scopes.set_public(symbol_id); 94 | ast.unwrap(node); 95 | inner 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /tsuki-frontend/src/sem_types/mod.rs: -------------------------------------------------------------------------------- 1 | //! Semantic analyzer for types. 2 | 3 | mod control_flow; 4 | mod conversions; 5 | mod functions; 6 | mod locations; 7 | mod lookups; 8 | mod operators; 9 | mod pragmas; 10 | mod types; 11 | 12 | use std::path::Path; 13 | 14 | use smallvec::SmallVec; 15 | 16 | use crate::ast::{Ast, NodeId, NodeKind}; 17 | use crate::common::{ErrorKind, Errors}; 18 | use crate::functions::{register_intrinsics, FunctionId, Functions}; 19 | use crate::scope::{ScopeId, ScopeStack, Scopes, SymbolId, Symbols}; 20 | use crate::sem::{SemCommon, SemPass}; 21 | use crate::types::{BuiltinTypes, TypeId, TypeInfo, TypeKind, TypeLog, TypeLogEntry, Types}; 22 | 23 | pub(crate) struct SemTypes<'s> { 24 | common: &'s SemCommon<'s>, 25 | errors: Errors, 26 | 27 | types: &'s mut Types, 28 | log: &'s mut TypeLog, 29 | builtin: &'s BuiltinTypes, 30 | scopes: &'s mut Scopes, 31 | symbols: &'s mut Symbols, 32 | functions: &'s mut Functions, 33 | 34 | scope_stack: ScopeStack, 35 | module_scope: ScopeId, 36 | /// A stack of vectors of nodes to be sem'checked after the module's done being checked. 37 | /// 38 | /// The node ID is used to determine where the given node is placed. 
The scope of the node's 39 | /// body is determined from the node's metadata. 40 | deferred: SmallVec<[Vec<(NodeId, NodeContext)>; 4]>, 41 | 42 | /// The function that is currently being compiled. 43 | /// `None` if at the top level. 44 | current_function: Option, 45 | } 46 | 47 | /// Values borrowed to `SemTypes`, used during its construction. 48 | pub(crate) struct SemTypesBorrows<'s> { 49 | pub(crate) common: &'s SemCommon<'s>, 50 | pub(crate) types: &'s mut Types, 51 | pub(crate) log: &'s mut TypeLog, 52 | pub(crate) builtin: &'s BuiltinTypes, 53 | pub(crate) scopes: &'s mut Scopes, 54 | pub(crate) symbols: &'s mut Symbols, 55 | pub(crate) functions: &'s mut Functions, 56 | } 57 | 58 | /// Specifies whether a node should be annotated in expression or statement context. 59 | #[derive(Clone, Copy, PartialEq, Eq, Debug)] 60 | enum NodeContext { 61 | Expression( 62 | /// The expected type of the expression. 63 | Option, 64 | ), 65 | Statement, 66 | } 67 | 68 | impl NodeContext { 69 | fn expression() -> Self { 70 | Self::Expression(None) 71 | } 72 | 73 | fn expression_of_type(type_id: TypeId) -> Self { 74 | Self::Expression(Some(type_id)) 75 | } 76 | } 77 | 78 | impl<'s> SemTypes<'s> { 79 | /// Creates a new instance of the `SemTypes` analysis phase. 80 | pub fn new(borrows: SemTypesBorrows<'s>) -> Self { 81 | let SemTypesBorrows { 82 | common, 83 | types, 84 | log, 85 | builtin, 86 | scopes, 87 | symbols, 88 | functions, 89 | } = borrows; 90 | let mut scope_stack = ScopeStack::new(); 91 | // The scope stack is always initialized with a top-level module scope, such that there is 92 | // always a valid scope on top. 
93 | let module_scope = scope_stack.push(scopes.create_scope()); 94 | builtin.register_in(scopes, symbols, module_scope); 95 | register_intrinsics(builtin, scopes, symbols, module_scope, functions); 96 | SemTypes { 97 | common, 98 | errors: Errors::new(), 99 | 100 | types, 101 | log, 102 | builtin, 103 | scopes, 104 | symbols, 105 | functions, 106 | 107 | scope_stack, 108 | module_scope, 109 | deferred: SmallVec::new(), 110 | current_function: None, 111 | } 112 | } 113 | 114 | /// Annotates the given AST with the given type, and returns the type. 115 | fn annotate(&mut self, ast: &mut Ast, node: NodeId, typ: TypeId) -> TypeLogEntry { 116 | ast.set_type_id(node, typ); 117 | self.log.push(typ, node) 118 | } 119 | 120 | /// Emits an error of the given kind, also returning the error type. 121 | fn error(&mut self, ast: &Ast, node: NodeId, kind: ErrorKind) -> TypeLogEntry { 122 | self.emit_error(kind, ast.span(node).clone()); 123 | self.log.push(self.builtin.t_error, node) 124 | } 125 | 126 | /// Emits a type mismatch error. 127 | fn type_mismatch( 128 | &mut self, 129 | ast: &Ast, 130 | node: NodeId, 131 | expected: TypeId, 132 | got: TypeId, 133 | ) -> TypeLogEntry { 134 | let expected_name = self.types.name(expected); 135 | let provided_name = self.types.name(got); 136 | let kind = ErrorKind::TypeMismatch(expected_name.to_owned(), provided_name.to_owned()); 137 | self.error(ast, node, kind) 138 | } 139 | 140 | /// Returns whether sem'checking is currently happening in the module scope. 141 | fn is_in_module_scope(&self) -> bool { 142 | self.scope_stack.top() == self.module_scope 143 | } 144 | 145 | /// Pushes a new vector of defers. 146 | fn push_defers(&mut self) { 147 | self.deferred.push(Vec::new()); 148 | } 149 | 150 | /// Pushes a new defer into the current vector of defers. 
151 | fn defer(&mut self, node: NodeId, context: NodeContext) { 152 | let defers = self.deferred.last_mut().unwrap(); 153 | defers.push((node, context)); 154 | } 155 | 156 | /// Pops the current vector of defers off, and 157 | fn pop_defers(&mut self, ast: &mut Ast) { 158 | let defers = self.deferred.pop().expect("unbalanced stack of defers"); 159 | for (node, context) in defers { 160 | let scope = ast.scope(node); 161 | if let Some(scope) = scope { 162 | self.scope_stack.push(scope); 163 | } 164 | let _ = self.annotate_node(ast, node, context); 165 | if let Some(_) = scope { 166 | self.scope_stack.pop(); 167 | } 168 | } 169 | } 170 | 171 | /// Creates a new type that represents a declaration. 172 | fn create_declaration_type(&mut self, symbol: SymbolId) -> TypeId { 173 | self.types.create_type(TypeInfo { 174 | name: &format!("declaration({})", symbol.id()), 175 | kind: TypeKind::Declaration(symbol), 176 | }) 177 | } 178 | 179 | /// Annotates a literal with a concrete type. 180 | fn annotate_literal(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry { 181 | let typ = match ast.kind(node) { 182 | NodeKind::True => self.builtin.t_bool, 183 | NodeKind::False => self.builtin.t_bool, 184 | NodeKind::Uint8 => self.builtin.t_uint8, 185 | NodeKind::Uint16 => self.builtin.t_uint16, 186 | NodeKind::Uint32 => self.builtin.t_uint32, 187 | NodeKind::Uint64 => self.builtin.t_uint64, 188 | NodeKind::Int8 => self.builtin.t_int8, 189 | NodeKind::Int16 => self.builtin.t_int16, 190 | NodeKind::Int32 => self.builtin.t_int32, 191 | NodeKind::Int64 => self.builtin.t_int64, 192 | NodeKind::Float32 => self.builtin.t_float32, 193 | NodeKind::Float64 => self.builtin.t_float64, 194 | NodeKind::Character => self.builtin.t_char, 195 | _ => unreachable!(), 196 | }; 197 | self.annotate(ast, node, typ) 198 | } 199 | 200 | /// Annotates statements in a list of statements. 
201 | fn annotate_statement_list( 202 | &mut self, 203 | ast: &mut Ast, 204 | node: NodeId, 205 | context: NodeContext, 206 | ) -> TypeLogEntry { 207 | let mut last_log = None; 208 | ast.walk_node_list_mut(node, |ast, index, child| { 209 | // Trailing expressions in expression context statement lists get special treatment. 210 | // They are resulting expressions of the these statement lists, and thus get analyzed as 211 | // proper expressions rather than statements. They are also not subject to triggering the 212 | // UnusedValue error, as the resulting value _is_ actually used - it's the result of the 213 | // statement list. 214 | let is_last = ast.is_last_child(node, index); 215 | let log_entry = self.annotate_node( 216 | ast, 217 | child, 218 | if is_last { 219 | context 220 | } else { 221 | NodeContext::Statement 222 | }, 223 | ); 224 | let typ = self.log.type_id(log_entry); 225 | let type_kind = self.types.kind(typ); 226 | // For expressions, we have some special cases. 227 | if !type_kind.is_statement() { 228 | if is_last { 229 | // Trailing expressions get assigned to the `last_log`, so that we know what the 230 | // result of the statement list is. 231 | last_log = Some(log_entry); 232 | } else { 233 | // Other expressions are unused, which is invalid. 234 | self.emit_error(ErrorKind::UnusedValue, ast.span(child).clone()); 235 | } 236 | } 237 | }); 238 | // Statement lists in expression context must always have a trailing expression. 239 | if let NodeContext::Expression(expected_type) = context { 240 | if let Some(last_log) = last_log { 241 | if let Some(expected_type) = expected_type { 242 | self.perform_implicit_conversion(ast, node, last_log, expected_type) 243 | } else { 244 | last_log 245 | } 246 | } else { 247 | self.error(ast, node, ErrorKind::MissingResult) 248 | } 249 | } else { 250 | self.annotate(ast, node, self.builtin.t_statement) 251 | } 252 | } 253 | 254 | /// Annotates the given AST node. 
255 | fn annotate_node(&mut self, ast: &mut Ast, node: NodeId, context: NodeContext) -> TypeLogEntry { 256 | let log = match ast.kind(node) { 257 | // Literals 258 | | NodeKind::True 259 | | NodeKind::False 260 | | NodeKind::Uint8 261 | | NodeKind::Uint16 262 | | NodeKind::Uint32 263 | | NodeKind::Uint64 264 | | NodeKind::Int8 265 | | NodeKind::Int16 266 | | NodeKind::Int32 267 | | NodeKind::Int64 268 | | NodeKind::Float32 269 | | NodeKind::Float64 270 | | NodeKind::Character => self.annotate_literal(ast, node), 271 | 272 | // Locations 273 | NodeKind::Identifier => self.annotate_location(ast, node).into(), 274 | 275 | // Unary operators 276 | // --- 277 | // The following operators were omitted from the generic rule: 278 | // NodeKind::Member - magic for field access in self 279 | // NodeKind::Ref - magic for creating pointers 280 | // NodeKind::Deref - magic for dereferencing 281 | NodeKind::Not | NodeKind::Neg | NodeKind::BitNot => { 282 | self.annotate_unary_operator(ast, node) 283 | } 284 | 285 | // Binary operators 286 | // --- 287 | // The following kinds were omitted from the generic rule: 288 | // NodeKind::Dot - magic for field access 289 | | NodeKind::Plus 290 | | NodeKind::Minus 291 | | NodeKind::Mul 292 | | NodeKind::Div 293 | | NodeKind::Equal 294 | | NodeKind::NotEqual 295 | | NodeKind::Less 296 | | NodeKind::LessEqual 297 | | NodeKind::Greater 298 | | NodeKind::GreaterEqual => self.annotate_binary_operator(ast, node), 299 | NodeKind::Call => self.annotate_call(ast, node).into(), 300 | NodeKind::Assign => self.annotate_assignment(ast, node, context).into(), 301 | // Other operators are to be implemented later. 
302 | 303 | // Control flow 304 | NodeKind::StatementList => self.annotate_statement_list(ast, node, context), 305 | NodeKind::Pass => self.annotate_pass(ast, node), 306 | NodeKind::Do => self.annotate_do(ast, node, context), 307 | NodeKind::If => self.annotate_if(ast, node, context), 308 | NodeKind::While => self.annotate_while(ast, node), 309 | NodeKind::Break => self.annotate_break(ast, node), 310 | NodeKind::Return => self.annotate_return(ast, node), 311 | 312 | // Declarations 313 | NodeKind::Val | NodeKind::Var => self.annotate_variable_declaration(ast, node).into(), 314 | NodeKind::Fun => self.annotate_function_declaration(ast, node).into(), 315 | NodeKind::Type => self.annotate_type_alias(ast, node).into(), 316 | NodeKind::Pub => self.annotate_pub(ast, node), 317 | 318 | // Other nodes are invalid (or not implemented yet). 319 | other => self.error(ast, node, ErrorKind::SemTypesInvalidAstNode(other)), 320 | }; 321 | 322 | // In case the node's context is an expression with some return type provided, perform 323 | // implicit conversions such that the node's type matches the expected type. 324 | let log = if let NodeContext::Expression(Some(expected_type)) = context { 325 | self.perform_implicit_conversion(ast, node, log, expected_type) 326 | } else { 327 | log 328 | }; 329 | 330 | log 331 | } 332 | } 333 | 334 | impl SemPass for SemTypes<'_> { 335 | type Result = TypeLogEntry; 336 | 337 | /// Performs type analysis for the given AST node. This annotates the node with a concrete type. 
338 | fn analyze(&mut self, mut ast: Ast, root_node: NodeId) -> Ast { 339 | self.push_defers(); 340 | let _ = self.annotate_node(&mut ast, root_node, NodeContext::Statement); 341 | self.pop_defers(&mut ast); 342 | ast 343 | } 344 | 345 | fn filename(&self) -> &Path { 346 | &self.common.file.path 347 | } 348 | 349 | fn errors(&self) -> &Errors { 350 | &self.errors 351 | } 352 | 353 | fn errors_mut(&mut self) -> &mut Errors { 354 | &mut self.errors 355 | } 356 | 357 | fn into_errors(self) -> Errors { 358 | self.errors 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /tsuki-frontend/src/sem_types/operators.rs: -------------------------------------------------------------------------------- 1 | //! Semantic analysis for operators and compiler intrinsics. 2 | 3 | use crate::ast::{Ast, NodeId, NodeKind}; 4 | use crate::common::ErrorKind; 5 | use crate::types::TypeLogEntry; 6 | 7 | use super::{NodeContext, SemTypes}; 8 | 9 | impl<'s> SemTypes<'s> { 10 | // Currently, this does some rather simplistic analysis just to Make it Work™, but in the 11 | // future when operators will be lowered to trait instance function calls, this will be 12 | // replaced by much simpler logic and compiler intrinsics inside the stdlib. 13 | 14 | /// Annotates a unary operator with types. 
15 | pub(super) fn annotate_unary_operator(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry { 16 | let log_entry = self.annotate_node(ast, ast.first_handle(node), NodeContext::expression()); 17 | let right = self.log.type_id(log_entry); 18 | let right_kind = self.types.kind(right); 19 | let typ = match ast.kind(node) { 20 | NodeKind::Not if right == self.builtin.t_bool => right, 21 | NodeKind::BitNot if right_kind.is_integer() => right, 22 | NodeKind::Neg if right_kind.is_numeric() => right, 23 | _ => { 24 | let right_name = self.types.name(right); 25 | let kind = ErrorKind::InvalidUnaryOperator(right_name.into()); 26 | return self.error(ast, node, kind); 27 | } 28 | }; 29 | self.annotate(ast, node, typ) 30 | } 31 | 32 | /// Annotates a binary operator with types. 33 | pub(super) fn annotate_binary_operator(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry { 34 | let (left, right) = (ast.first_handle(node), ast.second_handle(node)); 35 | let left_entry = self.annotate_node(ast, left, NodeContext::expression()); 36 | let left_type = self.log.type_id(left_entry); 37 | let right_entry = self.annotate_node(ast, right, NodeContext::expression_of_type(left_type)); 38 | let right_type = self.log.type_id(right_entry); 39 | let conversion = self.try_perform_implicit_conversion(ast, right, right_type, left_type); 40 | let left_type_kind = self.types.kind(left_type); 41 | let typ = match ast.kind(node) { 42 | // Arithmetic operators always evaluate to the same type as the LHS. 43 | NodeKind::Plus | NodeKind::Minus | NodeKind::Mul | NodeKind::Div 44 | if conversion.is_some() => 45 | { 46 | left_type 47 | } 48 | 49 | // Comparison operators always evaluate to `Bool`. 
50 | NodeKind::Equal | NodeKind::NotEqual 51 | if conversion.is_some() && left_type_kind.is_bool() => 52 | { 53 | self.builtin.t_bool 54 | } 55 | | NodeKind::Equal 56 | | NodeKind::NotEqual 57 | | NodeKind::Less 58 | | NodeKind::LessEqual 59 | | NodeKind::Greater 60 | | NodeKind::GreaterEqual 61 | if conversion.is_some() && left_type_kind.is_numeric() => 62 | { 63 | self.builtin.t_bool 64 | } 65 | 66 | // Other operators, and failed conversions, raise a type mismatch error. 67 | _ => { 68 | return self.type_mismatch(ast, node, left_type, right_type); 69 | } 70 | }; 71 | self.annotate(ast, node, typ) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /tsuki-frontend/src/sem_types/pragmas.rs: -------------------------------------------------------------------------------- 1 | //! Handling for pragma AST. 2 | 3 | use crate::ast::{Ast, NodeId, NodeKind}; 4 | 5 | use super::SemTypes; 6 | 7 | impl<'s> SemTypes<'s> { 8 | /// Splits a node that may have pragmas attached to it, to the inner part (first tuple field), 9 | /// and the pragmas (second tuple field). 10 | pub(crate) fn split_pragmas(ast: &Ast, node: NodeId) -> (NodeId, Option) { 11 | if ast.kind(node) == NodeKind::Pragmas { 12 | (ast.first_handle(node), Some(node)) 13 | } else { 14 | (node, None) 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tsuki-frontend/src/sem_types/types.rs: -------------------------------------------------------------------------------- 1 | //! Type declarations. 2 | 3 | use crate::ast::{Ast, NodeId, NodeKind}; 4 | use crate::common::ErrorKind; 5 | use crate::scope::{SymbolId, SymbolKind}; 6 | use crate::types::{TypeInfo, TypeKind, TypeLogEntry, TypeLogResult}; 7 | 8 | use super::SemTypes; 9 | 10 | impl<'s> SemTypes<'s> { 11 | /// Annotates the AST for a type alias declaration. 
12 | pub(super) fn annotate_type_alias(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogResult { 13 | let (lhs, rhs) = (ast.first_handle(node), ast.second_handle(node)); 14 | let (constrained_type, pragmas) = Self::split_pragmas(ast, lhs); 15 | 16 | // TODO: generic parameters 17 | let type_name = ast.first_handle(constrained_type); 18 | let name_identifier = ast.first_handle(type_name); 19 | let name = self.common.get_source_range_from_node(ast, name_identifier); 20 | 21 | // Interpret the right-hand side. 22 | let mut aliased_type = if rhs != NodeId::null() { 23 | // TODO: Check constraints. 24 | let underlying_type = self.lookup_type(ast, rhs)?; 25 | let alias = self.types.create_type(TypeInfo { 26 | name, 27 | kind: TypeKind::Alias(underlying_type), 28 | }); 29 | let symbol = self.symbols.create(name, node, self.builtin.t_type, SymbolKind::Type(alias)); 30 | Some(symbol) 31 | } else { 32 | None 33 | }; 34 | 35 | // Interpret pragmas. 36 | if let Some(pragmas) = pragmas { 37 | for i in 0..ast.extra(pragmas).as_node_list().unwrap().len() { 38 | let pragma = ast.extra(pragmas).as_node_list().unwrap()[i]; 39 | aliased_type = self.type_alias_pragma(ast, pragma, aliased_type)?; 40 | } 41 | } 42 | 43 | // Unwrap the resulting type. 44 | let aliased_type = 45 | aliased_type.ok_or_else(|| self.error(ast, node, ErrorKind::EmptyTypeAlias))?; 46 | // Add the alias to scope. 47 | self.add_to_scope(name, aliased_type); 48 | 49 | let declaration_type = self.create_declaration_type(aliased_type); 50 | Ok(self.annotate(ast, node, declaration_type)) 51 | } 52 | 53 | /// Interprets a pragma for a type alias declaration. 
54 | fn type_alias_pragma( 55 | &mut self, 56 | ast: &mut Ast, 57 | pragma: NodeId, 58 | #[allow(unused)] mut aliased_type: Option, 59 | ) -> Result, TypeLogEntry> { 60 | let name_identifier = ast.first_handle(pragma); 61 | let name = self.common.get_source_range_from_node(ast, name_identifier); 62 | match name { 63 | "compiler_builtin_type" => { 64 | aliased_type = Some(self.pragma_compiler_builtin_type(ast, pragma)?); 65 | } 66 | other => return Err(self.error(ast, pragma, ErrorKind::UnknownPragma(other.into()))), 67 | } 68 | Ok(aliased_type) 69 | } 70 | 71 | /// Raises an error if a pragma does not have the provided number of arguments. 72 | fn pragma_expect_arguments( 73 | &mut self, 74 | ast: &Ast, 75 | pragma: NodeId, 76 | count: usize, 77 | ) -> Result<(), TypeLogEntry> { 78 | let nodes = ast.extra(pragma).as_node_list().unwrap(); 79 | if nodes.len() != 1 { 80 | return Err(self.error( 81 | ast, 82 | pragma, 83 | ErrorKind::NArgumentsExpected(count, nodes.len()), 84 | )); 85 | } 86 | Ok(()) 87 | } 88 | 89 | /// Handles the `compiler_builtin_type` pragma: creates a new symbol for a built-in type. 
90 | fn pragma_compiler_builtin_type( 91 | &mut self, 92 | ast: &mut Ast, 93 | pragma: NodeId, 94 | ) -> Result { 95 | let nodes = ast.extra(pragma).as_node_list().unwrap(); 96 | self.pragma_expect_arguments(ast, pragma, 1)?; 97 | let name_node = nodes[0]; 98 | if ast.kind(name_node) != NodeKind::Atom { 99 | return Err(self.error(ast, name_node, ErrorKind::InvalidBuiltinTypeName)); 100 | } 101 | let name = self.common.get_source_range_from_node(ast, name_node); 102 | let typ = match name { 103 | "noreturn" => self.builtin.t_noreturn, 104 | "bool" => self.builtin.t_bool, 105 | "uint8" => self.builtin.t_uint8, 106 | "uint16" => self.builtin.t_uint16, 107 | "uint32" => self.builtin.t_uint32, 108 | "uint64" => self.builtin.t_uint64, 109 | "int8" => self.builtin.t_int8, 110 | "int16" => self.builtin.t_int16, 111 | "int32" => self.builtin.t_int32, 112 | "int64" => self.builtin.t_int64, 113 | "float32" => self.builtin.t_float32, 114 | "float64" => self.builtin.t_float64, 115 | "size" => self.builtin.t_size, 116 | _ => return Err(self.error(ast, name_node, ErrorKind::InvalidBuiltinTypeName)), 117 | }; 118 | let symbol = self.symbols.create( 119 | self.types.name(typ), 120 | pragma, 121 | self.builtin.t_type, 122 | SymbolKind::Type(typ), 123 | ); 124 | Ok(symbol) 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /tsuki-frontend/src/types.rs: -------------------------------------------------------------------------------- 1 | //! Storage and logging of types. 2 | 3 | // Note: Because `type` is a keyword in Rust, sometimes a truncated form `typ` is used to prevent 4 | // conflicts. 5 | 6 | use std::cmp::Ordering; 7 | use std::ops::Range; 8 | 9 | use crate::ast::NodeId; 10 | use crate::scope::{ScopeId, Scopes, SymbolId, SymbolKind, Symbols}; 11 | 12 | /// Data-oriented type storage. 13 | pub struct Types { 14 | names: Vec>, 15 | kinds: Vec, 16 | 17 | name_data: String, 18 | } 19 | 20 | /// A unique ID representing a type. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TypeId(usize);

impl TypeId {
   /// Returns the null type ID.
   ///
   /// Index 0 is the slot that `Types::new` fills with the `missingtype` placeholder
   /// (`TypeKind::Missing`).
   pub fn null() -> TypeId {
      TypeId(0)
   }
}

/// Structure containing data for creating a type.
pub struct TypeInfo<'n> {
   /// The name stored for the type.
   pub name: &'n str,
   /// The kind of the type.
   pub kind: TypeKind,
}

impl Types {
   /// Creates a new type registry that contains only the `missingtype` placeholder.
   ///
   /// The placeholder is created first, so it occupies the slot `TypeId::null()` refers to.
   pub fn new() -> Self {
      let mut types = Self {
         names: Vec::new(),
         kinds: Vec::new(),
         name_data: String::new(),
      };
      // Ensure the null slot is populated by the missing type.
      let _ = types.create_type(TypeInfo {
         name: "missingtype",
         kind: TypeKind::Missing,
      });
      types
   }

   /// Creates a new type with the given type info.
   ///
   /// IDs are handed out sequentially: the returned ID is the number of types that existed
   /// before this call.
   #[must_use]
   pub fn create_type(&mut self, info: TypeInfo<'_>) -> TypeId {
      let id = self.names.len();
      let name_range = self.add_name(info.name);
      self.names.push(name_range);
      self.kinds.push(info.kind);
      TypeId(id)
   }

   /// Returns the name of the type.
   pub fn name(&self, typ: TypeId) -> &str {
      &self.name_data[self.names[typ.0].clone()]
   }

   /// Returns the kind of the given type.
   pub fn kind(&self, typ: TypeId) -> &TypeKind {
      &self.kinds[typ.0]
   }

   /// Adds a name into the local `name_data` storage and returns the byte range it occupies.
   fn add_name(&mut self, name: &str) -> Range<usize> {
      let start = self.name_data.len();
      self.name_data.push_str(name);
      let end = self.name_data.len();
      start..end
   }
}

impl Default for Types {
   /// Equivalent to [`Types::new`].
   fn default() -> Self {
      Self::new()
   }
}

/// The kind of a type.
pub enum TypeKind {
   /// The missing type is assigned to all nodes that don't get a type assigned explicitly.
   Missing,
   /// The error type is returned when type analysis fails for an AST node.
   Error,
   /// The statement type is assigned to AST nodes that do not return a value, such as loops.
   Statement,
   /// The declaration type is assigned to AST nodes that introduce a symbol into scope.
   Declaration(SymbolId),
   /// `type` is the type of all type symbols. It can't be instantiated by user code.
   Type,
   /// The unit type is a type with a single value `()`. It is the default return type for
   /// functions.
   Unit,
   /// The NoReturn type is assigned to expressions that do not return to the parent expression,
   /// eg. `return` expressions. Certain built-in functions also return `NoReturn`.
   /// It is implicitly convertible to any other type.
   NoReturn,
   // The rest of the primitive types is quite self-explanatory.
   Bool,
   Integer(IntegerSize),
   Float(FloatSize),
   Char,

   /// An alias for the type of the given ID.
   Alias(TypeId),
}

impl TypeKind {
   /// Returns whether the type kind represents an invalid type (`Missing` or `Error`).
   pub fn is_invalid(&self) -> bool {
      matches!(self, TypeKind::Missing | TypeKind::Error)
   }

   /// Returns whether the type kind is the `NoReturn` type.
   pub fn is_noreturn(&self) -> bool {
      matches!(self, TypeKind::NoReturn)
   }

   /// Returns whether the type kind is the unit type.
   pub fn is_unit(&self) -> bool {
      matches!(self, TypeKind::Unit)
   }

   /// Returns whether the type kind represents the `Bool` type.
   pub fn is_bool(&self) -> bool {
      matches!(self, TypeKind::Bool)
   }

   /// Returns whether the type kind represents an integer type.
   pub fn is_integer(&self) -> bool {
      matches!(self, TypeKind::Integer(..))
   }

   /// Returns whether the type kind represents a float type.
   pub fn is_float(&self) -> bool {
      matches!(self, TypeKind::Float(..))
   }

   /// Returns whether the type kind represents a numeric (integer or float) type.
143 | pub fn is_numeric(&self) -> bool { 144 | self.is_integer() || self.is_float() 145 | } 146 | 147 | /// Returns whether the type kind is for a type that's valid when used as a statement. 148 | pub fn is_statement(&self) -> bool { 149 | self.is_invalid() 150 | || matches!( 151 | self, 152 | TypeKind::Statement | TypeKind::Declaration(_) | TypeKind::Unit | TypeKind::NoReturn 153 | ) 154 | } 155 | 156 | /// Unwraps the integer size stored in the type kind, panics if the kind is not an integer. 157 | pub fn unwrap_integer(&self) -> IntegerSize { 158 | match self { 159 | TypeKind::Integer(size) => *size, 160 | _ => panic!("unwrap_integer called on a type kind that is not an integer"), 161 | } 162 | } 163 | 164 | /// Unwraps the float size stored in the type kind, panics if the kind is not an float. 165 | pub fn unwrap_float(&self) -> FloatSize { 166 | match self { 167 | TypeKind::Float(size) => *size, 168 | _ => panic!("unwrap_float called on a type kind that is not a float"), 169 | } 170 | } 171 | 172 | /// Returns `Some(symbol_id)` if the type kind represents a declaration, or `None` if it doesn't. 173 | pub fn as_declaration(&self) -> Option { 174 | if let Self::Declaration(v) = self { 175 | Some(*v) 176 | } else { 177 | None 178 | } 179 | } 180 | } 181 | 182 | /// The size of an integer. `S` sizes are signed, `U` sizes are unsigned. 183 | #[derive(Clone, Copy, PartialEq, Eq)] 184 | #[repr(u8)] 185 | pub enum IntegerSize { 186 | U8, 187 | U16, 188 | U32, 189 | U64, 190 | S8, 191 | S16, 192 | S32, 193 | S64, 194 | } 195 | 196 | impl IntegerSize { 197 | /// Returns whether the size represents an unsigned integer. 198 | pub fn is_unsigned(self) -> bool { 199 | // Can't use Self here? 200 | use IntegerSize::*; 201 | matches!(self, U8 | U16 | U32 | U64) 202 | } 203 | 204 | /// Returns whether the size represents a signed integer. 
pub fn is_signed(self) -> bool {
      !self.is_unsigned()
   }
}

impl PartialOrd for IntegerSize {
   /// Compares two integer sizes.
   ///
   /// Sizes of different signedness are unordered, so `None` is returned for them regardless of
   /// which side is the signed one. This keeps the comparison symmetric, as the `PartialOrd`
   /// contract requires. Sizes of the same signedness are ordered by their discriminants.
   fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
      if self.is_signed() != other.is_signed() {
         None
      } else {
         (*self as u8).partial_cmp(&(*other as u8))
      }
   }
}

/// The size of a float.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FloatSize {
   S32,
   S64,
}

/// Configuration for "sensible default" types: `Int`, `Float`, and `Size`.
pub struct DefaultTypes {
   /// Width of `Int`; must be a signed size.
   pub int_width: IntegerSize,
   /// Width of `Float`.
   pub float_width: FloatSize,
   /// Width of `Size`; must be an unsigned size.
   pub size_width: IntegerSize,
}

/// A struct containing all the built-in types.
pub struct BuiltinTypes {
   // Special
   pub t_error: TypeId,
   pub t_unit: TypeId,
   pub t_noreturn: TypeId,
   pub t_statement: TypeId,
   pub t_type: TypeId,

   // Boolean
   pub t_bool: TypeId,

   // Integers
   pub t_uint8: TypeId,
   pub t_uint16: TypeId,
   pub t_uint32: TypeId,
   pub t_uint64: TypeId,
   pub t_int8: TypeId,
   pub t_int16: TypeId,
   pub t_int32: TypeId,
   pub t_int64: TypeId,

   // Floats
   pub t_float32: TypeId,
   pub t_float64: TypeId,

   // Int/Float aliases
   // NOTE: These will later be implemented in the standard library and will be configurable with
   // compiler switches.
   pub t_int: TypeId,
   pub t_float: TypeId,
   pub t_size: TypeId,

   // Characters
   pub t_char: TypeId,
}

impl BuiltinTypes {
   /// Adds all the built-in types to the given `Types` and returns them.
   ///
   /// `t_int`, `t_float`, and `t_size` do not create new types; they reuse the ID of the
   /// fixed-width type selected by `default_types`.
   ///
   /// # Panics
   ///
   /// Panics if `default_types.int_width` is unsigned or `default_types.size_width` is signed.
   pub fn add_to(types: &mut Types, default_types: &DefaultTypes) -> Self {
      // The order of the `create_type` calls determines the resulting type IDs, so the
      // statements and the struct fields below must not be reordered.
      let t_error = types.create_type(TypeInfo {
         // NOTE: Maybe look for better names than this?
         // Just like `statement`, the name is lowercase, but users may think that the occurrence
         // of errortype is an error in the compiler. Of course, it's not.
         // Maybe we should "unwrap" error types somehow, so that we never report errors
         // containing them?
         name: "errortype",
         kind: TypeKind::Error,
      });
      let t_uint8 = types.create_type(TypeInfo {
         name: "Uint8",
         kind: TypeKind::Integer(IntegerSize::U8),
      });
      let t_uint16 = types.create_type(TypeInfo {
         name: "Uint16",
         kind: TypeKind::Integer(IntegerSize::U16),
      });
      let t_uint32 = types.create_type(TypeInfo {
         name: "Uint32",
         kind: TypeKind::Integer(IntegerSize::U32),
      });
      let t_uint64 = types.create_type(TypeInfo {
         name: "Uint64",
         kind: TypeKind::Integer(IntegerSize::U64),
      });
      let t_int8 = types.create_type(TypeInfo {
         name: "Int8",
         kind: TypeKind::Integer(IntegerSize::S8),
      });
      let t_int16 = types.create_type(TypeInfo {
         name: "Int16",
         kind: TypeKind::Integer(IntegerSize::S16),
      });
      let t_int32 = types.create_type(TypeInfo {
         name: "Int32",
         kind: TypeKind::Integer(IntegerSize::S32),
      });
      let t_int64 = types.create_type(TypeInfo {
         name: "Int64",
         kind: TypeKind::Integer(IntegerSize::S64),
      });
      let t_float32 = types.create_type(TypeInfo {
         name: "Float32",
         kind: TypeKind::Float(FloatSize::S32),
      });
      let t_float64 = types.create_type(TypeInfo {
         name: "Float64",
         kind: TypeKind::Float(FloatSize::S64),
      });
      Self {
         t_error,
         t_unit: types.create_type(TypeInfo {
            name: "()",
            kind: TypeKind::Unit,
         }),
         t_noreturn: types.create_type(TypeInfo {
            name: "NoReturn",
            kind: TypeKind::NoReturn,
         }),
         // Unlike all other types, the `statement` type is lowercase. This should let users know
         // that the "expression" in question isn't an expression after all.
         t_statement: types.create_type(TypeInfo {
            name: "statement",
            kind: TypeKind::Statement,
         }),
         t_type: types.create_type(TypeInfo {
            name: "type",
            kind: TypeKind::Type,
         }),
         t_bool: types.create_type(TypeInfo {
            name: "Bool",
            kind: TypeKind::Bool,
         }),
         t_uint8,
         t_uint16,
         t_uint32,
         t_uint64,
         t_int8,
         t_int16,
         t_int32,
         t_int64,
         t_float32,
         t_float64,

         t_int: match default_types.int_width {
            IntegerSize::S8 => t_int8,
            IntegerSize::S16 => t_int16,
            IntegerSize::S32 => t_int32,
            IntegerSize::S64 => t_int64,
            _ => panic!("int_width must be signed"),
         },
         t_float: match default_types.float_width {
            FloatSize::S32 => t_float32,
            FloatSize::S64 => t_float64,
         },
         t_size: match default_types.size_width {
            IntegerSize::U8 => t_uint8,
            IntegerSize::U16 => t_uint16,
            IntegerSize::U32 => t_uint32,
            IntegerSize::U64 => t_uint64,
            _ => panic!("size_width must be unsigned"),
         },
         t_char: types.create_type(TypeInfo {
            name: "Char",
            kind: TypeKind::Char,
         }),
      }
   }

   /// Registers named built-in types in the given scope.
   ///
   /// Each type gets a fresh symbol of kind `SymbolKind::Type` with a null source node, inserted
   /// into `scope` under the symbol's name.
   ///
   /// TODO: Remove this in favor of the stdlib declaring the types in the prelude.
   pub(crate) fn register_in(&self, scopes: &mut Scopes, symbols: &mut Symbols, scope: ScopeId) {
      // Creates a type symbol for the `BuiltinTypes` field `$field` and binds it to `$name`.
      macro_rules! add_type {
         ($field:tt, $name:tt) => {
            let symbol = symbols.create(
               $name,
               NodeId::null(),
               self.t_type,
               SymbolKind::Type(self.$field),
            );
            scopes.insert(scope, symbols.name(symbol), symbol);
         };
      }

      add_type!(t_noreturn, "NoReturn");
      add_type!(t_bool, "Bool");
      add_type!(t_uint8, "Uint8");
      add_type!(t_uint16, "Uint16");
      add_type!(t_uint32, "Uint32");
      add_type!(t_uint64, "Uint64");
      add_type!(t_int8, "Int8");
      add_type!(t_int16, "Int16");
      add_type!(t_int32, "Int32");
      add_type!(t_int64, "Int64");
      add_type!(t_float32, "Float32");
      add_type!(t_float64, "Float64");
      add_type!(t_char, "Char");

      add_type!(t_int, "Int");
      add_type!(t_float, "Float");
      add_type!(t_size, "Size");
   }
}

/// A unique ID identifying an entry in the type log.
#[derive(Clone, Copy, Debug)]
#[must_use]
pub struct TypeLogEntry(usize);

/// An alias for a result storing a log for either a valid or an erroneous type usage.
pub type TypeLogResult = Result<TypeLogEntry, TypeLogEntry>;

impl From<TypeLogResult> for TypeLogEntry {
   /// Unwraps a successful or erroneous type log from a result.
   ///
   /// This is used to simplify returning from functions when analysis errors occur.
   fn from(result: TypeLogResult) -> Self {
      match result {
         Ok(entry) | Err(entry) => entry,
      }
   }
}

/// A log storing the AST nodes from which different instances of types came from.
///
/// The two vectors are parallel: a `TypeLogEntry` indexes both of them.
pub struct TypeLog {
   types: Vec<TypeId>,
   nodes: Vec<NodeId>,
}

impl Default for TypeLog {
   /// Equivalent to [`TypeLog::new`].
   fn default() -> Self {
      Self::new()
   }
}

impl TypeLog {
   /// Constructs a new, empty type log.
   pub fn new() -> Self {
      Self {
         types: Vec::new(),
         nodes: Vec::new(),
      }
   }

   /// Inserts a new type into the log and returns its handle.
455 | pub fn push(&mut self, typ: TypeId, node: NodeId) -> TypeLogEntry { 456 | let id = self.types.len(); 457 | self.types.push(typ); 458 | self.nodes.push(node); 459 | TypeLogEntry(id) 460 | } 461 | 462 | /// Returns the type stored in the log entry. 463 | pub fn type_id(&self, entry: TypeLogEntry) -> TypeId { 464 | self.types[entry.0] 465 | } 466 | 467 | /// Returns the source node stored in the log entry. 468 | pub fn node(&self, entry: TypeLogEntry) -> NodeId { 469 | self.nodes[entry.0] 470 | } 471 | } 472 | --------------------------------------------------------------------------------