├── .editorconfig
├── .gitattributes
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── code
    ├── functions.tsu
    ├── if_expression.tsu
    ├── printing.tsu
    ├── variables.tsu
    └── while_loop.tsu
├── docs
    ├── README.md
    ├── macros.md
    ├── mangling.md
    └── pragmas.md
├── rustfmt.toml
├── spec.md
├── src
    └── main.rs
├── std
    └── kernel.tsu
├── tsuki-backend-llvm
    ├── Cargo.toml
    └── src
    │   ├── codegen.rs
    │   ├── control_flow.rs
    │   ├── expressions.rs
    │   ├── functions.rs
    │   ├── lib.rs
    │   ├── libc.rs
    │   ├── types.rs
    │   └── variables.rs
└── tsuki-frontend
    ├── Cargo.toml
    └── src
        ├── ast.rs
        ├── astdump.rs
        ├── backend.rs
        ├── common.rs
        ├── functions.rs
        ├── lexer.rs
        ├── lib.rs
        ├── parser.rs
        ├── scope.rs
        ├── sem.rs
        ├── sem_literals.rs
        ├── sem_types
            ├── control_flow.rs
            ├── conversions.rs
            ├── functions.rs
            ├── locations.rs
            ├── lookups.rs
            ├── mod.rs
            ├── operators.rs
            ├── pragmas.rs
            └── types.rs
        └── types.rs


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | end_of_line = lf
 5 | 
 6 | [*.{rs,tsu}]
 7 | indent_size = 3
 8 | indent_style = space
 9 | max_line_length = 100
10 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=lf
2 | 
3 | *.rs text
4 | *.toml text
5 | .editorconfig text
6 | 
7 | .lite_project.lua -linguist-detectable
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .lite_workspace.lua
 2 | .vscode
 3 | *.md.backup
 4 | bin
 5 | 
 6 | 
 7 | # Added by cargo
 8 | 
 9 | /target
10 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "tsuki"
 3 | version = "0.1.0"
 4 | authors = ["lqdev <liquidekgaming@gmail.com>"]
 5 | edition = "2018"
 6 | 
 7 | [workspace]
 8 | members = [
 9 |   "tsuki-frontend",
10 |   "tsuki-backend-llvm",
11 | ]
12 | 
13 | [dependencies]
14 | tsuki-frontend = { path = "tsuki-frontend" }
15 | tsuki-backend-llvm = { path = "tsuki-backend-llvm" }
16 | 
17 | structopt = "0.3.22"
18 | 
19 | [profile.release]
20 | lto = true
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 liquidev
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # tsuki
 2 | 
 3 | A programming language that focuses on being fun to program in, and aiding developers in writing more robust software, all while maintaining high performance.
 4 | 
 5 | The compiler is still in its infancy, and it'll probably take me a while before it's actually usable. In the meantime, you can check out the [spec](spec.md), which lays out the general feature set and vision of the language.
 6 | 
 7 | ## Compiling
 8 | 
 9 | Right now compiling tsuki isn't exactly the most trivial of tasks, and Windows is not yet supported.
10 | 
11 | **Step 0.** Install a C (and C++) compiler.
12 | 
13 | tsuki depends on libc and uses whatever C compiler is available on the system as `cc` to link executables. This can be overridden using the `$TSUKI_CC` or `$CC` environment variables, in that order of priority. The C++ compiler is necessary to build LLVM.
14 | 
15 | **Step 1.** Compile LLVM 12.
16 | 
17 | The best way to get LLVM for tsuki is to build it manually. I had pretty bad experiences with using repository LLVM, with problems ranging from missing static libraries on Ubuntu, no `llvm-config` on Windows, to random SIGILLs after a month of hiatus on Arch.
18 | 
19 | So here's uncle Liquid's method of obtaining LLVM:
20 | ```shell
21 | # This is where we're going to install LLVM, so change this to some sensible path.
22 | # bash - in this case you also need to add this to .bashrc
23 | export LLVM_SYS_120_PREFIX=$HOME/llvm
24 | # fish
25 | set -Ux LLVM_SYS_120_PREFIX ~/llvm
26 | 
27 | # Now it's time to get LLVM. We'll use their GitHub releases for that.
28 | mkdir -p ~/llvm
29 | wget https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.1/llvm-12.0.1.src.tar.xz
30 | tar xJf llvm-12.0.1.src.tar.xz
31 | 
32 | # Now let's get the build going.
33 | cd llvm-12.0.1.src
34 | mkdir -p build
35 | cd build
36 | # If doing a release build, remove LLVM_ENABLE_ASSERTIONS, and set CMAKE_BUILD_TYPE to Release.
37 | # Also, if compiling for other platforms such as aarch64, change the target in LLVM_TARGETS_TO_BUILD.
38 | # You can find a list of all available targets, as well as some other build options, here:
39 | # https://llvm.org/docs/GettingStarted.html#local-llvm-configuration
40 | cmake .. \
41 |    -D CMAKE_INSTALL_PREFIX=$LLVM_SYS_120_PREFIX \
42 |    -D CMAKE_BUILD_TYPE=Debug \
43 |    -D LLVM_ENABLE_ASSERTIONS=1 \
44 |    -D LLVM_TARGETS_TO_BUILD=X86 \
45 |    -G Ninja
46 | # To reduce memory usage during the process of compiling LLVM, clang with the mold linker can be
47 | # used. Grab mold here:
48 | # https://github.com/rui314/mold
49 | # And add the flags:
50 | # -D CMAKE_C_COMPILER=clang
51 | # -D CMAKE_CXX_COMPILER=clang++
52 | # -D CMAKE_CXX_LINK_FLAGS=-fuse-ld=mold
53 | # As far as I know it's not possible to use mold with gcc.
54 | 
55 | # IMPORTANT:
56 | # When not using clang+mold, open a task manager or system monitor. You're going to want to look
57 | # after your memory usage. If it starts growing rapidly, cancel the build and use --parallel 1.
58 | # Linking with GNU ld uses up a lot of memory, so it's better to let it run a single linker at a
59 | # time.
60 | cmake --build . --target install --parallel 8
61 | ```
62 | 
63 | Maybe someday I'll make a dedicated script for this, but today is not that day.
64 | 
65 | **Step 2.** Compile and run.
66 | 
67 | With all that, running tsuki should be as simple as:
68 | ```
69 | cargo run
70 | ```
71 | 
72 | ## Using the compiler
73 | 
74 | While still in its early stages, the compiler is able to compile arbitrary user code into a working executable. The most basic usage of the compiler would be:
75 | ```sh
76 | $ tsuki --package-name main --package-root src --std-path path/to/tsuki/std --main-file src/main.tsu
77 | # or, abbreviated:
78 | $ tsuki -p main -r src -s path/to/tsuki/std -m src/main.tsu
79 | ```
80 | `--package-name` specifies the name of the output file, and is also used for mangling.
81 | 
82 | Refer to the code examples in `code` to see what's currently implemented or being worked on.
83 | 


--------------------------------------------------------------------------------
/code/functions.tsu:
--------------------------------------------------------------------------------
 1 | # test functions returning a value
 2 | 
 3 | fun add_two(x: Int): Int
 4 |    x + 2
 5 | 
 6 | __intrin_print_int32(add_two(1))
 7 | 
 8 | # test functions returning void. also calling other functions
 9 | 
10 | fun add_two_and_print(x: Int)
11 |    val added = add_two(x)
12 |    __intrin_print_int32(added)
13 | 
14 | add_two_and_print(123)
15 | 
16 | # test self-recursive functions
17 | 
18 | fun fib(n: Int): Int
19 |    if n == 0 -> 0
20 |    elif n == 1 -> 1
21 |    else -> fib(n - 1) + fib(n - 2)
22 | 
23 | __intrin_print_int32(fib(10))
24 | 
25 | # test mutually recursive functions
26 | 
27 | fun foo(x: Int)
28 |    if x < 10
29 |       bar(x + 2)
30 | 
31 | fun bar(x: Int)
32 |    if x < 20
33 |       foo(x - 1)
34 | 
35 | foo(0)
36 | 
37 | # test return statement
38 | 
39 | fun nop()
40 |    return
41 | 
42 | nop()
43 | 
44 | fun fac(n: Int): Int
45 |    var i = 1  # start at 1: a factor of 0 would make the whole product 0
46 |    var x = 1
47 |    while true
48 |       if i > n  # stop only after `n` itself has been multiplied in
49 |          return x
50 |       x = x * i
51 |       i = i + 1
52 |    x
53 | 
54 | __intrin_print_int32(fac(10))
55 | 
56 | # Tests for implicit conversion from NoReturn to any other type.
57 | fun straightforward_return(): Int
58 |    return 1
59 | 
60 | fun return_from_if(x: Int): Int
61 |    if x == 1
62 |       return 1
63 |    else
64 |       return 2
65 | 
66 | fun assign_return_to_variable(x: Int)
67 |    val x: Int = return
68 | 


--------------------------------------------------------------------------------
/code/if_expression.tsu:
--------------------------------------------------------------------------------
 1 | # test if statements and conditions
 2 | val a = 2
 3 | if a == 1
 4 |    __intrin_print_int32(10)
 5 | elif a == 2
 6 |    __intrin_print_int32(15)
 7 | else
 8 |    __intrin_print_int32(20)
 9 | 
10 | # test if expressions
11 | val b = a + 1
12 | val cmp = 30_i16
13 | val c =
14 |    if b == 1 -> 3
15 |    elif b == 2 -> 4
16 |    elif b == 3 -> 5
17 |    elif b != cmp -> 10
18 |    else -> 6
19 | __intrin_print_int32(c)
20 | 
21 | # test nested statements
22 | if a > 10 == true
23 |    if a + 2 == 12
24 |       __intrin_print_int32(111)
25 |    elif a + 6 == 17
26 |       __intrin_print_int32(222)
27 |    else
28 |       __intrin_print_int32(123)
29 | else
30 |    # also nested expressions because why not
31 |    val x =
32 |       if a == 5 -> 6
33 |       else -> 7
34 |    __intrin_print_int32(x)
35 | 
36 | # test boolean operations
37 | val eq = true == true
38 | val ne = false != true
39 | val neg = not true
40 | val neg2 = not false
41 | 


--------------------------------------------------------------------------------
/code/printing.tsu:
--------------------------------------------------------------------------------
1 | __intrin_print_int32(42 / 2 * 4)
2 | __intrin_print_float32(10.0)
3 | 


--------------------------------------------------------------------------------
/code/variables.tsu:
--------------------------------------------------------------------------------
 1 | val a = 2
 2 | __intrin_print_int32(a * 2)
 3 | 
 4 | var b = 3
 5 | __intrin_print_int32(b)
 6 | b = 5
 7 | __intrin_print_int32(b)
 8 | 
 9 | var c = b = 1
10 | __intrin_print_int32(b)
11 | __intrin_print_int32(c)
12 | 
13 | __intrin_print_int32(-c)
14 | 
15 | var d: Int64 = 1
16 | 


--------------------------------------------------------------------------------
/code/while_loop.tsu:
--------------------------------------------------------------------------------
 1 | var i = 0
 2 | while i < 10
 3 |    __intrin_print_int32(i)
 4 |    i = i + 1
 5 | 
 6 | var a = 0
 7 | var b = 1
 8 | while true
 9 |    if a + b > 100
10 |       break
11 |    a = a + 2
12 |    b = b + 1
13 | 
14 | # Check that `break` can be used as an expression.
15 | __intrin_print_int32(123)
16 | while true
17 |    val yes: Int = break
18 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | This directory houses liquidev's notes on the implementation. This includes plans for the future, as well as details of the existing implementation.
2 | 


--------------------------------------------------------------------------------
/docs/macros.md:
--------------------------------------------------------------------------------
  1 | # El Macro
  2 | 
  3 | > A Spanish guy, knowing all about how good macro systems work.
  4 | 
  5 | As the compiler implementation progresses, I'll be noting here some things about the macro system.
  6 | 
  7 | ## Macro declaration
  8 | 
  9 | The declaration of a macro is similar to a function, except it only ever takes a single argument:
 10 | 
 11 | ```
 12 | import @std.macros as _
 13 | 
 14 | macro my_macro(ctx: macros.Context[:call]): macros.Error!macros.AnyResolvedNode
 15 |    _  # do things
 16 | ```
 17 | 
 18 | The task of the macro is to transform the nodes found in the environment into a final node. There are a few things to note here:
 19 | - The `ctx` argument tells the macro about the context it's being called in, including the _full_ call site, and type system things like the expected return type.
 20 | - The type of this argument is `macros.Context[P]`. The `P` parameter is a `const macros.CallPosition`, which specifies _where_ the macro can be called.
 21 |    - For instance, `:call` signifies that the macro is called like a regular function, `my_macro(arg1, arg2)`.
 22 |    - Other positions include `:fun_pragma`, `:object_pragma`, `:union_pragma`, `:atom_pragma`, `:type_pragma`, and `:derive`.
 23 |       - While the `:*_pragma` kinds are self-explanatory, `:derive` is special, because it can be used together with the `derive` declaration inside `impl` blocks. It's used for deriving trait implementations automatically.
 24 | 
 25 | The kind of the AST node output by the macro has to be valid for the call site, which is ensured by the API's strong typing, described later.
 26 | 
 27 | ## The context
 28 | 
 29 | The `macros.Context` object is responsible for holding information related to the callsite of the macro. It has a few functions:
 30 | 
 31 | - `fun callsite(self): macros.CallNode` returns a handle to a node that represents the macro's callsite.
 32 | - `fun callsite_env(self): macros.Environment[:call]` returns a handle to the semantic checking environment positioned at the call site.
 33 | - `fun this_env(self): macros.Environment[:definition]` returns a handle to the semantic checking environment positioned at the macro's definition.
 34 | - `fun expected_return_type(self): ?macros.Type` returns the type that is expected at the position of the callsite. This is non-nil only in expression macros.
 35 | 
 36 | ## Nodes
 37 | 
 38 | All operations on nodes within macros must always produce correct AST. This is ensured through type-safe handles to nodes, [inkwell](https://github.com/TheDan64/inkwell)-style. For instance, you can modify the first node of a `CallNode` (the called function), and push more nodes into it, but you cannot modify its second node.
 39 | 
 40 | Additionally, the nodes you push into a `CallNode` must all satisfy `Expression`, which is a trait implemented by all nodes that are valid in expression position. To allow for runtime switching on expressions, the <!-- TODO: this sentence was left unfinished -->
 41 | 
 42 | Unary and binary operators should probably be represented as unions internally (the variants shouldn't be publicly visible), and their type should be discriminated by an atom. For instance, to construct an addition, you can do `BinaryNode.new(:add, left, right)`. To construct a pointer dereference, you can do `UnaryNode.new(:deref, left)`. Note that these operators all desugar to the small subset of AST available to macros, as described below.
 43 | 
 44 | ### The canonical representation
 45 | 
 46 | Macros should only ever deal with a subset of the full AST, one that's mostly portable through compiler versions. We call that subset the _canonical representation_.
 47 | Additionally, AST that is input into the macro is not semantically checked beforehand, and AST that is output by the macro must always be semantically checked using the macro's environment.
 48 | 
 49 | I don't know yet what the exact set of nodes exposed to macros will be, but I can imagine all literals being included, like:
 50 | ```
 51 | # before sem
 52 | True
 53 | False
 54 | Integer
 55 | Float
 56 | Identifier
 57 | 
 58 | # both before and after sem
 59 | String
 60 | Atom
 61 | 
 62 | # after sem
 63 | Bool
 64 | Uint8
 65 | Uint16
 66 | Uint32
 67 | Uint64
 68 | Int8
 69 | Int16
 70 | Int32
 71 | Int64
 72 | Float32
 73 | Float64
 74 | Symbol
 75 | ```
 76 | In addition to that, we need control flow:
 77 | ```
 78 | # untyped
 79 | Do
 80 | If
 81 | 
 82 | # shared
 83 | IfBranch
 84 | ElseBranch
 85 | 
 86 | # typed
 87 | 
 88 | # NB: the separation here is needed so that we can ensure that the last statement in an expression
 89 | # block is an expression statement
 90 | DoExpression
 91 | DoStatement
 92 | 
 93 | IfExpression
 94 | IfStatement
 95 | 
 96 | While
 97 | For
 98 | ```
 99 | Also, definitions:
100 | ```
101 | Fun
102 | Object
103 | Union
104 | AtomSet
105 | Type
106 | ```
107 | While we're on the topic of functions, let's talk _calls_. For API simplicity, untyped operators desugar to three different node kinds: `Nullary`, `Unary`, and `Binary`.
108 | ```
109 | # these are macro-exclusive, untyped, generalized versions of operators.
110 | # their extra is an atom specifying what operator the node refers to
111 | Nullary
112 | Unary
113 | Binary
114 | 
115 | # untyped
116 | Call
117 | 
118 | # typed
119 | ResolvedCall
120 | ```
121 | 
122 | ### Type-level separation between unresolved and resolved nodes
123 | 
124 | Certain types of nodes can be unresolved or resolved, and in these cases we want to separate between them at the type level. For that, we'll use atoms in generic parameters.
125 | 
126 | ```
127 | # I am aware that nobody likes Java-long names, but readability is important.
128 | # Users do not touch this atom anyways.
129 | atom ControlFlowResolutionState
130 |    :unresolved
131 |    :resolved_to_expression
132 |    :resolved_to_statement
133 | 
134 | object DoNode[S]
135 | where S: ControlFlowResolutionState
136 |    # details omitted
137 | 
138 | impl UnresolvedNode for DoNode[:unresolved]
139 |    # details omitted
140 | 
141 | impl[S] ResolvedNode for DoNode[S]
142 | where S: ControlFlowResolutionState
143 |    # details omitted
144 | ```
145 | 
146 | This way we can avoid creating a lot of types for each state a node kind can be in; we use generic `impl`s to only implement specific sets of functionality for nodes that satisfy specific states.
147 | 
148 | ### Resolving nodes
149 | 
150 | An `UnresolvedNode` can become a `ResolvedNode` if it is passed through an `Environment` for semantic analysis. An important distinction to make is that `ResolvedNode` trees are **incompatible** with `UnresolvedNode` trees! A resolved tree must be resolved _fully_, it cannot contain any unresolved nodes inside. This is safe-guarded by the type system.
151 | 
152 | An environment exposes a few functions for resolving (semantically analyzing) nodes:
153 | ```
154 | object Environment[K]
155 | where K: EnvironmentKind
156 |    # details omitted
157 | 
158 | atom EnvironmentKind
159 |    :call        # callsite
160 |    :definition  # definition site
161 | 
162 | impl[K] Environment[K]
163 |    fun resolve[T](self, node: T): T.Resolved
164 |    where
165 |       T: UnresolvedNode
166 | 
167 |       _  # details omitted
168 | 
169 | impl Environment[:call]
170 |    fun lookup(self, identifier: IdentifierNode): AnySymbolNode
171 |       _  # details omitted
172 | 
173 | impl Environment[:definition]
174 |    fun get(self, identifier: String): AnySymbolNode
175 |       _  # details omitted
176 | ```
177 | 
178 | Note that there are two separate kinds of environments that can be used for looking things up. As already mentioned in the section describing [the context](#the-context), an environment points either to the callsite, or the definition site. Any environment can be used for semantic checking, and its scope will be used for looking up identifiers.
179 | 
180 | #### Symbol creation
181 | 
182 | Macros cannot create bare identifiers. They can only process existing ones input into them, but all identifiers created by macros must already be resolved. In fact, all symbols created by macros come as part of full declarations. For instance, it's impossible to create a variable symbol that does not have a corresponding declaration, because then the scope of the variable is not clear.
183 | 
184 | One thing that's _not_ compile-time checked with variables is their scope. The API will allow you to generate the following code:
185 | ```
186 | print(<symbol 1>)
187 | val <symbol 1> = 10
188 | ```
189 | Note how `<symbol 1>` is undeclared in the `print`; the `val` was created, then the `print` was added into the AST, and then the `val` was added afterwards.
190 | 
191 | Unfortunately there is no way to model this in tsuki's type system, at least not that I know of. The compiler will reject this code before any code generation is performed, but the error message may be unclear, as the AST resulting from macros may not have proper span information.
192 | 
193 | ## Symbols
194 | 
195 | Symbol nodes represent identifiers that have meaning. This meaning may be type information, which is possessed by _all_ symbols, as well as extra metadata on what the symbol is more concretely.
196 | 
197 | This information may be queried by using getters defined on `macros.AnySymbolNode`:
198 | ```
199 | union AnySymbol
200 |    :variable(VariableSymbol)
201 |    :fun(FunSymbol)
202 |    :object(ObjectSymbol)
203 |    :union(UnionSymbol)
204 |    :atom(AtomSymbol)
205 | 
206 | object AnySymbolNode
207 |    # details omitted
208 | 
209 | impl AnySymbolNode
210 |    ## Returns the symbol stored in the node.
211 |    fun symbol(self): AnySymbol
212 | ```
213 | 
214 | The `AnySymbol` union encapsulates all the possible symbol kinds in a set that's easy to `match` over. Additionally, convenience methods are provided for converting to the inner values, for use with `if val` and the like:
215 | 
216 | ```
217 | impl AnySymbol
218 |    fun as_variable(self): ?VariableSymbol
219 |       _  # details omitted
220 | 
221 |    fun as_fun(self): ?FunSymbol
222 |       _  # details omitted
223 | 
224 |    fun as_object(self): ?ObjectSymbol
225 |       _  # details omitted
226 | 
227 |    fun as_union(self): ?UnionSymbol
228 |       _  # details omitted
229 | 
230 |    fun as_atom(self): ?AtomSymbol
231 |       _  # details omitted
232 | ```
233 | 
234 | ## Error handling
235 | 
236 | Using `panic` in macros is forbidden, because it leads to a bad user experience. All options, results, and the like, must be unwrapped explicitly.
237 | 
238 | Upon encountering invalid input, a macro can return an `:error` result with a `macros.Error` inside. This type stores information about the span the error covers, as well as the error message.
239 | 
240 | Because tsuki's control flow analysis is quite simple (at least in the early stages), it cannot infer what values are possible for a given variable in a given branch of the program. This is why `unreachable()` exists; it's to mark these spots as unreachable, and if a given spot is reached, the program panics. But panicking is strictly forbidden inside macros, so the `@std.macros` module provides replacements for these common tasks.
241 | 
242 | `AnyNode` (which _all_ node kinds can convert to) provides a function `error`, whose sole purpose is to produce errors:
243 | ```
244 | impl AnyNode
245 |    fun error(self, message: String): macros.Error
246 |       _  # details omitted
247 | ```
248 | 
249 | Additionally, a few standalone macros exist that create an error, whose span is the callsite:
250 | ```
251 | # Accepts a single String argument with an error message, and produces an error whose span is
252 | # the callsite.
253 | macro macro_error(ctx: Context[:call]): Error!ResolvedNode
254 |    _  # details omitted
255 | 
256 | # Accepts no arguments, and produces an error with the message "unreachable code reached".
257 | macro unreachable(ctx: Context[:call]): Error!ResolvedNode
258 |    _  # details omitted
259 | ```
260 | 
261 | This means that usual control flow:
262 | ```
263 | if some_cool_condition
264 |    # some_cool_condition guarantees that my_based_value is not :cringe, but the compiler can't
265 |    # figure that out
266 |    match my_based_value
267 |       :based -> _  # do stuff
268 |       :cool -> _   # do more stuff
269 |       :cringe -> panic("there is no cringe in this program")
270 | ```
271 | turns to this:
272 | ```
273 | import @std.macros for macro_error
274 | 
275 | if some_cool_condition
276 |    match my_based_value
277 |       :based -> _
278 |       :cool -> _
279 |       :cringe -> return macro_error("there is no cringe in this program")
280 | ```
281 | 


--------------------------------------------------------------------------------
/docs/mangling.md:
--------------------------------------------------------------------------------
 1 | # Some notes on mangling
 2 | 
 3 | This document outlines some considerations for implementing mangling inside the compiler.
 4 | 
 5 | ## LLVM backend
 6 | 
 7 | I'd prefer if mangled names were descriptive and human-readable, rather than overly mangled to the point where no human being is able to understand them (looking at you, C++).
 8 | 
 9 | Specifics:
10 | - LLVM functions can be called whatever we want, there's no limit on which characters we can or cannot use.
11 | - Rust strings must be valid UTF-8.
12 | 
13 | The following scheme shall be used in the LLVM backend:
14 | - `<package>:<module>.<path>`
15 |   - where `<path>` is one of the following:
16 |     - `<fun>` - eg. `function_name`, `blah1` - a valid function name (in `snake_case`)
17 |     - `<type>.<fun>` - eg. `MyObject.function` - an associated function
18 |     - `<type>.[<trait>].<fun>` - eg. `MyObject.[As[Int]].convert` - a function associated with a trait
19 |     - `{<integer>}` - eg. `{0}` - anonymous functions, eg. closures and do-blocks
20 |   - Paths may nest freely. `module.function.local_function` is a perfectly valid path, specifying a locally-scoped function `local_function` inside the function `function` inside the module `module`.
21 | 
22 | Examples:
23 | - `std:panics.panic` - function `panic` in module `panics` of package `std`
24 | - `std:float32.Float32.sin` - function `sin` for the type `Float32` in module `float32` of package `std`
25 | - `std:int32.Int32.[Dup].dup` - function `dup` from the implementation of `Dup` for the type `Int32` in module `int32` of package `std`
26 | - `std_tests:results.{12}` - 12th anonymous function in the module `results` of package `std_tests`
27 | 
28 | Other backends may use mangling schemes different to this one; after all, different targets have different requirements.
29 | 
30 | # Stack traces
31 | 
32 | Function names mangled using this scheme should _never_ appear in stack traces, as it leads to a terrible user experience. They get overly long and hard to read, hence I propose to drop the package and module names. After all, they're already obvious from the filename.
33 | 
34 | For instance, if we have a stack trace for package `test`, module `hello`, this stack trace:
35 | ```
36 | Stack traceback:
37 |   std:panics.tsu 234:4         std:panics.panic
38 |   test:hello.tsu 4:2           test:hello.my_fallible_function
39 |   test:hello.tsu 6:1           test:hello
40 | ```
41 | should get turned into:
42 | ```
43 | Stack traceback:
44 |   std:panics.tsu 234:4         panic
45 |   test:hello.tsu 4:2           my_fallible_function
46 |   test:hello.tsu 6:1           {module code}
47 | ```
48 | 
49 | Stack traces should be compact and readable at a glance. We don't need to list _every_ single path piece. This is something most compilers nowadays get wrong.
50 | 
51 | Everything after the module name should remain as-is; if nothing remains (the mangled name consisted only of the package and module), the resulting empty string should be replaced with `{module code}`.
52 | 
53 | **UX above all else**.
54 | 
55 | ## Some deets on the stack trace's formatting
56 | 
57 | The paths shown in the stack trace should never be full paths to the `src` directory. Instead, they should be `<package name>:<path to file inside of src>`. Not sure about this, but maybe the `.tsu` extension should also be dropped?
58 | 
59 | The full format is the following:
60 | ```
61 | Stack traceback:
62 |   <file> <line>:<column>        <short function name>
63 |   ...
64 | ```
65 | 


--------------------------------------------------------------------------------
/docs/pragmas.md:
--------------------------------------------------------------------------------
 1 | # Pragmas specific to this implementation
 2 | 
 3 | Different implementations may define their own sets of pragmas for implementing the standard library. This reference implementation of the compiler and standard library uses the following pragmas.
 4 | 
 5 | #### `compiler_builtin_type(type_identifier: Atom)`
 6 | 
 7 | The `compiler_builtin_type` pragma may be used on `type` definitions without an `=` sign after the name, to bind builtin types to names. The `type_identifier` atom may be one of the following values, corresponding to the following built-in types:
 8 | 
 9 | | Atom value | Type |
10 | | --- | --- |
11 | | `:noreturn` | `NoReturn` |
12 | | `:bool` | `Bool` |
13 | | `:int8` | `Int8` |
14 | | `:int16` | `Int16` |
15 | | `:int32` | `Int32` |
16 | | `:int64` | `Int64` |
17 | | `:uint8` | `Uint8` |
18 | | `:uint16` | `Uint16` |
19 | | `:uint32` | `Uint32` |
20 | | `:uint64` | `Uint64` |
21 | | `:float32` | `Float32` |
22 | | `:float64` | `Float64` |
23 | | `:size` | `Size` |
24 | 
25 | Examples:
26 | 
27 | ```
28 | type NoReturn :: compiler_builtin_type(:noreturn)
29 | type Size :: compiler_builtin_type(:size)
30 | ```
31 | 
32 | These type definitions can be found in `std/std.tsu`, which is imported implicitly into each module.
33 | 


--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | tab_spaces = 3
2 | match_arm_leading_pipes = "Preserve"
3 | chain_width = 100
4 | imports_granularity = "Module"
5 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt::Display;
  2 | use std::path::PathBuf;
  3 | 
  4 | use structopt::StructOpt;
  5 | use tsuki_backend_llvm::{ExecutableFile, LlvmBackend, LlvmBackendConfig, OptimizationLevel};
  6 | use tsuki_frontend::backend::Backend;
  7 | use tsuki_frontend::common::{Errors, SourceFile};
  8 | use tsuki_frontend::AnalyzeOptions;
  9 | // Command-line options of the `tsuki` executable, parsed in `main` via `Options::from_args()`.
 10 | #[derive(StructOpt)]
 11 | #[structopt(name = "tsuki")]
 12 | struct Options {
 13 |    /// The directory for storing intermediary files.
 14 |    #[structopt(long, parse(from_os_str))]
 15 |    cache_dir: Option<PathBuf>,
 16 | 
 17 |    /// The name of the package. This is used for controlling the object file's name.
 18 |    #[structopt(short = "p", long)]
 19 |    package_name: String,
 20 | 
 21 |    /// The `src` directory of the package.
 22 |    #[structopt(short = "r", long)]
 23 |    package_root: PathBuf,
 24 | 
 25 |    /// The root directory of the standard library.
 26 |    #[structopt(short = "s", long)]
 27 |    std_path: PathBuf,
 28 | 
 29 |    /// The root source file. Must be located in the package root.
 30 |    #[structopt(short = "m", long)]
 31 |    main_file: PathBuf,
 32 | 
 33 |    /// Only check the code for validity, without compiling it.
 34 |    #[structopt(long)]
 35 |    check: bool,
 36 | 
 37 |    /// The optimization level to use when compiling.
 38 |    #[structopt(long, name = "level", default_value = "essential")]
 39 |    optimize: OptimizationLevel,
 40 | 
 41 |    /// Dumps the source code before compiling.
 42 |    #[structopt(long)]
 43 |    dump_source: bool,
 44 | 
 45 |    /// Dumps the AST directly after parsing.
 46 |    #[structopt(long)]
 47 |    dump_ast_pre_sem: bool,
 48 | 
 49 |    /// Dumps the AST after checking it semantically.
 50 |    #[structopt(long)]
 51 |    dump_ast_post_sem: bool,
 52 | 
 53 |    /// Dumps the generated LLVM IR.
 54 |    #[structopt(long)]
 55 |    dump_llvm_ir: bool,
 56 | }
 57 | 
 58 | const EXIT_COMPILE: i32 = 1; // Exit status used by `unwrap_errors` (compiler-reported errors).
 59 | const EXIT_FATAL: i32 = 2; // Exit status used by `unwrap_error` (any other fatal error).
 60 | 
 61 | /// Returns the `Ok` value of `r`.
 62 | ///
 63 | /// On `Err`, prints `error: <message>` to stderr and exits with `EXIT_FATAL`.
 64 | fn unwrap_error<T, E>(r: Result<T, E>) -> T
 65 | where
 66 |    E: Display,
 67 | {
 68 |    r.unwrap_or_else(|error| {
 69 |       eprintln!("error: {}", error);
 70 |       std::process::exit(EXIT_FATAL)
 71 |    })
 72 | }
 73 | 
 74 | fn unwrap_errors<T>(r: Result<T, Errors>) -> T {
 75 |    match r {
 76 |       Ok(ok) => ok,
 77 |       Err(errors) => {
 78 |          errors.iter().for_each(|error| eprintln!("{:#}", error));
 79 |          std::process::exit(EXIT_COMPILE)
 80 |       }
 81 |    }
 82 | }
 83 | 
 84 | /// Entry point of the `tsuki` compiler driver.
 85 | ///
 86 | /// Parses the command line, reads the main source file, and then either only analyzes it
 87 | /// (`--check`) or compiles it to an object file and links an executable.
 88 | ///
 89 | /// # Errors
 90 | ///
 91 | /// Returns an error if the current directory cannot be determined while no `--cache-dir` was
 92 | /// given, or if linking fails. Other failures terminate the process through `unwrap_error` and
 93 | /// `unwrap_errors`.
 94 | fn main() -> Result<(), Box<dyn std::error::Error>> {
 95 |    let options = Options::from_args();
 96 |    let frontend_debug_options = tsuki_frontend::DebugOptions {
 97 |       dump_source: options.dump_source,
 98 |       dump_ast_pre_sem: options.dump_ast_pre_sem,
 99 |       dump_ast_post_sem: options.dump_ast_post_sem,
100 |    };
101 | 
102 |    // Only fall back to `<current directory>/bin` when no cache directory was given. Using
103 |    // `unwrap_or` here would query the current directory eagerly, so a failure to determine it
104 |    // would abort the compiler even though `--cache-dir` made the lookup unnecessary.
105 |    let cache_dir = match options.cache_dir {
106 |       Some(dir) => dir,
107 |       None => std::env::current_dir()?.join("bin"),
108 |    };
109 |    let backend = LlvmBackend::new(LlvmBackendConfig {
110 |       cache_dir: &cache_dir,
111 |       std_path: &options.std_path,
112 |       package_name: &options.package_name,
113 |       // TODO: Cross-compilation.
114 |       target_triple: None,
115 |       optimization_level: options.optimize,
116 |       frontend_debug_options,
117 |       backend_debug_options: tsuki_backend_llvm::DebugOptions {
118 |          dump_ir: options.dump_llvm_ir,
119 |       },
120 |    });
121 | 
122 |    let source = unwrap_error(std::fs::read_to_string(&options.main_file));
123 | 
124 |    let source_file = unwrap_error(SourceFile::new(
125 |       options.package_name,
126 |       options.package_root,
127 |       options.main_file,
128 |       source,
129 |    ));
130 | 
131 |    if options.check {
132 |       let _ = unwrap_errors(tsuki_frontend::analyze(
133 |          AnalyzeOptions {
134 |             file: &source_file,
135 |             std_path: options.std_path,
136 |          },
137 |          &frontend_debug_options,
138 |       ));
139 |    } else {
140 |       let object = unwrap_errors(backend.compile(source_file));
141 |       let _executable = ExecutableFile::link(backend, &[object])?;
142 |    }
143 | 
144 |    Ok(())
145 | }
146 | 


--------------------------------------------------------------------------------
/std/kernel.tsu:
--------------------------------------------------------------------------------
 1 | # tsuki standard library
 2 | # Copyright (C) 2021 liquidev
 3 | # Licensed under the MIT license. Check the LICENSE file in the repository root for details.
 4 | 
 5 | # The kernel is arguably the most important piece of source code in the standard library.
 6 | # It provides definitions for primitive types, as well as compiler-supported types such as optionals
 7 | # and results.
 8 | # The kernel is loaded by the compiler before any other code in a given file, and must not be
 9 | # imported manually.
10 | 
11 | pub type NoReturn :: compiler_builtin_type(:noreturn)
12 | 
13 | pub type Bool :: compiler_builtin_type(:bool)
14 | 
15 | pub type Uint8 :: compiler_builtin_type(:uint8)
16 | pub type Uint16 :: compiler_builtin_type(:uint16)
17 | pub type Uint32 :: compiler_builtin_type(:uint32)
18 | pub type Uint64 :: compiler_builtin_type(:uint64)
19 | 
20 | pub type Int8 :: compiler_builtin_type(:int8)
21 | pub type Int16 :: compiler_builtin_type(:int16)
22 | pub type Int32 :: compiler_builtin_type(:int32)
23 | pub type Int64 :: compiler_builtin_type(:int64)
24 | 
25 | pub type Float32 :: compiler_builtin_type(:float32)
26 | pub type Float64 :: compiler_builtin_type(:float64)
27 | 
28 | pub type Size :: compiler_builtin_type(:size)
29 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "tsuki-backend-llvm"
 3 | version = "0.1.0"
 4 | edition = "2018"
 5 | 
 6 | [dependencies]
 7 | 
 8 | tsuki-frontend = { path = "../tsuki-frontend" }
 9 | 
10 | smallvec = "1.6.1"
11 | thiserror = "1.0.26"
12 | inkwell = { git = "https://github.com/TheDan64/inkwell", rev = "41857f9", features = ["llvm12-0"] }
13 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/codegen.rs:
--------------------------------------------------------------------------------
  1 | //! Common code generation state.
  2 | 
  3 | use std::collections::HashMap;
  4 | use std::fmt;
  5 | 
  6 | use inkwell::basic_block::BasicBlock;
  7 | use inkwell::builder::Builder;
  8 | use inkwell::context::Context;
  9 | use inkwell::module::Module;
 10 | use inkwell::passes::PassManager;
 11 | use inkwell::types::StructType;
 12 | use inkwell::values::{BasicValueEnum, FunctionValue};
 13 | use tsuki_frontend::ast::{NodeId, NodeKind};
 14 | use tsuki_frontend::common::SourceFile;
 15 | use tsuki_frontend::scope::ScopeId;
 16 | use tsuki_frontend::sem::Ir;
 17 | 
 18 | use crate::functions::Function;
 19 | use crate::variables::Variables;
 20 | 
 21 | /// Code generation state shared across functions.
 22 | pub struct CodeGen<'src, 'c, 'pm> {
 23 |    // This field may be unused as its primary purpose currently is debugging. Production code should
 24 |    // not contain any `astdump::dump_ast`s or such, so this field will remain mostly unused.
 25 |    // This will change once debug info generation is implemented.
 26 |    #[allow(unused)]
 27 |    pub(crate) source: &'src SourceFile,
 28 |    pub(crate) context: &'c Context, // Used to create builders, basic blocks, and types.
 29 |    pub(crate) module: &'pm Module<'c>,
 30 |    pub(crate) builder: Builder<'c>, // Every `CodeGen` gets its own builder (see `for_function`).
 31 |    pub(crate) pass_manager: &'pm PassManager<FunctionValue<'c>>, // Run in `finish_function`.
 32 | 
 33 |    pub(crate) function: Function<'c>, // The function code is currently being generated for.
 34 |    pub(crate) variables: Variables<'c>,
 35 | 
 36 |    /// This map stores a list of blocks to which unconditional jumps have to be appended, as a
 37 |    /// result of `break` expressions.
 38 |    ///
 39 |    /// The second `usize` in the tuple key is required to allow for multiple `break`s in one
 40 |    /// breaking scope.
 41 |    pub(crate) break_blocks: HashMap<(ScopeId, usize), BasicBlock<'c>>,
 42 | 
 43 |    pub(crate) unit_type: StructType<'c>, // Empty, non-packed struct type; see `new`.
 44 | }
 45 | 
 46 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
 47 |    /// Creates a code generator for `function`, positioned at the end of its entry block.
 48 |    pub fn new(
 49 |       source: &'src SourceFile,
 50 |       context: &'c Context,
 51 |       pass_manager: &'pm PassManager<FunctionValue<'c>>,
 52 |       module: &'pm Module<'c>,
 53 |       function: Function<'c>,
 54 |    ) -> Self {
 55 |       let builder = context.create_builder();
 56 |       builder.position_at_end(function.entry_block);
 57 |       let mut code_gen = Self {
 58 |          source,
 59 |          context,
 60 |          // TODO: import, module resolution and names.
 61 |          module,
 62 |          builder,
 63 |          pass_manager,
 64 |          function,
 65 |          variables: Variables::new(),
 66 |          break_blocks: HashMap::new(),
 67 |          // The unit type is an empty, non-packed struct.
 68 |          unit_type: context.struct_type(&[], false),
 69 |       };
 70 |       // Temporary: set up some libc functions.
 71 |       code_gen.load_libc();
 72 |       code_gen
 73 |    }
 74 | 
 75 |    /// Creates a code generator for another function, sharing the source file, context, module,
 76 |    /// pass manager, and unit type with `self`, but starting with a fresh builder, variables, and
 77 |    /// `break` block table.
 78 |    pub fn for_function(&self, function: Function<'c>) -> Self {
 79 |       Self {
 80 |          function,
 81 |          builder: self.context.create_builder(),
 82 |          variables: Variables::new(),
 83 |          break_blocks: HashMap::new(),
 84 |          ..*self
 85 |       }
 86 |    }
 87 | 
 88 |    /// Generates code for `node` in statement position, discarding any value it produces.
 89 |    pub fn generate_statement(&mut self, ir: &Ir, node: NodeId) {
 90 |       match ir.ast.kind(node) {
 91 |          // Control flow
 92 |          NodeKind::Pass => (),
 93 |          NodeKind::StatementList => self.generate_statements(ir, node),
 94 |          NodeKind::DoStatement => {
 95 |             let _ = self.generate_do(ir, node);
 96 |          }
 97 |          NodeKind::IfStatement => {
 98 |             let _ = self.generate_if(ir, node);
 99 |          }
100 |          NodeKind::While => self.generate_while(ir, node),
101 | 
102 |          // Declarations
103 |          NodeKind::Val | NodeKind::Var => self.generate_variable_declaration(ir, node),
104 |          NodeKind::AssignDiscard => self.generate_discarding_assignment(ir, node),
105 |          NodeKind::Fun => self.generate_function(ir, node),
106 |          // TODO: Remove type aliases from the IR, as they do not serve any purpose for the code
107 |          // generation stage.
108 |          NodeKind::Type => (),
109 | 
110 |          // Expressions
111 |          NodeKind::Assign => {
112 |             let _ = self.generate_assignment(ir, node);
113 |          }
114 |          _ => { // Any other node kind is generated as an expression whose value is unused.
115 |             let _ = self.generate_expression(ir, node);
116 |          }
117 |       }
118 |    }
119 | 
120 |    /// Terminates the current function with a `ret` instruction, then runs the function pass
121 |    /// manager on it.
122 |    pub fn finish_function(&self, return_value: Option<BasicValueEnum<'c>>) {
123 |       // `as_ref()` does not coerce the value to `&dyn`, so both cases are spelled out.
124 |       if let Some(value) = return_value {
125 |          self.builder.build_return(Some(&value));
126 |       } else {
127 |          self.builder.build_return(None);
128 |       }
129 |       self.pass_manager.run_on(&self.function.value);
130 |    }
131 | }
132 | 
133 | impl fmt::Debug for CodeGen<'_, '_, '_> {
134 |    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
135 |       formatter.write_str(self.module.print_to_string().to_str().unwrap())
136 |    }
137 | }
138 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/control_flow.rs:
--------------------------------------------------------------------------------
  1 | //! Code generation for statement lists and control flow structures.
  2 | 
  3 | use inkwell::basic_block::BasicBlock;
  4 | use inkwell::values::{BasicValueEnum, IntValue};
  5 | use smallvec::SmallVec;
  6 | use tsuki_frontend::ast::{NodeId, NodeKind};
  7 | use tsuki_frontend::scope::ScopeId;
  8 | use tsuki_frontend::sem::Ir;
  9 | 
 10 | use crate::codegen::CodeGen;
 11 | 
 12 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
 13 |    /// Generates code for a list of statements.
 14 |    ///
 15 |    /// Walks the node list of `node`, generating each visited child with `generate_statement`.
 16 |    pub(crate) fn generate_statements(&mut self, ir: &Ir, node: NodeId) {
 17 |       ir.ast.walk_node_list(node, |_, _, statement| self.generate_statement(ir, statement));
 18 |    }
 19 | 
 20 |    /// Generates code for a list of statements that ends in a tail expression.
 21 |    ///
 22 |    /// The child for which `is_last_child` holds is generated as an expression and its value is
 23 |    /// returned; every other child is generated as a statement. If no child was generated as the
 24 |    /// tail, the unit literal is returned instead.
 25 |    pub(crate) fn generate_statements_with_tail_expression(
 26 |       &mut self,
 27 |       ir: &Ir,
 28 |       node: NodeId,
 29 |    ) -> BasicValueEnum<'c> {
 30 |       let children = ir.ast.extra(node).as_node_list().unwrap();
 31 |       let mut tail_value = None;
 32 |       for (index, &child) in children.iter().enumerate() {
 33 |          if !ir.ast.is_last_child(node, index) {
 34 |             self.generate_statement(ir, child);
 35 |          } else {
 36 |             tail_value = Some(self.generate_expression(ir, child));
 37 |          }
 38 |       }
 39 |       tail_value.unwrap_or_else(|| self.generate_unit_literal().into()) }
 40 | 
 41 |    /// Generates code for a `do` block, in either its expression or its statement form.
 42 |    ///
 43 |    /// A `DoExpression` yields `Some` with the value of its tail expression, while a `DoStatement`
 44 |    /// yields `None`.
 45 |    pub(crate) fn generate_do(&mut self, ir: &Ir, node: NodeId) -> Option<BasicValueEnum<'c>> {
 46 |       match ir.ast.kind(node) {
 47 |          NodeKind::DoStatement => {
 48 |             self.generate_statements(ir, node);
 49 |             None
 50 |          }
 51 |          NodeKind::DoExpression => Some(self.generate_statements_with_tail_expression(ir, node)),
 52 |          _ => unreachable!(),
 53 |       }
 54 |    }
 55 | 
 56 |    /// Generates code for an `if` expression or an `if` statement.
 57 |    ///
 58 |    /// Returns `Some` with the resulting value for an `IfExpression`, and `None` otherwise.
 59 |    pub(crate) fn generate_if(&mut self, ir: &Ir, node: NodeId) -> Option<BasicValueEnum<'c>> {
 60 |       /// This local struct stores information about the condition of an `if` branch.
 61 |       struct Condition<'c> {
 62 |          block: BasicBlock<'c>,
 63 |          value: IntValue<'c>,
 64 |          // We store the ending block of the condition, because it may be different than the
 65 |          // starting block.
 66 |          end_block: BasicBlock<'c>,
 67 |       }
 68 |       // This local struct stores information about a single `if` branch: its condition, condition
 69 |       // block, and body block.
 70 |       struct Branch<'c> {
 71 |          condition: Option<Condition<'c>>,
 72 |          body: BasicBlock<'c>,
 73 |          // Similarly to the condition, we store the ending block, because it may be different than
 74 |          // the starting block, and it's where we must emit the final `br` instructions.
 75 |          end_block: BasicBlock<'c>,
 76 |          result: Option<BasicValueEnum<'c>>,
 77 |       }
 78 |       let mut branches = SmallVec::<[Branch<'c>; 16]>::new();
 79 |       let is_expression = ir.ast.kind(node) == NodeKind::IfExpression;
 80 |       // Unwrapping here is safe, because we are coming from an existing block
 81 |       // (eg. the function's %entry).
 82 |       let entry_block = self.builder.get_insert_block().unwrap();
 83 | 
 84 |       // Generate code for each of the branches. The `br` instructions are added after the inner
 85 |       // code is generated, because all blocks have to be known beforehand.
 86 |       let branch_nodes = ir.ast.extra(node).as_node_list().unwrap();
 87 |       for (index, &branch) in branch_nodes.iter().enumerate() {
 88 |          let body_block = self.context.append_basic_block(
 89 |             self.function.value,
 90 |             // For easier debugging of the IR, the block's name is determined by the branch type.
 91 |             match ir.ast.kind(branch) {
 92 |                NodeKind::IfBranch => "elif", // Also used for the leading `if` branch.
 93 |                NodeKind::ElseBranch => "else",
 94 |                _ => unreachable!(),
 95 |             },
 96 |          );
 97 |          // The condition is only generated for `IfBranch`es, because the `ElseBranch` does not
 98 |          // have a condition.
 99 |          let mut condition = None;
100 |          if ir.ast.kind(branch) == NodeKind::IfBranch {
101 |             let condition_block = if index == 0 {
102 |                // We don't need a new block if this is the first branch; we can simply fall through
103 |                // from the current one.
104 |                entry_block
105 |             } else {
106 |                self.context.prepend_basic_block(body_block, "condition")
107 |             };
108 |             self.builder.position_at_end(condition_block);
109 |             let condition_value = self.generate_expression(ir, ir.ast.first_handle(branch));
110 |             let end_block = self.builder.get_insert_block().unwrap();
111 |             condition = Some(Condition {
112 |                block: condition_block,
113 |                value: condition_value.into_int_value(),
114 |                end_block,
115 |             });
116 |          }
117 |          // Then we generate the body.
118 |          self.builder.position_at_end(body_block);
119 |          let result = if is_expression {
120 |             Some(self.generate_statements_with_tail_expression(ir, branch))
121 |          } else {
122 |             self.generate_statements(ir, branch);
123 |             None
124 |          };
125 |          let end_block = self.builder.get_insert_block().unwrap();
126 |          branches.push(Branch {
127 |             condition,
128 |             body: body_block,
129 |             end_block,
130 |             result,
131 |          });
132 |       }
133 |       // Generate the terminating %end block. After a successfully executed branch, this block is
134 |       // branched to unconditionally, and is where control flow continues after the if statement
135 |       // ends.
136 |       let end_block = self.context.append_basic_block(self.function.value, "end");
137 | 
138 |       // Now that we have all the blocks, we're ready to backpatch some `br` instructions into
139 |       // the blocks.
140 |       for (index, branch) in branches.iter().enumerate() {
141 |          let &Branch {
142 |             condition,
143 |             body: body_block,
144 |             end_block: branch_end_block,
145 |             ..
146 |          } = &branch;
147 |          if let Some(condition) = condition {
148 |             // The block to execute if the branch fails is dependent on whether there's a branch
149 |             // after this one, and also if the branch after this one is an `else` branch without
150 |             // a condition.
151 |             let else_block = if let Some(next_branch) = branches.get(index + 1) {
152 |                if let Some(next_condition) = &next_branch.condition {
153 |                   next_condition.block
154 |                } else {
155 |                   next_branch.body
156 |                }
157 |             } else {
158 |                end_block
159 |             };
160 |             self.builder.position_at_end(condition.end_block);
161 |             self.builder.build_conditional_branch(condition.value, *body_block, else_block);
162 |          }
163 |          self.builder.position_at_end(*branch_end_block);
164 |          self.builder.build_unconditional_branch(end_block);
165 |       }
166 | 
167 |       // Compilation is resumed normally at the %end block.
168 |       self.builder.position_at_end(end_block);
169 |       // In case of an if expression, we have to generate a `phi` node at the end that's going to
170 |       // contain our final value.
171 |       if is_expression {
172 |          let typ = branches[0].result.unwrap().get_type(); // Assumes all branches agree on the type.
173 |          let phi = self.builder.build_phi(typ, "ifresult");
174 |          for Branch {
175 |             end_block, result, ..
176 |          } in branches
177 |          {
178 |             phi.add_incoming(&[(&result.unwrap(), end_block)]);
179 |          }
180 |          // It's a bit strange that `phi`'s function for this is not called `as_basic_value_enum`.
181 |          Some(phi.as_basic_value())
182 |       } else {
183 |          None
184 |       }
185 |    }
186 | 
187 |    /// Generates code for a `while` loop, including the jumps for `break`s that target its scope.
188 |    pub(crate) fn generate_while(&mut self, ir: &Ir, node: NodeId) {
189 |       // Save the start block for generating the initial `br label %condition` instruction.
190 |       let start_block = self.builder.get_insert_block().unwrap();
191 | 
192 |       // Generate the condition block and value.
193 |       let condition_block = self.context.append_basic_block(self.function.value, "while");
194 |       self.builder.position_at_end(condition_block);
195 |       let condition_value = self.generate_expression(ir, ir.ast.first_handle(node));
196 |       // Save the end of the condition value, in case it generates some extra blocks.
197 |       let condition_end_block = self.builder.get_insert_block().unwrap();
198 | 
199 |       // Generate the loop body.
200 |       let body_block = self.context.append_basic_block(self.function.value, "do");
201 |       self.builder.position_at_end(body_block);
202 |       self.generate_statements(ir, node); // The body is the node list of the `while` node itself.
203 |       let body_end_block = self.builder.get_insert_block().unwrap();
204 | 
205 |       // Generate the final %end block.
206 |       let end_block = self.context.append_basic_block(self.function.value, "end");
207 | 
208 |       // Now, insert all the branch instructions.
209 |       // First we start with the unconditional branch to the condition block.
210 |       self.builder.position_at_end(start_block);
211 |       self.builder.build_unconditional_branch(condition_block);
212 |       // Then, we build the conditional branch at the end of the condition block.
213 |       self.builder.position_at_end(condition_end_block);
214 |       self.builder.build_conditional_branch(
215 |          condition_value.into_int_value(),
216 |          body_block,
217 |          end_block,
218 |       );
219 |       // Finally, we branch back to the condition at the end of the body.
220 |       self.builder.position_at_end(body_end_block);
221 |       self.builder.build_unconditional_branch(condition_block);
222 | 
223 |       let scope = ir.ast.scope(node).unwrap();
224 |       self.generate_break_jumps(scope, end_block);
225 | 
226 |       // Continue generating code at the end block.
227 |       self.builder.position_at_end(end_block);
228 |    }
229 | 
230 |    /// Drains the `break` blocks recorded for `scope`, making each of them jump to `end_block`.
231 |    fn generate_break_jumps(&mut self, scope: ScopeId, end_block: BasicBlock<'c>) {
232 |       let jump_builder = self.context.create_builder();
233 |       self.break_blocks.retain(|&(scope_id, _), &mut block| {
234 |          let is_target = scope_id == scope;
235 |          if is_target {
236 |             jump_builder.position_at_end(block);
237 |             jump_builder.build_unconditional_branch(end_block);
238 |          }
239 |          !is_target
240 |       });
241 |    }
242 | 
243 |    /// Generates code for a `break` expression.
244 |    ///
245 |    /// The jump itself is emitted later by `generate_break_jumps`; here the current block is only
246 |    /// recorded, and code generation continues in a fresh block without predecessors.
247 |    pub(crate) fn generate_break(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
248 |       let target_scope = ir.ast.scope(node).expect("Break node with no scope in IR");
249 |       // The map length disambiguates multiple `break`s that target the same scope.
250 |       let key = (target_scope, self.break_blocks.len());
251 |       let current_block = self.builder.get_insert_block().unwrap();
252 |       self.break_blocks.insert(key, current_block);
253 | 
254 |       let dead_block = self.context.append_basic_block(self.function.value, "unreachable");
255 |       self.builder.position_at_end(dead_block);
256 | 
257 |       // `break` is an expression, so a dummy zero value of its type is returned.
258 |       self.get_type(&ir.types, ir.ast.type_id(node)).const_zero()
259 |    }
260 | }
261 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/expressions.rs:
--------------------------------------------------------------------------------
  1 | //! Code generation for expressions.
  2 | 
  3 | use inkwell::values::{BasicValue, BasicValueEnum, FloatValue, IntValue};
  4 | use inkwell::IntPredicate;
  5 | use tsuki_frontend::ast::{NodeId, NodeKind};
  6 | use tsuki_frontend::sem::Ir;
  7 | 
  8 | use crate::codegen::CodeGen;
  9 | use crate::libc;
 10 | 
 11 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
 12 |    /// Generates a Bool constant for a `True` or `False` node.
 13 |    fn generate_boolean_literal(&self, ir: &Ir, node: NodeId) -> IntValue<'c> {
 14 |       let bool_type = self.context.bool_type();
 15 |       // Any node kind other than `True` produces the constant 0.
 16 |       bool_type.const_int(u64::from(ir.ast.kind(node) == NodeKind::True), false)
 17 |    }
 18 | 
 19 |    /// Generates a constant of the node's integer type from the literal's unsigned value.
 20 |    fn generate_integer_literal(&self, ir: &Ir, node: NodeId) -> IntValue<'c> {
 21 |       let int_type = self.get_type(&ir.types, ir.ast.type_id(node)).into_int_type();
 22 |       int_type.const_int(ir.ast.extra(node).as_uint().unwrap(), false)
 23 |    }
 24 | 
 25 |    /// Generates a constant of the node's float type from the literal's value.
 26 |    fn generate_float_literal(&self, ir: &Ir, node: NodeId) -> FloatValue<'c> {
 27 |       let float_type = self.get_type(&ir.types, ir.ast.type_id(node)).into_float_type();
 28 |       float_type.const_float(ir.ast.extra(node).as_float().unwrap())
 29 |    }
 30 | 
 31 |    /// Generates a `not` of the Bool operand stored in the node's first handle.
 32 |    fn generate_boolean_negation(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
 33 |       let operand = self.generate_expression(ir, ir.ast.first_handle(node)).into_int_value();
 34 |       self.builder.build_not(operand, "nottmp").into()
 35 |    }
 36 | 
 37 |    /// Generates code for negating an integer (emitted as `0 - x`) or a float.
 38 |    fn generate_number_negation(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
 39 |       let operand = self.generate_expression(ir, ir.ast.first_handle(node));
 40 |       let type_id = ir.ast.type_id(node);
 41 |       let kind = ir.types.kind(type_id);
 42 |       if kind.is_integer() {
 43 |          let zero = self.get_type(&ir.types, type_id).into_int_type().const_zero();
 44 |          return self.builder.build_int_sub(zero, operand.into_int_value(), "negtmp").into();
 45 |       }
 46 |       if kind.is_float() {
 47 |          return self.builder.build_float_neg(operand.into_float_value(), "fnegtmp").into();
 48 |       }
 49 |       // Only integer and float types are expected to reach this function.
 50 |       unreachable!()
 51 |    }
 52 | 
 53 |    /// Generates both operands of a binary operator node: first the left one (first handle), then
 54 |    /// the right one (second handle).
 55 |    fn generate_binary_operation(
 56 |       &mut self,
 57 |       ir: &Ir,
 58 |       node: NodeId,
 59 |    ) -> (BasicValueEnum<'c>, BasicValueEnum<'c>) {
 60 |       let left = self.generate_expression(ir, ir.ast.first_handle(node));
 61 |       let right = self.generate_expression(ir, ir.ast.second_handle(node));
 62 |       (left, right)
 63 |    }
 64 | 
 65 |    /// Generates code for integer addition, subtraction, multiplication, and division.
 66 |    ///
 67 |    /// Division is emitted as signed or unsigned division depending on the signedness of the
 68 |    /// node's integer type.
 69 |    fn generate_integer_math(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
 70 |       // TODO: Panic on overflow. This can be done using LLVM's arithmetic intrinsics that return
 71 |       // an aggregate {T, i1}, where the second field is a flag signifying whether overflow occurred.
 72 |       let (left, right) = self.generate_binary_operation(ir, node);
 73 |       let (left, right) = (left.into_int_value(), right.into_int_value());
 74 |       let builder = &self.builder;
 75 |       let result = match ir.ast.kind(node) {
 76 |          NodeKind::Plus => builder.build_int_add(left, right, "addtmp"),
 77 |          NodeKind::Minus => builder.build_int_sub(left, right, "subtmp"),
 78 |          NodeKind::Mul => builder.build_int_mul(left, right, "multmp"),
 79 |          NodeKind::Div if ir.types.kind(ir.ast.type_id(node)).unwrap_integer().is_signed() => {
 80 |             builder.build_int_signed_div(left, right, "sdivtmp")
 81 |          }
 82 |          NodeKind::Div => builder.build_int_unsigned_div(left, right, "udivtmp"),
 83 |          _ => unreachable!(),
 84 |       };
 85 |       result.into()
 86 |    }
 87 | 
 88 |    /// Generates code for floating-point addition, subtraction, multiplication, and division of
 89 |    /// the node's two operands.
 90 |    fn generate_float_math(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
 91 |       let (left, right) = self.generate_binary_operation(ir, node);
 92 |       let (left, right) = (left.into_float_value(), right.into_float_value());
 93 |       let builder = &self.builder;
 94 |       let result = match ir.ast.kind(node) {
 95 |          NodeKind::Plus => builder.build_float_add(left, right, "faddtmp"),
 96 |          NodeKind::Minus => builder.build_float_sub(left, right, "fsubtmp"),
 97 |          NodeKind::Mul => builder.build_float_mul(left, right, "fmultmp"),
 98 |          NodeKind::Div => builder.build_float_div(left, right, "fdivtmp"),
 99 |          _ => unreachable!(),
100 |       };
101 |       result.into()
102 |    }
103 | 
104 |    /// Generates code for a math operation on integers or floats, based on the node's type.
105 |    fn generate_math(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
106 |       let kind = ir.types.kind(ir.ast.type_id(node));
107 |       if kind.is_integer() {
108 |          return self.generate_integer_math(ir, node);
109 |       }
110 |       if kind.is_float() {
111 |          return self.generate_float_math(ir, node);
112 |       }
113 |       unreachable!()
114 |    }
115 | 
116 |    /// Generates code for an integer widening conversion: `WidenUint` zero-extends the operand
117 |    /// to the node's type, `WidenInt` sign-extends it.
118 |    fn generate_integer_conversion(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
119 |       let operand = self.generate_expression(ir, ir.ast.first_handle(node)).into_int_value();
120 |       let target = self.get_type(&ir.types, ir.ast.type_id(node)).into_int_type();
121 |       let widened = match ir.ast.kind(node) {
122 |          NodeKind::WidenUint => self.builder.build_int_z_extend(operand, target, "uwidened"),
123 |          NodeKind::WidenInt => self.builder.build_int_s_extend(operand, target, "swidened"),
124 |          _ => unreachable!(),
125 |       };
126 |       widened.into()
127 |    }
128 | 
129 |    /// Generates code for comparing two Bool operands with `Equal` or `NotEqual`.
130 |    fn generate_boolean_comparison(&mut self, ir: &Ir, node: NodeId) -> IntValue<'c> {
131 |       let (lhs, rhs) = self.generate_binary_operation(ir, node);
132 |       let (lhs, rhs) = (lhs.into_int_value(), rhs.into_int_value());
133 |       let predicate = match ir.ast.kind(node) {
134 |          NodeKind::NotEqual => IntPredicate::NE,
135 |          NodeKind::Equal => IntPredicate::EQ,
136 |          _ => unreachable!(),
137 |       };
138 |       self.builder.build_int_compare(predicate, lhs, rhs, "boolcmp")
139 |    }
140 | 
141 |    /// Generates code for an integer comparison; signedness is taken from the left operand.
142 |    fn generate_integer_comparison(&mut self, ir: &Ir, node: NodeId) -> IntValue<'c> {
143 |       let (lhs, rhs) = self.generate_binary_operation(ir, node);
144 |       let (lhs, rhs) = (lhs.into_int_value(), rhs.into_int_value());
145 |       let lhs_type = ir.ast.type_id(ir.ast.first_handle(node));
146 |       let signed = ir.types.kind(lhs_type).unwrap_integer().is_signed();
147 |       let predicate = match (ir.ast.kind(node), signed) {
148 |          (NodeKind::Equal, _) => IntPredicate::EQ,
149 |          (NodeKind::NotEqual, _) => IntPredicate::NE,
150 |          (NodeKind::Less, true) => IntPredicate::SLT,
151 |          (NodeKind::LessEqual, true) => IntPredicate::SLE,
152 |          (NodeKind::Greater, true) => IntPredicate::SGT,
153 |          (NodeKind::GreaterEqual, true) => IntPredicate::SGE,
154 |          (NodeKind::Less, false) => IntPredicate::ULT,
155 |          (NodeKind::LessEqual, false) => IntPredicate::ULE,
156 |          (NodeKind::Greater, false) => IntPredicate::UGT,
157 |          (NodeKind::GreaterEqual, false) => IntPredicate::UGE,
158 |          _ => unreachable!(),
159 |       };
160 |       self.builder.build_int_compare(predicate, lhs, rhs, "intcmp")
161 |    }
162 | 
163 |    /// Generates code for integer comparisons.
164 |    fn generate_comparison(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
165 |       let left_node = ir.ast.first_handle(node);
166 |       let typ = ir.types.kind(ir.ast.type_id(left_node));
167 |       if typ.is_integer() {
168 |          self.generate_integer_comparison(ir, node)
169 |       } else if typ.is_float() {
170 |          todo!()
171 |       } else if typ.is_bool() {
172 |          self.generate_boolean_comparison(ir, node)
173 |       } else {
174 |          todo!()
175 |       }
176 |       .as_basic_value_enum()
177 |    }
178 | 
179 |    /// Generates code for any expression node.
180 |    pub(crate) fn generate_expression(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
181 |       match ir.ast.kind(node) {
182 |          // Literals
183 |          NodeKind::True | NodeKind::False => self.generate_boolean_literal(ir, node).into(),
184 |          | NodeKind::Uint8
185 |          | NodeKind::Uint16
186 |          | NodeKind::Uint32
187 |          | NodeKind::Uint64
188 |          | NodeKind::Int8
189 |          | NodeKind::Int16
190 |          | NodeKind::Int32
191 |          | NodeKind::Int64 => self.generate_integer_literal(ir, node).into(),
192 |          NodeKind::Float32 | NodeKind::Float64 => self.generate_float_literal(ir, node).into(),
193 | 
194 |          // Variables
195 |          NodeKind::Variable => self.generate_variable_reference(ir, node),
196 | 
197 |          // Operators
198 |          NodeKind::Not => self.generate_boolean_negation(ir, node),
199 |          NodeKind::Neg => self.generate_number_negation(ir, node),
200 |          NodeKind::Plus | NodeKind::Minus | NodeKind::Mul | NodeKind::Div => {
201 |             self.generate_math(ir, node)
202 |          }
203 |          | NodeKind::Equal
204 |          | NodeKind::NotEqual
205 |          | NodeKind::Less
206 |          | NodeKind::LessEqual
207 |          | NodeKind::Greater
208 |          | NodeKind::GreaterEqual => self.generate_comparison(ir, node),
209 |          NodeKind::Assign => self.generate_assignment(ir, node).unwrap(),
210 | 
211 |          // Control flow
212 |          NodeKind::DoExpression => self.generate_do(ir, node).unwrap(),
213 |          NodeKind::IfExpression => self.generate_if(ir, node).unwrap(),
214 |          NodeKind::CallFunction => self.generate_call(ir, node),
215 |          NodeKind::Break => self.generate_break(ir, node),
216 |          NodeKind::Return => self.generate_return(ir, node),
217 | 
218 |          // Intrinsics
219 |          NodeKind::WidenUint | NodeKind::WidenInt => self.generate_integer_conversion(ir, node),
220 |          NodeKind::PrintInt32 | NodeKind::PrintFloat32 => {
221 |             self.generate_call_like_intrinsic(ir, node)
222 |          }
223 |          other => unreachable!("invalid expression node: {:?}", other),
224 |       }
225 |    }
226 | 
227 |    /// Generates code for a function call-like intrinsic.
228 |    fn generate_call_like_intrinsic(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
229 |       let arguments = ir.ast.extra(node).as_node_list().unwrap();
230 |       match ir.ast.kind(node) {
231 |          kind @ (NodeKind::PrintInt32 | NodeKind::PrintFloat32) => {
232 |             // This is not the, um, cleanest... piece of code here, but it'll get replaced
233 |             // anyway once c_import is implemented.
234 |             let printf = self.module.get_function(libc::FUN_PRINTF).expect("libc must be loaded");
235 |             let zero = self.context.i32_type().const_zero();
236 |             let global_name = if kind == NodeKind::PrintInt32 {
237 |                libc::GLOBAL_PRINTF_INT_FORMAT
238 |             } else {
239 |                libc::GLOBAL_PRINTF_FLOAT_FORMAT
240 |             };
241 |             let format = self.module.get_global(global_name).unwrap();
242 |             let format_ptr = unsafe {
243 |                self.builder.build_in_bounds_gep(format.as_pointer_value(), &[zero, zero], "fmt")
244 |             };
245 |             let mut argument = self.generate_expression(ir, arguments[0]);
246 |             // We need to convert `float` to `double` for passing to printf.
247 |             if kind == NodeKind::PrintFloat32 {
248 |                let f64_type = self.context.f64_type();
249 |                argument = self
250 |                   .builder
251 |                   .build_float_cast(argument.into_float_value(), f64_type, "printf_dbl")
252 |                   .as_basic_value_enum();
253 |             }
254 |             self.builder.build_call(printf, &[format_ptr.into(), argument.into()], "_");
255 |          }
256 |          _ => unreachable!(),
257 |       }
258 |       self.generate_unit_literal().into()
259 |    }
260 | }
261 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/functions.rs:
--------------------------------------------------------------------------------
  1 | //! Code generation for functions.
  2 | 
  3 | use inkwell::basic_block::BasicBlock;
  4 | use inkwell::builder::Builder;
  5 | use inkwell::context::Context;
  6 | use inkwell::module::Module;
  7 | use inkwell::types::{BasicType, BasicTypeEnum, FunctionType};
  8 | use inkwell::values::{BasicValue, BasicValueEnum, FunctionValue, PointerValue};
  9 | use smallvec::SmallVec;
 10 | use tsuki_frontend::ast::{NodeId, NodeKind};
 11 | use tsuki_frontend::functions::FunctionId;
 12 | use tsuki_frontend::sem::Ir;
 13 | 
 14 | use crate::codegen::CodeGen;
 15 | 
 16 | /// Data associated with a single function.
 17 | pub struct Function<'c> {
 18 |    pub value: FunctionValue<'c>,
 19 |    pub entry_block: BasicBlock<'c>,
 20 | }
 21 | 
 22 | impl<'c> Function<'c> {
 23 |    /// Creates a new function from the given name and type.
 24 |    ///
 25 |    /// # Side effects
 26 |    ///
 27 |    /// This adds the new function to the module.
 28 |    pub fn add_to_module(
 29 |       context: &'c Context,
 30 |       module: &Module<'c>,
 31 |       name: &str,
 32 |       typ: FunctionType<'c>,
 33 |    ) -> Self {
 34 |       // Unfortunately this side effect of adding the function into the module is unavoidable,
 35 |       // as far as I know. I generally prefer code that is free of any side effects but I guess
 36 |       // some sacrifices have to be made.
 37 |       let value = module.add_function(name, typ, None);
 38 |       let entry_block = context.append_basic_block(value, "entry");
 39 |       Self { value, entry_block }
 40 |    }
 41 | 
 42 |    /// Creates a `Function` from an existing `FunctionValue`.
 43 |    pub fn from_value(value: FunctionValue<'c>) -> Self {
 44 |       Self {
 45 |          value,
 46 |          entry_block: value.get_first_basic_block().expect("function did not have a basic block"),
 47 |       }
 48 |    }
 49 | 
 50 |    /// Positions the given builder at the start of the function.
 51 |    pub fn position_at_entry_block(&self, builder: &Builder) {
 52 |       if let Some(instruction) = self.entry_block.get_first_instruction() {
 53 |          builder.position_before(&instruction);
 54 |       } else {
 55 |          builder.position_at_end(self.entry_block);
 56 |       }
 57 |    }
 58 | 
 59 |    /// Creates a builder that adds instructions to the top of the entry block.
 60 |    pub fn create_entry_block_builder(&self, context: &'c Context) -> Builder<'c> {
 61 |       let builder = context.create_builder();
 62 |       self.position_at_entry_block(&builder);
 63 |       builder
 64 |    }
 65 | }
 66 | 
 67 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
 68 |    /// Returns the function type, for the given function ID.
 69 |    fn get_function_type(&self, ir: &Ir, function_id: FunctionId) -> FunctionType<'c> {
 70 |       let parameters = ir.functions.parameters(function_id);
 71 |       let parameter_types: SmallVec<[BasicTypeEnum<'c>; 8]> = parameters
 72 |          .formal
 73 |          .iter()
 74 |          .map(|&symbol_id| {
 75 |             let typ = ir.symbols.type_id(symbol_id);
 76 |             self.get_type(&ir.types, typ)
 77 |          })
 78 |          .collect();
 79 |       // Well, that kind of sucks. AnyValueEnum does not have an fn_type method, so we need to
 80 |       // branch here.
 81 |       if ir.types.kind(parameters.return_type).is_unit() {
 82 |          self.context.void_type().fn_type(&parameter_types, false)
 83 |       } else {
 84 |          self.get_type(&ir.types, parameters.return_type).fn_type(&parameter_types, false)
 85 |       }
 86 |    }
 87 | 
 88 |    /// Adds functions from the IR to the module.
 89 |    pub fn add_functions(&self, ir: &Ir) {
 90 |       for function_id in ir.functions.iter() {
 91 |          // Skip non-local functions in the process.
 92 |          if ir.functions.kind(function_id).is_local() {
 93 |             let function_type = self.get_function_type(ir, function_id);
 94 |             let _ = Function::add_to_module(
 95 |                self.context,
 96 |                self.module,
 97 |                ir.functions.mangled_name(function_id),
 98 |                function_type,
 99 |             );
100 |          }
101 |       }
102 |    }
103 | 
104 |    /// Generates code for a function.
105 |    pub fn generate_function(&self, ir: &Ir, node: NodeId) {
106 |       // Get the function ID from the AST.
107 |       let name_node = ir.ast.first_handle(node);
108 |       let symbol_id = ir.ast.symbol_id(name_node);
109 |       let function_id = ir.symbols.kind(symbol_id).unwrap_function();
110 | 
111 |       // Obtain the function from the module.
112 |       let function = self
113 |          .module
114 |          .get_function(ir.functions.mangled_name(function_id))
115 |          .expect("function does not seem to exist");
116 |       let function = Function::from_value(function);
117 | 
118 |       // Create a new CodeGen for generating the function's body.
119 |       let mut code_gen = self.for_function(function);
120 | 
121 |       // Copy all the parameters into allocas.
122 |       // I don't think this is _too_ terrible performance-wise, mem2reg will hopefully optimize
123 |       // away most of the cases here.
124 |       code_gen.function.position_at_entry_block(&code_gen.builder);
125 |       let parameters = ir.functions.parameters(function_id);
126 |       let mut allocas = SmallVec::<[PointerValue<'c>; 8]>::new();
127 |       for (i, parameter) in code_gen.function.value.get_param_iter().enumerate() {
128 |          // While we're at it, we give all the parameters names for more readable IR.
129 |          let symbol_id = parameters.formal[i];
130 |          let name = ir.symbols.name(symbol_id);
131 |          parameter.set_name(name);
132 |          let alloca = code_gen.builder.build_alloca(parameter.get_type(), name);
133 |          code_gen.builder.build_store(alloca, parameter);
134 |          allocas.push(alloca);
135 |          // Also, store the alloca in the code generator's variables list.
136 |          code_gen.variables.insert(symbol_id, alloca);
137 |       }
138 | 
139 |       // Generate the function's body.
140 |       let return_type = ir.functions.parameters(function_id).return_type;
141 |       let return_value = if ir.types.kind(return_type).is_unit() {
142 |          code_gen.generate_statements(ir, node);
143 |          None
144 |       } else {
145 |          Some(code_gen.generate_statements_with_tail_expression(ir, node))
146 |       };
147 | 
148 |       // Finish the function up.
149 |       code_gen.finish_function(return_value);
150 |    }
151 | 
152 |    /// Generates code for a function call.
153 |    pub(crate) fn generate_call(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
154 |       // Get the function we want to call.
155 |       let callee_node = ir.ast.first_handle(node);
156 |       let symbol_id = ir.ast.symbol_id(callee_node);
157 |       let function_id = ir.symbols.kind(symbol_id).unwrap_function();
158 |       let function = self
159 |          .module
160 |          .get_function(ir.functions.mangled_name(function_id))
161 |          .expect("function does not seem to exist");
162 | 
163 |       // Generate code for all the arguments.
164 |       let mut arguments = SmallVec::<[BasicValueEnum<'c>; 8]>::new();
165 |       for &argument in ir.ast.extra(node).as_node_list().unwrap() {
166 |          arguments.push(self.generate_expression(ir, argument));
167 |       }
168 |       let call = self.builder.build_call(function, &arguments, "calltmp");
169 | 
170 |       call.try_as_basic_value().either(|value| value, |_void| self.generate_unit_literal().into())
171 |    }
172 | 
173 |    /// Generates code for a `return` expression.
174 |    pub(crate) fn generate_return(&mut self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
175 |       // Finish off the current basic block with a `ret` instruction.
176 |       let return_value = ir.ast.first_handle(node);
177 |       let result_value = if ir.ast.kind(return_value) != NodeKind::Empty {
178 |          let value = self.generate_expression(ir, return_value);
179 |          self.builder.build_return(Some(&value));
180 |          let result_type = ir.ast.type_id(node);
181 |          self.get_type(&ir.types, result_type)
182 |       } else {
183 |          self.builder.build_return(None);
184 |          self.unit_type.into()
185 |       };
186 |       // Then, begin a new basic block such that if there's any unreachable code past this block,
187 |       // its terminator will be contained in this new block.
188 |       let unreachable_block = self.context.append_basic_block(self.function.value, "unreachable");
189 |       self.builder.position_at_end(unreachable_block);
190 | 
191 |       result_value.const_zero()
192 |    }
193 | }
194 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! The root of the LLVM backend. This implements high-level functionality - compiling code into
  2 | //! object files, and linking those object files into executables.
  3 | 
  4 | mod codegen;
  5 | mod control_flow;
  6 | mod expressions;
  7 | mod functions;
  8 | mod libc;
  9 | mod types;
 10 | mod variables;
 11 | 
 12 | use std::fmt::{self, Display, Formatter};
 13 | use std::path::{Path, PathBuf};
 14 | use std::process::{Command, Output};
 15 | use std::str::FromStr;
 16 | 
 17 | use inkwell::context::Context;
 18 | use inkwell::passes::PassManager;
 19 | use inkwell::targets::{
 20 |    CodeModel, FileType, InitializationConfig, RelocMode, Target, TargetMachine, TargetTriple,
 21 | };
 22 | use inkwell::values::BasicValue;
 23 | use thiserror::Error;
 24 | use tsuki_frontend::common::{self, Error, ErrorKind, Errors, SourceFile, Span};
 25 | use tsuki_frontend::{backend, AnalyzeOptions};
 26 | 
 27 | use codegen::CodeGen;
 28 | 
 29 | use crate::functions::Function;
 30 | 
 31 | /// Debug options for the backend.
 32 | pub struct DebugOptions {
 33 |    pub dump_ir: bool,
 34 | }
 35 | 
 36 | impl Default for DebugOptions {
 37 |    fn default() -> Self {
 38 |       Self { dump_ir: false }
 39 |    }
 40 | }
 41 | 
 42 | /// Struct representing the LLVM compilation backend and options passed to it.
 43 | pub struct LlvmBackend {
 44 |    cache_dir: PathBuf,
 45 |    std_path: PathBuf,
 46 |    executable_name: String,
 47 |    target_triple: TargetTriple,
 48 |    optimization_level: OptimizationLevel,
 49 |    frontend_debug_options: tsuki_frontend::DebugOptions,
 50 |    backend_debug_options: DebugOptions,
 51 | }
 52 | 
 53 | /// Options for creating an LLVM backend instance.
 54 | pub struct LlvmBackendConfig<'c, 'e, 't> {
 55 |    pub cache_dir: &'c Path,
 56 |    pub std_path: &'c Path,
 57 |    pub package_name: &'e str,
 58 |    pub target_triple: Option<&'t str>,
 59 |    pub optimization_level: OptimizationLevel,
 60 |    pub frontend_debug_options: tsuki_frontend::DebugOptions,
 61 |    pub backend_debug_options: DebugOptions,
 62 | }
 63 | 
 64 | impl LlvmBackend {
 65 |    /// Creates a new instance of the LLVM compilation backend, with the provided options.
 66 |    pub fn new(config: LlvmBackendConfig) -> Self {
 67 |       Self {
 68 |          cache_dir: config.cache_dir.to_owned(),
 69 |          std_path: config.std_path.to_owned(),
 70 |          executable_name: config.package_name.to_owned(),
 71 |          target_triple: match config.target_triple {
 72 |             Some(triple) => TargetTriple::create(triple),
 73 |             None => TargetMachine::get_default_triple(),
 74 |          },
 75 |          optimization_level: config.optimization_level,
 76 |          frontend_debug_options: config.frontend_debug_options,
 77 |          backend_debug_options: config.backend_debug_options,
 78 |       }
 79 |    }
 80 | 
 81 |    fn to_errors<T, E>(r: Result<T, E>) -> Result<T, Errors>
 82 |    where
 83 |       E: ToString,
 84 |    {
 85 |       r.map_err(|e| {
 86 |          common::single_error(Error {
 87 |             filename: "internal error".into(),
 88 |             span: Span::default(),
 89 |             kind: ErrorKind::CodeGen(e.to_string()),
 90 |          })
 91 |       })
 92 |    }
 93 | }
 94 | 
 95 | impl backend::Backend for LlvmBackend {
 96 |    type Target = ObjectFile;
 97 | 
 98 |    /// Compiles the given source file to an executable.
 99 |    fn compile(&self, root: SourceFile) -> Result<Self::Target, Errors> {
100 |       let ir = tsuki_frontend::analyze(
101 |          AnalyzeOptions {
102 |             file: &root,
103 |             std_path: self.std_path.clone(),
104 |          },
105 |          &self.frontend_debug_options,
106 |       )?;
107 |       let context = Context::create();
108 |       let module = context.create_module(&root.module_name);
109 | 
110 |       // Set up the pass manager.
111 |       let pm = PassManager::create(&module);
112 |       pm.add_verifier_pass();
113 | 
114 |       if self.optimization_level >= OptimizationLevel::Essential {
115 |          // Constant folding passes run twice: once at startup, and once after CFG simplicifation
116 |          // and mem2reg, such that constant folding is also performed after simplifying the IR to
117 |          // use more SSA and less allocas.
118 |          pm.add_instruction_combining_pass();
119 |          pm.add_reassociate_pass();
120 | 
121 |          // TODO: Figure out what GVN (global value numbering) is. The LLVM docs for passes don't
122 |          // really say much about it.
123 |          // (https://llvm.org/docs/Passes.html)
124 |          pm.add_gvn_pass();
125 | 
126 |          // These passes simplify the control flow graph and turn memory operations into SSA form
127 |          // wherever possible.
128 |          pm.add_cfg_simplification_pass();
129 |          pm.add_basic_alias_analysis_pass();
130 |          pm.add_promote_memory_to_register_pass();
131 | 
132 |          // As said before, constant folding is performed twice.
133 |          pm.add_instruction_combining_pass();
134 |          pm.add_reassociate_pass();
135 |       }
136 | 
137 |       pm.initialize();
138 | 
139 |       // Construct all the types.
140 |       let i32_type = context.i32_type();
141 |       let main_fun_type = i32_type.fn_type(&[], false);
142 | 
143 |       // Create the function and the codegen state.
144 |       let main_fun = Function::add_to_module(&context, &module, "main", main_fun_type);
145 |       let mut state = CodeGen::new(&root, &context, &pm, &module, main_fun);
146 |       state.add_functions(&ir);
147 | 
148 |       // Compile the module's code.
149 |       state.generate_statement(&ir, ir.root_node);
150 | 
151 |       // Return the zero exit code.
152 |       state.finish_function(Some(i32_type.const_zero().as_basic_value_enum()));
153 | 
154 |       if self.backend_debug_options.dump_ir {
155 |          eprintln!("## LLVM IR");
156 |          eprintln!("{:?}", state);
157 |          eprintln!();
158 |       }
159 | 
160 |       // Cross-compilation support, anyone?
161 |       // Right now we initialize the native target only.
162 |       Self::to_errors(Target::initialize_native(&InitializationConfig {
163 |          // Honestly, I'm not sure we need _all_ the features.
164 |          // TODO: Check which ones can be disabled.
165 |          asm_parser: true,
166 |          asm_printer: true,
167 |          base: true,
168 |          disassembler: true,
169 |          info: true,
170 |          machine_code: true,
171 |       }))?;
172 | 
173 |       Self::to_errors(state.module.verify())?;
174 | 
175 |       // Set up the target machine. We won't be enabling any special features here for now.
176 |       let target = Self::to_errors(Target::from_triple(&self.target_triple))?;
177 |       let machine = Self::to_errors(
178 |          target
179 |             .create_target_machine(
180 |                &self.target_triple,
181 |                "generic",
182 |                "",
183 |                inkwell::OptimizationLevel::Default,
184 |                RelocMode::Default,
185 |                CodeModel::Default,
186 |             )
187 |             .ok_or("target triple is not supported"),
188 |       )?;
189 |       state.module.set_data_layout(&machine.get_target_data().get_data_layout());
190 |       state.module.set_triple(&self.target_triple);
191 | 
192 |       // Create all the needed directories.
193 |       let object_dir = self.cache_dir.join("object");
194 |       Self::to_errors(std::fs::create_dir_all(&self.cache_dir))?;
195 |       Self::to_errors(std::fs::create_dir_all(&object_dir))?;
196 | 
197 |       // Do some path manipulation to figure out where the object file should be placed.
198 |       let object_name = format!("{}.o", &self.executable_name);
199 |       let object_path = object_dir.join(&object_name);
200 |       // Delete the old object file so that LLVM isn't going to complain when writing the new one.
201 |       // The result is ignored because we don't care if the file exists or not.
202 |       // If we lack sufficient permissions, then LLVM will point that out anyways.
203 |       let _ = std::fs::remove_file(&object_path);
204 | 
205 |       // Compile the object file.
206 |       Self::to_errors(machine.write_to_file(&state.module, FileType::Object, &object_path))?;
207 | 
208 |       Ok(ObjectFile { path: object_path })
209 |    }
210 | }
211 | 
/// Specifies the amount of optimizations to apply when compiling.
///
/// The levels are ordered from least to most optimization, so they can be compared:
/// `None < Essential < Release`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum OptimizationLevel {
   /// Perform no optimizations at all.
   None,
   /// Run the basic function-level optimization passes.
   Essential,
   /// The highest optimization level, intended for release builds.
   Release,
}

/// Returned when an invalid optimization level is used.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct InvalidOptimizationLevel;

impl Display for InvalidOptimizationLevel {
   fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
      f.write_str("invalid optimization level")
   }
}

// Implementing `Error` lets callers box this error or propagate it with `?` alongside
// other standard errors.
impl std::error::Error for InvalidOptimizationLevel {}

impl FromStr for OptimizationLevel {
   type Err = InvalidOptimizationLevel;

   /// Converts a string to an optimization level.
   ///
   /// Valid values include:
   /// - `"none"` → `None`
   /// - `"essential"` → `Essential`
   /// - `"release"` → `Release`
   ///
   /// Matching is case-sensitive. Any other string results in an
   /// `InvalidOptimizationLevel` error.
   fn from_str(s: &str) -> Result<Self, Self::Err> {
      match s {
         "none" => Ok(Self::None),
         "essential" => Ok(Self::Essential),
         "release" => Ok(Self::Release),
         _ => Err(InvalidOptimizationLevel),
      }
   }
}
251 | 
/// Struct representing an object file built using LLVM.
#[derive(Clone, Debug)]
pub struct ObjectFile {
   /// Location of the object file on disk.
   path: PathBuf,
}
256 | 
/// Struct representing an executable file built from linked object files.
#[derive(Clone, Debug)]
pub struct ExecutableFile {
   /// Location of the executable on disk.
   path: PathBuf,
}
261 | 
262 | #[derive(Debug, Error)]
263 | pub enum LinkError {
264 |    #[error("no linker found; check the $TSUKI_LD and $LD environment variables.")]
265 |    NoLinker,
266 |    #[error("the linker exited with an error (code {0}):\n{1}")]
267 |    Failure(i32, String),
268 |    #[error("I/O error: {0}")]
269 |    Io(#[from] std::io::Error),
270 | }
271 | 
272 | impl ExecutableFile {
273 |    /// Links the provided object files into an executable.
274 |    ///
275 |    /// This launches the standard compiler pointed to by the `$TSUKI_CC` or `$CC` environment
276 |    /// variables (in that order), or the `cc` executable found in `$PATH`.
277 |    pub fn link(backend: LlvmBackend, objects: &[ObjectFile]) -> Result<Self, LinkError> {
278 |       use std::env;
279 | 
280 |       let linker = env::var_os("TSUKI_CC")
281 |          .or_else(|| env::var_os("CC"))
282 |          .or_else(|| Some("cc".into()))
283 |          .ok_or(LinkError::NoLinker)?;
284 |       let output_path = backend.cache_dir.join(&backend.executable_name);
285 | 
286 |       let mut cmd = Command::new(linker);
287 |       // Pass the output path to the linker.
288 |       cmd.arg("-o");
289 |       cmd.arg(&output_path);
290 |       for object in objects {
291 |          cmd.arg(&object.path);
292 |       }
293 | 
294 |       let output = cmd.output()?;
295 |       if let Some(exit_code) = output.status.code() {
296 |          if exit_code != 0 {
297 |             let errors = std::str::from_utf8(&output.stderr).ok().unwrap_or("<invalid UTF-8>");
298 |             return Err(LinkError::Failure(exit_code, errors.into()));
299 |          }
300 |       }
301 |       Ok(Self { path: output_path })
302 |    }
303 | 
304 |    /// Runs the executable, passing the given arguments to it. The output contains captured
305 |    /// stdout and stderr, as well as the exit code.
306 |    pub fn run(&self, args: &[&str]) -> Result<Output, std::io::Error> {
307 |       Command::new(&self.path).args(args).output()
308 |    }
309 | }
310 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/libc.rs:
--------------------------------------------------------------------------------
 1 | use inkwell::AddressSpace;
 2 | 
 3 | use crate::codegen::CodeGen;
 4 | 
 5 | // Named constants for string values. This should simplify refactoring if they ever need to
 6 | // be changed.
 7 | pub const FUN_PRINTF: &str = "printf";
 8 | pub const GLOBAL_PRINTF_INT_FORMAT: &str = "printf_int_format";
 9 | pub const GLOBAL_PRINTF_FLOAT_FORMAT: &str = "printf_float_format";
10 | 
11 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
12 |    fn add_const_string(&mut self, name: &str, string: &[u8]) {
13 |       let s = self.context.const_string(string, true);
14 |       let typ = s.get_type();
15 |       let global = self.module.add_global(typ, Some(AddressSpace::Generic), name);
16 |       global.set_initializer(&s);
17 |    }
18 | 
19 |    pub(crate) fn load_libc(&mut self) {
20 |       // int printf(char *fmt, ...);
21 |       let string_type = self.context.i8_type().ptr_type(AddressSpace::Generic);
22 |       let i32_type = self.context.i32_type();
23 |       let printf_fn_type = i32_type.fn_type(&[string_type.into()], true);
24 |       self.module.add_function(FUN_PRINTF, printf_fn_type, None);
25 | 
26 |       // printf format strings
27 |       self.add_const_string(GLOBAL_PRINTF_INT_FORMAT, b"%i\n");
28 |       self.add_const_string(GLOBAL_PRINTF_FLOAT_FORMAT, b"%g\n");
29 |    }
30 | }
31 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/types.rs:
--------------------------------------------------------------------------------
 1 | //! Conversion from tsuki types into LLVM types.
 2 | 
 3 | use inkwell::types::{BasicType, BasicTypeEnum, FloatType, IntType};
 4 | use inkwell::values::StructValue;
 5 | use tsuki_frontend::types::{FloatSize, IntegerSize, TypeId, TypeKind, Types};
 6 | 
 7 | use crate::codegen::CodeGen;
 8 | 
 9 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
10 |    /// Generates code for a unit literal.
11 |    pub(crate) fn generate_unit_literal(&self) -> StructValue<'c> {
12 |       self.unit_type.const_zero()
13 |    }
14 | 
15 |    /// Returns the integer type for the provided type, or panics if the type is not an integer type.
16 |    fn get_integer_type(&self, types: &Types, typ: TypeId) -> IntType<'c> {
17 |       if let TypeKind::Integer(size) = types.kind(typ) {
18 |          match size {
19 |             IntegerSize::U8 | IntegerSize::S8 => self.context.i8_type(),
20 |             IntegerSize::U16 | IntegerSize::S16 => self.context.i16_type(),
21 |             IntegerSize::U32 | IntegerSize::S32 => self.context.i32_type(),
22 |             IntegerSize::U64 | IntegerSize::S64 => self.context.i64_type(),
23 |          }
24 |       } else {
25 |          panic!("type is not an integer type")
26 |       }
27 |    }
28 | 
29 |    /// Returns the float type for the provided type, or panics if the type is not a float type.
30 |    fn get_float_type(&self, types: &Types, typ: TypeId) -> FloatType<'c> {
31 |       if let TypeKind::Float(size) = types.kind(typ) {
32 |          match size {
33 |             FloatSize::S32 => self.context.f32_type(),
34 |             FloatSize::S64 => self.context.f64_type(),
35 |          }
36 |       } else {
37 |          panic!("type is not a float type")
38 |       }
39 |    }
40 | 
41 |    pub(crate) fn get_type(&self, types: &Types, typ: TypeId) -> BasicTypeEnum<'c> {
42 |       match types.kind(typ) {
43 |          TypeKind::Missing => panic!("get_type called with missingtype"),
44 |          TypeKind::Error => panic!("get_type called with errortype"),
45 |          TypeKind::Statement => panic!("get_type called with statement type"),
46 |          TypeKind::Declaration(id) => panic!("get_type called with declaration({:?}) type", id),
47 |          TypeKind::Type => panic!("get_type called with type type"),
48 |          TypeKind::Unit | TypeKind::NoReturn => self.unit_type.as_basic_type_enum(),
49 |          TypeKind::Bool => self.context.bool_type().as_basic_type_enum(),
50 |          TypeKind::Integer(_) => self.get_integer_type(types, typ).as_basic_type_enum(),
51 |          TypeKind::Float(_) => self.get_float_type(types, typ).as_basic_type_enum(),
52 |          TypeKind::Char => todo!(),
53 |          TypeKind::Alias(alias) => self.get_type(types, *alias),
54 |       }
55 |    }
56 | }
57 | 


--------------------------------------------------------------------------------
/tsuki-backend-llvm/src/variables.rs:
--------------------------------------------------------------------------------
  1 | /// Code generation for variable declarations.
  2 | use inkwell::values::{BasicValueEnum, PointerValue};
  3 | use tsuki_frontend::ast::{NodeId, NodeKind};
  4 | use tsuki_frontend::scope::SymbolId;
  5 | use tsuki_frontend::sem::Ir;
  6 | 
  7 | use crate::CodeGen;
  8 | 
  9 | /// Table of variable storage, indexed by symbol ID. Every occupied entry holds the LLVM
 10 | /// pointer value that was registered for that symbol.
 11 | pub(crate) struct Variables<'c> {
 12 |    variables: Vec<Option<PointerValue<'c>>>,
 13 | }
 14 | 
 15 | impl<'c> Variables<'c> {
 16 |    /// Creates an empty table with no symbols registered.
 17 |    pub(crate) fn new() -> Self {
 18 |       Self { variables: Vec::new() }
 19 |    }
 20 | 
 21 |    /// Registers `value` under `symbol`, replacing whatever was stored for it before.
 22 |    ///
 23 |    /// The table grows with `None` entries as needed, so that the symbol's index exists.
 24 |    pub(crate) fn insert(&mut self, symbol: SymbolId, value: PointerValue<'c>) {
 25 |       let index = symbol.id();
 26 |       if index >= self.variables.len() {
 27 |          self.variables.resize(index + 1, None);
 28 |       }
 29 |       self.variables[index] = Some(value);
 30 |    }
 31 | 
 32 |    /// Looks up the value registered under `symbol`.
 33 |    ///
 34 |    /// Returns `None` when the ID lies past the end of the table or its entry was never filled.
 35 |    pub(crate) fn get(&self, symbol: SymbolId) -> Option<PointerValue<'c>> {
 36 |       self.variables.get(symbol.id()).copied().flatten()
 37 |    }
 38 | }
 39 | 
 40 | impl<'src, 'c, 'pm> CodeGen<'src, 'c, 'pm> {
 41 |    /// Emits a load of the variable that `node` refers to and returns the loaded value.
 42 |    ///
 43 |    /// # Panics
 44 |    /// Panics if no storage was registered for the variable's symbol.
 45 |    pub(crate) fn generate_variable_reference(&self, ir: &Ir, node: NodeId) -> BasicValueEnum<'c> {
 46 |       let symbol = ir.ast.symbol_id(ir.ast.first_handle(node));
 47 |       let slot = self.variables.get(symbol);
 48 | 
 49 |       let pointer = slot.expect("reference to undeclared variable in IR");
 50 |       self.builder.build_load(pointer, ir.symbols.name(symbol))
 51 |    }
 52 | 
 53 |    /// Emits the code for a variable declaration: the initializer expression, an alloca for
 54 |    /// the variable, and the store of the initial value into it.
 55 |    pub(crate) fn generate_variable_declaration(&mut self, ir: &Ir, node: NodeId) {
 56 |       let symbol = ir.ast.symbol_id(ir.ast.first_handle(node));
 57 |       let initializer = self.generate_expression(ir, ir.ast.second_handle(node));
 58 | 
 59 |       // Every declaration gets its own alloca, which keeps code generation simple; mem2reg
 60 |       // cleans up the resulting allocas, loads, and stores afterwards.
 61 |       // mem2reg can only do that for allocas that live in the function's entry block, so the
 62 |       // alloca is built with a separate builder positioned there rather than at the current
 63 |       // insertion point. The store itself still happens where the declaration appears.
 64 |       let entry_builder = self.function.create_entry_block_builder(self.context);
 65 |       let slot = entry_builder.build_alloca(initializer.get_type(), ir.symbols.name(symbol));
 66 |       self.builder.build_store(slot, initializer);
 67 | 
 68 |       self.variables.insert(symbol, slot);
 69 |    }
 70 | 
 71 |    /// Generates code for `AssignDiscard`.
 72 |    pub(crate) fn generate_discarding_assignment(&mut self, ir: &Ir, node: NodeId) {
 73 |       // The expression is still generated; only the value it produces is dropped.
 74 |       let _ = self.generate_expression(ir, ir.ast.first_handle(node));
 75 |    }
 76 | 
 77 |    /// Generates code for an assignment to a variable.
 78 |    ///
 79 |    /// Returns the value the variable held before the store when the assignment is used as
 80 |    /// an expression, and `None` when it is a statement.
 81 |    pub(crate) fn generate_assignment(
 82 |       &mut self,
 83 |       ir: &Ir,
 84 |       node: NodeId,
 85 |    ) -> Option<BasicValueEnum<'c>> {
 86 |       // Statement-typed assignments yield nothing, which allows skipping the load below.
 87 |       let yields_value = !ir.types.kind(ir.ast.type_id(node)).is_statement();
 88 | 
 89 |       // Only plain variables are handled as assignment targets here.
 90 |       let target_node = ir.ast.first_handle(node);
 91 |       let target = match ir.ast.kind(target_node) {
 92 |          NodeKind::Variable => {
 93 |             let symbol = ir.ast.symbol_id(ir.ast.first_handle(target_node));
 94 |             self.variables.get(symbol).expect("reference to undeclared variable in IR")
 95 |          }
 96 |          _ => unreachable!(),
 97 |       };
 98 |       let value = self.generate_expression(ir, ir.ast.second_handle(node));
 99 | 
100 |       // The load is emitted ahead of the store, so it reads the previous value.
101 |       let previous = if yields_value {
102 |          Some(self.builder.build_load(target, "old"))
103 |       } else {
104 |          None
105 |       };
106 |       // Mutability is of no concern here; SemTypes in the frontend takes care of that.
107 |       self.builder.build_store(target, value);
108 |       previous
109 |    }
110 | }
111 | 


--------------------------------------------------------------------------------
/tsuki-frontend/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "tsuki-frontend"
 3 | version = "0.1.0"
 4 | edition = "2018"
 5 | 
 6 | [dependencies]
 7 | thiserror = "1.0.26"
 8 | phf = { version = "0.9.0", features = ["macros"] }
 9 | smallvec = "1.6.1"
10 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/astdump.rs:
--------------------------------------------------------------------------------
  1 | //! AST pretty printer.
  2 | 
  3 | use crate::ast::*;
  4 | use crate::common::SourceFile;
  5 | use crate::types::Types;
  6 | 
  7 | #[derive(Debug)] // Printed with `{:?}` as the label in front of a child node.
  8 | enum Prefix {
  9 |    L, // left operand of a binary node; also the child of Check, Unwrap, Deref, Pragmas
 10 |    R, // right operand of a binary node; also the child of Not, Neg, BitNot, Member, Ref
 11 |    Fun, // child of Call / CallFunction
 12 |    Cond, // child of IfBranch / While
 13 |    X, // child of Variable, Return, Pub, and the Widen* nodes
 14 |    Name, // first child of Fun; child of TypeName
 15 |    Params, // second child of Fun
 16 |    Generic, // first child of Parameters
 17 |    Formal, // second child of Parameters
 18 |    Return, // child of FormalParameters
 19 |    Type, // first child of ConstrainedType; child of NamedParameters
 20 |    Constraint, // second child of ConstrainedType
 21 | }
 22 | 
 23 | /// Prints the indentation for the given nesting depth to stderr: two spaces per level.
 24 | fn print_indentation(depth: usize) {
 25 |    let width = depth * 2;
 26 |    eprint!("{:width$}", "", width = width);
 27 | }
 28 | 
 29 | fn print_source_range(file: &SourceFile, start: usize, end: usize) {
 30 |    eprint!("{}", &file.source[start..end]); // Writes bytes `start..end` of the source to stderr.
 31 | }
 32 | 
 33 | fn print_string_range(file: &SourceFile, start: usize, end: usize) {
 34 |    eprint!("{}", &file.source[start..end]); // Printed as the raw source slice, unprocessed.
 35 | }
 36 | 
 37 | struct State<'s, 'a, 't> { // Read-only context passed along to every `dump_node` call.
 38 |    file: &'s SourceFile, // source text that literal and identifier ranges are printed from
 39 |    ast: &'a Ast,
 40 |    types: Option<&'t Types>, // when present, each node's type name is printed as well
 41 | }
 42 | 
 43 | fn dump_node(s: &State, node: NodeId, depth: usize, prefix: Option<Prefix>) {
 44 |    let State { file, ast, types } = s;
 45 |    print_indentation(depth);
 46 |    // Prints one line for `node` to stderr, then recurses into its children one level deeper.
 47 |    let kind = ast.kind(node);
 48 |    let extra = ast.extra(node);
 49 | 
 50 |    // Optional prefix, telling which role this node plays in its parent.
 51 |    if let Some(prefix) = prefix {
 52 |       eprint!("{:?}: ", prefix);
 53 |    }
 54 | 
 55 |    // Node header: the kind, and for leaf nodes the source text or value they stand for.
 56 |    eprint!("{:?} ", kind);
 57 |    match kind {
 58 |       NodeKind::Integer | NodeKind::Float | NodeKind::Atom | NodeKind::Identifier => {
 59 |          let (start, end) = (ast.first(node), ast.second(node));
 60 |          print_source_range(file, start, end);
 61 |       }
 62 |       NodeKind::String | NodeKind::DocComment => {
 63 |          let (start, end) = (ast.first(node), ast.second(node));
 64 |          print_string_range(file, start, end);
 65 |       }
 66 |       NodeKind::Character => eprint!("{:?}", char::from_u32(ast.first(node) as u32)),
 67 |       NodeKind::Symbol => eprint!("{:?}", ast.first(node)),
 68 |       _ => (),
 69 |    }
 70 |    match extra { // Numeric payloads are printed in their Debug form.
 71 |       number @ (NodeData::Uint8(..)
 72 |       | NodeData::Uint16(..)
 73 |       | NodeData::Uint32(..)
 74 |       | NodeData::Uint64(..)
 75 |       | NodeData::Int8(..)
 76 |       | NodeData::Int16(..)
 77 |       | NodeData::Int32(..)
 78 |       | NodeData::Int64(..)
 79 |       | NodeData::Float32(..)
 80 |       | NodeData::Float64(..)) => eprint!("{:?}", number),
 81 |       _ => (),
 82 |    }
 83 |    if let Some(types) = types { // Type annotation, only when type information was given.
 84 |       let typ = ast.type_id(node);
 85 |       eprint!(" : {}", types.name(typ));
 86 |    }
 87 |    if let Some(scope) = ast.scope(node) {
 88 |       eprint!(" +{:?}", scope);
 89 |    }
 90 |    eprintln!();
 91 | 
 92 |    match kind { // Children stored in the node's first/second handles.
 93 |       | NodeKind::Dot
 94 |       | NodeKind::Plus
 95 |       | NodeKind::Minus
 96 |       | NodeKind::Mul
 97 |       | NodeKind::Div
 98 |       | NodeKind::Pow
 99 |       | NodeKind::Concat
100 |       | NodeKind::Lshift
101 |       | NodeKind::Rshift
102 |       | NodeKind::BitAnd
103 |       | NodeKind::BitOr
104 |       | NodeKind::BitXor
105 |       | NodeKind::Equal
106 |       | NodeKind::NotEqual
107 |       | NodeKind::Less
108 |       | NodeKind::Greater
109 |       | NodeKind::LessEqual
110 |       | NodeKind::GreaterEqual
111 |       | NodeKind::UpTo
112 |       | NodeKind::UpToInclusive
113 |       | NodeKind::Assign
114 |       | NodeKind::PlusAssign
115 |       | NodeKind::MinusAssign
116 |       | NodeKind::MulAssign
117 |       | NodeKind::DivAssign
118 |       | NodeKind::Push
119 |       | NodeKind::Index
120 |       | NodeKind::IndexAlt
121 |       | NodeKind::Val
122 |       | NodeKind::Var
123 |       | NodeKind::Type => {
124 |          let (left, right) = (ast.first_handle(node), ast.second_handle(node));
125 |          dump_node(s, left, depth + 1, Some(Prefix::L));
126 |          dump_node(s, right, depth + 1, Some(Prefix::R));
127 |       }
128 |       NodeKind::Fun => {
129 |          let (left, right) = (ast.first_handle(node), ast.second_handle(node));
130 |          dump_node(s, left, depth + 1, Some(Prefix::Name));
131 |          dump_node(s, right, depth + 1, Some(Prefix::Params));
132 |       }
133 |       NodeKind::Parameters => {
134 |          let (left, right) = (ast.first_handle(node), ast.second_handle(node));
135 |          dump_node(s, left, depth + 1, Some(Prefix::Generic));
136 |          dump_node(s, right, depth + 1, Some(Prefix::Formal));
137 |       }
138 |       NodeKind::ConstrainedType => {
139 |          let (left, right) = (ast.first_handle(node), ast.second_handle(node));
140 |          dump_node(s, left, depth + 1, Some(Prefix::Type));
141 |          dump_node(s, right, depth + 1, Some(Prefix::Constraint));
142 |       }
143 |       | NodeKind::Check
144 |       | NodeKind::Unwrap
145 |       | NodeKind::Deref
146 |       | NodeKind::Call
147 |       | NodeKind::CallFunction
148 |       | NodeKind::Not
149 |       | NodeKind::Neg
150 |       | NodeKind::BitNot
151 |       | NodeKind::Member
152 |       | NodeKind::Ref
153 |       | NodeKind::IfBranch
154 |       | NodeKind::While
155 |       | NodeKind::Return
156 |       | NodeKind::FormalParameters
157 |       | NodeKind::NamedParameters
158 |       | NodeKind::Variable
159 |       | NodeKind::Pragmas
160 |       | NodeKind::TypeName
161 |       | NodeKind::Pub
162 |       | NodeKind::WidenInt
163 |       | NodeKind::WidenUint
164 |       | NodeKind::WidenFloat => {
165 |          let left = ast.first_handle(node);
166 |          dump_node(
167 |             s,
168 |             left,
169 |             depth + 1,
170 |             Some(match kind {
171 |                NodeKind::Check | NodeKind::Unwrap | NodeKind::Deref | NodeKind::Pragmas => {
172 |                   Prefix::L
173 |                }
174 |                | NodeKind::Not
175 |                | NodeKind::Neg
176 |                | NodeKind::BitNot
177 |                | NodeKind::Member
178 |                | NodeKind::Ref => Prefix::R,
179 |                NodeKind::Call | NodeKind::CallFunction => Prefix::Fun,
180 |                NodeKind::IfBranch | NodeKind::While => Prefix::Cond,
181 |                NodeKind::FormalParameters => Prefix::Return,
182 |                NodeKind::NamedParameters => Prefix::Type,
183 |                NodeKind::TypeName => Prefix::Name,
184 |                | NodeKind::Variable
185 |                | NodeKind::Return
186 |                | NodeKind::Pub
187 |                | NodeKind::WidenInt
188 |                | NodeKind::WidenUint
189 |                | NodeKind::WidenFloat => Prefix::X,
190 |                _ => unreachable!(), // The enclosing arm lists only the kinds matched above.
191 |             }),
192 |          );
193 |       }
194 |       _ => (),
195 |    }
196 | 
197 |    match extra { // Children stored as a node list are printed last, without a prefix.
198 |       NodeData::None => (),
199 |       NodeData::NodeList(list) => {
200 |          for &node in list {
201 |             dump_node(s, node, depth + 1, None);
202 |          }
203 |       }
204 |       _ => (),
205 |    }
206 | }
207 | 
208 | /// Prints the AST to stderr, starting from the given root node.
209 | pub fn dump_ast(file: &SourceFile, ast: &Ast, types: Option<&Types>, root_node: NodeId) {
210 |    dump_node(&State { file, ast, types }, root_node, 0, None);
211 | }
212 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/backend.rs:
--------------------------------------------------------------------------------
 1 | //! Module for common backend functionality.
 2 | //! This can be used by CLIs and other programs that need to compile tsuki source code.
 3 | 
 4 | use crate::common::{Errors, SourceFile};
 5 | 
 6 | /// Trait implemented by all backends that can compile and run tsuki source code.
 7 | pub trait Backend {
 8 |    /// The kind of code emitted by the backend. This can be an object file, an executable file,
 9 |    /// or even a JIT-compiled function; you name it.
10 |    type Target;
11 | 
12 |    /// Compiles a source file to a target.
13 |    ///
14 |    /// # Errors
15 |    /// `Err` should contain an error of kind `CodeGen`, together with a diagnostic message.
16 |    /// These errors should only be returned in dire cases where something went wrong in earlier
17 |    /// stages of compilation, and the backend can't make sense of what the frontend produced.
18 |    fn compile(&self, root: SourceFile) -> Result<Self::Target, Errors>;
19 | }
20 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/common.rs:
--------------------------------------------------------------------------------
  1 | //! Common functionality.
  2 | 
  3 | use std::fmt;
  4 | use std::path::PathBuf;
  5 | 
  6 | use smallvec::SmallVec;
  7 | 
  8 | use crate::ast::NodeKind;
  9 | use crate::lexer::{IndentLevel, Token, TokenKind};
 10 | 
 11 | /// Represents a source file.
 12 | pub struct SourceFile {
 13 |    /// The package the source file resides in.
 14 |    pub package: String,
 15 |    /// The root of the package.
 16 |    pub package_root: PathBuf,
 17 |    /// The path to the source file, relative to the package's `src` folder.
 18 |    pub path: PathBuf,
 19 |    /// The module name; that is, the package name and file path concatenated together with a colon,
 20 |    /// with the extension stripped, and all path separators replaced with dots `.`.
 21 |    pub module_name: String,
 22 |    /// The source code itself.
 23 |    pub source: String,
 24 | }
 25 | 
 26 | impl SourceFile {
 27 |    pub fn new(
 28 |       package: String,
 29 |       package_root: PathBuf,
 30 |       path: PathBuf,
 31 |       source: String,
 32 |    ) -> Result<Self, Error> {
 33 |       let module_name = { // Built as `package:path.to.file`, see the steps below.
 34 |          let package_root = package_root
 35 |             .canonicalize()
 36 |             .map_err(|err| Error::spanless(path.clone(), ErrorKind::Io(err)))?;
 37 |          let path = path
 38 |             // Normalize the path, so that it can be compared against the canonical package root.
 39 |             .canonicalize()
 40 |             .map_err(|err| Error::spanless(path.clone(), ErrorKind::Io(err)))?
 41 |             // Remove the package_root prefix; this fails if the file is not inside of the root.
 42 |             .strip_prefix(&package_root)
 43 |             .map_err(|_| Error::spanless(path.clone(), ErrorKind::InvalidPackageRoot))?
 44 |             // Remove the .tsu extension.
 45 |             .with_extension("")
 46 |             // Convert it to a string; this fails if the path is not valid UTF-8.
 47 |             .to_str()
 48 |             .ok_or_else(|| Error::spanless(path.clone(), ErrorKind::InvalidUtf8InPath))?
 49 |             // Replace path separators with dots.
 50 |             .replace(std::path::MAIN_SEPARATOR, ".");
 51 |          // Join the package name and the dotted path with a colon.
 52 |          format!("{}:{}", package, path)
 53 |       };
 54 |       Ok(Self {
 55 |          package,
 56 |          package_root,
 57 |          path,
 58 |          module_name,
 59 |          source,
 60 |       })
 61 |    }
 62 | }
 63 | 
 64 | /// Represents a span of text in a source file.
 65 | ///
 66 | /// Lines and columns are 1-based (`0` marks an invalid position); byte offsets are 0-based.
 67 | #[derive(Clone, Debug)]
 68 | pub struct Span {
 69 |    pub byte_start: usize,
 70 |    pub line_start: usize,
 71 |    pub column_start: usize,
 72 |    pub byte_end: usize,
 73 |    pub line_end: usize,
 74 |    pub column_end: usize,
 75 | }
 76 | 
 77 | impl Span {
 78 |    // The first valid position in a file.
 79 |    pub const FIRST_BYTE: usize = 0;
 80 |    pub const FIRST_LINE: usize = 1;
 81 |    pub const FIRST_COLUMN: usize = 1;
 82 | 
 83 |    // Marker values for positions that do not exist.
 84 |    pub const INVALID_LINE: usize = 0;
 85 |    pub const INVALID_COLUMN: usize = 0;
 86 | 
 87 |    /// A span that points nowhere; [`Span::is_invalid`] returns `true` for it.
 88 |    pub const INVALID: Self = Self {
 89 |       byte_start: 0,
 90 |       line_start: Self::INVALID_LINE,
 91 |       column_start: Self::INVALID_COLUMN,
 92 |       byte_end: 0,
 93 |       line_end: Self::INVALID_LINE,
 94 |       column_end: Self::INVALID_COLUMN,
 95 |    };
 96 | 
 97 |    /// Creates and initializes a new span starting at the first possible position in a file.
 98 |    pub fn new() -> Self {
 99 |       // The first possible position is 1:1..1:1.
100 |       Self {
101 |          byte_start: Self::FIRST_BYTE,
102 |          line_start: Self::FIRST_LINE,
103 |          column_start: Self::FIRST_COLUMN,
104 |          byte_end: Self::FIRST_BYTE,
105 |          line_end: Self::FIRST_LINE,
106 |          column_end: Self::FIRST_COLUMN,
107 |       }
108 |    }
109 | 
110 |    /// Sets the start of the span to its current end.
111 |    pub fn start_over(&mut self) {
112 |       // The byte offset has to move together with the line and column, otherwise
113 |       // `byte_start` would stay at the offset the span was created with.
114 |       self.byte_start = self.byte_end;
115 |       self.line_start = self.line_end;
116 |       self.column_start = self.column_end;
117 |    }
118 | 
119 |    /// Increments the ending column by `n`.
120 |    ///
121 |    /// The ending byte offset advances by `n` as well.
122 |    pub fn advance_column_by(&mut self, n: usize) {
123 |       self.byte_end += n;
124 |       self.column_end += n;
125 |    }
126 | 
127 |    /// Increments the ending line and resets the ending column to the first column.
128 |    ///
129 |    /// The ending byte offset advances by one (presumably for a one-byte line break).
130 |    pub fn advance_line(&mut self) {
131 |       self.byte_end += 1;
132 |       self.line_end += 1;
133 |       self.column_end = Self::FIRST_COLUMN;
134 |    }
135 | 
136 |    /// Returns whether the span is an _invalid_ span, that is, any of its positions is
137 |    /// `INVALID_LINE` or `INVALID_COLUMN`.
138 |    pub fn is_invalid(&self) -> bool {
139 |       self.line_start == Self::INVALID_LINE
140 |          || self.column_start == Self::INVALID_COLUMN
141 |          || self.line_end == Self::INVALID_LINE
142 |          || self.column_end == Self::INVALID_COLUMN
143 |    }
144 | 
145 |    /// Joins two spans into one that covers both.
146 |    ///
147 |    /// The span `a` is expected to be placed earlier in the text than `b`.
148 |    pub fn join(a: &Span, b: &Span) -> Span {
149 |       // Positions are compared as `(line, column)` pairs. Tuples are ordered
150 |       // lexicographically, so the line decides first and the column breaks ties.
151 |       // The joined span begins at the earlier start and finishes at the later end. Taking
152 |       // the later end matters for spans that end on different lines: picking the end with
153 |       // the smaller line number would cut the joined span short.
154 |       let start = (a.line_start, a.column_start).min((b.line_start, b.column_start));
155 |       let end = (a.line_end, a.column_end).max((b.line_end, b.column_end));
156 | 
157 |       let (line_start, column_start) = start;
158 |       let (line_end, column_end) = end;
159 |       Span {
160 |          byte_start: a.byte_start.min(b.byte_start),
161 |          line_start,
162 |          column_start,
163 |          byte_end: a.byte_end.max(b.byte_end),
164 |          line_end,
165 |          column_end,
166 |       }
167 |    }
168 | }
169 | 
170 | impl fmt::Display for Span {
171 |    /// Formats the span as `line:column`. The alternate form `{:#}` prints the whole range as
172 |    /// `line:column..line:column`.
173 |    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174 |       if !f.alternate() {
175 |          return write!(f, "{}:{}", self.line_start, self.column_start);
176 |       }
177 |       write!(
178 |          f,
179 |          "{}:{}..{}:{}",
180 |          self.line_start, self.column_start, self.line_end, self.column_end
181 |       )
182 |    }
183 | }
184 | 
185 | impl Default for Span {
186 |    /// Initializes a span at a default, _invalid_ position. This is _not_ the same as
187 |    /// [`Span::new`], which starts at the first valid position instead.
188 |    ///
189 |    /// Lines and columns carry the invalid markers; both byte offsets are `FIRST_BYTE`.
190 |    fn default() -> Self {
191 |       Self {
192 |          byte_start: Self::FIRST_BYTE,
193 |          byte_end: Self::FIRST_BYTE,
194 |          ..Self::INVALID
195 |       }
196 |    }
197 | }
198 | 
199 | /// The kinds of errors carried by [`Error`], grouped by the stage that reports them.
200 | #[derive(thiserror::Error, Debug)]
201 | pub enum ErrorKind {
202 |    // Non-compilation errors: these concern paths and I/O rather than the program being
203 |    // compiled.
204 |    #[error("invalid UTF-8 in path")]
205 |    InvalidUtf8InPath,
206 |    #[error("package root is not a prefix of the main file path")]
207 |    InvalidPackageRoot,
208 |    #[error("I/O error: {0}")]
209 |    Io(#[from] std::io::Error),
210 | 
211 |    /*
212 |     * Lexer errors
213 |     */
214 |    #[error("unexpected character: {0:?}")]
215 |    UnexpectedCharacter(char),
216 |    #[error("indentation too deep; something's wrong with Your program.")]
217 |    IndentTooDeep,
218 |    #[error("CRLF line endings are not supported")]
219 |    CrlfNotSupported,
220 |    #[error("invalid integer literal kind: {0:?}")]
221 |    InvalidIntegerLiteral(char),
222 |    #[error("invalid escape sequence kind: '\\{0}'")]
223 |    InvalidEscapeSequence(char),
224 |    #[error("incomplete '\\{0}' escape sequence")]
225 |    IncompleteEscapeSequence(char),
226 |    #[error("unclosed character literal")]
227 |    UnclosedCharacterLiteral,
228 |    #[error("unicode character U+{0:X} is out of range")]
229 |    UnicodeEscapeOutOfRange(u32),
230 |    #[error("unicode escape out of range of 32-bit integers")]
231 |    UnicodeEscapeOutOfRange32,
232 |    #[error("unclosed string literal")]
233 |    UnclosedStringLiteral,
234 | 
235 |    /*
236 |     * Parser errors
237 |     */
238 |    #[error("unexpected token in prefix position: '{0}'")]
239 |    UnexpectedPrefixToken(TokenKind),
240 |    #[error("unexpected token in infix position: '{0}'")]
241 |    UnexpectedInfixToken(TokenKind),
242 |    #[error("unexpected token in type position: '{0}'")]
243 |    UnexpectedTypeToken(TokenKind),
244 |    #[error("missing '{0}' to close {1}")]
245 |    MissingClosingToken(TokenKind, Token),
246 |    #[error("expected comma ',' or '{0}' to close {1}")]
247 |    ExpectedCommaOrClosingToken(TokenKind, Token),
248 |    #[error("statements must be separated by line breaks")]
249 |    MissingLineBreakAfterStatement,
250 |    #[error("module-level code must not be indented (got {0} spaces of indentation)")]
251 |    NoIndentationExpectedAtModuleLevel(IndentLevel),
252 |    #[error("indented block of level greater than {0} expected")]
253 |    IndentedBlockExpected(IndentLevel),
254 |    #[error("identifier expected, but got '{0}'")]
255 |    IdentifierExpected(TokenKind),
256 |    #[error("missing variable name (an identifier, or '_' to discard the value)")]
257 |    VarNameExpected,
258 |    #[error("expected '=' after variable name, but got '{0}'")]
259 |    VarMissingEquals(TokenKind),
260 |    #[error("expected comma ',' or colon ':' in parameter list, but got '{0}'")]
261 |    ExpectedCommaOrColon(TokenKind),
262 |    #[error("function parameter list expected, but got '{0}'")]
263 |    FunctionParametersExpected(TokenKind),
264 |    #[error("pragma argument list expected, but got '{0}'")]
265 |    PragmaArgsExpected(TokenKind),
266 |    #[error("`pub` must be followed by a declaration")]
267 |    PubMustBeFollowedByDeclaration,
268 | 
269 |    /*
270 |     * Semantic checking (sem'check) errors
271 |     */
272 |    // SemLiterals
273 |    #[error("invalid number literal suffix: '{0}'")]
274 |    InvalidNumberLiteralSuffix(String),
275 |    #[error("integer {0} is too big to fit in 64 bits")]
276 |    IntegerTooBig(String),
277 |    #[error("integer {0} is too big to fit in {1}")]
278 |    UnsignedIntegerOverflowForType(u64, String),
279 |    #[error("integer {0} is too big (or too small) to fit in {1}")]
280 |    SignedIntegerOverflowForType(i64, String),
281 |    #[error("unsigned integers cannot be negative")]
282 |    UintCannotBeNegative,
283 |    #[error("integer suffixes cannot be used on float literals")]
284 |    InvalidFloatSuffix,
285 | 
286 |    // SemTypes
287 |    #[error("'{0}' is not declared in this scope")]
288 |    UndeclaredSymbol(String),
289 |    #[error("invalid unary operator for {0}")]
290 |    InvalidUnaryOperator(String),
291 |    #[error("type mismatch: expected {0}, but got {1}")]
292 |    TypeMismatch(String, String),
293 |    #[error("{0} arguments expected, but got {1}")]
294 |    NArgumentsExpected(usize, usize),
295 |    #[error("missing result value in expression")]
296 |    MissingResult,
297 |    #[error("result value of expression is unused; use `val _ = x` to discard it")]
298 |    UnusedValue,
299 |    #[error("invalid location (left hand side of assignment)")]
300 |    InvalidLocation,
301 |    #[error("'{0}' is not a type")]
302 |    SymbolIsNotAType(String),
303 |    #[error("the target is immutable and cannot be assigned to")]
304 |    CannotAssignImmutableLocation,
305 |    #[error("`if` condition must be a Bool")]
306 |    IfConditionMustBeBool,
307 |    #[error("`while` condition must be a Bool")]
308 |    WhileConditionMustBeBool,
309 |    #[error("expression cannot be called; make sure it is a function")]
310 |    ExpressionCannotBeCalled,
311 |    #[error("`return` cannot be used outside of a function")]
312 |    ReturnOutsideOfFunction,
313 |    #[error("`break` cannot be used outside of a loop")]
314 |    BreakOutsideOfLoop,
315 |    #[error("type alias does not alias any type. try adding `= YourType`")]
316 |    EmptyTypeAlias,
317 |    #[error("unknown pragma '{0}'")]
318 |    UnknownPragma(String),
319 |    #[error("invalid built-in type name")]
320 |    InvalidBuiltinTypeName,
321 | 
322 |    /*
323 |     * Internal errors
324 |     * ---
325 |     * Every internal error must be prefixed by "<component> internal error:" to tell the user that
326 |     * something went terribly wrong in the compiler, and that the error should be reported.
327 |     */
328 |    #[error("SemTypes internal error: invalid AST node passed to annotate_node(): {0:?}")]
329 |    SemTypesInvalidAstNode(NodeKind),
330 | 
331 |    #[error("backend internal error: code generation error: {0}")]
332 |    CodeGen(String),
333 |    #[error("backend internal error: execution error: {0}")]
334 |    ExecutableError(String),
335 | }
336 | 
337 | /// An error that can occur during lexing, parsing, semantic analysis, or code generation.
338 | #[derive(Debug)]
339 | pub struct Error {
340 |    // The filename is owned because errors don't occur very often, so allocations are fine here,
341 |    // and using an owned `PathBuf` here simplifies a bunch of code.
342 |    pub filename: PathBuf,
343 |    pub span: Span,
344 |    pub kind: ErrorKind,
345 | }
346 | 
347 | impl Error {
348 |    /// Constructs an error that is not tied to any position in the file.
349 |    ///
350 |    /// The span is set to [`Span::INVALID`], such that no span is displayed when the error
351 |    /// is formatted.
352 |    pub fn spanless(filename: PathBuf, kind: ErrorKind) -> Self {
353 |       let span = Span::INVALID;
354 |       Self { filename, span, kind }
355 |    }
356 | }
357 | 
358 | impl fmt::Display for Error {
359 |    /// Formats the error as `filename:position: message`; the position is left out when the
360 |    /// span is invalid. The alternate format syntax `{:#}` can be used to display the full span
361 |    /// of where the error occurred, instead of its starting position only.
362 |    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
363 |       // `display()` never panics, unlike `to_str().unwrap()`, on filenames that are not UTF-8.
364 |       write!(f, "{}:", self.filename.display())?;
365 |       if !self.span.is_invalid() {
366 |          if f.alternate() {
367 |             write!(f, "{:#}:", self.span)?;
368 |          } else {
369 |             write!(f, "{}:", self.span)?;
370 |          }
371 |       }
372 |       write!(f, " {}", self.kind)
373 |    }
374 | }
375 | 
376 | pub type Errors = SmallVec<[Error; 8]>;
377 | 
378 | /// Creates an `Errors` from a single error.
379 | pub fn single_error(error: Error) -> Errors {
380 |    let mut errs = Errors::new();
381 |    errs.push(error);
382 |    errs
383 | }
384 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/functions.rs:
--------------------------------------------------------------------------------
  1 | //! Function registry.
  2 | 
  3 | use smallvec::SmallVec;
  4 | 
  5 | use crate::ast::{NodeId, NodeKind};
  6 | use crate::scope::{Mutability, ScopeId, Scopes, SymbolId, SymbolKind, Symbols, Variable};
  7 | use crate::types::{BuiltinTypes, TypeId};
  8 | 
  9 | /// The unique ID of a function in the registry; it indexes the storage in `Functions`.
 10 | #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 11 | pub struct FunctionId(usize);
 12 | 
 13 | /// The kind of a function, which records where the function comes from.
 14 | pub enum FunctionKind {
 15 |    /// This function was declared in the current module.
 16 |    Local,
 17 |    /// This function was declared in a different module or package.
 18 |    External,
 19 |    /// This function is imported from C.
 20 |    ImportC { is_varargs: bool },
 21 |    /// This function is a compiler intrinsic.
 22 |    Intrinsic(Intrinsic),
 23 | }
 24 | 
 25 | impl FunctionKind {
 26 |    /// Returns whether the function kind is for a local function.
 27 |    pub fn is_local(&self) -> bool {
 28 |       matches!(self, Self::Local)
 29 |    }
 30 | 
 31 |    /// Returns whether the kind is `ImportC` with `is_varargs` set, i.e. a C varargs function.
 32 |    pub fn is_varargs(&self) -> bool {
 33 |       matches!(self, Self::ImportC { is_varargs: true })
 34 |    }
 35 | }
 36 | 
 37 | /// Function parameters.
 38 | pub struct Parameters {
 39 |    /// The formal parameters this function accepts, each stored as the ID of its symbol.
 40 |    pub formal: SmallVec<[SymbolId; 8]>,
 41 |    /// The return type of the function.
 42 |    pub return_type: TypeId,
 43 | }
 44 | 
 45 | /// Data-oriented storage for functions: one vector per attribute, indexed by function ID.
 46 | pub struct Functions {
 47 |    names: Vec<String>,
 48 |    mangled_names: Vec<String>,
 49 |    parameters: Vec<Parameters>,
 50 |    kinds: Vec<FunctionKind>,
 51 | }
 52 | 
 53 | impl Functions {
 54 |    /// Creates an empty function registry.
 55 |    pub fn new() -> Self {
 56 |       Self {
 57 |          names: vec![],
 58 |          mangled_names: vec![],
 59 |          parameters: vec![],
 60 |          kinds: vec![],
 61 |       }
 62 |    }
 63 | 
 64 |    /// Registers a function and returns the ID it can be looked up with.
 65 |    pub fn create(
 66 |       &mut self,
 67 |       name: String,
 68 |       mangled_name: String,
 69 |       parameters: Parameters,
 70 |       kind: FunctionKind,
 71 |    ) -> FunctionId {
 72 |       let id = FunctionId(self.names.len());
 73 |       self.names.push(name);
 74 |       self.mangled_names.push(mangled_name);
 75 |       self.parameters.push(parameters);
 76 |       self.kinds.push(kind);
 77 |       id
 78 |    }
 79 | 
 80 |    /// Returns the name a function was registered with.
 81 |    pub fn name(&self, function: FunctionId) -> &str {
 82 |       self.names[function.0].as_str()
 83 |    }
 84 | 
 85 |    /// Returns the mangled name a function was registered with.
 86 |    pub fn mangled_name(&self, function: FunctionId) -> &str {
 87 |       self.mangled_names[function.0].as_str()
 88 |    }
 89 | 
 90 |    /// Returns a reference to the parameters of a function.
 91 |    pub fn parameters(&self, function: FunctionId) -> &Parameters {
 92 |       let FunctionId(index) = function;
 93 |       &self.parameters[index]
 94 |    }
 95 | 
 96 |    /// Returns the kind of a function.
 97 |    pub fn kind(&self, function: FunctionId) -> &FunctionKind {
 98 |       let FunctionId(index) = function;
 99 |       &self.kinds[index]
100 |    }
101 | 
102 |    /// Returns an iterator over function IDs, set up with the current number of functions.
103 |    pub fn iter(&self) -> FunctionsIter {
104 |       let len = self.names.len();
105 |       FunctionsIter { current: 0, len }
106 |    }
107 | }
108 | 
109 | /// An intrinsic function.
110 | ///
111 | /// Each kind of intrinsic has its own node kind; see the `From<Intrinsic>` impl for `NodeKind`.
112 | #[derive(Debug, Clone, Copy, PartialEq, Eq)]
113 | pub enum Intrinsic {
114 |    // TODO: Remove these once `c_import` is implemented.
115 |    /// Prints an `Int32` to stdout.
116 |    PrintInt32,
117 |    /// Prints a `Float32` to stdout.
118 |    PrintFloat32,
119 | }
120 | 
121 | impl From<Intrinsic> for NodeKind {
122 |    /// Maps an intrinsic onto the node kind that represents it.
123 |    fn from(intrinsic: Intrinsic) -> Self {
124 |       match intrinsic {
125 |          Intrinsic::PrintFloat32 => Self::PrintFloat32,
126 |          Intrinsic::PrintInt32 => Self::PrintInt32,
127 |       }
128 |    }
129 | }
130 | 
131 | /// Registers the intrinsic functions: each one is added to `functions`, gets a function symbol
132 | /// in `symbols`, and is made visible in `scope` under its own name.
133 | pub fn register_intrinsics(
134 |    builtin: &BuiltinTypes,
135 |    scopes: &mut Scopes,
136 |    symbols: &mut Symbols,
137 |    scope: ScopeId,
138 |    functions: &mut Functions,
139 | ) {
140 |    // TODO: replace this with stdlib declarations.
141 |    let int32_params = [("x", builtin.t_int32)];
142 |    let float32_params = [("x", builtin.t_float32)];
143 |    // Name, formal parameters (name and type), return type, and the intrinsic itself.
144 |    let intrinsics = [
145 |       ("__intrin_print_int32", &int32_params[..], builtin.t_unit, Intrinsic::PrintInt32),
146 |       ("__intrin_print_float32", &float32_params[..], builtin.t_unit, Intrinsic::PrintFloat32),
147 |    ];
148 | 
149 |    for &(name, parameters, return_type, intrinsic) in intrinsics.iter() {
150 |       // Parameter symbols are created first: immutable variables that have no AST node.
151 |       let formal = parameters
152 |          .iter()
153 |          .map(|&(parameter_name, type_id)| {
154 |             let variable = Variable {
155 |                mutability: Mutability::Val,
156 |             };
157 |             symbols.create(
158 |                parameter_name,
159 |                NodeId::null(),
160 |                type_id,
161 |                SymbolKind::Variable(variable),
162 |             )
163 |          })
164 |          .collect();
165 |       // Intrinsics are registered with an empty mangled name.
166 |       let function_id = functions.create(
167 |          name.into(),
168 |          String::new(),
169 |          Parameters {
170 |             formal,
171 |             return_type,
172 |          },
173 |          FunctionKind::Intrinsic(intrinsic),
174 |       );
175 |       // The function symbol itself is given the builtin statement type.
176 |       let symbol_id = symbols.create(
177 |          name,
178 |          NodeId::null(),
179 |          builtin.t_statement,
180 |          SymbolKind::Function(function_id),
181 |       );
182 |       // Finally, make the symbol reachable through identifier lookups in `scope`.
183 |       scopes.insert(scope, name, symbol_id);
184 |    }
185 | }
186 | 
187 | /// Iterator over the IDs of the functions in a registry; created by `Functions::iter`.
188 | pub struct FunctionsIter {
189 |    current: usize,
190 |    len: usize,
191 | }
192 | 
193 | impl Iterator for FunctionsIter {
194 |    type Item = FunctionId;
195 | 
196 |    fn next(&mut self) -> Option<Self::Item> {
197 |       if self.current >= self.len {
198 |          return None;
199 |       }
200 |       let id = FunctionId(self.current);
201 |       self.current += 1;
202 |       Some(id)
203 |    }
204 | }
205 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/lib.rs:
--------------------------------------------------------------------------------
 1 | pub mod ast;
 2 | pub mod astdump;
 3 | pub mod backend;
 4 | pub mod common;
 5 | pub mod functions;
 6 | pub mod lexer;
 7 | pub mod parser;
 8 | pub mod scope;
 9 | pub mod sem;
10 | mod sem_literals;
11 | mod sem_types;
12 | pub mod types;
13 | 
14 | use std::path::PathBuf;
15 | 
16 | use common::{Errors, SourceFile};
17 | use lexer::Lexer;
18 | use sem::{AnalyzeOptions as SemOptions, Ir};
19 | use types::{DefaultTypes, FloatSize, IntegerSize};
20 | 
21 | /// Options for [`analyze`]. Only `file` is read there; `std_path` is currently ignored.
22 | pub struct AnalyzeOptions<'s> {
23 |    pub file: &'s SourceFile,
24 |    pub std_path: PathBuf,
25 | }
26 | 
27 | /// Switches for the debug output that [`analyze`] prints to stderr.
28 | ///
29 | /// All switches are off by default, which is why `Default` can simply be derived: the default
30 | /// value of `bool` is `false`.
31 | #[derive(Debug, Clone, Copy, Default)]
32 | pub struct DebugOptions {
33 |    /// Print the source code of the file being analyzed.
34 |    pub dump_source: bool,
35 | 
36 |    /// Print the AST as produced by the parser, before semantic analysis.
37 |    pub dump_ast_pre_sem: bool,
38 | 
39 |    /// Print the AST after semantic analysis. This dump is also handed the type storage built
40 |    /// during analysis.
41 |    pub dump_ast_post_sem: bool,
42 | }
43 | 
44 | /// Parses and analyzes a source file. Returns the fully analyzed, typed IR.
45 | ///
46 | /// Debug dumps requested via `debug` are written to stderr. The source dump happens before
47 | /// parsing, so that it is still printed when the parser rejects the file.
48 | pub fn analyze(options: AnalyzeOptions, debug: &DebugOptions) -> Result<Ir, Errors> {
49 |    let AnalyzeOptions { file, .. } = options;
50 |    if debug.dump_source {
51 |       eprintln!("## Source code");
52 |       eprintln!("{}", file.source);
53 |       eprintln!();
54 |    }
55 | 
56 |    let (ast, root_node) = parser::parse(Lexer::new(file))?;
57 |    for handle in ast.node_handles() {
58 |       if ast.span(handle).is_invalid() {
59 |          eprintln!("warning: node with invalid span: {:?}\nAST dump:", handle);
60 |          astdump::dump_ast(file, &ast, None, handle);
61 |       }
62 |    }
63 | 
64 |    if debug.dump_ast_pre_sem {
65 |       eprintln!("## AST (pre-sem)");
66 |       astdump::dump_ast(file, &ast, None, root_node);
67 |       eprintln!();
68 |    }
69 | 
70 |    let ir = sem::analyze(SemOptions {
71 |       file,
72 |       ast,
73 |       root_node,
74 |       default_types: DefaultTypes {
75 |          int_width: IntegerSize::S32,
76 |          float_width: FloatSize::S32,
77 |          size_width: IntegerSize::U64,
78 |       },
79 |    })?;
80 | 
81 |    if debug.dump_ast_post_sem {
82 |       eprintln!("## AST (post-sem)");
83 |       astdump::dump_ast(file, &ir.ast, Some(&ir.types), root_node);
84 |       eprintln!();
85 |    }
86 | 
87 |    Ok(ir)
88 | }
89 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/scope.rs:
--------------------------------------------------------------------------------
  1 | //! Scoping and symbols.
  2 | 
  3 | use std::borrow::Cow;
  4 | use std::collections::{HashMap, HashSet};
  5 | use std::num::NonZeroUsize;
  6 | 
  7 | use crate::ast::NodeId;
  8 | use crate::functions::FunctionId;
  9 | use crate::types::TypeId;
 10 | 
 11 | /// A handle that uniquely identifies a symbol stored in `Symbols`.
 12 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 13 | pub struct SymbolId(usize);
 14 | 
 15 | impl SymbolId {
 16 |    /// Wraps a raw index into a symbol ID.
 17 |    pub(crate) fn new(id: usize) -> Self {
 18 |       SymbolId(id)
 19 |    }
 20 | 
 21 |    /// Returns the raw index backing this symbol ID.
 22 |    pub fn id(self) -> usize {
 23 |       self.0
 24 |    }
 25 | }
 26 | 
 27 | /// What a symbol stands for, together with the data specific to that kind of symbol.
 28 | pub enum SymbolKind {
 29 |    /// The symbol is a variable.
 30 |    Variable(Variable),
 31 |    /// The symbol is a type.
 32 |    Type(TypeId),
 33 |    /// The symbol is a function, be it free or associated.
 34 |    Function(FunctionId),
 35 | }
 36 | 
 37 | impl SymbolKind {
 38 |    /// Returns the variable data of a variable symbol.
 39 |    /// Panics if the symbol is not a variable.
 40 |    pub fn unwrap_variable(&self) -> &Variable {
 41 |       match self {
 42 |          SymbolKind::Variable(variable) => variable,
 43 |          _ => panic!("unwrap_variable called on a non-variable symbol"),
 44 |       }
 45 |    }
 46 | 
 47 |    /// Returns the function ID of a function symbol.
 48 |    /// Panics if the symbol is not a function.
 49 |    pub fn unwrap_function(&self) -> FunctionId {
 50 |       match *self {
 51 |          SymbolKind::Function(function_id) => function_id,
 52 |          _ => panic!("unwrap_function called on a non-function symbol"),
 53 |       }
 54 |    }
 55 | }
 56 | 
 57 | /// The mutability of a variable: either `val` (immutable) or `var` (mutable).
 58 | #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 59 | pub enum Mutability {
 60 |    /// A `val` (immutable) variable.
 61 |    Val,
 62 |    /// A `var` (mutable) variable.
 63 |    Var,
 64 | }
 65 | 
 66 | /// The data carried by a `SymbolKind::Variable` symbol.
 67 | pub struct Variable {
 68 |    pub mutability: Mutability,
 69 | }
 70 | 
 71 | /// Symbol storage. Symbols are what identifiers get looked up to.
 72 | pub struct Symbols {
 73 |    names: Vec<String>,
 74 |    nodes: Vec<NodeId>,
 75 |    types: Vec<TypeId>,
 76 |    kinds: Vec<SymbolKind>,
 77 | }
 78 | 
 79 | impl Symbols {
 80 |    /// Constructs a symbol storage without any symbols in it.
 81 |    pub fn new() -> Symbols {
 82 |       Symbols {
 83 |          names: vec![],
 84 |          nodes: vec![],
 85 |          types: vec![],
 86 |          kinds: vec![],
 87 |       }
 88 |    }
 89 | 
 90 |    /// Stores a new symbol made of a name, node, type, and kind, and returns the symbol's ID.
 91 |    pub fn create(&mut self, name: &str, node: NodeId, typ: TypeId, kind: SymbolKind) -> SymbolId {
 92 |       self.names.push(name.to_owned());
 93 |       self.nodes.push(node);
 94 |       self.types.push(typ);
 95 |       self.kinds.push(kind);
 96 |       SymbolId::new(self.nodes.len() - 1)
 97 |    }
 98 | 
 99 |    /// Returns the name the symbol was created with.
100 |    pub fn name(&self, symbol: SymbolId) -> &str {
101 |       self.names[symbol.id()].as_str()
102 |    }
103 | 
104 |    /// Returns the node the symbol was created with.
105 |    pub fn node(&self, symbol: SymbolId) -> NodeId {
106 |       self.nodes[symbol.id()]
107 |    }
108 | 
109 |    /// Returns the type the symbol was created with.
110 |    pub fn type_id(&self, symbol: SymbolId) -> TypeId {
111 |       self.types[symbol.id()]
112 |    }
113 | 
114 |    /// Returns a shared reference to the symbol's kind and the data attached to it.
115 |    pub fn kind(&self, symbol: SymbolId) -> &SymbolKind {
116 |       &self.kinds[symbol.id()]
117 |    }
118 | 
119 |    /// Returns a mutable reference to the symbol's kind and the data attached to it.
120 |    pub fn kind_mut(&mut self, symbol: SymbolId) -> &mut SymbolKind {
121 |       &mut self.kinds[symbol.id()]
122 |    }
123 | }
124 | 
125 | /// An ID identifying a scope created by `Scopes::create_scope`. The ID is never zero.
126 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
127 | pub struct ScopeId(NonZeroUsize);
128 | 
129 | /// Manages scopes: which symbols they contain, and which scopes and symbols are flagged.
130 | pub struct Scopes {
131 |    symbols: HashMap<(ScopeId, Cow<'static, str>), SymbolId>,
132 |    breakable_scopes: HashSet<ScopeId>,
133 |    public_symbols: HashSet<SymbolId>,
134 |    scope_count: usize,
135 | }
136 | 
137 | impl Scopes {
138 |    /// Constructs a scope manager without any scopes or symbols.
139 |    pub fn new() -> Self {
140 |       Scopes {
141 |          symbols: HashMap::new(),
142 |          breakable_scopes: HashSet::new(),
143 |          public_symbols: HashSet::new(),
144 |          scope_count: 1, // IDs are non-zero, so counting starts at one.
145 |       }
146 |    }
147 | 
148 |    /// Allocates a fresh scope ID and returns it.
149 |    pub fn create_scope(&mut self) -> ScopeId {
150 |       let id = self.scope_count;
151 |       self.scope_count = id + 1;
152 |       ScopeId(NonZeroUsize::new(id).unwrap())
153 |    }
154 | 
155 |    /// Binds `identifier` to `symbol` within `scope`. A symbol previously bound to the same
156 |    /// identifier in that scope is replaced, and thus lost.
157 |    pub fn insert(&mut self, scope: ScopeId, identifier: &str, symbol: SymbolId) {
158 |       self.symbols.insert((scope, Cow::from(identifier.to_owned())), symbol);
159 |    }
160 | 
161 |    /// Looks `identifier` up in `scope` only. Returns the symbol bound to it, or `None` if the
162 |    /// scope has no binding for the identifier.
163 |    pub fn get(&self, scope: ScopeId, identifier: &str) -> Option<SymbolId> {
164 |       self.symbols.get(&(scope, Cow::Borrowed(identifier))).copied()
165 |    }
166 | 
167 |    /// Tells whether the scope was marked as breakable.
168 |    pub fn is_breakable(&self, scope: ScopeId) -> bool {
169 |       self.breakable_scopes.contains(&scope)
170 |    }
171 | 
172 |    /// Flags the scope as breakable.
173 |    pub fn set_breakable(&mut self, scope: ScopeId) {
174 |       self.breakable_scopes.insert(scope);
175 |    }
176 | 
177 |    /// Flags the symbol as public.
178 |    pub fn set_public(&mut self, symbol: SymbolId) {
179 |       self.public_symbols.insert(symbol);
180 |    }
181 | }
182 | 
183 | /// Keeps track of the local scopes that are currently visible, innermost scope on top.
184 | /// Identifier lookups can be performed through the stack as well.
185 | #[derive(Debug, Clone)]
186 | pub struct ScopeStack {
187 |    scopes: Vec<ScopeId>,
188 | }
189 | 
190 | impl ScopeStack {
191 |    /// Constructs an empty scope stack.
192 |    pub fn new() -> Self {
193 |       ScopeStack { scopes: vec![] }
194 |    }
195 | 
196 |    /// Makes `scope` the new top of the stack, and hands the very same ID back.
197 |    pub fn push(&mut self, scope: ScopeId) -> ScopeId {
198 |       self.scopes.push(scope);
199 |       scope
200 |    }
201 | 
202 |    /// Returns the topmost scope. Panics if the stack is empty.
203 |    pub fn top(&self) -> ScopeId {
204 |       self.scopes.last().copied().expect("the scope stack must not be empty")
205 |    }
206 | 
207 |    /// Removes the topmost scope from the stack. Does nothing if the stack is empty.
208 |    pub fn pop(&mut self) {
209 |       drop(self.scopes.pop());
210 |    }
211 | 
212 |    /// Looks for a symbol called `name` in the scopes on the stack.
213 |    ///
214 |    /// Scopes are searched from the top of the stack downwards, so the innermost match wins.
215 |    pub fn lookup(&self, scopes: &Scopes, name: &str) -> Option<SymbolId> {
216 |       self.scopes
217 |          .iter()
218 |          .rev()
219 |          .find_map(|&scope| scopes.get(scope, name))
220 |    }
221 | 
222 |    /// Returns an iterator that walks the scopes on the stack from the bottom to the top.
223 |    pub fn iter(&self) -> impl DoubleEndedIterator<Item = ScopeId> + '_ {
224 |       self.scopes.iter().copied()
225 |    }
226 | }
227 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem.rs:
--------------------------------------------------------------------------------
  1 | //! The root of semantic analysis.
  2 | 
  3 | use std::path::Path;
  4 | 
  5 | use crate::ast::{Ast, NodeId};
  6 | use crate::common::{Error, ErrorKind, Errors, SourceFile, Span};
  7 | use crate::functions::Functions;
  8 | use crate::scope::{Scopes, Symbols};
  9 | pub use crate::types::DefaultTypes;
 10 | use crate::types::{BuiltinTypes, TypeLog, Types};
 11 | 
 12 | use crate::sem_literals::SemLiterals;
 13 | use crate::sem_types::{SemTypes, SemTypesBorrows};
 14 | 
 15 | /// A single pass of semantic analysis.
 16 | pub(crate) trait SemPass {
 17 |    type Result;
 18 | 
 19 |    /// Runs the pass on the AST, starting at `root_node`, and hands back the modified AST.
 20 |    fn analyze(&mut self, ast: Ast, root_node: NodeId) -> Ast;
 21 | 
 22 |    /// Returns the path of the file being analyzed.
 23 |    fn filename(&self) -> &Path;
 24 |    /// Returns the errors collected so far.
 25 |    fn errors(&self) -> &Errors;
 26 |    /// Returns the errors collected so far, for modification.
 27 |    fn errors_mut(&mut self) -> &mut Errors;
 28 |    /// Consumes the pass, leaving only the collected errors.
 29 |    fn into_errors(self) -> Errors;
 30 | 
 31 |    /// Records an error of the given kind at the given span, in the file being analyzed.
 32 |    fn emit_error(&mut self, kind: ErrorKind, span: Span) {
 33 |       let error = Error {
 34 |          filename: self.filename().to_owned(),
 35 |          kind,
 36 |          span,
 37 |       };
 38 |       self.errors_mut().push(error);
 39 |    }
 40 | }
 41 | 
 42 | /// The state threaded through the semantic passes: the AST and its root node.
 43 | struct Analyzer {
 44 |    ast: Ast,
 45 |    root_node: NodeId,
 46 | }
 47 | 
 48 | impl Analyzer {
 49 |    /// Runs the given semantic pass over the AST.
 50 |    ///
 51 |    /// Yields the updated state if the pass reported no errors, and its errors otherwise.
 52 |    fn perform(mut self, mut sem: impl SemPass) -> Result<Self, Errors> {
 53 |       self.ast = sem.analyze(self.ast, self.root_node);
 54 |       match sem.errors().len() {
 55 |          0 => Ok(self),
 56 |          _ => Err(sem.into_errors()),
 57 |       }
 58 |    }
 59 | }
 60 | 
 61 | /// Data that all semantic passes share: the source file and the default literal types.
 62 | pub(crate) struct SemCommon<'s> {
 63 |    pub file: &'s SourceFile,
 64 |    pub default_types: DefaultTypes,
 65 | }
 66 | 
 67 | impl<'s> SemCommon<'s> {
 68 |    /// Slices the source code with the range stored in the node as `first..second`.
 69 |    pub fn get_source_range_from_node(&self, ast: &Ast, node: NodeId) -> &str {
 70 |       let (start, end) = (ast.first(node), ast.second(node));
 71 |       &self.file.source[start..end]
 72 |    }
 73 | }
 74 | 
 75 | /// The intermediate representation output by the analyzer: the analyzed AST and its root node,
 76 | /// along with the type, symbol, and function storage filled in during analysis.
 77 | pub struct Ir {
 78 |    pub ast: Ast,
 79 |    pub root_node: NodeId,
 80 |    pub types: Types,
 81 |    pub symbols: Symbols,
 82 |    pub functions: Functions,
 83 | }
 84 | 
 85 | /// The inputs of `analyze`: the source file, its parsed AST, and the default literal types.
 86 | pub struct AnalyzeOptions<'s> {
 87 |    pub file: &'s SourceFile,
 88 |    pub ast: Ast,
 89 |    pub root_node: NodeId,
 90 |    pub default_types: DefaultTypes,
 91 | }
 92 | 
 93 | /// Runs the semantic passes over the AST, producing the IR that the backend consumes.
 94 | pub fn analyze(options: AnalyzeOptions) -> Result<Ir, Errors> {
 95 |    let root_node = options.root_node;
 96 |    let common = SemCommon {
 97 |       file: options.file,
 98 |       default_types: options.default_types,
 99 |    };
100 |    let analyzer = Analyzer {
101 |       ast: options.ast,
102 |       root_node,
103 |    };
104 | 
105 |    // State handed to the type pass; `types`, `symbols`, and `functions` end up in the IR.
106 |    let mut types = Types::new();
107 |    let mut type_log = TypeLog::new();
108 |    let builtin_types = BuiltinTypes::add_to(&mut types, &common.default_types);
109 |    let mut scopes = Scopes::new();
110 |    let mut symbols = Symbols::new();
111 |    let mut functions = Functions::new();
112 | 
113 |    // NOTE: Maybe split errors into normal and fatal?
114 |    // Normal errors would be accumulated into the existing error list, but would not halt the
115 |    // analysis completely. Fatal errors would halt the analysis, and would occur if something really
116 |    // goes wrong inside of a phase, yielding AST that might break the phase after it.
117 |    // Also, warnings anyone?
118 |    let analyzer = analyzer.perform(SemLiterals::new(&common))?;
119 |    let borrows = SemTypesBorrows {
120 |       common: &common,
121 |       types: &mut types,
122 |       log: &mut type_log,
123 |       builtin: &builtin_types,
124 |       scopes: &mut scopes,
125 |       symbols: &mut symbols,
126 |       functions: &mut functions,
127 |    };
128 |    let analyzer = analyzer.perform(SemTypes::new(borrows))?;
129 | 
130 |    Ok(Ir {
131 |       ast: analyzer.ast,
132 |       root_node,
133 |       types,
134 |       symbols,
135 |       functions,
136 |    })
137 | }
138 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_literals.rs:
--------------------------------------------------------------------------------
  1 | //! Semantic analyzer for literal kinds.
  2 | //!
  3 | //! This performs some basic initial analysis to convert literal kinds from generic `Integer` and
  4 | //! `Float` to concrete types `Int8`, `Int16`, etc., including negation.
  5 | //! Note that `SemTypes` may perform additional conversions later down the line.
  6 | 
  7 | use std::convert::{TryFrom, TryInto};
  8 | use std::path::Path;
  9 | 
 10 | use smallvec::SmallVec;
 11 | 
 12 | use crate::ast::{Ast, NodeData, NodeId, NodeKind};
 13 | use crate::common::{ErrorKind, Errors, Span};
 14 | use crate::sem::{SemCommon, SemPass};
 15 | use crate::types::{FloatSize, IntegerSize};
 16 | 
 17 | /// State for the `SemLiterals` analysis phase: shared analysis data and the errors emitted.
 18 | pub(crate) struct SemLiterals<'c> {
 19 |    common: &'c SemCommon<'c>,
 20 |    errors: Errors,
 21 | }
 22 | 
 23 | /// Type suffixes a number literal can carry; `None` stands for a literal without a suffix.
 24 | #[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
 25 | enum LiteralSuffix {
 26 |    None,
 27 |    I,
 28 |    I8,
 29 |    I16,
 30 |    I32,
 31 |    I64,
 32 |    U,
 33 |    U8,
 34 |    U16,
 35 |    U32,
 36 |    U64,
 37 |    F,
 38 |    F32,
 39 |    F64,
 40 | }
 41 | 
 42 | impl From<IntegerSize> for LiteralSuffix {
 43 |    fn from(size: IntegerSize) -> Self {
 44 |       match size {
 45 |          IntegerSize::U8 => Self::U8,
 46 |          IntegerSize::U16 => Self::U16,
 47 |          IntegerSize::U32 => Self::U32,
 48 |          IntegerSize::U64 => Self::U64,
 49 |          IntegerSize::S8 => Self::I8,
 50 |          IntegerSize::S16 => Self::I16,
 51 |          IntegerSize::S32 => Self::I32,
 52 |          IntegerSize::S64 => Self::I64,
 53 |       }
 54 |    }
 55 | }
 56 | 
 57 | impl From<FloatSize> for LiteralSuffix {
 58 |    fn from(size: FloatSize) -> Self {
 59 |       match size {
 60 |          FloatSize::S64 => Self::F64,
 61 |          FloatSize::S32 => Self::F32,
 62 |       }
 63 |    }
 64 | }
 65 | 
 66 | /// Maps the text after the last `_` of a number literal to the suffix it spells.
 67 | static SUFFIXES: phf::Map<&'static str, LiteralSuffix> = phf::phf_map! {
 68 |    // Suffixes without an explicit width.
 69 |    "i" => LiteralSuffix::I, "u" => LiteralSuffix::U, "f" => LiteralSuffix::F,
 70 |    "i8" => LiteralSuffix::I8,
 71 |    "i16" => LiteralSuffix::I16,
 72 |    "i32" => LiteralSuffix::I32,
 73 |    "i64" => LiteralSuffix::I64,
 74 |    "u8" => LiteralSuffix::U8,
 75 |    "u16" => LiteralSuffix::U16,
 76 |    "u32" => LiteralSuffix::U32,
 77 |    "u64" => LiteralSuffix::U64,
 78 |    "f32" => LiteralSuffix::F32,
 79 |    "f64" => LiteralSuffix::F64,
 80 | };
 81 | 
 82 | impl<'c> SemLiterals<'c> {
 83 |    /// Creates a new instance of the `SemTypes` analysis phase.
 84 |    pub fn new(common: &'c SemCommon<'c>) -> Self {
 85 |       SemLiterals {
 86 |          common,
 87 |          errors: Errors::new(),
 88 |       }
 89 |    }
 90 | 
 91 |    /// Splits a number literal into its digits and its type suffix (`LiteralSuffix::None` if it
 92 |    /// has none). An unknown suffix is reported as an error and stripped from the digits, so
 93 |    /// that the digit parsers never get to see the suffix letters.
 94 |    fn split_number<'n>(&mut self, source: &'n str, span: &Span) -> (&'n str, LiteralSuffix) {
 95 |       let (digits, suffix_string) = match source.rfind('_') {
 96 |          Some(underscore) => (&source[..underscore], &source[underscore + 1..]),
 97 |          None => return (source, LiteralSuffix::None),
 98 |       };
 99 |       // A suffix starts with a letter; in `1_000` the `_` merely separates digits.
100 |       if !suffix_string.starts_with(|c: char| c.is_ascii_alphabetic()) {
101 |          return (source, LiteralSuffix::None);
102 |       }
103 |       match SUFFIXES.get(suffix_string) {
104 |          Some(&suffix) => (digits, suffix),
105 |          None => {
106 |             self.emit_error(ErrorKind::InvalidNumberLiteralSuffix(source.into()), span.clone());
107 |             // Returning `source` here would make `parse_integer` panic on the suffix letters.
108 |             (digits, LiteralSuffix::None)
109 |          }
110 |       }
111 |    }
112 | 
113 |    /// Returns the numeric value of an ASCII digit. Any other byte trips the assertion.
114 |    fn digit_to_u64(digit: u8) -> u64 {
115 |       assert!(matches!(digit, b'0'..=b'9'));
116 |       u64::from(digit - b'0')
117 |    }
118 | 
119 |    fn overflow_error(&mut self, string: &str, span: Span) { // Emits `IntegerTooBig` at `span`.
120 |       self.emit_error(ErrorKind::IntegerTooBig(string.into()), span)
121 |    }
122 | 
123 |    /// Parses the digits of an integer literal into a `u64`, skipping `_` separators.
124 |    ///
125 |    /// The string must not be empty and must start with a digit; both are asserted.
126 |    /// If the number does not fit in a `u64`, the error is added to the phase's error list and
127 |    /// `Err(())` is returned.
128 |    fn parse_integer(&mut self, string: &str, span: &Span) -> Result<u64, ()> {
129 |       assert!(!string.is_empty());
130 | 
131 |       let bytes = string.as_bytes();
132 |       let mut result = Self::digit_to_u64(bytes[0]);
133 |       for &c in &bytes[1..] {
134 |          match c {
135 |             b'0'..=b'9' => {
136 |                // Overflow should produce a nice error for the user rather than a panic,
137 |                // hence the checked arithmetic.
138 |                let digit = Self::digit_to_u64(c);
139 |                result = result
140 |                   .checked_mul(10)
141 |                   .and_then(|shifted| shifted.checked_add(digit))
142 |                   .ok_or_else(|| self.overflow_error(string, span.clone()))?;
143 |             }
144 |             // `_` is a valid separating character, and carries no value.
145 |             b'_' => (),
146 |             // The lexer should have sorted out any other characters already.
147 |             _ => panic!("unexpected character in integer literal: {}", c),
148 |          }
149 |       }
150 | 
151 |       Ok(result)
152 |    }
153 | 
154 |    /// Narrows a `u64` to the unsigned integer type `R`.
155 |    ///
156 |    /// If the value does not fit, an error mentioning `type_name` is emitted at `span`, and
157 |    /// `R::default()` is returned in its place.
158 |    fn convert_unsigned<R>(&mut self, x: u64, type_name: &str, span: &Span) -> R
159 |    where
160 |       R: Default + TryFrom<u64>,
161 |    {
162 |       R::try_from(x).unwrap_or_else(|_| {
163 |          let kind = ErrorKind::UnsignedIntegerOverflowForType(x.into(), type_name.into());
164 |          self.emit_error(kind, span.clone());
165 |          // The node still needs some value to carry on with.
166 |          R::default()
167 |       })
168 |    }
169 | 
170 |    /// Converts the magnitude `x` and the sign `negative` to the signed integer type `R`.
171 |    ///
172 |    /// If the value does not fit, an error mentioning `type_name` is emitted at `span`, and
173 |    /// `R::default()` is returned in its place.
174 |    fn convert_signed<R>(&mut self, negative: bool, x: u64, type_name: &str, span: &Span) -> R
175 |    where
176 |       R: Default + TryFrom<i64>,
177 |    {
178 |       // The sign has to be applied before narrowing, or else the minimum of each type (such as
179 |       // -128_i8) would be rejected, because its magnitude alone does not fit the type.
180 |       // The same goes for the minimum of i64, the largest signed integer in tsuki, which is why
181 |       // the sign is applied in i128: there, negating any u64 magnitude is always possible.
182 |       let magnitude = i128::from(x);
183 |       let wide = if negative { -magnitude } else { magnitude };
184 |       let signed = match i64::try_from(wide) {
185 |          Ok(ok) => ok,
186 |          Err(..) => {
187 |             let kind = ErrorKind::UnsignedIntegerOverflowForType(x.into(), type_name.into());
188 |             self.emit_error(kind, span.clone());
189 |             return R::default();
190 |          }
191 |       };
192 |       match R::try_from(signed) {
193 |          Ok(ok) => ok,
194 |          Err(..) => {
195 |             let kind = ErrorKind::SignedIntegerOverflowForType(signed.into(), type_name.into());
196 |             self.emit_error(kind, span.clone());
197 |             R::default()
198 |          }
199 |       }
200 |    }
201 | 
202 |    /// Converts a `u64` to an `f64`, negating the result if `negative` is set.
203 |    ///
204 |    /// The conversion is a plain `as` cast.
205 |    fn convert_to_float(&self, negative: bool, x: u64) -> f64 {
206 |       let magnitude = x as f64;
207 |       // Negating the float yields the same value as negating the cast result directly.
208 |       if negative { -magnitude } else { magnitude }
209 |    }
210 | 
211 |    /// Converts the abstract Integer `node` to a concretely typed node.
212 |    ///
213 |    /// Suffixes without a width (or no suffix at all) are first resolved to the widths configured
214 |    /// in `common.default_types`. A negative unsigned literal is reported as an error, and the
215 |    /// node is then left as it is.
216 |    fn convert_integer_node(
217 |       &mut self,
218 |       ast: &mut Ast,
219 |       node: NodeId,
220 |       negative: bool,
221 |       number: u64,
222 |       suffix: LiteralSuffix,
223 |    ) {
224 |       let defaults = &self.common.default_types;
225 |       let suffix = match suffix {
226 |          LiteralSuffix::None | LiteralSuffix::I => LiteralSuffix::from(defaults.int_width),
227 |          LiteralSuffix::U => LiteralSuffix::from(defaults.size_width),
228 |          LiteralSuffix::F => LiteralSuffix::from(defaults.float_width),
229 |          concrete => concrete,
230 |       };
231 |       let unsigned = matches!(
232 |          suffix,
233 |          LiteralSuffix::U8 | LiteralSuffix::U16 | LiteralSuffix::U32 | LiteralSuffix::U64
234 |       );
235 |       if negative && unsigned {
236 |          self.emit_error(ErrorKind::UintCannotBeNegative, ast.span(node).clone());
237 |          return;
238 |       }
239 | 
240 |       let span = ast.span(node);
241 |       let (kind, extra) = match suffix {
242 |          LiteralSuffix::None | LiteralSuffix::I | LiteralSuffix::U | LiteralSuffix::F => {
243 |             // Resolved to one of the types from `common.default_types` above.
244 |             unreachable!()
245 |          }
246 |          LiteralSuffix::I8 => {
247 |             let value = self.convert_signed(negative, number, "Int8", span);
248 |             (NodeKind::Int8, NodeData::Int8(value))
249 |          }
250 |          LiteralSuffix::I16 => {
251 |             let value = self.convert_signed(negative, number, "Int16", span);
252 |             (NodeKind::Int16, NodeData::Int16(value))
253 |          }
254 |          LiteralSuffix::I32 => {
255 |             let value = self.convert_signed(negative, number, "Int32", span);
256 |             (NodeKind::Int32, NodeData::Int32(value))
257 |          }
258 |          LiteralSuffix::I64 => {
259 |             let value = self.convert_signed(negative, number, "Int64", span);
260 |             (NodeKind::Int64, NodeData::Int64(value))
261 |          }
262 |          LiteralSuffix::U8 => {
263 |             let value = self.convert_unsigned(number, "Uint8", span);
264 |             (NodeKind::Uint8, NodeData::Uint8(value))
265 |          }
266 |          LiteralSuffix::U16 => {
267 |             let value = self.convert_unsigned(number, "Uint16", span);
268 |             (NodeKind::Uint16, NodeData::Uint16(value))
269 |          }
270 |          LiteralSuffix::U32 => {
271 |             let value = self.convert_unsigned(number, "Uint32", span);
272 |             (NodeKind::Uint32, NodeData::Uint32(value))
273 |          }
274 |          LiteralSuffix::U64 => (NodeKind::Uint64, NodeData::Uint64(number)),
275 |          LiteralSuffix::F32 => {
276 |             let value = self.convert_to_float(negative, number);
277 |             (NodeKind::Float32, NodeData::Float32(value as f32))
278 |          }
279 |          LiteralSuffix::F64 => {
280 |             let value = self.convert_to_float(negative, number);
281 |             (NodeKind::Float64, NodeData::Float64(value))
282 |          }
283 |       };
284 |       ast.convert(node, kind);
285 |       ast.set_extra(node, extra);
286 |    }
287 | 
288 |    /// Splits a potentially negated literal into its sign and its number node.
289 |    ///
290 |    /// Returns whether `node` is a `Neg` node, along with the node holding the actual number:
291 |    /// the node behind the `Neg` node's first handle, or `node` itself without a negation.
292 |    fn extract_neg_node(ast: &Ast, node: NodeId) -> (bool, NodeId) {
293 |       if ast.kind(node) == NodeKind::Neg {
294 |          (true, ast.first_handle(node))
295 |       } else {
296 |          (false, node)
297 |       }
298 |    }
299 | 
300 |    /// Turns an integer literal node into one of the concretely typed kinds (`Int8`, `Int16`,
301 |    /// etc.), or into an `Error` node if its digits cannot be parsed.
302 |    fn analyze_integer(&mut self, ast: &mut Ast, node: NodeId) {
303 |       let (negative, number_node) = Self::extract_neg_node(ast, node);
304 |       let source = self.common.get_source_range_from_node(ast, number_node);
305 |       assert!(!source.is_empty());
306 |       let (digits, suffix) = self.split_number(source, ast.span(node));
307 |       match self.parse_integer(digits, ast.span(node)) {
308 |          Err(()) => ast.convert(node, NodeKind::Error),
309 |          Ok(number) => self.convert_integer_node(ast, node, negative, number, suffix),
310 |       }
311 |    }
311 | 
312 |    /// Parses a floating point literal to an `f64`.
313 |    ///
314 |    /// The literal may contain `_` separators, which are ignored. If parsing fails the function
315 |    /// panics, as the lexer must only let valid literals through. Floats are not susceptible to
316 |    /// overflow; only to precision loss on large scales.
317 |    fn parse_float(string: &str) -> f64 {
318 |       // Parsing floats is hard. That's why we're using the Rust standard library for this
319 |       // purpose. It does not accept the `_` separators tsuki allows for, though, so all other
320 |       // bytes are first accumulated into a separate buffer.
321 |       // A SmallVec is used, so that literals of up to 32 bytes stay on the stack; longer ones
322 |       // simply move over to the heap.
323 |       let mut digits = SmallVec::<[u8; 32]>::new();
324 |       digits.extend(string.bytes().filter(|&byte| byte != b'_'));
325 |       // `_` is ASCII, so removing it can never break up a multi-byte UTF-8 sequence. The
326 |       // checked conversion is therefore not expected to fail, and it spares us the `unsafe`
327 |       // that `from_utf8_unchecked` would require.
328 |       let filtered = std::str::from_utf8(&digits)
329 |          .expect("removing `_` bytes from a string must leave valid UTF-8 behind");
330 |       // Idea: emit a warning when the literal suffers significant precision loss.
331 |       // The `expect` below restates the contract with the lexer.
332 |       filtered.parse::<f64>().expect("the lexer must provide only valid digits")
333 |    }
334 | 
335 |    /// Turns the abstract `Float` node into a concrete `Float32` or `Float64` node holding
336 |    /// `number`, negated if `negative` is set. Non-float suffixes turn the node into `Error`.
337 |    fn convert_float_node(
338 |       &mut self,
339 |       ast: &mut Ast,
340 |       node: NodeId,
341 |       negative: bool,
342 |       mut number: f64,
343 |       mut suffix: LiteralSuffix,
344 |    ) {
345 |       // Literals with no suffix or the bare `F` suffix take the configured default float width.
346 |       if let LiteralSuffix::None | LiteralSuffix::F = suffix {
347 |          suffix = LiteralSuffix::from(self.common.default_types.float_width);
348 |       }
349 |       if negative {
350 |          number *= -1.0;
351 |       }
352 |       let (kind, extra) = match suffix {
353 |          LiteralSuffix::F32 => (NodeKind::Float32, NodeData::Float32(number as f32)),
354 |          LiteralSuffix::F64 => (NodeKind::Float64, NodeData::Float64(number)),
355 |          // Expected to be impossible, as these were replaced by the default width above.
356 |          LiteralSuffix::None | LiteralSuffix::F => unreachable!(),
357 |          _ => {
358 |             self.emit_error(ErrorKind::InvalidFloatSuffix, ast.span(node).clone());
359 |             ast.convert(node, NodeKind::Error);
360 |             return;
361 |          }
362 |       };
363 |       ast.convert(node, kind);
364 |       ast.set_extra(node, extra);
365 |    }
366 | 
367 |    /// Resolves a `Float` literal (optionally under `Neg`) at `node` to `Float32` or `Float64`.
368 |    fn analyze_float(&mut self, ast: &mut Ast, node: NodeId) {
369 |       let (is_negative, literal) = Self::extract_neg_node(ast, node);
370 |       let text = self.common.get_source_range_from_node(ast, literal);
371 |       assert!(!text.is_empty());
372 |       let (digits, suffix) = self.split_number(text, ast.span(node));
373 |       let value = Self::parse_float(digits);
374 |       self.convert_float_node(ast, node, is_negative, value, suffix);
375 |    }
376 | 
377 |    /// Analyzes a branch node. A `Neg` whose first handle is an `Integer` or `Float` literal is
378 |    /// resolved as a single signed literal; for any other branch, every child is analyzed.
379 |    fn walk_branch(&mut self, ast: &mut Ast, node: NodeId) {
380 |       let operand = ast.first_handle(node);
381 |       let operand_kind = if ast.kind(node) == NodeKind::Neg {
382 |          Some(ast.kind(operand))
383 |       } else {
384 |          None
385 |       };
386 |       // The `-` sign is not part of the literal itself, so sign and literal are resolved together.
387 |       // This lets edge cases such as `-128_i8` through without causing an overflow error.
388 |       match operand_kind {
389 |          Some(NodeKind::Integer) => self.analyze_integer(ast, node),
390 |          Some(NodeKind::Float) => self.analyze_float(ast, node),
391 |          _ => {
392 |             ast.walk_mut(node, |ast, child| self.analyze_node(ast, child));
393 |          }
394 |       }
395 |    }
396 | 
397 |    /// Resolves literals in `node`: number literals directly, branch nodes via `walk_branch`.
398 |    fn analyze_node(&mut self, ast: &mut Ast, node: NodeId) {
399 |       match ast.kind(node) {
400 |          NodeKind::Float => self.analyze_float(ast, node),
401 |          NodeKind::Integer => self.analyze_integer(ast, node),
402 |          branch if branch.is_branch() => self.walk_branch(ast, node),
403 |          _ => {}
404 |       }
405 |    }
406 | }
407 | 
408 | impl SemPass for SemLiterals<'_> {
409 |    type Result = ();
410 |    /// Runs literal resolution on the tree rooted at `root_node`, then returns the AST.
411 |    fn analyze(&mut self, ast: Ast, root_node: NodeId) -> Ast {
412 |       let mut tree = ast;
413 |       self.analyze_node(&mut tree, root_node);
414 |       tree
415 |    }
416 | 
417 |    fn filename(&self) -> &Path {
418 |       &self.common.file.path
419 |    }
420 | 
421 |    fn errors(&self) -> &Errors {
422 |       &self.errors
423 |    }
424 | 
425 |    fn errors_mut(&mut self) -> &mut Errors {
426 |       &mut self.errors
427 |    }
428 | 
429 |    fn into_errors(self) -> Errors {
430 |       self.errors
431 |    }
432 | }
433 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/control_flow.rs:
--------------------------------------------------------------------------------
  1 | //! Type analysis for control flow constructs.
  2 | 
  3 | use crate::ast::{Ast, NodeId, NodeKind};
  4 | use crate::common::ErrorKind;
  5 | use crate::sem::SemPass;
  6 | use crate::types::TypeLogEntry;
  7 | 
  8 | use super::{NodeContext, SemTypes};
  9 | 
 10 | impl<'s> SemTypes<'s> {
 11 |    /// Annotates a "pass" (`_`) statement with the builtin statement type.
 12 |    pub(super) fn annotate_pass(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry {
 13 |       let statement_type = self.builtin.t_statement;
 14 |       self.annotate(ast, node, statement_type)
 15 |    }
 15 | 
 16 |    /// Annotates a prefix `do` block. Its statement list is analyzed inside a fresh scope, which
 17 |    /// is also attached to `node`; the node then becomes a `DoExpression` or `DoStatement`
 18 |    /// depending on `context`. Returns the log entry of the statement list.
 19 |    pub(super) fn annotate_do(
 20 |       &mut self,
 21 |       ast: &mut Ast,
 22 |       node: NodeId,
 23 |       context: NodeContext,
 24 |    ) -> TypeLogEntry {
 25 |       let converted_kind = match context {
 26 |          NodeContext::Expression(_) => NodeKind::DoExpression,
 27 |          NodeContext::Statement => NodeKind::DoStatement,
 28 |       };
 29 |       let block_scope = self.scope_stack.push(self.scopes.create_scope());
 30 |       ast.set_scope(node, Some(block_scope));
 31 |       let body_entry = self.annotate_statement_list(ast, node, context);
 32 |       self.scope_stack.pop();
 33 |       ast.convert_preserve(node, converted_kind);
 34 |       body_entry
 35 |    }
 36 | 
 37 |    /// Annotates an `if` expression or `if` statement.
 38 |    ///
 39 |    /// Every branch is analyzed in a scope of its own, and the condition of each `IfBranch` must be
 40 |    /// of a bool type. In expression context, the first branch body decides the type of the `if`
 41 |    /// and every later body must match it; in statement context the `if` has the statement type.
 42 |    pub(super) fn annotate_if(
 43 |       &mut self,
 44 |       ast: &mut Ast,
 45 |       node: NodeId,
 46 |       context: NodeContext,
 47 |    ) -> TypeLogEntry {
 48 |       let mut result_type = None;
 49 |       ast.walk_node_list_mut(node, |ast, _index, branch| {
 50 |          // The scope is pushed before the condition is analyzed, so that `if val` gets proper
 51 |          // scoping behavior.
 52 |          let branch_scope = self.scope_stack.push(self.scopes.create_scope());
 53 |          ast.set_scope(branch, Some(branch_scope));
 54 |          // `else` branches carry no condition, so only `if` branches have theirs checked.
 55 |          if ast.kind(branch) == NodeKind::IfBranch {
 56 |             let condition = ast.first_handle(branch);
 57 |             let condition_entry = self.annotate_node(ast, condition, context);
 58 |             let condition_type = self.log.type_id(condition_entry);
 59 |             let is_bool = self.types.kind(condition_type).is_bool();
 60 |             if !is_bool {
 61 |                let span = ast.span(condition).clone();
 62 |                self.emit_error(ErrorKind::IfConditionMustBeBool, span);
 63 |             }
 64 |          }
 65 |          let body_entry = self.annotate_statement_list(ast, branch, context);
 66 |          let body_type = self.log.type_id(body_entry);
 67 |          if let NodeContext::Expression(_) = context {
 68 |             if let Some(expected) = result_type {
 69 |                if body_type != expected {
 70 |                   // The returned log entry is discarded, because further mismatch errors may
 71 |                   // still arise in later branches.
 72 |                   let _ = self.type_mismatch(ast, node, expected, body_type);
 73 |                }
 74 |             } else {
 75 |                result_type = Some(body_type);
 76 |             }
 77 |          }
 78 |          self.scope_stack.pop();
 79 |       });
 80 |       let converted_kind = match context {
 81 |          NodeContext::Expression(_) => NodeKind::IfExpression,
 82 |          NodeContext::Statement => NodeKind::IfStatement,
 83 |       };
 84 |       ast.convert_preserve(node, converted_kind);
 85 |       self.annotate(ast, node, result_type.unwrap_or(self.builtin.t_statement))
 86 |    }
 87 | 
 88 |    /// Annotates a `while` loop. The condition must be of a bool type, otherwise a
 89 |    /// `WhileConditionMustBeBool` error is reported on it and the body is left unanalyzed.
 90 |    pub(super) fn annotate_while(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry {
 91 |       let condition = ast.first_handle(node);
 92 |       let bool_context = NodeContext::expression_of_type(self.builtin.t_bool);
 93 |       let condition_entry = self.annotate_node(ast, condition, bool_context);
 94 |       let condition_type = self.log.type_id(condition_entry);
 95 |       if !self.types.kind(condition_type).is_bool() {
 96 |          return self.error(ast, condition, ErrorKind::WhileConditionMustBeBool);
 97 |       }
 98 | 
 99 |       // The loop body lives in its own scope, marked breakable so that `break` can target it.
100 |       let body_scope = self.scope_stack.push(self.scopes.create_scope());
101 |       ast.set_scope(node, Some(body_scope));
102 |       self.scopes.set_breakable(body_scope);
103 |       // The body is analyzed as statements; its log entry is not needed.
104 |       let _ = self.annotate_statement_list(ast, node, NodeContext::Statement);
105 |       self.scope_stack.pop();
106 | 
107 |       self.annotate(ast, node, self.builtin.t_statement)
108 |    }
109 | 
110 |    /// Annotates a `break` statement, attaching the innermost breakable scope on the scope stack
111 |    /// to the node as the scope being broken out of. Reports `BreakOutsideOfLoop` if the stack
112 |    /// holds no breakable scope.
113 |    pub(super) fn annotate_break(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry {
114 |       // Stop at the first breakable scope found walking the stack in reverse, so that a `break`
115 |       // in nested loops targets the closest loop (assumes `iter()` runs bottom to top).
116 |       let target = self
117 |          .scope_stack
118 |          .iter()
119 |          .rev()
120 |          .find(|&scope| self.scopes.is_breakable(scope));
121 |       match target {
122 |          Some(break_scope) => {
123 |             ast.set_scope(node, Some(break_scope));
124 |             self.annotate(ast, node, self.builtin.t_noreturn)
125 |          }
126 |          None => self.error(ast, node, ErrorKind::BreakOutsideOfLoop),
127 |       }
128 |    }
129 | 
130 |    /// Annotates a `return` statement.
131 |    ///
132 |    /// Outside of a function this reports `ReturnOutsideOfFunction`. Otherwise the returned value
133 |    /// (unit, if none is given) must have the function's return type; the `return` is NoReturn.
134 |    pub(super) fn annotate_return(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry {
135 |       // `return` can only be used in a function.
136 |       let function_id = match self.current_function {
137 |          Some(function_id) => function_id,
138 |          None => return self.error(ast, node, ErrorKind::ReturnOutsideOfFunction),
139 |       };
140 |       let expected_type = self.functions.parameters(function_id).return_type;
141 |       let value_node = ast.first_handle(node);
142 |       let value_entry = if value_node == NodeId::null() {
143 |          // A `return` without a value: duplicate the empty node, so that it gets a unique ID
144 |          // that the unit type can be attached to.
145 |          let unit_node = ast.duplicate(value_node);
146 |          ast.set_first_handle(node, unit_node);
147 |          self.annotate(ast, unit_node, self.builtin.t_unit)
148 |       } else {
149 |          // A `return` with a value: the value is an expression of the function's return type.
150 |          self.annotate_node(ast, value_node, NodeContext::expression_of_type(expected_type))
151 |       };
152 |       let provided_type = self.log.type_id(value_entry);
153 |       if provided_type == expected_type {
154 |          self.annotate(ast, node, self.builtin.t_noreturn)
155 |       } else {
156 |          self.type_mismatch(ast, node, expected_type, provided_type)
157 |       }
158 |    }
159 | }
160 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/conversions.rs:
--------------------------------------------------------------------------------
  1 | //! Implicit conversions between types.
  2 | 
  3 | use crate::ast::{Ast, NodeData, NodeId, NodeKind};
  4 | use crate::types::{FloatSize, IntegerSize, TypeId, TypeLogEntry};
  5 | 
  6 | use super::SemTypes;
  7 | 
  8 | impl<'s> SemTypes<'s> {
  9 |    /// Widens the given integer node to the provided size.
 10 |    ///
 11 |    /// Integer literal nodes are re-typed in place: both the node kind and the stored value are
 12 |    /// converted to `new_size`. Any other node is wrapped in a `WidenUint` or `WidenInt` node.
 13 |    /// Either way, `node` is annotated with the builtin type matching `new_size`.
 14 |    fn widen_integer(&mut self, ast: &mut Ast, node: NodeId, new_size: IntegerSize) -> TypeLogEntry {
 15 |       if ast.kind(node).is_integer() {
 16 |          // Shortcut path for literals: the stored value is converted right here.
 17 |          let value = ast.extra(node).as_uint().unwrap();
 18 |          let (kind, data) = match new_size {
 19 |             IntegerSize::U8 => (NodeKind::Uint8, NodeData::Uint8(value as u8)),
 20 |             IntegerSize::U16 => (NodeKind::Uint16, NodeData::Uint16(value as u16)),
 21 |             IntegerSize::U32 => (NodeKind::Uint32, NodeData::Uint32(value as u32)),
 22 |             IntegerSize::U64 => (NodeKind::Uint64, NodeData::Uint64(value as u64)),
 23 |             IntegerSize::S8 => (NodeKind::Int8, NodeData::Int8(value as i8)),
 24 |             IntegerSize::S16 => (NodeKind::Int16, NodeData::Int16(value as i16)),
 25 |             IntegerSize::S32 => (NodeKind::Int32, NodeData::Int32(value as i32)),
 26 |             IntegerSize::S64 => (NodeKind::Int64, NodeData::Int64(value as i64)),
 27 |          };
 28 |          ast.convert(node, kind);
 29 |          ast.set_extra(node, data);
 30 |       } else {
 31 |          // Backend path for other nodes.
 32 |          // NOTE(review): this branch is only reached when `is_integer()` is false. If
 33 |          // `is_unsigned_integer()` implies `is_integer()`, `WidenUint` is never chosen here;
 34 |          // confirm against `NodeKind`.
 35 |          let wrapper = if ast.kind(node).is_unsigned_integer() {
 36 |             NodeKind::WidenUint
 37 |          } else {
 38 |             NodeKind::WidenInt
 39 |          };
 40 |          ast.wrap(node, wrapper);
 41 |       }
 42 |       let widened_type = match new_size {
 43 |          IntegerSize::U8 => self.builtin.t_uint8,
 44 |          IntegerSize::U16 => self.builtin.t_uint16,
 45 |          IntegerSize::U32 => self.builtin.t_uint32,
 46 |          IntegerSize::U64 => self.builtin.t_uint64,
 47 |          IntegerSize::S8 => self.builtin.t_int8,
 48 |          IntegerSize::S16 => self.builtin.t_int16,
 49 |          IntegerSize::S32 => self.builtin.t_int32,
 50 |          IntegerSize::S64 => self.builtin.t_int64,
 51 |       };
 52 |       self.annotate(ast, node, widened_type)
 53 |    }
 66 | 
 67 |    /// Widens a float node to the given size.
 68 |    ///
 69 |    /// As in `widen_integer`, float literal nodes are converted in place, and any other node is
 70 |    /// wrapped in a `WidenFloat` node. `node` is then annotated with the type for `new_size`.
 71 |    fn widen_float(&mut self, ast: &mut Ast, node: NodeId, new_size: FloatSize) -> TypeLogEntry {
 72 |       if ast.kind(node).is_float() {
 73 |          let value = ast.extra(node).as_float().unwrap();
 74 |          let (kind, data) = match new_size {
 75 |             FloatSize::S32 => (NodeKind::Float32, NodeData::Float32(value as f32)),
 76 |             FloatSize::S64 => (NodeKind::Float64, NodeData::Float64(value)),
 77 |          };
 78 |          ast.convert(node, kind);
 79 |          ast.set_extra(node, data);
 80 |       } else {
 81 |          ast.wrap(node, NodeKind::WidenFloat);
 82 |       }
 83 |       let widened_type = match new_size {
 84 |          FloatSize::S32 => self.builtin.t_float32,
 85 |          FloatSize::S64 => self.builtin.t_float64,
 86 |       };
 87 |       self.annotate(ast, node, widened_type)
 88 |    }
 99 | 
100 |    /// Attempts to implicitly convert `node` from type `from` to type `to`.
101 |    ///
102 |    /// Returns `None` if no implicit conversion is possible. Otherwise performs the conversion
103 |    /// and returns a type log entry for the converted type.
104 |    pub(super) fn try_perform_implicit_conversion(
105 |       &mut self,
106 |       ast: &mut Ast,
107 |       node: NodeId,
108 |       from: TypeId,
109 |       to: TypeId,
110 |    ) -> Option<TypeLogEntry> {
111 |       // If the two types are equal, there's no need for conversion.
112 |       if from == to {
113 |          return Some(self.log.push(to, node));
114 |       }
115 |       // Otherwise, the kinds of both types decide which conversion applies, if any.
116 |       let (source, target) = (self.types.kind(from), self.types.kind(to));
117 | 
118 |       // NoReturn converts to whichever type is requested.
119 |       if source.is_noreturn() {
120 |          return Some(self.annotate(ast, node, to));
121 |       }
122 | 
123 |       // Widening integer conversions. The intent is that integers only convert to wider types
124 |       // of the same signedness, eg. Int8 -> Int16, Int32 -> Int64, but not Int64 -> Int32, or
125 |       // Uint32 -> Int32; the check itself relies on how `IntegerSize` values compare.
126 |       if source.is_integer() && target.is_integer() {
127 |          let (source_size, target_size) = (source.unwrap_integer(), target.unwrap_integer());
128 |          if target_size >= source_size {
129 |             return Some(self.widen_integer(ast, node, target_size));
130 |          }
131 |       }
132 | 
133 |       // Widening float conversions. Floats only convert if the destination is at least as wide
134 |       // as the source (Float32 -> Float64).
135 |       if source.is_float() && target.is_float() {
136 |          let (source_size, target_size) = (source.unwrap_float(), target.unwrap_float());
137 |          if target_size >= source_size {
138 |             return Some(self.widen_float(ast, node, target_size));
139 |          }
140 |       }
141 | 
142 |       None
143 |    }
146 | 
147 |    /// Performs an implicit conversion without failing: `node` is converted from the type logged
148 |    /// in `from_log` to `to` if that is possible, otherwise `from_log` is returned unchanged.
149 |    pub(super) fn perform_implicit_conversion(
150 |       &mut self,
151 |       ast: &mut Ast,
152 |       node: NodeId,
153 |       from_log: TypeLogEntry,
154 |       to: TypeId,
155 |    ) -> TypeLogEntry {
156 |       let from = self.log.type_id(from_log);
157 |       match self.try_perform_implicit_conversion(ast, node, from, to) {
158 |          Some(converted) => converted,
159 |          None => from_log,
160 |       }
161 |    }
158 | }
159 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/functions.rs:
--------------------------------------------------------------------------------
  1 | //! Annotating functions and introducing them into scope.
  2 | 
  3 | use smallvec::SmallVec;
  4 | 
  5 | use crate::ast::{Ast, NodeId, NodeKind};
  6 | use crate::common::ErrorKind;
  7 | use crate::functions::{FunctionKind, Intrinsic, Parameters};
  8 | use crate::scope::{Mutability, SymbolKind, Variable};
  9 | use crate::types::{TypeId, TypeLogResult};
 10 | 
 11 | use super::{NodeContext, SemTypes};
 12 | 
 13 | impl<'s> SemTypes<'s> {
 14 |    /// Builds the mangled name of a function from this file: `<module name>.<function name>`.
 15 |    fn mangle_name(&self, function_name: &str) -> String {
 16 |       let module_name = &self.common.file.module_name;
 17 |       format!("{}.{}", module_name, function_name)
 18 |    }
 17 | 
 18 |    /// Annotates a function declaration and introduces the function into scope.
 19 |    ///
 20 |    /// On the first visit, the formal parameters and the return type are resolved, the function is
 21 |    /// registered, and its symbol is added to the scope the declaration appears in. In module
 22 |    /// scope the body is then deferred; anywhere else it is checked right away. A declaration whose
 23 |    /// name is already a `Symbol` comes from such a deferred check and only has its body checked.
 24 |    ///
 25 |    /// Returns an error if a parameter type or the return type cannot be looked up. The scope
 26 |    /// created for the parameters is popped off the scope stack on that path as well.
 27 |    pub(super) fn annotate_function_declaration(
 28 |       &mut self,
 29 |       ast: &mut Ast,
 30 |       node: NodeId,
 31 |    ) -> TypeLogResult {
 32 |       // Check if the name is sem'd or not. If so, we are coming from a deferred sem'check,
 33 |       // so simply check the body and return.
 34 |       let name_node = ast.first_handle(node);
 35 |       if ast.kind(name_node) == NodeKind::Symbol {
 36 |          return self.annotate_function_body(ast, node);
 37 |       }
 38 | 
 39 |       // Get the function name.
 40 |       let name = self.common.get_source_range_from_node(ast, name_node);
 41 |       let mangled_name = self.mangle_name(&name);
 42 | 
 43 |       // Prepare all the nodes.
 44 |       let parameters_node = ast.second_handle(node);
 45 |       let formal_parameters_node = ast.second_handle(parameters_node);
 46 | 
 47 |       // Create a scope for the generic and formal parameters.
 48 |       // We save the declaration scope for later, as that's where we'll be adding the function
 49 |       // symbol itself.
 50 |       let declaration_scope = self.scope_stack.top();
 51 |       let scope = self.scope_stack.push(self.scopes.create_scope());
 52 |       ast.set_scope(node, Some(scope));
 53 | 
 54 |       // Slurp all the parameters up into a vector.
 55 |       let mut parameters = SmallVec::new();
 56 |       for i in 0..ast.extra(formal_parameters_node).as_node_list().unwrap().len() {
 57 |          let named_parameters = ast.extra(formal_parameters_node).as_node_list().unwrap()[i];
 58 |          let type_node = ast.first_handle(named_parameters);
 59 |          let typ = match self.lookup_type(ast, type_node) {
 60 |             Ok(typ) => typ,
 61 |             Err(error) => {
 62 |                // Bail out without leaving the function's scope behind on the scope stack.
 63 |                // `into` applies the same error conversion that `?` would.
 64 |                self.scope_stack.pop();
 65 |                return Err(error.into());
 66 |             }
 67 |          };
 68 |          ast.walk_node_list_mut(named_parameters, |ast, _, name_node| {
 69 |             // Make each parameter have its own identifier in the function body.
 70 |             // Semantically, function parameters are just variables, introduced by some
 71 |             // external scope.
 72 |             let name = self.common.get_source_range_from_node(ast, name_node);
 73 |             let symbol = self.symbols.create(
 74 |                name,
 75 |                name_node,
 76 |                typ,
 77 |                SymbolKind::Variable(Variable {
 78 |                   mutability: Mutability::Val,
 79 |                }),
 80 |             );
 81 |             parameters.push(symbol);
 82 |             ast.convert_to_symbol(name_node, symbol);
 83 |             self.add_to_scope(name, symbol);
 84 |          });
 85 |       }
 86 | 
 87 |       // Look up what the return type should be.
 88 |       let return_type_node = ast.first_handle(formal_parameters_node);
 89 |       let return_type = if ast.kind(return_type_node) != NodeKind::Empty {
 90 |          match self.lookup_type(ast, return_type_node) {
 91 |             Ok(typ) => typ,
 92 |             Err(error) => {
 93 |                // Same as for parameter types: pop the function's scope before bailing out.
 94 |                self.scope_stack.pop();
 95 |                return Err(error.into());
 96 |             }
 97 |          }
 98 |       } else {
 99 |          // In case no return type is provided, default to the unit type `()`.
100 |          self.builtin.t_unit
101 |       };
102 | 
103 |       // Register the function in the registry and add it to scope.
104 |       // Registering the function _here_ allows for the referring to the function inside its
105 |       // body, enabling recursion.
106 |       let function_id = self.functions.create(
107 |          name.to_owned(),
108 |          mangled_name,
109 |          Parameters {
110 |             formal: parameters,
111 |             return_type,
112 |          },
113 |          FunctionKind::Local,
114 |       );
115 |       let symbol_kind = SymbolKind::Function(function_id);
116 |       // TODO: Function/closure types. Right now we treat function symbols as having the
117 |       // 'statement' type, which isn't exactly correct.
118 |       let symbol = self.symbols.create(name, node, self.builtin.t_statement, symbol_kind);
119 |       self.scopes.insert(declaration_scope, name, symbol);
120 |       ast.convert_to_symbol(name_node, symbol);
121 | 
122 |       // After all is done, pop the function's scope off.
123 |       self.scope_stack.pop();
124 | 
125 |       if self.is_in_module_scope() {
126 |          // If we're at the top-level scope, defer sem until all items in scope have already been
127 |          // declared.
128 |          self.defer(node, NodeContext::Statement);
129 |          Ok(self.annotate(ast, node, TypeId::null()))
130 |       } else {
131 |          // If we're not top-level, check the function's body.
132 |          self.annotate_function_body(ast, node)
133 |       }
134 |    }
111 | 
112 |    /// Annotates a function's body.
113 |    ///
114 |    /// The body is analyzed with `current_function` set to this function. Functions returning unit
115 |    /// have their body analyzed as statements; any other body is an expression that must have
116 |    /// exactly the declared return type.
117 |    pub(super) fn annotate_function_body(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogResult {
118 |       let name_node = ast.first_handle(node);
119 |       let symbol_id = ast.symbol_id(name_node);
120 |       let function_id = self.symbols.kind(symbol_id).unwrap_function();
121 | 
122 |       // Make this function the current one for the duration of the body's analysis, remembering
123 |       // the enclosing function so that it can be restored afterwards.
124 |       let enclosing_function = self.current_function.replace(function_id);
125 | 
126 |       let return_type = self.functions.parameters(function_id).return_type;
127 |       let returns_unit = self.types.kind(return_type).is_unit();
128 |       let body_context = if returns_unit {
129 |          NodeContext::Statement
130 |       } else {
131 |          NodeContext::expression_of_type(return_type)
132 |       };
133 |       let body_log = self.annotate_statement_list(ast, node, body_context);
134 | 
135 |       self.current_function = enclosing_function;
136 | 
137 |       // The declared return type is only enforced on bodies that are analyzed as expressions.
138 |       let body_type = self.log.type_id(body_log);
139 |       if returns_unit || body_type == return_type {
140 |          let declaration_type = self.create_declaration_type(symbol_id);
141 |          Ok(self.annotate(ast, node, declaration_type))
142 |       } else {
143 |          Ok(self.type_mismatch(ast, node, return_type, body_type))
144 |       }
145 |    }
146 | 
147 |    /// Annotates a function call.
148 |    ///
149 |    /// The callee must be an identifier that `lookup_function` resolves, and the number of
150 |    /// arguments must match the number of declared parameters. Each argument is annotated as an
151 |    /// expression of its parameter's type, implicitly converted to that type where possible, and
152 |    /// must end up with exactly that type. The call node is then converted to the intrinsic's node
153 |    /// kind for intrinsics, or to `CallFunction` otherwise, and annotated with the return type.
154 |    pub(super) fn annotate_call(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogResult {
155 |       // Extract what is being called.
156 |       let callee_node = ast.first_handle(node);
157 |       // TODO: Method calls.
158 |       if ast.kind(callee_node) != NodeKind::Identifier {
159 |          return Ok(self.error(ast, callee_node, ErrorKind::ExpressionCannotBeCalled));
160 |       }
161 |       let (symbol_id, function_id) = self.lookup_function(ast, callee_node)?;
162 |       // Convert the callee to a symbol.
163 |       ast.convert_to_symbol(callee_node, symbol_id);
164 | 
165 |       // Check if we have the right amount of arguments.
166 |       let given_parameter_count = ast.extra(node).as_node_list().unwrap().len();
167 |       let declared_parameter_count = self.functions.parameters(function_id).formal.len();
168 |       if given_parameter_count != declared_parameter_count {
169 |          return Ok(self.error(
170 |             ast,
171 |             node,
172 |             ErrorKind::NArgumentsExpected(declared_parameter_count, given_parameter_count),
173 |          ));
174 |       }
175 |       // Check if all the arguments are of the correct type.
176 |       // We don't immediately return after we error, so as to collect as many type mismatch
177 |       // messages as we can.
178 |       let mut last_error = None;
179 |       ast.walk_node_list_mut(node, |ast, index, argument| {
180 |          let parameters = self.functions.parameters(function_id);
181 |          let expected_type = self.symbols.type_id(parameters.formal[index]);
182 | 
183 |          let argument_log = self.annotate_node(
184 |             ast,
185 |             argument,
186 |             NodeContext::expression_of_type(expected_type),
187 |          );
188 |          let provided_type = self.log.type_id(argument_log);
189 | 
190 |          // Perform implicit conversions on arguments. The conversion has to be applied to the
191 |          // argument itself: applying it to the call node would wrap or re-type the whole call.
192 |          let argument_log = self
193 |             .try_perform_implicit_conversion(ast, argument, provided_type, expected_type)
194 |             .unwrap_or(argument_log);
195 |          // If there's a mismatch after the conversion, error.
196 |          let provided_type = self.log.type_id(argument_log);
197 |          if provided_type != expected_type {
198 |             last_error = Some(self.type_mismatch(ast, argument, expected_type, provided_type));
199 |          }
200 |       });
201 |       if let Some(error) = last_error {
202 |          return Ok(error);
203 |       }
204 | 
205 |       if let &FunctionKind::Intrinsic(intrinsic) = self.functions.kind(function_id) {
206 |          // Intrinsic calls have some transformation magic going on.
207 |          self.annotate_intrinsic_call(ast, node, intrinsic);
208 |       } else {
209 |          // For other calls, we use the CallFunction node, which is a normalized version of `Call`
210 |          // that takes the form of `function_name(params)`. Even for instance functions.
211 |          ast.convert_preserve(node, NodeKind::CallFunction);
212 |       }
213 | 
214 |       let return_type = self.functions.parameters(function_id).return_type;
215 |       Ok(self.annotate(ast, node, return_type))
216 |    }
210 | 
211 |    /// Annotates an intrinsic function call by converting the call node to the node kind that
212 |    /// `NodeKind::from` yields for `intrinsic`.
213 |    fn annotate_intrinsic_call(&mut self, ast: &mut Ast, node: NodeId, intrinsic: Intrinsic) {
214 |       let intrinsic_kind = NodeKind::from(intrinsic);
215 |       ast.convert_preserve(node, intrinsic_kind);
216 |    }
215 | }
216 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/locations.rs:
--------------------------------------------------------------------------------
  1 | //! Type analysis for variables, object fields, pointers, etc.
  2 | 
  3 | use crate::ast::{Ast, NodeId, NodeKind};
  4 | use crate::common::ErrorKind;
  5 | use crate::scope::{Mutability, SymbolId, SymbolKind, Variable};
  6 | use crate::types::{TypeLogEntry, TypeLogResult};
  7 | 
  8 | use super::{NodeContext, SemTypes};
  9 | 
 10 | impl<'s> SemTypes<'s> {
 11 |    /// Annotates a location expression, ie. variables `a`, members `.x`.
 12 |    ///
 13 |    /// Only `Identifier` nodes are accepted at the moment; any other node kind results in an
 14 |    /// `InvalidLocation` error.
 15 |    pub(super) fn annotate_location(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogResult {
 16 |       if ast.kind(node) != NodeKind::Identifier {
 17 |          // TODO: Make this into a better error. This would require slicing the source string,
 18 |          // which we can't do because spans don't store direct byte indices to it at the moment.
 19 |          return Err(self.error(ast, node, ErrorKind::InvalidLocation));
 20 |       }
 21 |       let symbol = self.lookup_variable(ast, node)?;
 22 |       Ok(self.annotate_location_symbol(ast, node, symbol))
 23 |    }
 23 | 
 24 |    /// Annotates a symbol that refers to a location.
 25 |    ///
 26 |    /// For variable symbols, `node` is converted to the symbol, annotated with the symbol's type,
 27 |    /// and then wrapped in a `Variable` node. Any other symbol is an `InvalidLocation` error.
 28 |    fn annotate_location_symbol(
 29 |       &mut self,
 30 |       ast: &mut Ast,
 31 |       node: NodeId,
 32 |       symbol: SymbolId,
 33 |    ) -> TypeLogEntry {
 34 |       let is_variable = match self.symbols.kind(symbol) {
 35 |          SymbolKind::Variable(_) => true,
 36 |          _ => false,
 37 |       };
 38 |       if !is_variable {
 39 |          return self.error(ast, node, ErrorKind::InvalidLocation);
 40 |       }
 41 |       let typ = self.symbols.type_id(symbol);
 42 |       ast.convert_to_symbol(node, symbol);
 43 |       // The node is annotated before being wrapped, just like the log entry that is returned.
 44 |       let log = self.annotate(ast, node, typ);
 45 |       ast.wrap(node, NodeKind::Variable);
 46 |       log
 47 |    }
 42 | 
 43 |    /// Annotates an assignment.
 44 |    ///
 45 |    /// The left-hand side must be a location, and the right-hand side is annotated as an
 46 |    /// expression of the location's type, implicitly converted to it where possible. Errors are
 47 |    /// returned for a type mismatch and for targets that are not `var` variables. In expression
 48 |    /// context the assignment has the type of the location, otherwise the statement type.
 49 |    pub(super) fn annotate_assignment(
 50 |       &mut self,
 51 |       ast: &mut Ast,
 52 |       node: NodeId,
 53 |       context: NodeContext,
 54 |    ) -> TypeLogResult {
 55 |       // TODO: Pointers and assigning values to them.
 56 |       let (left, right) = (ast.first_handle(node), ast.second_handle(node));
 57 |       let left_entry = self.annotate_location(ast, left)?;
 58 |       let left_type = self.log.type_id(left_entry);
 59 |       let right_entry = self.annotate_node(ast, right, NodeContext::expression_of_type(left_type));
 60 |       // The conversion has to be applied to the assigned value: applying it to the assignment
 61 |       // node would wrap or re-type the whole assignment instead of its right-hand side.
 62 |       let right_entry = self.perform_implicit_conversion(ast, right, right_entry, left_type);
 63 |       let right_type = self.log.type_id(right_entry);
 64 |       // Check types.
 65 |       if right_type != left_type {
 66 |          return Err(self.type_mismatch(ast, node, left_type, right_type));
 67 |       }
 68 |       // Check mutability.
 69 |       // TODO: This could maybe be moved into a different check, shoving this logic into assignments
 70 |       // doesn't seem very clean.
 71 |       let target_is_mutable = match ast.kind(left) {
 72 |          NodeKind::Variable => {
 73 |             let symbol = ast.first_handle(left);
 74 |             let variable = self.symbols.kind(ast.symbol_id(symbol)).unwrap_variable();
 75 |             variable.mutability == Mutability::Var
 76 |          }
 77 |          _ => false,
 78 |       };
 79 |       if !target_is_mutable {
 80 |          return Err(self.error(ast, left, ErrorKind::CannotAssignImmutableLocation));
 81 |       }
 82 |       Ok(match context {
 83 |          NodeContext::Expression(_) => self.annotate(ast, node, left_type),
 84 |          NodeContext::Statement => self.annotate(ast, node, self.builtin.t_statement),
 85 |       })
 86 |    }
 80 | 
 81 |    /// Annotates a variable declaration.
 82 |    pub(super) fn annotate_variable_declaration(
 83 |       &mut self,
 84 |       ast: &mut Ast,
 85 |       node: NodeId,
 86 |    ) -> TypeLogResult {
 87 |       let kind = match ast.kind(node) {
 88 |          NodeKind::Val => Mutability::Val,
 89 |          NodeKind::Var => Mutability::Var,
 90 |          _ => unreachable!(),
 91 |       };
 92 |       let variable = Variable { mutability: kind };
 93 | 
 94 |       // Figure out the name and expected type. This expected type can be `None`, and in that case,
 95 |       // should be inferred from context.
 96 |       let left_node = ast.first_handle(node);
 97 |       let (name_node, expected_type) = match ast.kind(left_node) {
 98 |          NodeKind::VariableType => {
 99 |             let name_node = ast.first_handle(left_node);
100 |             let type_node = ast.second_handle(left_node);
101 |             let typ = self.lookup_type(ast, type_node)?;
102 |             (name_node, Some(typ))
103 |          }
104 |          _ => (left_node, None),
105 |       };
106 |       // Normalize the LHS to the name only.
107 |       ast.set_first_handle(node, name_node);
108 | 
109 |       // Annotate the value.
110 |       let value_node = ast.second_handle(node);
111 |       let value_log = self.annotate_node(ast, value_node, NodeContext::Expression(expected_type));
112 |       let value_type = self.log.type_id(value_log);
113 | 
114 |       // Check if the type matches if an explicit type was provided.
115 |       let value_type = match expected_type {
116 |          Some(typ) => {
117 |             if let Some(log) =
118 |                self.try_perform_implicit_conversion(ast, value_node, value_type, typ)
119 |             {
120 |                self.log.type_id(log)
121 |             } else {
122 |                let expected_name = self.types.name(typ).to_owned();
123 |                let value_name = self.types.name(value_type).to_owned();
124 |                return Err(self.error(
125 |                   ast,
126 |                   node,
127 |                   ErrorKind::TypeMismatch(expected_name, value_name),
128 |                ));
129 |             }
130 |          }
131 |          None => value_type,
132 |       };
133 | 
134 |       // Add to scope.
135 |       match ast.kind(name_node) {
136 |          NodeKind::Discard => {
137 |             // A discarding assignment is converted to an AssignDiscard node containing
138 |             // the original value.
139 |             ast.convert(node, NodeKind::AssignDiscard);
140 |             ast.set_first_handle(node, value_node);
141 |          }
142 |          NodeKind::Identifier => {
143 |             // A simple symbol-binding assignment is converted into a Symbol node.
144 |             let name = self.common.get_source_range_from_node(ast, name_node);
145 |             let symbol =
146 |                self.symbols.create(name, node, value_type, SymbolKind::Variable(variable));
147 |             ast.convert_to_symbol(name_node, symbol);
148 |             self.add_to_scope(name, symbol);
149 |             // The variable type annotation is less relevant to error reporting than the fact that
150 |             // it's a statement. This sounds counterintuitive at first, but note that we're
151 |             // requested to annotate the Val/Var node, not the variable name node, so the calling
152 |             // function likely expects a statement instead of an expression.
153 |             let _ = self.annotate(ast, name_node, value_type);
154 |          }
155 |          _ => unreachable!(),
156 |       }
157 |       Ok(self.annotate(ast, node, self.builtin.t_statement))
158 |    }
159 | }
160 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/lookups.rs:
--------------------------------------------------------------------------------
 1 | //! Identifier lookups, generic instantiations, mutability queries.
 2 | 
 3 | use crate::ast::{Ast, NodeId, NodeKind};
 4 | use crate::common::ErrorKind;
 5 | use crate::functions::FunctionId;
 6 | use crate::scope::{SymbolId, SymbolKind};
 7 | use crate::types::{TypeId, TypeLogEntry};
 8 | 
 9 | use super::{NodeContext, SemTypes};
10 | 
11 | impl<'s> SemTypes<'s> {
12 |    // The difference between `find` and `lookup` is simple: `find` returns an Option,
13 |    // `lookup` returns a Result.
14 | 
15 |    /// Performs a basic identifier search, looking for a symbol whose name matches the identifier
16 |    /// stored in the node in the current scope.
17 |    fn find_identifier(&self, ast: &Ast, node: NodeId) -> Option<SymbolId> {
18 |       assert_eq!(ast.kind(node), NodeKind::Identifier);
19 |       let name = self.common.get_source_range_from_node(ast, node);
20 |       self.scope_stack.lookup(&self.scopes, name)
21 |    }
22 | 
23 |    /// Performs an error-reporting identifier lookup.
24 |    fn lookup_identifier(&mut self, ast: &Ast, node: NodeId) -> Result<SymbolId, TypeLogEntry> {
25 |       self.find_identifier(ast, node).ok_or_else(|| {
26 |          let name = self.common.get_source_range_from_node(ast, node);
27 |          self.error(ast, node, ErrorKind::UndeclaredSymbol(name.into()))
28 |       })
29 |    }
30 | 
31 |    /// Finds the variable symbol referred to by the given identifier node.
32 |    pub(super) fn lookup_variable(
33 |       &mut self,
34 |       ast: &Ast,
35 |       node: NodeId,
36 |    ) -> Result<SymbolId, TypeLogEntry> {
37 |       let symbol = self.lookup_identifier(ast, node)?;
38 |       if let SymbolKind::Variable(..) = self.symbols.kind(symbol) {
39 |          Ok(symbol)
40 |       } else {
41 |          // TODO: Make this error not suck.
42 |          Err(self.error(ast, node, ErrorKind::InvalidLocation))
43 |       }
44 |    }
45 | 
46 |    /// Finds the function referred to by the given identifier node.
47 |    pub(super) fn lookup_function(
48 |       &mut self,
49 |       ast: &Ast,
50 |       node: NodeId,
51 |    ) -> Result<(SymbolId, FunctionId), TypeLogEntry> {
52 |       let symbol = self.lookup_identifier(ast, node)?;
53 |       if let &SymbolKind::Function(id) = self.symbols.kind(symbol) {
54 |          Ok((symbol, id))
55 |       } else {
56 |          Err(self.error(ast, node, ErrorKind::ExpressionCannotBeCalled))
57 |       }
58 |    }
59 | 
60 |    /// Finds the type referred to by the given node.
61 |    ///
62 |    /// Only identifier nodes are handled so far; any other node kind hits `unreachable!`. Generic
63 |    /// instantiation is not performed yet.
64 |    pub(super) fn lookup_type(&mut self, ast: &Ast, node: NodeId) -> Result<TypeId, TypeLogEntry> {
65 |       match ast.kind(node) {
66 |          NodeKind::Identifier => {
67 |             let symbol = self.lookup_identifier(ast, node)?;
68 |             if let SymbolKind::Type(id) = self.symbols.kind(symbol) {
69 |                Ok(*id)
70 |             } else {
71 |                let name = self.symbols.name(symbol).to_owned();
72 |                Err(self.error(ast, node, ErrorKind::SymbolIsNotAType(name)))
73 |             }
74 |          }
75 |          _ => {
76 |             unreachable!("invalid node kind for type")
77 |          }
78 |       }
79 |    }
80 | 
81 |    /// Adds a symbol to the current scope.
82 |    pub(super) fn add_to_scope(&mut self, name: &str, symbol: SymbolId) {
83 |       let scope = self.scope_stack.top();
84 |       self.scopes.insert(scope, name, symbol);
85 |    }
86 | 
87 |    /// Annotates a `pub` declaration.
88 |    pub(super) fn annotate_pub(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry {
89 |       let inner = ast.first_handle(node);
90 |       let inner = self.annotate_node(ast, inner, NodeContext::Statement);
91 |       let typ = self.log.type_id(inner);
92 |       let symbol_id = self.types.kind(typ).as_declaration().unwrap();
93 |       self.scopes.set_public(symbol_id);
94 |       ast.unwrap(node);
95 |       inner
96 |    }
97 | }
98 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/mod.rs:
--------------------------------------------------------------------------------
  1 | //! Semantic analyzer for types.
  2 | 
  3 | mod control_flow;
  4 | mod conversions;
  5 | mod functions;
  6 | mod locations;
  7 | mod lookups;
  8 | mod operators;
  9 | mod pragmas;
 10 | mod types;
 11 | 
 12 | use std::path::Path;
 13 | 
 14 | use smallvec::SmallVec;
 15 | 
 16 | use crate::ast::{Ast, NodeId, NodeKind};
 17 | use crate::common::{ErrorKind, Errors};
 18 | use crate::functions::{register_intrinsics, FunctionId, Functions};
 19 | use crate::scope::{ScopeId, ScopeStack, Scopes, SymbolId, Symbols};
 20 | use crate::sem::{SemCommon, SemPass};
 21 | use crate::types::{BuiltinTypes, TypeId, TypeInfo, TypeKind, TypeLog, TypeLogEntry, Types};
 22 | 
 23 | pub(crate) struct SemTypes<'s> {
 24 |    common: &'s SemCommon<'s>,
 25 |    errors: Errors,
 26 | 
 27 |    types: &'s mut Types,
 28 |    log: &'s mut TypeLog,
 29 |    builtin: &'s BuiltinTypes,
 30 |    scopes: &'s mut Scopes,
 31 |    symbols: &'s mut Symbols,
 32 |    functions: &'s mut Functions,
 33 | 
 34 |    scope_stack: ScopeStack,
 35 |    module_scope: ScopeId,
 36 |    /// A stack of vectors of nodes to be sem'checked after the module's done being checked.
 37 |    ///
 38 |    /// The node ID is used to determine where the given node is placed. The scope of the node's
 39 |    /// body is determined from the node's metadata.
 40 |    deferred: SmallVec<[Vec<(NodeId, NodeContext)>; 4]>,
 41 | 
 42 |    /// The function that is currently being compiled.
 43 |    /// `None` if at the top level.
 44 |    current_function: Option<FunctionId>,
 45 | }
 46 | 
 47 | /// Values borrowed to `SemTypes`, used during its construction.
 48 | pub(crate) struct SemTypesBorrows<'s> {
 49 |    pub(crate) common: &'s SemCommon<'s>,
 50 |    pub(crate) types: &'s mut Types,
 51 |    pub(crate) log: &'s mut TypeLog,
 52 |    pub(crate) builtin: &'s BuiltinTypes,
 53 |    pub(crate) scopes: &'s mut Scopes,
 54 |    pub(crate) symbols: &'s mut Symbols,
 55 |    pub(crate) functions: &'s mut Functions,
 56 | }
 57 | 
 58 | /// Specifies whether a node should be annotated in expression or statement context.
 59 | #[derive(Clone, Copy, PartialEq, Eq, Debug)]
 60 | enum NodeContext {
 61 |    Expression(
 62 |       /// The expected type of the expression.
 63 |       Option<TypeId>,
 64 |    ),
 65 |    Statement,
 66 | }
 67 | 
 68 | impl NodeContext {
 69 |    fn expression() -> Self {
 70 |       Self::Expression(None)
 71 |    }
 72 | 
 73 |    fn expression_of_type(type_id: TypeId) -> Self {
 74 |       Self::Expression(Some(type_id))
 75 |    }
 76 | }
 77 | 
 78 | impl<'s> SemTypes<'s> {
 79 |    /// Creates a new instance of the `SemTypes` analysis phase.
 80 |    pub fn new(borrows: SemTypesBorrows<'s>) -> Self {
 81 |       let SemTypesBorrows {
 82 |          common,
 83 |          types,
 84 |          log,
 85 |          builtin,
 86 |          scopes,
 87 |          symbols,
 88 |          functions,
 89 |       } = borrows;
 90 |       let mut scope_stack = ScopeStack::new();
 91 |       // The scope stack is always initialized with a top-level module scope, such that there is
 92 |       // always a valid scope on top.
 93 |       let module_scope = scope_stack.push(scopes.create_scope());
 94 |       builtin.register_in(scopes, symbols, module_scope);
 95 |       register_intrinsics(builtin, scopes, symbols, module_scope, functions);
 96 |       SemTypes {
 97 |          common,
 98 |          errors: Errors::new(),
 99 | 
100 |          types,
101 |          log,
102 |          builtin,
103 |          scopes,
104 |          symbols,
105 |          functions,
106 | 
107 |          scope_stack,
108 |          module_scope,
109 |          deferred: SmallVec::new(),
110 |          current_function: None,
111 |       }
112 |    }
113 | 
114 |    /// Annotates the given AST node with the given type, and returns the resulting log entry.
115 |    fn annotate(&mut self, ast: &mut Ast, node: NodeId, typ: TypeId) -> TypeLogEntry {
116 |       ast.set_type_id(node, typ);
117 |       self.log.push(typ, node)
118 |    }
119 | 
120 |    /// Emits an error of the given kind, also returning a log entry of the error type.
121 |    fn error(&mut self, ast: &Ast, node: NodeId, kind: ErrorKind) -> TypeLogEntry {
122 |       self.emit_error(kind, ast.span(node).clone());
123 |       self.log.push(self.builtin.t_error, node)
124 |    }
125 | 
126 |    /// Emits a type mismatch error.
127 |    fn type_mismatch(
128 |       &mut self,
129 |       ast: &Ast,
130 |       node: NodeId,
131 |       expected: TypeId,
132 |       got: TypeId,
133 |    ) -> TypeLogEntry {
134 |       let expected_name = self.types.name(expected);
135 |       let provided_name = self.types.name(got);
136 |       let kind = ErrorKind::TypeMismatch(expected_name.to_owned(), provided_name.to_owned());
137 |       self.error(ast, node, kind)
138 |    }
139 | 
140 |    /// Returns whether sem'checking is currently happening in the module scope.
141 |    fn is_in_module_scope(&self) -> bool {
142 |       self.scope_stack.top() == self.module_scope
143 |    }
144 | 
145 |    /// Pushes a new vector of defers.
146 |    fn push_defers(&mut self) {
147 |       self.deferred.push(Vec::new());
148 |    }
149 | 
150 |    /// Pushes a new defer into the current vector of defers.
151 |    fn defer(&mut self, node: NodeId, context: NodeContext) {
152 |       let defers = self.deferred.last_mut().unwrap();
153 |       defers.push((node, context));
154 |    }
155 | 
156 |    /// Pops the current vector of defers off, and annotates every node that was deferred into it.
157 |    fn pop_defers(&mut self, ast: &mut Ast) {
158 |       let defers = self.deferred.pop().expect("unbalanced stack of defers");
159 |       for (node, context) in defers {
160 |          let scope = ast.scope(node);
161 |          if let Some(scope) = scope {
162 |             self.scope_stack.push(scope);
163 |          }
164 |          let _ = self.annotate_node(ast, node, context);
165 |          if scope.is_some() {
166 |             self.scope_stack.pop();
167 |          }
168 |       }
169 |    }
170 | 
171 |    /// Creates a new type that represents a declaration.
172 |    fn create_declaration_type(&mut self, symbol: SymbolId) -> TypeId {
173 |       self.types.create_type(TypeInfo {
174 |          name: &format!("declaration({})", symbol.id()),
175 |          kind: TypeKind::Declaration(symbol),
176 |       })
177 |    }
178 | 
179 |    /// Annotates a literal with a concrete type.
180 |    fn annotate_literal(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry {
181 |       let typ = match ast.kind(node) {
182 |          NodeKind::True => self.builtin.t_bool,
183 |          NodeKind::False => self.builtin.t_bool,
184 |          NodeKind::Uint8 => self.builtin.t_uint8,
185 |          NodeKind::Uint16 => self.builtin.t_uint16,
186 |          NodeKind::Uint32 => self.builtin.t_uint32,
187 |          NodeKind::Uint64 => self.builtin.t_uint64,
188 |          NodeKind::Int8 => self.builtin.t_int8,
189 |          NodeKind::Int16 => self.builtin.t_int16,
190 |          NodeKind::Int32 => self.builtin.t_int32,
191 |          NodeKind::Int64 => self.builtin.t_int64,
192 |          NodeKind::Float32 => self.builtin.t_float32,
193 |          NodeKind::Float64 => self.builtin.t_float64,
194 |          NodeKind::Character => self.builtin.t_char,
195 |          _ => unreachable!(),
196 |       };
197 |       self.annotate(ast, node, typ)
198 |    }
199 | 
200 |    /// Annotates statements in a list of statements.
201 |    fn annotate_statement_list(
202 |       &mut self,
203 |       ast: &mut Ast,
204 |       node: NodeId,
205 |       context: NodeContext,
206 |    ) -> TypeLogEntry {
207 |       let mut last_log = None;
208 |       ast.walk_node_list_mut(node, |ast, index, child| {
209 |          // Trailing expressions in expression context statement lists get special treatment.
210 |          // They are the resulting expressions of these statement lists, and thus get analyzed as
211 |          // proper expressions rather than statements. They are also not subject to triggering the
212 |          // UnusedValue error, as the resulting value _is_ actually used - it's the result of the
213 |          // statement list.
214 |          let is_last = ast.is_last_child(node, index);
215 |          let log_entry = self.annotate_node(
216 |             ast,
217 |             child,
218 |             if is_last {
219 |                context
220 |             } else {
221 |                NodeContext::Statement
222 |             },
223 |          );
224 |          let typ = self.log.type_id(log_entry);
225 |          let type_kind = self.types.kind(typ);
226 |          // For expressions, we have some special cases.
227 |          if !type_kind.is_statement() {
228 |             if is_last {
229 |                // Trailing expressions get assigned to the `last_log`, so that we know what the
230 |                // result of the statement list is.
231 |                last_log = Some(log_entry);
232 |             } else {
233 |                // Other expressions are unused, which is invalid.
234 |                self.emit_error(ErrorKind::UnusedValue, ast.span(child).clone());
235 |             }
236 |          }
237 |       });
238 |       // Statement lists in expression context must always have a trailing expression.
239 |       if let NodeContext::Expression(expected_type) = context {
240 |          if let Some(last_log) = last_log {
241 |             if let Some(expected_type) = expected_type {
242 |                self.perform_implicit_conversion(ast, node, last_log, expected_type)
243 |             } else {
244 |                last_log
245 |             }
246 |          } else {
247 |             self.error(ast, node, ErrorKind::MissingResult)
248 |          }
249 |       } else {
250 |          self.annotate(ast, node, self.builtin.t_statement)
251 |       }
252 |    }
253 | 
254 |    /// Annotates the given AST node.
255 |    fn annotate_node(&mut self, ast: &mut Ast, node: NodeId, context: NodeContext) -> TypeLogEntry {
256 |       let log = match ast.kind(node) {
257 |          // Literals
258 |          | NodeKind::True
259 |          | NodeKind::False
260 |          | NodeKind::Uint8
261 |          | NodeKind::Uint16
262 |          | NodeKind::Uint32
263 |          | NodeKind::Uint64
264 |          | NodeKind::Int8
265 |          | NodeKind::Int16
266 |          | NodeKind::Int32
267 |          | NodeKind::Int64
268 |          | NodeKind::Float32
269 |          | NodeKind::Float64
270 |          | NodeKind::Character => self.annotate_literal(ast, node),
271 | 
272 |          // Locations
273 |          NodeKind::Identifier => self.annotate_location(ast, node).into(),
274 | 
275 |          // Unary operators
276 |          // ---
277 |          // The following operators were omitted from the generic rule:
278 |          // NodeKind::Member - magic for field access in self
279 |          // NodeKind::Ref - magic for creating pointers
280 |          // NodeKind::Deref - magic for dereferencing
281 |          NodeKind::Not | NodeKind::Neg | NodeKind::BitNot => {
282 |             self.annotate_unary_operator(ast, node)
283 |          }
284 | 
285 |          // Binary operators
286 |          // ---
287 |          // The following kinds were omitted from the generic rule:
288 |          // NodeKind::Dot - magic for field access
289 |          | NodeKind::Plus
290 |          | NodeKind::Minus
291 |          | NodeKind::Mul
292 |          | NodeKind::Div
293 |          | NodeKind::Equal
294 |          | NodeKind::NotEqual
295 |          | NodeKind::Less
296 |          | NodeKind::LessEqual
297 |          | NodeKind::Greater
298 |          | NodeKind::GreaterEqual => self.annotate_binary_operator(ast, node),
299 |          NodeKind::Call => self.annotate_call(ast, node).into(),
300 |          NodeKind::Assign => self.annotate_assignment(ast, node, context).into(),
301 |          // Other operators are to be implemented later.
302 | 
303 |          // Control flow
304 |          NodeKind::StatementList => self.annotate_statement_list(ast, node, context),
305 |          NodeKind::Pass => self.annotate_pass(ast, node),
306 |          NodeKind::Do => self.annotate_do(ast, node, context),
307 |          NodeKind::If => self.annotate_if(ast, node, context),
308 |          NodeKind::While => self.annotate_while(ast, node),
309 |          NodeKind::Break => self.annotate_break(ast, node),
310 |          NodeKind::Return => self.annotate_return(ast, node),
311 | 
312 |          // Declarations
313 |          NodeKind::Val | NodeKind::Var => self.annotate_variable_declaration(ast, node).into(),
314 |          NodeKind::Fun => self.annotate_function_declaration(ast, node).into(),
315 |          NodeKind::Type => self.annotate_type_alias(ast, node).into(),
316 |          NodeKind::Pub => self.annotate_pub(ast, node),
317 | 
318 |          // Other nodes are invalid (or not implemented yet).
319 |          other => self.error(ast, node, ErrorKind::SemTypesInvalidAstNode(other)),
320 |       };
321 | 
322 |       // In case the node's context is an expression with some return type provided, perform
323 |       // implicit conversions such that the node's type matches the expected type.
324 |       let log = if let NodeContext::Expression(Some(expected_type)) = context {
325 |          self.perform_implicit_conversion(ast, node, log, expected_type)
326 |       } else {
327 |          log
328 |       };
329 | 
330 |       log
331 |    }
332 | }
333 | 
334 | impl SemPass for SemTypes<'_> {
335 |    type Result = TypeLogEntry;
336 | 
337 |    /// Performs type analysis for the given AST node. This annotates the node with a concrete type.
338 |    fn analyze(&mut self, mut ast: Ast, root_node: NodeId) -> Ast {
339 |       self.push_defers();
340 |       let _ = self.annotate_node(&mut ast, root_node, NodeContext::Statement);
341 |       self.pop_defers(&mut ast);
342 |       ast
343 |    }
344 | 
345 |    fn filename(&self) -> &Path {
346 |       &self.common.file.path
347 |    }
348 | 
349 |    fn errors(&self) -> &Errors {
350 |       &self.errors
351 |    }
352 | 
353 |    fn errors_mut(&mut self) -> &mut Errors {
354 |       &mut self.errors
355 |    }
356 | 
357 |    fn into_errors(self) -> Errors {
358 |       self.errors
359 |    }
360 | }
361 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/operators.rs:
--------------------------------------------------------------------------------
 1 | //! Semantic analysis for operators and compiler intrinsics.
 2 | 
 3 | use crate::ast::{Ast, NodeId, NodeKind};
 4 | use crate::common::ErrorKind;
 5 | use crate::types::TypeLogEntry;
 6 | 
 7 | use super::{NodeContext, SemTypes};
 8 | 
 9 | impl<'s> SemTypes<'s> {
10 |    // Currently, this does some rather simplistic analysis just to Make it Work™, but in the
11 |    // future when operators will be lowered to trait instance function calls, this will be
12 |    // replaced by much simpler logic and compiler intrinsics inside the stdlib.
13 | 
14 |    /// Annotates a unary operator with types.
15 |    pub(super) fn annotate_unary_operator(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry {
16 |       let log_entry = self.annotate_node(ast, ast.first_handle(node), NodeContext::expression());
17 |       let right = self.log.type_id(log_entry);
18 |       let right_kind = self.types.kind(right);
19 |       let typ = match ast.kind(node) {
20 |          NodeKind::Not if right == self.builtin.t_bool => right,
21 |          NodeKind::BitNot if right_kind.is_integer() => right,
22 |          NodeKind::Neg if right_kind.is_numeric() => right,
23 |          _ => {
24 |             let right_name = self.types.name(right);
25 |             let kind = ErrorKind::InvalidUnaryOperator(right_name.into());
26 |             return self.error(ast, node, kind);
27 |          }
28 |       };
29 |       self.annotate(ast, node, typ)
30 |    }
31 | 
32 |    /// Annotates a binary operator with types.
33 |    pub(super) fn annotate_binary_operator(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogEntry {
34 |       let (left, right) = (ast.first_handle(node), ast.second_handle(node));
35 |       let left_entry = self.annotate_node(ast, left, NodeContext::expression());
36 |       let left_type = self.log.type_id(left_entry);
37 |       let right_entry = self.annotate_node(ast, right, NodeContext::expression_of_type(left_type));
38 |       let right_type = self.log.type_id(right_entry);
39 |       let conversion = self.try_perform_implicit_conversion(ast, right, right_type, left_type);
40 |       let left_type_kind = self.types.kind(left_type);
41 |       let typ = match ast.kind(node) {
42 |          // Arithmetic operators always evaluate to the same type as the LHS.
43 |          NodeKind::Plus | NodeKind::Minus | NodeKind::Mul | NodeKind::Div
44 |             if conversion.is_some() =>
45 |          {
46 |             left_type
47 |          }
48 | 
49 |          // Comparison operators always evaluate to `Bool`.
50 |          NodeKind::Equal | NodeKind::NotEqual
51 |             if conversion.is_some() && left_type_kind.is_bool() =>
52 |          {
53 |             self.builtin.t_bool
54 |          }
55 |          | NodeKind::Equal
56 |          | NodeKind::NotEqual
57 |          | NodeKind::Less
58 |          | NodeKind::LessEqual
59 |          | NodeKind::Greater
60 |          | NodeKind::GreaterEqual
61 |             if conversion.is_some() && left_type_kind.is_numeric() =>
62 |          {
63 |             self.builtin.t_bool
64 |          }
65 | 
66 |          // Other operators, and failed conversions, raise a type mismatch error.
67 |          _ => {
68 |             return self.type_mismatch(ast, node, left_type, right_type);
69 |          }
70 |       };
71 |       self.annotate(ast, node, typ)
72 |    }
73 | }
74 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/pragmas.rs:
--------------------------------------------------------------------------------
 1 | //! Handling for pragma AST.
 2 | 
 3 | use crate::ast::{Ast, NodeId, NodeKind};
 4 | 
 5 | use super::SemTypes;
 6 | 
 7 | impl<'s> SemTypes<'s> {
 8 |    /// Separates a node from the pragmas that may be attached to it.
 9 |    ///
10 |    /// Returns the inner node first, and the `Pragmas` node (if there is one) second.
11 |    pub(crate) fn split_pragmas(ast: &Ast, node: NodeId) -> (NodeId, Option<NodeId>) {
12 |       match ast.kind(node) {
13 |          NodeKind::Pragmas => (ast.first_handle(node), Some(node)),
14 |          _ => (node, None),
15 |       }
16 |    }
17 | }
18 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/sem_types/types.rs:
--------------------------------------------------------------------------------
  1 | //! Type declarations.
  2 | 
  3 | use crate::ast::{Ast, NodeId, NodeKind};
  4 | use crate::common::ErrorKind;
  5 | use crate::scope::{SymbolId, SymbolKind};
  6 | use crate::types::{TypeInfo, TypeKind, TypeLogEntry, TypeLogResult};
  7 | 
  8 | use super::SemTypes;
  9 | 
 10 | impl<'s> SemTypes<'s> {
 11 |    /// Annotates the AST for a type alias declaration.
 12 |    pub(super) fn annotate_type_alias(&mut self, ast: &mut Ast, node: NodeId) -> TypeLogResult {
 13 |       let (lhs, rhs) = (ast.first_handle(node), ast.second_handle(node));
 14 |       let (constrained_type, pragmas) = Self::split_pragmas(ast, lhs);
 15 | 
 16 |       // TODO: generic parameters
 17 |       let type_name = ast.first_handle(constrained_type);
 18 |       let name_identifier = ast.first_handle(type_name);
 19 |       let name = self.common.get_source_range_from_node(ast, name_identifier);
 20 | 
 21 |       // Interpret the right-hand side.
 22 |       let mut aliased_type = if rhs != NodeId::null() {
 23 |          // TODO: Check constraints.
 24 |          let underlying_type = self.lookup_type(ast, rhs)?;
 25 |          let alias = self.types.create_type(TypeInfo {
 26 |             name,
 27 |             kind: TypeKind::Alias(underlying_type),
 28 |          });
 29 |          let symbol = self.symbols.create(name, node, self.builtin.t_type, SymbolKind::Type(alias));
 30 |          Some(symbol)
 31 |       } else {
 32 |          None
 33 |       };
 34 | 
 35 |       // Interpret pragmas.
 36 |       if let Some(pragmas) = pragmas {
 37 |          for i in 0..ast.extra(pragmas).as_node_list().unwrap().len() {
 38 |             let pragma = ast.extra(pragmas).as_node_list().unwrap()[i];
 39 |             aliased_type = self.type_alias_pragma(ast, pragma, aliased_type)?;
 40 |          }
 41 |       }
 42 | 
 43 |       // Unwrap the resulting type.
 44 |       let aliased_type =
 45 |          aliased_type.ok_or_else(|| self.error(ast, node, ErrorKind::EmptyTypeAlias))?;
 46 |       // Add the alias to scope.
 47 |       self.add_to_scope(name, aliased_type);
 48 | 
 49 |       let declaration_type = self.create_declaration_type(aliased_type);
 50 |       Ok(self.annotate(ast, node, declaration_type))
 51 |    }
 52 | 
 53 |    /// Interprets a pragma for a type alias declaration.
 54 |    fn type_alias_pragma(
 55 |       &mut self,
 56 |       ast: &mut Ast,
 57 |       pragma: NodeId,
 58 |       #[allow(unused)] mut aliased_type: Option<SymbolId>,
 59 |    ) -> Result<Option<SymbolId>, TypeLogEntry> {
 60 |       let name_identifier = ast.first_handle(pragma);
 61 |       let name = self.common.get_source_range_from_node(ast, name_identifier);
 62 |       match name {
 63 |          "compiler_builtin_type" => {
 64 |             aliased_type = Some(self.pragma_compiler_builtin_type(ast, pragma)?);
 65 |          }
 66 |          other => return Err(self.error(ast, pragma, ErrorKind::UnknownPragma(other.into()))),
 67 |       }
 68 |       Ok(aliased_type)
 69 |    }
 70 | 
 71 |    /// Raises an error if a pragma does not have the provided number of arguments.
 72 |    fn pragma_expect_arguments(
 73 |       &mut self,
 74 |       ast: &Ast,
 75 |       pragma: NodeId,
 76 |       count: usize,
 77 |    ) -> Result<(), TypeLogEntry> {
 78 |       let nodes = ast.extra(pragma).as_node_list().unwrap();
 79 |       if nodes.len() != 1 {
 80 |          return Err(self.error(
 81 |             ast,
 82 |             pragma,
 83 |             ErrorKind::NArgumentsExpected(count, nodes.len()),
 84 |          ));
 85 |       }
 86 |       Ok(())
 87 |    }
 88 | 
 89 |    /// Handles the `compiler_builtin_type` pragma: creates a new symbol for the built-in type
 90 |    /// named by the pragma's single argument.
 91 |    fn pragma_compiler_builtin_type(
 92 |       &mut self,
 93 |       ast: &mut Ast,
 94 |       pragma: NodeId,
 95 |    ) -> Result<SymbolId, TypeLogEntry> {
 96 |       // Validate the argument count before indexing into the argument list.
 97 |       self.pragma_expect_arguments(ast, pragma, 1)?;
 98 |       let argument = ast.extra(pragma).as_node_list().unwrap()[0];
 99 |       // Only an atom node can name a built-in type.
100 |       if ast.kind(argument) != NodeKind::Atom {
101 |          return Err(self.error(ast, argument, ErrorKind::InvalidBuiltinTypeName));
102 |       }
103 |       let requested = self.common.get_source_range_from_node(ast, argument);
104 |       let builtin_type = match requested {
105 |          "noreturn" => self.builtin.t_noreturn,
106 |          "bool" => self.builtin.t_bool,
107 |          "uint8" => self.builtin.t_uint8,
108 |          "uint16" => self.builtin.t_uint16,
109 |          "uint32" => self.builtin.t_uint32,
110 |          "uint64" => self.builtin.t_uint64,
111 |          "int8" => self.builtin.t_int8,
112 |          "int16" => self.builtin.t_int16,
113 |          "int32" => self.builtin.t_int32,
114 |          "int64" => self.builtin.t_int64,
115 |          "float32" => self.builtin.t_float32,
116 |          "float64" => self.builtin.t_float64,
117 |          "size" => self.builtin.t_size,
118 |          _ => return Err(self.error(ast, argument, ErrorKind::InvalidBuiltinTypeName)),
119 |       };
120 |       // The symbol is named after the type itself rather than after the pragma argument.
121 |       let type_name = self.types.name(builtin_type);
122 |       let kind = SymbolKind::Type(builtin_type);
123 |       let symbol = self.symbols.create(type_name, pragma, self.builtin.t_type, kind);
124 |       Ok(symbol)
125 |    }
126 | }
127 | 


--------------------------------------------------------------------------------
/tsuki-frontend/src/types.rs:
--------------------------------------------------------------------------------
  1 | //! Storage and logging of types.
  2 | 
  3 | // Note: Because `type` is a keyword in Rust, sometimes a truncated form `typ` is used to prevent
  4 | // conflicts.
  5 | 
  6 | use std::cmp::Ordering;
  7 | use std::ops::Range;
  8 | 
  9 | use crate::ast::NodeId;
 10 | use crate::scope::{ScopeId, Scopes, SymbolId, SymbolKind, Symbols};
 11 | 
 12 | /// Data-oriented type storage: `names` and `kinds` are parallel vectors indexed by `TypeId`.
 13 | pub struct Types {
 14 |    names: Vec<Range<usize>>,
 15 |    kinds: Vec<TypeKind>,
 16 |    /// Backing storage for all type names; `names` holds byte ranges into this string.
 17 |    name_data: String,
 18 | }
 19 | 
 20 | /// A unique ID representing a type; it wraps the type's index in the `Types` storage.
 21 | #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 22 | pub struct TypeId(usize);
 23 | 
 24 | impl TypeId {
 25 |    /// Returns the null type ID: index 0, the slot `Types::new` fills with the missing type.
 26 |    pub fn null() -> TypeId {
 27 |       Self(0)
 28 |    }
 29 | }
 30 | 
 31 | /// Structure containing data for creating a type; consumed by `Types::create_type`.
 32 | pub struct TypeInfo<'n> {
 33 |    pub name: &'n str, // Copied into the registry's own name storage on creation.
 34 |    pub kind: TypeKind, // Stored as-is under the new type's ID.
 35 | }
 36 | 
 37 | impl Types {
 38 |    /// Creates a type registry holding only the missing type, which occupies the null slot.
 39 |    pub fn new() -> Self {
 40 |       let mut registry = Self {
 41 |          names: Vec::new(),
 42 |          kinds: Vec::new(),
 43 |          name_data: String::new(),
 44 |       };
 45 |       // The first type created gets ID 0, the same ID `TypeId::null()` returns.
 46 |       let _ = registry.create_type(TypeInfo {
 47 |          name: "missingtype",
 48 |          kind: TypeKind::Missing,
 49 |       });
 50 |       registry
 51 |    }
 52 | 
 53 |    /// Registers a new type described by `info` and returns the ID assigned to it.
 54 |    #[must_use]
 55 |    pub fn create_type(&mut self, info: TypeInfo<'_>) -> TypeId {
 56 |       let id = TypeId(self.names.len());
 57 |       let stored_name = self.add_name(info.name);
 58 |       self.names.push(stored_name);
 59 |       self.kinds.push(info.kind);
 60 |       id
 61 |    }
 62 | 
 63 |    /// Returns the name of the type.
 64 |    pub fn name(&self, typ: TypeId) -> &str {
 65 |       let name_range = self.names[typ.0].clone();
 66 |       &self.name_data[name_range]
 67 |    }
 68 | 
 69 |    /// Returns the kind of the given type.
 70 |    pub fn kind(&self, typ: TypeId) -> &TypeKind {
 71 |       &self.kinds[typ.0]
 72 |    }
 73 | 
 74 |    /// Appends `name` to `name_data` and returns the byte range it now occupies.
 75 |    fn add_name(&mut self, name: &str) -> Range<usize> {
 76 |       let start = self.name_data.len();
 77 |       self.name_data.push_str(name);
 78 |       start..self.name_data.len()
 79 |    }
 80 | }
 81 | 
 82 | /// The kind of a type, stored per type ID in `Types` and queried through `Types::kind`.
 83 | pub enum TypeKind {
 84 |    /// The missing type is assigned to all nodes that don't get a type assigned explicitly.
 85 |    Missing,
 86 |    /// The error type is returned when type analysis fails for an AST node.
 87 |    Error,
 88 |    /// The statement type is assigned to AST nodes that do not return a value, such as loops.
 89 |    Statement,
 90 |    /// The declaration type is assigned to AST nodes that introduce a symbol into scope.
 91 |    Declaration(SymbolId),
 92 |    /// `type` is the type of all type symbols. It can't be instantiated by user code.
 93 |    Type,
 94 |    /// The unit type is a type with a single value `()`. It is the default return type for
 95 |    /// functions.
 96 |    Unit,
 97 |    /// The NoReturn type is assigned to expressions that do not return to the parent expression,
 98 |    /// e.g. `return` expressions. Certain built-in functions also return `NoReturn`.
 99 |    /// It is implicitly convertible to any other type.
100 |    NoReturn,
101 |    // The rest of the primitive types are quite self-explanatory.
102 |    Bool,
103 |    Integer(IntegerSize), // Width and signedness are given by the `IntegerSize`.
104 |    Float(FloatSize), // Width is given by the `FloatSize`.
105 |    Char,
106 | 
107 |    /// An alias for the type of the given ID.
108 |    Alias(TypeId),
109 | }
110 | 
111 | impl TypeKind {
112 |    /// Returns whether the kind is one of the invalid kinds, `Missing` or `Error`.
113 |    pub fn is_invalid(&self) -> bool {
114 |       matches!(self, Self::Missing | Self::Error)
115 |    }
116 | 
117 |    /// Returns whether the kind is `NoReturn`.
118 |    pub fn is_noreturn(&self) -> bool {
119 |       matches!(self, Self::NoReturn)
120 |    }
121 | 
122 |    /// Returns whether the kind is `Unit`.
123 |    pub fn is_unit(&self) -> bool {
124 |       matches!(self, Self::Unit)
125 |    }
126 | 
127 |    /// Returns whether the kind is `Bool`.
128 |    pub fn is_bool(&self) -> bool {
129 |       matches!(self, Self::Bool)
130 |    }
131 | 
132 |    /// Returns whether the kind is an integer of any size.
133 |    pub fn is_integer(&self) -> bool {
134 |       matches!(self, Self::Integer(_))
135 |    }
136 | 
137 |    /// Returns whether the kind is a float of any size.
138 |    pub fn is_float(&self) -> bool {
139 |       matches!(self, Self::Float(_))
140 |    }
141 | 
142 |    /// Returns whether the kind is numeric, that is, an integer or a float.
143 |    pub fn is_numeric(&self) -> bool {
144 |       matches!(self, Self::Integer(_) | Self::Float(_))
145 |    }
146 | 
147 |    /// Returns whether the type kind is for a type that's valid when used as a statement.
148 |    pub fn is_statement(&self) -> bool {
149 |       // Invalid kinds are accepted as well as the kinds that carry no usable value.
150 |       let invalid = matches!(self, Self::Missing | Self::Error);
151 |       let statement_like =
152 |          matches!(self, Self::Statement | Self::Declaration(_) | Self::Unit | Self::NoReturn);
153 |       invalid || statement_like
154 |    }
155 | 
156 |    /// Returns the integer size stored in the kind; panics if the kind is not an integer.
157 |    pub fn unwrap_integer(&self) -> IntegerSize {
158 |       if let Self::Integer(size) = self {
159 |          return *size;
160 |       }
161 |       panic!("unwrap_integer called on a type kind that is not an integer")
162 |    }
163 | 
164 |    /// Returns the float size stored in the kind; panics if the kind is not a float.
165 |    pub fn unwrap_float(&self) -> FloatSize {
166 |       if let Self::Float(size) = self {
167 |          return *size;
168 |       }
169 |       panic!("unwrap_float called on a type kind that is not a float")
170 |    }
171 | 
172 |    /// Returns `Some(symbol_id)` if the type kind represents a declaration,
173 |    /// or `None` if it doesn't.
174 |    pub fn as_declaration(&self) -> Option<SymbolId> {
175 |       match self {
176 |          Self::Declaration(symbol) => Some(*symbol),
177 |          _ => None,
178 |       }
179 |    }
180 | }
181 | 
182 | /// The size of an integer. `S` sizes are signed, `U` sizes are unsigned.
183 | #[derive(Clone, Copy, PartialEq, Eq)]
184 | #[repr(u8)] // `partial_cmp` compares the `u8` discriminants, so variant order matters.
185 | pub enum IntegerSize {
186 |    U8,
187 |    U16,
188 |    U32,
189 |    U64,
190 |    S8,
191 |    S16,
192 |    S32,
193 |    S64,
194 | }
195 | 
196 | impl IntegerSize {
197 |    /// Returns whether the size represents an unsigned integer, i.e. one of the `U*` variants.
198 |    pub fn is_unsigned(self) -> bool {
199 |       // Every variant is either `U*` or `S*`, so "unsigned" is exactly "not signed".
200 |       // The signed variants are listed once, in `is_signed`.
201 |       !self.is_signed()
202 |    }
203 | 
204 |    /// Returns whether the size represents a signed integer, i.e. one of the `S*` variants.
205 |    pub fn is_signed(self) -> bool {
206 |       matches!(self, Self::S8 | Self::S16 | Self::S32 | Self::S64)
207 |    }
208 | }
209 | 
210 | impl PartialOrd for IntegerSize {
211 |    /// Compares two sizes of the same signedness by width; mixed signedness yields `None`.
212 |    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
213 |       // Mixed pairs must be unordered in both directions, as `PartialOrd` requires.
214 |       if self.is_signed() != other.is_signed() {
215 |          return None;
216 |       }
217 |       (*self as u8).partial_cmp(&(*other as u8))
218 |    }
219 | }
220 | 
221 | /// The size of a float. The derived ordering follows declaration order, so `S32 < S64`.
222 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
223 | pub enum FloatSize {
224 |    S32, // The size used by `Float32`.
225 |    S64, // The size used by `Float64`.
226 | }
227 | 
228 | /// Configuration for "sensible default" types: `Int`, `Float`, and `Size`.
229 | pub struct DefaultTypes {
230 |    pub int_width: IntegerSize, // Must be signed, otherwise `BuiltinTypes::add_to` panics.
231 |    pub float_width: FloatSize,
232 |    pub size_width: IntegerSize, // Must be unsigned, otherwise `BuiltinTypes::add_to` panics.
233 | }
234 | 
235 | /// A struct containing the IDs of all the built-in types, as created by `BuiltinTypes::add_to`.
236 | pub struct BuiltinTypes {
237 |    // Special
238 |    pub t_error: TypeId,
239 |    pub t_unit: TypeId,
240 |    pub t_noreturn: TypeId,
241 |    pub t_statement: TypeId,
242 |    pub t_type: TypeId,
243 | 
244 |    // Boolean
245 |    pub t_bool: TypeId,
246 | 
247 |    // Integers
248 |    pub t_uint8: TypeId,
249 |    pub t_uint16: TypeId,
250 |    pub t_uint32: TypeId,
251 |    pub t_uint64: TypeId,
252 |    pub t_int8: TypeId,
253 |    pub t_int16: TypeId,
254 |    pub t_int32: TypeId,
255 |    pub t_int64: TypeId,
256 | 
257 |    // Floats
258 |    pub t_float32: TypeId,
259 |    pub t_float64: TypeId,
260 | 
261 |    // Int/Float/Size aliases, each equal to one of the sized IDs above (chosen via `DefaultTypes`).
262 |    // NOTE: These will later be implemented in the standard library and will be configurable with
263 |    // compiler switches.
264 |    pub t_int: TypeId,
265 |    pub t_float: TypeId,
266 |    pub t_size: TypeId,
267 | 
268 |    // Characters
269 |    pub t_char: TypeId,
270 | }
271 | 
272 | impl BuiltinTypes {
273 |    /// Adds all the built-in types to the given `Types` and returns their IDs.
274 |    ///
275 |    /// # Panics
276 |    ///
277 |    /// Panics if `default_types.int_width` is unsigned or `default_types.size_width` is signed.
278 |    pub fn add_to(types: &mut Types, default_types: &DefaultTypes) -> Self {
279 |       let t_error = types.create_type(TypeInfo {
280 |          // NOTE: Maybe look for better names than this?
281 |          // Just like `statement`, the name is lowercase, but users may think that the occurrence
282 |          // of errortype is an error in the compiler. Of course, it's not.
283 |          // Maybe we should "unwrap" error types somehow, so that we never report errors
284 |          // containing them?
285 |          name: "errortype",
286 |          kind: TypeKind::Error,
287 |       });
288 |       let t_uint8 = types.create_type(TypeInfo {
289 |          name: "Uint8",
290 |          kind: TypeKind::Integer(IntegerSize::U8),
291 |       });
292 |       let t_uint16 = types.create_type(TypeInfo {
293 |          name: "Uint16",
294 |          kind: TypeKind::Integer(IntegerSize::U16),
295 |       });
296 |       let t_uint32 = types.create_type(TypeInfo {
297 |          name: "Uint32",
298 |          kind: TypeKind::Integer(IntegerSize::U32),
299 |       });
300 |       let t_uint64 = types.create_type(TypeInfo {
301 |          name: "Uint64",
302 |          kind: TypeKind::Integer(IntegerSize::U64),
303 |       });
304 |       let t_int8 = types.create_type(TypeInfo {
305 |          name: "Int8",
306 |          kind: TypeKind::Integer(IntegerSize::S8),
307 |       });
308 |       let t_int16 = types.create_type(TypeInfo {
309 |          name: "Int16",
310 |          kind: TypeKind::Integer(IntegerSize::S16),
311 |       });
312 |       let t_int32 = types.create_type(TypeInfo {
313 |          name: "Int32",
314 |          kind: TypeKind::Integer(IntegerSize::S32),
315 |       });
316 |       let t_int64 = types.create_type(TypeInfo {
317 |          name: "Int64",
318 |          kind: TypeKind::Integer(IntegerSize::S64),
319 |       });
320 |       let t_float32 = types.create_type(TypeInfo {
321 |          name: "Float32",
322 |          kind: TypeKind::Float(FloatSize::S32),
323 |       });
324 |       let t_float64 = types.create_type(TypeInfo {
325 |          name: "Float64",
326 |          kind: TypeKind::Float(FloatSize::S64),
327 |       });
328 |       Self {
329 |          t_error,
330 |          t_unit: types.create_type(TypeInfo {
331 |             name: "()",
332 |             kind: TypeKind::Unit,
333 |          }),
334 |          t_noreturn: types.create_type(TypeInfo {
335 |             name: "NoReturn",
336 |             kind: TypeKind::NoReturn,
337 |          }),
338 |          // Unlike all other types, the `statement` type is lowercase. This should let users know
339 |          // that the "expression" in question isn't an expression after all.
340 |          t_statement: types.create_type(TypeInfo {
341 |             name: "statement",
342 |             kind: TypeKind::Statement,
343 |          }),
344 |          t_type: types.create_type(TypeInfo {
345 |             name: "type",
346 |             kind: TypeKind::Type,
347 |          }),
348 |          t_bool: types.create_type(TypeInfo {
349 |             name: "Bool",
350 |             kind: TypeKind::Bool,
351 |          }),
352 |          t_uint8,
353 |          t_uint16,
354 |          t_uint32,
355 |          t_uint64,
356 |          t_int8,
357 |          t_int16,
358 |          t_int32,
359 |          t_int64,
360 |          t_float32,
361 |          t_float64,
362 | 
363 |          t_int: match default_types.int_width {
364 |             IntegerSize::S8 => t_int8,
365 |             IntegerSize::S16 => t_int16,
366 |             IntegerSize::S32 => t_int32,
367 |             IntegerSize::S64 => t_int64,
368 |             _ => panic!("int_width must be signed"),
369 |          },
370 |          t_float: match default_types.float_width {
371 |             FloatSize::S32 => t_float32,
372 |             FloatSize::S64 => t_float64,
373 |          },
374 |          t_size: match default_types.size_width {
375 |             IntegerSize::U8 => t_uint8,
376 |             IntegerSize::U16 => t_uint16,
377 |             IntegerSize::U32 => t_uint32,
378 |             IntegerSize::U64 => t_uint64,
379 |             _ => panic!("size_width must be unsigned"),
380 |          },
381 |          t_char: types.create_type(TypeInfo {
382 |             name: "Char",
383 |             kind: TypeKind::Char,
384 |          }),
385 |       }
386 |    }
387 | 
388 |    /// Registers named built-in types in the given scope.
389 |    ///
390 |    /// TODO: Remove this in favor of the stdlib declaring the types in the prelude.
391 |    pub(crate) fn register_in(&self, scopes: &mut Scopes, symbols: &mut Symbols, scope: ScopeId) {
392 |       macro_rules! add_type {
393 |          ($field:tt, $name:tt) => {
394 |             let kind = SymbolKind::Type(self.$field);
395 |             let symbol = symbols.create($name, NodeId::null(), self.t_type, kind);
396 |             scopes.insert(scope, symbols.name(symbol), symbol);
397 |          };
398 |       }
399 | 
400 |       add_type!(t_noreturn, "NoReturn");
401 |       add_type!(t_bool, "Bool");
402 |       add_type!(t_uint8, "Uint8");
403 |       add_type!(t_uint16, "Uint16");
404 |       add_type!(t_uint32, "Uint32");
405 |       add_type!(t_uint64, "Uint64");
406 |       add_type!(t_int8, "Int8");
407 |       add_type!(t_int16, "Int16");
408 |       add_type!(t_int32, "Int32");
409 |       add_type!(t_int64, "Int64");
410 |       add_type!(t_float32, "Float32");
411 |       add_type!(t_float64, "Float64");
412 |       add_type!(t_char, "Char");
413 | 
414 |       add_type!(t_int, "Int");
415 |       add_type!(t_float, "Float");
416 |       add_type!(t_size, "Size");
417 |    }
418 | }
419 | 
420 | /// A unique ID identifying an entry in the type log; it wraps the entry's index in `TypeLog`.
421 | #[derive(Clone, Copy, Debug)]
422 | #[must_use]
423 | pub struct TypeLogEntry(usize);
424 | 
425 | /// A result storing a log entry for either a valid (`Ok`) or an erroneous (`Err`) type usage.
426 | pub type TypeLogResult = Result<TypeLogEntry, TypeLogEntry>;
427 | 
428 | impl From<TypeLogResult> for TypeLogEntry {
429 |    /// Extracts the log entry from a result, regardless of whether it is `Ok` or `Err`.
430 |    ///
431 |    /// This is used to simplify returning from functions when analysis errors occur.
432 |    fn from(result: Result<TypeLogEntry, TypeLogEntry>) -> Self {
433 |       // Both variants carry the same payload type, so the error case maps to itself
434 |       // and the success case is returned untouched.
435 |       result.unwrap_or_else(|error_entry| error_entry)
436 |    }
437 | }
438 | 
439 | /// A log storing the AST nodes from which different instances of types came.
440 | pub struct TypeLog {
441 |    types: Vec<TypeId>, // Logged type of each entry, indexed by `TypeLogEntry`.
442 |    nodes: Vec<NodeId>, // Source node of each entry; parallel to `types`.
443 | }
444 | 
445 | impl TypeLog {
446 |    /// Constructs an empty type log.
447 |    pub fn new() -> Self {
448 |       let (types, nodes) = (Vec::new(), Vec::new());
449 |       Self { types, nodes }
450 |    }
451 | 
452 |    /// Appends a type together with the node it came from, and returns the entry's handle.
453 |    pub fn push(&mut self, typ: TypeId, node: NodeId) -> TypeLogEntry {
454 |       let entry = TypeLogEntry(self.types.len());
455 |       self.types.push(typ);
456 |       self.nodes.push(node);
457 |       entry
458 |    }
459 | 
460 |    /// Returns the type recorded for the given log entry.
461 |    pub fn type_id(&self, entry: TypeLogEntry) -> TypeId {
462 |       let TypeLogEntry(index) = entry;
463 |       self.types[index]
464 |    }
465 | 
466 |    /// Returns the source node recorded for the given log entry.
467 |    pub fn node(&self, entry: TypeLogEntry) -> NodeId {
468 |       let TypeLogEntry(index) = entry;
469 |       self.nodes[index]
470 |    }
471 | }
472 | 


--------------------------------------------------------------------------------