├── .ci ├── Dockerfile └── buildbase-pipelines.yaml ├── .editorconfig ├── .gitignore ├── .vscode └── launch.json ├── Cargo.lock ├── Cargo.toml ├── LICENSE.md ├── README.md ├── STRUCTURE.md ├── azure-pipelines.yaml ├── build.sh ├── docs ├── 00-getting-started.md ├── 10-syntax.md ├── 12-parsing.md ├── 15-data-layout.md ├── 90-syntax-transforms.md └── index.md ├── justfile ├── spec ├── README.md ├── assignment.ulg ├── basics.ulg ├── bench │ ├── README.md │ └── fib.ulg ├── bool.ulg ├── collatz.ulg ├── compare.ulg ├── euler1.ulg ├── evens.ulg ├── factorial.ulg ├── fail │ ├── README.md │ ├── assign_to_immutable.ulg │ ├── assign_to_undefined.ulg │ ├── badcalls.ulg │ ├── badvars.ulg │ ├── dupe_fn_param.ulg │ ├── if_else_badtypes.ulg │ ├── mismatched_assign.ulg │ ├── undefined.ulg │ ├── undefined_func.ulg │ └── wrong_type_var.ulg ├── fib.ulg ├── fizzbuzz.ulg ├── funcfact.ulg ├── functions.ulg ├── gcd.ulg ├── hello.ulg ├── hello_world.ulg ├── identity.ulg ├── if.ulg ├── literals.ulg ├── long_string.ulg ├── malformed │ ├── README.md │ ├── half_ternary.ulg │ ├── invalid_calls.ulg │ ├── missing_ident.ulg │ ├── param_missing_type.ulg │ ├── unexpected_plus.ulg │ ├── unterminated_bracket.ulg │ ├── unterminated_fn.ulg │ ├── unterminated_string.ulg │ ├── unterminated_string2.ulg │ └── unterminated_string3.ulg ├── maybe_invert.ulg ├── modulus.ulg ├── operators.ulg ├── prefix.ulg ├── printing.ulg ├── scopes.ulg ├── string_concat.ulg ├── types.ulg ├── unimplemneted │ ├── README.md │ ├── array_types.ulg │ └── tuple_types.ulg ├── until.ulg ├── variables.ulg └── while.ulg ├── specs.py └── src ├── compile.rs ├── compile ├── error.rs ├── linker.rs ├── lower.rs ├── lower_context.rs ├── options.rs └── string_builtins.rs ├── diag.rs ├── low_loader.rs ├── low_loader ├── builder.rs ├── context.rs ├── function.rs ├── module.rs ├── pass_manager.rs ├── targets.rs ├── types.rs └── value.rs ├── main.rs ├── meta.rs ├── sem.rs ├── sem ├── binder.rs ├── operators.rs ├── sem_ctx.rs ├── tree.rs └── 
types.rs ├── syntax.rs └── syntax ├── node.rs ├── parse.rs ├── parse ├── checkparse_tests.rs └── tokeniser.rs ├── syntax_builder.rs ├── text.rs ├── text ├── intern.rs ├── position.rs └── source_text.rs ├── tree.rs └── tree ├── expression.rs ├── operators.rs ├── seplist.rs ├── token.rs ├── trivia.rs └── types.rs /.ci/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | RUN apt-get update && \ 3 | apt-get install -y wget curl gnupg2 lsb-release software-properties-common && \ 4 | curl -O https://apt.llvm.org/llvm.sh && \ 5 | chmod +x llvm.sh && \ 6 | ./llvm.sh 9 && \ 7 | apt-get install -y sudo && \ 8 | apt-get install -y python3 && \ 9 | apt-get install -y llvm-9-dev && \ 10 | apt-get install -y clang binutils && \ 11 | apt-get install -y zlib1g-dev 12 | -------------------------------------------------------------------------------- /.ci/buildbase-pipelines.yaml: -------------------------------------------------------------------------------- 1 | pool: 2 | vmImage: 'ubuntu-16.04' 3 | 4 | trigger: 5 | paths: 6 | include: 7 | - '.ci' 8 | 9 | variables: 10 | dockerId: iwillspeak 11 | imageName: ullage-linux-build-base 12 | 13 | steps: 14 | - script: | 15 | docker build -t $(dockerId)/$(imageName) . 
16 | echo $pswd | docker login -u $(dockerId) --password-stdin 17 | docker push $(dockerId)/$(imageName) 18 | workingDirectory: .ci 19 | env: 20 | pswd: $(dockerPassword) 21 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | 2 | root = true 3 | 4 | [*.rs] 5 | end_of_line = lf 6 | charset = utf-8 7 | trim_trailing_whitespace = true 8 | indent_style = space 9 | indent_size = 4 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Cargo build output 2 | target/ 3 | # Docket output from docs build 4 | docs/build/ 5 | # Binaries for spec tests 6 | specbin/ 7 | a.out -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "lldb", 9 | "request": "launch", 10 | "terminal": "integrated", 11 | "name": "Debug executable 'ullage'", 12 | "cargo": { 13 | "args": [ 14 | "build", 15 | "--bin=ullage", 16 | "--package=ullage" 17 | ], 18 | "filter": { 19 | "kind": "bin" 20 | } 21 | }, 22 | "args": [], 23 | "cwd": "${workspaceFolder}" 24 | }, 25 | { 26 | "type": "lldb", 27 | "request": "launch", 28 | "name": "Debug unit tests in executable 'ullage'", 29 | "cargo": { 30 | "args": [ 31 | "test", 32 | "--no-run", 33 | "--bin=ullage", 34 | "--package=ullage" 35 | ], 36 | "filter": { 37 | "kind": "bin" 38 | } 39 | }, 40 | "args": [], 41 | "cwd": "${workspaceFolder}" 42 | } 43 | ] 44 | } -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "0.7.18" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "autocfg" 16 | version = "1.1.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 19 | 20 | [[package]] 21 | name = "bitflags" 22 | version = "1.3.2" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 25 | 26 | [[package]] 27 | name = "cc" 28 | version = "1.0.73" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" 31 | 32 | [[package]] 33 | name = "cfg-if" 34 | version = "1.0.0" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 37 | 38 | [[package]] 39 | name = "docopt" 40 | version = "1.1.1" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "7f3f119846c823f9eafcf953a8f6ffb6ed69bf6240883261a7f13b634579a51f" 43 | dependencies = [ 44 | "lazy_static", 45 | "regex", 46 | "serde", 47 | "strsim", 48 | ] 49 | 50 | [[package]] 51 | name = "fastrand" 52 | version = "1.7.0" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" 55 | dependencies = [ 56 | "instant", 57 | ] 58 | 59 | [[package]] 60 | name = "hashbrown" 61 | version = "0.12.2" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "607c8a29735385251a339424dd462993c0fed8fa09d378f259377df08c126022" 64 | 65 | [[package]] 66 | name = 
"indexmap" 67 | version = "1.9.1" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" 70 | dependencies = [ 71 | "autocfg", 72 | "hashbrown", 73 | ] 74 | 75 | [[package]] 76 | name = "instant" 77 | version = "0.1.12" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 80 | dependencies = [ 81 | "cfg-if", 82 | ] 83 | 84 | [[package]] 85 | name = "lazy_static" 86 | version = "1.4.0" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 89 | 90 | [[package]] 91 | name = "libc" 92 | version = "0.2.126" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 95 | 96 | [[package]] 97 | name = "llvm-sys" 98 | version = "90.2.1" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "d5fe45e64c8db814191fedc1787c3fb8eb2e2c8af897727b3e21df2562c4a01c" 101 | dependencies = [ 102 | "cc", 103 | "lazy_static", 104 | "libc", 105 | "regex", 106 | "semver 0.9.0", 107 | ] 108 | 109 | [[package]] 110 | name = "llvm-sys" 111 | version = "100.2.3" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = "982d0c7337299c4a91bd5d52a86d26cf47e0e84f3b9017874a0b5c667c3cb612" 114 | dependencies = [ 115 | "cc", 116 | "lazy_static", 117 | "libc", 118 | "regex", 119 | "semver 0.9.0", 120 | ] 121 | 122 | [[package]] 123 | name = "llvm-sys" 124 | version = "130.0.4" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "bdb6ea20e8a348f6db0b43a7f009fa7d981d22edf4cbe2e0c7b2247dbb25be61" 127 | dependencies = [ 128 | "cc", 129 | "lazy_static", 130 | "libc", 131 | "regex", 132 | "semver 0.11.0", 133 | ] 134 | 135 | 
[[package]] 136 | name = "memchr" 137 | version = "2.5.0" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 140 | 141 | [[package]] 142 | name = "pest" 143 | version = "2.3.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "4b0560d531d1febc25a3c9398a62a71256c0178f2e3443baedd9ad4bb8c9deb4" 146 | dependencies = [ 147 | "thiserror", 148 | "ucd-trie", 149 | ] 150 | 151 | [[package]] 152 | name = "proc-macro2" 153 | version = "1.0.40" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" 156 | dependencies = [ 157 | "unicode-ident", 158 | ] 159 | 160 | [[package]] 161 | name = "quote" 162 | version = "1.0.20" 163 | source = "registry+https://github.com/rust-lang/crates.io-index" 164 | checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" 165 | dependencies = [ 166 | "proc-macro2", 167 | ] 168 | 169 | [[package]] 170 | name = "redox_syscall" 171 | version = "0.2.13" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" 174 | dependencies = [ 175 | "bitflags", 176 | ] 177 | 178 | [[package]] 179 | name = "regex" 180 | version = "1.6.0" 181 | source = "registry+https://github.com/rust-lang/crates.io-index" 182 | checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" 183 | dependencies = [ 184 | "aho-corasick", 185 | "memchr", 186 | "regex-syntax", 187 | ] 188 | 189 | [[package]] 190 | name = "regex-syntax" 191 | version = "0.6.27" 192 | source = "registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" 194 | 195 | [[package]] 196 | name = "remove_dir_all" 197 | version = "0.5.3" 198 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 199 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" 200 | dependencies = [ 201 | "winapi", 202 | ] 203 | 204 | [[package]] 205 | name = "semver" 206 | version = "0.9.0" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" 209 | dependencies = [ 210 | "semver-parser 0.7.0", 211 | ] 212 | 213 | [[package]] 214 | name = "semver" 215 | version = "0.11.0" 216 | source = "registry+https://github.com/rust-lang/crates.io-index" 217 | checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" 218 | dependencies = [ 219 | "semver-parser 0.10.2", 220 | ] 221 | 222 | [[package]] 223 | name = "semver-parser" 224 | version = "0.7.0" 225 | source = "registry+https://github.com/rust-lang/crates.io-index" 226 | checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" 227 | 228 | [[package]] 229 | name = "semver-parser" 230 | version = "0.10.2" 231 | source = "registry+https://github.com/rust-lang/crates.io-index" 232 | checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" 233 | dependencies = [ 234 | "pest", 235 | ] 236 | 237 | [[package]] 238 | name = "serde" 239 | version = "1.0.139" 240 | source = "registry+https://github.com/rust-lang/crates.io-index" 241 | checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6" 242 | dependencies = [ 243 | "serde_derive", 244 | ] 245 | 246 | [[package]] 247 | name = "serde_derive" 248 | version = "1.0.139" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb" 251 | dependencies = [ 252 | "proc-macro2", 253 | "quote", 254 | "syn", 255 | ] 256 | 257 | [[package]] 258 | name = "strsim" 259 | version = "0.10.0" 260 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 262 | 263 | [[package]] 264 | name = "syn" 265 | version = "1.0.98" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" 268 | dependencies = [ 269 | "proc-macro2", 270 | "quote", 271 | "unicode-ident", 272 | ] 273 | 274 | [[package]] 275 | name = "tempfile" 276 | version = "3.3.0" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" 279 | dependencies = [ 280 | "cfg-if", 281 | "fastrand", 282 | "libc", 283 | "redox_syscall", 284 | "remove_dir_all", 285 | "winapi", 286 | ] 287 | 288 | [[package]] 289 | name = "thiserror" 290 | version = "1.0.32" 291 | source = "registry+https://github.com/rust-lang/crates.io-index" 292 | checksum = "f5f6586b7f764adc0231f4c79be7b920e766bb2f3e51b3661cdb263828f19994" 293 | dependencies = [ 294 | "thiserror-impl", 295 | ] 296 | 297 | [[package]] 298 | name = "thiserror-impl" 299 | version = "1.0.32" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "12bafc5b54507e0149cdf1b145a5d80ab80a90bcd9275df43d4fff68460f6c21" 302 | dependencies = [ 303 | "proc-macro2", 304 | "quote", 305 | "syn", 306 | ] 307 | 308 | [[package]] 309 | name = "ucd-trie" 310 | version = "0.1.4" 311 | source = "registry+https://github.com/rust-lang/crates.io-index" 312 | checksum = "89570599c4fe5585de2b388aab47e99f7fa4e9238a1399f707a02e356058141c" 313 | 314 | [[package]] 315 | name = "ullage" 316 | version = "0.1.0" 317 | dependencies = [ 318 | "docopt", 319 | "indexmap", 320 | "libc", 321 | "llvm-sys 100.2.3", 322 | "llvm-sys 130.0.4", 323 | "llvm-sys 90.2.1", 324 | "serde", 325 | "tempfile", 326 | ] 327 | 328 | [[package]] 329 | name = "unicode-ident" 330 | version = "1.0.1" 331 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" 333 | 334 | [[package]] 335 | name = "winapi" 336 | version = "0.3.9" 337 | source = "registry+https://github.com/rust-lang/crates.io-index" 338 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 339 | dependencies = [ 340 | "winapi-i686-pc-windows-gnu", 341 | "winapi-x86_64-pc-windows-gnu", 342 | ] 343 | 344 | [[package]] 345 | name = "winapi-i686-pc-windows-gnu" 346 | version = "0.4.0" 347 | source = "registry+https://github.com/rust-lang/crates.io-index" 348 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 349 | 350 | [[package]] 351 | name = "winapi-x86_64-pc-windows-gnu" 352 | version = "0.4.0" 353 | source = "registry+https://github.com/rust-lang/crates.io-index" 354 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 355 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ullage" 3 | version = "0.1.0" 4 | authors = ["Will Speak "] 5 | description = "The Ullage Programming Language" 6 | license = "MIT" 7 | # documentation = "no public docs yet" 8 | repository = "https://github.com/iwillspeak/ullage" 9 | readme = "README.md" 10 | keywords = [ "llvm", "parser", "compiler" ] 11 | categories = [ "parsing" ] 12 | edition = "2018" 13 | 14 | [features] 15 | default = [ "llvm-13" ] 16 | 17 | [dependencies] 18 | llvm-9 = { package = "llvm-sys", version = "90", optional = true } 19 | llvm-10 = { package = "llvm-sys", version = "100", optional = true } 20 | llvm-13 = { package = "llvm-sys", version = "130", optional = true } 21 | docopt = "1.1" 22 | serde = { version = "1.0", features = ["derive"] } 23 | tempfile = "3.1" 24 | libc = "0.2" 25 | indexmap = "1.5" 26 | 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # Ullage is Open Source! 2 | 3 | Ullage is copyright 2016 Will Speak and licensed under MIT terms. 4 | 5 | ## The MIT License (MIT) 6 | Copyright (c) 2016 Will Speak 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ullage 2 | 3 | [![Build Status][build_status_image]][build_status] 4 | 5 | A statically-typed compiled language defined by a simple grammar. 6 | 7 | ## Current Status 8 | 9 | It is now possible to write simple programs. 
The following program 10 | computes 9 factorial: 11 | 12 | fn fact(n: Number): Number 13 | var acc = 1 14 | var i = 1 15 | while i < n 16 | acc = acc * i 17 | i = i + 1 18 | end 19 | acc 20 | end 21 | 22 | print fact(9) # => 362880 23 | 24 | You can also mess around with constant strings: 25 | 26 | # FizzBuzz 27 | # 28 | # Implementation of the legendary `FizzBuzz` algorithm. 29 | fn fizzbuzz(n: Number): String 30 | (print 'fizzbuzz') if mod(n, 15) == 0 else 31 | (print 'fizz') if mod(n, 3) == 0 else 32 | (print 'buzz') if mod(n, 5) == 0 else 33 | print_num(n) 34 | end 35 | 36 | ## Building and Testing 37 | 38 | The main build is performed by `cargo`. For running the functional 39 | tests and benchmarks you'll need Python and to `cargo install just`. The suggested process is to 40 | use the `build.sh` script: 41 | 42 | * `$ ./build.sh` will build the compiler `target/release/ullage`. 43 | * `$ ./build.sh test` will build the compiler and run the test suite 44 | from `spec/`. 45 | * `$ ./build.sh bench` will run the benchmarks from `spec/bench/`. 46 | 47 | ## License 48 | 49 | Ullage is open source, under the [MIT License](LICENSE.md). 
50 | 51 | ## Features and Progress 52 | 53 | * [ ] Custom data structures 54 | * [ ] Pattern matching 55 | * [ ] First-class functions 56 | 57 | ### Lexer 58 | 59 | * [x] Recognise words, numbers, comments, operators and white-space 60 | * [x] Position information on each token 61 | * [ ] Interpolated strings 62 | * [x] Expose whitespace to the parser 63 | 64 | ### Parser 65 | 66 | * [x] Parse base constructs 67 | * [ ] For loops and iterators 68 | * [ ] Traditional `if` blocks 69 | * [x] Keep track of _all_ underlying tokens 70 | * [x] Expose position & span information on syntax nodes 71 | * [ ] Round-trippable/pretty-printable trees 72 | 73 | ### Code Generation / Lowering 74 | 75 | * [x] Create LLVM module and lower basic constructs 76 | * [ ] Array indexing 77 | * [x] Arbitrary types for local variables 78 | * [ ] Heap allocated types 79 | * [x] Lowering of `String` type 80 | * [ ] User-defined types 81 | * [ ] RC garbage collection (#26) 82 | * [ ] Library output types (LLVM ir, LLVM bc, object, staticlib, dylib, exe) 83 | * [x] Control of target machine & features 84 | * [x] Optimisation 85 | * [ ] Linker support: 86 | * [x] `clang` - macOS linker default 87 | * [ ] gold - GNU ld 88 | * [ ] lld/llvm-link 89 | * [ ] Microsoft LINK 90 | 91 | 92 | [build_status_image]: https://dev.azure.com/iwillspeak/GitHub/_apis/build/status/iwillspeak.ullage?branchName=main 93 | [build_status]: https://dev.azure.com/iwillspeak/GitHub/_build/latest?definitionId=2&branchName=main 94 | -------------------------------------------------------------------------------- /STRUCTURE.md: -------------------------------------------------------------------------------- 1 | # Structure of the Repository 2 | 3 | This is an outline of the planned structure of the repository. It details the general layout, the Rust module hierarchy, and some of the significant classes. 4 | 5 | ## Important Folders 6 | 7 | The language parser and compiler are written in Rust. 
The source lives in the `src/` folder. 8 | 9 | Functional tests for the language consist of a set of source files in `spec/`. These files contain specially formatted comments which are interpreted by the Python test runner `specs.py` when run. 10 | 11 | Documentation, written in Markdown, is in the `docs/` folder. It can be compiled into a static site with [`docket`](http://willspeak.me/docket/). 12 | 13 | ## Rust modules 14 | 15 | There are four main Rust modules in the project. Three of them (`syntax`, `sem` and `compile`) correspond to the main stages of the compilation pipeline. The final module contains abstractions around LLVM itself. 16 | 17 | ### `syntax` 18 | 19 | This module contains all of the logic required to parse source text into a syntax expression tree (AST). 20 | 21 | * `Token` - Represents a single lexeme in the language. 22 | * `Expression` - Represents a node in the syntax expression tree. Expressions are composed from one or more `Token`s. 23 | * `TypeRef` - Represents a reference to a type. This could be a simple type like `Num`, or a more complex one like `[Num]`. 24 | 25 | ### `sem` 26 | 27 | This module is responsible for semantic analysis. It contains a more detailed semantic expression tree, a syntax expression tree visitor to transform a syntax tree into a semantic tree, and a set of transforms which can be applied to the semantic tree. 28 | 29 | The semantic expression tree contains metadata about types and has each identifier resolved. 30 | 31 | ### `compile` 32 | 33 | This module is responsible for lowering and evaluating semantic expression trees. 34 | 35 | * `Compiler` - An expression visitor which walks `sem::Expression`s and compiles them into native modules. 36 | * `LowerContext` - The context required when lowering a given expression tree to LLVM IR. 37 | 38 | ### `low_loader` 39 | 40 | This module contains high-level safe wrappers around the LLVM API. It's not intended to expose the whole API. 
Instead it provides just the modules and abstractions needed to make working with LLVM ergonomic. I'd like to get this to the point where it provides enough abstraction so that there are no LLVMisms in the interface. It would then be possible to replace the backend with something like [CraneLift](https://github.com/CraneStation/cranelift). Maybe even depending on the target we are compiling for (e.g. Cranelift for WASM or JIT or whatever). 41 | -------------------------------------------------------------------------------- /azure-pipelines.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | containers: 3 | - container: buildbase 4 | image: 'iwillspeak/ullage-linux-build-base' 5 | options: '-m 4G' 6 | 7 | jobs: 8 | - job: macOS 9 | pool: 10 | vmImage: 'macOS-10.15' 11 | strategy: 12 | matrix: 13 | LLVM9: 14 | llvm_tag: 'llvm\@9' 15 | features: 'llvm-9' 16 | llvm_ver: '90' 17 | LLVM10: 18 | llvm_tag: 'llvm' 19 | features: 'llvm-10' 20 | llvm_ver: '100' 21 | steps: 22 | - script: 'brew install $(llvm_tag)' 23 | displayName: Brew install LLVM 24 | - script: "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o rustup.sh && sh rustup.sh -y" 25 | displayName: Rustup 26 | - script: | 27 | export PATH=/usr/local/bin:$PATH # Local bin (brew) 28 | source ~/.cargo/env 29 | export LLVM_SYS_$(llvm_ver)_PREFIX=/usr/local/opt/$(llvm_tag) 30 | export LLVM_CONFIG_PATH=${LLVM_SYS_$(llvm_ver)_PREFIX}/bin/llvm-config 31 | cargo install just 32 | ./build.sh features=$(features) test 33 | displayName: './build.sh test' 34 | - job: Linux 35 | pool: 36 | vmImage: 'ubuntu-16.04' 37 | container: buildbase 38 | steps: 39 | - script: | 40 | python3 --version 41 | export PATH=${PATH}:~/.local/bin/ 42 | displayName: Python 43 | - script: "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o rustup.sh && sh rustup.sh -y" 44 | displayName: Rustup 45 | - script: | 46 | source ~/.cargo/env 47 | export 
PATH=${PATH}:~/.local/bin/:/usr/lib/llvm-9/bin 48 | export LLVM_SYS_90_PREFIX=/usr/lib/llvm-9 49 | export LLVM_CONFIG_PATH=${LLVM_SYS_90_PREFIX}/bin/llvm-config 50 | cargo install just 51 | ./build.sh test 52 | displayName: './build.sh test' 53 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | set -e 4 | 5 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 6 | 7 | function check_for() 8 | { 9 | command -v $1 >/dev/null 2>&1 || { echo >&2 "$1 is required but it's not installed. Aborting."; exit 1; } 10 | } 11 | 12 | check_for python3 13 | 14 | echo "Clang version:" 15 | clang --version 16 | 17 | echo "Rust version" 18 | rustc --version 19 | 20 | just "$@" 21 | -------------------------------------------------------------------------------- /docs/00-getting-started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This is only known to work on my machine at the moment. I'm running macOS and stable Rust. If you'd still like to give it a go then make sure you have: 4 | 5 | * Rust - to compile the compiler 6 | * Just - to run help commands 7 | * Python - to run the functional tests 8 | * Clang - Used to link the output to create the final executables. 9 | 10 | The compiler is written in Rust and built with *Cargo*. Functional tests are defined by a collection of source files in `spec/` and run by `specs.py`. It's easier to run the tests with a virtual environment. Rather than running `cargo` or Python directly, use the `build.sh` script. This will set up the Python environment as required. 11 | 12 | ## Source 13 | 14 | The source is all available [at GitHub](https://github.com/iwillspeak/ullage). Pull requests, comments and issues are welcome. 
Any thoughts I find interesting during development might be posted to [my blog](http://willspeak.me/). 15 | 16 | ## Building & Running the Compiler 17 | 18 | The compiler can be built with `./build.sh build --release` from the root directory. This should create a `target/release/ullage` executable. With this executable in your path you can compile an example program with `$ ullage -o hello spec/hello.ulg`. This will produce an executable called `hello` in the current directory based on the source file `spec/hello.ulg`. Running `$ ./hello` should then print 1337 to the terminal. For full usage instructions on the compiler run `$ ullage --help`. 19 | -------------------------------------------------------------------------------- /docs/10-syntax.md: -------------------------------------------------------------------------------- 1 | # Syntax & Grammar 2 | 3 | Ullage files are plain-old UTF-8. The language itself is built mainly around *words* rather than fancy sigils; more reminiscent of Ruby than C and friends. 4 | 5 | [TOC] 6 | 7 | ## Tokens 8 | 9 | Source text is treated as one of four basic token types: *words*, *punctuation*, *literals* and *whitespace*. Words and whitespace are Unicode-aware. 10 | 11 | ### Words 12 | 13 | Word tokens start with an alphabetic character or an underscore. They may then contain any number of alphanumeric or underscore characters. 14 | 15 | Examples of words are: `foo`, `fn`, `_1` and `∂`. Some words have special meanings in the grammar: 16 | 17 | if unless else while until end fn var let print 18 | 19 | ### Punctuation 20 | 21 | Punctuation characters, such as `-` and `!=`, are used to represent operators in the language. Currently a handful of punctuation characters are recognised: `=`, `==`, `!`, `!=`, `+`, `-`, `*`, `/`, `(`, `)`, `[`, `]`, `,`, `:`, `<`, and `>`. 22 | 23 | ### Literals 24 | 25 | Literals are used to represent constant input values to the program. 
Literals can be used to specify values of any primitive type (`Number`, `Bool` and `String` so far). Numeric literals consist of one or more consecutive digits: `0`, `42`, `1337`. Although `-47` evaluates to a negative number the `-` isn't part of the literal; in fact it is an operator. 26 | 27 | ### Whitespace 28 | 29 | Whitespace tokens are made up of one or more *space* characters. These *space* characters are either Unicode whitespace, such as tabs & spaces, or comments. Comments are introduced with a `#` and continue to the end of the line. 30 | 31 | # This is a comment! 32 | 33 | ## Types 34 | 35 | There are three main base types: `Number`, `String` and `Bool`. These can be extended by creating arrays and tuples. 36 | 37 | ### `Bool` 38 | 39 | A boolean holds a single bit of information. Boolean values can be created with the literals `true` and `false`; or as the result of a comparison (`foo == 100`). 40 | 41 | ### `Number` 42 | 43 | Number values hold whole numbers, or integers. Currently only decimal numeric literals are supported. All number values are stored in a 64-bit integer value. 44 | 45 | ### `String` 46 | 47 | String literals define a sequence of Unicode code points. All strings in the language are UTF-8. 48 | 49 | 'I am a string' 50 | 51 | ### Arrays 52 | 53 | An array type is declared by wrapping an existing type in `[]`. For example `[Number]` is the type for an array of `Number` values. To create a new array an array literal can be used: 54 | 55 | [1, 2, 3, 4] 56 | 57 | All the values in an array must be of a single type. 58 | 59 | ### Tuples 60 | 61 | Tuples are similar to arrays but can contain values with different types. Tuples are defined by wrapping a comma-separated list of types in `()`. For example the type `(Number, Bool)` defines a two-element tuple with the first element a `Number` and the second a `Bool`. 
Instances of a tuple can be created by wrapping values in `()`: 62 | 63 | (100, false) 64 | 65 | ## Variables 66 | 67 | Variables are introduced with the `var` keyword followed by an identifier and an optional type. Immutable variables can be introduced with the `let` keyword. 68 | 69 | let foo = 100 70 | var bar: Number = 10 71 | 72 | ## Operators 73 | 74 | Ullage has both infix and prefix operators. Operators are grouped by precedence. Precedence can be overridden or enforced with parentheses. 75 | 76 | The following infix operators exist, in precedence order from lowest to highest: 77 | 78 | * `==`, `!=`, `<`, `>` - Comparison operators 79 | * `+`, `-` - Addition and Subtraction 80 | * `*`, `/` - Multiplication and division 81 | 82 | The following prefix operators exist. All prefix operators bind directly to the next expression: 83 | 84 | * `-`, `+`, `!` 85 | 86 | ## Compound Expressions 87 | 88 | As well as just simple expressions Ullage supports compound ones too. There aren't that many at this time: 89 | 90 | ### While loop 91 | 92 | A basic while loop has the form: 93 | 94 | ``` 95 | while foo 96 | do_stuff() 97 | .. 98 | end 99 | ``` 100 | 101 | This continues until the expression `foo` is false. 102 | 103 | There is also an alternate form, the `until` loop which continues until the expression is true: 104 | 105 | ``` 106 | until bar 107 | do_other_stuff() 108 | end 109 | ``` 110 | 111 | ### Print Expression 112 | 113 | The `print` word acts as a prefix operator. It will print the expression immediately to the right of it to standard output and return the value. 114 | 115 | ``` 116 | print 'hello world' 117 | ``` 118 | 119 | ### Ternary Conditional Expression 120 | 121 | The ternary expression in Ullage has two forms: `if` and `unless`. 122 | 123 | ``` 124 | foo if bar else baz 125 | ``` 126 | 127 | will evaluate the expression on the left hand side if the condition `bar` is true, or the right hand side `baz` if false. 
128 | 129 | The `unless` expression does the opposite: 130 | 131 | ``` 132 | biff unless bop else buzz 133 | ``` 134 | 135 | ### Function Expression 136 | 137 | Finally an `fn` expression can be used to introduce a function: 138 | 139 | ``` 140 | fn mod(n: Number, d: Number): Number 141 | n if n < d else mod(n - d, d) 142 | end 143 | ``` 144 | 145 | This defines a function named `mod` which takes two `Number` parameters, `n` and `d`, and returns a `Number`. 146 | 147 | ### Index Expressions 148 | 149 | An index expression uses `[]` to access elements from an array. Array indices start at `0` for the first element: 150 | 151 | ``` 152 | let fuzz = [1, 2, 3, 4] 153 | print fuzz[2] # => 3 154 | ``` 155 | 156 | ### Call Expression 157 | 158 | Functions and function-like types can be invoked with a call expression. A call expression begins with a left-hand side which references an invocable, such as a function's identifier. This is followed by the parameters to the function enclosed in parentheses `()`: 159 | 160 | ``` 161 | fn add(n: Number, m: Number): Number 162 | n + m 163 | end 164 | 165 | print add(add(1, 2), 3) # => 6 166 | ``` 167 | 168 | ## Grammar 169 | 170 | The following grammar defines the language: 171 | 172 | 173 | identifier = WORD 174 | ; 175 | 176 | expression = nud [led] 177 | ; 178 | 179 | expressions = expression* 180 | ; 181 | 182 | type_ref = ":" ty 183 | ; 184 | 185 | ty = WORD 186 | | "[" ty "]" 187 | | "(" [ty ("," ty)*] ")" 188 | ; 189 | 190 | optional_type_ref = [type_ref] 191 | ; 192 | 193 | typed_id = identifier optional_type_ref 194 | ; 195 | 196 | declaration = identifier optional_type_ref "=" expression 197 | ; 198 | 199 | block = expression* "end" 200 | ; 201 | 202 | ternary_body = expression "else" expression 203 | ; 204 | 205 | led = "==" expression 206 | | "!=" expression 207 | | "<" expression 208 | | ">" expression 209 | | "+" expression 210 | | "-" expression 211 | | "*" expression 212 | | "/" expression 213 | | "[" expression
"]" 214 | | "(" [expression ("," expression)*] ")" 215 | | "if" ternary_body 216 | | "unless" ternary_body 217 | ; 218 | 219 | nud = "fn" identifier "(" [typed_id ("," typed_id)*] ")" type_ref block 220 | | ("until" | "while") expression block 221 | | "let" declaration 222 | | "var" declaration 223 | | "print" expression 224 | | "true" 225 | | "false" 226 | | WORD 227 | | LITERAL 228 | | "+" expression 229 | | "-" expression 230 | | "!" expression 231 | | "(" expression ")" 232 | ; 233 | 234 | 235 | -------------------------------------------------------------------------------- /docs/12-parsing.md: -------------------------------------------------------------------------------- 1 | # Parsing and Syntax Trees 2 | 3 | The parser takes an input `SourceText` and produces a full-fidelity 4 | `SyntaxTree`. In theory each character in the source exists as a 5 | `Token` or `TriviaToken` within this tree. This initial tree is 6 | intended to provide as rich a model as possible of the underlying source 7 | text to allow for syntax transformations in the future. It is later 8 | transformed into an abstract representation of the semantics of the 9 | code by the `sem` module. 10 | 11 | [TOC] 12 | 13 | ## Structure 14 | 15 | Source is represented by a `SourceText` type. This exposes characters 16 | as `&str` slices or via the `walk_chars` method for tokenisation. The 17 | source text also contains line information and has the ability to take 18 | a position and convert it into a line, column pair. 19 | 20 | Parsing creates a lexer which implements token iteration for a given 21 | `SourceText`. Each token has a `Span` and `TokenKind`. Consumption of 22 | tokens from the iterator by `Parser::expect` stubs out missing tokens 23 | as well as recording errors in a collection of `Diagnostic`s. 24 | 25 | Rather than returning a `Result` type from the parser, a valid 26 | `SyntaxTree` is always returned.
It is the client's responsibility 27 | to inspect the tree for a given parse for diagnostics and act 28 | accordingly. This allows clients to parse malformed source text and 29 | should allow the compiler to produce more error information in each 30 | pass. 31 | 32 | ### Structure 33 | 34 | The structure looks something like this: 35 | 36 | * `syntax/parse/` - Parser and tokenisation logic 37 | * `syntax/tree/` - Tree node types. 38 | * `syntax/text/` - Source buffer abstraction. 39 | * `diag.rs` - Diagnostic implementation. 40 | 41 | ### Diagnostics 42 | 43 | At the moment diagnostics are just string values and position 44 | information. More metadata can be added later. It might be useful to 45 | add 'level' information to each diagnostic to allow for warnings. 46 | 47 | It would be nice to have some kind of diagnostic pretty printing 48 | rather than relying on the `main` method to walk the diagnostics and 49 | write them to stderr manually. 50 | -------------------------------------------------------------------------------- /docs/15-data-layout.md: -------------------------------------------------------------------------------- 1 | # Data Layout 2 | 3 | For the initial version of the language we just need `Bool`, `Number` 4 | and `String` to have defined layout. It is probably worth thinking 5 | about the future structure of arrays, tuples, and structs though. 6 | 7 | ## Value and Reference Semantics 8 | 9 | The plan is that `Bool`, `Number`, and `String` will all have value 10 | semantics. That is, a modification of a `String` value in one place 11 | will not affect its appearance in another. Such behaviour is referred 12 | to as "value semantics". This is similar to `Copy` types in Rust and 13 | `struct` types in C#. I feel that tuple types should also have value 14 | semantics. Similar to `ValueTuple` in C#. 15 | 16 | Array and structure types instead will have reference semantics.
This 17 | means that passing a structure to a `fn` will allow the function to 18 | modify the structure value. This is similar to reference types in C# 19 | and `&mut` reference in Rust. 20 | 21 | ## Type Layouts 22 | 23 | For the primitive types we have the following type layouts from 24 | language type to LLVM type: 25 | 26 | * `Bool` -> `i1` 27 | * `Number` -> `i64` 28 | 29 | String types are represented as a pair of length, data: 30 | 31 | * `String` -> `<{u32,[0 x u8]}>*` 32 | 33 | The value of the string is encoded directly as part of the 34 | pair. Allocation of a string uses a variable length array to contain a 35 | sequence of utf-8 characters. There are a few problems with this: 36 | 37 | * The expectation is that strings are rarely modified and we could 38 | probably share a single buffer between string instances and use 39 | reference counting to control mutable access. 40 | * This needs some knowledge of when a value is 'dropped' to free the 41 | correct amount of memory. 42 | 43 | Given these concerns we could lay a string out as: 44 | 45 | * `String` -> `<{u32, u32, [0 x u8]}>*` 46 | 47 | In this representation each string has a pointer to a reference 48 | counted backing buffer. This should reduce copy-size of each string 49 | and means that a string reference would again have a single easily 50 | known size. We still need to know when the reference should be 51 | deallocated however. 52 | 53 | ## Garbage Collection 54 | 55 | Rather than aiming to control access to data as Rust does the language 56 | should provide a garbage collection mechanism to clean up data once no 57 | one references it. There are a few alternatives for this: 58 | 59 | * Don't deallocate - Probably useful to get us off the ground 60 | * Reference counting. E.g. Swift's ARC & Python. 61 | * Simple mark & sweep GC. 62 | 63 | For a full mark and sweep or other collector the code generated needs 64 | to insert GC statepoints. 
For this reason I'm tempted to head towards 65 | the second option. In place of statepoints we will need to decide in 66 | the lower pass where to insert RC retain and release code to maintain 67 | the count. Could this work the same way as Rust's ARC model? In that 68 | case the code just needs to know a fixed point in the scope where each 69 | value is deallocated. 70 | -------------------------------------------------------------------------------- /docs/90-syntax-transforms.md: -------------------------------------------------------------------------------- 1 | # Syntax Transformations 2 | 3 | Currently we take the AST and transform it into a semantic tree. To 4 | better support IDE-like features we will need to support more 5 | variations in transformation. 6 | 7 | With input trees immutable we could have a syntax transformer which 8 | uses the visitor pattern. Would then have a `visit_` for each kind of 9 | `Expression` and have them produce a new expression. New expressions 10 | would have synthesised `Span` information. 11 | 12 | Initial optimisations/transformations would be a desugaring pass over 13 | this AST before lowering to the `sem` tree. Maybe even want to do the 14 | type check pass at this level and pass a fully-formed type checked 15 | tree to `sem`. If we do add a desugaring pass then the output of that 16 | would be nice to have as an `--emit` value. 17 | 18 | This would be useful to observe the desugaring of things like `for` 19 | into `while`. 20 | 21 | Further use of these would be to implement a language server in the 22 | future and some kind of code formatter. 23 | 24 | ## Round-tripping POC 25 | 26 | Given initial support for syntax transformations we could have a 27 | visitor for the tree which writes the tree to the standard output to 28 | prove out the round-tripping. Add a new set of tests which just 29 | round-trips all of the code from the specs. 30 | 31 | API surface could be similar to Minsk's `Compilation::EmitTree`.
I'm 32 | thinking we come up with a trait something like `TreeSink` and 33 | implement it for `IO::Write` or similar. Can then have a similar 34 | `emit_tree` which walks the tree and pushes it to the writer. 35 | 36 | I'm thinking we might also want to have this tree emission part of a 37 | higher level `--emit=` flag on the compiler front end. Other values 38 | for now would be to emit the `Debug` version of the parsed tree and to 39 | emit the compilation result as IR, an object file or a linked 40 | executable. 41 | 42 | ## Interaction with `dumpast` and `dumptree` 43 | 44 | The current flags for dumping compilation state may be best grouped 45 | under a single `--dump=` flag. This could allow for different 46 | outputs: 47 | 48 | * Plain round-tripped source 49 | * Formatted source 50 | * Syntax tree structure 51 | * Desugared source 52 | * HTML syntax tree 53 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | A statically-typed compiled language defined by a simple grammar. The 2 | parser started as an experiment in top-down operator precedence 3 | parsing in Rust. It has since started developing into a small 4 | language. 
5 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | features := "llvm-13" 2 | 3 | build: 4 | cargo build --release --no-default-features --features={{features}} 5 | 6 | test: build 7 | cargo test --no-default-features --features={{features}} 8 | python3 specs.py 9 | 10 | clean: 11 | rm -f a.out 12 | rm -rf specbin/ 13 | cargo clean 14 | 15 | docs: 16 | cd docs/; docket 17 | 18 | clippy: 19 | cargo clippy 20 | 21 | bench opt_level="3": build 22 | #!/usr/bin/env python3 23 | import os 24 | import glob 25 | import subprocess 26 | 27 | for bench in glob.glob("spec/bench/*.ulg"): 28 | output = bench.lstrip('spec/').rstrip('.ulg') 29 | output = os.path.join("specbin", "bench", output) 30 | try: 31 | os.makedirs(os.path.dirname(output)) 32 | except OSError: 33 | pass 34 | print("bench={0}, output={1}, opt={2}".format(bench, output, {{opt_level}})) 35 | subprocess.call(["target/release/ullage", bench, "-O{{opt_level}}", "-o", output]) 36 | subprocess.call(["time", output]) -------------------------------------------------------------------------------- /spec/README.md: -------------------------------------------------------------------------------- 1 | # Spec Tests 2 | 3 | This directory and its subdirectories contain a set of specifications 4 | for the language structured as a selection of source files. Each 5 | source file contains comments which describe assertions about the 6 | compilation. 7 | 8 | ## Assertions 9 | 10 | Assertions are encoded in comments. Comments start with a given sigil 11 | sequence and the rest of the line is the parameter to the assertion. 12 | 13 | * ` # => ` - Verify output. All standard output from the 14 | program must be matched by one of these assertions. 15 | * ` # !> ` - Verify error. Checks that the compilation fails 16 | and that the given error is printed to stderr.
17 | * ` # !!skip` - Skips running the output. Just checks that the code 18 | is parsed and compiles. 19 | 20 | ## Structure 21 | 22 | Tests can be arranged into subdirectories to group similar 23 | tests. Subdirectories of interest are: 24 | 25 | * `bench/` - Code benchmarks. 26 | * `malformed/` - Broken input that shouldn't parse correctly. 27 | * `fail/` - Syntactically valid code that is semantically invalid and 28 | should fail compilation. 29 | -------------------------------------------------------------------------------- /spec/assignment.ulg: -------------------------------------------------------------------------------- 1 | var foo = 100 2 | print foo # => 100 3 | print foo = foo / 2 # => 50 4 | print foo # => 50 -------------------------------------------------------------------------------- /spec/basics.ulg: -------------------------------------------------------------------------------- 1 | print let foo = 100 #=> 100 2 | print let bar = 192 #=> 192 3 | print let baz = bar * (100 / foo) #=> 192 4 | print 1 if baz != 0 else -1 #=> 1 5 | print let 中 = 99 - (10 / bar) #=> 99 6 | fn foo(): Number 7 | let fiz = 100 8 | end 9 | print foo() # => 100 -------------------------------------------------------------------------------- /spec/bench/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking 2 | 3 | This folder contains files which can be used to benchmark the 4 | performance of the generated code. Each file should have a `#!!skip` 5 | attribute so that they aren't run when the spec tests are executed. 6 | 7 | The benchmarks can be run with `./build.sh bench` from the project 8 | root. 9 | 10 | ## `fib.ulg` 11 | 12 | This benchmark performs the same Fibonacci computation from 13 | to allow some form of comparison 14 | with popular languages.
15 | -------------------------------------------------------------------------------- /spec/bench/fib.ulg: -------------------------------------------------------------------------------- 1 | #!!skip 2 | 3 | fn fib(n: Number): Number 4 | 1 if n <= 1 else fib(n - 1) + fib(n - 2) 5 | end 6 | 7 | print fib(46) # => 2971215073 -------------------------------------------------------------------------------- /spec/bool.ulg: -------------------------------------------------------------------------------- 1 | var foo: Bool = false 2 | 3 | print foo # => false 4 | 5 | foo = !foo 6 | 7 | print foo # => true -------------------------------------------------------------------------------- /spec/collatz.ulg: -------------------------------------------------------------------------------- 1 | # Modululs remainder 2 | # 3 | # Returns the modulus remainder of n/d 4 | fn mod(n: Number, d: Number): Number 5 | n if n < d else mod(n - d, d) 6 | end 7 | 8 | # Internal Collatz implementation 9 | fn collatz_internal(x: Number, steps: Number): Number 10 | steps 11 | if x == 1 12 | else collatz_internal(x / 2 if mod(x,2) == 0 else (3 * x) + 1, steps + 1) 13 | end 14 | 15 | # Find how long a given number takes to converge on 1 16 | fn collatz(x: Number): Number 17 | collatz_internal(x, 0) 18 | end 19 | 20 | print collatz(17) # => 12 21 | print collatz(1) # => 0 22 | print collatz(9) # => 19 -------------------------------------------------------------------------------- /spec/compare.ulg: -------------------------------------------------------------------------------- 1 | # Minimum 2 | # 3 | # Returns the smallest of the two arguments. 
4 | fn min(a: Number, b: Number): Number 5 | a if a < b else b 6 | end 7 | 8 | # Maximum 9 | # 10 | # Returns the largest of the two arguments 11 | fn max(a: Number, b: Number): Number 12 | a if a > b else b 13 | end 14 | 15 | print min(1, 100) # => 1 16 | print min(100, 1) # => 1 17 | print min(50, 50) # => 50 18 | print min(7, min(20, 3)) # => 3 19 | 20 | print max(1, 2) # => 2 21 | print max(2, 100) # => 100 22 | print max(2 * 2, 4 * 4) # => 16 23 | print max(max(1, 4), 19) # => 19 24 | 25 | print min(max(min(1, 100), 23), -1) # => -1 -------------------------------------------------------------------------------- /spec/euler1.ulg: -------------------------------------------------------------------------------- 1 | # Sum all the multiples of 3 or 5 below 1000 2 | 3 | fn mod(n: Number, d: Number): Number 4 | while n >= d 5 | n = n - d 6 | end 7 | n 8 | end 9 | 10 | fn is_multiple(n: Number, d: Number): Bool 11 | mod(n, d) == 0 12 | end 13 | 14 | fn sum_to(n: Number): Number 15 | var i = 0 16 | var s = 0 17 | while i < n 18 | s = ((s + i) if is_multiple(i, 3) else 19 | ((s + i) if is_multiple(i, 5) else s)) 20 | i = i + 1 21 | end 22 | s 23 | end 24 | 25 | print is_multiple(3, 3) #=> true 26 | print is_multiple(9, 3) #=> true 27 | print is_multiple(8, 3) #=> false 28 | 29 | print sum_to(10) #=> 23 30 | print sum_to(1000) # => 233168 31 | -------------------------------------------------------------------------------- /spec/evens.ulg: -------------------------------------------------------------------------------- 1 | fn is_even(n: Number): Number 2 | 1 if n == 0 else is_odd(n - 1) 3 | end 4 | 5 | fn is_odd(n: Number): Number 6 | 0 if n == 0 else is_even(n - 1) 7 | end 8 | 9 | print is_even(0) == 1 # => true 10 | print is_odd(1) == 1 # => true 11 | print is_odd(8) == 1 # => false 12 | print is_odd(21) == 1 # => true 13 | print is_even(5) == 1 # => false -------------------------------------------------------------------------------- /spec/factorial.ulg: 
-------------------------------------------------------------------------------- 1 | # the number we want to compute the factorial of 2 | var n = 5 3 | var acc = 1 4 | 5 | while n > 0 6 | acc = acc * n 7 | n = n - 1 8 | end 9 | 10 | print acc # => 120 11 | -------------------------------------------------------------------------------- /spec/fail/README.md: -------------------------------------------------------------------------------- 1 | # Compilation Failure Tests 2 | 3 | This directory contains examples of syntactically valid but 4 | semantically invalid code. They should parse OK, but will fail 5 | compilation. 6 | -------------------------------------------------------------------------------- /spec/fail/assign_to_immutable.ulg: -------------------------------------------------------------------------------- 1 | var a = 100 2 | let b = a 3 | a = 100 4 | b = 100 # !> Can't assign to 'b' -------------------------------------------------------------------------------- /spec/fail/assign_to_undefined.ulg: -------------------------------------------------------------------------------- 1 | foo = 123 # !> Can't assign to 'foo' -------------------------------------------------------------------------------- /spec/fail/badcalls.ulg: -------------------------------------------------------------------------------- 1 | # Test function to emit invalid calls to 2 | fn foo(a: Number, b: String): Number 3 | a 4 | end 5 | 6 | foo() # !> Too few arguments to call 7 | foo(100) # !> Too few arguments to call 8 | foo(100, '', 100) # !> Too many arguments to call 9 | foo(false, '', 100) # !> Too many arguments to call 10 | foo(100, '', '', false) # !> Too many arguments to call 11 | 12 | foo(100, 100) # !> 12:9:error: Invalid argument. Expected 'String' but found 'Number' 13 | 14 | # !> 16:4:error: Invalid argument. Expected 'Number' but found 'String' 15 | # !> 16:13:error: Invalid argument. 
Expected 'String' but found 'Bool' 16 | foo('hello', false) 17 | 18 | let bar = 100 19 | 20 | # !> 21:0:error: Called item is not a function 21 | bar() 22 | 23 | # !> 24:0:error: Called item is not a function 24 | false() -------------------------------------------------------------------------------- /spec/fail/badvars.ulg: -------------------------------------------------------------------------------- 1 | # !> 2:4:error: Initialiser doesn't match declaration type for 'foo' 2 | let foo: String = 100 3 | 4 | let baz = foo 5 | 6 | let buz: Number = baz # !> 6:4:error: Initialiser doesn't match declaration type for 'buz' -------------------------------------------------------------------------------- /spec/fail/dupe_fn_param.ulg: -------------------------------------------------------------------------------- 1 | # !> 2:27:error: Duplicate function parameter 'a' 2 | fn foo(a: Number, b: Bool, a: String): Number 3 | 100 4 | end -------------------------------------------------------------------------------- /spec/fail/if_else_badtypes.ulg: -------------------------------------------------------------------------------- 1 | # !> 2:13:error: Condition expression should be 'Bool' but is 'Number' 2 | let a = 1 if 2 else 3 3 | 4 | # !> 5:8:error: If and else have mismatched types 5 | let b = true if 1 != 0 else '' -------------------------------------------------------------------------------- /spec/fail/mismatched_assign.ulg: -------------------------------------------------------------------------------- 1 | let foo = 100 2 | foo = 101 # !> Can't assign to 'foo', it isn't mutable 3 | 4 | var bar = 'a string' 5 | bar = 4356 # !> Type mismatch in assignment to 'bar' 6 | -------------------------------------------------------------------------------- /spec/fail/undefined.ulg: -------------------------------------------------------------------------------- 1 | im_not_defined_yet # !> Can't find 'im_not_defined_yet' in this scope 
-------------------------------------------------------------------------------- /spec/fail/undefined_func.ulg: -------------------------------------------------------------------------------- 1 | fn foo(): Number 2 | compute_number() # !> Can't find 'compute_number' in this scope 3 | end -------------------------------------------------------------------------------- /spec/fail/wrong_type_var.ulg: -------------------------------------------------------------------------------- 1 | let nope: String = 100 # !> Initialiser doesn't match declaration type for 'nope' 2 | -------------------------------------------------------------------------------- /spec/fib.ulg: -------------------------------------------------------------------------------- 1 | # Fibonacci Number 2 | # 3 | # Computes the `n`th iteration of the Fibonacci sequence. 4 | fn fib(n: Number): Number 5 | var iters = 0 6 | var low = 0 7 | var high = 1 8 | while iters < n 9 | iters = iters + 1 10 | let old_low = low 11 | low = high 12 | high = low + old_low 13 | end 14 | low 15 | end 16 | 17 | print fib(10) # => 55 18 | print fib(0) # => 0 19 | print fib(1) # => 1 20 | print fib(11) # => 89 -------------------------------------------------------------------------------- /spec/fizzbuzz.ulg: -------------------------------------------------------------------------------- 1 | var i = 1 2 | while i < 20 3 | fizzbuzz(i) # => 1 4 | # => 2 5 | # => fizz 6 | # => 4 7 | # => buzz 8 | # => fizz 9 | # => 7 10 | # => 8 11 | # => fizz 12 | # => buzz 13 | # => 11 14 | # => fizz 15 | # => 13 16 | # => 14 17 | # => fizzbuzz 18 | # => 16 19 | # => 17 20 | # => fizz 21 | # => 19 22 | i = i + 1 23 | end 24 | 25 | # Modulus Remainder 26 | # 27 | # Returns the remainder after modulus division 28 | fn mod(n: Number, divisor: Number): Number 29 | var ret = n 30 | until ret < divisor 31 | ret = ret - divisor 32 | end 33 | ret 34 | end 35 | 36 | # Printing Fudge 37 | # 38 | # Prints the number, but coerces the type to String so we 
can use it 39 | # in the same ternary expression as string print expressions. 40 | fn fudge(n: Number): String 41 | print n 42 | '' 43 | end 44 | 45 | # FizzBuzz 46 | # 47 | # Implementation of the legendary `FizzBuzz` algorithm. 48 | fn fizzbuzz(n: Number): String 49 | (print 'fizzbuzz') if mod(n, 15) == 0 else 50 | (print 'fizz') if mod(n, 3) == 0 else 51 | (print 'buzz') if mod(n, 5) == 0 else 52 | fudge(n) 53 | end -------------------------------------------------------------------------------- /spec/funcfact.ulg: -------------------------------------------------------------------------------- 1 | fn fact(n: Number): Number 2 | var acc = 1 3 | var i = n 4 | while i > 0 5 | acc = acc * i 6 | i = i - 1 7 | end 8 | acc 9 | end 10 | 11 | print fact(0) == fact(1) # => true 12 | print fact(5) # => 120 13 | print fact(9) # => 362880 -------------------------------------------------------------------------------- /spec/functions.ulg: -------------------------------------------------------------------------------- 1 | fn test(): Number 2 | 1000 3 | end 4 | 5 | print test() #=> 1000 6 | 7 | fn printer(n: Number): Number 8 | print n 9 | end 10 | 11 | printer(1000) # => 1000 12 | printer(98 / 2) # => 49 13 | 14 | fn add(a: Number, b: Number): Number 15 | a + b 16 | end 17 | 18 | print add(1002, -1000) # => 2 19 | print add(99, 1) # => 100 -------------------------------------------------------------------------------- /spec/gcd.ulg: -------------------------------------------------------------------------------- 1 | ## Greatest Common Divisor 2 | # 3 | # Finds the largest divisor of `a` and `b` 4 | fn gcd(a: Number, b: Number): Number 5 | a if b == 0 else gcd(b, mod(a, b)) 6 | end 7 | 8 | ## Modulus Remainder 9 | # 10 | # Returns the modulus remainder after division. Iteratively compues 11 | # the result for efficiency. 
12 | fn mod(n: Number, m: Number): Number 13 | var ret = n 14 | until ret < m 15 | ret = ret - m 16 | end 17 | ret 18 | end 19 | 20 | print gcd(8, 12) # => 4 21 | 22 | print gcd(42, 56) # => 14 23 | print gcd(461952, 116298) # => 18 24 | print gcd(7966496, 314080416) # => 32 25 | print gcd(24826148, 45296490) # => 526 26 | print gcd(12, 0) # => 12 27 | print gcd(0, 0) # => 0 28 | print gcd(0, 9) # => 9 -------------------------------------------------------------------------------- /spec/hello.ulg: -------------------------------------------------------------------------------- 1 | print 1337 # => 1337 2 | -------------------------------------------------------------------------------- /spec/hello_world.ulg: -------------------------------------------------------------------------------- 1 | print 'Hello World!' # => Hello World! 2 | -------------------------------------------------------------------------------- /spec/identity.ulg: -------------------------------------------------------------------------------- 1 | 2 | print 0 # => 0 3 | print 0 == 1 # => false 4 | print (0 + 1) == 1 # => true 5 | 6 | var n: Number = ident(0) 7 | let m = ident(0 + 1) 8 | 9 | print n == 1 # => false 10 | print m == 1 # => true 11 | 12 | print ident(0) == 1 # => false 13 | print ident(0 + 1) == 1 # => true 14 | 15 | # identity function 16 | fn ident(n: Number): Number 17 | n 18 | end 19 | -------------------------------------------------------------------------------- /spec/if.ulg: -------------------------------------------------------------------------------- 1 | print 1111 if 100 == 100 else 0000 # => 1111 2 | print 1234 if 567 == 890 else -900 # => -900 3 | print 90-1 if 4/2 == 1+1 else 22-9 # => 89 4 | 5 | print 1 unless 2 == 3 else 4 # => 1 6 | print 0 unless 0 == 0 else 1 # => 1 7 | 8 | print 1 if 2 != 3 else 4 # => 1 9 | print 0 if 1 < 2 else 3 # => 0 10 | print 0 if 1 > 2 else 3 # => 3 11 | 12 | print 1 if !1 == 3 else 3 # => 3 13 | print 1 if !(1 == 3) else 3 # => 1 14 | 
15 | print 1 if true else 2 # => 1 16 | print 1 if false else 2 # => 2 -------------------------------------------------------------------------------- /spec/literals.ulg: -------------------------------------------------------------------------------- 1 | print 100 #=> 100 2 | 1772773 3 | print 10000 #=> 10000 4 | 123123123 -------------------------------------------------------------------------------- /spec/long_string.ulg: -------------------------------------------------------------------------------- 1 | var s = 'test' 2 | var i = 100 3 | while i > 0 4 | s = '[' + s + ']' 5 | i = i - 1 6 | end 7 | 8 | print s # => [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[test]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] -------------------------------------------------------------------------------- /spec/malformed/README.md: -------------------------------------------------------------------------------- 1 | # Malformed Inputs 2 | 3 | This directory contains a set of malformed source code to test the compiler's error messages. These tests aren't exhaustive; corner cases in tokenisation and parsing should be tested more in the checklex and checkparse unit tests. 
4 | -------------------------------------------------------------------------------- /spec/malformed/half_ternary.ulg: -------------------------------------------------------------------------------- 1 | 0 if # !> 3:0:error: Expected expression but found end of file 2 | # !> error: expecting: keyword, found: end of file 3 | -------------------------------------------------------------------------------- /spec/malformed/invalid_calls.ulg: -------------------------------------------------------------------------------- 1 | fn foo(n: Number, b: Bool): Number 2 | n if b else 2 3 | end 4 | 5 | # !> 6:4:error: unexpected token: expected expression but found ',' 6 | foo(,) 7 | 8 | # it's OK to have a trailing `,` 9 | foo(100, false,) 10 | 11 | # don't go nuts though... 12 | # !> 13:15:error: unexpected token: expected expression but found ',' 13 | foo(969, true, ,) -------------------------------------------------------------------------------- /spec/malformed/missing_ident.ulg: -------------------------------------------------------------------------------- 1 | # !> 2:3:error: expected identifier, found: '(' 2 | fn (expr: Number): Number 3 | # !> 4:6:error: unexpected token: expected expression but found ')' 4 | 100 ) 5 | end 6 | -------------------------------------------------------------------------------- /spec/malformed/param_missing_type.ulg: -------------------------------------------------------------------------------- 1 | # !> 2:7:error: Parameter 'missing_type' missing type 2 | fn foo(missing_type): Number 3 | 1000 4 | end 5 | 6 | # !> 7:12:error: Reference to undefined type 7 | fn bar(bad: Bad): Number 8 | 1000 9 | end 10 | -------------------------------------------------------------------------------- /spec/malformed/unexpected_plus.ulg: -------------------------------------------------------------------------------- 1 | 2 | (0+) 3 | # !> unexpected token: expected expression but found ')' 4 | 
-------------------------------------------------------------------------------- /spec/malformed/unterminated_bracket.ulg: -------------------------------------------------------------------------------- 1 | ( 2 | # !> error: could not parse source: one or more errors: 3 | # !> 5:0:error: Expected expression but found end of file 4 | # !> error: expecting: ')', found: end of file 5 | -------------------------------------------------------------------------------- /spec/malformed/unterminated_fn.ulg: -------------------------------------------------------------------------------- 1 | fn foo(): Number 2 | # !> expecting: keyword, found: end of file 3 | -------------------------------------------------------------------------------- /spec/malformed/unterminated_string.ulg: -------------------------------------------------------------------------------- 1 | # !> could not parse source: one or more errors: 2 | # !> error: unrecognised character 3 | ' 4 | -------------------------------------------------------------------------------- /spec/malformed/unterminated_string2.ulg: -------------------------------------------------------------------------------- 1 | # the whitespace in this ensure the junk token is shoud be attached to 2 | # the trailing trivia of the first number literal 3 | 4 | # !> could not parse source: one or more errors: 5 | # !> error: unrecognised character 6 | 0' 7 | 0 -------------------------------------------------------------------------------- /spec/malformed/unterminated_string3.ulg: -------------------------------------------------------------------------------- 1 | # the whitespace in this ensure the junk token is shoud be attached to 2 | # the end of file token. 
3 | 4 | # !> could not parse source: one or more errors: 5 | # !> error: unrecognised character 6 | 0 7 | ' 8 | -------------------------------------------------------------------------------- /spec/maybe_invert.ulg: -------------------------------------------------------------------------------- 1 | print maybe_invert(100, false) # => 100 2 | print maybe_invert(100, true) # => -100 3 | print maybe_invert(-83, true) # => 83 4 | 5 | fn maybe_invert(invertee: Number, invert: Bool): Number 6 | -invertee if invert else invertee 7 | end -------------------------------------------------------------------------------- /spec/modulus.ulg: -------------------------------------------------------------------------------- 1 | ## Modulus 2 | # 3 | # Returns the modulus remainder after division 4 | fn mod(n: Number, d: Number): Number 5 | until n < d 6 | n = n - d 7 | end 8 | n 9 | end 10 | 11 | print mod(5, 3) # => 2 -------------------------------------------------------------------------------- /spec/operators.ulg: -------------------------------------------------------------------------------- 1 | print 100 # => 100 2 | print -50 # => -50 3 | print 1-1 # => 0 4 | print 1 -1 # => 0 5 | print 1 - -1 # => 2 6 | print 100 + -50 # => 50 7 | print -50 + 100 + -50 # => 0 8 | print 2 + 3 * 4 #=> 14 9 | print 20 - 3 * 4 #=> 8 10 | print 2 + 6 / 3 #=> 4 11 | print 2 - 6 / 3 #=> 0 12 | print 5 < 6 # => true 13 | print 5 <= 5 # => true 14 | print 5 > 6 # => false 15 | print 5 >= 5 # => true 16 | print !!((-100 + + 100) == 0) # => true -------------------------------------------------------------------------------- /spec/prefix.ulg: -------------------------------------------------------------------------------- 1 | print !false # => true 2 | print !true # => false 3 | print +100 # => 100 4 | print -100 # => -100 5 | print - - 100 # => 100 6 | print !!false # => false -------------------------------------------------------------------------------- /spec/printing.ulg: 
-------------------------------------------------------------------------------- 1 | # First we try to print a value out 2 | print 1234 # => 1234 3 | 4 | # then we check that printing a value returns the correct result 5 | print print print 1969 # => 1969 6 | # => 1969 7 | # => 1969 8 | 9 | # Printing of string literals 10 | print 'foo' # => foo 11 | 12 | # Type info is tracked through variable assignments 13 | let bar = 'bar' 14 | print bar # => bar 15 | 16 | # Type info is tracked through function calls 17 | fn print_str(s: String): String 18 | print s 19 | end 20 | 21 | print_str('hello') # => hello 22 | 23 | ## Tests for printing out 'large' numbers. This makes sure we print 24 | ## out 64 bit values correctly 25 | print 2147483647 # => 2147483647 26 | print -2147483648 # => -2147483648 27 | print 2971215073 # => 2971215073 28 | print 9223372036854775807 #=> 9223372036854775807 29 | # Since negative integers aren't actually literals, just applications 30 | # of a prefix operator we can't actually _write_ the value in 31 | # full. Instead compute it. 
32 | print -9223372036854775807 - 1 # => -9223372036854775808 33 | -------------------------------------------------------------------------------- /spec/scopes.ulg: -------------------------------------------------------------------------------- 1 | var a = 0 2 | print a # => 0 3 | var a = 1 4 | print a # => 1 5 | let a = a * 2 6 | print a # => 2 7 | -------------------------------------------------------------------------------- /spec/string_concat.ulg: -------------------------------------------------------------------------------- 1 | print 'hello' # => hello 2 | print 'world' # => world 3 | print 'hello' + ' world' # => hello world 4 | 5 | let space: String = ' ' 6 | let foobar: String = 'foo' + ' ' + 'bar' 7 | 8 | print foobar # => foo bar -------------------------------------------------------------------------------- /spec/types.ulg: -------------------------------------------------------------------------------- 1 | # shows that types are hanlded differently by print 2 | print 123 # => 123 3 | print true # => true -------------------------------------------------------------------------------- /spec/unimplemneted/README.md: -------------------------------------------------------------------------------- 1 | # Unimplemented Features 2 | 3 | This directory contains specs which demonstrate as-yet unimplemented language features. The idea is to check that some kind of expected error message is printed if an attempt is made to use the feature. 
4 | -------------------------------------------------------------------------------- /spec/unimplemneted/array_types.ulg: -------------------------------------------------------------------------------- 1 | # !> array types are not yet supported 2 | var foo: [Number] = 0 3 | -------------------------------------------------------------------------------- /spec/unimplemneted/tuple_types.ulg: -------------------------------------------------------------------------------- 1 | # !> tuple types are not yet supported 2 | var foo: (String, Number, Bool) = 0 3 | -------------------------------------------------------------------------------- /spec/until.ulg: -------------------------------------------------------------------------------- 1 | var bar = 0 2 | until bar > 5 3 | print bar = bar + 2 # => 2 4 | # => 4 5 | # => 6 6 | end -------------------------------------------------------------------------------- /spec/variables.ulg: -------------------------------------------------------------------------------- 1 | let hello = 100 2 | let world = 198 3 | print hello + world # => 298 4 | let there = 9000 5 | let world = 20000 6 | print hello - there + world # => 11100 7 | print let yup = 100 - -hello # => 200 8 | 9 | var foo = -1001 10 | print foo # => -1001 11 | foo = foo + 1000 12 | print foo # => -1 -------------------------------------------------------------------------------- /spec/while.ulg: -------------------------------------------------------------------------------- 1 | var foo = 0 2 | while foo < 10 3 | print foo = foo + 1 # => 1 4 | # => 2 5 | # => 3 6 | # => 4 7 | # => 5 8 | # => 6 9 | # => 7 10 | # => 8 11 | # => 9 12 | # => 10 13 | end -------------------------------------------------------------------------------- /specs.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import fnmatch 6 | import subprocess 7 | import re 8 | import collections 9 | import itertools 10 | import threading 11 | 12 | Expectations = collections.namedtuple('Expectations', ['expects', 'failure_expects', 'skip_run']) 13 | 14 | LINKER_KIND = "Object" 15 | EXPECT_PATTERN = re.compile(r'#\s?=>\s?(.+)') 16 | EXPECT_ERR_PATTERN = re.compile(r'#\s?!>\s?(.+)') 17 | SKIP_PATTERN = re.compile(r'#\s?!!skip') 18 | 19 | class Error(Exception): 20 | def __init__(self, error): 21 | self.error = error 22 | 23 | class ExitCodeMismatchError(Error): 24 | def __init__(self, message, code, output): 25 | message = "{} exit={}, out='{}', err='\n{}'".format( 26 | message, code, output[0].decode('utf-8'), output[1].decode('utf-8')) 27 | super(ExitCodeMismatchError, self).__init__(message) 28 | 29 | class OutputMissingError(Error): 30 | def __init__(self, output, expected): 31 | output = "\n >> ".join(output.strip().split('\n')) 32 | expected = '", "'.join(expected) 33 | msg = 'Expected "{}" in output. 
\n found >> {}'.format(expected, output) 34 | super(OutputMissingError, self).__init__(msg) 35 | 36 | class OutputMismatchError(Error): 37 | def __init__(self, expected, actual): 38 | msg = 'Expected "{}", found "{}"'.format(expected, actual) 39 | super(OutputMismatchError, self).__init__(msg) 40 | 41 | def add_matches(pattern, line, expects): 42 | match = pattern.search(line) 43 | if match: 44 | expects.append(match.group(1)) 45 | 46 | def parse_spec(path): 47 | expects = [] 48 | failure_expects = [] 49 | skip_run = False 50 | with open(path, encoding='utf-8') as f: 51 | for line in f.readlines(): 52 | add_matches(EXPECT_PATTERN, line, expects) 53 | add_matches(EXPECT_ERR_PATTERN, line, failure_expects) 54 | if SKIP_PATTERN.search(line): 55 | skip_run = True 56 | return Expectations(expects, failure_expects, skip_run) 57 | 58 | def check_output(lines, expects): 59 | """Check that Output Matches Expectations 60 | 61 | Given a string representing the output of a command and 62 | a list of expected lines check that the output of the 63 | command matches. 64 | """ 65 | 66 | lines = lines.strip().split('\n') 67 | for actual, expected in itertools.zip_longest(lines, expects): 68 | if not actual or not expected: 69 | raise OutputMismatchError(expected, actual) 70 | if actual != expected: 71 | raise OutputMismatchError(expected, actual) 72 | 73 | def check_expected_exit(exit_code, output, expectations): 74 | """Check the Exit of a Program 75 | 76 | Given the output and exit status of a program make sure we got a 77 | successful exit, or a non-zero exit code with the expected 78 | failures in the program's standard error output. 
79 | """ 80 | 81 | # if we got killed by a signal we don't want to check for errors 82 | if exit_code < 0: 83 | raise ExitCodeMismatchError( 84 | "Compilation was killed by signal '{}'".format(-exit_code), 85 | exit_code, output) 86 | 87 | # If we were expecting a compilation failure make sure we got it 88 | if expectations.failure_expects: 89 | if exit_code == 0: 90 | raise ExitCodeMismatchError( 91 | "Expected failure but compilation succeeded", 92 | exit_code, output) 93 | check_compilation_failure( 94 | output[1].decode('utf-8'), expectations.failure_expects) 95 | # No compilation failure but we got one. 96 | elif exit_code != 0: 97 | raise ExitCodeMismatchError( 98 | "Expected successfull exit", exit_code, output) 99 | 100 | def check_compilation_failure(output, failure_expects): 101 | """Check Failure Output 102 | 103 | Given the output of a failed compilation command check that any 104 | failure expectations are met. 105 | """ 106 | 107 | fails = list(failure_expects) 108 | for line in output.strip().split('\n'): 109 | # Check we haven't found the source printed out again... 110 | if EXPECT_ERR_PATTERN.search(line): 111 | continue 112 | if fails and fails[0] in line: 113 | fails.pop(0) 114 | if fails: 115 | raise OutputMissingError(output, fails) 116 | 117 | def run_spec(path): 118 | """Compile and Run the Given Spec 119 | 120 | Compiles the spec, examining any compilation errors. If none are 121 | found then the resulting executable is run and expectations 122 | from the input file are matched against the output. 
123 | """ 124 | 125 | expectations = parse_spec(path) 126 | out = "specbin/{}".format(os.path.basename(path).split('.')[0]) 127 | compile_cmd = subprocess.Popen(["target/release/ullage", path, "--link-kind", LINKER_KIND, "-o", out], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 128 | 129 | # Give the compiler 5 seconds to run, and return an error on timeout 130 | timer = threading.Timer(5, compile_cmd.kill) 131 | try: 132 | timer.start() 133 | output = compile_cmd.communicate() 134 | exit_code = compile_cmd.returncode 135 | check_expected_exit(exit_code, output, expectations) 136 | finally: 137 | timer.cancel() 138 | 139 | if expectations.skip_run or compile_cmd.returncode != 0: 140 | return 141 | run_cmd = subprocess.Popen(out, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 142 | output = run_cmd.communicate() 143 | if run_cmd.returncode != 0: 144 | raise ExitCodeMismatchError("Expected successfull exit code", run_cmd.returncode, output) 145 | check_output(output[0].decode('utf-8'), expectations.expects) 146 | 147 | 148 | def glob_for(path, extension): 149 | """Glob For Files 150 | 151 | Recursively walks a directory tree and finds files matching a 152 | given extension. Used to find the files to test. 
153 | """ 154 | 155 | ext_glob = '*.{}'.format(extension) 156 | for root, dirnames, filenames in os.walk(path): 157 | for filename in fnmatch.filter(filenames, ext_glob): 158 | yield os.path.join(root, filename) 159 | 160 | def main(argv): 161 | try: 162 | os.mkdir("specbin/") 163 | except OSError: 164 | pass 165 | 166 | failures = 0 167 | for spec in glob_for('spec/', 'ulg'): 168 | try: 169 | run_spec(spec) 170 | sys.stdout.write('.') 171 | sys.stdout.flush() 172 | except Error as e: 173 | err = '\n{}: {}: {}'.format(spec, type(e).__name__, e.error) 174 | print(err, file=sys.stderr) 175 | failures += 1 176 | 177 | # newline following all those .s 178 | print('\n') 179 | print('-' * 40) 180 | if failures: 181 | print('{} tests failed'.format(failures)) 182 | else: 183 | print('All tests passed') 184 | return failures 185 | 186 | if __name__ == '__main__': 187 | sys.exit(main(sys.argv[1:])) 188 | -------------------------------------------------------------------------------- /src/compile.rs: -------------------------------------------------------------------------------- 1 | //! This module contains the code required to compile a parsed tree 2 | //! down to LLVM bytecode. 3 | 4 | use crate::diag::Diagnostic; 5 | use crate::low_loader::prelude::*; 6 | use crate::sem; 7 | use crate::syntax; 8 | use linker::Linker; 9 | use std::path::Path; 10 | use std::process::Command; 11 | use tempfile::Builder; 12 | 13 | pub use self::error::{CompError, CompResult}; 14 | pub use self::options::{CompilationOptions, OptimisationLevel}; 15 | 16 | pub mod error; 17 | pub mod linker; 18 | pub mod options; 19 | 20 | mod lower; 21 | mod lower_context; 22 | mod string_builtins; 23 | 24 | /// Add the Core Declarations to the Module 25 | /// 26 | /// This method is responsible for making sure that 27 | /// declarations/definitions of any builtin functions are emitted.
28 | fn add_core_decls(ctx: &mut Context, module: &mut Module) -> CompResult<()> { 29 | add_printf_decl(ctx, module); 30 | module.add_global(ctx.const_str("%lld\n"), "printf_num_format"); 31 | module.add_global(ctx.const_str("%s\n"), "printf_cstr_format"); 32 | module.add_global(ctx.const_str("%.*s\n"), "printf_ustr_format"); 33 | module.add_global(ctx.const_str("true"), "print_true"); 34 | module.add_global(ctx.const_str("false"), "print_false"); 35 | Ok(()) 36 | } 37 | 38 | /// Add a Printf Declaration to the Module 39 | /// 40 | /// Creates a new function in the given module which maps to the 41 | /// `printf` function. This will be used by the `print` operator 42 | /// to write output. 43 | fn add_printf_decl(ctx: &mut Context, module: &mut Module) { 44 | let mut params = [ctx.cstr_type()]; 45 | let int_type = ctx.int_type(32); 46 | let mut printf = ctx.add_varargs_function(module, "printf", int_type, &mut params); 47 | printf.set_calling_convention(CallConvention::CDecl); 48 | } 49 | 50 | /// Compilation State 51 | /// 52 | /// Encompasses the inputs and settings for a given compilation. 53 | pub struct Compilation { 54 | /// The `Expression`s which are being compiled.
55 | expr: sem::Expression, 56 | /// The options for this compilation 57 | options: CompilationOptions, 58 | /// diagnostics from this compilation 59 | diagnostics: Vec, 60 | } 61 | 62 | impl Compilation { 63 | /// Create a new compilation 64 | /// 65 | /// # Parameters 66 | /// * `source` - The source text for the program 67 | /// * `expr` - the expression to compile 68 | /// * `opts` - The compilation options 69 | #[allow(clippy::new_ret_no_self)] 70 | pub fn new(tree: syntax::SyntaxTree, opts: CompilationOptions) -> CompResult { 71 | let mut binder = sem::Binder::new(sem::Scope::new()); 72 | let sem_expr = binder.bind_tree(tree); 73 | Ok(Compilation { 74 | expr: sem_expr, 75 | options: opts, 76 | diagnostics: binder.take_diagnostics(), 77 | }) 78 | } 79 | 80 | /// Emit 81 | /// 82 | /// Performs the compilation, emitting the results to the given file. 83 | pub fn emit(self, target: &Target, output_path: &Path) -> CompResult<()> { 84 | if !self.diagnostics.is_empty() { 85 | return Err(CompError::Generic( 86 | "can't emit a compilation contianing diagnostics".into(), 87 | )); 88 | } 89 | 90 | let mut ctx = Context::new(); 91 | let name = output_path 92 | .file_stem() 93 | .and_then(|s| s.to_str()) 94 | .unwrap_or("fallback_module_name"); 95 | let mut module = ctx.add_module(name); 96 | module.set_target(target); 97 | 98 | add_core_decls(&mut ctx, &mut module)?; 99 | 100 | let fun = { 101 | let mut lower_ctx = lower_context::LowerContext::new(&mut ctx, &mut module); 102 | lower_ctx.add_intrinsics(); 103 | lower_ctx.add_core_types(); 104 | lower::lower_as_main(&mut lower_ctx, self.expr)? 
105 | }; 106 | 107 | fun.verify_or_panic(); 108 | module.verify_or_panic(); 109 | 110 | let linker = self.options.linker.unwrap_or_else(Linker::default); 111 | 112 | // Create a temp file to write the LLVM IR, bitcode, or native object to 113 | let temp_file = Builder::new() 114 | .prefix("ullage") 115 | .suffix(linker.asset_ty.extension()) 116 | .tempfile()?; 117 | 118 | // check if we have optimisation enabled and run the 119 | // corresponding optimisations if we do. 120 | if let Some((level, size)) = self.options.opt_level.unpack() { 121 | module.run_optimiser(level, size); 122 | } 123 | 124 | // Check what we have, and dump it to the screen 125 | if self.options.dump_ir { 126 | module.dump(); 127 | } 128 | module.write_to_file(&target, temp_file.path(), linker.asset_ty.file_kind())?; 129 | 130 | // Shell out to Clang to link the final assembly 131 | let output = Command::new(linker.cmd.executable()) 132 | .arg(temp_file.path()) 133 | .arg(format!("--target={}", target.triple())) 134 | .arg("-o") 135 | .arg(output_path) 136 | .output()?; 137 | let status = output.status; 138 | 139 | if status.success() { 140 | Ok(()) 141 | } else { 142 | Err(CompError::link_fail(status.code(), output.stderr)) 143 | } 144 | } 145 | 146 | /// Does the compilation have any diagnostics to emit? 147 | pub fn has_diagnostics(&self) -> bool { 148 | !self.diagnostics.is_empty() 149 | } 150 | 151 | /// Borrow a slice of the diagnostics in this compilation 152 | pub fn diagnostics(&self) -> &[Diagnostic] { 153 | &self.diagnostics 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/compile/error.rs: -------------------------------------------------------------------------------- 1 | //! Compilation error module. Contains the Result and Error types for 2 | //! the compile module. 3 | 4 | use std::{fmt::Display, io}; 5 | 6 | /// Represents the different types of errors which can be encountered 7 | /// when compiling.
8 | #[derive(Debug)] 9 | pub enum CompError { 10 | /// Generic Error String 11 | Generic(String), 12 | 13 | /// Linker Failure 14 | Linker(LinkerError), 15 | 16 | /// Wrapped IO Error 17 | IO(io::Error), 18 | } 19 | 20 | impl std::error::Error for CompError { 21 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 22 | match self { 23 | CompError::Linker(e) => Some(e), 24 | CompError::IO(e) => Some(e), 25 | _ => None, 26 | } 27 | } 28 | } 29 | 30 | impl Display for CompError { 31 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 32 | match self { 33 | CompError::Generic(msg) => write!(f, "compilation error: {}", msg), 34 | CompError::Linker(cause) => write!(f, "linker failed.: {}", cause), 35 | CompError::IO(cause) => write!(f, "IO error: {}", cause), 36 | } 37 | } 38 | } 39 | 40 | /// Compilation result. Returned from each compilation stage. 41 | pub type CompResult = Result; 42 | 43 | /// Link Failure Type 44 | /// 45 | /// Used to group together the different failure modes for the linker. 46 | #[derive(Debug)] 47 | pub enum LinkerError { 48 | /// The linker failed with a known exit status 49 | WithExitStatus(i32, String), 50 | 51 | /// The linker failed with an unknown exit status 52 | UnknownFailure(String), 53 | } 54 | 55 | impl std::error::Error for LinkerError {} 56 | 57 | impl Display for LinkerError { 58 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 59 | match self { 60 | LinkerError::WithExitStatus(status, msg) => { 61 | write!(f, "linker returned exit status {}: {}", status, msg) 62 | } 63 | LinkerError::UnknownFailure(msg) => write!(f, "unknown linker error: {}", msg), 64 | } 65 | } 66 | } 67 | 68 | impl From for CompError { 69 | /// Convert untyped errors to generic compilation errors. 
70 | fn from(s: String) -> Self { 71 | CompError::Generic(s) 72 | } 73 | } 74 | 75 | impl From for CompError { 76 | fn from(e: io::Error) -> Self { 77 | CompError::IO(e) 78 | } 79 | } 80 | 81 | impl CompError { 82 | /// Compilation Linker Error 83 | /// 84 | /// When the linker has failed and caused compilation to fail. 85 | /// 86 | /// The linker's `stderr` bytes are decoded lossily: the output is not 87 | /// guaranteed to be valid UTF-8, and a stray byte must not turn a 88 | /// reportable link failure into a panic. 89 | pub fn link_fail(exit_status: Option, stderr: Vec) -> Self { 90 | let stderr = String::from_utf8_lossy(&stderr).into_owned(); 91 | CompError::Linker(match exit_status { 92 | Some(status) => LinkerError::WithExitStatus(status, stderr), 93 | None => LinkerError::UnknownFailure(stderr), 94 | }) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/compile/linker.rs: -------------------------------------------------------------------------------- 1 | //! Linker Abstractions 2 | //! 3 | //! This module contains types to deal with linking object files. The 4 | //! main struct `Linker` specifies the information needed to perform 5 | //! a link. 6 | 7 | use std::default::Default; 8 | 9 | use crate::low_loader::prelude::OutputFileKind; 10 | 11 | /// The information for performing a link 12 | pub struct Linker { 13 | /// The linker command. Currently only `clang` is supported.
14 | pub cmd: LinkerCommand, 15 | /// The intermediate asset type the linker expects 16 | pub asset_ty: LinkerAssetType, 17 | } 18 | 19 | /// The executable type to use for linking 20 | #[derive(Debug, Copy, Clone)] 21 | pub enum LinkerCommand { 22 | /// The Clang C compiler 23 | Clang, 24 | } 25 | 26 | /// The intermediate asset type to pass to the linker 27 | #[derive(Debug, Copy, Clone)] 28 | pub enum LinkerAssetType { 29 | /// LLVM IR text files 30 | LlvmIr, 31 | /// LLVM IR bitcode files 32 | LlvmBc, 33 | /// Native object 34 | Object, 35 | } 36 | 37 | impl Linker { 38 | /// Create a new linker from the command and asset type 39 | pub fn new(cmd: LinkerCommand, asset_ty: LinkerAssetType) -> Self { 40 | Linker { cmd, asset_ty } 41 | } 42 | 43 | /// Create a linker from the given command 44 | pub fn from_command(cmd: LinkerCommand) -> Self { 45 | Linker { 46 | cmd, 47 | asset_ty: cmd.default_asset_ty(), 48 | } 49 | } 50 | } 51 | 52 | impl Default for Linker { 53 | fn default() -> Self { 54 | Linker::from_command(LinkerCommand::default()) 55 | } 56 | } 57 | 58 | impl LinkerCommand { 59 | /// Get the Default Asset Type for this Linker 60 | pub fn default_asset_ty(&self) -> LinkerAssetType { 61 | LinkerAssetType::LlvmBc 62 | } 63 | 64 | /// Get the executable this command should call. 65 | /// 66 | pub fn executable(&self) -> &str { 67 | // FIXME: instead of exposing a &str. We should 68 | // make the linker build the command 69 | // rather than the compiler.
70 | match *self { 71 | LinkerCommand::Clang => "clang", 72 | } 73 | } 74 | } 75 | 76 | impl Default for LinkerCommand { 77 | fn default() -> Self { 78 | LinkerCommand::Clang 79 | } 80 | } 81 | 82 | impl LinkerAssetType { 83 | /// Get the file extension for the asset type 84 | pub fn extension(&self) -> &str { 85 | match *self { 86 | LinkerAssetType::LlvmIr => ".ll", 87 | LinkerAssetType::LlvmBc => ".bc", 88 | LinkerAssetType::Object => ".o", 89 | } 90 | } 91 | 92 | /// Get the file kind for this asset type 93 | pub(crate) fn file_kind(&self) -> OutputFileKind { 94 | match *self { 95 | LinkerAssetType::LlvmIr => OutputFileKind::LLVMIl, 96 | LinkerAssetType::LlvmBc => OutputFileKind::Bitcode, 97 | LinkerAssetType::Object => OutputFileKind::NativeObject, 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/compile/lower_context.rs: -------------------------------------------------------------------------------- 1 | //! Lower Context 2 | //! 3 | //! The lower context defines the state which is passed around as an 4 | //! expression is lowered to LLVM. 5 | 6 | use crate::low_loader::prelude::*; 7 | use crate::sem::{BuiltinType, Typ}; 8 | use std::collections::HashMap; 9 | 10 | /// Lower Context 11 | /// 12 | /// Pairs an LLVM Context with a single LLVM Module. Used as the 13 | /// target when lowering a tree to LLVM. 14 | pub struct LowerContext<'a> { 15 | /// The LLVM Context this lower context is using. 16 | pub llvm_ctx: &'a mut Context, 17 | /// The LLVM Module this context is building IR into. 18 | pub module: &'a mut Module, 19 | 20 | /// Map of `Typ` values to LLVM Types 21 | ty_map: HashMap, 22 | } 23 | 24 | impl<'a> LowerContext<'a> { 25 | /// Create a New Lowering Context 26 | /// 27 | /// Wraps the given module and LLVM context to create the required 28 | /// context for lowering ASTs.
29 | pub fn new(ctx: &'a mut Context, module: &'a mut Module) -> Self { 30 | LowerContext { 31 | llvm_ctx: ctx, 32 | module, 33 | ty_map: Default::default(), 34 | } 35 | } 36 | 37 | /// Add LLVM Intrinsic Declarations 38 | /// 39 | /// Updates the module to add declarations for the LLVM intrinsics 40 | /// we care about. Need to find a better way to create these. 41 | pub fn add_intrinsics(&mut self) { 42 | let i8ptr = self.llvm_ctx.pointer_type(self.llvm_ctx.int_type(8)); 43 | let i32ty = self.llvm_ctx.int_type(32); 44 | 45 | self.llvm_ctx.add_function( 46 | self.module, 47 | "llvm.memcpy.p0i8.p0i8.i32", 48 | self.llvm_ctx.void_type(), 49 | &mut [i8ptr, i8ptr, i32ty, self.llvm_ctx.bool_type()], 50 | ); 51 | } 52 | 53 | /// Add Core LLVM Types 54 | /// 55 | /// Adds entries to the type map for the builtin types, mapping 56 | /// them to their underlying LLVM representation. 57 | pub fn add_core_types(&mut self) { 58 | let lang_string = self.llvm_ctx.pointer_type(self.llvm_ctx.struct_type(vec![ 59 | self.llvm_ctx.int_type(32), 60 | self.llvm_ctx.array_type(self.llvm_ctx.int_type(8), 0), 61 | ])); 62 | self.add_type(Typ::Builtin(BuiltinType::String), lang_string); 63 | let llvm_bool = self.llvm_ctx.bool_type(); 64 | self.add_type(Typ::Builtin(BuiltinType::Bool), llvm_bool); 65 | let llvm_number = self.llvm_ctx.int_type(64); 66 | self.add_type(Typ::Builtin(BuiltinType::Number), llvm_number); 67 | } 68 | 69 | /// Add a Type to the Context 70 | /// 71 | /// Makes the given type available for lowering in the current 72 | /// context by providing a mapping through to an LLVM type.
73 | pub fn add_type(&mut self, ty: Typ, llvm_ty: LLVMTypeRef) { 74 | self.ty_map.insert(ty, llvm_ty); 75 | } 76 | 77 | /// Look up a Given Type 78 | pub fn llvm_type(&self, ty: Typ) -> Option { 79 | self.ty_map.get(&ty).cloned() 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/compile/options.rs: -------------------------------------------------------------------------------- 1 | //! Compilation Options 2 | //! 3 | //! This module defines the options structure used to tweak 4 | //! compilation output. 5 | 6 | use super::linker::Linker; 7 | use crate::low_loader::pass_manager as pm; 8 | 9 | /// Compilation Options 10 | /// 11 | /// This is used to control how each `Compilation` instance behaves. 12 | #[derive(Default)] 13 | pub struct CompilationOptions { 14 | /// Dump the LLVM IR when the module is compiled 15 | pub dump_ir: bool, 16 | /// Optimisation level to use when emitting code 17 | pub opt_level: OptimisationLevel, 18 | /// Linker option 19 | pub linker: Option, 20 | } 21 | 22 | /// Optimisation levels 23 | /// 24 | /// Defines the different levels of optimisation that the compiler 25 | /// supports. These levels are usually controlled from the command 26 | /// line. 27 | pub enum OptimisationLevel { 28 | /// No optimisation 29 | Off, 30 | /// Low optimisation, same as -O1 31 | Low, 32 | /// Medium optimisation. Same as -O2 33 | Med, 34 | /// High optimisation. Same as -O3 35 | High, 36 | /// Optimise for size not speed 37 | Size, 38 | } 39 | 40 | impl CompilationOptions { 41 | /// Set the `dump_ir` flag 42 | /// 43 | /// Enables or disables dumping the LLVM IR when modules are 44 | /// compiled. 45 | pub fn with_dump_ir(self, dump_ir: bool) -> Self { 46 | CompilationOptions { dump_ir, ..self } 47 | } 48 | 49 | /// Set the Optimisation Level 50 | /// 51 | /// Controls the optimisation level for the given options.
52 | pub fn with_opt_level(self, opt_level: OptimisationLevel) -> Self { 53 | CompilationOptions { opt_level, ..self } 54 | } 55 | 56 | /// Set the linker command to use 57 | pub fn with_linker(self, linker: Linker) -> Self { 58 | CompilationOptions { 59 | linker: Some(linker), 60 | ..self 61 | } 62 | } 63 | } 64 | 65 | impl Default for OptimisationLevel { 66 | fn default() -> Self { 67 | OptimisationLevel::Off 68 | } 69 | } 70 | 71 | impl OptimisationLevel { 72 | /// Unpack an Optimisation Level 73 | /// 74 | /// Retrieves a (level, size) tuple which defines how to configure 75 | /// the LLVM optimiser for this optimisation level. 76 | pub fn unpack(&self) -> Option<(pm::OptLevel, pm::OptSize)> { 77 | use crate::OptimisationLevel::*; 78 | match self { 79 | Off => None, 80 | Low => Some((pm::OptLevel::Low, pm::OptSize::Off)), 81 | Med => Some((pm::OptLevel::Medium, pm::OptSize::Off)), 82 | High => Some((pm::OptLevel::High, pm::OptSize::Off)), 83 | Size => Some((pm::OptLevel::Medium, pm::OptSize::Size)), 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/compile/string_builtins.rs: -------------------------------------------------------------------------------- 1 | //! String Builtins 2 | //! 3 | //! This module contains logic for interacting with string values. 4 | 5 | use super::lower_context::LowerContext; 6 | use crate::low_loader::prelude::*; 7 | 8 | /// String Copy Guts 9 | /// 10 | /// Copies the body of a source string into a destination string's 11 | /// buffer. This is used by the lowering of string concatenation.
12 | pub(crate) fn string_copy_guts( 13 | ctx: &mut LowerContext<'_>, 14 | builder: &mut Builder, 15 | dest: LLVMValueRef, 16 | src: LLVMValueRef, 17 | len: LLVMValueRef, 18 | offset: Option, 19 | ) { 20 | let memcpy = ctx 21 | .module 22 | .find_function("llvm.memcpy.p0i8.p0i8.i32") 23 | .expect("can't find memcpy intrinsic"); 24 | 25 | let src_buffer = string_get_buffer(builder, src); 26 | let src_buffer = builder.build_gep( 27 | src_buffer, 28 | &mut [ctx.llvm_ctx.const_int(0), ctx.llvm_ctx.const_int(0)], 29 | ); 30 | let dest_buffer = string_get_buffer(builder, dest); 31 | let offset = offset.unwrap_or_else(|| ctx.llvm_ctx.const_int(0)); 32 | let dest_buffer = builder.build_gep(dest_buffer, &mut [ctx.llvm_ctx.const_int(0), offset]); 33 | 34 | builder.build_void_call( 35 | &memcpy, 36 | &mut [dest_buffer, src_buffer, len, ctx.llvm_ctx.const_bool(false)], 37 | ); 38 | } 39 | 40 | /// Get String's Buffer Pointer 41 | /// 42 | /// Returns a pointer to the buffer which contains the `String`'s 43 | /// body. This is a `[0 x i8]*`. It can be converted into a pointer to 44 | /// a given character offset with a GEP instruction. 45 | pub(crate) fn string_get_buffer(builder: &mut Builder, s: LLVMValueRef) -> LLVMValueRef { 46 | builder.build_struct_gep(s, 1) 47 | } 48 | 49 | /// Get String Length 50 | /// 51 | /// Reads the length field out of the `String`'s internal 52 | /// representation. This is a constant-time operation and returns the 53 | /// length in bytes. 54 | pub(crate) fn string_get_len(builder: &mut Builder, val: LLVMValueRef) -> LLVMValueRef { 55 | let len_field = builder.build_struct_gep(val, 0); 56 | builder.build_load(len_field) 57 | } 58 | 59 | /// Set String Length 60 | /// 61 | /// Set the internal string length field to a new value.
62 | pub(crate) fn string_set_len(builder: &mut Builder, val: LLVMValueRef, size: LLVMValueRef) { 63 | let len_field = builder.build_struct_gep(val, 0); 64 | builder.build_store(size, len_field); 65 | } 66 | -------------------------------------------------------------------------------- /src/diag.rs: -------------------------------------------------------------------------------- 1 | //! Compilation Diagnostics 2 | //! 3 | //! The data structures used to keep track of compilation diagnostic 4 | //! information. This includes the main `Diagnostic` type which 5 | //! represents a syntax or semantic error during compilation. 6 | //! 7 | //! The types in this module will usually be consumed by pushing 8 | //! diagnostics into a `DiagnosticBag`. 9 | 10 | use crate::syntax::text::Span; 11 | 12 | /// The main `Diagnostic` type. Each diagnostic consists of a primary 13 | /// message and a location. 14 | pub struct Diagnostic { 15 | /// The diagnostic message generated by the compiler 16 | pub message: String, 17 | 18 | /// The location that this diagnostic occurred at 19 | pub span: Span, 20 | } 21 | 22 | impl Diagnostic { 23 | /// Constructs a new diagnostic from the given message and 24 | /// location. 25 | /// 26 | /// # Parameters 27 | /// * `message` - the diagnostic message to emit 28 | /// * `span` - The location the diagnostic is tied to. If there 29 | /// is no logical location for this diagnostic then 30 | /// `DUMMY_SPAN` should be used. 31 | pub fn new(message: S, span: Span) -> Self 32 | where 33 | S: Into, 34 | { 35 | Diagnostic { 36 | message: message.into(), 37 | span, 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/low_loader.rs: -------------------------------------------------------------------------------- 1 | //! Low Loader 2 | //! 3 | //! Low-level bindings to LLVM. This module provides a set of safe 4 | //! wrappers around LLVM objects and types to allow them to be used 5 | //!
more ergonomically from Rust code. 6 | //! 7 | //! # Key Types 8 | //! 9 | //! Any use of low_loader requires creating a [`Context`] first. This 10 | //! type controls access to [`Module`]s and [`Type`]s. Each Module 11 | //! represents a single 'compilation unit' which maps down to a single 12 | //! output object when compiled. 13 | //! 14 | //! After creating a module LLVM IR can be added using a [`Builder`] 15 | //! object. The builder has a set of factory methods for appending IR 16 | //! to a given basic block. 17 | //! 18 | //! [`Context`]: ./context/struct.Context.html 19 | //! [`Module`]: ./module/struct.Module.html 20 | //! [`Type`]: ./types/enum.Type.html 21 | //! [`Builder`]: ./builder/struct.Builder.html 22 | 23 | #![deny(missing_docs)] 24 | 25 | #[cfg(feature = "llvm-10")] 26 | use llvm_10 as llvm_sys; 27 | #[cfg(feature = "llvm-13")] 28 | use llvm_13 as llvm_sys; 29 | #[cfg(feature = "llvm-9")] 30 | use llvm_9 as llvm_sys; 31 | 32 | pub mod builder; 33 | pub mod context; 34 | pub mod function; 35 | pub mod module; 36 | pub mod pass_manager; 37 | pub mod targets; 38 | pub mod types; 39 | pub mod value; 40 | 41 | /// Prelude Module 42 | /// 43 | /// This module just re-exports useful types to help cut down on using 44 | /// statements. 45 | pub mod prelude { 46 | pub use super::builder::Builder; 47 | pub use super::builder::Predicate; 48 | pub use super::context::Context; 49 | pub use super::function::{CallConvention, Function}; 50 | pub use super::module::{Module, OutputFileKind}; 51 | pub use super::targets::{Target, TargetLookupError}; 52 | pub use super::types::Type; 53 | pub use super::value::Value; 54 | 55 | // FIXME: only expose Value in public interface. 
56 | pub use super::llvm_sys::prelude::LLVMTypeRef; 57 | pub use super::llvm_sys::prelude::LLVMValueRef; 58 | } 59 | -------------------------------------------------------------------------------- /src/low_loader/builder.rs: -------------------------------------------------------------------------------- 1 | //! LLVM IR Builder Wrapper 2 | //! 3 | //! Contains a Rust wrapper for dealing with LLVM Intermediate 4 | //! Representation Builders. These objects are responsible for 5 | //! creating instructions and adding them to basic blocks. Essentially 6 | //! they make up the ponity end of the whole thing. Useful! 7 | 8 | use super::llvm_sys::core; 9 | use super::llvm_sys::prelude::*; 10 | use super::llvm_sys::*; 11 | 12 | use std::ffi::{CStr, CString}; 13 | use std::os::raw::c_uint; 14 | 15 | use super::function::Function; 16 | 17 | /// IR Builder 18 | /// 19 | /// Creating yo instructions and manipulating yo basic blocks. 20 | #[derive(Debug, PartialEq)] 21 | pub struct Builder { 22 | raw: LLVMBuilderRef, 23 | } 24 | 25 | /// Comparison Predicate Type 26 | /// 27 | /// Choice of comparison operators. These will be mapped through to 28 | /// `LLVMIntPreidcate` or `LLVMRealPredicate`s depending on the types 29 | /// being used. 30 | #[derive(Copy, Clone)] 31 | pub enum Predicate { 32 | /// Equality predicate 33 | Eq, 34 | /// Inequality predicate 35 | Neq, 36 | /// Arithmetic less than comparison 37 | Lt, 38 | /// Arithmetic less than or equal comparison 39 | LtEq, 40 | /// Arithmetic greter than comparision 41 | Gt, 42 | /// Arithmetic greater than or equal comparison 43 | GtEq, 44 | } 45 | 46 | impl Builder { 47 | /// Create a Builder from a Raw Pointer 48 | /// 49 | /// Takes ownership of the given builder pointer. The builder can 50 | /// then be manipulated through the returned object and will be 51 | /// disposed of when this object leaves scope. 
52 | pub fn from_raw(raw: LLVMBuilderRef) -> Self { 53 | Builder { raw } 54 | } 55 | 56 | /// Build at the End of a Block 57 | /// 58 | /// Takes the builder, points it at the end of the basic block. 59 | pub fn position_at_end(&mut self, block: LLVMBasicBlockRef) { 60 | unsafe { 61 | core::LLVMPositionBuilderAtEnd(self.raw, block); 62 | } 63 | } 64 | 65 | /// Add a Ret Instrution 66 | /// 67 | /// Returns control from the current function 68 | /// immediately. Consumes this build context as t the current 69 | /// basic block can't have any more instructions added after a 70 | /// terminator instruciton. 71 | pub fn build_ret(self, value: LLVMValueRef) { 72 | unsafe { 73 | core::LLVMBuildRet(self.raw, value); 74 | } 75 | } 76 | 77 | /// Build a Call to a `void` Function 78 | /// 79 | /// The built value produces nothing so no value is returned. 80 | pub fn build_void_call(&mut self, function: &Function, args: &mut [LLVMValueRef]) { 81 | self.build_named_call(function, args, None); 82 | } 83 | 84 | /// Build a Call Instruction 85 | /// 86 | /// Emits a call to the given function. 87 | pub fn build_call(&mut self, function: &Function, args: &mut [LLVMValueRef]) -> LLVMValueRef { 88 | self.build_named_call(function, args, Some("call")) 89 | } 90 | 91 | /// Build a Call with a Name 92 | /// 93 | /// Emits a call instruction. The name provided to the call 94 | /// controls the resulting temporary value. If no name is provided 95 | /// then the result isn't bound to any temporary. 
96 | pub fn build_named_call( 97 | &mut self, 98 | function: &Function, 99 | args: &mut [LLVMValueRef], 100 | name: Option<&str>, 101 | ) -> LLVMValueRef { 102 | let name = name.map(|n| CString::new(n).unwrap()); 103 | static EMPTY_NAME: [libc::c_char; 1] = [0]; 104 | unsafe { 105 | let call = core::LLVMBuildCall( 106 | self.raw, 107 | function.as_raw(), 108 | args.as_mut_ptr(), 109 | args.len() as c_uint, 110 | name.as_ref().map_or(EMPTY_NAME.as_ptr(), |n| n.as_ptr()), 111 | ); 112 | core::LLVMSetInstructionCallConv(call, function.call_conv().into()); 113 | call 114 | } 115 | } 116 | 117 | /// Build a GEP 118 | /// 119 | /// GEP, or GetElementPointer, retrieves a pointer to an element in an item. 120 | pub fn build_gep(&mut self, value: LLVMValueRef, indices: &mut [LLVMValueRef]) -> LLVMValueRef { 121 | unsafe { 122 | let name = CStr::from_bytes_with_nul_unchecked(b"gep\0"); 123 | core::LLVMBuildGEP( 124 | self.raw, 125 | value, 126 | indices.as_mut_ptr(), 127 | indices.len() as c_uint, 128 | name.as_ptr(), 129 | ) 130 | } 131 | } 132 | 133 | /// Build a Structure GEP 134 | /// 135 | /// Loads an element from a given structure pointer. 136 | pub fn build_struct_gep(&mut self, struct_val: LLVMValueRef, index: u32) -> LLVMValueRef { 137 | unsafe { 138 | let name = CStr::from_bytes_with_nul_unchecked(b"sgep\0"); 139 | core::LLVMBuildStructGEP(self.raw, struct_val, index as c_uint, name.as_ptr()) 140 | } 141 | } 142 | 143 | /// Extract a Value from an Aggregate 144 | /// 145 | /// Reads a value from a structure at the given struct offset. 
146 | pub fn build_extract_value(&mut self, struct_val: LLVMValueRef, index: u32) -> LLVMValueRef { 147 | unsafe { 148 | let name = CStr::from_bytes_with_nul_unchecked(b"extracted\0"); 149 | core::LLVMBuildExtractValue(self.raw, struct_val, index as c_uint, name.as_ptr()) 150 | } 151 | } 152 | 153 | /// Build an Integer Negation 154 | pub fn build_neg(&mut self, value: LLVMValueRef) -> LLVMValueRef { 155 | unsafe { 156 | let name = CStr::from_bytes_with_nul_unchecked(b"negated\0"); 157 | core::LLVMBuildNeg(self.raw, value, name.as_ptr()) 158 | } 159 | } 160 | 161 | /// Build an Integer Add 162 | pub fn build_add(&mut self, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef { 163 | unsafe { 164 | let name = CStr::from_bytes_with_nul_unchecked(b"addtmp\0"); 165 | core::LLVMBuildAdd(self.raw, lhs, rhs, name.as_ptr()) 166 | } 167 | } 168 | 169 | /// Build an Integer Subtraction 170 | pub fn build_sub(&mut self, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef { 171 | unsafe { 172 | let name = CStr::from_bytes_with_nul_unchecked(b"subtmp\0"); 173 | core::LLVMBuildSub(self.raw, lhs, rhs, name.as_ptr()) 174 | } 175 | } 176 | 177 | /// Build an Integer Multiplication 178 | pub fn build_mul(&mut self, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef { 179 | unsafe { 180 | let name = CStr::from_bytes_with_nul_unchecked(b"multmp\0"); 181 | core::LLVMBuildMul(self.raw, lhs, rhs, name.as_ptr()) 182 | } 183 | } 184 | 185 | /// Build a Signed Integer Division 186 | pub fn build_sdiv(&mut self, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef { 187 | unsafe { 188 | let name = CStr::from_bytes_with_nul_unchecked(b"divtmp\0"); 189 | core::LLVMBuildSDiv(self.raw, lhs, rhs, name.as_ptr()) 190 | } 191 | } 192 | 193 | /// Build an Integer Comparision 194 | pub fn build_icmp( 195 | &mut self, 196 | op: Predicate, 197 | lhs: LLVMValueRef, 198 | rhs: LLVMValueRef, 199 | ) -> LLVMValueRef { 200 | let op = match op { 201 | Predicate::Eq => LLVMIntPredicate::LLVMIntEQ, 202 | 
Predicate::Neq => LLVMIntPredicate::LLVMIntNE, 203 | Predicate::Lt => LLVMIntPredicate::LLVMIntSLT, 204 | Predicate::Gt => LLVMIntPredicate::LLVMIntSGT, 205 | Predicate::LtEq => LLVMIntPredicate::LLVMIntSLE, 206 | Predicate::GtEq => LLVMIntPredicate::LLVMIntSGE, 207 | }; 208 | unsafe { 209 | let name = CStr::from_bytes_with_nul_unchecked(b"cmptemp\0"); 210 | core::LLVMBuildICmp(self.raw, op, lhs, rhs, name.as_ptr()) 211 | } 212 | } 213 | 214 | /// Build an Allocate Instruction 215 | /// 216 | /// Creates a new value allocated for the remainder of the current 217 | /// stack frame. 218 | pub fn build_alloca(&mut self, typ: LLVMTypeRef, name: &str) -> LLVMValueRef { 219 | let name = CString::new(name).unwrap(); 220 | unsafe { core::LLVMBuildAlloca(self.raw, typ, name.as_ptr()) } 221 | } 222 | 223 | /// Build a `malloc` Instruction 224 | /// 225 | /// Creates a new value allocated on the heap. Retusna pointer to 226 | /// the new value. 227 | pub fn build_malloc( 228 | &mut self, 229 | typ: LLVMTypeRef, 230 | count: Option, 231 | name: &str, 232 | ) -> LLVMValueRef { 233 | let name = CString::new(name).unwrap(); 234 | match count { 235 | Some(count) => unsafe { 236 | core::LLVMBuildArrayMalloc(self.raw, typ, count, name.as_ptr()) 237 | }, 238 | None => unsafe { core::LLVMBuildMalloc(self.raw, typ, name.as_ptr()) }, 239 | } 240 | } 241 | 242 | /// Create a Conditional Branch 243 | /// 244 | /// If the condition is true then execution continues in the first 245 | /// block, otherwise execution will move to the second block. 
246 | pub fn build_cond_br( 247 | &mut self, 248 | cond: LLVMValueRef, 249 | iftrue: LLVMBasicBlockRef, 250 | iffalse: LLVMBasicBlockRef, 251 | ) { 252 | unsafe { 253 | core::LLVMBuildCondBr(self.raw, cond, iftrue, iffalse); 254 | } 255 | } 256 | 257 | /// Create an Unconditional Branch 258 | pub fn build_br(&mut self, block: LLVMBasicBlockRef) { 259 | unsafe { 260 | core::LLVMBuildBr(self.raw, block); 261 | } 262 | } 263 | 264 | /// Load from Variable 265 | pub fn build_load(&mut self, var: LLVMValueRef) -> LLVMValueRef { 266 | unsafe { 267 | let name = CStr::from_bytes_with_nul_unchecked(b"loaded\0"); 268 | core::LLVMBuildLoad(self.raw, var, name.as_ptr()) 269 | } 270 | } 271 | 272 | /// Store to Variable 273 | pub fn build_store(&mut self, val: LLVMValueRef, var: LLVMValueRef) -> LLVMValueRef { 274 | unsafe { core::LLVMBuildStore(self.raw, val, var) } 275 | } 276 | 277 | /// Built a Not 278 | pub fn build_not(&mut self, val: LLVMValueRef) -> LLVMValueRef { 279 | unsafe { 280 | let name = CStr::from_bytes_with_nul_unchecked(b"not\0"); 281 | core::LLVMBuildNot(self.raw, val, name.as_ptr()) 282 | } 283 | } 284 | 285 | /// Bitcast 286 | /// 287 | /// Re-interpret the input value to be of the given type. This 288 | /// just transforms how the underlying bits are interpreted rather 289 | /// than performing any smarter coercion. 
pub fn build_bitcast(
    &mut self,
    val: LLVMValueRef,
    typ: LLVMTypeRef,
    name: &str,
) -> LLVMValueRef {
    let c_name = CString::new(name).unwrap();
    unsafe { core::LLVMBuildBitCast(self.raw, val, typ, c_name.as_ptr()) }
}
} // impl Builder

impl Drop for Builder {
    /// Dispose of this Builder
    fn drop(&mut self) {
        unsafe { core::LLVMDisposeBuilder(self.raw) };
    }
}

--------------------------------------------------------------------------------
/src/low_loader/context.rs:
--------------------------------------------------------------------------------
//! LLVM Context Wrapper
//!
//! Contains a Rust wrapper for dealing with LLVM Context objects.

use super::llvm_sys::prelude::*;
use super::llvm_sys::{core, target};
use super::prelude::*;
use std::ffi::CString;
use std::os::raw::c_uint;

/// Ensure Initialised
///
/// Makes sure that the LLVM library has been initialised to support
/// the features we want to use. This function can safely be called
/// any number of times but will only initialise LLVM once.
///
/// # Panics
///
/// If the native ASM printer can't be successfully initialised then
/// this function will panic.
pub(crate) fn ensure_initialised() {
    use std::sync::Once;

    static INIT: Once = Once::new();

    INIT.call_once(initialise_llvm);
}

/// Perform the one-off LLVM initialisation.
///
/// Only called through the `Once` in `ensure_initialised`.
fn initialise_llvm() {
    unsafe {
        // Initialise all targets. This is required so we can look
        // targets up from the target registry and use them if
        // cross compiling.
        target::LLVM_InitializeAllTargets();
        target::LLVM_InitializeAllTargetInfos();
        // target::LLVM_InitializeAllAsmPrinters();
        // target::LLVM_InitializeAllAsmParsers();
        target::LLVM_InitializeAllTargetMCs();

        let printer_failed = target::LLVM_InitializeNativeAsmPrinter() != 0;
        if printer_failed {
            panic!("Could not initialise ASM Printer");
        }
    }
}

/// Context
///
/// A context groups together all the LLVM objects used when
/// compiling.
///
/// The LLVM context holds the global state for compilation. This
/// includes types and modules. LLVM context objects aren't
/// guaranteed to be thread safe, and shouldn't be shared between
/// threads. We'll enforce this by taking `&mut self` when
/// meddling with the context.
#[derive(Debug, PartialEq)]
pub struct Context(LLVMContextRef);

impl Context {
    /// Create Context
    ///
    /// You'll probably only need one of these per 'program' you want
    /// to evaluate. Modules, types and execution from one context
    /// can't be used with another context.
    ///
    /// # Returns
    ///
    /// Returns a new compilation context instance.
    ///
    /// # Panics
    ///
    /// If the underlying LLVM library can't be initialised then this
    /// function will panic.
    pub fn new() -> Self {
        ensure_initialised();
        let raw_ctx = unsafe { core::LLVMContextCreate() };
        Context(raw_ctx)
    }

    /// Add Module
    ///
    /// Creates a new LLVM module in this context.
    pub fn add_module(&mut self, name: &str) -> Module {
        let mod_name = CString::new(name).unwrap();
        let raw_module =
            unsafe { core::LLVMModuleCreateWithNameInContext(mod_name.as_ptr(), self.as_raw()) };
        Module::from_raw(raw_module)
    }

    /// Add a Function to the Module
    ///
    /// Creates a new function in the module. The function has no body
    /// attached. If nothing extra is done with the returned
    /// `Function` then it will serve as an external declaration/import.
    pub fn add_function(
        &mut self,
        module: &mut Module,
        name: &str,
        ret_type: LLVMTypeRef,
        params: &mut [LLVMTypeRef],
    ) -> Function {
        let varargs = false;
        self.add_function_internal(module, name, ret_type, params, varargs)
    }

    /// Add a Function with Variable Arguments
    ///
    /// Creates a new function in the module in the same way as
    /// `add_function`. In addition the function is declared with a
    /// variable argument list.
    pub fn add_varargs_function(
        &mut self,
        module: &mut Module,
        name: &str,
        ret_type: LLVMTypeRef,
        params: &mut [LLVMTypeRef],
    ) -> Function {
        let varargs = true;
        self.add_function_internal(module, name, ret_type, params, varargs)
    }

    /// Internal Add Function
    ///
    /// Thinner wrapper over `LLVMAddFunction`. Clients should use
    /// `add_function` or `add_varargs_function`.
    fn add_function_internal(
        &mut self,
        module: &mut Module,
        name: &str,
        ret_type: LLVMTypeRef,
        params: &mut [LLVMTypeRef],
        varargs: bool,
    ) -> Function {
        // LLVM takes the varargs flag as an integer boolean.
        let is_var_arg = if varargs { 1 } else { 0 };
        let param_count = params.len() as c_uint;

        // Build the function's type from its return and parameter types.
        let function_type = unsafe {
            core::LLVMFunctionType(ret_type, params.as_mut_ptr(), param_count, is_var_arg)
        };

        let function_name = CString::new(name).unwrap();

        // Function::from_raw is `unsafe` because it doesn't verify
        // that the value you give it is an LLVM Function. I think we
        // can be sure this one is though :-p
        unsafe {
            let raw_function =
                core::LLVMAddFunction(module.as_raw(), function_name.as_ptr(), function_type);
            Function::from_raw(raw_function)
        }
    }

    /// Add a Basic Block to a given Function
    ///
    /// Creates a basic block and appends it to the function.
    pub fn add_block(&mut self, fun: &mut Function, name: &str) -> LLVMBasicBlockRef {
        let block_name = CString::new(name).unwrap();
        let name_ptr = block_name.as_ptr();
        unsafe { core::LLVMAppendBasicBlockInContext(self.as_raw(), fun.as_raw(), name_ptr) }
    }

    /// Create an IR Builder
    ///
    /// Creates and initialises a new IR Builder in this `Context`.
    pub fn add_builder(&mut self) -> Builder {
        let raw_builder = unsafe { core::LLVMCreateBuilderInContext(self.as_raw()) };
        Builder::from_raw(raw_builder)
    }

    /// Create A Constant Value
    ///
    /// The returned value is a constant 64 bit integer with the given
    /// value.
    pub fn const_int(&self, i: i64) -> LLVMValueRef {
        let int64 = unsafe { core::LLVMInt64TypeInContext(self.as_raw()) };
        unsafe { core::LLVMConstInt(int64, i as u64, 1) }
    }

    /// Create a Constant Value with a Given Width
    ///
    /// Used when the width shouldn't be 64 bits.
    pub fn const_int_width(&self, i: i64, width: u32) -> LLVMValueRef {
        let int_ty = unsafe { core::LLVMIntTypeInContext(self.as_raw(), width) };
        unsafe { core::LLVMConstInt(int_ty, i as u64, 1) }
    }

    /// Create a Constant Character Value
    ///
    /// The returned value is a constant 8 bit integer.
    pub fn const_char(&self, i: u8) -> LLVMValueRef {
        let int8 = unsafe { core::LLVMInt8TypeInContext(self.as_raw()) };
        unsafe { core::LLVMConstInt(int8, u64::from(i), 0) }
    }

    /// Create a Constant Bool
    ///
    /// The returned value is a constant 1-bit integer with the given
    /// boolean value mapped to `true` => `1`, `false` => `0`.
201 | pub fn const_bool(&self, b: bool) -> LLVMValueRef { 202 | let mapped = if b { 1 } else { 0 }; 203 | unsafe { 204 | let int1 = core::LLVMInt1TypeInContext(self.as_raw()); 205 | core::LLVMConstInt(int1, mapped, 0) 206 | } 207 | } 208 | 209 | /// Create a Constant String Value 210 | /// 211 | /// The returned value is a constant i8 array with characters from 212 | /// the given string stored as UTF8. 213 | pub fn const_str(&self, s: &str) -> LLVMValueRef { 214 | let mut bytes: Vec<_> = s.bytes().map(|b| self.const_char(b)).collect(); 215 | bytes.push(self.const_char(0)); 216 | unsafe { 217 | let int8 = core::LLVMInt8TypeInContext(self.as_raw()); 218 | core::LLVMConstArray(int8, bytes.as_mut_ptr(), bytes.len() as c_uint) 219 | } 220 | } 221 | 222 | /// Create a Structure Contstant 223 | /// 224 | /// Initialses a new structrure based on the given values. 225 | pub fn const_struct(&self, mut values: Vec) -> LLVMValueRef { 226 | let len = values.len(); 227 | unsafe { 228 | core::LLVMConstStructInContext(self.as_raw(), values.as_mut_ptr(), len as c_uint, 0) 229 | } 230 | } 231 | 232 | /// Raw Borrow 233 | /// 234 | /// # Safety 235 | /// 236 | /// This method returns a raw pointer to the underlying 237 | /// LLVMContext. It's up to you to make sure it doesn't outlive 238 | /// the `Context`, and to make sure you don't break any of LLVMs 239 | /// thread safety requirements. 240 | pub unsafe fn as_raw(&self) -> LLVMContextRef { 241 | let &Context(raw_ctx) = self; 242 | raw_ctx 243 | } 244 | 245 | /// A Sized Integer Type in this Context 246 | /// 247 | /// This looks up the integer type of a given `width` in the LLVM 248 | /// Context and returns it. Multiple calls should return the same 249 | /// type for the same width integer. 
250 | pub fn int_type(&self, width: usize) -> LLVMTypeRef { 251 | unsafe { core::LLVMIntTypeInContext(self.as_raw(), width as c_uint) } 252 | } 253 | 254 | /// Boolean Type in this Context 255 | /// 256 | /// Looks up the boolean type from LLVM. This is just a 1-bit 257 | /// integer type under the hood. 258 | pub fn bool_type(&self) -> LLVMTypeRef { 259 | self.int_type(1) 260 | } 261 | 262 | /// Get the Raw C String Type 263 | /// 264 | /// Looks up the c-style 'pointer to character' string type in the 265 | /// context. This is different from the langauage's string 266 | /// type. It is intended to be used when creating FFI calls. 267 | pub fn cstr_type(&self) -> LLVMTypeRef { 268 | unsafe { 269 | let int8 = core::LLVMInt8TypeInContext(self.as_raw()); 270 | core::LLVMPointerType(int8, 0) 271 | } 272 | } 273 | 274 | /// Create a Structure Type 275 | /// 276 | /// Given a set of fields create a structure type with fields 277 | /// layed out in that order. 278 | pub fn struct_type(&self, mut fields: Vec) -> LLVMTypeRef { 279 | let len = fields.len(); 280 | unsafe { 281 | core::LLVMStructTypeInContext(self.as_raw(), fields.as_mut_ptr(), len as c_uint, 0) 282 | } 283 | } 284 | 285 | /// Create an Array Type 286 | /// 287 | /// Returns a type which represents a contiguous array of the 288 | /// inner type. 289 | pub fn array_type(&self, inner: LLVMTypeRef, size: usize) -> LLVMTypeRef { 290 | unsafe { core::LLVMArrayType(inner, size as c_uint) } 291 | } 292 | 293 | /// Create a Pointer Type 294 | /// 295 | /// Wraps a given type to creat a poitner to it. 296 | pub fn pointer_type(&self, inner: LLVMTypeRef) -> LLVMTypeRef { 297 | unsafe { core::LLVMPointerType(inner, 0) } 298 | } 299 | 300 | /// Get the Void Type 301 | pub fn void_type(&self) -> LLVMTypeRef { 302 | unsafe { core::LLVMVoidTypeInContext(self.0) } 303 | } 304 | 305 | /// Get the LLVM Type from a Value 306 | /// 307 | /// Inspects a given LLVM Value and returns the type as known by 308 | /// LLVM. 
This is basically jsut an `LLVMTypeOf` call. 309 | pub fn get_type(&self, value: LLVMValueRef) -> LLVMTypeRef { 310 | unsafe { core::LLVMTypeOf(value) } 311 | } 312 | } 313 | 314 | impl Drop for Context { 315 | fn drop(&mut self) { 316 | unsafe { 317 | core::LLVMContextDispose(self.as_raw()); 318 | } 319 | } 320 | } 321 | 322 | impl Default for Context { 323 | fn default() -> Self { 324 | Context::new() 325 | } 326 | } 327 | -------------------------------------------------------------------------------- /src/low_loader/function.rs: -------------------------------------------------------------------------------- 1 | //! LLVM Function Wrapper 2 | //! 3 | //! A wrapper around an LLVM function. 4 | 5 | use super::llvm_sys::analysis; 6 | use super::llvm_sys::core; 7 | use super::llvm_sys::prelude::*; 8 | use super::llvm_sys::LLVMCallConv; 9 | 10 | /// Function 11 | /// 12 | /// A single function in a given module. 13 | #[derive(Debug, PartialEq)] 14 | pub struct Function { 15 | raw: LLVMValueRef, 16 | call_conv: CallConvention, 17 | } 18 | 19 | /// Calling Contentions 20 | /// 21 | /// This is a subset of the LLVM calling contentions. 
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum CallConvention {
    /// The `fastcall` calling convention
    Fastcall,
    /// The C calling convention
    CDecl,
}

impl From<CallConvention> for libc::c_uint {
    /// Map to the numeric value of the matching `LLVMCallConv`.
    fn from(call_convention: CallConvention) -> Self {
        let llvm_conv = match call_convention {
            CallConvention::Fastcall => LLVMCallConv::LLVMFastCallConv,
            CallConvention::CDecl => LLVMCallConv::LLVMCCallConv,
        };
        llvm_conv as libc::c_uint
    }
}

impl From<libc::c_uint> for CallConvention {
    /// Map a raw LLVM calling convention back to the supported
    /// subset. Anything other than fastcall is reported as `CDecl`.
    fn from(llvm_conv: libc::c_uint) -> Self {
        if llvm_conv == LLVMCallConv::LLVMFastCallConv as libc::c_uint {
            CallConvention::Fastcall
        } else {
            CallConvention::CDecl
        }
    }
}

impl Function {
    /// Wrap an Existing Function
    ///
    /// Wraps the given function value and provides more strongly
    /// typed access to it. The calling convention currently set on
    /// the function is read and cached in the wrapper.
    ///
    /// # Safety
    ///
    /// The value should be a function. This is not checked.
    pub unsafe fn from_raw(raw: LLVMValueRef) -> Self {
        Function {
            raw,
            call_conv: core::LLVMGetFunctionCallConv(raw).into(),
        }
    }

    /// Verify the Function
    ///
    /// Makes LLVM check the function is valid. If the function is not
    /// valid we will panic to signal the error. This is intended for
    /// debugging the compiler's output.
    ///
    /// # Panics
    ///
    /// Panics, after dumping the function, if verification fails.
    pub fn verify_or_panic(&self) {
        use self::analysis::*;
        // The verifier returns a non-zero value when the function is
        // broken, the same convention `Module::verify_or_panic` uses.
        let failed = unsafe {
            LLVMVerifyFunction(self.raw, LLVMVerifierFailureAction::LLVMPrintMessageAction)
        };

        if failed != 0 {
            unsafe {
                core::LLVMDumpValue(self.raw);
            }
            panic!("Function failed verification!")
        }
    }

    /// Get a Function's Parameter
    ///
    /// Returns a value that can be used to access the `n`th function
    /// argument.
89 | pub fn get_param(&self, n: u32) -> LLVMValueRef { 90 | unsafe { core::LLVMGetParam(self.as_raw(), n) } 91 | } 92 | 93 | /// Raw Borrow 94 | /// 95 | /// # Safety 96 | /// 97 | /// This method returns a raw pointer to the underlying 98 | /// LLVMValue. It's up to you to make sure it doesn't outlive the 99 | /// `Function`, and to make sure you don't break any of LLVMs 100 | /// thread safety requirements. 101 | pub unsafe fn as_raw(&self) -> LLVMValueRef { 102 | self.raw 103 | } 104 | 105 | /// Get the Function Calling Convention 106 | /// 107 | /// Returns the calling convention that is set for the current 108 | /// function. This defaults to CDecl if no convention is set. 109 | pub fn call_conv(&self) -> CallConvention { 110 | self.call_conv 111 | } 112 | 113 | /// Set the Function's Calling Convention 114 | /// 115 | /// Updates the calling convention for the function 116 | /// delcaration. We use fastcall for our calling convention and 117 | /// cdecl for c interop. 118 | pub fn set_calling_convention(&mut self, call_convention: CallConvention) { 119 | self.call_conv = call_convention; 120 | unsafe { 121 | core::LLVMSetFunctionCallConv(self.raw, call_convention.into()); 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/low_loader/module.rs: -------------------------------------------------------------------------------- 1 | //! LLVM Module Wrapper 2 | //! 3 | //! Contains types and wrappers for dealing with LLVM Modules. 4 | 5 | use super::function::Function; 6 | use super::llvm_sys::prelude::*; 7 | use super::llvm_sys::target_machine; 8 | use super::llvm_sys::{analysis, bit_writer, core}; 9 | use super::pass_manager::{OptLevel, OptSize, PassManagerBuilder}; 10 | use super::targets::Target; 11 | 12 | use std::ffi::{CStr, CString}; 13 | use std::path::Path; 14 | use std::ptr; 15 | 16 | /// Module 17 | /// 18 | /// A module repsents a single code unit. 
It maps down to a library or 19 | /// executable when compiled by LLVM. This type provides a safe 20 | /// abstraction around the raw `LLVMModule` type. 21 | #[derive(Debug, PartialEq)] 22 | pub struct Module { 23 | raw: LLVMModuleRef, 24 | } 25 | 26 | /// The kind of output file to write 27 | /// 28 | /// Used when writing modules to disk. 29 | #[derive(Debug, PartialEq)] 30 | pub enum OutputFileKind { 31 | /// LLVM IL files 32 | LLVMIl, 33 | /// LLVM Bitcode file 34 | Bitcode, 35 | /// Native executable object files 36 | NativeObject, 37 | } 38 | 39 | impl Module { 40 | /// Module from Raw 41 | /// 42 | /// Creates a new module from a raw module reference. This takes 43 | /// ownership of the given module. When the returned Module 44 | /// instance goes out of scope the module will be disposed. 45 | /// 46 | /// *Note*: You shouldn't need to use this directly, instead modules 47 | /// can be created with `Context::add_module`. 48 | pub fn from_raw(mod_ref: LLVMModuleRef) -> Self { 49 | Module { raw: mod_ref } 50 | } 51 | 52 | /// Set the Modules's Target 53 | /// 54 | /// Defines which targe the module is being comiled for. This can 55 | /// enable target-specific optimisations in the compilation of 56 | /// this module. 57 | pub fn set_target(&mut self, target: &Target) { 58 | let triple = CString::new(target.norm_triple()).unwrap(); 59 | unsafe { 60 | core::LLVMSetTarget(self.as_raw(), triple.as_ptr()); 61 | } 62 | } 63 | 64 | /// Dump the Module 65 | /// 66 | /// Writes a representation of the module to standard output. This 67 | /// is intended to be used as an aid to debugging. 
68 | pub fn dump(&self) { 69 | unsafe { core::LLVMDumpModule(self.raw) } 70 | } 71 | 72 | /// Verify the Module 73 | /// 74 | /// Checks that the whole module is valid before continuing 75 | pub fn verify_or_panic(&self) { 76 | let verified = unsafe { 77 | analysis::LLVMVerifyModule( 78 | self.as_raw(), 79 | analysis::LLVMVerifierFailureAction::LLVMPrintMessageAction, 80 | ptr::null_mut(), 81 | ) 82 | }; 83 | if verified != 0 { 84 | panic!("Module failed validation"); 85 | } 86 | } 87 | 88 | /// Run the Optimisation Passes over the Module 89 | /// 90 | /// Given a target optimisation level transform the module to 91 | /// improve exectuion speed. 92 | /// 93 | /// # Parameters 94 | /// * `level` - the optimisation level to target. 95 | /// * `size` - Enum to control size optimisation. 96 | pub fn run_optimiser(&mut self, level: OptLevel, size: OptSize) { 97 | let pass_manager = PassManagerBuilder::new() 98 | .with_opt_level(level) 99 | .with_opt_size(size) 100 | .create_module_pass_manager(); 101 | 102 | pass_manager.run(self); 103 | } 104 | 105 | /// Write the Module to the Given File as LLVM IR or Bitcode 106 | /// 107 | /// The kind of file written depends on `kind`. 108 | pub fn write_to_file( 109 | &self, 110 | target: &Target, 111 | path: &Path, 112 | kind: OutputFileKind, 113 | ) -> Result<(), String> { 114 | let path = path.to_str().and_then(|s| CString::new(s).ok()).unwrap(); 115 | 116 | unsafe { 117 | let mut message = ptr::null_mut(); 118 | let r = match kind { 119 | OutputFileKind::LLVMIl => { 120 | core::LLVMPrintModuleToFile(self.raw, path.as_ptr(), &mut message) 121 | } 122 | OutputFileKind::Bitcode => { 123 | bit_writer::LLVMWriteBitcodeToFile(self.raw, path.as_ptr()) 124 | } 125 | OutputFileKind::NativeObject => { 126 | let trip = CString::new(target.triple()).unwrap(); 127 | // To emit code we need to do a few things: 128 | // * Create an LLVM TargetMachine from our target. 
129 | // * Create a pass manager 130 | // * Call targetMachine emit to file 131 | let tm = target_machine::LLVMCreateTargetMachine( 132 | target.as_llvm_target(), 133 | trip.as_ptr(), 134 | target_machine::LLVMGetHostCPUName(), 135 | target_machine::LLVMGetHostCPUFeatures(), 136 | target_machine::LLVMCodeGenOptLevel::LLVMCodeGenLevelDefault, 137 | target_machine::LLVMRelocMode::LLVMRelocDefault, 138 | target_machine::LLVMCodeModel::LLVMCodeModelSmall, 139 | ); 140 | let r = target_machine::LLVMTargetMachineEmitToFile( 141 | tm, 142 | self.as_raw(), 143 | path.as_ptr() as *mut _, 144 | target_machine::LLVMCodeGenFileType::LLVMObjectFile, 145 | &mut message, 146 | ); 147 | target_machine::LLVMDisposeTargetMachine(tm); 148 | r 149 | } 150 | }; 151 | if r == 0 { 152 | Ok(()) 153 | } else { 154 | let err_str = CStr::from_ptr(message).to_string_lossy().into(); 155 | core::LLVMDisposeMessage(message); 156 | Err(err_str) 157 | } 158 | } 159 | } 160 | 161 | /// Find a Function by Name 162 | pub fn find_function(&self, name: &str) -> Option { 163 | let function_name = CString::new(name).unwrap(); 164 | unsafe { 165 | let found = core::LLVMGetNamedFunction(self.as_raw(), function_name.as_ptr()); 166 | if found.is_null() { 167 | None 168 | } else { 169 | Some(Function::from_raw(found)) 170 | } 171 | } 172 | } 173 | 174 | /// Add a Global Variable 175 | pub fn add_global(&mut self, initialiser: LLVMValueRef, name: &str) -> LLVMValueRef { 176 | let global_name = CString::new(name).unwrap(); 177 | unsafe { 178 | let global = core::LLVMAddGlobal( 179 | self.as_raw(), 180 | core::LLVMTypeOf(initialiser), 181 | global_name.as_ptr(), 182 | ); 183 | core::LLVMSetInitializer(global, initialiser); 184 | global 185 | } 186 | } 187 | 188 | /// Find a Global Variable in the Module by Name 189 | /// 190 | /// Looks up a given global variale in the module and returns 191 | /// it. If the variable doesn't exist in the module then `None` is 192 | /// returned. 
193 | pub fn find_global(&self, name: &str) -> Option { 194 | let global_name = CString::new(name).unwrap(); 195 | unsafe { 196 | let found = core::LLVMGetNamedGlobal(self.as_raw(), global_name.as_ptr()); 197 | if found.is_null() { 198 | None 199 | } else { 200 | Some(found) 201 | } 202 | } 203 | } 204 | 205 | /// Raw Borrow 206 | /// 207 | /// # Safety 208 | /// 209 | /// This method returns a raw pointer to the underlying 210 | /// LLVMModule. It's up to you to make sure it doesn't outlive the 211 | /// `Module`, and to make sure you don't break any of LLVMs thread 212 | /// safety requirements. 213 | pub unsafe fn as_raw(&self) -> LLVMModuleRef { 214 | self.raw 215 | } 216 | } 217 | 218 | impl Drop for Module { 219 | fn drop(&mut self) { 220 | unsafe { core::LLVMDisposeModule(self.raw) } 221 | } 222 | } 223 | 224 | impl From for LLVMModuleRef { 225 | /// Consume the wrapped module and return it's interal module 226 | /// reference. This transfers the ownership of the module back to 227 | /// the caller preventing the it from being automaticaly freed. 228 | fn from(m: Module) -> LLVMModuleRef { 229 | unsafe { 230 | // an apparently nicer alterantive to calling `forget` we 231 | // instead create a `ManuallyDrop` item and then don't 232 | // drop it here. 233 | std::mem::ManuallyDrop::new(m).as_raw() 234 | } 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /src/low_loader/pass_manager.rs: -------------------------------------------------------------------------------- 1 | //! Pass Manager 2 | //! 3 | //! This module defines a safe wrapper around the `PassManager` and 4 | //! `PassManagerBuilder` LLVM C APIs. 5 | //! 6 | //! Pass managers are used to run a given set of transforms against 7 | //! either a module or function. 
8 | 9 | use super::module::Module; 10 | 11 | use super::llvm_sys::core; 12 | use super::llvm_sys::prelude::*; 13 | use super::llvm_sys::transforms::pass_manager_builder as pm_builder; 14 | 15 | /// LLVM Module PassManager 16 | pub struct ModulePassManager(LLVMPassManagerRef); 17 | 18 | /// Builder API for the PassManager 19 | pub struct PassManagerBuilder(pm_builder::LLVMPassManagerBuilderRef); 20 | 21 | /// Optimisation Levels 22 | /// 23 | /// Used to control the level of optimisaton that the pass manager 24 | /// builder should target when adding passes 25 | pub enum OptLevel { 26 | /// No Speed Optimisation 27 | Off, 28 | /// Some Optimisation for Speed 29 | /// 30 | /// This corresponds to the `-O1` optimisation flag. 31 | Low, 32 | /// Medium Optimisation for Speed 33 | /// 34 | /// This corresponds to the `-O2` optimisation flag. 35 | Medium, 36 | /// Full Optimisation for Speed 37 | /// 38 | /// This corresponds to the `-O1` optimisation flag. 39 | High, 40 | } 41 | 42 | /// Size Optimisation Levels 43 | /// 44 | /// Used to control the code size that the optimisation passes 45 | /// target. This can be used to encourage the optimiser to favour 46 | /// smaller binaries rather than faster ones. 47 | pub enum OptSize { 48 | /// Size Optimisation Disabled 49 | Off, 50 | /// Normal Size Optimisations 51 | /// 52 | /// This corresponds to the `-Os` optimisation flag. 53 | Size, 54 | /// Full Size Optimisations 55 | /// 56 | /// This corresponds to the `-Oz` optimisation flag. 
57 | SizeFull, 58 | } 59 | 60 | impl From for ::libc::c_uint { 61 | fn from(level: OptLevel) -> Self { 62 | use self::OptLevel::*; 63 | match level { 64 | Off => 0, 65 | Low => 1, 66 | Medium => 2, 67 | High => 3, 68 | } 69 | } 70 | } 71 | 72 | impl From for ::libc::c_uint { 73 | fn from(size: OptSize) -> Self { 74 | use self::OptSize::*; 75 | match size { 76 | Off => 0, 77 | Size => 1, 78 | SizeFull => 2, 79 | } 80 | } 81 | } 82 | 83 | impl Drop for PassManagerBuilder { 84 | fn drop(&mut self) { 85 | unsafe { 86 | pm_builder::LLVMPassManagerBuilderDispose(self.0); 87 | } 88 | } 89 | } 90 | 91 | impl Default for PassManagerBuilder { 92 | fn default() -> Self { 93 | PassManagerBuilder::new() 94 | } 95 | } 96 | 97 | impl PassManagerBuilder { 98 | /// Create a Pass Manager Builder 99 | pub fn new() -> Self { 100 | super::context::ensure_initialised(); 101 | let raw = unsafe { pm_builder::LLVMPassManagerBuilderCreate() }; 102 | PassManagerBuilder(raw) 103 | } 104 | 105 | /// Set the Optimisation Level 106 | /// 107 | /// Controls the optimisation level the pass manager bulder will 108 | /// target. 109 | pub fn with_opt_level(self, level: OptLevel) -> Self { 110 | unsafe { 111 | pm_builder::LLVMPassManagerBuilderSetOptLevel(self.0, level.into()); 112 | } 113 | self 114 | } 115 | 116 | /// Set the Size Optimisation Level 117 | /// 118 | /// Controls the optimiser's preference for smaller code size over 119 | /// speed. Traditionally setting this to anything other than `Off` 120 | /// is paired with setting the optimisation level to `Med`. 121 | pub fn with_opt_size(self, size: OptSize) -> Self { 122 | unsafe { 123 | pm_builder::LLVMPassManagerBuilderSetSizeLevel(self.0, size.into()); 124 | } 125 | self 126 | } 127 | 128 | /// Create a Moduel Pass Manager 129 | /// 130 | /// Takes the current state of the pass manager builder and 131 | /// creates a new module pass manager populated with the 132 | /// configured passes. 
133 | pub fn create_module_pass_manager(&self) -> ModulePassManager { 134 | let pm = unsafe { core::LLVMCreatePassManager() }; 135 | unsafe { 136 | pm_builder::LLVMPassManagerBuilderPopulateModulePassManager(self.0, pm); 137 | } 138 | ModulePassManager(pm) 139 | } 140 | } 141 | 142 | impl ModulePassManager { 143 | /// Run the Pass Manager 144 | /// 145 | /// Attempt to transform the given module with the pass 146 | /// manager. Returns a boolean representing if the pass manager 147 | /// made changes to the module. 148 | pub fn run(&self, module: &mut Module) -> bool { 149 | unsafe { core::LLVMRunPassManager(self.0, module.as_raw()) != 0 } 150 | } 151 | } 152 | 153 | #[cfg(test)] 154 | mod test { 155 | 156 | use super::*; 157 | 158 | #[test] 159 | fn test_build_module_pass_manager() { 160 | let _pm = PassManagerBuilder::new().create_module_pass_manager(); 161 | } 162 | 163 | #[test] 164 | fn test_set_optimisation_settings() { 165 | let _pm = PassManagerBuilder::new() 166 | .with_opt_level(OptLevel::Medium) 167 | .with_opt_size(OptSize::SizeFull) 168 | .create_module_pass_manager(); 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/low_loader/targets.rs: -------------------------------------------------------------------------------- 1 | //! LLVM Targets 2 | 3 | use super::llvm_sys::core::LLVMDisposeMessage; 4 | use super::llvm_sys::target_machine::*; 5 | use std::ffi::{CStr, CString}; 6 | use std::fmt::Display; 7 | use std::{fmt, ptr}; 8 | 9 | /// Compilation Target 10 | /// 11 | /// Represents a validated target triple. Can be used to build a 12 | /// target machine to compile to. 13 | pub struct Target { 14 | llvm_target: LLVMTargetRef, 15 | triple: String, 16 | } 17 | 18 | /// Target Lookup Error 19 | /// 20 | /// Returned if a target couldn't be resolved from the given triple. 
21 | #[derive(Debug)] 22 | pub struct TargetLookupError(String); 23 | 24 | impl std::error::Error for TargetLookupError {} 25 | 26 | impl Display for TargetLookupError { 27 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 28 | write!(f, "Could not find target: '{}'", self.0) 29 | } 30 | } 31 | 32 | impl fmt::Display for Target { 33 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 34 | writeln!(f, "Target information for {}:", self.triple)?; 35 | writeln!(f, " name: {}", self.name())?; 36 | writeln!(f, " description: {}", self.description())?; 37 | writeln!(f, " normalised triple: {}", self.norm_triple())?; 38 | writeln!(f, " has asm backend: {}", self.has_asm_backend()) 39 | } 40 | } 41 | 42 | impl Default for Target { 43 | /// Get the Default (host) Target 44 | fn default() -> Self { 45 | let trip = get_default_triple(); 46 | Target::from_triple(&trip).expect("Default triple should be found") 47 | } 48 | } 49 | 50 | impl Target { 51 | /// Create a Target from a Triple 52 | /// 53 | /// Looks the given target trip up and returns the coresponding 54 | /// concrete target. If the triple isn't a valid compilation targe 55 | /// then a `TargetLookupError` is returned. 
56 | pub fn from_triple(triple: &str) -> Result { 57 | super::context::ensure_initialised(); 58 | 59 | let ffi_trip = CString::new(triple).unwrap(); 60 | 61 | let mut target: LLVMTargetRef = ptr::null_mut(); 62 | let mut error: *mut libc::c_char = ptr::null_mut(); 63 | let r = unsafe { 64 | LLVMGetTargetFromTriple( 65 | ffi_trip.as_ptr(), 66 | &mut target as *mut LLVMTargetRef, 67 | &mut error as *mut *mut libc::c_char, 68 | ) 69 | }; 70 | if r != 0 { 71 | let err = unsafe { CStr::from_ptr(error) }; 72 | let err = err.to_string_lossy().to_string(); 73 | unsafe { LLVMDisposeMessage(error) }; 74 | return Err(TargetLookupError(err)); 75 | } 76 | Ok(Target { 77 | llvm_target: target, 78 | triple: triple.to_owned(), 79 | }) 80 | } 81 | 82 | /// Get the underlying LLVM target reference from the target 83 | /// 84 | /// # Safety 85 | /// 86 | /// The lifetime of the returned reference is tied to the lifetime of 87 | /// the `Target`. It should not be used outside that scope. 88 | pub unsafe fn as_llvm_target(&self) -> LLVMTargetRef { 89 | self.llvm_target 90 | } 91 | 92 | /// Get the Target name 93 | /// 94 | /// Retrieves the logical name for this target 95 | pub fn name(&self) -> &str { 96 | let name = unsafe { CStr::from_ptr(LLVMGetTargetName(self.llvm_target)) }; 97 | name.to_str().expect("Target name should be valid unicode") 98 | } 99 | 100 | /// Get the Target Descirption 101 | /// 102 | /// Retrieves the short description of the target 103 | pub fn description(&self) -> &str { 104 | let desc = unsafe { CStr::from_ptr(LLVMGetTargetDescription(self.llvm_target)) }; 105 | desc.to_str() 106 | .expect("Target description should be valid unicode") 107 | } 108 | 109 | /// Get the Target Triple 110 | /// 111 | /// Retrieves the triple that was used to create this target. 112 | pub fn triple(&self) -> &str { 113 | &self.triple[..] 
114 | } 115 | 116 | /// Get the Normalised Target Triple 117 | /// 118 | /// This is the canonical version of the target triple 119 | pub fn norm_triple(&self) -> &str { 120 | let ffi_trip = CString::new(&self.triple[..]).unwrap(); 121 | let norm_trip = unsafe { CStr::from_ptr(LLVMNormalizeTargetTriple(ffi_trip.as_ptr())) }; 122 | norm_trip 123 | .to_str() 124 | .expect("normalised triple should be valid unicode") 125 | } 126 | 127 | /// Does the Target have an ASM Backend 128 | pub fn has_asm_backend(&self) -> bool { 129 | let has_backend = unsafe { LLVMTargetHasAsmBackend(self.llvm_target) }; 130 | has_backend != 0 131 | } 132 | } 133 | 134 | /// Get the Default Target Triple 135 | /// 136 | /// Returns the triple for the native target. To be used as a fallback 137 | /// if no triple is specified by the user. 138 | pub fn get_default_triple() -> String { 139 | let native_target = unsafe { CStr::from_ptr(LLVMGetDefaultTargetTriple()) }; 140 | native_target.to_string_lossy().to_owned().to_string() 141 | } 142 | 143 | /// Dump Available LLVM Targets 144 | /// 145 | /// Prints a list of LLVM target triple available in this build. 
146 | pub fn dump_targets() { 147 | super::context::ensure_initialised(); 148 | 149 | let native_target = unsafe { LLVMGetDefaultTargetTriple() }; 150 | 151 | println!("default triple: {:?}", unsafe { 152 | CStr::from_ptr(native_target) 153 | }); 154 | println!(); 155 | println!("supported targets:"); 156 | let mut target = unsafe { LLVMGetFirstTarget() }; 157 | while !target.is_null() { 158 | let name = unsafe { CStr::from_ptr(LLVMGetTargetName(target)) }; 159 | let desc = unsafe { CStr::from_ptr(LLVMGetTargetDescription(target)) }; 160 | println!(" * {} ({})", name.to_str().unwrap(), desc.to_str().unwrap()); 161 | target = unsafe { LLVMGetNextTarget(target) }; 162 | } 163 | } 164 | 165 | #[cfg(test)] 166 | mod test { 167 | 168 | use super::*; 169 | 170 | #[test] 171 | fn create_default_triple() { 172 | let default: Target = Default::default(); 173 | assert_eq!(get_default_triple(), default.triple()); 174 | assert_eq!(default.triple(), default.norm_triple()); 175 | } 176 | 177 | #[test] 178 | fn create_invalid_triple() { 179 | let r = Target::from_triple("im-not-valid"); 180 | assert!(r.is_err()); 181 | } 182 | 183 | #[test] 184 | fn create_known_triple() { 185 | let target = Target::from_triple("aarch64-linux-gnu").unwrap(); 186 | assert_eq!("aarch64", target.name()); 187 | assert_eq!("AArch64 (little endian)", target.description()); 188 | assert_eq!(true, target.has_asm_backend()); 189 | assert_eq!("aarch64-linux-gnu", target.triple()); 190 | assert_eq!("aarch64-unknown-linux-gnu", target.norm_triple()); 191 | } 192 | 193 | #[test] 194 | fn create_long_triple() { 195 | let target = Target::from_triple("arm-eabi-linux-gnu-elf").unwrap(); 196 | assert_eq!("arm", target.name()); 197 | assert_eq!("ARM", target.description()); 198 | assert_eq!(true, target.has_asm_backend()); 199 | assert_eq!("arm-eabi-linux-gnu-elf", target.triple()); 200 | assert_eq!("arm-eabi-linux-gnu-elf", target.norm_triple()); 201 | } 202 | } 203 | 
-------------------------------------------------------------------------------- /src/low_loader/types.rs: -------------------------------------------------------------------------------- 1 | //! LLVM Types 2 | 3 | use super::llvm_sys::prelude::*; 4 | use super::llvm_sys::{core, LLVMTypeKind}; 5 | 6 | /// Floating Point Sizes 7 | /// 8 | /// LLVM floating point types can only be one of a given set of 9 | /// sizes. Some of these sizes are architecture-specific. 10 | #[allow(non_camel_case_types)] 11 | pub enum FloatWidth { 12 | /// Hafl-width float (16 bits) 13 | Half, 14 | /// Standard float (32 bits) 15 | Float, 16 | /// Double-precision float (64 bits) 17 | Double, 18 | /// X86 / 8087 'extended precision' float (80 bits) 19 | X86_FP80, 20 | /// 128 bit float (112 bit mantissa) 21 | FP128, 22 | /// PowerPC Specific 128 bit float 23 | PPC_FP128, 24 | } 25 | 26 | /// Type Enumeration 27 | /// 28 | /// Represents the different types possible for a given value. 29 | pub enum Type { 30 | /// The void type represents the absence of a value and has no 31 | /// size. Void can't be used as the target type of a pointer. 32 | Void, 33 | 34 | /// Floating point number of a given width. 35 | Float(FloatWidth), 36 | 37 | /// A code label. 38 | Label, 39 | 40 | /// An arbitrary-sized integer type. 41 | Int(usize), 42 | 43 | /// A callable funciton. 44 | Function, 45 | 46 | /// A structure type. 47 | /// 48 | /// Represents a collection of values together in memory. 49 | Struct, 50 | 51 | /// An n-dimensional array type. 52 | Array, 53 | 54 | /// A memory location. 55 | Pointer, 56 | 57 | /// A 'vector' type 58 | /// 59 | /// Used to represent a collection of primitives which can be 60 | /// processed in parallel. 
61 | Vector, 62 | 63 | /// A metadata value 64 | Metadata, 65 | 66 | /// An X86 MMX Value 67 | MMX, 68 | 69 | /// A value which can't be inspected 70 | Token, 71 | } 72 | 73 | impl From for Type { 74 | fn from(llvm_type: LLVMTypeRef) -> Self { 75 | use self::LLVMTypeKind::*; 76 | match unsafe { core::LLVMGetTypeKind(llvm_type) } { 77 | LLVMVoidTypeKind => Type::Void, 78 | LLVMHalfTypeKind => Type::Float(FloatWidth::Half), 79 | LLVMFloatTypeKind => Type::Float(FloatWidth::Half), 80 | LLVMDoubleTypeKind => Type::Float(FloatWidth::Double), 81 | LLVMX86_FP80TypeKind => Type::Float(FloatWidth::X86_FP80), 82 | LLVMFP128TypeKind => Type::Float(FloatWidth::FP128), 83 | LLVMPPC_FP128TypeKind => Type::Float(FloatWidth::PPC_FP128), 84 | LLVMLabelTypeKind => Type::Label, 85 | LLVMIntegerTypeKind => { 86 | Type::Int(unsafe { core::LLVMGetIntTypeWidth(llvm_type) } as usize) 87 | } 88 | LLVMFunctionTypeKind => Type::Function, 89 | LLVMStructTypeKind => Type::Struct, 90 | LLVMArrayTypeKind => Type::Array, 91 | LLVMPointerTypeKind => Type::Pointer, 92 | LLVMVectorTypeKind => Type::Vector, 93 | LLVMMetadataTypeKind => Type::Metadata, 94 | LLVMX86_MMXTypeKind => Type::MMX, 95 | LLVMTokenTypeKind => Type::Token, 96 | // Other types are not yet supported. 97 | _ => unimplemented!(), 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/low_loader/value.rs: -------------------------------------------------------------------------------- 1 | //! LLVM Value Wrappers 2 | //! 3 | //! This module contains wrapping code and types for the LLVM 4 | //! Value. The intention is to prevent exposing raw LLVM types from 5 | //! `low_loader`. 6 | 7 | use super::llvm_sys::prelude::LLVMValueRef; 8 | 9 | /// Wrapped Value Reference 10 | #[derive(Debug, PartialEq)] 11 | pub struct Value(LLVMValueRef); 12 | 13 | // Allow conversion from our wrapped type to the underlying LLVM 14 | // one. 
This is intended more as an escape hatch while converting code 15 | // to use the new safe wrappers rather than as a permanent solution. 16 | impl From for LLVMValueRef { 17 | /// From Value 18 | /// 19 | /// Convert a wrapped value into a raw LLVM value reference. 20 | fn from(v: Value) -> Self { 21 | let Value(inner) = v; 22 | inner 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | //! Expression tree parsing using Top-Down Operator Precedence 2 | //! parsing. 3 | 4 | #![warn(missing_docs)] 5 | 6 | pub mod compile; 7 | pub mod diag; 8 | pub mod low_loader; 9 | pub mod meta; 10 | pub mod sem; 11 | pub mod syntax; 12 | 13 | use crate::compile::*; 14 | use crate::low_loader::targets; 15 | use crate::syntax::text::DUMMY_SPAN; 16 | use crate::syntax::*; 17 | use docopt::Docopt; 18 | use serde::{Deserialize, Deserializer}; 19 | use std::fmt; 20 | use std::path::Path; 21 | use std::process::*; 22 | 23 | /// Usage Information 24 | /// 25 | /// This is a [Docopt] compliant usage description of this program. 26 | /// 27 | /// [Docopt]: http://docopt.org/ 28 | const USAGE: &str = " 29 | Ullage Compiler 30 | 31 | Usage: 32 | ullage [--version --help] 33 | ullage [options] [-o ] [] 34 | 35 | Options: 36 | -h, --help Show this message. 37 | --version Show version. 38 | -O, --optimise= Set the compilation optimisation level. 39 | 0 = off, 1 = low, 2 = medium, 3 = high, s = size. 40 | -o, --output= Write the output to . 41 | --link-kind= Set the link type to perform. 42 | --target= Set the compilation target triple. 43 | --link-mode= Set the type of intermediate assets to produce 44 | for linking. Use `llvmIr`, or `llvmBc`. 45 | --dumpir Dump the LLVM IR for the module. 46 | --dumpast Dump the syntax tree to stdout and exit. 47 | --prettytree Dump a prettified summary of the syntax tree. 
48 | --dumptargets Dump the available targets and exit. 49 | --dumptargetinfo Dump information about the given triple. 50 | "; 51 | 52 | /// Program Arguments 53 | /// 54 | /// Structure to capture the command line arguments for the 55 | /// program. This is filled in for us by Docopt. 56 | #[derive(Debug, Deserialize)] 57 | struct Args { 58 | flag_output: Option, 59 | flag_optimise: Option, 60 | flag_target: Option, 61 | flag_link_mode: Option, 62 | arg_file: Option, 63 | 64 | // TODO: maybe move these dump options into a single flag? 65 | flag_dumpast: bool, 66 | flag_prettytree: bool, 67 | flag_dumpir: bool, 68 | flag_dumptargets: bool, 69 | flag_dumptargetinfo: bool, 70 | } 71 | 72 | /// Optimisation Level 73 | /// 74 | /// Used to hold the requested optimisation level 75 | #[derive(Debug)] 76 | enum OptFlag { 77 | /// numeric optimisation level 78 | Numeric(u64), 79 | /// size optimisation 80 | Size, 81 | } 82 | 83 | /// Liker Mode 84 | /// 85 | /// Chose the type of intermediate assets to produce when 86 | /// performnig the link. 87 | #[derive(Debug, Deserialize)] 88 | enum LinkMode { 89 | /// Intermediate langauge files 90 | #[serde(rename = "il")] 91 | LlvmIr, 92 | /// Bitcode files 93 | #[serde(rename = "bc")] 94 | LlvmBc, 95 | /// Native Objects 96 | #[serde(rename = "obj")] 97 | Objects, 98 | } 99 | 100 | /// Custom Deserialiser for Optimisation Flags 101 | /// 102 | /// This deserialiser will handle both numeric values and 's' or 103 | /// 'size'. Numbers greater than 3 are accepted, and transformed into 104 | /// range when converting to the stronger `OptimisationLevel` type. 
105 | impl<'de> Deserialize<'de> for OptFlag { 106 | fn deserialize(d: D) -> Result 107 | where 108 | D: Deserializer<'de>, 109 | { 110 | struct OptFlagVisitor; 111 | impl<'de> serde::de::Visitor<'de> for OptFlagVisitor { 112 | type Value = OptFlag; 113 | 114 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 115 | formatter.write_str("a number from range 0..3, or s for size") 116 | } 117 | 118 | fn visit_u64(self, n: u64) -> Result 119 | where 120 | E: serde::de::Error, 121 | { 122 | Ok(OptFlag::Numeric(n)) 123 | } 124 | 125 | fn visit_str(self, value: &str) -> Result 126 | where 127 | E: serde::de::Error, 128 | { 129 | match value { 130 | "size" | "s" => Ok(OptFlag::Size), 131 | s => s.parse::().map(OptFlag::Numeric).map_err(|_| { 132 | let err = format!("Could not deserialize '{}' as optimisation flag", value); 133 | E::custom(err) 134 | }), 135 | } 136 | } 137 | } 138 | 139 | d.deserialize_identifier(OptFlagVisitor) 140 | } 141 | } 142 | 143 | impl From for OptimisationLevel { 144 | fn from(flag: OptFlag) -> Self { 145 | match flag { 146 | OptFlag::Numeric(level) => match level { 147 | 0 => OptimisationLevel::Off, 148 | 1 => OptimisationLevel::Low, 149 | 2 => OptimisationLevel::Med, 150 | _ => OptimisationLevel::High, 151 | }, 152 | OptFlag::Size => OptimisationLevel::Size, 153 | } 154 | } 155 | } 156 | 157 | impl From for linker::Linker { 158 | fn from(mode: LinkMode) -> Self { 159 | linker::Linker::new( 160 | linker::LinkerCommand::default(), 161 | match mode { 162 | LinkMode::LlvmIr => linker::LinkerAssetType::LlvmIr, 163 | LinkMode::LlvmBc => linker::LinkerAssetType::LlvmBc, 164 | LinkMode::Objects => linker::LinkerAssetType::Object, 165 | }, 166 | ) 167 | } 168 | } 169 | 170 | /// Main 171 | /// 172 | /// The main function for `ullage`. Parses the options and runs the 173 | /// selected command. 
174 | fn main() { 175 | let args: Args = Docopt::new(USAGE) 176 | .and_then(|d| { 177 | d.help(true) 178 | .version(Some(meta::descriptive_version())) 179 | .deserialize() 180 | }) 181 | .unwrap_or_else(|e| e.exit()); 182 | 183 | if args.flag_dumptargets { 184 | targets::dump_targets(); 185 | if args.arg_file.is_none() { 186 | exit(0); 187 | } 188 | } 189 | 190 | let triple = args.flag_target.unwrap_or_else(targets::get_default_triple); 191 | let target = targets::Target::from_triple(&triple).unwrap_or_else(|e| { 192 | eprintln!("error: could not create target: {}", e); 193 | exit(1); 194 | }); 195 | 196 | if args.flag_dumptargetinfo { 197 | println!("{}", target); 198 | if args.arg_file.is_none() { 199 | exit(0); 200 | } 201 | } 202 | 203 | let output_path = &args.flag_output.unwrap_or_else(|| "a.out".to_string()); 204 | let output_path = Path::new(&output_path); 205 | 206 | // Load the file into memory, so we can parse it into a syntax tree 207 | let source = if let Some(path) = args.arg_file { 208 | text::SourceText::from_path(Path::new(&path)) 209 | } else { 210 | text::SourceText::from_stdin() 211 | }; 212 | let source = source.unwrap_or_else(|e| { 213 | eprintln!("error: could not read input: {}", e); 214 | exit(1) 215 | }); 216 | 217 | // Parse the module 218 | let tree = syntax::SyntaxTree::parse(&source); 219 | if tree.has_diagnostics() { 220 | eprintln!("error: could not parse source: one or more errors:"); 221 | dump_diagnostics(&source, tree.diagnostics()); 222 | exit(1) 223 | }; 224 | 225 | // Are we just dumping the AST or compiling the whole thing? 
226 | if args.flag_dumpast { 227 | println!("parsed AST: {:#?}", tree.root()); 228 | exit(0); 229 | } 230 | if args.flag_prettytree { 231 | tree.write_to(&mut std::io::stdout()).unwrap(); 232 | exit(0); 233 | } 234 | 235 | let mut options = CompilationOptions::default() 236 | .with_dump_ir(args.flag_dumpir) 237 | .with_opt_level( 238 | args.flag_optimise 239 | .map_or(OptimisationLevel::Off, |o| o.into()), 240 | ); 241 | if let Some(link_mode) = args.flag_link_mode { 242 | let linker = linker::Linker::from(link_mode); 243 | options = options.with_linker(linker); 244 | } 245 | 246 | let comp = match Compilation::new(tree, options) { 247 | Ok(c) => c, 248 | Err(e) => handle_comp_err(&e), 249 | }; 250 | 251 | if comp.has_diagnostics() { 252 | dump_diagnostics(&source, comp.diagnostics()); 253 | let diag_count = comp.diagnostics().len(); 254 | eprintln!("error: compilation failed with {} errors", diag_count); 255 | exit(1); 256 | } 257 | 258 | // Create a compilation, and emit to the output path 259 | let emit_result = comp.emit(&target, &output_path); 260 | 261 | // Print any failures encountered and return a failure status 262 | if let Err(e) = emit_result { 263 | handle_comp_err(&e); 264 | } 265 | } 266 | 267 | /// Write Dignostics to STDERR 268 | /// 269 | fn dump_diagnostics(source: &text::SourceText, diagnostics: &[diag::Diagnostic]) { 270 | for error in diagnostics.iter() { 271 | if error.span == DUMMY_SPAN { 272 | eprintln!("{}:error: {}", source.name(), error.message); 273 | } else { 274 | let pos = source.line_pos(error.span.start()); 275 | eprintln!( 276 | "{}:{}:{}:error: {}", 277 | source.name(), 278 | pos.0, 279 | pos.1, 280 | error.message 281 | ); 282 | let (s, e) = source.line_extents(error.span); 283 | eprintln!(" |"); 284 | let mut line_no = pos.0; 285 | for line in source.slice(s, e).lines() { 286 | eprintln!("{:4} | {}", line_no, line); 287 | line_no += 1; 288 | } 289 | eprintln!(""); 290 | } 291 | } 292 | } 293 | 294 | /// Handles a Compilation 
Error 295 | /// 296 | /// Prints the error to standard output and exits the process. 297 | fn handle_comp_err(err: &CompError) -> ! { 298 | eprintln!("error: compilation error: {}", err); 299 | exit(1); 300 | } 301 | -------------------------------------------------------------------------------- /src/meta.rs: -------------------------------------------------------------------------------- 1 | //! # Meta Module 2 | //! 3 | //! This module contains metadata about the compiler driver. It's 4 | //! basically just used to expose the version number. 5 | 6 | /// Version Number 7 | /// 8 | /// The version number of the crate (as known by Cargo) as a 9 | /// string. If the exe wasn't built by Cargo then this will be 10 | /// empty. 11 | const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); 12 | 13 | /// Retrieve the Version Numer 14 | /// 15 | /// If no version number is available then 'unknown' is returned. 16 | pub fn version() -> &'static str { 17 | VERSION.unwrap_or("unknown") 18 | } 19 | 20 | /// Retrieve the descriptive version 21 | /// 22 | /// This is the program name concatenated with the version number 23 | pub fn descriptive_version() -> String { 24 | format!("ullage {}", version()) 25 | } 26 | -------------------------------------------------------------------------------- /src/sem.rs: -------------------------------------------------------------------------------- 1 | //! Semantic Analysis and Translation 2 | //! 3 | //! This module is responsible for translating the syntactic 4 | //! representation of a program, as produced by the parser, into a 5 | //! semantically rich model ready to be lowered for execution. 
6 | 7 | mod binder; 8 | mod operators; 9 | mod sem_ctx; 10 | mod tree; 11 | mod types; 12 | 13 | pub use self::binder::{Binder, Scope, Symbol}; 14 | pub use self::sem_ctx::SemCtx; 15 | pub use self::tree::{Expression, ExpressionKind}; 16 | pub use self::types::{BuiltinType, Typ}; 17 | -------------------------------------------------------------------------------- /src/sem/operators.rs: -------------------------------------------------------------------------------- 1 | //! # Semantic Operators 2 | //! 3 | //! This module provides semantic undestanding of the builtin 4 | //! operators. The main entry point is the 5 | 6 | use super::types::{BuiltinType, Typ}; 7 | use crate::syntax::*; 8 | 9 | /// The Semantic Operator 10 | /// 11 | /// Semantically bound operator. This is an operator with knowlege of 12 | /// the types it is to be bound to. 13 | pub struct SemOp { 14 | pub lhs_typ: Typ, 15 | pub rhs_typ: Typ, 16 | pub op: InfixOp, 17 | pub result_typ: Typ, 18 | } 19 | 20 | fn num_op(op: InfixOp) -> Option { 21 | Some(SemOp { 22 | lhs_typ: Typ::Builtin(BuiltinType::Number), 23 | rhs_typ: Typ::Builtin(BuiltinType::Number), 24 | op, 25 | result_typ: Typ::Builtin(BuiltinType::Number), 26 | }) 27 | } 28 | 29 | fn comp_op(op: InfixOp) -> Option { 30 | Some(SemOp { 31 | lhs_typ: Typ::Builtin(BuiltinType::Number), 32 | rhs_typ: Typ::Builtin(BuiltinType::Number), 33 | op, 34 | result_typ: Typ::Builtin(BuiltinType::Bool), 35 | }) 36 | } 37 | 38 | /// Find Operator 39 | /// 40 | /// Searches for the result type for a given operator. 
41 | pub fn find_builtin_op(op: InfixOp, lhs_typ: Typ, rhs_typ: Typ) -> Option { 42 | match (op, lhs_typ, rhs_typ) { 43 | (InfixOp::Add, Typ::Builtin(BuiltinType::String), Typ::Builtin(BuiltinType::String)) => { 44 | Some(SemOp { 45 | lhs_typ, 46 | rhs_typ, 47 | op, 48 | result_typ: Typ::Builtin(BuiltinType::String), 49 | }) 50 | } 51 | (InfixOp::Add, Typ::Builtin(BuiltinType::Number), Typ::Builtin(BuiltinType::Number)) => { 52 | num_op(op) 53 | } 54 | (InfixOp::Sub, _, _) | (InfixOp::Mul, _, _) | (InfixOp::Div, _, _) => num_op(op), 55 | 56 | (InfixOp::Eq, _, _) 57 | | (InfixOp::NotEq, _, _) 58 | | (InfixOp::Lt, _, _) 59 | | (InfixOp::LtEq, _, _) 60 | | (InfixOp::Gt, _, _) 61 | | (InfixOp::GtEq, _, _) => comp_op(op), 62 | 63 | _ => None, 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/sem/sem_ctx.rs: -------------------------------------------------------------------------------- 1 | //! Semantic Context 2 | //! 3 | //! This module defines the state that is passes while transforming 4 | //! expressions from AST representation to semantic. 5 | 6 | use super::types::{BuiltinType, Typ}; 7 | use crate::diag::Diagnostic; 8 | use crate::syntax::text::{Ident, SourceText}; 9 | use crate::syntax::tree::TokenKind; 10 | use crate::syntax::TypeRef; 11 | use std::collections::HashMap; 12 | 13 | /// SemCtx Structure 14 | /// 15 | /// Holds the context when trasnforming. This is basically the current 16 | /// type state information along with symbol table. 
17 | pub struct SemCtx<'a> { 18 | /// Symbol Table for Local Variables 19 | locals: Vec>, 20 | /// Named types map 21 | named_types: HashMap, 22 | /// The source text 23 | source: &'a SourceText, 24 | /// A collection of diagnostics emitted when transforming 25 | diagnostics: Vec, 26 | } 27 | 28 | impl<'a> SemCtx<'a> { 29 | /// Create a new Semantic Context 30 | pub fn new(source: &'a SourceText) -> Self { 31 | SemCtx { 32 | locals: vec![HashMap::new()], 33 | named_types: [ 34 | (source.intern("String"), Typ::Builtin(BuiltinType::String)), 35 | (source.intern("Bool"), Typ::Builtin(BuiltinType::Bool)), 36 | (source.intern("Number"), Typ::Builtin(BuiltinType::Number)), 37 | ] 38 | .iter() 39 | .cloned() 40 | .collect(), 41 | source, 42 | diagnostics: Vec::new(), 43 | } 44 | } 45 | 46 | /// Find Type in Context 47 | /// 48 | /// Returns the `sem::Typ` declaration for the type if one is 49 | /// available. 50 | pub fn sem_ty(&self, ast_ty: &TypeRef) -> Option { 51 | Some(match *ast_ty { 52 | TypeRef::Unit(..) => Typ::Unit, 53 | TypeRef::Simple(ref name) => { 54 | let id = match name.kind { 55 | TokenKind::Word(id) => id, 56 | _ => panic!("Expected word token"), 57 | }; 58 | return self.named_types.get(&id).cloned(); 59 | } 60 | // TODO: array and tuple types 61 | TypeRef::Array(..) => unimplemented!("array types are not yet supported"), 62 | TypeRef::Tuple(..) => unimplemented!("tuple types are not yet supported"), 63 | TypeRef::Missing => panic!("Can't lower missing type"), 64 | }) 65 | } 66 | 67 | /// Add Local 68 | /// 69 | /// Inserts a local declaration into the locals map. 
70 | pub fn add_local(&mut self, id: Ident, typ: Typ) { 71 | self.locals[0].insert(id, typ); 72 | } 73 | 74 | /// Find a Local Declaration 75 | pub fn find_local(&self, id: Ident) -> Option { 76 | self.locals[0].get(&id).cloned() 77 | } 78 | 79 | /// Push Scope 80 | /// 81 | /// Add a new empty scope to the top of the scope stack 82 | pub fn push_scope(&mut self) { 83 | self.locals.push(HashMap::new()) 84 | } 85 | 86 | /// Pop Scope 87 | /// 88 | /// Remove and discard the top scope from the stack 89 | pub fn pop_scope(&mut self) { 90 | self.locals.pop(); 91 | } 92 | 93 | /// Borrow the Source 94 | pub fn source(&self) -> &SourceText { 95 | &self.source 96 | } 97 | 98 | /// Emit a diagnostic into the context 99 | pub fn emit(&mut self, diagnostic: Diagnostic) { 100 | self.diagnostics.push(diagnostic); 101 | } 102 | 103 | /// Check if there are any diagnostics in the translation session 104 | pub fn has_diagnostics(&self) -> bool { 105 | !self.diagnostics.is_empty() 106 | } 107 | 108 | /// Get the diagnostics 109 | pub fn into_diagnostics(self) -> Vec { 110 | self.diagnostics 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/sem/tree.rs: -------------------------------------------------------------------------------- 1 | //! Semantic Tree 2 | //! 3 | //! This module contains the types used to construct the 4 | //! decorated/semantic expression tree. 5 | 6 | use super::types::*; 7 | use crate::syntax::{Constant, InfixOp, PrefixOp}; 8 | 9 | /// A Function Decclaration 10 | /// 11 | /// Represents the context contained in the semantic tree for a 12 | /// function declaration. Defined as a struct for convenience. 
13 | #[derive(Debug, PartialEq)] 14 | pub struct FnDecl { 15 | /// The declaration's logical name 16 | pub ident: String, 17 | 18 | /// The return type of the function 19 | pub ret_ty: Typ, 20 | 21 | /// Parameters to the function 22 | pub params: Vec, 23 | 24 | /// The body of the function 25 | /// 26 | /// The function's return is the value of the expression 27 | pub body: Box, 28 | } 29 | 30 | /// Variable Declaration 31 | /// 32 | /// Represents the binding of a given type to an identifier to create 33 | /// a variable. Used both for local variable declarations as well as 34 | /// function parameters. 35 | #[derive(Debug, PartialEq)] 36 | pub struct VarDecl { 37 | /// The logical name of the declataion 38 | /// 39 | /// FIXME: proper idents rather than strings in the bound tree. 40 | pub ident: String, 41 | 42 | /// The type of the identifier, if one was specified or inferred. 43 | pub ty: Typ, 44 | } 45 | 46 | /// A Semantically Decorated Expression 47 | /// 48 | /// This struct represents the expression tree after semantic 49 | /// analysis. This is no longer guaranteed to be a a lieral 50 | /// representation of the code as it was written. 51 | #[derive(Debug, PartialEq)] 52 | pub struct Expression { 53 | /// The contents of this expression 54 | /// 55 | /// This is a union of the different expression kinds. 56 | pub kind: ExpressionKind, 57 | 58 | /// The type of this node 59 | /// 60 | /// All expressions in the tree will have some form of type. Error 61 | /// expressions or expressions where the type can't be calculated 62 | /// due to a syntax or semantic error in the program are typed 63 | /// `Typ::Error`. Parts of the tree where the type has yet to be 64 | /// inferred are typed `Typ::Unknown`. 65 | /// 66 | /// Before lowering a well-formed tree should contain neither of 67 | /// these types. 
68 | pub typ: Typ, 69 | } 70 | 71 | /// The Expression Kind Enum 72 | /// 73 | /// This enum contains a variant for the different types of expression 74 | /// in the semantic tree. This is similar to the `syntax::Expression` 75 | /// enum however some information may have been elided or reordered to 76 | /// better suit the lowering process. 77 | #[derive(Debug, PartialEq)] 78 | pub enum ExpressionKind { 79 | /// Invalid Expression 80 | /// 81 | /// Emitted when transformation or binding can't produce a 82 | /// properly typed or bound expression. This should only be 83 | /// produced if a diagnostic has already been raised. Further 84 | /// passes can safely return error if an `Error` expression is 85 | /// encountered to avoid error cascades. 86 | Error, 87 | 88 | /// Identifier 89 | /// 90 | /// Variable reference. If this is an lvalue then it represents a 91 | /// write to the named variable, otherwise a read. Identifier 92 | /// expressions appear in other compound expressions such as 93 | /// functions when those expressions reference bound values. 94 | Identifier(String), 95 | 96 | /// Literal Value 97 | /// 98 | /// A constant value. This is just plucked straight from the 99 | /// syntax tree. 100 | Literal(Constant), 101 | 102 | /// A prefix operator 103 | Prefix(PrefixOp, Box), 104 | 105 | /// An infix operator 106 | /// 107 | /// This only counts comparison and arithmetic operators as infix 108 | /// operators. Assignment is handled by the assignment node. 109 | Infix(Box, InfixOp, Box), 110 | 111 | /// Call Expression 112 | /// 113 | /// Represents the applicaiton of arguments to a function. The 114 | /// callee is represented as an expression as more than just 115 | /// identifiers are callable. 
116 | Call(Box, Vec), 117 | 118 | /// Assignment 119 | Assignment(String, Box), 120 | 121 | /// Index Expression 122 | Index(Box, Box), 123 | 124 | /// If Then Else 125 | IfThenElse(Box, Box, Box), 126 | 127 | /// Function Declaraiton 128 | /// 129 | /// A function declaration expression both registers a function in 130 | /// the symbol table and represents the callable function itself. 131 | Function(FnDecl), 132 | 133 | /// Loop with Condition 134 | Loop(Box, Box), 135 | 136 | /// A Sequence of Expressions 137 | Sequence(Vec), 138 | 139 | /// Print Expression 140 | /// 141 | /// Converts the inner expression to a string and prints it to 142 | /// standard output. This underpins the spec tests by allowing 143 | /// simple output. 144 | Print(Box), 145 | 146 | /// Variable Declaration 147 | Declaration(VarDecl, bool, Box), 148 | } 149 | 150 | impl Expression { 151 | /// Create a New Expression from parts 152 | /// 153 | /// Constructs a new semantic expression tree node from 154 | /// constituent parts. The type information for a given node can 155 | /// be set to `Typ::Unknown` if no type inference has yet been run 156 | /// for this expression. 157 | pub fn new(kind: ExpressionKind, typ: Typ) -> Self { 158 | Expression { kind, typ } 159 | } 160 | 161 | /// Create an Error Expresion 162 | /// 163 | /// Convenience function for returning error expressions. Error 164 | /// expressions have a kind of `ExpressionKind::Error` and a type 165 | /// of `Typ::Error`. They are used to mark invalid or 166 | /// uncalculateable portions of the bound tree. 167 | pub fn error() -> Self { 168 | Expression::new(ExpressionKind::Error, Typ::Error) 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/sem/types.rs: -------------------------------------------------------------------------------- 1 | //! Rich Type Information 2 | //! 3 | //! This module contains the `Typ` structure which defines the 4 | //! 
properties of builtin and user-specified types along with the 5 | //! global type registry which is used during compilation for type 6 | //! checking and inferrance. 7 | 8 | use crate::syntax::text::Ident; 9 | use std::borrow::Cow; 10 | 11 | /// Semantic Type 12 | /// 13 | /// This enum defines the different type values that each node in the 14 | /// semantic tree could have. 15 | #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 16 | pub enum Typ { 17 | /// The given expression is invalid. Futher binding or inference 18 | /// based off this type is meaningless. 19 | Error, 20 | 21 | /// Type yet to be inferred. 22 | Unknown, 23 | 24 | /// The unit type 25 | Unit, 26 | 27 | /// One of the given basic types in the language. 28 | Builtin(BuiltinType), 29 | 30 | /// A function. We can't store the type in here properly as that 31 | /// would break the `Copy` of `typ`. Instead we just store the 32 | /// ident of the function to use later when looking it back up. 33 | /// It's major HAXX. 34 | /// 35 | /// # Issues 36 | /// 37 | /// FIXME: We need to create a sepration between the 38 | /// trivially-copyable `Typ` and some `TypeInfo` which contains 39 | /// the full information for the type rather than using this 40 | /// id-stashing workaround 41 | Function(Ident), 42 | } 43 | 44 | impl Typ { 45 | /// Returns the name of a given type 46 | pub fn name(&self) -> Cow<'_, str> { 47 | Cow::Borrowed(match *self { 48 | Typ::Error => "!ERROR!", 49 | Typ::Unknown => "_", 50 | Typ::Unit => "()", 51 | Typ::Builtin(ref b) => match *b { 52 | BuiltinType::Number => "Number", 53 | BuiltinType::Bool => "Bool", 54 | BuiltinType::String => "String", 55 | }, 56 | Typ::Function(id) => return Cow::Owned(format!("Function({:?})", id)), 57 | }) 58 | } 59 | } 60 | 61 | /// The fixed builtin types 62 | #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 63 | pub enum BuiltinType { 64 | /// 64 bit numerical value 65 | Number, 66 | /// Boolean 67 | Bool, 68 | /// String type. 
This is a pointer-lenght pair 69 | String, 70 | } 71 | -------------------------------------------------------------------------------- /src/syntax.rs: -------------------------------------------------------------------------------- 1 | //! Syntax parsing and expression tree 2 | //! 3 | //! This module provides a simple abstract syntax tree, and a parser 4 | //! implementation which recognises a simple lanugage using 5 | //! Pratt-style operator precedence parsing. 6 | 7 | mod node; 8 | pub mod parse; 9 | pub mod syntax_builder; 10 | pub mod text; 11 | pub mod tree; 12 | 13 | pub use self::node::*; 14 | pub use self::tree::expression::*; 15 | pub use self::tree::operators::*; 16 | pub use self::tree::types::*; 17 | pub use self::tree::*; 18 | -------------------------------------------------------------------------------- /src/syntax/node.rs: -------------------------------------------------------------------------------- 1 | //! Syntax node abstractions 2 | 3 | use super::text::{SourceText, Span}; 4 | 5 | /// Syntax Node 6 | /// 7 | /// This trait abstracts over things which can appear in a given 8 | /// `SyntaxTree`. 9 | pub trait SyntaxNode { 10 | /// A short description of the node. Used for tree pretty 11 | /// printing. 12 | fn description(&self, source: &SourceText) -> std::borrow::Cow; 13 | 14 | /// Get the span this node covers in the tree 15 | /// 16 | /// If the node has no real location then `DUMMY_SPAN` is returned 17 | fn span(&self) -> Span; 18 | } 19 | -------------------------------------------------------------------------------- /src/syntax/syntax_builder.rs: -------------------------------------------------------------------------------- 1 | //! Syntax Tree Factory 2 | //! 3 | //! Ergonomic methods to build new expression trees 4 | //! 5 | //! Used for building expression trees by hand in code rather than the 6 | //! parser. 
7 | 8 | use super::text::Ident; 9 | use super::*; 10 | 11 | /// Build a raw string literal 12 | /// 13 | /// Takes a string and builds a string literal expression from it. The 14 | /// token is stubbed and will contian a dummy span. 15 | pub fn raw_string>(value: S) -> Expression { 16 | let value = value.into(); 17 | Expression::constant_string( 18 | Token::new(TokenKind::Literal(Literal::RawString(value.clone()))), 19 | value, 20 | ) 21 | } 22 | 23 | /// Constant Boolean Expression 24 | /// 25 | /// Takes a given bool value and builds a stubbed-out token for 26 | /// it. The token will contain a dummy span. 27 | pub fn const_bool(value: bool) -> Expression { 28 | Expression::constant_bool(word(if value { Ident::True } else { Ident::False }), value) 29 | } 30 | 31 | /// Constant numeric value 32 | /// 33 | /// Takes a given numeric value and builds a stubbed-out token for 34 | /// it. The token will contain a dummy span. 35 | pub fn const_num(value: i64) -> Expression { 36 | Expression::constant_num( 37 | Token::new(TokenKind::Literal(Literal::Number(value))), 38 | value, 39 | ) 40 | } 41 | 42 | /// Identifier Expression. Reads a value from a variable or provides a 43 | /// reference to a function or other named item. 44 | pub fn ident_expr(value: Ident) -> Expression { 45 | Expression::identifier(word(value), value) 46 | } 47 | 48 | /// Word token from identifier. Wraps the identifier in a token with 49 | /// missing position information 50 | pub fn word(id: Ident) -> Token { 51 | Token::new(TokenKind::Word(id)) 52 | } 53 | -------------------------------------------------------------------------------- /src/syntax/text.rs: -------------------------------------------------------------------------------- 1 | //! Source Text 2 | //! 3 | //! This module contains abstractions relating to the text of the 4 | //! program. The main memer is the `SourceText` structure which 5 | //! provides backing for the source code as well as line information. 6 | //! 7 | //! 
Positions within the source are represnted by the `Pos`, `Span` 8 | //! and `Location` types. 9 | 10 | mod intern; 11 | mod position; 12 | mod source_text; 13 | 14 | pub use self::intern::{Ident, Interner}; 15 | pub use self::position::{Pos, Span, DUMMY_SPAN}; 16 | pub use self::source_text::SourceText; 17 | -------------------------------------------------------------------------------- /src/syntax/text/intern.rs: -------------------------------------------------------------------------------- 1 | //! String Interning 2 | //! 3 | //! This module contains types to model interned strings. The idea is 4 | //! that a string can be stored in an `Interner` only once. The 5 | //! contents of the string can't be accessed without the `Interner`, 6 | //! but interned strings can be compared for equality quickly. 7 | 8 | use indexmap::IndexSet; 9 | 10 | /// Interner 11 | /// 12 | /// Keeps a list of intered strings and a map to look them up. 13 | pub struct Interner { 14 | lookup: IndexSet, 15 | } 16 | 17 | /// Interned String 18 | /// 19 | /// Represents a kind of interned string value. Known keywords appear 20 | /// in here to allow them to be easily matched. 21 | #[derive(Debug, PartialEq, Hash, Eq, Copy, Clone)] 22 | pub enum Ident { 23 | /// the `else` keyword 24 | Else, 25 | /// the `end` keyword 26 | End, 27 | /// the `false` keyword 28 | False, 29 | /// the `fn` keyword 30 | Fn, 31 | /// the `if` keyword 32 | If, 33 | /// the `let` keyword 34 | Let, 35 | /// the `print` keyword 36 | Print, 37 | /// the `true` keyword 38 | True, 39 | /// the `unless` keyword 40 | Unless, 41 | /// the `until` keyword 42 | Until, 43 | /// the `var` keyword 44 | Var, 45 | /// the `while` keyword 46 | While, 47 | /// Other unknown identifier values. 
48 | Unknown(usize), 49 | } 50 | 51 | impl Interner { 52 | /// Create an Empty Interner 53 | pub fn new() -> Self { 54 | Default::default() 55 | } 56 | 57 | /// Intern a String 58 | /// 59 | /// If the string is already in this `Interner` then the existing 60 | /// inered string is returned. If this is a new string a new 61 | /// intern value is allocated and returned. 62 | pub fn intern(&mut self, value: &str) -> Ident { 63 | match value { 64 | "else" => Ident::Else, 65 | "end" => Ident::End, 66 | "false" => Ident::False, 67 | "fn" => Ident::Fn, 68 | "if" => Ident::If, 69 | "let" => Ident::Let, 70 | "print" => Ident::Print, 71 | "true" => Ident::True, 72 | "unless" => Ident::Unless, 73 | "until" => Ident::Until, 74 | "var" => Ident::Var, 75 | "while" => Ident::While, 76 | _ => self.intern_unknown(value), 77 | } 78 | } 79 | 80 | /// Intern a New Value 81 | /// 82 | /// When we have checked that the value isn't a known ident this 83 | /// can be used to create a new `Ident::Unknown` entry in the 84 | /// table. 85 | fn intern_unknown(&mut self, value: &str) -> Ident { 86 | let (index, _) = self.lookup.insert_full(value.into()); 87 | Ident::Unknown(index) 88 | } 89 | 90 | /// Borrow the Interned value 91 | /// 92 | /// Used to conver the interned value back to a string. 
93 | pub fn interned_value(&self, ident: Ident) -> &str { 94 | match ident { 95 | Ident::Else => "else", 96 | Ident::End => "end", 97 | Ident::False => "false", 98 | Ident::Fn => "fn", 99 | Ident::If => "if", 100 | Ident::Let => "let", 101 | Ident::Print => "print", 102 | Ident::True => "true", 103 | Ident::Unless => "unless", 104 | Ident::Until => "until", 105 | Ident::Var => "var", 106 | Ident::While => "while", 107 | Ident::Unknown(index) => self.lookup.get_index(index).map(|s| &s[..]).unwrap_or(""), 108 | } 109 | } 110 | } 111 | 112 | impl Default for Interner { 113 | fn default() -> Self { 114 | Interner { 115 | lookup: IndexSet::default(), 116 | } 117 | } 118 | } 119 | 120 | #[cfg(test)] 121 | mod test { 122 | 123 | use super::*; 124 | 125 | #[test] 126 | fn intern_dedupes_words() { 127 | let mut interner = Interner::default(); 128 | 129 | let foo1 = interner.intern("foo"); 130 | let foo2 = interner.intern("foo"); 131 | let bar = interner.intern("bar"); 132 | 133 | assert_eq!(foo1, foo2); 134 | assert_ne!(foo1, bar); 135 | assert_ne!(foo2, bar); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/syntax/text/position.rs: -------------------------------------------------------------------------------- 1 | //! Syntax Position Information 2 | //! 3 | //! This module provides the types needed to represent positions with 4 | //! a buffer. 5 | 6 | /// Source Buffer Position 7 | /// 8 | /// Used to represent a position within a the source of a compilation 9 | /// session. 10 | #[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)] 11 | pub struct Pos(usize); 12 | 13 | /// Source Buffer Span 14 | /// 15 | /// A span represents a range of positions within the source. Each 16 | /// span is deliniated by the start and end `Pos`s. Spans can be 17 | /// used to identify the extent of lexemes in the AST, and ranges of 18 | /// interest when emitting error information. 
19 | #[derive(Debug, PartialEq, Copy, Clone)] 20 | pub struct Span { 21 | start: Pos, 22 | end: Pos, 23 | } 24 | 25 | /// Dummy Span used when no real span is avaiable. 26 | pub const DUMMY_SPAN: Span = Span { 27 | start: Pos(0), 28 | end: Pos(0), 29 | }; 30 | 31 | impl Pos { 32 | /// Get the Byte Offset 33 | /// 34 | /// Returns the offset from the beginning of the `SourceText` for 35 | /// this position. 36 | pub fn offset(self) -> usize { 37 | self.0 38 | } 39 | } 40 | 41 | impl std::ops::Add for Pos { 42 | type Output = Self; 43 | 44 | fn add(self, other: Pos) -> Self { 45 | Pos(self.0 + other.0) 46 | } 47 | } 48 | 49 | impl From for Pos { 50 | fn from(offset: usize) -> Self { 51 | Pos(offset) 52 | } 53 | } 54 | 55 | impl Span { 56 | /// Create a Span Around two Cursors 57 | /// 58 | /// The returned span starts after the first cursor and finishes 59 | /// before the second one. The cursors themselves can be thought 60 | /// to point 'between' the characters in the buffer. 61 | pub fn new(start: Pos, end: Pos) -> Self { 62 | Span { start, end } 63 | } 64 | 65 | /// Create a zero-width span at a given position 66 | /// 67 | /// The retunred span has the same start and end position. 68 | pub fn new_at(pos: Pos) -> Self { 69 | Span { 70 | start: pos, 71 | end: pos, 72 | } 73 | } 74 | 75 | /// Create a `Span` enclosing two existing spans. This makes a new 76 | /// span with extents that encompas the highest and lowest `Pos` 77 | /// in either span. 
78 | pub fn enclosing(first: Span, second: Span) -> Self { 79 | let start = std::cmp::min(first.start(), second.start()); 80 | let end = std::cmp::max(first.end(), second.end()); 81 | Span::new(start, end) 82 | } 83 | 84 | /// Get the starting position of this span 85 | pub fn start(&self) -> Pos { 86 | self.start 87 | } 88 | 89 | /// Get the end position of this span 90 | pub fn end(&self) -> Pos { 91 | self.end 92 | } 93 | } 94 | 95 | #[cfg(test)] 96 | pub mod test { 97 | 98 | use super::*; 99 | 100 | #[test] 101 | fn pos_from_usize() { 102 | let pos = Pos::from(1); 103 | assert_eq!(Pos(1), pos); 104 | } 105 | 106 | #[test] 107 | fn pos_converts_back_to_offset() { 108 | let pos = Pos::from(123); 109 | assert_eq!(123, pos.offset()); 110 | } 111 | 112 | #[test] 113 | fn span_from_cursor_pair() { 114 | let span = Span::new(1.into(), 3.into()); 115 | assert_eq!(Pos::from(1), span.start); 116 | assert_eq!(Pos::from(3), span.end); 117 | } 118 | 119 | #[test] 120 | fn span_has_start_end() { 121 | let span_loc = Span::new(12.into(), 43.into()); 122 | 123 | assert_eq!(12, span_loc.start().offset()); 124 | assert_eq!(43, span_loc.end().offset()); 125 | } 126 | 127 | #[test] 128 | fn span_at_pos() { 129 | let pos = Pos::from(123); 130 | let span = Span::new_at(pos); 131 | 132 | assert_eq!(123, span.start().offset()); 133 | assert_eq!(123, span.end().offset()); 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/syntax/text/source_text.rs: -------------------------------------------------------------------------------- 1 | //! Source Text 2 | //! 3 | //! This module contains a a structure to represent the source text of 4 | //! the program. It is used to hold the source code in memory for 5 | //! iteration by the lexer. Indexing in to the source is possible 6 | //! using `Location`s, and `Location`s can be turned into `(line, 7 | //! col)` position pairs for displaying in diagnostics. 
8 | 9 | use super::{Ident, Interner, Pos, Span}; 10 | use std::cell::RefCell; 11 | use std::fs::File; 12 | use std::io::{self, prelude::*}; 13 | use std::path::Path; 14 | 15 | /// Source Text Struct 16 | /// 17 | /// A source text is a pair of a string containing the contents of the 18 | /// file or other input and a start position. 19 | pub struct SourceText { 20 | /// The contents of the source text 21 | source: String, 22 | /// The offsets of the beginning of each line. Can be used to 23 | /// convert a character offset into the (line, column) 24 | line_offsets: Vec, 25 | /// The name of this source code. Used when reporting diagnostics. 26 | name: String, 27 | /// String interner to create identifiers 28 | /// 29 | /// FIXME: Should this live here? 30 | interner: RefCell, 31 | } 32 | 33 | impl SourceText { 34 | /// Create a `SourceText` for the given string 35 | pub fn new>(source: T) -> Self { 36 | SourceText::with_name(source, "") 37 | } 38 | 39 | /// Create a `SourceText` with a known name 40 | pub fn with_name, U: Into>(source: T, name: U) -> Self { 41 | let source = source.into(); 42 | let name = name.into(); 43 | let line_offsets = get_line_offsets(&source[..]); 44 | SourceText { 45 | source, 46 | line_offsets, 47 | name, 48 | interner: Default::default(), 49 | } 50 | } 51 | 52 | /// Create a source text from standard input 53 | /// 54 | /// Reads the contents of `io:;stdin` to a buffer and creats a new 55 | /// source tex from that. 56 | pub fn from_stdin() -> io::Result { 57 | let mut s = String::new(); 58 | io::stdin().read_to_string(&mut s)?; 59 | Ok(SourceText::with_name(s, "")) 60 | } 61 | 62 | /// Create a source text from a file 63 | /// 64 | /// Reads the contents of a given file path into a buffer and 65 | /// creates a new source text from that. 
66 | pub fn from_path>(path: P) -> io::Result { 67 | let mut s = String::new(); 68 | File::open(path.as_ref())?.read_to_string(&mut s)?; 69 | Ok(SourceText::with_name( 70 | s, 71 | path.as_ref().display().to_string(), 72 | )) 73 | } 74 | 75 | /// Get the Starting Position 76 | /// 77 | /// Retunrns the index into the buffer which points to the first 78 | /// character. As this is a 'cursor' which points 'between' the 79 | /// characters even an empty source will have at least one 80 | /// distinct position. 81 | pub fn start(&self) -> Pos { 82 | Pos::from(0) 83 | } 84 | 85 | /// Get Line Count 86 | /// 87 | /// Returns the number of lines in the source text. 88 | pub fn line_count(&self) -> usize { 89 | self.line_offsets.len() 90 | } 91 | 92 | /// Get the name of the source text 93 | pub fn name(&self) -> &str { 94 | &self.name 95 | } 96 | 97 | /// Intern a String Value 98 | pub fn intern(&self, value: &str) -> Ident { 99 | self.interner.borrow_mut().intern(value) 100 | } 101 | 102 | /// Lookup the value of an identifier 103 | pub fn interned_value(&self, ident: Ident) -> String { 104 | self.interner.borrow().interned_value(ident).into() 105 | } 106 | 107 | /// Get Line Position 108 | /// 109 | /// Returns the `(line, col)` position of the given position in 110 | /// the source. 
111 | pub fn line_pos>(&self, pos: T) -> (usize, usize) { 112 | let offset = pos.into().offset(); 113 | match self.line_offsets.binary_search(&offset) { 114 | Ok(index) => (index + 1, 0), 115 | Err(index) => { 116 | let nearest_line_start = self.line_offsets[index - 1]; 117 | (index, offset - nearest_line_start) 118 | } 119 | } 120 | } 121 | 122 | /// Get the positions at the extents of the given span 123 | pub fn line_extents(&self, span: Span) -> (Pos, Pos) { 124 | let start_offset = span.start().offset(); 125 | let end_offset = span.end().offset(); 126 | let index = match self.line_offsets.binary_search(&start_offset) { 127 | Ok(index) => index, 128 | Err(index) => index - 1, 129 | }; 130 | let begin_pos = Pos::from(self.line_offsets[index]); 131 | for line_offset in &self.line_offsets[index..] { 132 | if *line_offset > end_offset { 133 | return (begin_pos, Pos::from(*line_offset)); 134 | } 135 | } 136 | // if we couldn't find the start of a line after this one then 137 | // return to the end of the string. 138 | (begin_pos, Pos::from(self.source.len())) 139 | } 140 | 141 | /// Slice into the Source 142 | pub fn slice(&self, start: Pos, end: Pos) -> &str { 143 | &self.source[start.offset()..end.offset()] 144 | } 145 | 146 | /// Walk the Source Characters 147 | pub fn walk_chars(&self, start: Pos) -> impl Iterator + '_ { 148 | self.source[start.offset()..] 
149 | .chars() 150 | .scan(start, |pos, ch| { 151 | let next = Pos::from(pos.offset() + ch.len_utf8()); 152 | *pos = next; 153 | Some((ch, next)) 154 | }) 155 | } 156 | } 157 | 158 | fn get_line_offsets(source: &str) -> Vec { 159 | std::iter::once(0) 160 | .chain(source.match_indices('\n').map(|(idx, _)| idx + 1)) 161 | .collect() 162 | } 163 | 164 | #[cfg(test)] 165 | mod test { 166 | 167 | use super::*; 168 | 169 | #[test] 170 | fn source_empty_has_single_line() { 171 | let source = SourceText::new(""); 172 | assert_eq!(1, source.line_count()); 173 | } 174 | 175 | #[test] 176 | fn source_with_windows_newline() { 177 | let source = SourceText::new("\r\n"); 178 | assert_eq!(2, source.line_count()); 179 | } 180 | 181 | #[test] 182 | fn source_with_multiple_lines() { 183 | let source = SourceText::new( 184 | r#" 185 | fn hello(world: String): String 186 | print 'hello ' + world 187 | end 188 | "#, 189 | ); 190 | assert_eq!(5, source.line_count()); 191 | } 192 | 193 | #[test] 194 | fn source_pos_to_line_col() { 195 | let source = SourceText::new( 196 | r#" 197 | # Modululs remainder 198 | # 199 | # Returns the modulus remainder of n/d 200 | fn mod(n: Number, d: Number): Number 201 | n if n < d else mod(n - d, d) 202 | end 203 | "#, 204 | ); 205 | 206 | assert_eq!((1, 0), source.line_pos(0)); 207 | assert_eq!((2, 1), source.line_pos(2)); 208 | assert_eq!((4, 8), source.line_pos(32)); 209 | assert_eq!((4, 38), source.line_pos(62)); 210 | assert_eq!((5, 0), source.line_pos(63)); 211 | assert_eq!((5, 30), source.line_pos(93)); 212 | assert_eq!((8, 0), source.line_pos(137)); 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /src/syntax/tree.rs: -------------------------------------------------------------------------------- 1 | //! Syntax Tree 2 | //! 3 | //! This module defines the types which make up the parsed syntax 4 | //! tree. This tree defines the full strcuture of a parsed source file 5 | //! 
before any semantic transformation is done. 6 | 7 | pub mod expression; 8 | pub mod operators; 9 | mod seplist; 10 | mod token; 11 | mod trivia; 12 | pub mod types; 13 | 14 | use std::io::{self, prelude::*}; 15 | 16 | use crate::diag::Diagnostic; 17 | use crate::parse::Parser; 18 | use crate::text::SourceText; 19 | 20 | pub use self::seplist::{SepList, SepListBuilder}; 21 | pub use self::token::{Literal, Token, TokenKind}; 22 | pub use self::trivia::{TriviaToken, TriviaTokenKind}; 23 | 24 | use self::expression::Expression; 25 | use super::SyntaxNode; 26 | 27 | /// Syntax tree 28 | /// 29 | /// The syntax tree represents the parsed source of a given file. It 30 | /// contains multiple expressions followed by an end of file token. 31 | pub struct SyntaxTree<'a> { 32 | /// The root of the main expression tree 33 | root: Expression, 34 | /// Diagnostics related to the given tree 35 | diagnostics: Vec, 36 | /// End token 37 | end: Token, 38 | /// The source for this tree 39 | source: &'a SourceText, 40 | } 41 | 42 | impl<'a> SyntaxTree<'a> { 43 | /// Create a new syntax tree 44 | /// 45 | /// The syntax tree represents a single parsed item of source 46 | /// text. Syntax trees are usually constructed by the compiler. 47 | /// 48 | /// # Parameters 49 | /// 50 | /// * `root`: The body of the file. This could be an empty 51 | /// sequence if the file is empty 52 | /// * `diagnostics`: Diagnostics raised in the parsing of the 53 | /// source. 54 | /// * `end`: The closing EOF token. This may have some leading 55 | /// trivia attached and is therefore required for a 56 | /// full-fidelity tree. 
57 | pub fn new( 58 | source: &'a SourceText, 59 | root: Expression, 60 | diagnostics: Vec, 61 | end: Token, 62 | ) -> Self { 63 | SyntaxTree { 64 | root, 65 | diagnostics, 66 | end, 67 | source, 68 | } 69 | } 70 | 71 | /// Parse a tree from source text 72 | pub fn parse(source: &'a SourceText) -> Self { 73 | Parser::new(source).parse() 74 | } 75 | 76 | /// Parse a source tree containing a single expression 77 | pub fn parse_single(source: &'a SourceText) -> Self { 78 | Parser::new(source).parse_single() 79 | } 80 | 81 | /// Get the root of the tree 82 | pub fn root(&self) -> &Expression { 83 | &self.root 84 | } 85 | 86 | /// Get the end token 87 | pub fn end(&self) -> &Token { 88 | &self.end 89 | } 90 | 91 | /// Get diagnostics 92 | pub fn diagnostics(&self) -> &[Diagnostic] { 93 | &self.diagnostics 94 | } 95 | 96 | /// Check if the tree has buffered diagnostics 97 | pub fn has_diagnostics(&self) -> bool { 98 | !self.diagnostics.is_empty() 99 | } 100 | 101 | /// Get the Root Expression 102 | /// 103 | /// Accesses the base of the expression tree. The only other part 104 | /// of the tree is the `end` token. 105 | pub fn root_expression(&self) -> &Expression { 106 | &self.root 107 | } 108 | 109 | /// Access the Borrowed Source 110 | /// 111 | /// Allows access to the source this syntax tree was parsed from. 112 | pub fn source(&self) -> &'a SourceText { 113 | self.source 114 | } 115 | 116 | /// Dump the Expression Tree 117 | /// 118 | /// Walks the subnodes of this tree and prints a text representation 119 | /// of them as an ASCII tree 120 | pub fn write_to(&self, writer: &mut W) -> io::Result<()> 121 | where 122 | W: io::Write, 123 | { 124 | let mut writer = io::BufWriter::new(writer); 125 | let mut prefix = String::new(); 126 | pretty_tree(&mut writer, self.source, self.root(), &mut prefix, "•") 127 | } 128 | } 129 | 130 | /// Walks the subnodes of this tree and prints a text representation 131 | /// of them as an ASCII tree. 
132 | fn pretty_tree( 133 | writer: &mut io::BufWriter, 134 | source: &SourceText, 135 | expr: &Expression, 136 | prefix: &mut String, 137 | lead: &str, 138 | ) -> io::Result<()> 139 | where 140 | W: io::Write, 141 | { 142 | writeln!(writer, "{}{} {}", prefix, lead, expr.description(source))?; 143 | let children: Vec<&Expression> = match expr { 144 | Expression::Identifier(_) => Vec::new(), 145 | Expression::Literal(_) => Vec::new(), 146 | Expression::Prefix(p) => vec![&p.inner], 147 | Expression::Infix(i) => vec![&i.left, &i.right], 148 | Expression::Call(c) => std::iter::once(&*c.callee) 149 | .chain(c.arguments.iter()) 150 | .collect(), 151 | Expression::Index(i) => vec![&i.index, &i.indexee], 152 | Expression::IfThenElse(i) => vec![&i.cond, &i.if_true, &i.if_false], 153 | Expression::Function(f) => vec![&f.body.contents], 154 | Expression::Loop(l) => vec![&l.condition, &l.body.contents], 155 | Expression::Sequence(s) => s.iter().collect(), 156 | Expression::Print(p) => vec![&p.inner], 157 | Expression::Declaration(d) => vec![&d.initialiser], 158 | Expression::Grouping(g) => vec![&g.inner], 159 | }; 160 | 161 | let orig_prefix_len = prefix.len(); 162 | match lead { 163 | "└─" => prefix.push_str(" "), 164 | "├─" => prefix.push_str("│ "), 165 | _ => (), 166 | } 167 | if let Some((last, rest)) = children.split_last() { 168 | for child in rest { 169 | pretty_tree(writer, source, child, prefix, "├─")?; 170 | } 171 | pretty_tree(writer, source, last, prefix, "└─")?; 172 | } 173 | if orig_prefix_len < prefix.len() { 174 | prefix.truncate(orig_prefix_len); 175 | } 176 | Ok(()) 177 | } 178 | 179 | #[cfg(test)] 180 | mod test { 181 | 182 | use super::*; 183 | use crate::syntax::text::{SourceText, DUMMY_SPAN}; 184 | 185 | #[test] 186 | fn tree_without_diagnositcs_reports_false() { 187 | let source = SourceText::new(""); 188 | let tree = SyntaxTree::new( 189 | &source, 190 | Expression::empty(), 191 | Vec::new(), 192 | Token::new(TokenKind::End), 193 | ); 194 | 195 | 
/// Represents an AST prefix operator.
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub enum PrefixOp {
    /// Arithmetic Identity
    ///
    /// Represents the application of the `+` operator to an
    /// expression. This doesn't affect the inner expression at all,
    /// but is accepted for orthogonality with `-`.
    Identity,

    /// Unary Arithmetic Negation
    ///
    /// Represents the application of the `-` operator to an
    /// expression. Can be used to arithmetically negate a computed
    /// expression or literal value.
    Negate,

    /// Unary Boolean Negation
    ///
    /// Represents the application of the `!` operator to an
    /// expression. Can be used to perform boolean negation on an
    /// expression or literal value.
    Not,
}

/// Represents an AST infix operator
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub enum InfixOp {
    /// Assignment Operator (`=`)
    Assign,

    /// Arithmetic Addition (`+`)
    Add,
    /// Arithmetic Subtraction (`-`)
    Sub,
    /// Arithmetic Multiplication (`*`)
    Mul,
    /// Arithmetic Division (`/`)
    Div,

    /// Boolean Equals (`==`)
    Eq,
    /// Boolean Not Equals (`!=`)
    NotEq,
    /// Less than comparison operator (`<`)
    Lt,
    /// Less than or equals operator (`<=`)
    LtEq,
    /// Greater than comparison operator (`>`)
    Gt,
    /// Greater than or equals operator (`>=`)
    GtEq,
}
28 | pub fn new(items: Vec, separators: Vec) -> Self { 29 | SepList { items, separators } 30 | } 31 | 32 | /// Create an empty separated list 33 | /// 34 | /// The new list will contain no items and no separators. This is 35 | /// mainly useful for tests or when fabricating trees by hand. The 36 | /// parser will usually genrate an empty list by calling 37 | /// `SepList::builder().build()` 38 | pub fn empty() -> Self { 39 | SepList::new(Vec::new(), Vec::new()) 40 | } 41 | 42 | /// Create a list builder. This provides a structured way of 43 | /// incrementally building a separated list. 44 | pub fn builder() -> SepListBuilder { 45 | SepListBuilder { 46 | items: Vec::new(), 47 | separators: Vec::new(), 48 | state: Default::default(), 49 | } 50 | } 51 | 52 | /// Borrow the separators as a slice 53 | /// 54 | /// Standard iteration of this collection just accesses the main 55 | /// items. This allows access to the separators too. 56 | pub fn separators(&self) -> &[S] { 57 | &self.separators 58 | } 59 | } 60 | 61 | impl std::ops::Deref for SepList { 62 | type Target = [T]; 63 | 64 | fn deref(&self) -> &[T] { 65 | &self.items 66 | } 67 | } 68 | 69 | /// Fluent typestate API for builing a separated list 70 | pub struct SepListBuilder { 71 | /// The buffered items for this list 72 | items: Vec, 73 | /// The buffered separators for this list 74 | separators: Vec, 75 | /// Phantom state data 76 | state: PhantomData, 77 | } 78 | 79 | /// Initial state for the separated list builder 80 | pub struct Item {} 81 | 82 | /// Separated list builder state when item has been seen 83 | pub struct Separator {} 84 | 85 | impl SepListBuilder { 86 | /// Finish building the list 87 | pub fn build(self) -> SepList { 88 | SepList::new(self.items, self.separators) 89 | } 90 | } 91 | 92 | impl SepListBuilder { 93 | /// Push an item into the separated list and wait for a separator 94 | pub fn push_item(mut self, item: T) -> SepListBuilder { 95 | self.items.push(item); 96 | SepListBuilder { 97 | 
items: self.items, 98 | separators: self.separators, 99 | state: Default::default(), 100 | } 101 | } 102 | } 103 | 104 | impl SepListBuilder { 105 | /// Push a separator onto the list and wait for another item 106 | pub fn push_sep(mut self, sep: S) -> SepListBuilder { 107 | self.separators.push(sep); 108 | SepListBuilder { 109 | items: self.items, 110 | separators: self.separators, 111 | state: Default::default(), 112 | } 113 | } 114 | } 115 | 116 | #[cfg(test)] 117 | mod test { 118 | 119 | use super::*; 120 | 121 | #[test] 122 | fn create_new_seplist() { 123 | let empty = SepList::<(), u32>::new(Vec::new(), Vec::new()); 124 | let with_items = SepList::new(vec![1, 2, 4, 8], vec![',', '!', '*']); 125 | 126 | assert_eq!(0, empty.len()); 127 | assert_eq!(4, with_items.len()); 128 | } 129 | 130 | #[test] 131 | fn seplist_builder() { 132 | let list = SepList::builder() 133 | .push_item(123) 134 | .push_sep(',') 135 | .push_item(456) 136 | .push_sep('.') 137 | .build(); 138 | 139 | assert_eq!(2, list.len()); 140 | assert_eq!(Some(&123), list.get(0)); 141 | assert_eq!(Some(&456), list.get(1)); 142 | assert_eq!(None, list.get(2)); 143 | assert_eq!(Some(&','), list.separators().get(0)); 144 | assert_eq!(Some(&'.'), list.separators().get(1)); 145 | assert_eq!(None, list.separators().get(2)); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/syntax/tree/token.rs: -------------------------------------------------------------------------------- 1 | //! Syntax Token 2 | //! 3 | //! A lexeme in the token stream. Tokens are produced by the 4 | //! `Tokeniser` when parsing a source text. 5 | 6 | use super::super::text::{Ident, Span, DUMMY_SPAN}; 7 | use super::TriviaToken; 8 | use std::fmt; 9 | 10 | /// A Syntax Token 11 | /// 12 | /// Syntax tokens are produced by the lexer and contain metadata about 13 | /// their position in the source text. 
14 | #[derive(Debug)] 15 | pub struct Token { 16 | /// The `TokenKind` for this token. Public to allow matching over 17 | /// different token kinds. 18 | pub kind: TokenKind, 19 | span: Span, 20 | leading: Vec, 21 | trailing: Vec, 22 | } 23 | 24 | /// Literal Value 25 | /// 26 | /// Represents any constant / literal value in the syntax tree. 27 | #[derive(Debug, PartialEq, Clone)] 28 | pub enum Literal { 29 | /// A literal string 30 | RawString(String), 31 | 32 | /// A numeric literal 33 | Number(i64), 34 | } 35 | 36 | /// Token Kind 37 | /// 38 | /// The data held by a token. This is usually just the token type. For 39 | /// some tokens, like `Word` we also store their value. For all token 40 | /// kinds the underlying source can be retrieved from the `Token`'s 41 | /// span. 42 | #[derive(Debug, PartialEq, Clone)] 43 | pub enum TokenKind { 44 | /// A string of alpahbetic characters. This could be a langauge 45 | /// keyword or a variable or type identifier. 46 | Word(Ident), 47 | 48 | /// Literal Value. Represents either a string or numeric literal 49 | /// in the source text. Booleans are not represented by literals 50 | /// and instead are just `Word` tokens. 
51 | Literal(Literal), 52 | 53 | /// The `=` character 54 | Equals, 55 | 56 | /// The `==` operator 57 | DoubleEquals, 58 | 59 | /// The `!` character 60 | Bang, 61 | 62 | /// The `!=` operator 63 | BangEquals, 64 | 65 | /// The `+` character 66 | Plus, 67 | 68 | /// The `-` character 69 | Minus, 70 | 71 | /// The `*` character 72 | Star, 73 | 74 | /// The `/` character 75 | Slash, 76 | 77 | /// The `(` character 78 | OpenBracket, 79 | 80 | /// The `)` character 81 | CloseBracket, 82 | 83 | /// The `[` character 84 | OpenSqBracket, 85 | 86 | /// The `]` character 87 | CloseSqBracket, 88 | 89 | /// The `,` character 90 | Comma, 91 | 92 | /// The `:` character 93 | Colon, 94 | 95 | /// The `<` character 96 | LessThan, 97 | 98 | /// The `<=` operator 99 | LessThanEqual, 100 | 101 | /// The `>` character 102 | MoreThan, 103 | 104 | /// The `>=` operator 105 | MoreThanEqual, 106 | 107 | /// The end of the token stream. This is retuend indefinitely once 108 | /// the lexer reaches the end of the source text. 
109 | End, 110 | } 111 | 112 | impl fmt::Display for Literal { 113 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 114 | match self { 115 | Literal::RawString(s) => write!(f, "'{}'", s), 116 | Literal::Number(n) => write!(f, "{}", n), 117 | } 118 | } 119 | } 120 | 121 | impl fmt::Display for TokenKind { 122 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 123 | write!( 124 | f, 125 | "{}", 126 | match self { 127 | TokenKind::Word(id) => match id { 128 | Ident::Unknown(_) => "identifier", 129 | _ => "keyword", 130 | }, 131 | TokenKind::Literal(lit) => return write!(f, "literal value {}", lit), 132 | TokenKind::Equals => "'='", 133 | TokenKind::DoubleEquals => "'=='", 134 | TokenKind::Bang => "'!'", 135 | TokenKind::BangEquals => "'!='", 136 | TokenKind::Plus => "'+'", 137 | TokenKind::Minus => "'-'", 138 | TokenKind::Star => "'*'", 139 | TokenKind::Slash => "'/'", 140 | TokenKind::OpenBracket => "'('", 141 | TokenKind::CloseBracket => "')'", 142 | TokenKind::OpenSqBracket => "'['", 143 | TokenKind::CloseSqBracket => "']'", 144 | TokenKind::Comma => "','", 145 | TokenKind::Colon => "':'", 146 | TokenKind::LessThan => "'<'", 147 | TokenKind::LessThanEqual => "'<='", 148 | TokenKind::MoreThan => "'>'", 149 | TokenKind::MoreThanEqual => "'>='", 150 | TokenKind::End => "end of file", 151 | } 152 | ) 153 | } 154 | } 155 | 156 | impl Token { 157 | /// Create a Token from a Kind 158 | pub fn new(kind: TokenKind) -> Self { 159 | Token::with_span(DUMMY_SPAN, kind) 160 | } 161 | 162 | /// Create a token from a position and kind 163 | pub fn with_span(span: Span, kind: TokenKind) -> Self { 164 | Token { 165 | span, 166 | leading: Vec::new(), 167 | trailing: Vec::new(), 168 | kind, 169 | } 170 | } 171 | 172 | /// Get the Span of a Token 173 | pub fn span(&self) -> Span { 174 | self.span 175 | } 176 | 177 | /// Inspect the leading trivia 178 | pub fn leading(&self) -> &[TriviaToken] { 179 | &self.leading[..] 
180 | } 181 | 182 | /// Inspect the trailing trivia 183 | pub fn trailing(&self) -> &[TriviaToken] { 184 | &self.trailing[..] 185 | } 186 | 187 | /// Set leading trivia 188 | pub fn with_leading_trivia(self, leading: Vec) -> Self { 189 | Token { leading, ..self } 190 | } 191 | 192 | /// Set trailing trivia 193 | pub fn with_trailing_trivia(self, trailing: Vec) -> Self { 194 | Token { trailing, ..self } 195 | } 196 | 197 | /// Left binding power. This controls the precedence of 198 | /// the symbol when being parsed as an infix operator. 199 | /// 200 | /// Returns the associativity, or binding power, for the given 201 | /// token. This is used when deciding if to parse the `led()` 202 | /// of this token. 203 | pub fn lbp(&self) -> u32 { 204 | match self.kind { 205 | TokenKind::Equals => 10, 206 | 207 | // ternary if 208 | TokenKind::Word(Ident::If) | TokenKind::Word(Ident::Unless) => 20, 209 | 210 | // boolean conditional operators 211 | TokenKind::DoubleEquals 212 | | TokenKind::BangEquals 213 | | TokenKind::LessThan 214 | | TokenKind::LessThanEqual 215 | | TokenKind::MoreThan 216 | | TokenKind::MoreThanEqual => 40, 217 | 218 | // Arithmetic operators 219 | TokenKind::Plus | TokenKind::Minus => 50, 220 | 221 | TokenKind::Star | TokenKind::Slash => 60, 222 | 223 | // Index/Call operators 224 | TokenKind::OpenBracket | TokenKind::OpenSqBracket => 80, 225 | 226 | _ => Self::MIN_LBP, 227 | } 228 | } 229 | 230 | /// The minimum binding power of any token. This is used by the 231 | /// parser to parse root level expressions. 232 | /// 233 | /// For any token `MIN_LBP` is less than or equal to the token's 234 | /// binding power. 235 | pub const MIN_LBP: u32 = 0; 236 | 237 | /// The maximum binding power of any token. This is used by the 238 | /// parser to parse right-associative expressions. 239 | /// 240 | /// For any token `MAX_LBP` is strictly greatre than the token's 241 | /// binding power. 
242 | pub const MAX_LBP: u32 = 100; 243 | } 244 | 245 | impl PartialEq for Token { 246 | fn eq(&self, other: &Token) -> bool { 247 | self.kind == other.kind 248 | && (self.span == DUMMY_SPAN || other.span == DUMMY_SPAN || self.span == other.span) 249 | } 250 | } 251 | 252 | #[cfg(test)] 253 | mod test { 254 | 255 | use super::super::super::text::Pos; 256 | use super::super::TriviaTokenKind; 257 | use super::*; 258 | 259 | #[test] 260 | fn test_token_equality_ignores_dummy_span() { 261 | assert_eq!( 262 | Token::new(TokenKind::OpenBracket), 263 | Token::new(TokenKind::OpenBracket) 264 | ); 265 | assert_eq!( 266 | Token::new(TokenKind::CloseBracket), 267 | Token::with_span(DUMMY_SPAN, TokenKind::CloseBracket) 268 | ); 269 | assert_eq!( 270 | Token::new(TokenKind::Comma), 271 | Token::with_span(Span::new(Pos::from(0), Pos::from(1)), TokenKind::Comma) 272 | ); 273 | assert_ne!( 274 | Token::with_span(Span::new(Pos::from(1), Pos::from(2)), TokenKind::LessThan), 275 | Token::with_span(Span::new(Pos::from(0), Pos::from(1)), TokenKind::LessThan) 276 | ); 277 | } 278 | 279 | #[test] 280 | fn test_token_equality_ignores_trivia() { 281 | assert_eq!( 282 | Token::new(TokenKind::Bang), 283 | Token::new(TokenKind::Bang).with_leading_trivia(vec![TriviaToken::with_span( 284 | Span::new(Pos::from(0), Pos::from(1)), 285 | TriviaTokenKind::Newline 286 | )]) 287 | ); 288 | assert_eq!( 289 | Token::new(TokenKind::Equals).with_trailing_trivia(vec![TriviaToken::with_span( 290 | Span::new(Pos::from(0), Pos::from(1)), 291 | TriviaTokenKind::Whitespace 292 | )]), 293 | Token::new(TokenKind::Equals) 294 | ); 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /src/syntax/tree/trivia.rs: -------------------------------------------------------------------------------- 1 | //! Trivia Tokens 2 | //! 3 | //! Trivia tokens represent parts of a parse which are not important 4 | //! to the semantic analysis of the program. 5 | //! 6 | //! 
The idea of trivia tokens is to allow a parsed syntax tree to be 7 | //! re-written and then serialised to allow code tranformations 8 | //! without loss of things like comments and indentation. 9 | 10 | use super::super::text::Span; 11 | 12 | /// Trivia Token 13 | /// 14 | /// Trivia tokens appear in the leading or trailing trivia of main 15 | /// tokens. They should be attached to the 'closest' token in the 16 | /// token stream, for some value of closest. 17 | #[derive(Debug, PartialEq, Copy, Clone)] 18 | pub struct TriviaToken { 19 | kind: TriviaTokenKind, 20 | span: Span, 21 | } 22 | 23 | /// Trivia Token Kind 24 | /// 25 | /// The data held by a `TriviaToken` 26 | #[derive(Debug, PartialEq, Copy, Clone)] 27 | pub enum TriviaTokenKind { 28 | /// A whitespace token 29 | Whitespace, 30 | /// Unrecognised characters 31 | Junk, 32 | /// A single line comment 33 | Comment, 34 | /// A newline character 35 | Newline, 36 | } 37 | 38 | impl TriviaToken { 39 | /// Create a new triva token 40 | pub fn with_span(span: Span, kind: TriviaTokenKind) -> Self { 41 | TriviaToken { span, kind } 42 | } 43 | 44 | /// Get the `Span` of this trivia 45 | pub fn span(&self) -> Span { 46 | self.span 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/syntax/tree/types.rs: -------------------------------------------------------------------------------- 1 | //! Syntax Types 2 | //! 3 | //! This module contians the structures used in the syntax tree to 4 | //! reference types. 5 | 6 | use super::super::text::{SourceText, Span, DUMMY_SPAN}; 7 | use super::super::{SepList, SyntaxNode}; 8 | use super::Token; 9 | use std::borrow::Cow; 10 | 11 | /// Type Reference 12 | /// 13 | /// Represents a reference to a type. This could be a simple reference 14 | /// to a named type or a complex type such as an array or Tuple. 
15 | #[derive(Debug, PartialEq)] 16 | pub enum TypeRef { 17 | /// Simple Named Type 18 | Simple(Box), 19 | /// The Unit Type 20 | Unit(Box, Box), 21 | /// A non-empty Tuple 22 | Tuple(Box, SepList, Box), 23 | /// An Array Type 24 | Array(Box, Box, Box), 25 | /// Missing type. Used to represent type information being missing 26 | /// at a given location. 27 | Missing, 28 | } 29 | 30 | /// Type Annotation 31 | /// 32 | /// Reference to a type annotated to a variable or function. This is a 33 | /// type reference and the accompanying `:` token. 34 | #[derive(Debug, PartialEq)] 35 | pub struct TypeAnno { 36 | /// The `:` Token 37 | pub anno_tok: Box, 38 | /// The type reference 39 | pub type_ref: TypeRef, 40 | } 41 | 42 | impl TypeRef { 43 | /// Create a New Simple Type 44 | /// 45 | /// A simple type is a direct reference to a non-generic non-array 46 | /// type, such as `Num` or `String`. We keep track of the token 47 | /// and the inner identifier separately for convenience. 48 | pub fn simple(tok: Token) -> Self { 49 | TypeRef::Simple(Box::new(tok)) 50 | } 51 | 52 | /// Create a new Unit Type Reference 53 | /// 54 | /// The unit type is represented as a struct with no contents. It 55 | /// has special meaning in some areas as it can be used to idicate 56 | /// the absence of a value. 57 | pub fn unit(open: Token, close: Token) -> Self { 58 | TypeRef::Unit(Box::new(open), Box::new(close)) 59 | } 60 | 61 | /// Create a Tuple Type 62 | /// 63 | /// A tuple type is an ordered collection of values. Each value 64 | /// can be of a different type. 65 | pub fn tuple(open: Token, inner: SepList, close: Token) -> Self { 66 | if inner.is_empty() { 67 | Self::unit(open, close) 68 | } else { 69 | TypeRef::Tuple(Box::new(open), inner, Box::new(close)) 70 | } 71 | } 72 | 73 | /// Create an Array Type 74 | /// 75 | /// An array type represents a contiguous collection of another 76 | /// type. 
77 | pub fn array(open: Token, inner: TypeRef, close: Token) -> Self { 78 | TypeRef::Array(Box::new(open), Box::new(inner), Box::new(close)) 79 | } 80 | 81 | /// Create a missing type 82 | pub fn missing() -> Self { 83 | TypeRef::Missing 84 | } 85 | } 86 | 87 | impl SyntaxNode for TypeRef { 88 | fn description(&self, source: &SourceText) -> Cow { 89 | match self { 90 | TypeRef::Array(..) => "Type ".into(), 91 | TypeRef::Missing => "Type ".into(), 92 | TypeRef::Simple(t) => { 93 | format!("Type `{}`", source.slice(t.span().start(), t.span().end())).into() 94 | } 95 | _ => "err.into".into(), 96 | } 97 | } 98 | 99 | fn span(&self) -> Span { 100 | match self { 101 | TypeRef::Array(open, _, close) => Span::enclosing(open.span(), close.span()), 102 | TypeRef::Missing => DUMMY_SPAN, 103 | TypeRef::Simple(token) => token.span(), 104 | TypeRef::Tuple(open, _, close) => Span::enclosing(open.span(), close.span()), 105 | TypeRef::Unit(open, close) => Span::enclosing(open.span(), close.span()), 106 | } 107 | } 108 | } 109 | 110 | impl TypeAnno { 111 | /// Create a Type Annotation 112 | /// 113 | /// Constructs a new `TypeAnno` structure with the given 114 | /// annotation separation token and inner type reference. 115 | pub fn new(anno_tok: Token, inner_ty: TypeRef) -> Self { 116 | TypeAnno { 117 | anno_tok: Box::new(anno_tok), 118 | type_ref: inner_ty, 119 | } 120 | } 121 | } 122 | --------------------------------------------------------------------------------