├── .ci
    ├── Dockerfile
    └── buildbase-pipelines.yaml
├── .editorconfig
├── .gitignore
├── .vscode
    └── launch.json
├── Cargo.lock
├── Cargo.toml
├── LICENSE.md
├── README.md
├── STRUCTURE.md
├── azure-pipelines.yaml
├── build.sh
├── docs
    ├── 00-getting-started.md
    ├── 10-syntax.md
    ├── 12-parsing.md
    ├── 15-data-layout.md
    ├── 90-syntax-transforms.md
    └── index.md
├── justfile
├── spec
    ├── README.md
    ├── assignment.ulg
    ├── basics.ulg
    ├── bench
    │   ├── README.md
    │   └── fib.ulg
    ├── bool.ulg
    ├── collatz.ulg
    ├── compare.ulg
    ├── euler1.ulg
    ├── evens.ulg
    ├── factorial.ulg
    ├── fail
    │   ├── README.md
    │   ├── assign_to_immutable.ulg
    │   ├── assign_to_undefined.ulg
    │   ├── badcalls.ulg
    │   ├── badvars.ulg
    │   ├── dupe_fn_param.ulg
    │   ├── if_else_badtypes.ulg
    │   ├── mismatched_assign.ulg
    │   ├── undefined.ulg
    │   ├── undefined_func.ulg
    │   └── wrong_type_var.ulg
    ├── fib.ulg
    ├── fizzbuzz.ulg
    ├── funcfact.ulg
    ├── functions.ulg
    ├── gcd.ulg
    ├── hello.ulg
    ├── hello_world.ulg
    ├── identity.ulg
    ├── if.ulg
    ├── literals.ulg
    ├── long_string.ulg
    ├── malformed
    │   ├── README.md
    │   ├── half_ternary.ulg
    │   ├── invalid_calls.ulg
    │   ├── missing_ident.ulg
    │   ├── param_missing_type.ulg
    │   ├── unexpected_plus.ulg
    │   ├── unterminated_bracket.ulg
    │   ├── unterminated_fn.ulg
    │   ├── unterminated_string.ulg
    │   ├── unterminated_string2.ulg
    │   └── unterminated_string3.ulg
    ├── maybe_invert.ulg
    ├── modulus.ulg
    ├── operators.ulg
    ├── prefix.ulg
    ├── printing.ulg
    ├── scopes.ulg
    ├── string_concat.ulg
    ├── types.ulg
    ├── unimplemneted
    │   ├── README.md
    │   ├── array_types.ulg
    │   └── tuple_types.ulg
    ├── until.ulg
    ├── variables.ulg
    └── while.ulg
├── specs.py
└── src
    ├── compile.rs
    ├── compile
        ├── error.rs
        ├── linker.rs
        ├── lower.rs
        ├── lower_context.rs
        ├── options.rs
        └── string_builtins.rs
    ├── diag.rs
    ├── low_loader.rs
    ├── low_loader
        ├── builder.rs
        ├── context.rs
        ├── function.rs
        ├── module.rs
        ├── pass_manager.rs
        ├── targets.rs
        ├── types.rs
        └── value.rs
    ├── main.rs
    ├── meta.rs
    ├── sem.rs
    ├── sem
        ├── binder.rs
        ├── operators.rs
        ├── sem_ctx.rs
        ├── tree.rs
        └── types.rs
    ├── syntax.rs
    └── syntax
        ├── node.rs
        ├── parse.rs
        ├── parse
            ├── checkparse_tests.rs
            └── tokeniser.rs
        ├── syntax_builder.rs
        ├── text.rs
        ├── text
            ├── intern.rs
            ├── position.rs
            └── source_text.rs
        ├── tree.rs
        └── tree
            ├── expression.rs
            ├── operators.rs
            ├── seplist.rs
            ├── token.rs
            ├── trivia.rs
            └── types.rs


/.ci/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:20.04
 2 | RUN apt-get update && \
 3 | 	apt-get install -y wget curl gnupg2 lsb-release software-properties-common && \
 4 | 	curl -O https://apt.llvm.org/llvm.sh && \
 5 | 	chmod +x llvm.sh && \
 6 | 	./llvm.sh 9 && \
 7 | 	apt-get install -y sudo && \
 8 | 	apt-get install -y python3 && \
 9 | 	apt-get install -y llvm-9-dev && \
10 | 	apt-get install -y clang binutils && \
11 | 	apt-get install -y zlib1g-dev
12 | 


--------------------------------------------------------------------------------
/.ci/buildbase-pipelines.yaml:
--------------------------------------------------------------------------------
 1 | pool:
 2 |   vmImage: 'ubuntu-16.04'
 3 | 
 4 | trigger:
 5 |   paths:
 6 |     include:
 7 |       - '.ci'
 8 | 
 9 | variables:
10 |   dockerId: iwillspeak
11 |   imageName: ullage-linux-build-base
12 | 
13 | steps:
14 | - script: |
15 |     docker build -t $(dockerId)/$(imageName) .
16 |     echo $pswd | docker login -u $(dockerId) --password-stdin
17 |     docker push $(dockerId)/$(imageName)
18 |   workingDirectory: .ci
19 |   env:
20 |     pswd: $(dockerPassword)
21 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | 
 2 | root = true
 3 | 
 4 | [*.rs]
 5 | end_of_line = lf
 6 | charset = utf-8
 7 | trim_trailing_whitespace = true
 8 | indent_style = space
 9 | indent_size = 4
10 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Cargo build output
2 | target/
3 | # Docket output from docs build
4 | docs/build/
5 | # Binaries for spec tests
6 | specbin/
7 | a.out


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // Use IntelliSense to learn about possible attributes.
 3 |     // Hover to view descriptions of existing attributes.
 4 |     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 5 |     "version": "0.2.0",
 6 |     "configurations": [
 7 |         {
 8 |             "type": "lldb",
 9 |             "request": "launch",
10 |             "terminal": "integrated",
11 |             "name": "Debug executable 'ullage'",
12 |             "cargo": {
13 |                 "args": [
14 |                     "build",
15 |                     "--bin=ullage",
16 |                     "--package=ullage"
17 |                 ],
18 |                 "filter": {
19 |                     "kind": "bin"
20 |                 }
21 |             },
22 |             "args": [],
23 |             "cwd": "${workspaceFolder}"
24 |         },
25 |         {
26 |             "type": "lldb",
27 |             "request": "launch",
28 |             "name": "Debug unit tests in executable 'ullage'",
29 |             "cargo": {
30 |                 "args": [
31 |                     "test",
32 |                     "--no-run",
33 |                     "--bin=ullage",
34 |                     "--package=ullage"
35 |                 ],
36 |                 "filter": {
37 |                     "kind": "bin"
38 |                 }
39 |             },
40 |             "args": [],
41 |             "cwd": "${workspaceFolder}"
42 |         }
43 |     ]
44 | }


--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Cargo.
  2 | # It is not intended for manual editing.
  3 | version = 3
  4 | 
  5 | [[package]]
  6 | name = "aho-corasick"
  7 | version = "0.7.18"
  8 | source = "registry+https://github.com/rust-lang/crates.io-index"
  9 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
 10 | dependencies = [
 11 |  "memchr",
 12 | ]
 13 | 
 14 | [[package]]
 15 | name = "autocfg"
 16 | version = "1.1.0"
 17 | source = "registry+https://github.com/rust-lang/crates.io-index"
 18 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 19 | 
 20 | [[package]]
 21 | name = "bitflags"
 22 | version = "1.3.2"
 23 | source = "registry+https://github.com/rust-lang/crates.io-index"
 24 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 25 | 
 26 | [[package]]
 27 | name = "cc"
 28 | version = "1.0.73"
 29 | source = "registry+https://github.com/rust-lang/crates.io-index"
 30 | checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
 31 | 
 32 | [[package]]
 33 | name = "cfg-if"
 34 | version = "1.0.0"
 35 | source = "registry+https://github.com/rust-lang/crates.io-index"
 36 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 37 | 
 38 | [[package]]
 39 | name = "docopt"
 40 | version = "1.1.1"
 41 | source = "registry+https://github.com/rust-lang/crates.io-index"
 42 | checksum = "7f3f119846c823f9eafcf953a8f6ffb6ed69bf6240883261a7f13b634579a51f"
 43 | dependencies = [
 44 |  "lazy_static",
 45 |  "regex",
 46 |  "serde",
 47 |  "strsim",
 48 | ]
 49 | 
 50 | [[package]]
 51 | name = "fastrand"
 52 | version = "1.7.0"
 53 | source = "registry+https://github.com/rust-lang/crates.io-index"
 54 | checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
 55 | dependencies = [
 56 |  "instant",
 57 | ]
 58 | 
 59 | [[package]]
 60 | name = "hashbrown"
 61 | version = "0.12.2"
 62 | source = "registry+https://github.com/rust-lang/crates.io-index"
 63 | checksum = "607c8a29735385251a339424dd462993c0fed8fa09d378f259377df08c126022"
 64 | 
 65 | [[package]]
 66 | name = "indexmap"
 67 | version = "1.9.1"
 68 | source = "registry+https://github.com/rust-lang/crates.io-index"
 69 | checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
 70 | dependencies = [
 71 |  "autocfg",
 72 |  "hashbrown",
 73 | ]
 74 | 
 75 | [[package]]
 76 | name = "instant"
 77 | version = "0.1.12"
 78 | source = "registry+https://github.com/rust-lang/crates.io-index"
 79 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
 80 | dependencies = [
 81 |  "cfg-if",
 82 | ]
 83 | 
 84 | [[package]]
 85 | name = "lazy_static"
 86 | version = "1.4.0"
 87 | source = "registry+https://github.com/rust-lang/crates.io-index"
 88 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 89 | 
 90 | [[package]]
 91 | name = "libc"
 92 | version = "0.2.126"
 93 | source = "registry+https://github.com/rust-lang/crates.io-index"
 94 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
 95 | 
 96 | [[package]]
 97 | name = "llvm-sys"
 98 | version = "90.2.1"
 99 | source = "registry+https://github.com/rust-lang/crates.io-index"
100 | checksum = "d5fe45e64c8db814191fedc1787c3fb8eb2e2c8af897727b3e21df2562c4a01c"
101 | dependencies = [
102 |  "cc",
103 |  "lazy_static",
104 |  "libc",
105 |  "regex",
106 |  "semver 0.9.0",
107 | ]
108 | 
109 | [[package]]
110 | name = "llvm-sys"
111 | version = "100.2.3"
112 | source = "registry+https://github.com/rust-lang/crates.io-index"
113 | checksum = "982d0c7337299c4a91bd5d52a86d26cf47e0e84f3b9017874a0b5c667c3cb612"
114 | dependencies = [
115 |  "cc",
116 |  "lazy_static",
117 |  "libc",
118 |  "regex",
119 |  "semver 0.9.0",
120 | ]
121 | 
122 | [[package]]
123 | name = "llvm-sys"
124 | version = "130.0.4"
125 | source = "registry+https://github.com/rust-lang/crates.io-index"
126 | checksum = "bdb6ea20e8a348f6db0b43a7f009fa7d981d22edf4cbe2e0c7b2247dbb25be61"
127 | dependencies = [
128 |  "cc",
129 |  "lazy_static",
130 |  "libc",
131 |  "regex",
132 |  "semver 0.11.0",
133 | ]
134 | 
135 | [[package]]
136 | name = "memchr"
137 | version = "2.5.0"
138 | source = "registry+https://github.com/rust-lang/crates.io-index"
139 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
140 | 
141 | [[package]]
142 | name = "pest"
143 | version = "2.3.0"
144 | source = "registry+https://github.com/rust-lang/crates.io-index"
145 | checksum = "4b0560d531d1febc25a3c9398a62a71256c0178f2e3443baedd9ad4bb8c9deb4"
146 | dependencies = [
147 |  "thiserror",
148 |  "ucd-trie",
149 | ]
150 | 
151 | [[package]]
152 | name = "proc-macro2"
153 | version = "1.0.40"
154 | source = "registry+https://github.com/rust-lang/crates.io-index"
155 | checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
156 | dependencies = [
157 |  "unicode-ident",
158 | ]
159 | 
160 | [[package]]
161 | name = "quote"
162 | version = "1.0.20"
163 | source = "registry+https://github.com/rust-lang/crates.io-index"
164 | checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
165 | dependencies = [
166 |  "proc-macro2",
167 | ]
168 | 
169 | [[package]]
170 | name = "redox_syscall"
171 | version = "0.2.13"
172 | source = "registry+https://github.com/rust-lang/crates.io-index"
173 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
174 | dependencies = [
175 |  "bitflags",
176 | ]
177 | 
178 | [[package]]
179 | name = "regex"
180 | version = "1.6.0"
181 | source = "registry+https://github.com/rust-lang/crates.io-index"
182 | checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
183 | dependencies = [
184 |  "aho-corasick",
185 |  "memchr",
186 |  "regex-syntax",
187 | ]
188 | 
189 | [[package]]
190 | name = "regex-syntax"
191 | version = "0.6.27"
192 | source = "registry+https://github.com/rust-lang/crates.io-index"
193 | checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
194 | 
195 | [[package]]
196 | name = "remove_dir_all"
197 | version = "0.5.3"
198 | source = "registry+https://github.com/rust-lang/crates.io-index"
199 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
200 | dependencies = [
201 |  "winapi",
202 | ]
203 | 
204 | [[package]]
205 | name = "semver"
206 | version = "0.9.0"
207 | source = "registry+https://github.com/rust-lang/crates.io-index"
208 | checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
209 | dependencies = [
210 |  "semver-parser 0.7.0",
211 | ]
212 | 
213 | [[package]]
214 | name = "semver"
215 | version = "0.11.0"
216 | source = "registry+https://github.com/rust-lang/crates.io-index"
217 | checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6"
218 | dependencies = [
219 |  "semver-parser 0.10.2",
220 | ]
221 | 
222 | [[package]]
223 | name = "semver-parser"
224 | version = "0.7.0"
225 | source = "registry+https://github.com/rust-lang/crates.io-index"
226 | checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
227 | 
228 | [[package]]
229 | name = "semver-parser"
230 | version = "0.10.2"
231 | source = "registry+https://github.com/rust-lang/crates.io-index"
232 | checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7"
233 | dependencies = [
234 |  "pest",
235 | ]
236 | 
237 | [[package]]
238 | name = "serde"
239 | version = "1.0.139"
240 | source = "registry+https://github.com/rust-lang/crates.io-index"
241 | checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6"
242 | dependencies = [
243 |  "serde_derive",
244 | ]
245 | 
246 | [[package]]
247 | name = "serde_derive"
248 | version = "1.0.139"
249 | source = "registry+https://github.com/rust-lang/crates.io-index"
250 | checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb"
251 | dependencies = [
252 |  "proc-macro2",
253 |  "quote",
254 |  "syn",
255 | ]
256 | 
257 | [[package]]
258 | name = "strsim"
259 | version = "0.10.0"
260 | source = "registry+https://github.com/rust-lang/crates.io-index"
261 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
262 | 
263 | [[package]]
264 | name = "syn"
265 | version = "1.0.98"
266 | source = "registry+https://github.com/rust-lang/crates.io-index"
267 | checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
268 | dependencies = [
269 |  "proc-macro2",
270 |  "quote",
271 |  "unicode-ident",
272 | ]
273 | 
274 | [[package]]
275 | name = "tempfile"
276 | version = "3.3.0"
277 | source = "registry+https://github.com/rust-lang/crates.io-index"
278 | checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
279 | dependencies = [
280 |  "cfg-if",
281 |  "fastrand",
282 |  "libc",
283 |  "redox_syscall",
284 |  "remove_dir_all",
285 |  "winapi",
286 | ]
287 | 
288 | [[package]]
289 | name = "thiserror"
290 | version = "1.0.32"
291 | source = "registry+https://github.com/rust-lang/crates.io-index"
292 | checksum = "f5f6586b7f764adc0231f4c79be7b920e766bb2f3e51b3661cdb263828f19994"
293 | dependencies = [
294 |  "thiserror-impl",
295 | ]
296 | 
297 | [[package]]
298 | name = "thiserror-impl"
299 | version = "1.0.32"
300 | source = "registry+https://github.com/rust-lang/crates.io-index"
301 | checksum = "12bafc5b54507e0149cdf1b145a5d80ab80a90bcd9275df43d4fff68460f6c21"
302 | dependencies = [
303 |  "proc-macro2",
304 |  "quote",
305 |  "syn",
306 | ]
307 | 
308 | [[package]]
309 | name = "ucd-trie"
310 | version = "0.1.4"
311 | source = "registry+https://github.com/rust-lang/crates.io-index"
312 | checksum = "89570599c4fe5585de2b388aab47e99f7fa4e9238a1399f707a02e356058141c"
313 | 
314 | [[package]]
315 | name = "ullage"
316 | version = "0.1.0"
317 | dependencies = [
318 |  "docopt",
319 |  "indexmap",
320 |  "libc",
321 |  "llvm-sys 100.2.3",
322 |  "llvm-sys 130.0.4",
323 |  "llvm-sys 90.2.1",
324 |  "serde",
325 |  "tempfile",
326 | ]
327 | 
328 | [[package]]
329 | name = "unicode-ident"
330 | version = "1.0.1"
331 | source = "registry+https://github.com/rust-lang/crates.io-index"
332 | checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
333 | 
334 | [[package]]
335 | name = "winapi"
336 | version = "0.3.9"
337 | source = "registry+https://github.com/rust-lang/crates.io-index"
338 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
339 | dependencies = [
340 |  "winapi-i686-pc-windows-gnu",
341 |  "winapi-x86_64-pc-windows-gnu",
342 | ]
343 | 
344 | [[package]]
345 | name = "winapi-i686-pc-windows-gnu"
346 | version = "0.4.0"
347 | source = "registry+https://github.com/rust-lang/crates.io-index"
348 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
349 | 
350 | [[package]]
351 | name = "winapi-x86_64-pc-windows-gnu"
352 | version = "0.4.0"
353 | source = "registry+https://github.com/rust-lang/crates.io-index"
354 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
355 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "ullage"
 3 | version = "0.1.0"
 4 | authors = ["Will Speak <will@willspeak.me>"]
 5 | description = "The Ullage Programming Language"
 6 | license = "MIT"
 7 | # documentation = "no public docs yet"
 8 | repository = "https://github.com/iwillspeak/ullage"
 9 | readme = "README.md"
10 | keywords = [ "llvm", "parser", "compiler" ]
11 | categories = [ "parsing" ]
12 | edition = "2018"
13 | 
14 | [features]
15 | default = [ "llvm-13" ]
16 | 
17 | [dependencies]
18 | llvm-9 = { package = "llvm-sys", version = "90", optional = true }
19 | llvm-10 = { package = "llvm-sys", version = "100", optional = true }
20 | llvm-13 = { package = "llvm-sys", version = "130", optional = true }
21 | docopt = "1.1"
22 | serde = { version = "1.0", features = ["derive"] }
23 | tempfile = "3.1"
24 | libc = "0.2"
25 | indexmap = "1.5"
26 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # Ullage is Open Source!
 2 | 
 3 | Ullage is copyright 2016 Will Speak and licensed under MIT terms.
 4 | 
 5 | ## The MIT License (MIT)
 6 | Copyright (c) 2016 Will Speak
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11 | 
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Ullage
 2 | 
 3 | [![Build Status][build_status_image]][build_status]
 4 | 
 5 | A statically-typed compiled language defined by a simple grammar.
 6 | 
 7 | ## Current Status
 8 | 
 9 | It is now possible to write simple programs. The following program
10 | computes 9 factorial:
11 | 
12 |     fn fact(n: Number): Number
13 |         var acc = 1
14 |         var i = 1
15 |         while i < n
16 |             acc = acc * i
17 |             i = i + 1
18 |         end
19 |         acc
20 |     end
21 |     
22 |     print fact(9) # => 362880
23 | 
24 | You can also mess around with constant strings:
25 | 
26 |     # FizzBuzz
27 |     #
28 |     # Implementation of the legendary `FizzBuzz` algorithm.
29 |     fn fizzbuzz(n: Number): String
30 |         (print 'fizzbuzz') if mod(n, 15) == 0 else
31 |         (print 'fizz') if mod(n, 3) == 0 else
32 |         (print 'buzz') if mod(n, 5) == 0 else
33 |         print_num(n)
34 |     end
35 | 
36 | ## Building and Testing
37 | 
38 | The main build is performed by `cargo`. For running the functional
39 | tests and benchmarks you'll need Python and to `cargo install just`. The suggested process is to
40 | use the `build.sh` script:
41 | 
42 |  * `$ ./build.sh` will build the compiler `target/release/ullage`.
43 |  * `$ ./build.sh test` will build the compiler and run the test suite
44 |    from `spec/`.
45 |  * `$ ./build.sh bench` will run the benchmarks from `spec/bench/`.
46 | 
47 | ## License
48 | 
49 | Ullage is open source, under the [MIT License](LICENSE.md).
50 | 
51 | ## Features and Progress
52 | 
53 |  * [ ] Custom data structures
54 |  * [ ] Pattern matching
55 |  * [ ] First-class functions
56 | 
57 | ### Lexer
58 | 
59 |  * [x] Recognise words, numbers, comments, operators and white-space
60 |  * [x] Position information on each token
61 |  * [ ] Interpolated strings
62 |  * [x] Expose whitespace to the parser
63 | 
64 | ### Parser
65 | 
66 |  * [x] Parse base constructs
67 |  * [ ] For loops and iterators
68 |  * [ ] Traditional `if` blocks
69 |  * [x] Keep track of _all_ underlying tokens
70 |  * [x] Expose position & span information on syntax nodes
71 |  * [ ] Round-trippable/pretty-printable trees
72 | 
73 | ### Code Generation / Lowering
74 | 
75 |  * [x] Create LLVM module and lower basic constructs
76 |  * [ ] Array indexing
77 |  * [x] Arbitrary types for local variables
78 |  * [ ] Heap allocated types
79 |     * [x] Lowering of `String` type
80 |     * [ ] User-defined types
81 |     * [ ] RC garbage collection (#26)
82 |  * [ ] Library output types (LLVM ir, LLVM bc, object, staticlib, dylib, exe)
83 |  * [x] Control of target machine & features
84 |  * [x] Optimisation
85 |  * [ ] Linker support:
86 |    * [x] `clang` - macOS linker default
87 |    * [ ] gold - GNU ld
88 |    * [ ] lld/llvm-link
89 |    * [ ] Microsoft LINK
90 | 
91 | 
92 |  [build_status_image]: https://dev.azure.com/iwillspeak/GitHub/_apis/build/status/iwillspeak.ullage?branchName=main
93 |  [build_status]: https://dev.azure.com/iwillspeak/GitHub/_build/latest?definitionId=2&branchName=main
94 | 


--------------------------------------------------------------------------------
/STRUCTURE.md:
--------------------------------------------------------------------------------
 1 | # Structure of the Repository
 2 | 
 3 | This is an outline of the planned structure of the repository. It details the general layout, the Rust module hierarchy, and some of the significant classes.
 4 | 
 5 | ## Important Folders
 6 | 
 7 | The language parser and compiler are written in Rust. The source lives in the `src/` folder.
 8 | 
 9 | Functional tests for the language consist of a set of source files in `spec/`. These files contain specially formatted comments which are interpreted by the Python test runner `specs.py` when run.
10 | 
11 | Documentation, written in Markdown, is in the `docs/` folder. It can be compiled into a static site with [`docket`](http://willspeak.me/docket/).
12 | 
13 | ## Rust modules
14 | 
15 | There are four main Rust modules in the project. Three of them (`syntax`, `sem` and `compile`) correspond to the main stages of the compilation pipeline. The final module, `low_loader`, contains abstractions around LLVM itself.
16 | 
17 | ### `syntax`
18 | 
19 | This module contains all of the logic required to parse source text into a syntax expression tree (AST).
20 | 
21 | * `Token` - Represents a single lexeme in the language.
22 | * `Expression` - Represents a node in the syntax expression tree. Expressions are composed from one or more `Token`s.
23 | * `TypeRef` - Represents a reference to a type. This could be a simple type like `Number`, or a more complex one like `[Number]`.
24 | 
25 | ### `sem`
26 | 
27 | This module is responsible for semantic analysis. It contains a more detailed semantic expression tree, a syntax expression tree visitor to transform a syntax tree into a semantic tree, and a set of transforms which can be applied to the semantic tree.
28 | 
29 | The semantic expression tree contains metadata about types and has each identifier resolved.
30 | 
31 | ### `compile`
32 | 
33 | This module is responsible for lowering and evaluating semantic expression trees.
34 | 
35 | * `Compiler` - An expression visitor which walks `sem::Expression`s and compiles them into native modules.
36 | * `LowerContext` - The context required when lowering a given expression tree to LLVM IR.
37 | 
38 | ### `low_loader`
39 | 
40 | This module contains high-level safe wrappers around the LLVM API. It's not intended to expose the whole API. Instead it provides just the modules and abstractions needed to make working with LLVM ergonomic. I'd like to get this to the point where it provides enough abstraction so that there are no LLVMisms in the interface. It would then be possible to replace the backend with something like [Cranelift](https://github.com/CraneStation/cranelift). Maybe even depending on the target we are compiling for (e.g. Cranelift for WASM or JIT or whatever).
41 | 


--------------------------------------------------------------------------------
/azure-pipelines.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   containers:
 3 |   - container: buildbase
 4 |     image: 'iwillspeak/ullage-linux-build-base'
 5 |     options: '-m 4G'
 6 | 
 7 | jobs:
 8 | - job: macOS
 9 |   pool:
10 |     vmImage: 'macOS-10.15'
11 |   strategy:
12 |     matrix:
13 |       LLVM9:
14 |         llvm_tag: 'llvm\@9'
15 |         features: 'llvm-9'
16 |         llvm_ver: '90'
17 |       LLVM10:
18 |         llvm_tag: 'llvm'
19 |         features: 'llvm-10'
20 |         llvm_ver: '100'
21 |   steps:
22 |     - script: 'brew install $(llvm_tag)'
23 |       displayName: Brew install LLVM
24 |     - script: "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o rustup.sh && sh rustup.sh -y"
25 |       displayName: Rustup
26 |     - script: |
27 |         export PATH=/usr/local/bin:$PATH             # Local bin (brew)
28 |         source ~/.cargo/env
29 |         export LLVM_SYS_$(llvm_ver)_PREFIX=/usr/local/opt/$(llvm_tag)
30 |         export LLVM_CONFIG_PATH=${LLVM_SYS_$(llvm_ver)_PREFIX}/bin/llvm-config
31 |         cargo install just
32 |         ./build.sh features=$(features) test
33 |       displayName: './build.sh test'
34 | - job: Linux
35 |   pool:
36 |     vmImage: 'ubuntu-16.04'
37 |   container: buildbase
38 |   steps:
39 |     - script: |
40 |         python3 --version
41 |         export PATH=${PATH}:~/.local/bin/
42 |       displayName: Python
43 |     - script: "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o rustup.sh && sh rustup.sh -y"
44 |       displayName: Rustup
45 |     - script: |
46 |         source ~/.cargo/env
47 |         export PATH=${PATH}:~/.local/bin/:/usr/lib/llvm-9/bin
48 |         export LLVM_SYS_90_PREFIX=/usr/lib/llvm-9
49 |         export LLVM_CONFIG_PATH=${LLVM_SYS_90_PREFIX}/bin/llvm-config
50 |         cargo install just
51 |         ./build.sh test
52 |       displayName: './build.sh test'
53 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | 
 3 | set -e
 4 | 
 5 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 6 | 
 7 | function check_for()
 8 | {
 9 | 	command -v $1 >/dev/null 2>&1 || { echo >&2 "$1 is required but it's not installed.  Aborting."; exit 1; }
10 | }
11 | 
12 | check_for python3
13 | 
14 | echo "Clang version:"
15 | clang --version
16 | 
17 | echo "Rust version"
18 | rustc --version
19 | 
20 | just "$@"
21 | 


--------------------------------------------------------------------------------
/docs/00-getting-started.md:
--------------------------------------------------------------------------------
 1 | # Getting Started
 2 | 
 3 | This is only known to work on my machine at the moment. I'm running macOS and stable Rust. If you'd still like to give it a go then make sure you have:
 4 | 
 5 |  * Rust - to compile the compiler
 6 |  * Just - to run help commands
 7 |  * Python - to run the functional tests
 8 |  * Clang - Used to link the output to create the final executables.
 9 | 
10 | The compiler is written in Rust and built with *Cargo*. Functional tests are defined by a collection of source files in `spec/` and run by `specs.py`. It's easier to run the tests with a virtual environment. Rather than running `cargo` or Python directly, use the `build.sh` script. This will set up the Python environment as required.
11 | 
12 | ## Source
13 | 
14 | The source is all available [at GitHub](https://github.com/iwillspeak/ullage). Pull requests, comments and issues are welcome. Any thoughts I find interesting during development might be posted to [my blog](http://willspeak.me/).
15 | 
16 | ## Building & Running the Compiler
17 | 
18 | The compiler can be built with `./build.sh build --release` from the root directory. This should create a `target/release/ullage` executable. With this executable in your path you can compile an example program with `$ ullage -o hello spec/hello.ulg`. This will produce an executable called `hello` in the current directory based on the source file `spec/hello.ulg`. Running `$ ./hello` should then print 1337 to the terminal. For full usage instructions on the compiler run `$ ullage --help`.
19 | 


--------------------------------------------------------------------------------
/docs/10-syntax.md:
--------------------------------------------------------------------------------
  1 | # Syntax & Grammar
  2 | 
  3 | Ullage files are plain-old UTF-8. The language itself is built mainly around *words* rather than fancy sigils; more reminiscent of Ruby than C and friends.
  4 | 
  5 | [TOC]
  6 | 
  7 | ## Tokens
  8 | 
  9 | Source text is treated as one of four basic token types: *words*, *punctuation*, *literals* and *whitespace*. Words and whitespace are unicode-aware.
 10 | 
 11 | ### Words
 12 | 
 13 | Word tokens start with an alphabetic character or an underscore. They may then contain any number of alphanumeric or underscore characters.
 14 | 
 15 | Examples of words are: `foo`, `fn`, `_1` and `∂`. Some words have special meanings in the grammar:
 16 | 
 17 |     if unless else while until end fn var let print
 18 | 
 19 | ### Punctuation
 20 | 
 21 | Punctuation characters, such as `-` and `!=`, are used to represent operators in the language. Currently a handful of punctuation characters are recognised: `=`, `==`, `!`, `!=`, `+`, `-`, `*`, `/`, `(`, `)`, `[`, `]`, `,`, `:`, `<`, and `>`.
 22 | 
 23 | ### Literals
 24 | 
 25 | Literals are used to represent constant input values to the program. Literals can be used to specify values of any primitive type (`Number`, `Bool` and `String` so far). Numeric literals consist of one or more consecutive digits: `0`, `42`, `1337`. Although `-47` evaluates to a negative number the `-` isn't part of the literal; in fact it is an operator.
 26 | 
 27 | ### Whitespace
 28 | 
 29 | Whitespace tokens are made up of one or more *space* characters. These *space* characters are either Unicode whitespace, such as tabs & spaces, or comments. Comments are introduced with a `#` and continue to the end of the line.
 30 | 
 31 |     # This is a comment!
 32 | 
 33 | ## Types
 34 | 
 35 | There are three main base types: `Number`, `String` and `Bool`. These can be extended by creating arrays and tuples.
 36 | 
 37 | ### `Bool`
 38 | 
 39 | A boolean holds a single bit of information. Boolean values can be created with the literals `true` and `false`; or as the result of a comparison (`foo == 100`).
 40 | 
 41 | ### `Number`
 42 | 
 43 | Number values hold whole numbers, or integers. Currently only decimal numeric literals are supported. All number values are stored in a 64 bit integer value.
 44 | 
 45 | ### `String`
 46 | 
 47 | String literals define a sequence of unicode code points. All strings in the language are UTF-8.
 48 | 
 49 |     'I am a string'
 50 | 
 51 | ### Arrays
 52 | 
 53 | An array type is declared by wrapping an existing type in `[]`. For example `[Number]` is the type for an array of `Number` values. To create a new array an array literal can be used:
 54 | 
 55 |     [1, 2, 3, 4]
 56 | 
 57 | All the values in an array must be of a single type.
 58 | 
 59 | ### Tuples
 60 | 
 61 | Tuples are similar to arrays but can contain values with different types. Tuples are defined by wrapping a comma-separated list of types in `()`. For example the type `(Number, Bool)` defines a two-element tuple with the first element a `Number` and the second a `Bool`. Instances of a tuple can be created by wrapping values in `()`:
 62 | 
 63 |     (100, false)
 64 | 
 65 | ## Variables
 66 | 
 67 | Variables are introduced with the `var` keyword followed by an identifier and an optional type. Immutable variables can be introduced with the `let` keyword.
 68 | 
 69 |     let foo = 100
 70 |     var bar: Number = 10
 71 | 
 72 | ## Operators
 73 | 
 74 | Ullage has both infix and prefix operators. Operators are grouped by precedence. Precedence can be overridden or enforced with parentheses.
 75 | 
 76 | The following infix operators exist, in precedence order from lowest to highest:
 77 | 
 78 |  * `==`, `!=`, `<`, `>` - Comparison operators
 79 |  * `+`, `-` - Addition and Subtraction
 80 |  * `*`, `/` - Multiplication and division
 81 | 
 82 | The following prefix operators exist. All prefix operators bind directly to the next expression:
 83 | 
 84 |  * `-`, `+`, `!`
 85 | 
 86 | ## Compound Expressions
 87 | 
 88 | As well as just simple expressions Ullage supports compound ones too. There aren't that many at this time:
 89 | 
 90 | ### While loop
 91 | 
 92 | A basic while loop has the form:
 93 | 
 94 | ```
 95 | while foo
 96 |    do_stuff()
 97 |    ..
 98 | end
 99 | ```
100 | 
101 | This continues until the expression `foo` is false.
102 | 
103 | There is also an alternate form, the `until` loop which continues until the expression is true:
104 | 
105 | ```
106 | until bar
107 |    do_other_stuff()
108 | end
109 | ```
110 | 
111 | ### Print Expression
112 | 
113 | The `print` word acts as a prefix operator. It will print the expression immediately to the right of it to standard output and return the value.
114 | 
115 | ```
116 | print 'hello world'
117 | ```
118 | 
119 | ### Ternary Conditional Expression
120 | 
121 | The ternary expression in Ullage has two forms: `if` and `unless`.
122 | 
123 | ```
124 | foo if bar else baz
125 | ```
126 | 
127 | will evaluate the expression on the left hand side if the condition `bar` is true, or the right hand side `baz` if false.
128 | 
129 | The `unless` expression does the opposite:
130 | 
131 | ```
132 | biff unless bop else buzz
133 | ```
134 | 
135 | ### Function Expression
136 | 
137 | Finally an `fn` expression can be used to introduce a function:
138 | 
139 | ```
140 | fn mod(n: Number, d: Number): Number
141 |    n if n < d else mod(n - d, d)
142 | end
143 | ```
144 | 
145 | This defines a function named `mod` which takes two `Number` parameters, `n` and `d`, and returns a `Number`.
146 | 
147 | ### Index Expressions
148 | 
149 | An index expression uses `[]` to access elements from an array. Array indices start at `0` for the first element:
150 | 
151 | ```
152 | let fuzz = [1, 2, 3, 4]
153 | print fuzz[2] # => 3
154 | ```
155 | 
156 | ### Call Expression
157 | 
158 | Functions and function-like types can be invoked with a call expression. A call expression begins with a left-hand side which references an invocable, such as a function's identifier. This is followed by the arguments to the function enclosed in parentheses `()`:
159 | 
160 | ```
161 | fn add(n: Number, m: Number): Number
162 |     n + m
163 | end
164 | 
165 | print add(add(1, 2), 3) # => 6
166 | ```
167 | 
168 | ## Grammar
169 | 
170 | The following grammar defines the language:
171 | 
172 | 
173 |     identifier = WORD
174 |                ;
175 |     
176 |     expression = nud [led]
177 |                ;
178 |     
179 |     expressions = expression*
180 |                 ;
181 |     
182 |     type_ref = ":" ty
183 |              ;
184 |     
185 |     ty = WORD
186 |         | "[" ty "]"
187 |         | "(" [ty ("," ty)*] ")"
188 |         ;
189 |     
190 |     optional_type_ref = [type_ref]
191 |                       ;
192 |     
193 |     typed_id = identifier optional_type_ref
194 |              ;
195 |     
196 |     declaration = identifier optional_type_ref "=" expression
197 |                  ;
198 |     
199 |     block = expression* "end"
200 |            ;
201 |     
202 |     ternary_body = expression "else" expression
203 |                  ;
204 |     
205 |     led = "==" expression
206 |         | "!=" expression
207 |         | "<" expression
208 |         | ">" expression
209 |         | "+" expression
210 |         | "-" expression
211 |         | "*" expression
212 |         | "/" expression
213 |         | "[" expression "]"
214 |         | "(" [expression ("," expression)*] ")"
215 |         | "if" ternary_body
216 |         | "unless" ternary_body
217 |         ;
218 |     
219 |     nud = "fn" identifier "(" [typed_id ("," typed_id)*] ")" type_ref block
220 |         | ("until" | "while") expression block
221 |         | "let" declaration
222 |         | "var" declaration
223 |         | "print" expression
224 |         | "true"
225 |         | "false"
226 |         | WORD
227 |         | LITERAL
228 |         | "+" expression
229 |         | "-" expression
230 |         | "!" expression
231 |         | "(" expression ")"
232 |         ;
233 | 
234 |     
235 | 


--------------------------------------------------------------------------------
/docs/12-parsing.md:
--------------------------------------------------------------------------------
 1 | # Parsing and Syntax Trees
 2 | 
 3 | The parser takes an input `SourceText` and produces a full-fidelity
 4 | `SyntaxTree`. In theory each character in the source exists as a
 5 | `Token` or `TriviaToken` within this tree. This initial tree is
 6 | intended to provide as rich a model as possible of the underlying source
 7 | text to allow for syntax transformations in the future. It is later
 8 | transformed into an abstract representation of the semantics of the
 9 | code by the `sem` module.
10 | 
11 | [TOC]
12 | 
13 | ## Structure
14 | 
15 | Source is represented by a `SourceText` type. This exposes characters
16 | as `&str` slices or via the `walk_chars` method for tokenisation. The
17 | source text also contains line information and has the ability to take
18 | a position and convert it into a line, column pair.
19 | 
20 | Parsing creates a lexer which implements token iteration for a given
21 | `SourceText`. Each token has a `Span` and `TokenKind`. Consumption of
22 | tokens from the iterator by `Parser::expect` stubs out missing tokens
23 | as well as recording errors in a collection of `Diagnostic`s.
24 | 
25 | Rather than returning a `Result` type, the parser always returns a valid
26 | `SyntaxTree`. It is the client's responsibility
27 | to inspect the tree for a given parse for diagnostics and act
28 | accordingly. This allows clients to parse malformed source text and
29 | should allow the compiler to produce more error information in each
30 | pass.
31 | 
32 | ### Structure
33 | 
34 | The structure looks something like this:
35 | 
36 |  * `syntax/parse/` - Parser and tokenisation logic
37 |  * `syntax/tree/` - Tree node types.
38 |  * `syntax/text/` - Source buffer abstraction.
39 |  * `diag.rs` - Diagnostic implementation.
40 | 
41 | ### Diagnostics
42 | 
43 | At the moment diagnostics are just string values and position
44 | information. More metadata can be added later. It might be useful to
45 | add 'level' information to each diagnostic to allow for warnings.
46 | 
47 | It would be nice to have some kind of diagnostic pretty printing
48 | rather than relying on the `main` method to walk the diagnostics and
49 | write them to stderr manually.
50 | 


--------------------------------------------------------------------------------
/docs/15-data-layout.md:
--------------------------------------------------------------------------------
 1 | # Data Layout
 2 | 
 3 | For the initial version of the language we just need `Bool`, `Number`
 4 | and `String` to have defined layout. It is probably worth thinking
 5 | about the future structure of arrays, tuples, and structs though.
 6 | 
 7 | ## Value and Reference Semantics
 8 | 
 9 | The plan is that `Bool`, `Number`, and `String` will all have value
10 | semantics. That is, a modification of a `String` value in one place
11 | will not affect its appearance in another. Such behaviour is referred
12 | to as "value semantics". This is similar to `Copy` types in rust and
13 | `struct` types in C#. I feel that tuple types should also have value
14 | semantics. Similar to `ValueTuple` in C#.
15 | 
16 | Array and structure types instead will have reference semantics. This
17 | means that passing a structure to a `fn` will allow the function to
18 | modify the structure value. This is similar to reference types in C#
19 | and `&mut` reference in Rust.
20 | 
21 | ## Type Layouts
22 | 
23 | For the primitive types we have the following type layouts from
24 | language type to LLVM type:
25 | 
26 |  * `Bool` -> `i1`
27 |  * `Number` -> `i64`
28 | 
29 | String types are represented as a pair of length, data:
30 | 
31 |  * `String` -> `<{u32,[0 x u8]}>*`
32 | 
33 | The value of the string is encoded directly as part of the
34 | pair. Allocation of a string uses a variable length array to contain a
35 | sequence of utf-8 characters. There are a few problems with this:
36 | 
37 |  * The expectation is that strings are rarely modified and we could
38 |    probably share a single buffer between string instances and use
39 |    reference counting to control mutable access.
40 |  * This needs some knowledge of when a value is 'dropped' to free the
41 |    correct amount of memory.
42 |  
43 |  Given these concerns we could lay a string out as:
44 |  
45 |   * `String` -> `<{u32, u32, [0 x u8]}>*`
46 | 
47 | In this representation each string has a pointer to a reference
48 | counted backing buffer. This should reduce copy-size of each string
49 | and means that a string reference would again have a single easily
50 | known size. We still need to know when the reference should be
51 | deallocated however.
52 | 
53 | ## Garbage Collection
54 | 
55 | Rather than aiming to control access to data as Rust does the language
56 | should provide a garbage collection mechanism to clean up data once no
57 | one references it. There are a few alternatives for this:
58 | 
59 |  * Don't deallocate - Probably useful to get us off the ground
60 |  * Reference counting. E.g. Swift's ARC & Python.
61 |  * Simple mark & sweep GC.
62 |  
63 | For a full mark and sweep or other collector the code generated needs
64 | to insert GC statepoints. For this reason I'm tempted to head towards
65 | the second option. In place of statepoints we will need to decide in
66 | the lower pass where to insert RC retain and release code to maintain
67 | the count. Could this work the same way as Rust's ARC model? In that
68 | case the code just needs to know a fixed point in the scope where each
69 | value is deallocated.
70 | 


--------------------------------------------------------------------------------
/docs/90-syntax-transforms.md:
--------------------------------------------------------------------------------
 1 | # Syntax Transformations
 2 | 
 3 | Currently we take the AST and transform it into a semantic tree. To
 4 | better support IDE-like features we will need to support more
 5 | variations in transformation.
 6 | 
 7 | With input trees immutable we could have a syntax transformer which
 8 | uses the visitor pattern. Would then have a `visit_` for each kind of
 9 | `Expression` and have them produce a new expression. New expressions
10 | would have synthesised `Span` information.
11 | 
12 | Initial optimisations/transformations would be a desugaring pass over
13 | this AST before lowering to the `sem` tree. Maybe even want to do the
14 | type check pass at this level and pass a fully-formed type checked
15 | tree to `sem`. If we do add a desugaring pass then the output of that
16 | would be nice to have as an `--emit` value.
17 | 
18 | This would be useful to observe the desugaring of things like `for`
19 | into `while`.
20 | 
21 | Further use of these would be to implement a language server in the
22 | future and some kind of code formatter.
23 | 
24 | ## Round-tripping POC
25 | 
26 | Given initial support for syntax transformations we could have a
27 | visitor for the tree which writes the tree to the standard output to
28 | prove out the round-tripping. Add a new set of tests which just
29 | round-trips all of the code from the specs.
30 | 
31 | API surface could be similar to Minsk's `Compilation::EmitTree`. I'm
32 | thinking we come up with a trait something like `TreeSink` and
33 | implement it for `IO::Write` or similar. Can then have a similar
34 | `emit_tree` which walks the tree and pushes it to the writer.
35 | 
36 | I'm thinking we might also want to have this tree emission part of a
37 | higher level `--emit=` flag on the compiler front end. Other values
38 | for now would be to emit the `Debug` version of the parsed tree and to
39 | emit the compilation result as IR, an object file or a linked
40 | executable.
41 | 
42 | ## Interaction with `dumpast` and `dumptree`
43 | 
44 | The current flags for dumping compilation state may be best grouped
45 | under a single `--dump=<type>` flag. This could allow for different
46 | outputs:
47 | 
48 |  * Plain round-tripped source
49 |  * Formatted source
50 |  * Syntax tree structure
51 |  * Desugared source
52 |  * HTML syntax tree
53 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | A statically-typed compiled language defined by a simple grammar. The
2 | parser started as an experiment in top-down operator precedence
3 | parsing in Rust. It has since started developing into a small
4 | language.
5 | 


--------------------------------------------------------------------------------
/justfile:
--------------------------------------------------------------------------------
 1 | features := "llvm-13"
 2 | 
 3 | build:
 4 |     cargo build --release --no-default-features --features={{features}}
 5 | 
 6 | test: build
 7 |     cargo test --no-default-features --features={{features}}
 8 |     python3 specs.py
 9 | 
10 | clean:
11 |     rm -f a.out
12 |     rm -rf specbin/
13 |     cargo clean
14 | 
15 | docs:
16 |     cd docs/; docket
17 | 
18 | clippy:
19 |     cargo clippy
20 | 
21 | bench opt_level="3": build
22 |     #!/usr/bin/env python3
23 |     import os
24 |     import glob
25 |     import subprocess
26 |     # Compile every benchmark under spec/bench/ into specbin/bench/ and time the result.
27 |     for bench in glob.glob("spec/bench/*.ulg"):
28 |         output = os.path.splitext(os.path.relpath(bench, "spec"))[0]  # drop "spec/" and ".ulg"; lstrip/rstrip strip character sets, not affixes
29 |         output = os.path.join("specbin", "bench", output)
30 |         try:
31 |             os.makedirs(os.path.dirname(output))
32 |         except OSError:
33 |             pass  # the output directory already exists
34 |         print("bench={0}, output={1}, opt={2}".format(bench, output, "{{opt_level}}"))  # quoted so non-numeric levels such as "s" work
35 |         subprocess.call(["target/release/ullage", bench, "-O{{opt_level}}", "-o", output])
36 |         subprocess.call(["time", output])


--------------------------------------------------------------------------------
/spec/README.md:
--------------------------------------------------------------------------------
 1 | # Spec Tests
 2 | 
 3 | This directory and its subdirectories contain a set of specifications
 4 | for the language structured as a selection of source files. Each
 5 | source file contains comments which describe assertions about the
 6 | compilation.
 7 | 
 8 | ## Assertions
 9 | 
10 | Assertions are encoded in comments. Comments start with a given sigil
11 | sequence and the rest of the line is the parameter to the assertion.
12 | 
13 |  * ` # => <output>` - Verify output. All standard output from the
14 |    program must be matched by one of these assertions.
15 |  * ` # !> <error>` - Verify error. Checks that the compilation fails
16 |    and that the given error is printed to stderr.
17 |  * ` # !!skip` - Skips running the output. Just checks that the code
18 |    is parsed and compiles.
19 | 
20 | ## Structure
21 | 
22 | Tests can be arranged into subdirectories to group similar
23 | tests. Subdirectories of interest are:
24 | 
25 |  * `bench/` - Code benchmarks.
26 |  * `malformed/` - Broken input that shouldn't parse correctly.
27 |  * `fail/` - Syntactically valid code that is semantically invalid and
28 |    should fail compilation.
29 | 


--------------------------------------------------------------------------------
/spec/assignment.ulg:
--------------------------------------------------------------------------------
1 | var foo = 100
2 | print foo # => 100
3 | print foo = foo / 2 # => 50
4 | print foo # => 50


--------------------------------------------------------------------------------
/spec/basics.ulg:
--------------------------------------------------------------------------------
1 | print let foo = 100 #=> 100
2 | print let bar = 192 #=> 192
3 | print let baz = bar * (100 / foo) #=> 192
4 | print 1 if baz != 0 else -1 #=> 1
5 | print let 中 = 99 - (10 / bar) #=> 99
6 | fn foo(): Number
7 |    let fiz = 100
8 | end
9 | print foo() # => 100


--------------------------------------------------------------------------------
/spec/bench/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmarking
 2 | 
 3 | This folder contains files which can be used to benchmark the
 4 | performance of the generated code. Each file should have a `#!!skip`
 5 | attribute so that they aren't run when the spec tests are executed.
 6 | 
 7 | The benchmarks can be run with `./build.sh bench` from the project
 8 | root.
 9 | 
10 | ## `fib.ulg`
11 | 
12 | This benchmark performs the same Fibonacci computation from
13 | <https://github.com/drujensen/fib> to allow some form of comparison
14 | with popular languages.
15 | 


--------------------------------------------------------------------------------
/spec/bench/fib.ulg:
--------------------------------------------------------------------------------
1 | #!!skip
2 | 
3 | fn fib(n: Number): Number
4 |    1 if n <= 1 else fib(n - 1) + fib(n - 2)
5 | end
6 | 
7 | print fib(46) # => 2971215073


--------------------------------------------------------------------------------
/spec/bool.ulg:
--------------------------------------------------------------------------------
1 | var foo: Bool = false
2 | 
3 | print foo # => false
4 | 
5 | foo = !foo
6 | 
7 | print foo # => true


--------------------------------------------------------------------------------
/spec/collatz.ulg:
--------------------------------------------------------------------------------
 1 | # Modulus remainder
 2 | #
 3 | # Returns the modulus remainder of n/d
 4 | fn mod(n: Number, d: Number): Number
 5 |    n if n < d else mod(n - d, d)
 6 | end
 7 | 
 8 | # Internal Collatz implementation
 9 | fn collatz_internal(x: Number, steps: Number): Number
10 | 	steps
11 | 	if x == 1
12 | 	else collatz_internal(x / 2 if mod(x,2) == 0 else (3 * x) + 1, steps + 1)
13 | end
14 | 
15 | # Find how long a given number takes to converge on 1
16 | fn collatz(x: Number): Number
17 |    collatz_internal(x, 0)
18 | end
19 | 
20 | print collatz(17) # => 12
21 | print collatz(1)  # => 0
22 | print collatz(9)  # => 19


--------------------------------------------------------------------------------
/spec/compare.ulg:
--------------------------------------------------------------------------------
 1 | # Minimum
 2 | #
 3 | # Returns the smallest of the two arguments.
 4 | fn min(a: Number, b: Number): Number
 5 |    a if a < b else b
 6 | end
 7 | 
 8 | # Maximum
 9 | #
10 | # Returns the largest of the two arguments
11 | fn max(a: Number, b: Number): Number
12 |    a if a > b else b
13 | end
14 | 
15 | print min(1, 100) # => 1
16 | print min(100, 1) # => 1
17 | print min(50, 50) # => 50
18 | print min(7, min(20, 3)) # => 3
19 | 
20 | print max(1, 2) # => 2
21 | print max(2, 100) # => 100
22 | print max(2 * 2, 4 * 4) # => 16
23 | print max(max(1, 4), 19) # => 19
24 | 
25 | print min(max(min(1, 100), 23), -1) # => -1


--------------------------------------------------------------------------------
/spec/euler1.ulg:
--------------------------------------------------------------------------------
 1 | # Sum all the multiples of 3 or 5 below 1000
 2 | 
 3 | fn mod(n: Number, d: Number): Number
 4 |     while n >= d
 5 |    	    n = n - d
 6 |     end
 7 | 	n
 8 | end
 9 | 
10 | fn is_multiple(n: Number, d: Number): Bool
11 |     mod(n, d) == 0
12 | end
13 | 
14 | fn sum_to(n: Number): Number
15 |    var i = 0
16 |    var s = 0
17 |    while i < n
18 |      s = ((s + i) if is_multiple(i, 3) else
19 | 	 	  ((s + i) if is_multiple(i, 5) else s))
20 |      i = i + 1
21 |    end
22 |    s
23 | end
24 | 
25 | print is_multiple(3, 3) #=> true
26 | print is_multiple(9, 3) #=> true
27 | print is_multiple(8, 3) #=> false
28 | 
29 | print sum_to(10) #=> 23
30 | print sum_to(1000) # => 233168
31 | 


--------------------------------------------------------------------------------
/spec/evens.ulg:
--------------------------------------------------------------------------------
 1 | fn is_even(n: Number): Number
 2 |    1 if n == 0 else is_odd(n - 1)
 3 | end
 4 | 
 5 | fn is_odd(n: Number): Number
 6 |    0 if n == 0 else is_even(n - 1)
 7 | end
 8 | 
 9 | print is_even(0) == 1 # => true
10 | print is_odd(1)  == 1 # => true
11 | print is_odd(8)  == 1 # => false
12 | print is_odd(21) == 1 # => true
13 | print is_even(5) == 1 # => false


--------------------------------------------------------------------------------
/spec/factorial.ulg:
--------------------------------------------------------------------------------
 1 | # the number we want to compute the factorial of
 2 | var n = 5
 3 | var acc = 1
 4 | 
 5 | while n > 0
 6 |   acc = acc * n
 7 |   n = n - 1
 8 | end
 9 | 
10 | print acc # => 120
11 | 


--------------------------------------------------------------------------------
/spec/fail/README.md:
--------------------------------------------------------------------------------
1 | # Compilation Failure Tests
2 | 
3 | This directory contains examples of syntactically valid but
4 | semantically invalid code. They should parse OK, but will fail
5 | compilation.
6 | 


--------------------------------------------------------------------------------
/spec/fail/assign_to_immutable.ulg:
--------------------------------------------------------------------------------
1 | var a = 100
2 | let b = a
3 | a = 100
4 | b = 100 # !> Can't assign to 'b'


--------------------------------------------------------------------------------
/spec/fail/assign_to_undefined.ulg:
--------------------------------------------------------------------------------
1 | foo = 123 # !> Can't assign to 'foo'


--------------------------------------------------------------------------------
/spec/fail/badcalls.ulg:
--------------------------------------------------------------------------------
 1 | # Test function to emit invalid calls to
 2 | fn foo(a: Number, b: String): Number
 3 |    a
 4 | end
 5 | 
 6 | foo() # !> Too few arguments to call
 7 | foo(100) # !> Too few arguments to call
 8 | foo(100, '', 100) # !> Too many arguments to call
 9 | foo(false, '', 100) # !> Too many arguments to call
10 | foo(100, '', '', false) # !> Too many arguments to call
11 | 
12 | foo(100, 100) # !> 12:9:error: Invalid argument. Expected 'String' but found 'Number'
13 | 
14 | # !> 16:4:error: Invalid argument. Expected 'Number' but found 'String'
15 | # !> 16:13:error: Invalid argument. Expected 'String' but found 'Bool'
16 | foo('hello', false)
17 | 
18 | let bar = 100
19 | 
20 | # !> 21:0:error: Called item is not a function
21 | bar()
22 | 
23 | # !> 24:0:error: Called item is not a function
24 | false()


--------------------------------------------------------------------------------
/spec/fail/badvars.ulg:
--------------------------------------------------------------------------------
1 | # !> 2:4:error: Initialiser doesn't match declaration type for 'foo'
2 | let foo: String = 100
3 | 
4 | let baz = foo
5 | 
6 | let buz: Number = baz # !> 6:4:error: Initialiser doesn't match declaration type for 'buz'


--------------------------------------------------------------------------------
/spec/fail/dupe_fn_param.ulg:
--------------------------------------------------------------------------------
1 | # !> 2:27:error: Duplicate function parameter 'a'
2 | fn foo(a: Number, b: Bool, a: String): Number
3 |    100
4 | end


--------------------------------------------------------------------------------
/spec/fail/if_else_badtypes.ulg:
--------------------------------------------------------------------------------
1 | # !> 2:13:error: Condition expression should be 'Bool' but is 'Number'
2 | let a = 1 if 2 else 3
3 | 
4 | # !> 5:8:error: If and else have mismatched types
5 | let b = true if 1 != 0 else ''


--------------------------------------------------------------------------------
/spec/fail/mismatched_assign.ulg:
--------------------------------------------------------------------------------
1 | let foo = 100
2 | foo = 101 # !> Can't assign to 'foo', it isn't mutable
3 | 
4 | var bar = 'a string'
5 | bar = 4356 # !> Type mismatch in assignment to 'bar'
6 | 


--------------------------------------------------------------------------------
/spec/fail/undefined.ulg:
--------------------------------------------------------------------------------
1 | im_not_defined_yet # !> Can't find 'im_not_defined_yet' in this scope


--------------------------------------------------------------------------------
/spec/fail/undefined_func.ulg:
--------------------------------------------------------------------------------
1 | fn foo(): Number
2 |    compute_number() # !> Can't find 'compute_number' in this scope
3 | end


--------------------------------------------------------------------------------
/spec/fail/wrong_type_var.ulg:
--------------------------------------------------------------------------------
1 | let nope: String = 100 # !> Initialiser doesn't match declaration type for 'nope'
2 | 


--------------------------------------------------------------------------------
/spec/fib.ulg:
--------------------------------------------------------------------------------
 1 | # Fibonacci Number
 2 | #
 3 | # Computes the `n`th iteration of the Fibonacci sequence.
 4 | fn fib(n: Number): Number
 5 |   var iters = 0
 6 |   var low = 0
 7 |   var high = 1
 8 |   while iters < n
 9 | 	iters = iters + 1
10 |     let old_low = low
11 | 	low = high
12 | 	high = low + old_low
13 |   end
14 |   low
15 | end
16 | 
17 | print fib(10) # => 55
18 | print fib(0) # => 0
19 | print fib(1) # => 1
20 | print fib(11) # => 89


--------------------------------------------------------------------------------
/spec/fizzbuzz.ulg:
--------------------------------------------------------------------------------
 1 | var i = 1
 2 | while i < 20
 3 |     fizzbuzz(i) # => 1
 4 |                 # => 2
 5 |                 # => fizz
 6 |                 # => 4
 7 |                 # => buzz
 8 |                 # => fizz
 9 |                 # => 7
10 |                 # => 8
11 |                 # => fizz
12 |                 # => buzz
13 |                 # => 11
14 |                 # => fizz
15 |                 # => 13
16 |                 # => 14
17 |                 # => fizzbuzz
18 | 				# => 16
19 | 				# => 17
20 | 				# => fizz
21 | 				# => 19
22 |     i = i + 1
23 | end
24 | 
25 | # Modulus Remainder
26 | #
27 | # Returns the remainder after modulus division
28 | fn mod(n: Number, divisor: Number): Number
29 |    var ret = n
30 |    until ret < divisor
31 |        ret = ret - divisor
32 |    end
33 |    ret
34 | end
35 | 
36 | # Printing Fudge
37 | #
38 | # Prints the number, but coerces the type to String so we can use it
39 | # in the same ternary expression as string print expressions.
40 | fn fudge(n: Number): String
41 |    print n
42 |    ''
43 | end
44 | 
45 | # FizzBuzz
46 | #
47 | # Implementation of the legendary `FizzBuzz` algorithm.
48 | fn fizzbuzz(n: Number): String
49 |    (print 'fizzbuzz') if mod(n, 15) == 0 else
50 |    (print 'fizz') if mod(n, 3) == 0 else
51 |    (print 'buzz') if mod(n, 5) == 0 else
52 |    fudge(n)
53 | end


--------------------------------------------------------------------------------
/spec/funcfact.ulg:
--------------------------------------------------------------------------------
 1 | fn fact(n: Number): Number
 2 |     var acc = 1
 3 |     var i = n
 4 |     while i > 0
 5 |         acc = acc * i
 6 |         i = i - 1
 7 |     end
 8 |     acc
 9 | end
10 | 
11 | print fact(0) == fact(1) # => true
12 | print fact(5) # => 120
13 | print fact(9) # => 362880


--------------------------------------------------------------------------------
/spec/functions.ulg:
--------------------------------------------------------------------------------
 1 | fn test(): Number
 2 |    1000
 3 | end
 4 | 
 5 | print test() #=> 1000
 6 | 
 7 | fn printer(n: Number): Number
 8 |   print n
 9 | end
10 | 
11 | printer(1000) # => 1000
12 | printer(98 / 2) # => 49
13 | 
14 | fn add(a: Number, b: Number): Number
15 |    a + b
16 | end
17 | 
18 | print add(1002, -1000) # => 2
19 | print add(99, 1) # => 100


--------------------------------------------------------------------------------
/spec/gcd.ulg:
--------------------------------------------------------------------------------
 1 | ## Greatest Common Divisor
 2 | #
 3 | # Finds the largest divisor of `a` and `b`
 4 | fn gcd(a: Number, b: Number): Number
 5 |    a if b == 0 else gcd(b, mod(a, b))
 6 | end
 7 | 
 8 | ## Modulus Remainder
 9 | #
10 | # Returns the modulus remainder after division. Iteratively computes
11 | # the result for efficiency.
12 | fn mod(n: Number, m: Number): Number
13 |     var ret = n
14 |     until ret < m
15 |         ret = ret - m
16 |     end
17 | 	ret
18 | end
19 | 
20 | print gcd(8, 12) # => 4
21 | 
22 | print gcd(42, 56) # => 14
23 | print gcd(461952, 116298) # => 18
24 | print gcd(7966496, 314080416) # => 32
25 | print gcd(24826148, 45296490) # => 526
26 | print gcd(12, 0) # => 12
27 | print gcd(0, 0) # => 0
28 | print gcd(0, 9) # => 9


--------------------------------------------------------------------------------
/spec/hello.ulg:
--------------------------------------------------------------------------------
1 | print 1337 # => 1337
2 | 


--------------------------------------------------------------------------------
/spec/hello_world.ulg:
--------------------------------------------------------------------------------
1 | print 'Hello World!' # => Hello World!
2 | 


--------------------------------------------------------------------------------
/spec/identity.ulg:
--------------------------------------------------------------------------------
 1 | 
 2 | print 0 # => 0
 3 | print 0 == 1 # => false
 4 | print (0 + 1) == 1 # => true
 5 | 
 6 | var n: Number = ident(0)
 7 | let m = ident(0 + 1)
 8 | 
 9 | print n == 1 # => false
10 | print m == 1 # => true
11 | 
12 | print ident(0) == 1 # => false
13 | print ident(0 + 1) == 1 # => true
14 | 
15 | # identity function
16 | fn ident(n: Number): Number
17 |    n
18 | end
19 | 


--------------------------------------------------------------------------------
/spec/if.ulg:
--------------------------------------------------------------------------------
 1 | print 1111 if 100 == 100 else 0000 # => 1111
 2 | print 1234 if 567 == 890 else -900 # => -900
 3 | print 90-1 if 4/2 == 1+1 else 22-9 # => 89
 4 | 
 5 | print 1 unless 2 == 3 else 4 # => 1
 6 | print 0 unless 0 == 0 else 1 # => 1
 7 | 
 8 | print 1 if 2 != 3 else 4 # => 1
 9 | print 0 if 1 < 2 else 3 # => 0
10 | print 0 if 1 > 2 else 3 # => 3
11 | 
12 | print 1 if !1 == 3 else 3 # => 3
13 | print 1 if !(1 == 3) else 3 # => 1
14 | 
15 | print 1 if true else 2 # => 1
16 | print 1 if false else 2 # => 2


--------------------------------------------------------------------------------
/spec/literals.ulg:
--------------------------------------------------------------------------------
1 | print 100 #=> 100
2 | 1772773
3 | print 10000 #=> 10000
4 | 123123123


--------------------------------------------------------------------------------
/spec/long_string.ulg:
--------------------------------------------------------------------------------
1 | var s = 'test'
2 | var i = 100
3 | while i > 0
4 | 	  s = '[' + s + ']'
5 | 	  i = i - 1
6 | end
7 | 
8 | print s # => [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[test]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]


--------------------------------------------------------------------------------
/spec/malformed/README.md:
--------------------------------------------------------------------------------
1 | # Malformed Inputs
2 | 
3 | This directory contains a set of malformed source code to test the compiler's error messages. These tests aren't exhaustive; corner cases in tokenisation and parsing should be tested more in the checklex and checkparse unit tests.
4 | 


--------------------------------------------------------------------------------
/spec/malformed/half_ternary.ulg:
--------------------------------------------------------------------------------
1 | 0 if # !> 3:0:error: Expected expression but found end of file
2 | # !> error: expecting: keyword, found: end of file
3 | 


--------------------------------------------------------------------------------
/spec/malformed/invalid_calls.ulg:
--------------------------------------------------------------------------------
 1 | fn foo(n: Number, b: Bool): Number
 2 |    n if b else 2
 3 | end
 4 | 
 5 | # !> 6:4:error: unexpected token: expected expression but found ','
 6 | foo(,)
 7 | 
 8 | # it's OK to have a trailing `,`
 9 | foo(100, false,)
10 | 
11 | # don't go nuts though...
12 | # !> 13:15:error: unexpected token: expected expression but found ','
13 | foo(969, true, ,)


--------------------------------------------------------------------------------
/spec/malformed/missing_ident.ulg:
--------------------------------------------------------------------------------
1 | # !> 2:3:error: expected identifier, found: '('
2 | fn (expr: Number): Number
3 |   # !> 4:6:error: unexpected token: expected expression but found ')'
4 |   100 )
5 | end
6 | 


--------------------------------------------------------------------------------
/spec/malformed/param_missing_type.ulg:
--------------------------------------------------------------------------------
 1 | # !> 2:7:error: Parameter 'missing_type' missing type
 2 | fn foo(missing_type): Number
 3 |  1000
 4 | end
 5 | 
 6 | # !> 7:12:error: Reference to undefined type
 7 | fn bar(bad: Bad): Number
 8 |  1000
 9 | end
10 | 


--------------------------------------------------------------------------------
/spec/malformed/unexpected_plus.ulg:
--------------------------------------------------------------------------------
1 | 
2 | (0+)
3 | # !> unexpected token: expected expression but found ')'
4 | 


--------------------------------------------------------------------------------
/spec/malformed/unterminated_bracket.ulg:
--------------------------------------------------------------------------------
1 | (
2 | # !> error: could not parse source: one or more errors:
3 | # !> 5:0:error: Expected expression but found end of file
4 | # !> error: expecting: ')', found: end of file
5 | 


--------------------------------------------------------------------------------
/spec/malformed/unterminated_fn.ulg:
--------------------------------------------------------------------------------
1 | fn foo(): Number
2 | # !> expecting: keyword, found: end of file
3 | 


--------------------------------------------------------------------------------
/spec/malformed/unterminated_string.ulg:
--------------------------------------------------------------------------------
1 | # !> could not parse source: one or more errors:
2 | # !> error: unrecognised character
3 | '
4 | 


--------------------------------------------------------------------------------
/spec/malformed/unterminated_string2.ulg:
--------------------------------------------------------------------------------
1 | # the whitespace in this ensures the junk token should be attached to
2 | # the trailing trivia of the first number literal
3 | 
4 | # !> could not parse source: one or more errors:
5 | # !> error: unrecognised character
6 | 0'
7 | 0


--------------------------------------------------------------------------------
/spec/malformed/unterminated_string3.ulg:
--------------------------------------------------------------------------------
1 | # the whitespace in this ensures the junk token should be attached to
2 | # the end of file token.
3 | 
4 | # !> could not parse source: one or more errors:
5 | # !> error: unrecognised character
6 | 0
7 | '
8 | 


--------------------------------------------------------------------------------
/spec/maybe_invert.ulg:
--------------------------------------------------------------------------------
1 | print maybe_invert(100, false) # => 100
2 | print maybe_invert(100, true) # => -100
3 | print maybe_invert(-83, true) # => 83
4 | 
5 | fn maybe_invert(invertee: Number, invert: Bool): Number
6 |    -invertee if invert else invertee
7 | end


--------------------------------------------------------------------------------
/spec/modulus.ulg:
--------------------------------------------------------------------------------
 1 | ## Modulus
 2 | #
 3 | # Returns the modulus remainder after division
 4 | fn mod(n: Number, d: Number): Number
 5 |    until n < d
 6 |        n = n - d
 7 |    end
 8 |    n
 9 | end
10 | 
11 | print mod(5, 3) # => 2


--------------------------------------------------------------------------------
/spec/operators.ulg:
--------------------------------------------------------------------------------
 1 | print 100 # => 100
 2 | print -50 # => -50
 3 | print 1-1  # => 0
 4 | print 1 -1 # => 0
 5 | print 1 - -1 # => 2
 6 | print 100 + -50 # => 50
 7 | print -50 + 100 + -50 # => 0
 8 | print 2 + 3 * 4  #=> 14
 9 | print 20 - 3 * 4 #=> 8
10 | print 2 + 6 / 3  #=> 4
11 | print 2 - 6 / 3  #=> 0
12 | print 5 < 6 # => true
13 | print 5 <= 5 # => true
14 | print 5 > 6 # => false
15 | print 5 >= 5 # => true
16 | print !!((-100 + + 100) == 0) # => true


--------------------------------------------------------------------------------
/spec/prefix.ulg:
--------------------------------------------------------------------------------
1 | print !false # => true
2 | print !true # => false
3 | print +100 # => 100
4 | print -100 # => -100
5 | print - - 100 # => 100
6 | print !!false # => false


--------------------------------------------------------------------------------
/spec/printing.ulg:
--------------------------------------------------------------------------------
 1 | # First we try to print a value out
 2 | print 1234 # => 1234
 3 | 
 4 | # then we check that printing a value returns the correct result
 5 | print print print 1969 # => 1969
 6 |                        # => 1969
 7 |                        # => 1969
 8 | 
 9 | # Printing of string literals
10 | print 'foo' # => foo
11 | 
12 | # Type info is tracked through variable assignments
13 | let bar = 'bar'
14 | print bar # => bar
15 | 
16 | # Type info is tracked through function calls
17 | fn print_str(s: String): String
18 |    print s
19 | end
20 | 
21 | print_str('hello') # => hello
22 | 
23 | ## Tests for printing out 'large' numbers. This makes sure we print
24 | ## out 64 bit values correctly
25 | print 2147483647 # => 2147483647
26 | print -2147483648 # => -2147483648
27 | print 2971215073 # => 2971215073
28 | print 9223372036854775807 #=> 9223372036854775807
29 | # Since negative integers aren't actually literals, just applications
30 | # of a prefix operator we can't actually _write_ the value in
31 | # full. Instead compute it.
32 | print -9223372036854775807 - 1 # => -9223372036854775808
33 | 


--------------------------------------------------------------------------------
/spec/scopes.ulg:
--------------------------------------------------------------------------------
1 | var a = 0
2 | print a # => 0
3 | var a = 1
4 | print a # => 1
5 | let a = a * 2
6 | print a # => 2
7 | 


--------------------------------------------------------------------------------
/spec/string_concat.ulg:
--------------------------------------------------------------------------------
1 | print 'hello' # => hello
2 | print 'world' # => world
3 | print 'hello' + ' world' # => hello world
4 | 
5 | let space: String = ' '
6 | let foobar: String = 'foo' + ' ' + 'bar'
7 | 
8 | print foobar # => foo bar


--------------------------------------------------------------------------------
/spec/types.ulg:
--------------------------------------------------------------------------------
1 | # shows that types are handled differently by print
2 | print 123 # => 123
3 | print true # => true


--------------------------------------------------------------------------------
/spec/unimplemneted/README.md:
--------------------------------------------------------------------------------
1 | # Unimplemented Features
2 | 
3 | This directory contains specs which demonstrate as-yet unimplemented language features. The idea is to check that some kind of expected error message is printed if an attempt is made to use the feature.
4 | 


--------------------------------------------------------------------------------
/spec/unimplemneted/array_types.ulg:
--------------------------------------------------------------------------------
1 | # !> array types are not yet supported
2 | var foo: [Number] = 0
3 | 


--------------------------------------------------------------------------------
/spec/unimplemneted/tuple_types.ulg:
--------------------------------------------------------------------------------
1 | # !> tuple types are not yet supported
2 | var foo: (String, Number, Bool) = 0
3 | 


--------------------------------------------------------------------------------
/spec/until.ulg:
--------------------------------------------------------------------------------
1 | var bar = 0
2 | until bar > 5
3 | 	  print bar = bar + 2 # => 2
4 | 	                      # => 4
5 | 						  # => 6
6 | end


--------------------------------------------------------------------------------
/spec/variables.ulg:
--------------------------------------------------------------------------------
 1 | let hello = 100
 2 | let world = 198
 3 | print hello + world # => 298
 4 | let there = 9000
 5 | let world = 20000
 6 | print hello - there + world # => 11100
 7 | print let yup = 100 - -hello # => 200
 8 | 
 9 | var foo = -1001
10 | print foo # => -1001
11 | foo = foo + 1000
12 | print foo # => -1


--------------------------------------------------------------------------------
/spec/while.ulg:
--------------------------------------------------------------------------------
 1 | var foo = 0
 2 | while foo < 10
 3 | 	  print foo = foo + 1 # => 1
 4 | 	                      # => 2
 5 | 	                      # => 3
 6 | 	                      # => 4
 7 | 	                      # => 5
 8 | 	                      # => 6
 9 | 	                      # => 7
10 | 	                      # => 8
11 | 	                      # => 9
12 | 	                      # => 10
13 | end


--------------------------------------------------------------------------------
/specs.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | 
  3 | import os
  4 | import sys
  5 | import fnmatch
  6 | import subprocess
  7 | import re
  8 | import collections
  9 | import itertools
 10 | import threading
 11 | 
 12 | Expectations = collections.namedtuple('Expectations', ['expects', 'failure_expects', 'skip_run'])
 13 | 
 14 | LINKER_KIND = "Object"
 15 | EXPECT_PATTERN = re.compile(r'#\s?=>\s?(.+)')
 16 | EXPECT_ERR_PATTERN = re.compile(r'#\s?!>\s?(.+)')
 17 | SKIP_PATTERN = re.compile(r'#\s?!!skip')
 18 | 
 19 | class Error(Exception):
 20 |     def __init__(self, error):
 21 |         self.error = error
 22 | 
 23 | class ExitCodeMismatchError(Error):
 24 |     def __init__(self, message, code, output):
 25 |         message = "{} exit={}, out='{}', err='\n{}'".format(
 26 |             message, code, output[0].decode('utf-8'), output[1].decode('utf-8'))
 27 |         super(ExitCodeMismatchError, self).__init__(message)
 28 | 
 29 | class OutputMissingError(Error):
 30 |     def __init__(self, output, expected):
 31 |         output = "\n       >> ".join(output.strip().split('\n'))
 32 |         expected = '", "'.join(expected)
 33 |         msg = 'Expected "{}" in output. \n found >> {}'.format(expected, output)
 34 |         super(OutputMissingError, self).__init__(msg)
 35 | 
 36 | class OutputMismatchError(Error):
 37 |     def __init__(self, expected, actual):
 38 |         msg = 'Expected "{}", found "{}"'.format(expected, actual)
 39 |         super(OutputMismatchError, self).__init__(msg)
 40 | 
 41 | def add_matches(pattern, line, expects):
 42 |     match = pattern.search(line)
 43 |     if match:
 44 |         expects.append(match.group(1))
 45 | 
 46 | def parse_spec(path):
 47 |     expects = []
 48 |     failure_expects = []
 49 |     skip_run = False
 50 |     with open(path, encoding='utf-8') as f:
 51 |         for line in f.readlines():
 52 |             add_matches(EXPECT_PATTERN, line, expects)
 53 |             add_matches(EXPECT_ERR_PATTERN, line, failure_expects)
 54 |             if SKIP_PATTERN.search(line):
 55 |                 skip_run = True
 56 |     return Expectations(expects, failure_expects, skip_run)
 57 | 
 58 | def check_output(lines, expects):
 59 |     """Check that Output Matches Expectations
 60 | 
 61 |     Given a string representing the output of a command and
 62 |     a list of expected lines check that the output of the
 63 |     command matches.
 64 |     """
 65 | 
 66 |     lines = lines.strip().split('\n')
 67 |     for actual, expected in itertools.zip_longest(lines, expects):
 68 |         if not actual or not expected:
 69 |             raise OutputMismatchError(expected, actual)
 70 |         if actual != expected:
 71 |             raise OutputMismatchError(expected, actual)
 72 | 
 73 | def check_expected_exit(exit_code, output, expectations):
 74 |     """Check the Exit of a Program
 75 | 
 76 |     Given the output and exit status of a program make sure we got a
 77 |     successful exit, or a non-zero exit code with the expected
 78 |     failures in the program's standard error output.
 79 |     """
 80 | 
 81 |     # if we got killed by a signal we don't want to check for errors
 82 |     if exit_code < 0:
 83 |         raise ExitCodeMismatchError(
 84 |             "Compilation was killed by signal '{}'".format(-exit_code),
 85 |             exit_code, output)
 86 | 
 87 |     # If we were expecting a compilation failure make sure we got it
 88 |     if expectations.failure_expects:
 89 |         if exit_code == 0:
 90 |             raise ExitCodeMismatchError(
 91 |                 "Expected failure but compilation succeeded",
 92 |                 exit_code, output)
 93 |         check_compilation_failure(
 94 |             output[1].decode('utf-8'), expectations.failure_expects)
 95 |     # No compilation failure but we got one.
 96 |     elif exit_code != 0:
 97 |         raise ExitCodeMismatchError(
 98 |             "Expected successfull exit", exit_code, output)
 99 | 
100 | def check_compilation_failure(output, failure_expects):
101 |     """Check Failure Output
102 | 
103 |     Given the output of a failed compilation command check that any
104 |     failure expectations are met.
105 |     """
106 | 
107 |     fails = list(failure_expects)
108 |     for line in output.strip().split('\n'):
109 |         # Check we haven't found the source printed out again...
110 |         if EXPECT_ERR_PATTERN.search(line):
111 |             continue
112 |         if fails and fails[0] in line:
113 |             fails.pop(0)
114 |     if fails:
115 |         raise OutputMissingError(output, fails)
116 | 
117 | def run_spec(path):
118 |     """Compile and Run the Given Spec
119 | 
120 |     Compiles the spec, examining any compilation errors. If none are
121 |     found then the resulting executable is run and expectations
122 |     from the input file are matched against the output.
123 |     """
124 | 
125 |     expectations = parse_spec(path)
126 |     out = "specbin/{}".format(os.path.basename(path).split('.')[0])
127 |     compile_cmd = subprocess.Popen(["target/release/ullage", path, "--link-kind", LINKER_KIND, "-o", out], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
128 | 
129 |     # Give the compiler 5 seconds to run, and return an error on timeout
130 |     timer = threading.Timer(5, compile_cmd.kill)
131 |     try:
132 |         timer.start()
133 |         output = compile_cmd.communicate()
134 |         exit_code = compile_cmd.returncode
135 |         check_expected_exit(exit_code, output, expectations)
136 |     finally:
137 |         timer.cancel()
138 | 
139 |     if expectations.skip_run or compile_cmd.returncode != 0:
140 |         return
141 |     run_cmd = subprocess.Popen(out, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
142 |     output = run_cmd.communicate()
143 |     if run_cmd.returncode != 0:
144 |         raise ExitCodeMismatchError("Expected successfull exit code", run_cmd.returncode, output)
145 |     check_output(output[0].decode('utf-8'), expectations.expects)
146 | 
147 | 
148 | def glob_for(path, extension):
149 |     """Glob For Files
150 | 
151 |     Recursively walks a directory tree and finds files matching a
152 |     given extension. Used to find the files to test.
153 |     """
154 | 
155 |     ext_glob = '*.{}'.format(extension)
156 |     for root, dirnames, filenames in os.walk(path):
157 |         for filename in fnmatch.filter(filenames, ext_glob):
158 |             yield os.path.join(root, filename)
159 | 
160 | def main(argv):
161 |     try:
162 |         os.mkdir("specbin/")
163 |     except OSError:
164 |         pass
165 | 
166 |     failures = 0
167 |     for spec in glob_for('spec/', 'ulg'):
168 |         try:
169 |             run_spec(spec)
170 |             sys.stdout.write('.')
171 |             sys.stdout.flush()
172 |         except Error as e:
173 |             err = '\n{}: {}: {}'.format(spec, type(e).__name__, e.error)
174 |             print(err, file=sys.stderr)
175 |             failures += 1
176 | 
177 |     # newline following all those .s
178 |     print('\n')
179 |     print('-' * 40)
180 |     if failures:
181 |         print('{} tests failed'.format(failures))
182 |     else:
183 |         print('All tests passed')
184 |     return failures
185 | 
186 | if __name__ == '__main__':
187 |     sys.exit(main(sys.argv[1:]))
188 | 


--------------------------------------------------------------------------------
/src/compile.rs:
--------------------------------------------------------------------------------
  1 | //! This module contains the code required to compile a parsed tree
  2 | //! down to LLVM bytecode.
  3 | 
  4 | use crate::diag::Diagnostic;
  5 | use crate::low_loader::prelude::*;
  6 | use crate::sem;
  7 | use crate::syntax;
  8 | use linker::Linker;
  9 | use std::path::Path;
 10 | use std::process::Command;
 11 | use tempfile::Builder;
 12 | 
 13 | pub use self::error::{CompError, CompResult};
 14 | pub use self::options::{CompilationOptions, OptimisationLevel};
 15 | 
 16 | pub mod error;
 17 | pub mod linker;
 18 | pub mod options;
 19 | 
 20 | mod lower;
 21 | mod lower_context;
 22 | mod string_builtins;
 23 | 
 24 | /// Add the Core Declarations to the Module
 25 | ///
 26 | /// This method is responsible for making sure that
 27 | /// declarations/definitions of any builtin functions are emitted.
 28 | fn add_core_decls(ctx: &mut Context, module: &mut Module) -> CompResult<()> {
 29 |     add_printf_decl(ctx, module);
 30 |     module.add_global(ctx.const_str("%lld\n"), "printf_num_format");
 31 |     module.add_global(ctx.const_str("%s\n"), "printf_cstr_format");
 32 |     module.add_global(ctx.const_str("%.*s\n"), "printf_ustr_format");
 33 |     module.add_global(ctx.const_str("true"), "print_true");
 34 |     module.add_global(ctx.const_str("false"), "print_false");
 35 |     Ok(())
 36 | }
 37 | 
 38 | /// Add a Printf Declaration to the Module
 39 | ///
 40 | /// Creates a new function in the given module which maps to the
 41 | /// `printf` function. This will be used by the `print` operator
 42 | /// to write output.
 43 | fn add_printf_decl(ctx: &mut Context, module: &mut Module) {
 44 |     let mut params = [ctx.cstr_type()];
 45 |     let int_type = ctx.int_type(32);
 46 |     let mut printf = ctx.add_varargs_function(module, "printf", int_type, &mut params);
 47 |     printf.set_calling_convention(CallConvention::CDecl);
 48 | }
 49 | 
 50 | /// Compilation State
 51 | ///
 52 | /// Encompasses the inputs and settings for a given compilation.
 53 | pub struct Compilation {
 54 |     /// The `Expression`s which are being compiled.
 55 |     expr: sem::Expression,
 56 |     /// The options for this compilation
 57 |     options: CompilationOptions,
 58 |     /// diagnostics from this compilation
 59 |     diagnostics: Vec<Diagnostic>,
 60 | }
 61 | 
 62 | impl Compilation {
 63 |     /// Create a new compilation
 64 |     ///
 65 |     /// # Parameters
 66 |     ///  * `source` - The source text for the program
 67 |     ///  * `expr` - the expression to compile
 68 |     ///  * `opts` - The compilation options
 69 |     #[allow(clippy::new_ret_no_self)]
 70 |     pub fn new(tree: syntax::SyntaxTree, opts: CompilationOptions) -> CompResult<Self> {
 71 |         let mut binder = sem::Binder::new(sem::Scope::new());
 72 |         let sem_expr = binder.bind_tree(tree);
 73 |         Ok(Compilation {
 74 |             expr: sem_expr,
 75 |             options: opts,
 76 |             diagnostics: binder.take_diagnostics(),
 77 |         })
 78 |     }
 79 | 
 80 |     /// Emit
 81 |     ///
 82 |     /// Performs the compilation, emitting the results to the given file.
 83 |     pub fn emit(self, target: &Target, output_path: &Path) -> CompResult<()> {
 84 |         if !self.diagnostics.is_empty() {
 85 |             return Err(CompError::Generic(
 86 |                 "can't emit a compilation contianing diagnostics".into(),
 87 |             ));
 88 |         }
 89 | 
 90 |         let mut ctx = Context::new();
 91 |         let name = output_path
 92 |             .file_stem()
 93 |             .and_then(|s| s.to_str())
 94 |             .unwrap_or("fallback_module_name");
 95 |         let mut module = ctx.add_module(name);
 96 |         module.set_target(target);
 97 | 
 98 |         add_core_decls(&mut ctx, &mut module)?;
 99 | 
100 |         let fun = {
101 |             let mut lower_ctx = lower_context::LowerContext::new(&mut ctx, &mut module);
102 |             lower_ctx.add_intrinsics();
103 |             lower_ctx.add_core_types();
104 |             lower::lower_as_main(&mut lower_ctx, self.expr)?
105 |         };
106 | 
107 |         fun.verify_or_panic();
108 |         module.verify_or_panic();
109 | 
110 |         let linker = self.options.linker.unwrap_or_else(Linker::default);
111 | 
112 |         // Create a temp file to write the LLVM IR, bitcode or object to
113 |         let temp_file = Builder::new()
114 |             .prefix("ullage")
115 |             .suffix(linker.asset_ty.extension())
116 |             .tempfile()?;
117 | 
118 |         // check if we have optimisation enabled and run the
119 |         // corresponding optimisations if we do.
120 |         if let Some((level, size)) = self.options.opt_level.unpack() {
121 |             module.run_optimiser(level, size);
122 |         }
123 | 
124 |         // Check what we have, and dump it to the screen
125 |         if self.options.dump_ir {
126 |             module.dump();
127 |         }
128 |         module.write_to_file(&target, temp_file.path(), linker.asset_ty.file_kind())?;
129 | 
130 |         // Shell out to Clang to link the final assembly
131 |         let output = Command::new(linker.cmd.executable())
132 |             .arg(temp_file.path())
133 |             .arg(format!("--target={}", target.triple()))
134 |             .arg("-o")
135 |             .arg(output_path)
136 |             .output()?;
137 |         let status = output.status;
138 | 
139 |         if status.success() {
140 |             Ok(())
141 |         } else {
142 |             Err(CompError::link_fail(status.code(), output.stderr))
143 |         }
144 |     }
145 | 
146 |     /// Does the compilation have any diagnostics to emit?
147 |     pub fn has_diagnostics(&self) -> bool {
148 |         !self.diagnostics.is_empty()
149 |     }
150 | 
151 |     /// Borrow a slice of the diagnostics in this compilation
152 |     pub fn diagnostics(&self) -> &[Diagnostic] {
153 |         &self.diagnostics
154 |     }
155 | }
156 | 


--------------------------------------------------------------------------------
/src/compile/error.rs:
--------------------------------------------------------------------------------
 1 | //! Compilation error module. Contains the Result and Error types for
 2 | //! the compile module.
 3 | 
 4 | use std::{fmt::Display, io};
 5 | 
 6 | /// Represents the different types of errors which can be encountered
 7 | /// when compiling.
 8 | #[derive(Debug)]
 9 | pub enum CompError {
10 |     /// Generic Error String
11 |     Generic(String),
12 | 
13 |     /// Linker Failure
14 |     Linker(LinkerError),
15 | 
16 |     /// Wrapped IO Error
17 |     IO(io::Error),
18 | }
19 | 
20 | impl std::error::Error for CompError {
21 |     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
22 |         match self {
23 |             CompError::Linker(e) => Some(e),
24 |             CompError::IO(e) => Some(e),
25 |             _ => None,
26 |         }
27 |     }
28 | }
29 | 
30 | impl Display for CompError {
31 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 |         match self {
33 |             CompError::Generic(msg) => write!(f, "compilation error: {}", msg),
34 |             CompError::Linker(cause) => write!(f, "linker failed.: {}", cause),
35 |             CompError::IO(cause) => write!(f, "IO error: {}", cause),
36 |         }
37 |     }
38 | }
39 | 
40 | /// Compilation result. Returned from each compilation stage.
41 | pub type CompResult<T> = Result<T, CompError>;
42 | 
43 | /// Link Failure Type
44 | ///
45 | /// Used to group together the different failure modes for the linker.
46 | #[derive(Debug)]
47 | pub enum LinkerError {
48 |     /// The linker failed with a known exit status
49 |     WithExitStatus(i32, String),
50 | 
51 |     /// The linker failed with an unknown exit status
52 |     UnknownFailure(String),
53 | }
54 | 
55 | impl std::error::Error for LinkerError {}
56 | 
57 | impl Display for LinkerError {
58 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59 |         match self {
60 |             LinkerError::WithExitStatus(status, msg) => {
61 |                 write!(f, "linker returned exit status {}: {}", status, msg)
62 |             }
63 |             LinkerError::UnknownFailure(msg) => write!(f, "unknown linker error: {}", msg),
64 |         }
65 |     }
66 | }
67 | 
68 | impl From<String> for CompError {
69 |     /// Convert untyped errors to generic compilation errors.
70 |     fn from(s: String) -> Self {
71 |         CompError::Generic(s)
72 |     }
73 | }
74 | 
75 | impl From<io::Error> for CompError {
76 |     fn from(e: io::Error) -> Self {
77 |         CompError::IO(e)
78 |     }
79 | }
80 | 
81 | impl CompError {
82 |     /// Compilation Linker Error
83 |     ///
84 |     /// Built when the linker fails; stderr is decoded lossily as UTF-8.
85 |     pub fn link_fail(exit_status: Option<i32>, stderr: Vec<u8>) -> Self {
86 |         let stderr = String::from_utf8_lossy(&stderr).into_owned();
87 |         CompError::Linker(match exit_status {
88 |             Some(status) => LinkerError::WithExitStatus(status, stderr),
89 |             None => LinkerError::UnknownFailure(stderr),
90 |         })
91 |     }
92 | }
93 | 


--------------------------------------------------------------------------------
/src/compile/linker.rs:
--------------------------------------------------------------------------------
  1 | //! Linker Abstractions
  2 | //!
  3 | //! This module contains types to deal with linking object files. The
  4 | //! main struct `Linker` specifies the information needed to perform
  5 | //! a link.
  6 | 
  7 | use std::default::Default;
  8 | 
  9 | use crate::low_loader::prelude::OutputFileKind;
 10 | 
 11 | /// The information for performing a link
 12 | pub struct Linker {
 13 |     /// The linker command. Currently only `clang` is supported.
 14 |     pub cmd: LinkerCommand,
 15 |     /// The intermediate asset type the linker expects
 16 |     pub asset_ty: LinkerAssetType,
 17 | }
 18 | 
 19 | /// The executable type to use for linking
 20 | #[derive(Debug, Copy, Clone)]
 21 | pub enum LinkerCommand {
 22 |     /// The Clang c compiler
 23 |     Clang,
 24 | }
 25 | 
 26 | /// The intermediate asset type to pass to the linker
 27 | #[derive(Debug, Copy, Clone)]
 28 | pub enum LinkerAssetType {
 29 |     /// LLVM IR text files
 30 |     LlvmIr,
 31 |     /// LLVM IR bticode files
 32 |     LlvmBc,
 33 |     /// Native object
 34 |     Object,
 35 | }
 36 | 
 37 | impl Linker {
 38 |     /// Create a new linker from the command and asset type
 39 |     pub fn new(cmd: LinkerCommand, asset_ty: LinkerAssetType) -> Self {
 40 |         Linker { cmd, asset_ty }
 41 |     }
 42 | 
 43 |     /// Create a linker from the given command
 44 |     pub fn from_command(cmd: LinkerCommand) -> Self {
 45 |         Linker {
 46 |             cmd,
 47 |             asset_ty: cmd.default_asset_ty(),
 48 |         }
 49 |     }
 50 | }
 51 | 
 52 | impl Default for Linker {
 53 |     fn default() -> Self {
 54 |         Linker::from_command(LinkerCommand::default())
 55 |     }
 56 | }
 57 | 
 58 | impl LinkerCommand {
 59 |     /// Get the Default Asset Type for this Linker
 60 |     pub fn default_asset_ty(&self) -> LinkerAssetType {
 61 |         LinkerAssetType::LlvmBc
 62 |     }
 63 | 
 64 |     /// Get the executable this command should call.
 65 |     ///
 66 |     pub fn executable(&self) -> &str {
 67 |         // FIXME: instead of exposing a &str, we should
 68 |         //        make the linker build the command
 69 |         //        rather than the compiler.
 70 |         match *self {
 71 |             LinkerCommand::Clang => "clang",
 72 |         }
 73 |     }
 74 | }
 75 | 
 76 | impl Default for LinkerCommand {
 77 |     fn default() -> Self {
 78 |         LinkerCommand::Clang
 79 |     }
 80 | }
 81 | 
 82 | impl LinkerAssetType {
 83 |     /// Get the file extension for the asset type
 84 |     pub fn extension(&self) -> &str {
 85 |         match *self {
 86 |             LinkerAssetType::LlvmIr => ".ll",
 87 |             LinkerAssetType::LlvmBc => ".bc",
 88 |             LinkerAssetType::Object => ".o",
 89 |         }
 90 |     }
 91 | 
 92 |     /// Get the file kind for this asset type
 93 |     pub(crate) fn file_kind(&self) -> OutputFileKind {
 94 |         match *self {
 95 |             LinkerAssetType::LlvmIr => OutputFileKind::LLVMIl,
 96 |             LinkerAssetType::LlvmBc => OutputFileKind::Bitcode,
 97 |             LinkerAssetType::Object => OutputFileKind::NativeObject,
 98 |         }
 99 |     }
100 | }
101 | 


--------------------------------------------------------------------------------
/src/compile/lower_context.rs:
--------------------------------------------------------------------------------
 1 | //! Lower Context
 2 | //!
 3 | //! The lower context defines the state which is passed around as an
 4 | //! expression is lowered to LLVM.
 5 | 
 6 | use crate::low_loader::prelude::*;
 7 | use crate::sem::{BuiltinType, Typ};
 8 | use std::collections::HashMap;
 9 | 
10 | /// Lower Context
11 | ///
12 | /// Pairs an LLVM Context with a single LLVM Module. Used as the
13 | /// target when lowering a tree to LLVM.
14 | pub struct LowerContext<'a> {
15 |     /// The LLVM Context this lower context is using.
16 |     pub llvm_ctx: &'a mut Context,
17 |     /// The LLVM Module this context is building IR into.
18 |     pub module: &'a mut Module,
19 | 
20 |     /// Map of `Typ` values to LLVM Types
21 |     ty_map: HashMap<Typ, LLVMTypeRef>,
22 | }
23 | 
24 | impl<'a> LowerContext<'a> {
25 |     /// Create a New Lowering Context
26 |     ///
27 |     /// Wraps the given module and LLVM context to create the required
28 |     /// context for lowering ASTs.
29 |     pub fn new(ctx: &'a mut Context, module: &'a mut Module) -> Self {
30 |         LowerContext {
31 |             llvm_ctx: ctx,
32 |             module,
33 |             ty_map: Default::default(),
34 |         }
35 |     }
36 | 
37 |     /// Add LLVM Intrinsic Declarations
38 |     ///
39 |     /// Updates the module to add declarations for the LLVM intrinsics
40 |     /// we care about. Need to find a better way to create these.
41 |     pub fn add_intrinsics(&mut self) {
42 |         let i8ptr = self.llvm_ctx.pointer_type(self.llvm_ctx.int_type(8));
43 |         let i32ty = self.llvm_ctx.int_type(32);
44 | 
45 |         self.llvm_ctx.add_function(
46 |             self.module,
47 |             "llvm.memcpy.p0i8.p0i8.i32",
48 |             self.llvm_ctx.void_type(),
49 |             &mut [i8ptr, i8ptr, i32ty, self.llvm_ctx.bool_type()],
50 |         );
51 |     }
52 | 
53 |     /// Add Core LLVM Types
54 |     ///
55 |     /// Adds entries to the type map for the builtin types, mapping
56 |     /// them to their underlying LLVM representation.
57 |     pub fn add_core_types(&mut self) {
58 |         let lang_string = self.llvm_ctx.pointer_type(self.llvm_ctx.struct_type(vec![
59 |             self.llvm_ctx.int_type(32),
60 |             self.llvm_ctx.array_type(self.llvm_ctx.int_type(8), 0),
61 |         ]));
62 |         self.add_type(Typ::Builtin(BuiltinType::String), lang_string);
63 |         let llvm_bool = self.llvm_ctx.bool_type();
64 |         self.add_type(Typ::Builtin(BuiltinType::Bool), llvm_bool);
65 |         let llvm_number = self.llvm_ctx.int_type(64);
66 |         self.add_type(Typ::Builtin(BuiltinType::Number), llvm_number);
67 |     }
68 | 
69 |     /// Add a Type to the Context
70 |     ///
71 |     /// Makes the given type available for lowering in the current
72 |     /// context by providing a mapping through to an LLVM type.
73 |     pub fn add_type(&mut self, ty: Typ, llvm_ty: LLVMTypeRef) {
74 |         self.ty_map.insert(ty, llvm_ty);
75 |     }
76 | 
77 |     /// Look up a Given Type
78 |     pub fn llvm_type(&self, ty: Typ) -> Option<LLVMTypeRef> {
79 |         self.ty_map.get(&ty).cloned()
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/src/compile/options.rs:
--------------------------------------------------------------------------------
 1 | //! Compilation Options
 2 | //!
 3 | //! This module defines the options structure used to tweak
 4 | //! compilation output.
 5 | 
 6 | use super::linker::Linker;
 7 | use crate::low_loader::pass_manager as pm;
 8 | 
 9 | /// Compilation Options
10 | ///
11 | /// This is used to control how each `Compilation` instance behaves.
12 | #[derive(Default)]
13 | pub struct CompilationOptions {
14 |     /// Dump the LLVM IR when the module is compiled
15 |     pub dump_ir: bool,
16 |     /// Optimisation level to use when emitting code
17 |     pub opt_level: OptimisationLevel,
18 |     /// Linker option
19 |     pub linker: Option<Linker>,
20 | }
21 | 
22 | /// Optimisation levels
23 | ///
24 | /// Defines the different levels of optimisation that the compiler
25 | /// supports. These levels are usually controlled from the command
26 | /// line.
27 | pub enum OptimisationLevel {
28 |     /// No optimisation
29 |     Off,
30 |     /// Low optimisation, same as -O1
31 |     Low,
32 |     /// Medium optimisation. Same as -O2
33 |     Med,
34 |     /// High optimisation. Same as -O3
35 |     High,
36 |     /// Optimise for size not speed
37 |     Size,
38 | }
39 | 
40 | impl CompilationOptions {
41 |     /// Set the `dump_ir` flag
42 |     ///
43 |     /// Enables or disables dumping the LLVM IR when modules are
44 |     /// compiled.
45 |     pub fn with_dump_ir(self, dump_ir: bool) -> Self {
46 |         CompilationOptions { dump_ir, ..self }
47 |     }
48 | 
49 |     /// Set the Optimisation Level
50 |     ///
51 |     /// Controls the optimisation level for the given options.
52 |     pub fn with_opt_level(self, opt_level: OptimisationLevel) -> Self {
53 |         CompilationOptions { opt_level, ..self }
54 |     }
55 | 
56 |     /// Set the linker command to use
57 |     pub fn with_linker(self, linker: Linker) -> Self {
58 |         CompilationOptions {
59 |             linker: Some(linker),
60 |             ..self
61 |         }
62 |     }
63 | }
64 | 
65 | impl Default for OptimisationLevel {
66 |     fn default() -> Self {
67 |         OptimisationLevel::Off
68 |     }
69 | }
70 | 
71 | impl OptimisationLevel {
72 |     /// Unpack an Optimisation Level
73 |     ///
74 |     /// Retrieves a (level, size) tuple which defines how to configure
75 |     /// the LLVM optimiser for this optimisation level.
76 |     pub fn unpack(&self) -> Option<(pm::OptLevel, pm::OptSize)> {
77 |         use crate::OptimisationLevel::*;
78 |         match self {
79 |             Off => None,
80 |             Low => Some((pm::OptLevel::Low, pm::OptSize::Off)),
81 |             Med => Some((pm::OptLevel::Medium, pm::OptSize::Off)),
82 |             High => Some((pm::OptLevel::High, pm::OptSize::Off)),
83 |             Size => Some((pm::OptLevel::Medium, pm::OptSize::Size)),
84 |         }
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/src/compile/string_builtins.rs:
--------------------------------------------------------------------------------
 1 | //! String Builtins
 2 | //!
 3 | //! This module contains logic for interacting with string values.
 4 | 
 5 | use super::lower_context::LowerContext;
 6 | use crate::low_loader::prelude::*;
 7 | 
 8 | /// String Copy Guts
 9 | ///
10 | /// Copies the body of a source string into a destination string's
11 | /// buffer. This is used by the lowering of string concatenation.
12 | pub(crate) fn string_copy_guts(
13 |     ctx: &mut LowerContext<'_>,
14 |     builder: &mut Builder,
15 |     dest: LLVMValueRef,
16 |     src: LLVMValueRef,
17 |     len: LLVMValueRef,
18 |     offset: Option<LLVMValueRef>,
19 | ) {
20 |     let memcpy = ctx
21 |         .module
22 |         .find_function("llvm.memcpy.p0i8.p0i8.i32")
23 |         .expect("can't find memcpy intrinsic");
24 | 
25 |     let src_buffer = string_get_buffer(builder, src);
26 |     let src_buffer = builder.build_gep(
27 |         src_buffer,
28 |         &mut [ctx.llvm_ctx.const_int(0), ctx.llvm_ctx.const_int(0)],
29 |     );
30 |     let dest_buffer = string_get_buffer(builder, dest);
31 |     let offset = offset.unwrap_or_else(|| ctx.llvm_ctx.const_int(0));
32 |     let dest_buffer = builder.build_gep(dest_buffer, &mut [ctx.llvm_ctx.const_int(0), offset]);
33 | 
34 |     builder.build_void_call(
35 |         &memcpy,
36 |         &mut [dest_buffer, src_buffer, len, ctx.llvm_ctx.const_bool(false)],
37 |     );
38 | }
39 | 
40 | /// Get String's Buffer Pointer
41 | ///
42 | /// Returns a pointer to the buffer which contains the `String`'s
43 | /// body. This is a `[0 x i8]*`. It can be converted into a pointer to
44 | /// a given character offset with a GEP instruction.
45 | pub(crate) fn string_get_buffer(builder: &mut Builder, s: LLVMValueRef) -> LLVMValueRef {
46 |     builder.build_struct_gep(s, 1)
47 | }
48 | 
49 | /// Get String Length
50 | ///
51 | /// Reads the length field out of the `String`'s internal
52 | /// representation. This is a constant-time operation and returns the
53 | /// length in bytes.
54 | pub(crate) fn string_get_len(builder: &mut Builder, val: LLVMValueRef) -> LLVMValueRef {
55 |     let len_field = builder.build_struct_gep(val, 0);
56 |     builder.build_load(len_field)
57 | }
58 | 
59 | /// Set String Length
60 | ///
61 | /// Set the internal string length field to a new value.
62 | pub(crate) fn string_set_len(builder: &mut Builder, val: LLVMValueRef, size: LLVMValueRef) {
63 |     let len_field = builder.build_struct_gep(val, 0);
64 |     builder.build_store(size, len_field);
65 | }
66 | 


--------------------------------------------------------------------------------
/src/diag.rs:
--------------------------------------------------------------------------------
 1 | //! Compilation Diagnostics
 2 | //!
 3 | //! The data structures used to keep track of compilation diagnostic
 4 | //! information. This includes the main `Diagnostic` type which
 5 | //! represents a syntax or semantic error during compilation.
 6 | //!
 7 | //! The types in this module will usually be consumed by pushing
 8 | //! diagnostics into a `DiagnosticBag`.
 9 | 
10 | use crate::syntax::text::Span;
11 | 
12 | /// The main `Diagnostic` type. Each diagnostic consists of a primary
13 | /// message, location, and level.
14 | pub struct Diagnostic {
15 |     /// The diagnostic message generated by the compiler
16 |     pub message: String,
17 | 
18 |     /// The location that this diagnostic occurred at
19 |     pub span: Span,
20 | }
21 | 
22 | impl Diagnostic {
23 |     /// Constructs a new diagnostic from the given message and
24 |     /// location.
25 |     ///
26 |     /// # Parameters
27 |     ///  * `message` - the diagnostic message to emit
28 |     ///  * `span` - The location the diagnostic is tied to. If there
29 |     ///             is no logical location for this diagnostic then
30 |     ///             `DUMMY_SPAN` should be used.
31 |     pub fn new<S>(message: S, span: Span) -> Self
32 |     where
33 |         S: Into<String>,
34 |     {
35 |         Diagnostic {
36 |             message: message.into(),
37 |             span,
38 |         }
39 |     }
40 | }
41 | 


--------------------------------------------------------------------------------
/src/low_loader.rs:
--------------------------------------------------------------------------------
 1 | //! Low Loader
 2 | //!
 3 | //! Low-level bindings to LLVM. This module provides a set of safe
 4 | //! wrappers around LLVM objects and types to allow them to be used
 5 | //! more ergonomically from Rust code.
 6 | //!
 7 | //! # Key Types
 8 | //!
 9 | //! Any use of low_loader requires creating a [`Context`] first. This
10 | //! type controls access to [`Module`]s and [`Type`]s. Each Module
11 | //! represents a single 'compilation unit' which maps down to a single
12 | //! output object when compiled.
13 | //!
14 | //! After creating a module LLVM IR can be added using a [`Builder`]
15 | //! object. The builder has a set of factory methods for appending IR
16 | //! to a given basic block.
17 | //!
18 | //! [`Context`]: ./context/struct.Context.html
19 | //! [`Module`]: ./module/struct.Module.html
20 | //! [`Type`]: ./types/enum.Type.html
21 | //! [`Builder`]: ./builder/struct.Builder.html
22 | 
23 | #![deny(missing_docs)]
24 | 
25 | #[cfg(feature = "llvm-10")]
26 | use llvm_10 as llvm_sys;
27 | #[cfg(feature = "llvm-13")]
28 | use llvm_13 as llvm_sys;
29 | #[cfg(feature = "llvm-9")]
30 | use llvm_9 as llvm_sys;
31 | 
32 | pub mod builder;
33 | pub mod context;
34 | pub mod function;
35 | pub mod module;
36 | pub mod pass_manager;
37 | pub mod targets;
38 | pub mod types;
39 | pub mod value;
40 | 
41 | /// Prelude Module
42 | ///
43 | /// This module just re-exports useful types to help cut down on using
44 | /// statements.
45 | pub mod prelude {
46 |     pub use super::builder::Builder;
47 |     pub use super::builder::Predicate;
48 |     pub use super::context::Context;
49 |     pub use super::function::{CallConvention, Function};
50 |     pub use super::module::{Module, OutputFileKind};
51 |     pub use super::targets::{Target, TargetLookupError};
52 |     pub use super::types::Type;
53 |     pub use super::value::Value;
54 | 
55 |     // FIXME: only expose Value in public interface.
56 |     pub use super::llvm_sys::prelude::LLVMTypeRef;
57 |     pub use super::llvm_sys::prelude::LLVMValueRef;
58 | }
59 | 


--------------------------------------------------------------------------------
/src/low_loader/builder.rs:
--------------------------------------------------------------------------------
  1 | //! LLVM IR Builder Wrapper
  2 | //!
  3 | //! Contains a Rust wrapper for dealing with LLVM Intermediate
  4 | //! Representation Builders. These objects are responsible for
  5 | //! creating instructions and adding them to basic blocks. Essentially
  6 | //! they make up the pointy end of the whole thing. Useful!
  7 | 
  8 | use super::llvm_sys::core;
  9 | use super::llvm_sys::prelude::*;
 10 | use super::llvm_sys::*;
 11 | 
 12 | use std::ffi::{CStr, CString};
 13 | use std::os::raw::c_uint;
 14 | 
 15 | use super::function::Function;
 16 | 
 17 | /// IR Builder
 18 | ///
 19 | /// Creating yo instructions and manipulating yo basic blocks.
 20 | #[derive(Debug, PartialEq)]
 21 | pub struct Builder {
 22 |     raw: LLVMBuilderRef,
 23 | }
 24 | 
 25 | /// Comparison Predicate Type
 26 | ///
 27 | /// Choice of comparison operators. These will be mapped through to
 28 | /// `LLVMIntPredicate` or `LLVMRealPredicate`s depending on the types
 29 | /// being used.
 30 | #[derive(Copy, Clone)]
 31 | pub enum Predicate {
 32 |     /// Equality predicate
 33 |     Eq,
 34 |     /// Inequality predicate
 35 |     Neq,
 36 |     /// Arithmetic less than comparison
 37 |     Lt,
 38 |     /// Arithmetic less than or equal comparison
 39 |     LtEq,
 40 |     /// Arithmetic greater than comparison
 41 |     Gt,
 42 |     /// Arithmetic greater than or equal comparison
 43 |     GtEq,
 44 | }
 45 | 
 46 | impl Builder {
 47 |     /// Create a Builder from a Raw Pointer
 48 |     ///
 49 |     /// Takes ownership of the given builder pointer. The builder can
 50 |     /// then be manipulated through the returned object and will be
 51 |     /// disposed of when this object leaves scope.
 52 |     pub fn from_raw(raw: LLVMBuilderRef) -> Self {
 53 |         Builder { raw }
 54 |     }
 55 | 
 56 |     /// Build at the End of a Block
 57 |     ///
 58 |     /// Takes the builder, points it at the end of the basic block.
 59 |     pub fn position_at_end(&mut self, block: LLVMBasicBlockRef) {
 60 |         unsafe {
 61 |             core::LLVMPositionBuilderAtEnd(self.raw, block);
 62 |         }
 63 |     }
 64 | 
 65 |     /// Add a Ret Instruction
 66 |     ///
 67 |     /// Returns control from the current function
 68 |     /// immediately. Consumes this build context as the current
 69 |     /// basic block can't have any more instructions added after a
 70 |     /// terminator instruction.
 71 |     pub fn build_ret(self, value: LLVMValueRef) {
 72 |         unsafe {
 73 |             core::LLVMBuildRet(self.raw, value);
 74 |         }
 75 |     }
 76 | 
 77 |     /// Build a Call to a `void` Function
 78 |     ///
 79 |     /// The built value produces nothing so no value is returned.
 80 |     pub fn build_void_call(&mut self, function: &Function, args: &mut [LLVMValueRef]) {
 81 |         self.build_named_call(function, args, None);
 82 |     }
 83 | 
 84 |     /// Build a Call Instruction
 85 |     ///
 86 |     /// Emits a call to the given function.
 87 |     pub fn build_call(&mut self, function: &Function, args: &mut [LLVMValueRef]) -> LLVMValueRef {
 88 |         self.build_named_call(function, args, Some("call"))
 89 |     }
 90 | 
 91 |     /// Build a Call with a Name
 92 |     ///
 93 |     /// Emits a call instruction. The name provided to the call
 94 |     /// controls the resulting temporary value. If no name is provided
 95 |     /// then the result isn't bound to any temporary.
 96 |     pub fn build_named_call(
 97 |         &mut self,
 98 |         function: &Function,
 99 |         args: &mut [LLVMValueRef],
100 |         name: Option<&str>,
101 |     ) -> LLVMValueRef {
102 |         let name = name.map(|n| CString::new(n).unwrap());
103 |         static EMPTY_NAME: [libc::c_char; 1] = [0];
104 |         unsafe {
105 |             let call = core::LLVMBuildCall(
106 |                 self.raw,
107 |                 function.as_raw(),
108 |                 args.as_mut_ptr(),
109 |                 args.len() as c_uint,
110 |                 name.as_ref().map_or(EMPTY_NAME.as_ptr(), |n| n.as_ptr()),
111 |             );
112 |             core::LLVMSetInstructionCallConv(call, function.call_conv().into());
113 |             call
114 |         }
115 |     }
116 | 
117 |     /// Build a GEP
118 |     ///
119 |     /// GEP, or `getelementptr`, computes a pointer to an element in an item.
120 |     pub fn build_gep(&mut self, value: LLVMValueRef, indices: &mut [LLVMValueRef]) -> LLVMValueRef {
121 |         unsafe {
122 |             let name = CStr::from_bytes_with_nul_unchecked(b"gep\0");
123 |             core::LLVMBuildGEP(
124 |                 self.raw,
125 |                 value,
126 |                 indices.as_mut_ptr(),
127 |                 indices.len() as c_uint,
128 |                 name.as_ptr(),
129 |             )
130 |         }
131 |     }
132 | 
133 |     /// Build a Structure GEP
134 |     ///
135 |     /// Computes a pointer to the field at `index` of a given structure pointer; no load is emitted.
136 |     pub fn build_struct_gep(&mut self, struct_val: LLVMValueRef, index: u32) -> LLVMValueRef {
137 |         unsafe {
138 |             let name = CStr::from_bytes_with_nul_unchecked(b"sgep\0");
139 |             core::LLVMBuildStructGEP(self.raw, struct_val, index as c_uint, name.as_ptr())
140 |         }
141 |     }
142 | 
143 |     /// Extract a Value from an Aggregate
144 |     ///
145 |     /// Reads a value from a structure at the given struct offset.
146 |     pub fn build_extract_value(&mut self, struct_val: LLVMValueRef, index: u32) -> LLVMValueRef {
147 |         unsafe {
148 |             let name = CStr::from_bytes_with_nul_unchecked(b"extracted\0");
149 |             core::LLVMBuildExtractValue(self.raw, struct_val, index as c_uint, name.as_ptr())
150 |         }
151 |     }
152 | 
153 |     /// Build an Integer Negation
154 |     pub fn build_neg(&mut self, value: LLVMValueRef) -> LLVMValueRef {
155 |         unsafe {
156 |             let name = CStr::from_bytes_with_nul_unchecked(b"negated\0");
157 |             core::LLVMBuildNeg(self.raw, value, name.as_ptr())
158 |         }
159 |     }
160 | 
161 |     /// Build an Integer Add
162 |     pub fn build_add(&mut self, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef {
163 |         unsafe {
164 |             let name = CStr::from_bytes_with_nul_unchecked(b"addtmp\0");
165 |             core::LLVMBuildAdd(self.raw, lhs, rhs, name.as_ptr())
166 |         }
167 |     }
168 | 
169 |     /// Build an Integer Subtraction
170 |     pub fn build_sub(&mut self, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef {
171 |         unsafe {
172 |             let name = CStr::from_bytes_with_nul_unchecked(b"subtmp\0");
173 |             core::LLVMBuildSub(self.raw, lhs, rhs, name.as_ptr())
174 |         }
175 |     }
176 | 
177 |     /// Build an Integer Multiplication
178 |     pub fn build_mul(&mut self, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef {
179 |         unsafe {
180 |             let name = CStr::from_bytes_with_nul_unchecked(b"multmp\0");
181 |             core::LLVMBuildMul(self.raw, lhs, rhs, name.as_ptr())
182 |         }
183 |     }
184 | 
185 |     /// Build a Signed Integer Division
186 |     pub fn build_sdiv(&mut self, lhs: LLVMValueRef, rhs: LLVMValueRef) -> LLVMValueRef {
187 |         unsafe {
188 |             let name = CStr::from_bytes_with_nul_unchecked(b"divtmp\0");
189 |             core::LLVMBuildSDiv(self.raw, lhs, rhs, name.as_ptr())
190 |         }
191 |     }
192 | 
193 |     /// Build an Integer Comparison
194 |     pub fn build_icmp(
195 |         &mut self,
196 |         op: Predicate,
197 |         lhs: LLVMValueRef,
198 |         rhs: LLVMValueRef,
199 |     ) -> LLVMValueRef {
200 |         let op = match op {
201 |             Predicate::Eq => LLVMIntPredicate::LLVMIntEQ,
202 |             Predicate::Neq => LLVMIntPredicate::LLVMIntNE,
203 |             Predicate::Lt => LLVMIntPredicate::LLVMIntSLT,
204 |             Predicate::Gt => LLVMIntPredicate::LLVMIntSGT,
205 |             Predicate::LtEq => LLVMIntPredicate::LLVMIntSLE,
206 |             Predicate::GtEq => LLVMIntPredicate::LLVMIntSGE,
207 |         };
208 |         unsafe {
209 |             let name = CStr::from_bytes_with_nul_unchecked(b"cmptemp\0");
210 |             core::LLVMBuildICmp(self.raw, op, lhs, rhs, name.as_ptr())
211 |         }
212 |     }
213 | 
214 |     /// Build an Allocate Instruction
215 |     ///
216 |     /// Creates a new value allocated for the remainder of the current
217 |     /// stack frame.
218 |     pub fn build_alloca(&mut self, typ: LLVMTypeRef, name: &str) -> LLVMValueRef {
219 |         let name = CString::new(name).unwrap();
220 |         unsafe { core::LLVMBuildAlloca(self.raw, typ, name.as_ptr()) }
221 |     }
222 | 
223 |     /// Build a `malloc` Instruction
224 |     ///
225 |     /// Creates a new value allocated on the heap. Returns a pointer to
226 |     /// the new value.
227 |     pub fn build_malloc(
228 |         &mut self,
229 |         typ: LLVMTypeRef,
230 |         count: Option<LLVMValueRef>,
231 |         name: &str,
232 |     ) -> LLVMValueRef {
233 |         let name = CString::new(name).unwrap();
234 |         match count {
235 |             Some(count) => unsafe {
236 |                 core::LLVMBuildArrayMalloc(self.raw, typ, count, name.as_ptr())
237 |             },
238 |             None => unsafe { core::LLVMBuildMalloc(self.raw, typ, name.as_ptr()) },
239 |         }
240 |     }
241 | 
242 |     /// Create a Conditional Branch
243 |     ///
244 |     /// If the condition is true then execution continues in the first
245 |     /// block, otherwise execution will move to the second block.
246 |     pub fn build_cond_br(
247 |         &mut self,
248 |         cond: LLVMValueRef,
249 |         iftrue: LLVMBasicBlockRef,
250 |         iffalse: LLVMBasicBlockRef,
251 |     ) {
252 |         unsafe {
253 |             core::LLVMBuildCondBr(self.raw, cond, iftrue, iffalse);
254 |         }
255 |     }
256 | 
257 |     /// Create an Unconditional Branch
258 |     pub fn build_br(&mut self, block: LLVMBasicBlockRef) {
259 |         unsafe {
260 |             core::LLVMBuildBr(self.raw, block);
261 |         }
262 |     }
263 | 
264 |     /// Load from Variable
265 |     pub fn build_load(&mut self, var: LLVMValueRef) -> LLVMValueRef {
266 |         unsafe {
267 |             let name = CStr::from_bytes_with_nul_unchecked(b"loaded\0");
268 |             core::LLVMBuildLoad(self.raw, var, name.as_ptr())
269 |         }
270 |     }
271 | 
272 |     /// Store to Variable
273 |     pub fn build_store(&mut self, val: LLVMValueRef, var: LLVMValueRef) -> LLVMValueRef {
274 |         unsafe { core::LLVMBuildStore(self.raw, val, var) }
275 |     }
276 | 
277 |     /// Build a Not
278 |     pub fn build_not(&mut self, val: LLVMValueRef) -> LLVMValueRef {
279 |         unsafe {
280 |             let name = CStr::from_bytes_with_nul_unchecked(b"not\0");
281 |             core::LLVMBuildNot(self.raw, val, name.as_ptr())
282 |         }
283 |     }
284 | 
285 |     /// Bitcast
286 |     ///
287 |     /// Re-interpret the input value to be of the given type. This
288 |     /// just transforms how the underlying bits are interpreted rather
289 |     /// than performing any smarter coercion.
290 |     pub fn build_bitcast(
291 |         &mut self,
292 |         val: LLVMValueRef,
293 |         typ: LLVMTypeRef,
294 |         name: &str,
295 |     ) -> LLVMValueRef {
296 |         unsafe {
297 |             let name = CString::new(name).unwrap();
298 |             core::LLVMBuildBitCast(self.raw, val, typ, name.as_ptr())
299 |         }
300 |     }
301 | }
302 | 
303 | impl Drop for Builder {
304 |     /// Dispose this Builder
305 |     fn drop(&mut self) {
306 |         unsafe {
307 |             core::LLVMDisposeBuilder(self.raw);
308 |         }
309 |     }
310 | }
311 | 


--------------------------------------------------------------------------------
/src/low_loader/context.rs:
--------------------------------------------------------------------------------
  1 | //! LLVM Context Wrapper
  2 | //!
  3 | //! Contains a Rust wrapper for dealing with LLVM Context objects.
  4 | 
  5 | use super::llvm_sys::prelude::*;
  6 | use super::llvm_sys::{core, target};
  7 | use super::prelude::*;
  8 | use std::ffi::CString;
  9 | use std::os::raw::c_uint;
 10 | 
 11 | /// Ensure Initialised
 12 | ///
 13 | /// Makes sure that the LLVM library has been initialised to support
 14 | /// the features we want to use. This function can safely be called
 15 | /// any number of times but will only initialise LLVM once.
 16 | ///
 17 | /// # Panics
 18 | ///
 19 | /// If any of the LLVM subsystems can't be successfully initialised
 20 | /// then this function will panic.
 21 | pub(crate) fn ensure_initialised() {
 22 |     use std::sync::Once;
 23 | 
 24 |     static INIT: Once = Once::new();
 25 | 
 26 |     INIT.call_once(|| {
 27 |         unsafe {
 28 |             // Initialise all targets. This is required so we can look
 29 |             // targets up from the target registry and use them if
 30 |             // cross compiling.
 31 |             target::LLVM_InitializeAllTargets();
 32 |             target::LLVM_InitializeAllTargetInfos();
 33 |             // target::LLVM_InitializeAllAsmPrinters();
 34 |             // target::LLVM_InitializeAllAsmParsers();
 35 |             target::LLVM_InitializeAllTargetMCs();
 36 |             if target::LLVM_InitializeNativeAsmPrinter() != 0 {
 37 |                 panic!("Could not initialise ASM Printer");
 38 |             }
 39 |         }
 40 |     });
 41 | }
 42 | 
 43 | /// Context
 44 | ///
 45 | /// A context groups together all the LLVM objects used when
 46 | /// compiling.
 47 | ///
 48 | /// The LLVM context holds the global state for compilation. This
 49 | /// includes types and modules. LLVM context objects aren't
 50 | /// guaranteed to be thread safe, and shouldn't be shared between
 51 | /// threads. We'll enforce this by taking `&mut self` when
 52 | /// meddling with the context.
 53 | #[derive(Debug, PartialEq)]
 54 | pub struct Context(LLVMContextRef);
 55 | 
 56 | impl Context {
 57 |     /// Create Context
 58 |     ///
 59 |     /// You'll probably only need one of these per 'program' you want
 60 |     /// to evaluate. Modules, types and execution from one context
 61 |     /// can't be used with another context.
 62 |     ///
 63 |     /// # Returns
 64 |     ///
 65 |     /// Returns a new compilation context instance.
 66 |     ///
 67 |     /// # Panics
 68 |     ///
 69 |     /// If the underlying LLVM library can't be initialised then this
 70 |     /// function will panic.
 71 |     pub fn new() -> Self {
 72 |         ensure_initialised();
 73 |         Context(unsafe { core::LLVMContextCreate() })
 74 |     }
 75 | 
 76 |     /// Add Module
 77 |     ///
 78 |     /// Creates a new LLVM module in this context.
 79 |     pub fn add_module(&mut self, name: &str) -> Module {
 80 |         let mod_name = CString::new(name).unwrap();
 81 |         Module::from_raw(unsafe {
 82 |             core::LLVMModuleCreateWithNameInContext(mod_name.as_ptr(), self.as_raw())
 83 |         })
 84 |     }
 85 | 
 86 |     /// Add a Function to the Module
 87 |     ///
 88 |     /// Creates a new function in the module. The function has no body
 89 |     /// attached. If nothing extra is done with the returned
 90 |     /// `Function` then it will serve as an external declaration/import.
 91 |     pub fn add_function(
 92 |         &mut self,
 93 |         module: &mut Module,
 94 |         name: &str,
 95 |         ret_type: LLVMTypeRef,
 96 |         params: &mut [LLVMTypeRef],
 97 |     ) -> Function {
 98 |         self.add_function_internal(module, name, ret_type, params, false)
 99 |     }
100 | 
101 |     /// Add a Function with Variable Arguments
102 |     ///
103 |     /// Creates a new function in the module in the same way as
104 |     /// `add_function`. In addition the function is declared with a
105 |     /// variable argument list.
106 |     pub fn add_varargs_function(
107 |         &mut self,
108 |         module: &mut Module,
109 |         name: &str,
110 |         ret_type: LLVMTypeRef,
111 |         params: &mut [LLVMTypeRef],
112 |     ) -> Function {
113 |         self.add_function_internal(module, name, ret_type, params, true)
114 |     }
115 | 
116 |     /// Internal Add Function
117 |     ///
118 |     /// Thinner wrapper over `LLVMAddFunction`. Clients should use
119 |     /// `add_function` or `add_varargs_function`.
120 |     fn add_function_internal(
121 |         &mut self,
122 |         module: &mut Module,
123 |         name: &str,
124 |         ret_type: LLVMTypeRef,
125 |         params: &mut [LLVMTypeRef],
126 |         varargs: bool,
127 |     ) -> Function {
128 |         let varargs = if varargs { 1 } else { 0 };
129 | 
130 |         // Create a function to be used to evaluate our expression
131 |         let function_type = unsafe {
132 |             let param_count = params.len();
133 |             let params = params.as_mut_ptr();
134 |             core::LLVMFunctionType(ret_type, params, param_count as c_uint, varargs)
135 |         };
136 | 
137 |         let function_name = CString::new(name).unwrap();
138 | 
139 |         // Function::from_raw is `unsafe` because it doesn't verify
140 |         // that the value you give it is an LLVM Function. I think we
141 |         // can be sure this one is though :-p
142 |         unsafe {
143 |             Function::from_raw(core::LLVMAddFunction(
144 |                 module.as_raw(),
145 |                 function_name.as_ptr(),
146 |                 function_type,
147 |             ))
148 |         }
149 |     }
150 | 
151 |     /// Append a Basic Block to a Function
152 |     ///
153 |     /// Creates a block labelled `name` and appends it to `fun`. Panics
154 |     /// if `name` contains an interior NUL byte.
155 |     pub fn add_block(&mut self, fun: &mut Function, name: &str) -> LLVMBasicBlockRef {
156 |         let label = CString::new(name).unwrap();
157 |         let (ctx, parent) = unsafe { (self.as_raw(), fun.as_raw()) };
158 |         unsafe { core::LLVMAppendBasicBlockInContext(ctx, parent, label.as_ptr()) }
159 |     }
160 | 
161 |     /// Create an IR Builder
162 |     ///
163 |     /// Creates a new LLVM IR builder in this `Context` and wraps it.
164 |     pub fn add_builder(&mut self) -> Builder {
165 |         Builder::from_raw(unsafe { core::LLVMCreateBuilderInContext(self.0) })
166 |     }
167 | 
168 |     /// Create a Constant 64-bit Integer
169 |     ///
170 |     /// The bits of `i` are handed to LLVM with the sign-extend flag
171 |     /// set, producing a constant of this context's 64-bit integer
172 |     /// type.
173 |     pub fn const_int(&self, i: i64) -> LLVMValueRef {
174 |         let (bits, sign_extend) = (i as u64, 1);
175 |         let ty = unsafe { core::LLVMInt64TypeInContext(self.as_raw()) };
176 |         unsafe { core::LLVMConstInt(ty, bits, sign_extend) }
177 |     }
178 | 
179 |     /// Create a Constant Integer of a Given Width
180 |     ///
181 |     /// Like `const_int`, but the constant has an integer type that is
182 |     /// `width` bits wide rather than 64.
183 |     pub fn const_int_width(&self, i: i64, width: u32) -> LLVMValueRef {
184 |         let (bits, sign_extend) = (i as u64, 1);
185 |         let ty = unsafe { core::LLVMIntTypeInContext(self.as_raw(), width) };
186 |         unsafe { core::LLVMConstInt(ty, bits, sign_extend) }
187 |     }
188 | 
189 |     /// Create a Constant Character Value
190 |     ///
191 |     /// Produces an 8-bit integer constant holding the byte `i`.
192 |     pub fn const_char(&self, i: u8) -> LLVMValueRef {
193 |         let int8 = unsafe { core::LLVMInt8TypeInContext(self.as_raw()) };
194 |         unsafe { core::LLVMConstInt(int8, u64::from(i), 0) }
195 |     }
196 | 
197 |     /// Create a Constant Bool
198 |     ///
199 |     /// Produces a constant of the 1-bit integer type: `1` for `true`
200 |     /// and `0` for `false`.
201 |     pub fn const_bool(&self, b: bool) -> LLVMValueRef {
202 |         // `u64::from(bool)` yields exactly 0 or 1.
203 |         let bit = u64::from(b);
204 |         unsafe {
205 |             core::LLVMConstInt(core::LLVMInt1TypeInContext(self.as_raw()), bit, 0)
206 |         }
207 |     }
208 | 
209 |     /// Create a Constant String Value
210 |     ///
211 |     /// Produces a constant `i8` array holding the UTF-8 bytes of `s`
212 |     /// followed by a single terminating zero byte.
213 |     pub fn const_str(&self, s: &str) -> LLVMValueRef {
214 |         // Every byte of `s`, then the terminator, becomes a constant.
215 |         let terminated = s.bytes().chain(std::iter::once(0));
216 |         let mut chars: Vec<_> = terminated.map(|byte| self.const_char(byte)).collect();
217 | 
218 |         let elem_ty = unsafe { core::LLVMInt8TypeInContext(self.as_raw()) };
219 |         unsafe { core::LLVMConstArray(elem_ty, chars.as_mut_ptr(), chars.len() as c_uint) }
220 |     }
221 | 
222 |     /// Create a Structure Constant
223 |     ///
224 |     /// Initialises a new structure constant from the given values,
225 |     /// in the order they appear in `values`.
226 |     pub fn const_struct(&self, mut values: Vec<LLVMValueRef>) -> LLVMValueRef {
227 |         let (members, count) = (values.as_mut_ptr(), values.len() as c_uint);
228 |         // The trailing `0` is the packed flag of the LLVM call.
229 |         unsafe { core::LLVMConstStructInContext(self.as_raw(), members, count, 0) }
230 |     }
231 | 
232 |     /// Raw Borrow
233 |     ///
234 |     /// # Safety
235 |     ///
236 |     /// Hands out the underlying `LLVMContextRef` without any lifetime
237 |     /// tracking. The caller must make sure the pointer doesn't
238 |     /// outlive this `Context`, and must not break any of LLVM's
239 |     /// thread safety requirements.
240 |     pub unsafe fn as_raw(&self) -> LLVMContextRef {
241 |         // `Context` is a tuple struct around the raw reference.
242 |         self.0
243 |     }
244 | 
245 |     /// A Sized Integer Type in this Context
246 |     ///
247 |     /// Asks LLVM for the integer type `width` bits wide in this
248 |     /// context. Repeated calls should return the same type.
249 |     pub fn int_type(&self, width: usize) -> LLVMTypeRef {
250 |         let bits = width as c_uint;
251 |         unsafe { core::LLVMIntTypeInContext(self.as_raw(), bits) }
252 |     }
253 | 
254 |     /// Boolean Type in this Context
255 |     ///
256 |     /// Booleans are represented as the 1-bit integer type, so this is
257 |     /// shorthand for `int_type(1)`.
258 |     pub fn bool_type(&self) -> LLVMTypeRef {
259 |         Self::int_type(self, 1)
260 |     }
261 | 
262 |     /// Get the Raw C String Type
263 |     ///
264 |     /// Builds the C-style 'pointer to character' string type: a
265 |     /// pointer to this context's 8-bit integer type. This is
266 |     /// different from the language's own string type; it is intended
267 |     /// to be used when creating FFI calls.
268 |     pub fn cstr_type(&self) -> LLVMTypeRef {
269 |         let char_ty = unsafe { core::LLVMInt8TypeInContext(self.as_raw()) };
270 |         // The second argument selects address space `0`.
271 |         unsafe { core::LLVMPointerType(char_ty, 0) }
272 |     }
273 | 
274 |     /// Create a Structure Type
275 |     ///
276 |     /// Given a set of fields, create a structure type in this
277 |     /// context with the fields laid out in the order they appear in
278 |     /// `fields`.
279 |     pub fn struct_type(&self, mut fields: Vec<LLVMTypeRef>) -> LLVMTypeRef {
280 |         let (members, count) = (fields.as_mut_ptr(), fields.len() as c_uint);
281 |         // The trailing `0` is the packed flag of the LLVM call.
282 |         unsafe { core::LLVMStructTypeInContext(self.as_raw(), members, count, 0) }
283 |     }
284 | 
285 |     /// Create an Array Type
286 |     ///
287 |     /// Returns the type of a contiguous array of `size` `inner` values.
288 |     pub fn array_type(&self, inner: LLVMTypeRef, size: usize) -> LLVMTypeRef {
289 |         let element_count = size as c_uint;
290 |         unsafe { core::LLVMArrayType(inner, element_count) }
291 |     }
292 | 
293 |     /// Create a Pointer Type
294 |     ///
295 |     /// Wraps a given type to create a pointer to it (address space `0`).
296 |     pub fn pointer_type(&self, inner: LLVMTypeRef) -> LLVMTypeRef {
297 |         unsafe { core::LLVMPointerType(inner, 0) }
298 |     }
299 | 
300 |     /// Get the Void Type of this Context
301 |     pub fn void_type(&self) -> LLVMTypeRef {
302 |         unsafe { core::LLVMVoidTypeInContext(self.as_raw()) }
303 |     }
304 | 
305 |     /// Get the LLVM Type from a Value
306 |     ///
307 |     /// Inspects a given LLVM Value and returns the type as known by
308 |     /// LLVM. This is basically just an `LLVMTypeOf` call.
309 |     pub fn get_type(&self, value: LLVMValueRef) -> LLVMTypeRef {
310 |         unsafe { core::LLVMTypeOf(value) }
311 |     }
312 | }
313 | 
314 | impl Drop for Context {
315 |     fn drop(&mut self) {
316 |         // Dispose of the underlying LLVM context.
317 |         let raw_ctx = self.0;
318 |         unsafe { core::LLVMContextDispose(raw_ctx) }
319 |     }
320 | }
321 | 
322 | impl Default for Context {
323 |     fn default() -> Self {
324 |         Self::new() // the default context is simply a fresh one
325 |     }
326 | }
327 | 


--------------------------------------------------------------------------------
/src/low_loader/function.rs:
--------------------------------------------------------------------------------
  1 | //! LLVM Function Wrapper
  2 | //!
  3 | //! A wrapper around an LLVM function.
  4 | 
  5 | use super::llvm_sys::analysis;
  6 | use super::llvm_sys::core;
  7 | use super::llvm_sys::prelude::*;
  8 | use super::llvm_sys::LLVMCallConv;
  9 | 
 10 | /// Function
 11 | ///
 12 | /// A single LLVM function value plus its cached calling convention.
 13 | #[derive(Debug, PartialEq)]
 14 | pub struct Function {
 15 |     raw: LLVMValueRef, // raw LLVM value for the function
 16 |     call_conv: CallConvention, // read from LLVM in `from_raw`, kept in sync by the setter
 17 | }
 18 | 
 19 | /// Calling Conventions
 20 | ///
 21 | /// This is a subset of the LLVM calling conventions.
 22 | #[derive(Debug, Copy, Clone, PartialEq)]
 23 | pub enum CallConvention {
 24 |     /// The `fastcall` calling convention
 25 |     Fastcall,
 26 |     /// The C calling convention
 27 |     CDecl,
 28 | }
 29 | 
 30 | impl From<CallConvention> for libc::c_uint {
 31 |     /// Map a `CallConvention` to LLVM's numeric calling convention.
 32 |     fn from(call_convention: CallConvention) -> Self {
 33 |         (match call_convention {
 34 |             CallConvention::Fastcall => LLVMCallConv::LLVMFastCallConv,
 35 |             CallConvention::CDecl => LLVMCallConv::LLVMCCallConv,
 36 |         }) as libc::c_uint
 37 |     }
 38 | }
 39 | 
 40 | impl From<libc::c_uint> for CallConvention {
 41 |     fn from(llvm_conv: libc::c_uint) -> Self {
 42 |         let fastcall = LLVMCallConv::LLVMFastCallConv as libc::c_uint;
 43 |         if llvm_conv != fastcall {
 44 |             return CallConvention::CDecl; // every non-fastcall value maps here
 45 |         }
 46 |         CallConvention::Fastcall
 47 |     }
 48 | }
 49 | 
 50 | impl Function {
 51 |     /// Wrap an Existing Function
 52 |     ///
 53 |     /// Wraps the given raw value and provides more strongly typed
 54 |     /// access to it. The calling convention is read back from LLVM.
 55 |     ///
 56 |     /// # Safety
 57 |     ///
 58 |     /// The value should be a function. This is not checked.
 59 |     pub unsafe fn from_raw(raw: LLVMValueRef) -> Self {
 60 |         // Ask LLVM which convention the function uses so that the
 61 |         // cached copy starts out matching it.
 62 |         let call_conv = CallConvention::from(core::LLVMGetFunctionCallConv(raw));
 63 |         Function { raw, call_conv }
 64 |     }
 65 | 
 66 |     /// Verify the Function
 67 |     ///
 68 |     /// Makes LLVM check the function is valid, printing any problems
 69 |     /// it finds. If the function is not valid it is dumped and we
 70 |     /// panic to signal the error. This is intended for debugging the
 71 |     /// compiler's output.
 72 |     pub fn verify_or_panic(&self) {
 73 |         use self::analysis::*;
 74 |         let failed = unsafe {
 75 |             LLVMVerifyFunction(self.raw, LLVMVerifierFailureAction::LLVMPrintMessageAction)
 76 |         };
 77 | 
 78 |         // The result is an `LLVMBool`: any non-zero value means failure.
 79 |         if failed != 0 {
 80 |             unsafe { core::LLVMDumpValue(self.raw) };
 81 |             panic!("Function failed verification!")
 82 |         }
 83 |     }
 84 | 
 85 |     /// Get a Function's Parameter
 86 |     ///
 87 |     /// Returns the value that can be used to access the `n`th
 88 |     /// argument of this function, as given by `LLVMGetParam`.
 89 |     pub fn get_param(&self, n: u32) -> LLVMValueRef {
 90 |         unsafe { core::LLVMGetParam(self.raw, n) }
 91 |     }
 92 | 
 93 |     /// Raw Borrow
 94 |     ///
 95 |     /// # Safety
 96 |     ///
 97 |     /// Exposes the underlying `LLVMValueRef`. The caller must make
 98 |     /// sure it doesn't outlive the `Function`, and must not break
 99 |     /// any of LLVM's thread safety requirements.
100 |     pub unsafe fn as_raw(&self) -> LLVMValueRef {
101 |         let &Function { raw, .. } = self;
102 |         raw
103 |     }
104 | 
105 |     /// Get the Function Calling Convention
106 |     ///
107 |     /// Returns the cached convention; `CDecl` unless fastcall is in use.
108 |     pub fn call_conv(&self) -> CallConvention {
109 |         let &Function { call_conv, .. } = self;
110 |         call_conv
111 |     }
112 | 
113 |     /// Set the Function's Calling Convention
114 |     ///
115 |     /// Updates the calling convention on the function declaration
116 |     /// and on the cached copy. We use fastcall for our own calling
117 |     /// convention and cdecl for C interop.
118 |     pub fn set_calling_convention(&mut self, call_convention: CallConvention) {
119 |         // Translate to the numeric value LLVM expects.
120 |         let llvm_conv = libc::c_uint::from(call_convention);
121 |         self.call_conv = call_convention;
122 |         unsafe { core::LLVMSetFunctionCallConv(self.raw, llvm_conv) };
123 |     }
124 | }
125 | 


--------------------------------------------------------------------------------
/src/low_loader/module.rs:
--------------------------------------------------------------------------------
  1 | //! LLVM Module Wrapper
  2 | //!
  3 | //! Contains types and wrappers for dealing with LLVM Modules.
  4 | 
  5 | use super::function::Function;
  6 | use super::llvm_sys::prelude::*;
  7 | use super::llvm_sys::target_machine;
  8 | use super::llvm_sys::{analysis, bit_writer, core};
  9 | use super::pass_manager::{OptLevel, OptSize, PassManagerBuilder};
 10 | use super::targets::Target;
 11 | 
 12 | use std::ffi::{CStr, CString};
 13 | use std::path::Path;
 14 | use std::ptr;
 15 | 
 16 | /// Module
 17 | ///
 18 | /// A module represents a single code unit. It maps down to a library or
 19 | /// executable when compiled by LLVM. This type provides a safe
 20 | /// abstraction around the raw `LLVMModule` type.
 21 | #[derive(Debug, PartialEq)]
 22 | pub struct Module {
 23 |     raw: LLVMModuleRef, // disposed by the `Drop` impl
 24 | }
 25 | 
 26 | /// The kind of output file to write
 27 | ///
 28 | /// Used when writing modules to disk with `Module::write_to_file`.
 29 | #[derive(Debug, PartialEq)]
 30 | pub enum OutputFileKind {
 31 |     /// LLVM IL files (written with `LLVMPrintModuleToFile`)
 32 |     LLVMIl,
 33 |     /// LLVM Bitcode file
 34 |     Bitcode,
 35 |     /// Native object files (emitted through a target machine)
 36 |     NativeObject,
 37 | }
 38 | 
 39 | impl Module {
 40 |     /// Module from Raw
 41 |     ///
 42 |     /// Wraps a raw module reference and takes ownership of it: when
 43 |     /// the returned `Module` goes out of scope the underlying LLVM
 44 |     /// module is disposed.
 45 |     ///
 46 |     /// *Note*: You shouldn't need to use this directly, instead modules
 47 |     /// can be created with `Context::add_module`.
 48 |     pub fn from_raw(mod_ref: LLVMModuleRef) -> Self {
 49 |         Self { raw: mod_ref }
 50 |     }
 51 | 
 52 |     /// Set the Module's Target
 53 |     ///
 54 |     /// Defines which target the module is being compiled for, using
 55 |     /// the target's normalised triple. This can enable
 56 |     /// target-specific optimisations in the compilation of this module.
 57 |     pub fn set_target(&mut self, target: &Target) {
 58 |         // Panics if the normalised triple contains a NUL byte.
 59 |         let norm_triple = target.norm_triple();
 60 |         let c_triple = CString::new(norm_triple).unwrap();
 61 |         unsafe { core::LLVMSetTarget(self.raw, c_triple.as_ptr()) };
 62 |     }
 63 | 
 64 |     /// Dump the Module
 65 |     ///
 66 |     /// Asks LLVM to dump a representation of the module (LLVM writes
 67 |     /// it to stderr). This is intended as an aid to debugging.
 68 |     pub fn dump(&self) {
 69 |         unsafe { core::LLVMDumpModule(self.as_raw()) }
 70 |     }
 71 | 
 72 |     /// Verify the Module
 73 |     ///
 74 |     /// Runs LLVM's verifier over the whole module, letting LLVM
 75 |     /// print a message for anything it finds.
 76 |     ///
 77 |     /// # Panics
 78 |     ///
 79 |     /// Panics if the verifier returns a non-zero result.
 80 |     pub fn verify_or_panic(&self) {
 81 |         let action = analysis::LLVMVerifierFailureAction::LLVMPrintMessageAction;
 82 | 
 83 |         // No out-message pointer is supplied, hence the null.
 84 |         let status = unsafe { analysis::LLVMVerifyModule(self.raw, action, ptr::null_mut()) };
 85 |         assert!(status == 0, "Module failed validation");
 86 |     }
 87 | 
 88 |     /// Run the Optimisation Passes over the Module
 89 |     ///
 90 |     /// Builds a module pass manager for the requested optimisation
 91 |     /// settings and runs it over this module to improve execution speed.
 92 |     ///
 93 |     /// # Parameters
 94 |     ///  * `level` - the optimisation level to target.
 95 |     ///  * `size` - enum to control size optimisation.
 96 |     pub fn run_optimiser(&mut self, level: OptLevel, size: OptSize) {
 97 |         let manager = PassManagerBuilder::new()
 98 |             .with_opt_level(level)
 99 |             .with_opt_size(size)
100 |             .create_module_pass_manager();
101 |         // Whether the passes changed anything is not reported.
102 |         let _changed = manager.run(self);
103 |     }
104 | 
105 |     /// Write the Module to the Given File
106 |     ///
107 |     /// The kind of file written depends on `kind`: LLVM IL, bitcode,
108 |     /// or a native object file generated for `target`.
109 |     ///
110 |     /// # Errors
111 |     ///
112 |     /// Returns a message if `path` is not valid UTF-8 or contains a
113 |     /// NUL byte, or if LLVM reports that the write failed.
114 |     pub fn write_to_file(
115 |         &self,
116 |         target: &Target,
117 |         path: &Path,
118 |         kind: OutputFileKind,
119 |     ) -> Result<(), String> {
120 |         // Report unusable paths through the `Result` rather than panicking.
121 |         let path = path
122 |             .to_str()
123 |             .and_then(|s| CString::new(s).ok())
124 |             .ok_or_else(|| format!("invalid output path: {}", path.display()))?;
125 | 
126 |         unsafe {
127 |             let mut message = ptr::null_mut();
128 |             let r = match kind {
129 |                 OutputFileKind::LLVMIl => {
130 |                     core::LLVMPrintModuleToFile(self.raw, path.as_ptr(), &mut message)
131 |                 }
132 |                 OutputFileKind::Bitcode => {
133 |                     bit_writer::LLVMWriteBitcodeToFile(self.raw, path.as_ptr())
134 |                 }
135 |                 OutputFileKind::NativeObject => {
136 |                     let trip = CString::new(target.triple()).unwrap();
137 |                     // To emit code we need to do a few things:
138 |                     //  * Create an LLVM TargetMachine from our target.
139 |                     //  * Call targetMachine emit to file
140 |                     //  * Release everything LLVM allocated for us
141 |                     let cpu = target_machine::LLVMGetHostCPUName();
142 |                     let features = target_machine::LLVMGetHostCPUFeatures();
143 |                     let tm = target_machine::LLVMCreateTargetMachine(
144 |                         target.as_llvm_target(),
145 |                         trip.as_ptr(),
146 |                         cpu,
147 |                         features,
148 |                         target_machine::LLVMCodeGenOptLevel::LLVMCodeGenLevelDefault,
149 |                         target_machine::LLVMRelocMode::LLVMRelocDefault,
150 |                         target_machine::LLVMCodeModel::LLVMCodeModelSmall,
151 |                     );
152 |                     let r = target_machine::LLVMTargetMachineEmitToFile(
153 |                         tm,
154 |                         self.as_raw(),
155 |                         path.as_ptr() as *mut _,
156 |                         target_machine::LLVMCodeGenFileType::LLVMObjectFile,
157 |                         &mut message,
158 |                     );
159 |                     target_machine::LLVMDisposeTargetMachine(tm);
160 |                     // The host CPU strings are LLVM messages; free them
161 |                     // only after the target machine itself is gone.
162 |                     core::LLVMDisposeMessage(cpu);
163 |                     core::LLVMDisposeMessage(features);
164 |                     r
165 |                 }
166 |             };
167 |             if r == 0 {
168 |                 return Ok(());
169 |             }
170 |             // The bitcode writer takes no message pointer, so on that
171 |             // path `message` is still null and must not be read.
172 |             if message.is_null() {
173 |                 return Err(String::from("failed to write module to file"));
174 |             }
175 |             let err_str = CStr::from_ptr(message).to_string_lossy().into_owned();
176 |             core::LLVMDisposeMessage(message);
177 |             Err(err_str)
178 |         }
179 |     }
160 | 
161 |     /// Find a Function by Name
162 |     ///
163 |     /// Returns `None` if the lookup for `name` comes back null.
164 |     pub fn find_function(&self, name: &str) -> Option<Function> {
165 |         let c_name = CString::new(name).unwrap();
166 |         let found = unsafe { core::LLVMGetNamedFunction(self.raw, c_name.as_ptr()) };
167 |         if found.is_null() {
168 |             return None;
169 |         }
170 |         // The non-null value came from the named-function lookup.
171 |         Some(unsafe { Function::from_raw(found) })
172 |     }
173 | 
174 |     /// Add a Global Variable
175 |     ///
176 |     /// The global is named `name`, takes its type from `initialiser`
177 |     /// and has `initialiser` set as its initial value.
178 |     pub fn add_global(&mut self, initialiser: LLVMValueRef, name: &str) -> LLVMValueRef {
179 |         let c_name = CString::new(name).unwrap();
180 |         unsafe {
181 |             let ty = core::LLVMTypeOf(initialiser);
182 |             let global = core::LLVMAddGlobal(self.raw, ty, c_name.as_ptr());
183 |             core::LLVMSetInitializer(global, initialiser);
184 |             global
185 |         }
186 |     }
187 | 
188 |     /// Find a Global Variable in the Module by Name
189 |     ///
190 |     /// Looks up the global variable called `name` in this module.
191 |     /// If the variable doesn't exist then `None` is returned.
192 |     ///
193 |     /// # Panics
194 |     ///
195 |     /// Panics if `name` contains an interior NUL byte.
196 |     pub fn find_global(&self, name: &str) -> Option<LLVMValueRef> {
197 |         let c_name = CString::new(name).unwrap();
198 |         let found = unsafe { core::LLVMGetNamedGlobal(self.raw, c_name.as_ptr()) };
199 |         match found.is_null() {
200 |             true => None,
201 |             false => Some(found),
202 |         }
203 |     }
204 | 
205 |     /// Raw Borrow
206 |     ///
207 |     /// # Safety
208 |     ///
209 |     /// Exposes the underlying `LLVMModuleRef`. The caller must make
210 |     /// sure it doesn't outlive the `Module`, and must not break any
211 |     /// of LLVM's thread safety requirements.
212 |     pub unsafe fn as_raw(&self) -> LLVMModuleRef {
213 |         let &Module { raw } = self;
214 |         raw
215 |     }
216 | }
217 | 
218 | impl Drop for Module {
219 |     fn drop(&mut self) {
220 |         unsafe { core::LLVMDisposeModule(self.as_raw()) } // frees the wrapped module
221 |     }
222 | }
223 | 
224 | impl From<Module> for LLVMModuleRef {
225 |     /// Consume the wrapped module and return its internal module
226 |     /// reference. This transfers ownership of the module back to
227 |     /// the caller, preventing it from being automatically freed.
228 |     fn from(m: Module) -> LLVMModuleRef {
229 |         // Instead of calling `forget`, park the module inside a
230 |         // `ManuallyDrop`. Nothing ever drops that wrapper, so the
231 |         // `Drop` impl of `Module` (which would dispose the LLVM
232 |         // module) does not run.
233 |         let released = std::mem::ManuallyDrop::new(m);
234 |         unsafe { released.as_raw() }
235 |     }
236 | }
237 | 


--------------------------------------------------------------------------------
/src/low_loader/pass_manager.rs:
--------------------------------------------------------------------------------
  1 | //! Pass Manager
  2 | //!
  3 | //! This module defines a safe wrapper around the `PassManager` and
  4 | //! `PassManagerBuilder` LLVM C APIs.
  5 | //!
  6 | //! Pass managers are used to run a given set of transforms against
  7 | //! either a module or function.
  8 | 
  9 | use super::module::Module;
 10 | 
 11 | use super::llvm_sys::core;
 12 | use super::llvm_sys::prelude::*;
 13 | use super::llvm_sys::transforms::pass_manager_builder as pm_builder;
 14 | 
 15 | /// LLVM Module Pass Manager (wraps a raw `LLVMPassManagerRef`)
 16 | pub struct ModulePassManager(LLVMPassManagerRef);
 17 | 
 18 | /// Builder API for the PassManager (wraps the raw LLVM builder reference)
 19 | pub struct PassManagerBuilder(pm_builder::LLVMPassManagerBuilderRef);
 20 | 
 21 | /// Optimisation Levels
 22 | ///
 23 | /// Used to control the level of optimisation that the pass manager
 24 | /// builder should target when adding passes
 25 | pub enum OptLevel {
 26 |     /// No Speed Optimisation
 27 |     Off,
 28 |     /// Some Optimisation for Speed
 29 |     ///
 30 |     /// This corresponds to the `-O1` optimisation flag.
 31 |     Low,
 32 |     /// Medium Optimisation for Speed
 33 |     ///
 34 |     /// This corresponds to the `-O2` optimisation flag.
 35 |     Medium,
 36 |     /// Full Optimisation for Speed
 37 |     ///
 38 |     /// This corresponds to the `-O3` optimisation flag.
 39 |     High,
 40 | }
 41 | 
 42 | /// Size Optimisation Levels
 43 | ///
 44 | /// Used to control the code size that the optimisation passes
 45 | /// target. This can be used to encourage the optimiser to favour
 46 | /// smaller binaries rather than faster ones.
 47 | pub enum OptSize {
 48 |     /// Size Optimisation Disabled (size level `0`)
 49 |     Off,
 50 |     /// Normal Size Optimisations (size level `1`)
 51 |     ///
 52 |     /// This corresponds to the `-Os` optimisation flag.
 53 |     Size,
 54 |     /// Full Size Optimisations (size level `2`)
 55 |     ///
 56 |     /// This corresponds to the `-Oz` optimisation flag.
 57 |     SizeFull,
 58 | }
 59 | 
 60 | impl From<OptLevel> for ::libc::c_uint {
 61 |     /// Convert to the numeric optimisation level (`0` to `3`).
 62 |     fn from(level: OptLevel) -> Self {
 63 |         match level {
 64 |             OptLevel::Off => 0,
 65 |             OptLevel::Low => 1,
 66 |             OptLevel::Medium => 2,
 67 |             OptLevel::High => 3,
 68 |         }
 69 |     }
 70 | }
 71 | 
 72 | impl From<OptSize> for ::libc::c_uint {
 73 |     /// Convert to the numeric size level (`0` to `2`).
 74 |     fn from(size: OptSize) -> Self {
 75 |         match size {
 76 |             OptSize::Off => 0,
 77 |             OptSize::Size => 1,
 78 |             OptSize::SizeFull => 2,
 79 |         }
 80 |     }
 81 | }
 82 | 
 83 | impl Drop for PassManagerBuilder {
 84 |     fn drop(&mut self) {
 85 |         // Dispose of the underlying LLVM builder.
 86 |         let raw_builder = self.0;
 87 |         unsafe { pm_builder::LLVMPassManagerBuilderDispose(raw_builder) }
 88 |     }
 89 | }
 90 | 
 91 | impl Default for PassManagerBuilder {
 92 |     fn default() -> Self {
 93 |         Self::new() // the default builder is simply a fresh one
 94 |     }
 95 | }
 96 | 
 97 | impl PassManagerBuilder {
 98 |     /// Create a Pass Manager Builder
 99 |     pub fn new() -> Self {
100 |         // Run the shared initialisation hook before touching LLVM.
101 |         super::context::ensure_initialised();
102 |         PassManagerBuilder(unsafe { pm_builder::LLVMPassManagerBuilderCreate() })
103 |     }
104 | 
105 |     /// Set the Optimisation Level
106 |     ///
107 |     /// Controls the optimisation level the pass manager builder will
108 |     /// target. Takes and returns the builder by value so that calls
109 |     /// can be chained.
110 |     pub fn with_opt_level(self, level: OptLevel) -> Self {
111 |         let llvm_level = ::libc::c_uint::from(level);
112 |         unsafe { pm_builder::LLVMPassManagerBuilderSetOptLevel(self.0, llvm_level) };
113 |         self
114 |     }
115 | 
116 |     /// Set the Size Optimisation Level
117 |     ///
118 |     /// Controls the optimiser's preference for smaller code size over
119 |     /// speed. Traditionally setting this to anything other than `Off`
120 |     /// is paired with setting the optimisation level to `Medium`.
121 |     pub fn with_opt_size(self, size: OptSize) -> Self {
122 |         // Takes and returns the builder by value so calls can chain.
123 |         let llvm_size = ::libc::c_uint::from(size);
124 |         unsafe { pm_builder::LLVMPassManagerBuilderSetSizeLevel(self.0, llvm_size) };
125 |         self
126 |     }
127 | 
128 |     /// Create a Module Pass Manager
129 |     ///
130 |     /// Creates a new module pass manager and populates it with the
131 |     /// passes configured on the builder in its current state.
132 |     pub fn create_module_pass_manager(&self) -> ModulePassManager {
133 |         let builder = self.0;
134 |         unsafe {
135 |             let manager = core::LLVMCreatePassManager();
136 |             pm_builder::LLVMPassManagerBuilderPopulateModulePassManager(builder, manager);
137 |             ModulePassManager(manager)
138 |         }
139 |     }
140 | }
141 | 
142 | impl ModulePassManager {
143 |     /// Run the Pass Manager
144 |     ///
145 |     /// Attempt to transform the given module with the pass
146 |     /// manager. Returns a boolean representing if the pass manager
147 |     /// made changes to the module.
148 |     pub fn run(&self, module: &mut Module) -> bool {
149 |         unsafe { core::LLVMRunPassManager(self.0, module.as_raw()) != 0 }
150 |     }
151 | }
152 | 
153 | impl Drop for ModulePassManager {
154 |     /// Dispose of the pass manager created by `LLVMCreatePassManager`,
155 |     /// which would otherwise never be released.
156 |     fn drop(&mut self) {
157 |         unsafe { core::LLVMDisposePassManager(self.0) }
158 |     }
159 | }
152 | 
153 | #[cfg(test)]
154 | mod test {
155 |     // Smoke tests: these only check that construction completes.
156 |     use super::*;
157 | 
158 |     #[test]
159 |     fn test_build_module_pass_manager() {
160 |         let _manager = PassManagerBuilder::new().create_module_pass_manager();
161 |     }
162 | 
163 |     #[test]
164 |     fn test_set_optimisation_settings() {
165 |         let _manager = PassManagerBuilder::new()
166 |             .with_opt_level(OptLevel::Medium)
167 |             .with_opt_size(OptSize::SizeFull)
168 |             .create_module_pass_manager();
169 |     }
170 | }
171 | 


--------------------------------------------------------------------------------
/src/low_loader/targets.rs:
--------------------------------------------------------------------------------
  1 | //! LLVM Targets
  2 | 
  3 | use super::llvm_sys::core::LLVMDisposeMessage;
  4 | use super::llvm_sys::target_machine::*;
  5 | use std::ffi::{CStr, CString};
  6 | use std::fmt::Display;
  7 | use std::{fmt, ptr};
  8 | 
  9 | /// Compilation Target
 10 | ///
 11 | /// Represents a validated target triple. Can be used to build a
 12 | /// target machine to compile to.
 13 | pub struct Target {
 14 |     llvm_target: LLVMTargetRef, // filled in by `LLVMGetTargetFromTriple`
 15 |     triple: String, // the triple exactly as passed to `from_triple`
 16 | }
 17 | 
 18 | /// Target Lookup Error
 19 | ///
 20 | /// Produced when no target can be resolved from a given triple.
 21 | #[derive(Debug)]
 22 | pub struct TargetLookupError(String);
 23 | 
 24 | impl std::error::Error for TargetLookupError {}
 25 | 
 26 | impl Display for TargetLookupError {
 27 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 28 |         f.write_fmt(format_args!("Could not find target: '{}'", self.0))
 29 |     }
 30 | }
 31 | 
 32 | impl fmt::Display for Target {
 33 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 34 |         writeln!(f, "Target information for {}:", self.triple)?; // triple as passed to `from_triple`
 35 |         writeln!(f, " name: {}", self.name())?;
 36 |         writeln!(f, " description: {}", self.description())?;
 37 |         writeln!(f, " normalised triple: {}", self.norm_triple())?; // recomputed on every call
 38 |         writeln!(f, " has asm backend: {}", self.has_asm_backend()) // result of the last write is returned
 39 |     }
 40 | }
 41 | 
 42 | impl Default for Target {
 43 |     /// Get the Default (host) Target; panics if it cannot be found.
 44 |     fn default() -> Self {
 45 |         let host_triple = get_default_triple();
 46 |         Self::from_triple(&host_triple).expect("Default triple should be found")
 47 |     }
 48 | }
 49 | 
 50 | impl Target {
 51 |     /// Create a Target from a Triple
 52 |     ///
 53 |     /// Looks the given target triple up and returns the corresponding
 54 |     /// concrete target.
 55 |     ///
 56 |     /// # Errors
 57 |     ///
 58 |     /// Returns a `TargetLookupError` if the triple contains a NUL byte
 59 |     /// or isn't a valid compilation target.
 60 |     pub fn from_triple(triple: &str) -> Result<Self, TargetLookupError> {
 61 |         super::context::ensure_initialised();
 62 | 
 63 |         // A triple with an interior NUL can't be passed to LLVM; report
 64 |         // it through the normal error path instead of panicking.
 65 |         let ffi_trip = CString::new(triple).map_err(|_| {
 66 |             TargetLookupError(format!("triple {:?} contains a NUL byte", triple))
 67 |         })?;
 68 | 
 69 |         let mut target: LLVMTargetRef = ptr::null_mut();
 70 |         let mut error: *mut libc::c_char = ptr::null_mut();
 71 |         let r = unsafe { LLVMGetTargetFromTriple(ffi_trip.as_ptr(), &mut target, &mut error) };
 72 |         if r != 0 {
 73 |             // Only read (and free) the message if one was actually set.
 74 |             let err = if error.is_null() {
 75 |                 format!("unknown error resolving triple '{}'", triple)
 76 |             } else {
 77 |                 let msg = unsafe { CStr::from_ptr(error) }.to_string_lossy().into_owned();
 78 |                 unsafe { LLVMDisposeMessage(error) };
 79 |                 msg
 80 |             };
 81 |             return Err(TargetLookupError(err));
 82 |         }
 83 |         Ok(Target {
 84 |             llvm_target: target,
 85 |             triple: triple.to_owned(),
 86 |         })
 87 |     }
 81 | 
 82 |     /// Get the underlying LLVM target reference from the target
 83 |     ///
 84 |     /// # Safety
 85 |     ///
 86 |     /// Only use the returned reference while this `Target` is alive.
 87 |     pub unsafe fn as_llvm_target(&self) -> LLVMTargetRef {
 88 |         let &Target { llvm_target, .. } = self;
 89 |         llvm_target
 90 |     }
 91 | 
 92 |     /// Get the Target name
 93 |     ///
 94 |     /// Retrieves the name LLVM gives this target; panics if not UTF-8.
 95 |     pub fn name(&self) -> &str {
 96 |         let name = unsafe { CStr::from_ptr(LLVMGetTargetName(self.llvm_target)) }.to_str();
 97 |         name.expect("Target name should be valid unicode")
 98 |     }
 99 | 
100 |     /// Get the Target Description
101 |     ///
102 |     /// Retrieves the short description LLVM gives this target.
103 |     pub fn description(&self) -> &str {
104 |         let raw_desc = unsafe { LLVMGetTargetDescription(self.llvm_target) };
105 |         let text = unsafe { CStr::from_ptr(raw_desc) }.to_str();
106 |         text.expect("Target description should be valid unicode")
107 |     }
108 | 
109 |     /// Get the Target Triple
110 |     ///
111 |     /// Returns, unmodified, the triple this target was created from.
112 |     pub fn triple(&self) -> &str {
113 |         self.triple.as_str()
114 |     }
115 | 
116 |     /// Get the Normalised Target Triple
117 |     ///
118 |     /// This is the canonical version of the target triple. NOTE(review):
119 |     /// LLVM's docs say this string needs `LLVMDisposeMessage`; it is
120 |     /// never freed here, so each call looks like a leak -- confirm.
121 |     pub fn norm_triple(&self) -> &str {
122 |         let ffi = CString::new(self.triple.as_str()).unwrap();
123 |         let text = unsafe { CStr::from_ptr(LLVMNormalizeTargetTriple(ffi.as_ptr())) }.to_str();
124 |         text.expect("normalised triple should be valid unicode")
125 |     }
126 | 
127 |     /// Does the Target have an ASM Backend
128 |     pub fn has_asm_backend(&self) -> bool {
129 |         let has_backend = unsafe { LLVMTargetHasAsmBackend(self.llvm_target) };
130 |         has_backend != 0
131 |     }
132 | }
133 | 
134 | /// Get the Default Target Triple
135 | ///
136 | /// Returns the triple for the native target. To be used as a fallback
137 | /// if no triple is specified by the user.
138 | pub fn get_default_triple() -> String {
139 |     let native_target = unsafe { CStr::from_ptr(LLVMGetDefaultTargetTriple()) };
140 |     native_target.to_string_lossy().to_owned().to_string()
141 | }
142 | 
143 | /// Dump Available LLVM Targets
144 | ///
145 | /// Prints a list of LLVM target triple available in this build.
146 | pub fn dump_targets() {
147 |     super::context::ensure_initialised();
148 | 
149 |     let native_target = unsafe { LLVMGetDefaultTargetTriple() };
150 | 
151 |     println!("default triple: {:?}", unsafe {
152 |         CStr::from_ptr(native_target)
153 |     });
154 |     println!();
155 |     println!("supported targets:");
156 |     let mut target = unsafe { LLVMGetFirstTarget() };
157 |     while !target.is_null() {
158 |         let name = unsafe { CStr::from_ptr(LLVMGetTargetName(target)) };
159 |         let desc = unsafe { CStr::from_ptr(LLVMGetTargetDescription(target)) };
160 |         println!(" * {} ({})", name.to_str().unwrap(), desc.to_str().unwrap());
161 |         target = unsafe { LLVMGetNextTarget(target) };
162 |     }
163 | }
164 | 
#[cfg(test)]
mod test {

    use super::*;

    /// The default target should be built from the native triple, which
    /// is expected to already be in normalised form.
    #[test]
    fn create_default_triple() {
        let native = Target::default();
        assert_eq!(get_default_triple(), native.triple());
        assert_eq!(native.triple(), native.norm_triple());
    }

    /// Unknown triples must be rejected rather than producing a target.
    #[test]
    fn create_invalid_triple() {
        assert!(Target::from_triple("im-not-valid").is_err());
    }

    /// A well-known triple resolves to the expected target metadata.
    #[test]
    fn create_known_triple() {
        let aarch64 = Target::from_triple("aarch64-linux-gnu").unwrap();
        assert_eq!("aarch64", aarch64.name());
        assert_eq!("AArch64 (little endian)", aarch64.description());
        assert!(aarch64.has_asm_backend());
        assert_eq!("aarch64-linux-gnu", aarch64.triple());
        assert_eq!("aarch64-unknown-linux-gnu", aarch64.norm_triple());
    }

    /// Triples with more than the usual number of components still
    /// resolve and are left untouched by normalisation.
    #[test]
    fn create_long_triple() {
        let arm = Target::from_triple("arm-eabi-linux-gnu-elf").unwrap();
        assert_eq!("arm", arm.name());
        assert_eq!("ARM", arm.description());
        assert!(arm.has_asm_backend());
        assert_eq!("arm-eabi-linux-gnu-elf", arm.triple());
        assert_eq!("arm-eabi-linux-gnu-elf", arm.norm_triple());
    }
}
203 | 


--------------------------------------------------------------------------------
/src/low_loader/types.rs:
--------------------------------------------------------------------------------
  1 | //! LLVM Types
  2 | 
  3 | use super::llvm_sys::prelude::*;
  4 | use super::llvm_sys::{core, LLVMTypeKind};
  5 | 
/// Floating Point Sizes
///
/// LLVM floating point types can only be one of a given set of
/// sizes. Some of these sizes are architecture-specific.
// Variant names follow the spelling of the corresponding LLVM type
// kinds (e.g. `LLVMX86_FP80TypeKind`), hence the lint exemption.
#[allow(non_camel_case_types)]
pub enum FloatWidth {
    /// Half-width float (16 bits)
    Half,
    /// Standard float (32 bits)
    Float,
    /// Double-precision float (64 bits)
    Double,
    /// X86 / 8087 'extended precision' float (80 bits)
    X86_FP80,
    /// 128 bit float (112 bit mantissa)
    FP128,
    /// PowerPC Specific 128 bit float
    PPC_FP128,
}
 25 | 
/// Type Enumeration
///
/// Represents the different types possible for a given value.
pub enum Type {
    /// The void type represents the absence of a value and has no
    /// size. Void can't be used as the target type of a pointer.
    Void,

    /// Floating point number of a given width.
    Float(FloatWidth),

    /// A code label.
    Label,

    /// An arbitrary-sized integer type.
    ///
    /// The payload is the width of the integer in bits.
    Int(usize),

    /// A callable function.
    Function,

    /// A structure type.
    ///
    /// Represents a collection of values together in memory.
    Struct,

    /// An n-dimensional array type.
    Array,

    /// A memory location.
    Pointer,

    /// A 'vector' type
    ///
    /// Used to represent a collection of primitives which can be
    /// processed in parallel.
    Vector,

    /// A metadata value
    Metadata,

    /// An X86 MMX Value
    MMX,

    /// A value which can't be inspected
    Token,
}
 72 | 
 73 | impl From<LLVMTypeRef> for Type {
 74 |     fn from(llvm_type: LLVMTypeRef) -> Self {
 75 |         use self::LLVMTypeKind::*;
 76 |         match unsafe { core::LLVMGetTypeKind(llvm_type) } {
 77 |             LLVMVoidTypeKind => Type::Void,
 78 |             LLVMHalfTypeKind => Type::Float(FloatWidth::Half),
 79 |             LLVMFloatTypeKind => Type::Float(FloatWidth::Half),
 80 |             LLVMDoubleTypeKind => Type::Float(FloatWidth::Double),
 81 |             LLVMX86_FP80TypeKind => Type::Float(FloatWidth::X86_FP80),
 82 |             LLVMFP128TypeKind => Type::Float(FloatWidth::FP128),
 83 |             LLVMPPC_FP128TypeKind => Type::Float(FloatWidth::PPC_FP128),
 84 |             LLVMLabelTypeKind => Type::Label,
 85 |             LLVMIntegerTypeKind => {
 86 |                 Type::Int(unsafe { core::LLVMGetIntTypeWidth(llvm_type) } as usize)
 87 |             }
 88 |             LLVMFunctionTypeKind => Type::Function,
 89 |             LLVMStructTypeKind => Type::Struct,
 90 |             LLVMArrayTypeKind => Type::Array,
 91 |             LLVMPointerTypeKind => Type::Pointer,
 92 |             LLVMVectorTypeKind => Type::Vector,
 93 |             LLVMMetadataTypeKind => Type::Metadata,
 94 |             LLVMX86_MMXTypeKind => Type::MMX,
 95 |             LLVMTokenTypeKind => Type::Token,
 96 |             // Other types are not yet supported.
 97 |             _ => unimplemented!(),
 98 |         }
 99 |     }
100 | }
101 | 


--------------------------------------------------------------------------------
/src/low_loader/value.rs:
--------------------------------------------------------------------------------
 1 | //! LLVM Value Wrappers
 2 | //!
 3 | //! This module contains wrapping code and types for the LLVM
 4 | //! Value. The intention is to prevent exposing raw LLVM types from
 5 | //! `low_loader`.
 6 | 
 7 | use super::llvm_sys::prelude::LLVMValueRef;
 8 | 
/// Wrapped Value Reference
///
/// Newtype around a raw `LLVMValueRef`. The inner reference is private
/// to this module.
#[derive(Debug, PartialEq)]
pub struct Value(LLVMValueRef);
12 | 
13 | // Allow conversion from our wrapped type to the underlying LLVM
14 | // one. This is intended more as an escape hatch while converting code
15 | // to use the new safe wrappers rather than as a permanent solution.
16 | impl From<Value> for LLVMValueRef {
17 |     /// From Value
18 |     ///
19 |     /// Convert a wrapped value into a raw LLVM value reference.
20 |     fn from(v: Value) -> Self {
21 |         let Value(inner) = v;
22 |         inner
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | //! Expression tree parsing using Top-Down Operator Precedence
  2 | //! parsing.
  3 | 
  4 | #![warn(missing_docs)]
  5 | 
  6 | pub mod compile;
  7 | pub mod diag;
  8 | pub mod low_loader;
  9 | pub mod meta;
 10 | pub mod sem;
 11 | pub mod syntax;
 12 | 
 13 | use crate::compile::*;
 14 | use crate::low_loader::targets;
 15 | use crate::syntax::text::DUMMY_SPAN;
 16 | use crate::syntax::*;
 17 | use docopt::Docopt;
 18 | use serde::{Deserialize, Deserializer};
 19 | use std::fmt;
 20 | use std::path::Path;
 21 | use std::process::*;
 22 | 
 23 | /// Usage Information
 24 | ///
 25 | /// This is a [Docopt] compliant usage description of this program.
 26 | ///
 27 | ///  [Docopt]: http://docopt.org/
 28 | const USAGE: &str = "
 29 | Ullage Compiler
 30 | 
 31 | Usage:
 32 |   ullage [--version --help]
 33 |   ullage [options] [-o <outfile>] [<file>]
 34 | 
 35 | Options:
 36 |   -h, --help             Show this message.
 37 |   --version              Show version.
 38 |   -O, --optimise=<lvl>   Set the compilation optimisation level.
 39 |                          0 = off, 1 = low, 2 = medium, 3 = high, s = size.
 40 |   -o, --output=<out>     Write the output to <out>.
 41 |   --link-kind=<type>     Set the link type to perform.
 42 |   --target=<triple>      Set the compilation target triple.
 43 |   --link-mode=<mode>     Set the type of intermediate assets to produce
 44 |                          for linking. Use `llvmIr`, or `llvmBc`.
 45 |   --dumpir               Dump the LLVM IR for the module.
 46 |   --dumpast              Dump the syntax tree to stdout and exit.
 47 |   --prettytree           Dump a prettified summary of the syntax tree.
 48 |   --dumptargets          Dump the available targets and exit.
 49 |   --dumptargetinfo       Dump information about the given triple.
 50 | ";
 51 | 
/// Program Arguments
///
/// Structure to capture the command line arguments for the
/// program. This is filled in for us by Docopt.
#[derive(Debug, Deserialize)]
struct Args {
    /// Path to write the output to (`-o` / `--output`).
    flag_output: Option<String>,
    /// Requested optimisation level (`-O` / `--optimise`).
    flag_optimise: Option<OptFlag>,
    /// Target triple to compile for (`--target`).
    flag_target: Option<String>,
    /// Kind of intermediate asset to hand to the linker (`--link-mode`).
    flag_link_mode: Option<LinkMode>,
    /// Source file to compile (`<file>`).
    arg_file: Option<String>,

    // TODO: maybe move these dump options into a single flag?
    /// Set by `--dumpast`.
    flag_dumpast: bool,
    /// Set by `--prettytree`.
    flag_prettytree: bool,
    /// Set by `--dumpir`.
    flag_dumpir: bool,
    /// Set by `--dumptargets`.
    flag_dumptargets: bool,
    /// Set by `--dumptargetinfo`.
    flag_dumptargetinfo: bool,
}
 71 | 
/// Optimisation Level
///
/// Used to hold the requested optimisation level exactly as it was
/// given on the command line, before it is converted into an
/// `OptimisationLevel`.
#[derive(Debug)]
enum OptFlag {
    /// numeric optimisation level
    Numeric(u64),
    /// size optimisation
    Size,
}
 82 | 
/// Linker Mode
///
/// Choose the type of intermediate assets to produce when
/// performing the link.
#[derive(Debug, Deserialize)]
enum LinkMode {
    /// Intermediate language files (spelled `il` when deserialised)
    #[serde(rename = "il")]
    LlvmIr,
    /// Bitcode files (spelled `bc` when deserialised)
    #[serde(rename = "bc")]
    LlvmBc,
    /// Native Objects (spelled `obj` when deserialised)
    #[serde(rename = "obj")]
    Objects,
}
 99 | 
100 | /// Custom Deserialiser for Optimisation Flags
101 | ///
102 | /// This deserialiser will handle both numeric values and 's' or
103 | /// 'size'. Numbers greater than 3 are accepted, and transformed into
104 | /// range when converting to the stronger `OptimisationLevel` type.
105 | impl<'de> Deserialize<'de> for OptFlag {
106 |     fn deserialize<D>(d: D) -> Result<Self, D::Error>
107 |     where
108 |         D: Deserializer<'de>,
109 |     {
110 |         struct OptFlagVisitor;
111 |         impl<'de> serde::de::Visitor<'de> for OptFlagVisitor {
112 |             type Value = OptFlag;
113 | 
114 |             fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
115 |                 formatter.write_str("a number from range 0..3, or s for size")
116 |             }
117 | 
118 |             fn visit_u64<E>(self, n: u64) -> Result<Self::Value, E>
119 |             where
120 |                 E: serde::de::Error,
121 |             {
122 |                 Ok(OptFlag::Numeric(n))
123 |             }
124 | 
125 |             fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
126 |             where
127 |                 E: serde::de::Error,
128 |             {
129 |                 match value {
130 |                     "size" | "s" => Ok(OptFlag::Size),
131 |                     s => s.parse::<u64>().map(OptFlag::Numeric).map_err(|_| {
132 |                         let err = format!("Could not deserialize '{}' as optimisation flag", value);
133 |                         E::custom(err)
134 |                     }),
135 |                 }
136 |             }
137 |         }
138 | 
139 |         d.deserialize_identifier(OptFlagVisitor)
140 |     }
141 | }
142 | 
143 | impl From<OptFlag> for OptimisationLevel {
144 |     fn from(flag: OptFlag) -> Self {
145 |         match flag {
146 |             OptFlag::Numeric(level) => match level {
147 |                 0 => OptimisationLevel::Off,
148 |                 1 => OptimisationLevel::Low,
149 |                 2 => OptimisationLevel::Med,
150 |                 _ => OptimisationLevel::High,
151 |             },
152 |             OptFlag::Size => OptimisationLevel::Size,
153 |         }
154 |     }
155 | }
156 | 
157 | impl From<LinkMode> for linker::Linker {
158 |     fn from(mode: LinkMode) -> Self {
159 |         linker::Linker::new(
160 |             linker::LinkerCommand::default(),
161 |             match mode {
162 |                 LinkMode::LlvmIr => linker::LinkerAssetType::LlvmIr,
163 |                 LinkMode::LlvmBc => linker::LinkerAssetType::LlvmBc,
164 |                 LinkMode::Objects => linker::LinkerAssetType::Object,
165 |             },
166 |         )
167 |     }
168 | }
169 | 
/// Main
///
/// The main function for `ullage`. Parses the options and runs the
/// selected command.
///
/// The process exits with status 0 after a successful dump-only
/// command, and with status 1 if the target can't be created, the
/// input can't be read, or parsing or compilation reports errors.
fn main() {
    // Parse the command line. Docopt handles `--help` and `--version`
    // itself, and exits the process on a usage error.
    let args: Args = Docopt::new(USAGE)
        .and_then(|d| {
            d.help(true)
                .version(Some(meta::descriptive_version()))
                .deserialize()
        })
        .unwrap_or_else(|e| e.exit());

    // Dumping the targets only ends the run if no input file was given.
    if args.flag_dumptargets {
        targets::dump_targets();
        if args.arg_file.is_none() {
            exit(0);
        }
    }

    // Fall back to the native triple when none was requested.
    let triple = args.flag_target.unwrap_or_else(targets::get_default_triple);
    let target = targets::Target::from_triple(&triple).unwrap_or_else(|e| {
        eprintln!("error: could not create target: {}", e);
        exit(1);
    });

    // As with the target list, only stop here if there is nothing to
    // compile.
    if args.flag_dumptargetinfo {
        println!("{}", target);
        if args.arg_file.is_none() {
            exit(0);
        }
    }

    // Output defaults to `a.out` when no path was given.
    let output_path = &args.flag_output.unwrap_or_else(|| "a.out".to_string());
    let output_path = Path::new(&output_path);

    // Load the file into memory, so we can parse it into a syntax tree.
    // With no file argument the source is read from standard input.
    let source = if let Some(path) = args.arg_file {
        text::SourceText::from_path(Path::new(&path))
    } else {
        text::SourceText::from_stdin()
    };
    let source = source.unwrap_or_else(|e| {
        eprintln!("error: could not read input: {}", e);
        exit(1)
    });

    // Parse the module
    let tree = syntax::SyntaxTree::parse(&source);
    if tree.has_diagnostics() {
        eprintln!("error: could not parse source: one or more errors:");
        dump_diagnostics(&source, tree.diagnostics());
        exit(1)
    };

    // Are we just dumping the AST or compiling the whole thing?
    if args.flag_dumpast {
        println!("parsed AST: {:#?}", tree.root());
        exit(0);
    }
    if args.flag_prettytree {
        tree.write_to(&mut std::io::stdout()).unwrap();
        exit(0);
    }

    // Optimisation is off unless a level was requested; the linker is
    // only overridden when a link mode was given.
    let mut options = CompilationOptions::default()
        .with_dump_ir(args.flag_dumpir)
        .with_opt_level(
            args.flag_optimise
                .map_or(OptimisationLevel::Off, |o| o.into()),
        );
    if let Some(link_mode) = args.flag_link_mode {
        let linker = linker::Linker::from(link_mode);
        options = options.with_linker(linker);
    }

    // Create the compilation from the parsed tree
    let comp = match Compilation::new(tree, options) {
        Ok(c) => c,
        Err(e) => handle_comp_err(&e),
    };

    // Diagnostics raised while building the compilation are fatal
    if comp.has_diagnostics() {
        dump_diagnostics(&source, comp.diagnostics());
        let diag_count = comp.diagnostics().len();
        eprintln!("error: compilation failed with {} errors", diag_count);
        exit(1);
    }

    // Emit the compilation to the output path
    let emit_result = comp.emit(&target, &output_path);

    // Print any failures encountered and return a failure status
    if let Err(e) = emit_result {
        handle_comp_err(&e);
    }
}
266 | 
267 | /// Write Dignostics to STDERR
268 | ///
269 | fn dump_diagnostics(source: &text::SourceText, diagnostics: &[diag::Diagnostic]) {
270 |     for error in diagnostics.iter() {
271 |         if error.span == DUMMY_SPAN {
272 |             eprintln!("{}:error: {}", source.name(), error.message);
273 |         } else {
274 |             let pos = source.line_pos(error.span.start());
275 |             eprintln!(
276 |                 "{}:{}:{}:error: {}",
277 |                 source.name(),
278 |                 pos.0,
279 |                 pos.1,
280 |                 error.message
281 |             );
282 |             let (s, e) = source.line_extents(error.span);
283 |             eprintln!("     |");
284 |             let mut line_no = pos.0;
285 |             for line in source.slice(s, e).lines() {
286 |                 eprintln!("{:4} | {}", line_no, line);
287 |                 line_no += 1;
288 |             }
289 |             eprintln!("");
290 |         }
291 |     }
292 | }
293 | 
294 | /// Handles a Compilation Error
295 | ///
296 | /// Prints the error to standard output and exits the process.
297 | fn handle_comp_err(err: &CompError) -> ! {
298 |     eprintln!("error: compilation error: {}", err);
299 |     exit(1);
300 | }
301 | 


--------------------------------------------------------------------------------
/src/meta.rs:
--------------------------------------------------------------------------------
 1 | //! # Meta Module
 2 | //!
 3 | //! This module contains metadata about the compiler driver. It's
 4 | //! basically just used to expose the version number.
 5 | 
/// Version Number
///
/// The version number of the crate (as known by Cargo) as a
/// string. If `CARGO_PKG_VERSION` wasn't set at compile time (e.g.
/// the exe wasn't built by Cargo) then this will be `None`.
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
12 | 
13 | /// Retrieve the Version Numer
14 | ///
15 | /// If no version number is available then 'unknown' is returned.
16 | pub fn version() -> &'static str {
17 |     VERSION.unwrap_or("unknown")
18 | }
19 | 
20 | /// Retrieve the descriptive version
21 | ///
22 | /// This is the program name concatenated with the version number
23 | pub fn descriptive_version() -> String {
24 |     format!("ullage {}", version())
25 | }
26 | 


--------------------------------------------------------------------------------
/src/sem.rs:
--------------------------------------------------------------------------------
 1 | //! Semantic Analysis and Translation
 2 | //!
 3 | //! This module is responsible for translating the syntactic
 4 | //! representation of a program, as produced by the parser, into a
 5 | //! semantically rich model ready to be lowered for execution.
 6 | 
 7 | mod binder;
 8 | mod operators;
 9 | mod sem_ctx;
10 | mod tree;
11 | mod types;
12 | 
13 | pub use self::binder::{Binder, Scope, Symbol};
14 | pub use self::sem_ctx::SemCtx;
15 | pub use self::tree::{Expression, ExpressionKind};
16 | pub use self::types::{BuiltinType, Typ};
17 | 


--------------------------------------------------------------------------------
/src/sem/operators.rs:
--------------------------------------------------------------------------------
 1 | //! # Semantic Operators
 2 | //!
 3 | //! This module provides semantic understanding of the builtin
 4 | //! operators. The main entry point is the `find_builtin_op` function.
 5 | 
 6 | use super::types::{BuiltinType, Typ};
 7 | use crate::syntax::*;
 8 | 
/// The Semantic Operator
///
/// Semantically bound operator. This is an operator with knowledge of
/// the types it is to be bound to.
pub struct SemOp {
    /// Type of the left-hand operand
    pub lhs_typ: Typ,
    /// Type of the right-hand operand
    pub rhs_typ: Typ,
    /// The syntactic operator being bound
    pub op: InfixOp,
    /// Type produced by applying the operator
    pub result_typ: Typ,
}
19 | 
20 | fn num_op(op: InfixOp) -> Option<SemOp> {
21 |     Some(SemOp {
22 |         lhs_typ: Typ::Builtin(BuiltinType::Number),
23 |         rhs_typ: Typ::Builtin(BuiltinType::Number),
24 |         op,
25 |         result_typ: Typ::Builtin(BuiltinType::Number),
26 |     })
27 | }
28 | 
29 | fn comp_op(op: InfixOp) -> Option<SemOp> {
30 |     Some(SemOp {
31 |         lhs_typ: Typ::Builtin(BuiltinType::Number),
32 |         rhs_typ: Typ::Builtin(BuiltinType::Number),
33 |         op,
34 |         result_typ: Typ::Builtin(BuiltinType::Bool),
35 |     })
36 | }
37 | 
38 | /// Find Operator
39 | ///
40 | /// Searches for the result type for a given operator.
41 | pub fn find_builtin_op(op: InfixOp, lhs_typ: Typ, rhs_typ: Typ) -> Option<SemOp> {
42 |     match (op, lhs_typ, rhs_typ) {
43 |         (InfixOp::Add, Typ::Builtin(BuiltinType::String), Typ::Builtin(BuiltinType::String)) => {
44 |             Some(SemOp {
45 |                 lhs_typ,
46 |                 rhs_typ,
47 |                 op,
48 |                 result_typ: Typ::Builtin(BuiltinType::String),
49 |             })
50 |         }
51 |         (InfixOp::Add, Typ::Builtin(BuiltinType::Number), Typ::Builtin(BuiltinType::Number)) => {
52 |             num_op(op)
53 |         }
54 |         (InfixOp::Sub, _, _) | (InfixOp::Mul, _, _) | (InfixOp::Div, _, _) => num_op(op),
55 | 
56 |         (InfixOp::Eq, _, _)
57 |         | (InfixOp::NotEq, _, _)
58 |         | (InfixOp::Lt, _, _)
59 |         | (InfixOp::LtEq, _, _)
60 |         | (InfixOp::Gt, _, _)
61 |         | (InfixOp::GtEq, _, _) => comp_op(op),
62 | 
63 |         _ => None,
64 |     }
65 | }
66 | 


--------------------------------------------------------------------------------
/src/sem/sem_ctx.rs:
--------------------------------------------------------------------------------
  1 | //! Semantic Context
  2 | //!
  3 | //! This module defines the state that is passed around while
  4 | //! transforming expressions from AST representation to semantic.
  5 | 
  6 | use super::types::{BuiltinType, Typ};
  7 | use crate::diag::Diagnostic;
  8 | use crate::syntax::text::{Ident, SourceText};
  9 | use crate::syntax::tree::TokenKind;
 10 | use crate::syntax::TypeRef;
 11 | use std::collections::HashMap;
 12 | 
/// SemCtx Structure
///
/// Holds the context when transforming. This is basically the current
/// type state information along with symbol table.
pub struct SemCtx<'a> {
    /// Symbol Table for Local Variables
    ///
    /// A stack of scopes; each scope maps identifiers to their types.
    locals: Vec<HashMap<Ident, Typ>>,
    /// Named types map
    named_types: HashMap<Ident, Typ>,
    /// The source text
    source: &'a SourceText,
    /// A collection of diagnostics emitted when transforming
    diagnostics: Vec<Diagnostic>,
}
 27 | 
 28 | impl<'a> SemCtx<'a> {
 29 |     /// Create a new Semantic Context
 30 |     pub fn new(source: &'a SourceText) -> Self {
 31 |         SemCtx {
 32 |             locals: vec![HashMap::new()],
 33 |             named_types: [
 34 |                 (source.intern("String"), Typ::Builtin(BuiltinType::String)),
 35 |                 (source.intern("Bool"), Typ::Builtin(BuiltinType::Bool)),
 36 |                 (source.intern("Number"), Typ::Builtin(BuiltinType::Number)),
 37 |             ]
 38 |             .iter()
 39 |             .cloned()
 40 |             .collect(),
 41 |             source,
 42 |             diagnostics: Vec::new(),
 43 |         }
 44 |     }
 45 | 
 46 |     /// Find Type in Context
 47 |     ///
 48 |     /// Returns the `sem::Typ` declaration for the type if one is
 49 |     /// available.
 50 |     pub fn sem_ty(&self, ast_ty: &TypeRef) -> Option<Typ> {
 51 |         Some(match *ast_ty {
 52 |             TypeRef::Unit(..) => Typ::Unit,
 53 |             TypeRef::Simple(ref name) => {
 54 |                 let id = match name.kind {
 55 |                     TokenKind::Word(id) => id,
 56 |                     _ => panic!("Expected word token"),
 57 |                 };
 58 |                 return self.named_types.get(&id).cloned();
 59 |             }
 60 |             // TODO: array and tuple types
 61 |             TypeRef::Array(..) => unimplemented!("array types are not yet supported"),
 62 |             TypeRef::Tuple(..) => unimplemented!("tuple types are not yet supported"),
 63 |             TypeRef::Missing => panic!("Can't lower missing type"),
 64 |         })
 65 |     }
 66 | 
 67 |     /// Add Local
 68 |     ///
 69 |     /// Inserts a local declaration into the locals map.
 70 |     pub fn add_local(&mut self, id: Ident, typ: Typ) {
 71 |         self.locals[0].insert(id, typ);
 72 |     }
 73 | 
 74 |     /// Find a Local Declaration
 75 |     pub fn find_local(&self, id: Ident) -> Option<Typ> {
 76 |         self.locals[0].get(&id).cloned()
 77 |     }
 78 | 
 79 |     /// Push Scope
 80 |     ///
 81 |     /// Add a new empty scope to the top of the scope stack
 82 |     pub fn push_scope(&mut self) {
 83 |         self.locals.push(HashMap::new())
 84 |     }
 85 | 
 86 |     /// Pop Scope
 87 |     ///
 88 |     /// Remove and discard the top scope from the stack
 89 |     pub fn pop_scope(&mut self) {
 90 |         self.locals.pop();
 91 |     }
 92 | 
 93 |     /// Borrow the Source
 94 |     pub fn source(&self) -> &SourceText {
 95 |         &self.source
 96 |     }
 97 | 
 98 |     /// Emit a diagnostic into the context
 99 |     pub fn emit(&mut self, diagnostic: Diagnostic) {
100 |         self.diagnostics.push(diagnostic);
101 |     }
102 | 
103 |     /// Check if there are any diagnostics in the translation session
104 |     pub fn has_diagnostics(&self) -> bool {
105 |         !self.diagnostics.is_empty()
106 |     }
107 | 
108 |     /// Get the diagnostics
109 |     pub fn into_diagnostics(self) -> Vec<Diagnostic> {
110 |         self.diagnostics
111 |     }
112 | }
113 | 


--------------------------------------------------------------------------------
/src/sem/tree.rs:
--------------------------------------------------------------------------------
  1 | //! Semantic Tree
  2 | //!
  3 | //! This module contains the types used to construct the
  4 | //! decorated/semantic expression tree.
  5 | 
  6 | use super::types::*;
  7 | use crate::syntax::{Constant, InfixOp, PrefixOp};
  8 | 
/// A Function Declaration
///
/// Represents the context contained in the semantic tree for a
/// function declaration. Defined as a struct for convenience.
#[derive(Debug, PartialEq)]
pub struct FnDecl {
    /// The declaration's logical name
    pub ident: String,

    /// The return type of the function
    pub ret_ty: Typ,

    /// Parameters to the function
    pub params: Vec<VarDecl>,

    /// The body of the function
    ///
    /// The function's return is the value of the expression
    pub body: Box<Expression>,
}
 29 | 
/// Variable Declaration
///
/// Represents the binding of a given type to an identifier to create
/// a variable. Used both for local variable declarations as well as
/// function parameters.
#[derive(Debug, PartialEq)]
pub struct VarDecl {
    /// The logical name of the declaration
    ///
    /// FIXME: proper idents rather than strings in the bound tree.
    pub ident: String,

    /// The type of the identifier, if one was specified or inferred.
    pub ty: Typ,
}
 45 | 
/// A Semantically Decorated Expression
///
/// This struct represents the expression tree after semantic
/// analysis. This is no longer guaranteed to be a literal
/// representation of the code as it was written.
#[derive(Debug, PartialEq)]
pub struct Expression {
    /// The contents of this expression
    ///
    /// This is a union of the different expression kinds.
    pub kind: ExpressionKind,

    /// The type of this node
    ///
    /// All expressions in the tree will have some form of type. Error
    /// expressions or expressions where the type can't be calculated
    /// due to a syntax or semantic error in the program are typed
    /// `Typ::Error`. Parts of the tree where the type has yet to be
    /// inferred are typed `Typ::Unknown`.
    ///
    /// Before lowering a well-formed tree should contain neither of
    /// these types.
    pub typ: Typ,
}
 70 | 
/// The Expression Kind Enum
///
/// This enum contains a variant for the different types of expression
/// in the semantic tree. This is similar to the `syntax::Expression`
/// enum however some information may have been elided or reordered to
/// better suit the lowering process.
#[derive(Debug, PartialEq)]
pub enum ExpressionKind {
    /// Invalid Expression
    ///
    /// Emitted when transformation or binding can't produce a
    /// properly typed or bound expression. This should only be
    /// produced if a diagnostic has already been raised. Further
    /// passes can safely return error if an `Error` expression is
    /// encountered to avoid error cascades.
    Error,

    /// Identifier
    ///
    /// Variable reference. If this is an lvalue then it represents a
    /// write to the named variable, otherwise a read. Identifier
    /// expressions appear in other compound expressions such as
    /// functions when those expressions reference bound values.
    Identifier(String),

    /// Literal Value
    ///
    /// A constant value. This is just plucked straight from the
    /// syntax tree.
    Literal(Constant),

    /// A prefix operator
    ///
    /// Holds the operator and the expression it is applied to.
    Prefix(PrefixOp, Box<Expression>),

    /// An infix operator
    ///
    /// This only counts comparison and arithmetic operators as infix
    /// operators. Assignment is handled by the assignment node.
    Infix(Box<Expression>, InfixOp, Box<Expression>),

    /// Call Expression
    ///
    /// Represents the application of arguments to a function. The
    /// callee is represented as an expression as more than just
    /// identifiers are callable.
    Call(Box<Expression>, Vec<Expression>),

    /// Assignment
    ///
    /// Holds the name being assigned to and the expression producing
    /// the value.
    Assignment(String, Box<Expression>),

    /// Index Expression
    // NOTE(review): presumably (indexee, index) - confirm against the
    // code which constructs this variant.
    Index(Box<Expression>, Box<Expression>),

    /// If Then Else
    // NOTE(review): presumably (condition, then, else) - confirm
    // against the code which constructs this variant.
    IfThenElse(Box<Expression>, Box<Expression>, Box<Expression>),

    /// Function Declaration
    ///
    /// A function declaration expression both registers a function in
    /// the symbol table and represents the callable function itself.
    Function(FnDecl),

    /// Loop with Condition
    // NOTE(review): presumably (condition, body) - confirm against the
    // code which constructs this variant.
    Loop(Box<Expression>, Box<Expression>),

    /// A Sequence of Expressions
    Sequence(Vec<Expression>),

    /// Print Expression
    ///
    /// Converts the inner expression to a string and prints it to
    /// standard output. This underpins the spec tests by allowing
    /// simple output.
    Print(Box<Expression>),

    /// Variable Declaration
    // NOTE(review): the meaning of the `bool` isn't visible here
    // (possibly mutability) - confirm before relying on it.
    Declaration(VarDecl, bool, Box<Expression>),
}
149 | 
150 | impl Expression {
151 |     /// Create a New Expression from parts
152 |     ///
153 |     /// Constructs a new semantic expression tree node from its
154 |     /// constituent parts. The type information for a given node can
155 |     /// be set to `Typ::Unknown` if no type inference has yet been run
156 |     /// for this expression.
157 |     pub fn new(kind: ExpressionKind, typ: Typ) -> Self {
158 |         Expression { kind, typ }
159 |     }
160 | 
161 |     /// Create an Error Expression
162 |     ///
163 |     /// Convenience function for returning error expressions. Error
164 |     /// expressions have a kind of `ExpressionKind::Error` and a type
165 |     /// of `Typ::Error`. They are used to mark invalid or
166 |     /// incalculable portions of the bound tree.
167 |     pub fn error() -> Self {
168 |         Expression::new(ExpressionKind::Error, Typ::Error)
169 |     }
170 | }
171 | 


--------------------------------------------------------------------------------
/src/sem/types.rs:
--------------------------------------------------------------------------------
 1 | //! Rich Type Information
 2 | //!
 3 | //! This module contains the `Typ` structure which defines the
 4 | //! properties of builtin and user-specified types along with the
 5 | //! global type registry which is used during compilation for type
 6 | //! checking and inference.
 7 | 
 8 | use crate::syntax::text::Ident;
 9 | use std::borrow::Cow;
10 | 
11 | /// Semantic Type
12 | ///
13 | /// This enum defines the different type values that each node in the
14 | /// semantic tree could have.
15 | #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
16 | pub enum Typ {
17 |     /// The given expression is invalid. Further binding or inference
18 |     /// based off this type is meaningless.
19 |     Error,
20 | 
21 |     /// Type yet to be inferred.
22 |     Unknown,
23 | 
24 |     /// The unit type
25 |     Unit,
26 | 
27 |     /// One of the given basic types in the language.
28 |     Builtin(BuiltinType),
29 | 
30 |     /// A function. We can't store the type in here properly as that
31 |     /// would break the `Copy` of `Typ`. Instead we just store the
32 |     /// ident of the function to use later when looking it back up.
33 |     /// It's major HAXX.
34 |     ///
35 |     /// # Issues
36 |     ///
37 |     /// FIXME: We need to create a separation between the
38 |     /// trivially-copyable `Typ` and some `TypeInfo` which contains
39 |     /// the full information for the type rather than using this
40 |     /// id-stashing workaround.
41 |     Function(Ident),
42 | }
43 | 
44 | impl Typ {
45 |     /// Returns the name of this type, allocating only for `Typ::Function`
46 |     pub fn name(&self) -> Cow<'_, str> {
47 |         Cow::Borrowed(match *self {
48 |             Typ::Error => "!ERROR!",
49 |             Typ::Unknown => "_",
50 |             Typ::Unit => "()",
51 |             Typ::Builtin(ref b) => match *b {
52 |                 BuiltinType::Number => "Number",
53 |                 BuiltinType::Bool => "Bool",
54 |                 BuiltinType::String => "String",
55 |             },
56 |             Typ::Function(id) => return Cow::Owned(format!("Function({:?})", id)),
57 |         })
58 |     }
59 | }
60 | 
61 | /// The fixed builtin types
62 | #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
63 | pub enum BuiltinType {
64 |     /// 64 bit numerical value
65 |     Number,
66 |     /// Boolean
67 |     Bool,
68 |     /// String type. This is a pointer-length pair
69 |     String,
70 | }
71 | 


--------------------------------------------------------------------------------
/src/syntax.rs:
--------------------------------------------------------------------------------
 1 | //! Syntax parsing and expression tree
 2 | //!
 3 | //! This module provides a simple abstract syntax tree, and a parser
 4 | //! implementation which recognises a simple language using
 5 | //! Pratt-style operator precedence parsing.
 6 | 
 7 | mod node;
 8 | pub mod parse;
 9 | pub mod syntax_builder;
10 | pub mod text;
11 | pub mod tree;
12 | 
13 | pub use self::node::*;
14 | pub use self::tree::expression::*;
15 | pub use self::tree::operators::*;
16 | pub use self::tree::types::*;
17 | pub use self::tree::*;
18 | 


--------------------------------------------------------------------------------
/src/syntax/node.rs:
--------------------------------------------------------------------------------
 1 | //! Syntax node abstractions
 2 | 
 3 | use super::text::{SourceText, Span};
 4 | 
 5 | /// Syntax Node
 6 | ///
 7 | /// This trait abstracts over things which can appear in a given
 8 | /// `SyntaxTree`.
 9 | pub trait SyntaxNode {
10 |     /// A short description of the node. Used for tree pretty
11 |     /// printing.
12 |     fn description(&self, source: &SourceText) -> std::borrow::Cow<str>;
13 | 
14 |     /// Get the span this node covers in the source
15 |     ///
16 |     /// If the node has no real location then `DUMMY_SPAN` is returned.
17 |     fn span(&self) -> Span;
18 | }
19 | 


--------------------------------------------------------------------------------
/src/syntax/syntax_builder.rs:
--------------------------------------------------------------------------------
 1 | //! Syntax Tree Factory
 2 | //!
 3 | //! Ergonomic methods to build new expression trees
 4 | //!
 5 | //! Used for building expression trees by hand in code rather than the
 6 | //! parser.
 7 | 
 8 | use super::text::Ident;
 9 | use super::*;
10 | 
11 | /// Build a raw string literal
12 | ///
13 | /// Takes a string and builds a string literal expression from it. The
14 | /// token is stubbed and will contain a dummy span.
15 | pub fn raw_string<S: Into<String>>(value: S) -> Expression {
16 |     let value = value.into();
17 |     Expression::constant_string(
18 |         Token::new(TokenKind::Literal(Literal::RawString(value.clone()))),
19 |         value,
20 |     )
21 | }
22 | 
23 | /// Constant Boolean Expression
24 | ///
25 | /// Takes a bool value and builds a constant expression around a stubbed-out
26 | /// `true` or `false` word token. The token will contain a dummy span.
27 | pub fn const_bool(value: bool) -> Expression {
28 |     Expression::constant_bool(word(if value { Ident::True } else { Ident::False }), value)
29 | }
30 | 
31 | /// Constant numeric value
32 | ///
33 | /// Takes a numeric value and builds a constant expression around a
34 | /// stubbed-out number literal token. The token will contain a dummy span.
35 | pub fn const_num(value: i64) -> Expression {
36 |     Expression::constant_num(
37 |         Token::new(TokenKind::Literal(Literal::Number(value))),
38 |         value,
39 |     )
40 | }
41 | 
42 | /// Identifier expression built around a stubbed-out word token. Reads a
43 | /// value from a variable or refers to a function or other named item.
44 | pub fn ident_expr(value: Ident) -> Expression {
45 |     Expression::identifier(word(value), value)
46 | }
47 | 
48 | /// Word token from identifier. Wraps the identifier in a token with
49 | /// missing position information.
50 | pub fn word(id: Ident) -> Token {
51 |     Token::new(TokenKind::Word(id))
52 | }
53 | 


--------------------------------------------------------------------------------
/src/syntax/text.rs:
--------------------------------------------------------------------------------
 1 | //! Source Text
 2 | //!
 3 | //! This module contains abstractions relating to the text of the
 4 | //! program. The main member is the `SourceText` structure which
 5 | //! provides backing for the source code as well as line information.
 6 | //!
 7 | //! Positions within the source are represented by the `Pos`, `Span`
 8 | //! and `Location` types.
 9 | 
10 | mod intern;
11 | mod position;
12 | mod source_text;
13 | 
14 | pub use self::intern::{Ident, Interner};
15 | pub use self::position::{Pos, Span, DUMMY_SPAN};
16 | pub use self::source_text::SourceText;
17 | 


--------------------------------------------------------------------------------
/src/syntax/text/intern.rs:
--------------------------------------------------------------------------------
  1 | //! String Interning
  2 | //!
  3 | //! This module contains types to model interned strings. The idea is
  4 | //! that a string can be stored in an `Interner` only once. The
  5 | //! contents of the string can't be accessed without the `Interner`,
  6 | //! but interned strings can be compared for equality quickly.
  7 | 
  8 | use indexmap::IndexSet;
  9 | 
 10 | /// Interner
 11 | ///
 12 | /// Keeps an indexed set of interned strings used to look them up.
 13 | pub struct Interner {
 14 |     lookup: IndexSet<String>,
 15 | }
 16 | 
 17 | /// Interned String
 18 | ///
 19 | /// Represents a kind of interned string value. Known keywords appear
 20 | /// in here to allow them to be easily matched.
 21 | #[derive(Debug, PartialEq, Hash, Eq, Copy, Clone)]
 22 | pub enum Ident {
 23 |     /// the `else` keyword
 24 |     Else,
 25 |     /// the `end` keyword
 26 |     End,
 27 |     /// the `false` keyword
 28 |     False,
 29 |     /// the `fn` keyword
 30 |     Fn,
 31 |     /// the `if` keyword
 32 |     If,
 33 |     /// the `let` keyword
 34 |     Let,
 35 |     /// the `print` keyword
 36 |     Print,
 37 |     /// the `true` keyword
 38 |     True,
 39 |     /// the `unless` keyword
 40 |     Unless,
 41 |     /// the `until` keyword
 42 |     Until,
 43 |     /// the `var` keyword
 44 |     Var,
 45 |     /// the `while` keyword
 46 |     While,
 47 |     /// Any other identifier; the payload is its index in the `Interner`.
 48 |     Unknown(usize),
 49 | }
 50 | 
 51 | impl Interner {
 52 |     /// Create an Empty Interner
 53 |     pub fn new() -> Self {
 54 |         Default::default()
 55 |     }
 56 | 
 57 |     /// Intern a String
 58 |     ///
 59 |     /// If the string is already in this `Interner` then the existing
 60 |     /// interned string is returned. If this is a new string a new
 61 |     /// intern value is allocated and returned.
 62 |     pub fn intern(&mut self, value: &str) -> Ident {
 63 |         match value {
 64 |             "else" => Ident::Else,
 65 |             "end" => Ident::End,
 66 |             "false" => Ident::False,
 67 |             "fn" => Ident::Fn,
 68 |             "if" => Ident::If,
 69 |             "let" => Ident::Let,
 70 |             "print" => Ident::Print,
 71 |             "true" => Ident::True,
 72 |             "unless" => Ident::Unless,
 73 |             "until" => Ident::Until,
 74 |             "var" => Ident::Var,
 75 |             "while" => Ident::While,
 76 |             _ => self.intern_unknown(value),
 77 |         }
 78 |     }
 79 | 
 80 |     /// Intern a New Value
 81 |     ///
 82 |     /// When we have checked that the value isn't a known ident this
 83 |     /// looks up or creates its `Ident::Unknown` entry in the table.
 84 |     /// A `String` is only allocated for values not yet in the table.
 85 |     fn intern_unknown(&mut self, value: &str) -> Ident {
 86 |         let existing = self.lookup.get_full(value).map(|(index, _)| index);
 87 |         Ident::Unknown(existing.unwrap_or_else(|| self.lookup.insert_full(value.into()).0))
 88 |     }
 89 | 
 90 |     /// Borrow the Interned value
 91 |     ///
 92 |     /// Converts an ident back to its text; an unregistered index yields `""`.
 93 |     pub fn interned_value(&self, ident: Ident) -> &str {
 94 |         match ident {
 95 |             Ident::Else => "else",
 96 |             Ident::End => "end",
 97 |             Ident::False => "false",
 98 |             Ident::Fn => "fn",
 99 |             Ident::If => "if",
100 |             Ident::Let => "let",
101 |             Ident::Print => "print",
102 |             Ident::True => "true",
103 |             Ident::Unless => "unless",
104 |             Ident::Until => "until",
105 |             Ident::Var => "var",
106 |             Ident::While => "while",
107 |             Ident::Unknown(index) => self.lookup.get_index(index).map(|s| &s[..]).unwrap_or(""),
108 |         }
109 |     }
110 | }
111 | 
112 | impl Default for Interner { // The default interner starts with an empty lookup set.
113 |     fn default() -> Self {
114 |         Interner {
115 |             lookup: IndexSet::default(),
116 |         }
117 |     }
118 | }
119 | 
120 | #[cfg(test)]
121 | mod test {
122 | 
123 |     use super::*;
124 | 
125 |     #[test]
126 |     fn intern_dedupes_words() {
127 |         let mut interner = Interner::default();
128 |         // Equal text must intern to equal idents; different text must not.
129 |         let foo1 = interner.intern("foo");
130 |         let foo2 = interner.intern("foo");
131 |         let bar = interner.intern("bar");
132 | 
133 |         assert_eq!(foo1, foo2);
134 |         assert_ne!(foo1, bar);
135 |         assert_ne!(foo2, bar);
136 |     }
137 | }
138 | 


--------------------------------------------------------------------------------
/src/syntax/text/position.rs:
--------------------------------------------------------------------------------
  1 | //! Syntax Position Information
  2 | //!
  3 | //! This module provides the types needed to represent positions
  4 | //! within a buffer.
  5 | 
  6 | /// Source Buffer Position
  7 | ///
  8 | /// Used to represent a position within the source of a compilation
  9 | /// session.
 10 | #[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
 11 | pub struct Pos(usize);
 12 | 
 13 | /// Source Buffer Span
 14 | ///
 15 | /// A span represents a range of positions within the source. Each
 16 | /// span is delineated by the start and end `Pos`s. Spans can be
 17 | /// used to identify the extent of lexemes in the AST, and ranges of
 18 | /// interest when emitting error information.
 19 | #[derive(Debug, PartialEq, Copy, Clone)]
 20 | pub struct Span {
 21 |     start: Pos,
 22 |     end: Pos,
 23 | }
 24 | 
 25 | /// Dummy Span used when no real span is available; zero-width at offset 0.
 26 | pub const DUMMY_SPAN: Span = Span {
 27 |     start: Pos(0),
 28 |     end: Pos(0),
 29 | };
 30 | 
 31 | impl Pos {
 32 |     /// Get the Byte Offset
 33 |     ///
 34 |     /// Returns the offset of this position from the beginning of
 35 |     /// the `SourceText`.
 36 |     pub fn offset(self) -> usize {
 37 |         self.0
 38 |     }
 39 | }
 40 | 
 41 | impl std::ops::Add for Pos { // Adding two positions sums their offsets.
 42 |     type Output = Self;
 43 | 
 44 |     fn add(self, other: Pos) -> Self {
 45 |         Pos(self.0 + other.0)
 46 |     }
 47 | }
 48 | 
 49 | impl From<usize> for Pos { // Wraps a raw offset as a `Pos`.
 50 |     fn from(offset: usize) -> Self {
 51 |         Pos(offset)
 52 |     }
 53 | }
 54 | 
 55 | impl Span {
 56 |     /// Create a Span Around two Cursors
 57 |     ///
 58 |     /// The returned span starts after the first cursor and finishes
 59 |     /// before the second one. The cursors themselves can be thought
 60 |     /// to point 'between' the characters in the buffer.
 61 |     pub fn new(start: Pos, end: Pos) -> Self {
 62 |         Span { start, end }
 63 |     }
 64 | 
 65 |     /// Create a zero-width span at a given position
 66 |     ///
 67 |     /// The returned span has the same start and end position.
 68 |     pub fn new_at(pos: Pos) -> Self {
 69 |         Span {
 70 |             start: pos,
 71 |             end: pos,
 72 |         }
 73 |     }
 74 | 
 75 |     /// Create a `Span` enclosing two existing spans. This makes a new
 76 |     /// span running from the lower of the two start positions to the
 77 |     /// higher of the two end positions, so it encompasses both spans.
 78 |     pub fn enclosing(first: Span, second: Span) -> Self {
 79 |         let start = std::cmp::min(first.start(), second.start());
 80 |         let end = std::cmp::max(first.end(), second.end());
 81 |         Span::new(start, end)
 82 |     }
 83 | 
 84 |     /// Get the starting position of this span
 85 |     pub fn start(&self) -> Pos {
 86 |         self.start
 87 |     }
 88 | 
 89 |     /// Get the end position of this span
 90 |     pub fn end(&self) -> Pos {
 91 |         self.end
 92 |     }
 93 | }
 94 | 
 95 | #[cfg(test)]
 96 | pub mod test {
 97 |     // Checks for `Pos` and `Span` construction and their accessors.
 98 |     use super::*;
 99 | 
100 |     #[test]
101 |     fn pos_from_usize() {
102 |         let pos = Pos::from(1);
103 |         assert_eq!(Pos(1), pos);
104 |     }
105 | 
106 |     #[test]
107 |     fn pos_converts_back_to_offset() {
108 |         let pos = Pos::from(123);
109 |         assert_eq!(123, pos.offset());
110 |     }
111 | 
112 |     #[test]
113 |     fn span_from_cursor_pair() {
114 |         let span = Span::new(1.into(), 3.into());
115 |         assert_eq!(Pos::from(1), span.start);
116 |         assert_eq!(Pos::from(3), span.end);
117 |     }
118 | 
119 |     #[test]
120 |     fn span_has_start_end() {
121 |         let span_loc = Span::new(12.into(), 43.into());
122 | 
123 |         assert_eq!(12, span_loc.start().offset());
124 |         assert_eq!(43, span_loc.end().offset());
125 |     }
126 | 
127 |     #[test]
128 |     fn span_at_pos() {
129 |         let pos = Pos::from(123);
130 |         let span = Span::new_at(pos);
131 |         // A span created at a single position is zero-width.
132 |         assert_eq!(123, span.start().offset());
133 |         assert_eq!(123, span.end().offset());
134 |     }
135 | }
136 | 


--------------------------------------------------------------------------------
/src/syntax/text/source_text.rs:
--------------------------------------------------------------------------------
  1 | //! Source Text
  2 | //!
  3 | //! This module contains a structure to represent the source text of
  4 | //! the program. It is used to hold the source code in memory for
  5 | //! iteration by the lexer. Indexing in to the source is possible
  6 | //! using `Location`s, and `Location`s can be turned into `(line,
  7 | //! col)` position pairs for displaying in diagnostics.
  8 | 
  9 | use super::{Ident, Interner, Pos, Span};
 10 | use std::cell::RefCell;
 11 | use std::fs::File;
 12 | use std::io::{self, prelude::*};
 13 | use std::path::Path;
 14 | 
 15 | /// Source Text Struct
 16 | ///
 17 | /// A source text owns the contents of the file or other input, along
 18 | /// with its line offsets, its name and an identifier interner.
 19 | pub struct SourceText {
 20 |     /// The contents of the source text
 21 |     source: String,
 22 |     /// The byte offsets of the beginning of each line. Can be used
 23 |     /// to convert an offset into a `(line, column)` pair.
 24 |     line_offsets: Vec<usize>,
 25 |     /// The name of this source code. Used when reporting diagnostics.
 26 |     name: String,
 27 |     /// String interner to create identifiers
 28 |     ///
 29 |     /// FIXME: Should this live here?
 30 |     interner: RefCell<Interner>,
 31 | }
 32 | 
 33 | impl SourceText {
 34 |     /// Create a `SourceText` for the given string with a placeholder name
 35 |     pub fn new<T: Into<String>>(source: T) -> Self {
 36 |         SourceText::with_name(source, "<unamed-snippet>")
 37 |     }
 38 | 
 39 |     /// Create a `SourceText` with a known name
 40 |     pub fn with_name<T: Into<String>, U: Into<String>>(source: T, name: U) -> Self {
 41 |         let source = source.into();
 42 |         let name = name.into();
 43 |         let line_offsets = get_line_offsets(&source[..]);
 44 |         SourceText {
 45 |             source,
 46 |             line_offsets,
 47 |             name,
 48 |             interner: Default::default(),
 49 |         }
 50 |     }
 51 | 
 52 |     /// Create a source text from standard input
 53 |     ///
 54 |     /// Reads the contents of `io::stdin` to a buffer and creates a new
 55 |     /// source text named `<stdin>` from that.
 56 |     pub fn from_stdin() -> io::Result<Self> {
 57 |         let mut s = String::new();
 58 |         io::stdin().read_to_string(&mut s)?;
 59 |         Ok(SourceText::with_name(s, "<stdin>"))
 60 |     }
 61 | 
 62 |     /// Create a source text from a file
 63 |     ///
 64 |     /// Reads the contents of a given file path into a buffer and
 65 |     /// creates a new source text, named after the path, from that.
 66 |     pub fn from_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
 67 |         let mut s = String::new();
 68 |         File::open(path.as_ref())?.read_to_string(&mut s)?;
 69 |         Ok(SourceText::with_name(
 70 |             s,
 71 |             path.as_ref().display().to_string(),
 72 |         ))
 73 |     }
 74 | 
 75 |     /// Get the Starting Position
 76 |     ///
 77 |     /// Returns the index into the buffer which points to the first
 78 |     /// character. As this is a 'cursor' which points 'between' the
 79 |     /// characters even an empty source will have at least one
 80 |     /// distinct position.
 81 |     pub fn start(&self) -> Pos {
 82 |         Pos::from(0)
 83 |     }
 84 | 
 85 |     /// Get Line Count
 86 |     ///
 87 |     /// Returns the number of lines in the source text.
 88 |     pub fn line_count(&self) -> usize {
 89 |         self.line_offsets.len()
 90 |     }
 91 | 
 92 |     /// Get the name of the source text
 93 |     pub fn name(&self) -> &str {
 94 |         &self.name
 95 |     }
 96 | 
 97 |     /// Intern a String Value
 98 |     pub fn intern(&self, value: &str) -> Ident {
 99 |         self.interner.borrow_mut().intern(value)
100 |     }
101 | 
102 |     /// Lookup the value of an identifier, returned as an owned copy
103 |     pub fn interned_value(&self, ident: Ident) -> String {
104 |         self.interner.borrow().interned_value(ident).into()
105 |     }
106 | 
107 |     /// Get Line Position
108 |     ///
109 |     /// Returns the 1-based line and 0-based byte column of the given
110 |     /// position in the source.
111 |     pub fn line_pos<T: Into<Pos>>(&self, pos: T) -> (usize, usize) {
112 |         let offset = pos.into().offset();
113 |         match self.line_offsets.binary_search(&offset) {
114 |             Ok(index) => (index + 1, 0),
115 |             Err(index) => {
116 |                 let nearest_line_start = self.line_offsets[index - 1];
117 |                 (index, offset - nearest_line_start)
118 |             }
119 |         }
120 |     }
121 | 
122 |     /// Get the positions bounding the whole lines touched by the given span
123 |     pub fn line_extents(&self, span: Span) -> (Pos, Pos) {
124 |         let start_offset = span.start().offset();
125 |         let end_offset = span.end().offset();
126 |         let index = match self.line_offsets.binary_search(&start_offset) {
127 |             Ok(index) => index,
128 |             Err(index) => index - 1,
129 |         };
130 |         let begin_pos = Pos::from(self.line_offsets[index]);
131 |         for line_offset in &self.line_offsets[index..] {
132 |             if *line_offset > end_offset {
133 |                 return (begin_pos, Pos::from(*line_offset));
134 |             }
135 |         }
136 |         // if we couldn't find the start of a line after this one then
137 |         // return to the end of the string.
138 |         (begin_pos, Pos::from(self.source.len()))
139 |     }
140 | 
141 |     /// Slice into the Source between two positions
142 |     pub fn slice(&self, start: Pos, end: Pos) -> &str {
143 |         &self.source[start.offset()..end.offset()]
144 |     }
145 | 
146 |     /// Walk the chars from `start`, pairing each with the position just after it
147 |     pub fn walk_chars(&self, start: Pos) -> impl Iterator<Item = (char, Pos)> + '_ {
148 |         self.source[start.offset()..]
149 |             .chars()
150 |             .scan(start, |pos, ch| {
151 |                 let next = Pos::from(pos.offset() + ch.len_utf8());
152 |                 *pos = next;
153 |                 Some((ch, next))
154 |             })
155 |     }
156 | }
157 | 
158 | fn get_line_offsets(source: &str) -> Vec<usize> { // line starts: 0, then one past each '\n'
159 |     std::iter::once(0)
160 |         .chain(source.match_indices('\n').map(|(idx, _)| idx + 1))
161 |         .collect()
162 | }
163 | 
164 | #[cfg(test)]
165 | mod test {
166 |     // Checks line counting and offset to (line, column) conversion.
167 |     use super::*;
168 | 
169 |     #[test]
170 |     fn source_empty_has_single_line() {
171 |         let source = SourceText::new("");
172 |         assert_eq!(1, source.line_count());
173 |     }
174 | 
175 |     #[test]
176 |     fn source_with_windows_newline() {
177 |         let source = SourceText::new("\r\n");
178 |         assert_eq!(2, source.line_count());
179 |     }
180 | 
181 |     #[test]
182 |     fn source_with_multiple_lines() {
183 |         let source = SourceText::new(
184 |             r#"
185 | fn hello(world: String): String
186 |     print 'hello ' + world
187 | end
188 | "#,
189 |         );
190 |         assert_eq!(5, source.line_count());
191 |     }
192 | 
193 |     #[test]
194 |     fn source_pos_to_line_col() {
195 |         let source = SourceText::new(
196 |             r#"
197 | # Modululs remainder
198 | #
199 | # Returns the modulus remainder of n/d
200 | fn mod(n: Number, d: Number): Number
201 |    n if n < d else mod(n - d, d)
202 | end
203 | "#,
204 |         );
205 |         // Expected pairs are (1-based line, 0-based column).
206 |         assert_eq!((1, 0), source.line_pos(0));
207 |         assert_eq!((2, 1), source.line_pos(2));
208 |         assert_eq!((4, 8), source.line_pos(32));
209 |         assert_eq!((4, 38), source.line_pos(62));
210 |         assert_eq!((5, 0), source.line_pos(63));
211 |         assert_eq!((5, 30), source.line_pos(93));
212 |         assert_eq!((8, 0), source.line_pos(137));
213 |     }
214 | }
215 | 


--------------------------------------------------------------------------------
/src/syntax/tree.rs:
--------------------------------------------------------------------------------
  1 | //! Syntax Tree
  2 | //!
  3 | //! This module defines the types which make up the parsed syntax
  4 | //! tree. This tree defines the full structure of a parsed source file
  5 | //! before any semantic transformation is done.
  6 | 
  7 | pub mod expression;
  8 | pub mod operators;
  9 | mod seplist;
 10 | mod token;
 11 | mod trivia;
 12 | pub mod types;
 13 | 
 14 | use std::io::{self, prelude::*};
 15 | 
 16 | use crate::diag::Diagnostic;
 17 | use crate::parse::Parser;
 18 | use crate::text::SourceText;
 19 | 
 20 | pub use self::seplist::{SepList, SepListBuilder};
 21 | pub use self::token::{Literal, Token, TokenKind};
 22 | pub use self::trivia::{TriviaToken, TriviaTokenKind};
 23 | 
 24 | use self::expression::Expression;
 25 | use super::SyntaxNode;
 26 | 
 27 | /// Syntax tree
 28 | ///
 29 | /// The syntax tree represents the parsed source of a given file. It
 30 | /// holds the root expression, the end of file token and diagnostics.
 31 | pub struct SyntaxTree<'a> {
 32 |     /// The root of the main expression tree
 33 |     root: Expression,
 34 |     /// Diagnostics related to the given tree
 35 |     diagnostics: Vec<Diagnostic>,
 36 |     /// End token
 37 |     end: Token,
 38 |     /// The source for this tree
 39 |     source: &'a SourceText,
 40 | }
 41 | 
 42 | impl<'a> SyntaxTree<'a> {
 43 |     /// Create a new syntax tree
 44 |     ///
 45 |     /// The syntax tree represents a single parsed item of source
 46 |     /// text. Syntax trees are usually constructed by the compiler.
 47 |     ///
 48 |     /// # Parameters
 49 |     ///
 50 |     ///  * `source`: The source text this tree was parsed from.
 51 |     ///  * `root`: The body of the file. This could be an empty
 52 |     ///            sequence if the file is empty.
 53 |     ///  * `diagnostics`: Diagnostics raised while parsing the source.
 54 |     ///  * `end`: The closing EOF token. This may have some leading
 55 |     ///           trivia attached and is therefore required for a
 56 |     ///           full-fidelity tree.
 57 |     pub fn new(
 58 |         source: &'a SourceText,
 59 |         root: Expression,
 60 |         diagnostics: Vec<Diagnostic>,
 61 |         end: Token,
 62 |     ) -> Self {
 63 |         SyntaxTree {
 64 |             root,
 65 |             diagnostics,
 66 |             end,
 67 |             source,
 68 |         }
 69 |     }
 70 | 
 71 |     /// Parse a tree from source text
 72 |     pub fn parse(source: &'a SourceText) -> Self {
 73 |         Parser::new(source).parse()
 74 |     }
 75 | 
 76 |     /// Parse a source tree containing a single expression
 77 |     pub fn parse_single(source: &'a SourceText) -> Self {
 78 |         Parser::new(source).parse_single()
 79 |     }
 80 | 
 81 |     /// Get the root of the tree
 82 |     pub fn root(&self) -> &Expression {
 83 |         &self.root
 84 |     }
 85 | 
 86 |     /// Get the end token
 87 |     pub fn end(&self) -> &Token {
 88 |         &self.end
 89 |     }
 90 | 
 91 |     /// Get diagnostics
 92 |     pub fn diagnostics(&self) -> &[Diagnostic] {
 93 |         &self.diagnostics
 94 |     }
 95 | 
 96 |     /// Check if the tree has buffered diagnostics
 97 |     pub fn has_diagnostics(&self) -> bool {
 98 |         !self.diagnostics.is_empty()
 99 |     }
100 | 
101 |     /// Get the Root Expression
102 |     ///
103 |     /// Accesses the base of the expression tree. The only other part
104 |     /// of the tree is the `end` token.
105 |     pub fn root_expression(&self) -> &Expression {
106 |         &self.root
107 |     }
108 | 
109 |     /// Access the Borrowed Source
110 |     ///
111 |     /// Allows access to the source this syntax tree was parsed from.
112 |     pub fn source(&self) -> &'a SourceText {
113 |         self.source
114 |     }
115 | 
116 |     /// Dump the Expression Tree
117 |     ///
118 |     /// Walks the subnodes of this tree and writes a text representation
119 |     /// of them to `writer`. Write and flush failures are both returned.
120 |     pub fn write_to<W>(&self, writer: &mut W) -> io::Result<()>
121 |     where
122 |         W: io::Write,
123 |     {
124 |         let mut writer = io::BufWriter::new(writer);
125 |         pretty_tree(&mut writer, self.source, self.root(), &mut String::new(), "•")?;
126 |         writer.flush() // explicit: dropping a `BufWriter` ignores flush errors
127 |     }
128 | }
129 | 
130 | /// Writes `expr` and, recursively, its children to `writer` as an
131 | /// indented tree, one node per line, drawn with box-drawing characters.
132 | fn pretty_tree<W>(
133 |     writer: &mut io::BufWriter<W>,
134 |     source: &SourceText,
135 |     expr: &Expression,
136 |     prefix: &mut String,
137 |     lead: &str,
138 | ) -> io::Result<()>
139 | where
140 |     W: io::Write,
141 | {
142 |     writeln!(writer, "{}{} {}", prefix, lead, expr.description(source))?;
143 |     let children: Vec<&Expression> = match expr {
144 |         Expression::Identifier(_) => Vec::new(),
145 |         Expression::Literal(_) => Vec::new(),
146 |         Expression::Prefix(p) => vec![&p.inner],
147 |         Expression::Infix(i) => vec![&i.left, &i.right],
148 |         Expression::Call(c) => std::iter::once(&*c.callee)
149 |             .chain(c.arguments.iter())
150 |             .collect(),
151 |         Expression::Index(i) => vec![&i.index, &i.indexee],
152 |         Expression::IfThenElse(i) => vec![&i.cond, &i.if_true, &i.if_false],
153 |         Expression::Function(f) => vec![&f.body.contents],
154 |         Expression::Loop(l) => vec![&l.condition, &l.body.contents],
155 |         Expression::Sequence(s) => s.iter().collect(),
156 |         Expression::Print(p) => vec![&p.inner],
157 |         Expression::Declaration(d) => vec![&d.initialiser],
158 |         Expression::Grouping(g) => vec![&g.inner],
159 |     };
160 |     // Extend the prefix for the children; `│` keeps a non-last branch open.
161 |     let orig_prefix_len = prefix.len();
162 |     match lead {
163 |         "└─" => prefix.push_str("  "),
164 |         "├─" => prefix.push_str("│ "),
165 |         _ => (),
166 |     }
167 |     if let Some((last, rest)) = children.split_last() {
168 |         for child in rest {
169 |             pretty_tree(writer, source, child, prefix, "├─")?;
170 |         }
171 |         pretty_tree(writer, source, last, prefix, "└─")?;
172 |     }
173 |     if orig_prefix_len < prefix.len() {
174 |         prefix.truncate(orig_prefix_len);
175 |     }
176 |     Ok(())
177 | }
178 | 
#[cfg(test)]
mod test {

    use super::*;
    use crate::syntax::text::{SourceText, DUMMY_SPAN};

    /// A tree built with an empty diagnostics list reports that it
    /// has no diagnostics.
    #[test]
    fn tree_without_diagnostics_reports_false() {
        let source = SourceText::new("");
        let tree = SyntaxTree::new(
            &source,
            Expression::empty(),
            Vec::new(),
            Token::new(TokenKind::End),
        );

        assert!(!tree.has_diagnostics());
    }

    /// A tree built with at least one diagnostic reports that it has
    /// diagnostics.
    #[test]
    fn tree_with_diagnostics_reports_true() {
        let source = SourceText::new("");
        let tree = SyntaxTree::new(
            &source,
            Expression::empty(),
            vec![Diagnostic::new("error: test", DUMMY_SPAN)],
            Token::new(TokenKind::End),
        );

        assert!(tree.has_diagnostics());
    }

    /// Parsing a small expression and pretty-printing it produces the
    /// expected tree rendering.
    #[test]
    fn tree_write_to_string() {
        let source = SourceText::new("(1 + 2) - 3");
        let tree = SyntaxTree::parse(&source);
        let mut buff = Vec::new();

        tree.write_to(&mut buff).unwrap();
        let written = String::from_utf8(buff).unwrap();

        assert_eq!(
            "
• Sequence
└─ Infix <Sub>
  ├─ Grouping
  │ └─ Infix <Add>
  │   ├─ Literal <Number(1)>
  │   └─ Literal <Number(2)>
  └─ Literal <Number(3)>
"
            .trim(),
            written.trim()
        );
    }
}
235 | 


--------------------------------------------------------------------------------
/src/syntax/tree/operators.rs:
--------------------------------------------------------------------------------
 1 | //! Enums for Operator Types
 2 | //!
 3 | //! There are two groups of operators in the syntax tree, infix and
 4 | //! prefix. This module contains a pair of enums used to discriminate
 5 | //! between these variants.
 6 | 
 7 | /// Represents an AST prefix operator.
 8 | #[derive(Debug, PartialEq, Copy, Clone)]
 9 | pub enum PrefixOp {
10 |     /// Arithmetic Identity
11 |     ///
12 |     /// Represents the application of the `+` operator to an
13 |     /// expression. This doesn't affect the inner expression at all,
14 |     /// but is accepted for orthogonality with `-`..
15 |     Identity,
16 | 
17 |     /// Unary Airthmetic Negation
18 |     ///
19 |     /// Represents the application of the `-` operator to an
20 |     /// expression. Can be used to arithmetically negate a computed
21 |     /// expression or literal value.
22 |     Negate,
23 | 
24 |     /// Unary Boolean Negation
25 |     ///
26 |     /// Represents the application of the `!` operator to an
27 |     /// expression. Can be used to perfrom boolean negation on an
28 |     /// expression or literal value.
29 |     Not,
30 | }
31 | 
/// Represents an AST infix operator.
///
/// The enum carries no data, so it is cheap to copy and can be
/// compared for equality or used as a key in hashed collections.
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub enum InfixOp {
    /// Assignment Operator (`=`)
    Assign,

    /// Arithmetic Addition (`+`)
    Add,
    /// Arithmetic Subtraction (`-`)
    Sub,
    /// Arithmetic Multiplication (`*`)
    Mul,
    /// Arithmetic Division (`/`)
    Div,

    /// Boolean Equals (`==`)
    Eq,
    /// Boolean Not Equals (`!=`)
    NotEq,
    /// Less than comparison operator (`<`)
    Lt,
    /// Less than or equals operator (`<=`)
    LtEq,
    /// Greater than comparison operator (`>`)
    Gt,
    /// Greater than or equals operator (`>=`)
    GtEq,
}
60 | 


--------------------------------------------------------------------------------
/src/syntax/tree/seplist.rs:
--------------------------------------------------------------------------------
  1 | //! Separated Syntax List
  2 | //!
  3 | //! This module holds the definition of the `SepList<T, S>`
  4 | //! type. Separated lists are used in the syntax tree to hold delimited
  5 | //! items such as parameter or argument lists.
  6 | //!
  7 | //! A `SepList` is made up of two lists of items, the main tokens and
  8 | //! the separators.
  9 | 
 10 | use super::Token;
 11 | use std::marker::PhantomData;
 12 | 
/// The separated list type holds a list of syntax items and the
/// separators between them.
///
/// `T` is the item type and `S` is the separator type, which
/// defaults to `Token`.
#[derive(Debug, PartialEq)]
pub struct SepList<T, S = Token> {
    /// The items in the list
    items: Vec<T>,
    /// The separators between the items
    separators: Vec<S>,
}
 22 | 
 23 | impl<T, S> SepList<T, S> {
 24 |     /// Create a new seplist from the given items and separators
 25 |     ///
 26 |     /// The separator length should be equal or 1 shorter than the
 27 |     /// items length.
 28 |     pub fn new(items: Vec<T>, separators: Vec<S>) -> Self {
 29 |         SepList { items, separators }
 30 |     }
 31 | 
 32 |     /// Create an empty separated list
 33 |     ///
 34 |     /// The new list will contain no items and no separators. This is
 35 |     /// mainly useful for tests or when fabricating trees by hand. The
 36 |     /// parser will usually genrate an empty list by calling
 37 |     /// `SepList::builder().build()`
 38 |     pub fn empty() -> Self {
 39 |         SepList::new(Vec::new(), Vec::new())
 40 |     }
 41 | 
 42 |     /// Create a list builder. This provides a structured way of
 43 |     /// incrementally building a separated list.
 44 |     pub fn builder() -> SepListBuilder<T, S, Item> {
 45 |         SepListBuilder {
 46 |             items: Vec::new(),
 47 |             separators: Vec::new(),
 48 |             state: Default::default(),
 49 |         }
 50 |     }
 51 | 
 52 |     /// Borrow the separators as a slice
 53 |     ///
 54 |     /// Standard iteration of this collection just accesses the main
 55 |     /// items. This allows access to the separators too.
 56 |     pub fn separators(&self) -> &[S] {
 57 |         &self.separators
 58 |     }
 59 | }
 60 | 
 61 | impl<T, S> std::ops::Deref for SepList<T, S> {
 62 |     type Target = [T];
 63 | 
 64 |     fn deref(&self) -> &[T] {
 65 |         &self.items
 66 |     }
 67 | }
 68 | 
/// Fluent typestate API for building a separated list
///
/// The `State` parameter records what may be pushed next: `Item`
/// when an item is expected and `Separator` when a separator is
/// expected. The list can be finished with `build` from either state.
pub struct SepListBuilder<T, S, State> {
    /// The buffered items for this list
    items: Vec<T>,
    /// The buffered separators for this list
    separators: Vec<S>,
    /// Phantom state data
    state: PhantomData<State>,
}

/// Initial state for the separated list builder
///
/// In this state the builder accepts an item via `push_item`.
pub struct Item {}

/// Separated list builder state when item has been seen
///
/// In this state the builder accepts a separator via `push_sep`.
pub struct Separator {}
 84 | 
 85 | impl<T, S, State> SepListBuilder<T, S, State> {
 86 |     /// Finish building the list
 87 |     pub fn build(self) -> SepList<T, S> {
 88 |         SepList::new(self.items, self.separators)
 89 |     }
 90 | }
 91 | 
 92 | impl<T, S> SepListBuilder<T, S, Item> {
 93 |     /// Push an item into the separated list and wait for a separator
 94 |     pub fn push_item(mut self, item: T) -> SepListBuilder<T, S, Separator> {
 95 |         self.items.push(item);
 96 |         SepListBuilder {
 97 |             items: self.items,
 98 |             separators: self.separators,
 99 |             state: Default::default(),
100 |         }
101 |     }
102 | }
103 | 
104 | impl<T, S> SepListBuilder<T, S, Separator> {
105 |     /// Push a separator onto the list and wait for another item
106 |     pub fn push_sep(mut self, sep: S) -> SepListBuilder<T, S, Item> {
107 |         self.separators.push(sep);
108 |         SepListBuilder {
109 |             items: self.items,
110 |             separators: self.separators,
111 |             state: Default::default(),
112 |         }
113 |     }
114 | }
115 | 
#[cfg(test)]
mod test {

    use super::*;

    /// Lists created with `new` report the number of items they hold.
    #[test]
    fn create_new_seplist() {
        let empty: SepList<(), u32> = SepList::new(Vec::new(), Vec::new());
        assert_eq!(0, empty.len());

        let with_items = SepList::new(vec![1, 2, 4, 8], vec![',', '!', '*']);
        assert_eq!(4, with_items.len());
    }

    /// The builder records items and separators in push order.
    #[test]
    fn seplist_builder() {
        let builder = SepList::builder();
        let builder = builder.push_item(123).push_sep(',');
        let builder = builder.push_item(456).push_sep('.');
        let list = builder.build();

        // Items are reachable through the slice view.
        assert_eq!(2, list.len());
        assert_eq!(Some(&123), list.get(0));
        assert_eq!(Some(&456), list.get(1));
        assert_eq!(None, list.get(2));

        // Separators are reachable through the dedicated accessor.
        let separators = list.separators();
        assert_eq!(Some(&','), separators.get(0));
        assert_eq!(Some(&'.'), separators.get(1));
        assert_eq!(None, separators.get(2));
    }
}
148 | 


--------------------------------------------------------------------------------
/src/syntax/tree/token.rs:
--------------------------------------------------------------------------------
  1 | //! Syntax Token
  2 | //!
  3 | //! A lexeme in the token stream. Tokens are produced by the
  4 | //! `Tokeniser` when parsing a source text.
  5 | 
  6 | use super::super::text::{Ident, Span, DUMMY_SPAN};
  7 | use super::TriviaToken;
  8 | use std::fmt;
  9 | 
/// A Syntax Token
///
/// Syntax tokens are produced by the lexer and contain metadata about
/// their position in the source text.
#[derive(Debug)]
pub struct Token {
    /// The `TokenKind` for this token. Public to allow matching over
    /// different token kinds.
    pub kind: TokenKind,
    /// The span of this token, exposed through `span()`.
    span: Span,
    /// Trivia attached before this token, exposed through `leading()`.
    leading: Vec<TriviaToken>,
    /// Trivia attached after this token, exposed through `trailing()`.
    trailing: Vec<TriviaToken>,
}
 23 | 
 24 | /// Literal Value
 25 | ///
 26 | /// Represents any constant / literal value in the syntax tree.
 27 | #[derive(Debug, PartialEq, Clone)]
 28 | pub enum Literal {
 29 |     /// A literal string
 30 |     RawString(String),
 31 | 
 32 |     /// A numeric literal
 33 |     Number(i64),
 34 | }
 35 | 
/// Token Kind
///
/// The data held by a token. This is usually just the token type. For
/// some tokens, like `Word` we also store their value. For all token
/// kinds the underlying source can be retrieved from the `Token`'s
/// span.
#[derive(Debug, PartialEq, Clone)]
pub enum TokenKind {
    /// A string of alphabetic characters. This could be a language
    /// keyword or a variable or type identifier.
    Word(Ident),

    /// Literal Value. Represents either a string or numeric literal
    /// in the source text. Booleans are not represented by literals
    /// and instead are just `Word` tokens.
    Literal(Literal),

    /// The `=` character
    Equals,

    /// The `==` operator
    DoubleEquals,

    /// The `!` character
    Bang,

    /// The `!=` operator
    BangEquals,

    /// The `+` character
    Plus,

    /// The `-` character
    Minus,

    /// The `*` character
    Star,

    /// The `/` character
    Slash,

    /// The `(` character
    OpenBracket,

    /// The `)` character
    CloseBracket,

    /// The `[` character
    OpenSqBracket,

    /// The `]` character
    CloseSqBracket,

    /// The `,` character
    Comma,

    /// The `:` character
    Colon,

    /// The `<` character
    LessThan,

    /// The `<=` operator
    LessThanEqual,

    /// The `>` character
    MoreThan,

    /// The `>=` operator
    MoreThanEqual,

    /// The end of the token stream. This is returned indefinitely once
    /// the lexer reaches the end of the source text.
    End,
}
111 | 
112 | impl fmt::Display for Literal {
113 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
114 |         match self {
115 |             Literal::RawString(s) => write!(f, "'{}'", s),
116 |             Literal::Number(n) => write!(f, "{}", n),
117 |         }
118 |     }
119 | }
120 | 
121 | impl fmt::Display for TokenKind {
122 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123 |         write!(
124 |             f,
125 |             "{}",
126 |             match self {
127 |                 TokenKind::Word(id) => match id {
128 |                     Ident::Unknown(_) => "identifier",
129 |                     _ => "keyword",
130 |                 },
131 |                 TokenKind::Literal(lit) => return write!(f, "literal value {}", lit),
132 |                 TokenKind::Equals => "'='",
133 |                 TokenKind::DoubleEquals => "'=='",
134 |                 TokenKind::Bang => "'!'",
135 |                 TokenKind::BangEquals => "'!='",
136 |                 TokenKind::Plus => "'+'",
137 |                 TokenKind::Minus => "'-'",
138 |                 TokenKind::Star => "'*'",
139 |                 TokenKind::Slash => "'/'",
140 |                 TokenKind::OpenBracket => "'('",
141 |                 TokenKind::CloseBracket => "')'",
142 |                 TokenKind::OpenSqBracket => "'['",
143 |                 TokenKind::CloseSqBracket => "']'",
144 |                 TokenKind::Comma => "','",
145 |                 TokenKind::Colon => "':'",
146 |                 TokenKind::LessThan => "'<'",
147 |                 TokenKind::LessThanEqual => "'<='",
148 |                 TokenKind::MoreThan => "'>'",
149 |                 TokenKind::MoreThanEqual => "'>='",
150 |                 TokenKind::End => "end of file",
151 |             }
152 |         )
153 |     }
154 | }
155 | 
156 | impl Token {
157 |     /// Create a Token from a Kind
158 |     pub fn new(kind: TokenKind) -> Self {
159 |         Token::with_span(DUMMY_SPAN, kind)
160 |     }
161 | 
162 |     /// Create a token from a position and kind
163 |     pub fn with_span(span: Span, kind: TokenKind) -> Self {
164 |         Token {
165 |             span,
166 |             leading: Vec::new(),
167 |             trailing: Vec::new(),
168 |             kind,
169 |         }
170 |     }
171 | 
172 |     /// Get the Span of a Token
173 |     pub fn span(&self) -> Span {
174 |         self.span
175 |     }
176 | 
177 |     /// Inspect the leading trivia
178 |     pub fn leading(&self) -> &[TriviaToken] {
179 |         &self.leading[..]
180 |     }
181 | 
182 |     /// Inspect the trailing trivia
183 |     pub fn trailing(&self) -> &[TriviaToken] {
184 |         &self.trailing[..]
185 |     }
186 | 
187 |     /// Set leading trivia
188 |     pub fn with_leading_trivia(self, leading: Vec<TriviaToken>) -> Self {
189 |         Token { leading, ..self }
190 |     }
191 | 
192 |     /// Set trailing trivia
193 |     pub fn with_trailing_trivia(self, trailing: Vec<TriviaToken>) -> Self {
194 |         Token { trailing, ..self }
195 |     }
196 | 
197 |     /// Left binding power. This controls the precedence of
198 |     /// the symbol when being parsed as an infix operator.
199 |     ///
200 |     /// Returns the associativity, or binding power, for the given
201 |     /// token. This is used when deciding if to parse the `led()`
202 |     /// of this token.
203 |     pub fn lbp(&self) -> u32 {
204 |         match self.kind {
205 |             TokenKind::Equals => 10,
206 | 
207 |             // ternary if
208 |             TokenKind::Word(Ident::If) | TokenKind::Word(Ident::Unless) => 20,
209 | 
210 |             // boolean conditional operators
211 |             TokenKind::DoubleEquals
212 |             | TokenKind::BangEquals
213 |             | TokenKind::LessThan
214 |             | TokenKind::LessThanEqual
215 |             | TokenKind::MoreThan
216 |             | TokenKind::MoreThanEqual => 40,
217 | 
218 |             // Arithmetic operators
219 |             TokenKind::Plus | TokenKind::Minus => 50,
220 | 
221 |             TokenKind::Star | TokenKind::Slash => 60,
222 | 
223 |             // Index/Call operators
224 |             TokenKind::OpenBracket | TokenKind::OpenSqBracket => 80,
225 | 
226 |             _ => Self::MIN_LBP,
227 |         }
228 |     }
229 | 
230 |     /// The minimum binding power of any token. This is used by the
231 |     /// parser to parse root level expressions.
232 |     ///
233 |     /// For any token `MIN_LBP` is less than or equal to the token's
234 |     /// binding power.
235 |     pub const MIN_LBP: u32 = 0;
236 | 
237 |     /// The maximum binding power of any token. This is used by the
238 |     /// parser to parse right-associative expressions.
239 |     ///
240 |     /// For any token `MAX_LBP` is strictly greatre than the token's
241 |     /// binding power.
242 |     pub const MAX_LBP: u32 = 100;
243 | }
244 | 
245 | impl PartialEq for Token {
246 |     fn eq(&self, other: &Token) -> bool {
247 |         self.kind == other.kind
248 |             && (self.span == DUMMY_SPAN || other.span == DUMMY_SPAN || self.span == other.span)
249 |     }
250 | }
251 | 
#[cfg(test)]
mod test {

    use super::super::super::text::Pos;
    use super::super::TriviaTokenKind;
    use super::*;

    /// A dummy span on either side compares equal to any span, while
    /// two differing real spans make tokens unequal.
    #[test]
    fn test_token_equality_ignores_dummy_span() {
        let zero_to_one = Span::new(Pos::from(0), Pos::from(1));
        let one_to_two = Span::new(Pos::from(1), Pos::from(2));

        let implicit_dummy = Token::new(TokenKind::OpenBracket);
        assert_eq!(implicit_dummy, Token::new(TokenKind::OpenBracket));

        let explicit_dummy = Token::with_span(DUMMY_SPAN, TokenKind::CloseBracket);
        assert_eq!(Token::new(TokenKind::CloseBracket), explicit_dummy);

        let real_span = Token::with_span(zero_to_one, TokenKind::Comma);
        assert_eq!(Token::new(TokenKind::Comma), real_span);

        assert_ne!(
            Token::with_span(one_to_two, TokenKind::LessThan),
            Token::with_span(zero_to_one, TokenKind::LessThan)
        );
    }

    /// Attached trivia does not take part in token equality.
    #[test]
    fn test_token_equality_ignores_trivia() {
        let trivia_span = Span::new(Pos::from(0), Pos::from(1));

        let newline = TriviaToken::with_span(trivia_span, TriviaTokenKind::Newline);
        assert_eq!(
            Token::new(TokenKind::Bang),
            Token::new(TokenKind::Bang).with_leading_trivia(vec![newline])
        );

        let whitespace = TriviaToken::with_span(trivia_span, TriviaTokenKind::Whitespace);
        assert_eq!(
            Token::new(TokenKind::Equals).with_trailing_trivia(vec![whitespace]),
            Token::new(TokenKind::Equals)
        );
    }
}
297 | 


--------------------------------------------------------------------------------
/src/syntax/tree/trivia.rs:
--------------------------------------------------------------------------------
 1 | //! Trivia Tokens
 2 | //!
 3 | //! Trivia tokens represent parts of a parse which are not important
 4 | //! to the semantic analysis of the program.
 5 | //!
 6 | //! The idea of trivia tokens is to allow a parsed syntax tree to be
 7 | //! re-written and then serialised to allow code transformations
 8 | //! without loss of things like comments and indentation.
 9 | 
10 | use super::super::text::Span;
11 | 
/// Trivia Token
///
/// Trivia tokens appear in the leading or trailing trivia of main
/// tokens. They should be attached to the 'closest' token in the
/// token stream, for some value of closest.
#[derive(Debug, PartialEq, Copy, Clone)]
pub struct TriviaToken {
    /// What sort of trivia this is
    kind: TriviaTokenKind,
    /// The span of this trivia, exposed through `span()`
    span: Span,
}
22 | 
/// Trivia Token Kind
///
/// The data held by a `TriviaToken`. The enum carries no payload, so
/// it is cheap to copy and can be compared for equality or used as a
/// key in hashed collections.
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub enum TriviaTokenKind {
    /// A whitespace token
    Whitespace,
    /// Unrecognised characters
    Junk,
    /// A single line comment
    Comment,
    /// A newline character
    Newline,
}
37 | 
38 | impl TriviaToken {
39 |     /// Create a new triva token
40 |     pub fn with_span(span: Span, kind: TriviaTokenKind) -> Self {
41 |         TriviaToken { span, kind }
42 |     }
43 | 
44 |     /// Get the `Span` of this trivia
45 |     pub fn span(&self) -> Span {
46 |         self.span
47 |     }
48 | }
49 | 


--------------------------------------------------------------------------------
/src/syntax/tree/types.rs:
--------------------------------------------------------------------------------
  1 | //! Syntax Types
  2 | //!
  3 | //! This module contains the structures used in the syntax tree to
  4 | //! reference types.
  5 | 
  6 | use super::super::text::{SourceText, Span, DUMMY_SPAN};
  7 | use super::super::{SepList, SyntaxNode};
  8 | use super::Token;
  9 | use std::borrow::Cow;
 10 | 
/// Type Reference
///
/// Represents a reference to a type. This could be a simple reference
/// to a named type or a complex type such as an array or Tuple.
#[derive(Debug, PartialEq)]
pub enum TypeRef {
    /// Simple Named Type. Holds the token that names the type.
    Simple(Box<Token>),
    /// The Unit Type. Holds the opening and closing tokens.
    Unit(Box<Token>, Box<Token>),
    /// A non-empty Tuple. Holds the opening token, the separated
    /// list of element types, and the closing token.
    Tuple(Box<Token>, SepList<TypeRef>, Box<Token>),
    /// An Array Type. Holds the opening token, the element type, and
    /// the closing token.
    Array(Box<Token>, Box<TypeRef>, Box<Token>),
    /// Missing type. Used to represent type information being missing
    /// at a given location.
    Missing,
}
 29 | 
/// Type Annotation
///
/// Reference to a type annotated to a variable or function. This is a
/// type reference and the accompanying `:` token.
#[derive(Debug, PartialEq)]
pub struct TypeAnno {
    /// The `:` token which introduces the annotation
    pub anno_tok: Box<Token>,
    /// The type reference which follows the `:` token
    pub type_ref: TypeRef,
}
 41 | 
 42 | impl TypeRef {
 43 |     /// Create a New Simple Type
 44 |     ///
 45 |     /// A simple type is a direct reference to a non-generic non-array
 46 |     /// type, such as `Num` or `String`. We keep track of the token
 47 |     /// and the inner identifier separately for convenience.
 48 |     pub fn simple(tok: Token) -> Self {
 49 |         TypeRef::Simple(Box::new(tok))
 50 |     }
 51 | 
 52 |     /// Create a new Unit Type Reference
 53 |     ///
 54 |     /// The unit type is represented as a struct with no contents. It
 55 |     /// has special meaning in some areas as it can be used to idicate
 56 |     /// the absence of a value.
 57 |     pub fn unit(open: Token, close: Token) -> Self {
 58 |         TypeRef::Unit(Box::new(open), Box::new(close))
 59 |     }
 60 | 
 61 |     /// Create a Tuple Type
 62 |     ///
 63 |     /// A tuple type is an ordered collection of values. Each value
 64 |     /// can be of a different type.
 65 |     pub fn tuple(open: Token, inner: SepList<TypeRef>, close: Token) -> Self {
 66 |         if inner.is_empty() {
 67 |             Self::unit(open, close)
 68 |         } else {
 69 |             TypeRef::Tuple(Box::new(open), inner, Box::new(close))
 70 |         }
 71 |     }
 72 | 
 73 |     /// Create an Array Type
 74 |     ///
 75 |     /// An array type represents a contiguous collection of another
 76 |     /// type.
 77 |     pub fn array(open: Token, inner: TypeRef, close: Token) -> Self {
 78 |         TypeRef::Array(Box::new(open), Box::new(inner), Box::new(close))
 79 |     }
 80 | 
 81 |     /// Create a missing type
 82 |     pub fn missing() -> Self {
 83 |         TypeRef::Missing
 84 |     }
 85 | }
 86 | 
 87 | impl SyntaxNode for TypeRef {
 88 |     fn description(&self, source: &SourceText) -> Cow<str> {
 89 |         match self {
 90 |             TypeRef::Array(..) => "Type <array>".into(),
 91 |             TypeRef::Missing => "Type <missing>".into(),
 92 |             TypeRef::Simple(t) => {
 93 |                 format!("Type `{}`", source.slice(t.span().start(), t.span().end())).into()
 94 |             }
 95 |             _ => "err.into".into(),
 96 |         }
 97 |     }
 98 | 
 99 |     fn span(&self) -> Span {
100 |         match self {
101 |             TypeRef::Array(open, _, close) => Span::enclosing(open.span(), close.span()),
102 |             TypeRef::Missing => DUMMY_SPAN,
103 |             TypeRef::Simple(token) => token.span(),
104 |             TypeRef::Tuple(open, _, close) => Span::enclosing(open.span(), close.span()),
105 |             TypeRef::Unit(open, close) => Span::enclosing(open.span(), close.span()),
106 |         }
107 |     }
108 | }
109 | 
110 | impl TypeAnno {
111 |     /// Create a Type Annotation
112 |     ///
113 |     /// Constructs a new `TypeAnno` structure with the given
114 |     /// annotation separation token and inner type reference.
115 |     pub fn new(anno_tok: Token, inner_ty: TypeRef) -> Self {
116 |         TypeAnno {
117 |             anno_tok: Box::new(anno_tok),
118 |             type_ref: inner_ty,
119 |         }
120 |     }
121 | }
122 | 


--------------------------------------------------------------------------------