├── src ├── backends │ ├── riscv64.rs │ ├── wasm64.rs │ ├── aarch64.rs │ ├── aarch64 │ │ ├── m1_jit.h │ │ ├── m1_jit.c │ │ └── codegen.rs │ ├── x86_64.rs │ ├── ir.rs │ └── x86_64 │ │ └── codegen.rs ├── frontend.rs ├── frontend │ ├── scopes.rs │ ├── correctness.rs │ ├── types.rs │ └── parser.rs ├── backends.rs ├── lib.rs └── main.rs ├── tests └── id.cly ├── .gitignore ├── lib ├── src │ ├── unknown_arity.h │ ├── syscalls.h │ ├── syscalls.s │ ├── rc.h │ ├── unknown_arity.c │ └── rc.c └── makefile ├── Cargo.toml ├── README.md ├── .github └── workflows │ └── rust.yml └── LICENSE /src/backends/riscv64.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/backends/wasm64.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/backends/aarch64.rs: -------------------------------------------------------------------------------- 1 | pub mod codegen; 2 | -------------------------------------------------------------------------------- /tests/id.cly: -------------------------------------------------------------------------------- 1 | main = (\a: 'a . a) (\a: 'a . 
a) 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .build 3 | Cargo.lock 4 | tags 5 | history.txt 6 | *.o 7 | *.a 8 | *.out 9 | -------------------------------------------------------------------------------- /src/backends/aarch64/m1_jit.h: -------------------------------------------------------------------------------- 1 | #ifndef M1_JIT_H 2 | #define M1_JIT_H 3 | 4 | void pthread_jit_write_protect_np(int _); 5 | 6 | #endif /* M1_JIT_H */ 7 | -------------------------------------------------------------------------------- /lib/src/unknown_arity.h: -------------------------------------------------------------------------------- 1 | #ifndef UNKNOWN_ARITY_H 2 | #define UNKNOWN_ARITY_H 3 | 4 | // Calls a function with unknown arity. 5 | void* call_unknown_arity(void* func, unsigned int called_args, void* args[]); 6 | 7 | #endif /* UNKNOWN_ARITY_H */ 8 | -------------------------------------------------------------------------------- /lib/makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | CFLAGS=-c -fPIC -nostdlib -Wall -g -masm=intel -fno-stack-protector 3 | AR=ar 4 | AFLAGS=rcs 5 | 6 | all: *.o 7 | mkdir -p bin/ 8 | $(AR) $(AFLAGS) bin/libclosey.a $? 9 | 10 | *.o: src/*.c src/*.s 11 | $(CC) $(CFLAGS) $? 
12 | 13 | clean: 14 | rm -r bin *.o 15 | 16 | -------------------------------------------------------------------------------- /lib/src/syscalls.h: -------------------------------------------------------------------------------- 1 | #ifndef SYSCALLS_H 2 | #define SYSCALLS_H 3 | 4 | #include 5 | 6 | void* mmap(void* start, size_t length, int prot, int flags, int fd, size_t offset); 7 | 8 | int munmap(void* start, size_t length); 9 | 10 | void exit(int ecode); 11 | 12 | #endif /* SYSCALLS_H */ 13 | -------------------------------------------------------------------------------- /src/backends/aarch64/m1_jit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "m1_jit.h" 7 | 8 | #if !defined(__APPLE__) || !defined(__aarch64__) 9 | void pthread_jit_write_protect_np(int _) { if (_) { } } 10 | #else 11 | void ________is_not_used_but_is_here_to_suppress_warnings________() { } 12 | #endif /* __APPLE__ */ 13 | 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "closeyc" 3 | version = "0.0.1" 4 | authors = ["jenra"] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [build-dependencies] 10 | cc = "1.0.46" 11 | 12 | [dependencies] 13 | clap = "2.33.0" 14 | codespan-reporting = "0.11.0" 15 | faerie = "0.16.0" 16 | goblin = "0.4.2" 17 | iced-x86 = "1.12.0" 18 | libc = "0.2.97" 19 | logos = "0.11.4" 20 | rustyline = "8.2.0" 21 | target-lexicon = "0.11.0" 22 | -------------------------------------------------------------------------------- /src/backends/aarch64/codegen.rs: -------------------------------------------------------------------------------- 1 | use super::super::ir::IrModule; 2 | use super::super::GeneratedCode; 3 | 4 | /// Transforms an 
IrModule into aarch64 machine code. 5 | pub fn generate_code(_: &mut IrModule) -> GeneratedCode { 6 | let mut code = GeneratedCode::new(); 7 | code.data = vec![ 8 | 0xff, 0x43, 0x00, 0xd1, 0xff, 0x0f, 0x00, 0xb9, 0xa0, 0x08, 0x80, 0x52, 0xff, 0x43, 0x00, 9 | 0x91, 0xc0, 0x03, 0x5f, 0xd6, 10 | ]; 11 | code.func_addrs.insert(String::from("main"), 0..code.len()); 12 | code 13 | } 14 | -------------------------------------------------------------------------------- /lib/src/syscalls.s: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | .global mmap 3 | .global munmap 4 | .global exit 5 | 6 | # args passed into registers: 7 | # rdi, rsi, rdx, rcx, r8, r9 8 | 9 | # void* mmap(void* start, size_t length, int prot, int flags, int fd, size_t offset); 10 | mmap: 11 | mov r10, rcx 12 | mov rax, 9 13 | syscall 14 | ret 15 | 16 | # int munmap(void* start, size_t length); 17 | munmap: 18 | mov rax, 11 19 | syscall 20 | ret 21 | 22 | # void exit(int ecode); 23 | exit: 24 | mov rax, 60 25 | syscall 26 | ret 27 | 28 | -------------------------------------------------------------------------------- /src/frontend.rs: -------------------------------------------------------------------------------- 1 | /// Module for correctness checking. This module contains all the functions that are involved in, 2 | /// for example, determining arity and type checking. 3 | pub mod correctness; 4 | 5 | /// Module for the frontend intermediate representation. This module contains functions for 6 | /// generating the IR and handling it. 7 | pub mod ir; 8 | 9 | /// Module for parsing the source text. 10 | pub mod parser; 11 | 12 | /// Module for scopes. This module contains functions for manipulating scopes and variables. 13 | pub mod scopes; 14 | 15 | /// Module for types. This module contains functions to help with type checking and manipulating 16 | /// types. 
17 | pub mod types; 18 | -------------------------------------------------------------------------------- /lib/src/rc.h: -------------------------------------------------------------------------------- 1 | #ifndef RC_H 2 | #define RC_H 3 | 4 | #include 5 | #include 6 | 7 | // Allocates something on the heap with a reference count of 1 8 | void* rcalloc(size_t size); 9 | 10 | // Copies a pointer with a given size onto the heap with a reference count of 1. 11 | void* rccopy(void* ptr, size_t len, size_t size); 12 | 13 | // Increments the reference count. 14 | void rcinc(void* ptr); 15 | 16 | // Returns true if there is only one reference to the pointer. 17 | bool has_one_reference(void* ptr); 18 | 19 | // Decrement the reference count. 20 | void rcfree(void* ptr); 21 | 22 | // Frees a reference counted closure structure. 23 | void rcfuncfree(void* ptr); 24 | 25 | #endif /* RC_H */ 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Closey 2 | Closey is a functional programming language designed to have as few features as possible but still be easy to use and compiles to efficient machine code. 3 | 4 | ## Features 5 | - Functions 6 | - Function applications 7 | - Partial function application 8 | - Union types 9 | - Match expressions 10 | - Closures 11 | - Optimisation of church numerals and cons boxes to corresponding native types 12 | 13 | ## Examples 14 | ### Identity 15 | ```ocaml 16 | (\a: 'a . a) (\a: 'a . a) 17 | ``` 18 | 19 | ## Build 20 | Just type in the following: 21 | ```bash 22 | git clone https://github.com/jenra-uwu/closey-lang && cd closey-lang && cargo build 23 | ``` 24 | 25 | ### Building the library 26 | ```bash 27 | cd lib && make 28 | ``` 29 | 30 | Note: Currently the focus is on Linux and macOS support, Linux being the more stable of the two. 
If you want it to run on a Windows computer, either a) install WSL, or b) get a better operating system. 31 | 32 | ## Building a program 33 | ```bash 34 | closeyc build -o file.o -- file.closey 35 | ld -o file file.o path/to/libclosey.a 36 | ./file 37 | ``` 38 | 39 | ## Progress 40 | See TODO.md. Everything is highly experimental. Be cautious: code may be explosive. 41 | 42 | ## Support 43 | Come to the [official discord server!](https://discord.gg/Gxfr6JDecv) 44 | 45 | -------------------------------------------------------------------------------- /src/backends/x86_64.rs: -------------------------------------------------------------------------------- 1 | pub mod codegen; 2 | 3 | use super::GeneratedCode; 4 | 5 | /// Disassembles x86 machine code into human readable assembly to stdout. 6 | pub fn disassemble(code: &GeneratedCode, base: *const u8) { 7 | use iced_x86::{Decoder, DecoderOptions, Formatter, Instruction, NasmFormatter}; 8 | 9 | for (name, range) in code.func_addrs.iter() { 10 | println!("\n{}:", name); 11 | let bytes = &code.data[range.start..range.end]; 12 | let mut decoder = Decoder::with_ip( 13 | 64, 14 | bytes, 15 | base as u64 + range.start as u64, 16 | DecoderOptions::NONE, 17 | ); 18 | 19 | let mut formatter = NasmFormatter::new(); 20 | 21 | formatter.options_mut().set_digit_separator("`"); 22 | formatter.options_mut().set_first_operand_char_index(0); 23 | 24 | let mut output = String::new(); 25 | let mut instruction = Instruction::default(); 26 | while decoder.can_decode() { 27 | decoder.decode_out(&mut instruction); 28 | 29 | output.clear(); 30 | formatter.format(&instruction, &mut output); 31 | 32 | print!("{:016X}\n ", instruction.ip()); 33 | let start_index = instruction.ip() as usize - base as usize; 34 | let instr_bytes = &code.data[start_index..start_index + instruction.len()]; 35 | for b in instr_bytes.iter() { 36 | print!("{:02X}", b); 37 | } 38 | if instr_bytes.len() < 10 { 39 | for _ in 0..10 - instr_bytes.len() { 40 | print!(" "); 
41 | } 42 | } 43 | println!(" {}", output); 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | nightly: 14 | name: Deploy nightly 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | os: [ubuntu-latest, macOS-latest] 19 | runs-on: ${{ matrix.os }} 20 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - name: Update Rust 26 | if: ${{ matrix.os == 'ubuntu-latest' }} 27 | run: rustup update 28 | 29 | - name: Build 30 | run: cargo build --release --verbose 31 | 32 | - name: Build asset 33 | run: | 34 | strip ./target/release/closeyc 35 | 7z a ./closeyc-nightly.zip ./target/release/closeyc 36 | 37 | - name: Set tag 38 | if: ${{ matrix.os == 'ubuntu-latest' }} 39 | env: 40 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 41 | run: | 42 | git config --local user.email "action@github.com" 43 | git config --local user.name "GitHub Action" 44 | git tag -f -a nightly -m "Nightly update" 45 | git push origin -f --follow-tags nightly 46 | - name: Deploy release 47 | uses: WebFreak001/deploy-nightly@v1.1.0 48 | env: 49 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 50 | with: 51 | upload_url: https://uploads.github.com/repos/jenra-uwu/closey-lang/releases/45233280/assets{?name,label} 52 | release_id: 45233280 # same as above (id can just be taken out the upload_url, it's used to find old releases) 53 | asset_path: ./closeyc-nightly.zip # path to archive to upload 54 | asset_name: closeyc-nightly-${{matrix.os}}-$$.zip # name to upload the release as, use $$ to insert date (YYYYMMDD) and 6 letter commit hash 55 | asset_content_type: application/zip # required by GitHub API 56 | 
max_releases: 1 # optional, if there are more releases than this matching the asset_name, the oldest ones are going to be deleted 57 | -------------------------------------------------------------------------------- /src/frontend/scopes.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use super::ir::{ArityInfo, Location}; 4 | use super::types::TypeRc; 5 | 6 | #[derive(Debug, Default)] 7 | pub struct Scope { 8 | pub variables: HashMap, 9 | pub parent: Option>, 10 | new_func: bool, 11 | } 12 | 13 | impl Scope { 14 | // new() -> Scope 15 | // Creates a new empty scope. 16 | pub fn new() -> Scope { 17 | Scope { 18 | variables: HashMap::with_capacity(0), 19 | parent: None, 20 | new_func: false, 21 | } 22 | } 23 | 24 | // put_var_raw(&mut self, String, TypeRc, ArityInfo, Span, bool) -> () 25 | // Puts a variable in the current scope. 26 | pub fn put_var_raw( 27 | &mut self, 28 | name: String, 29 | _type: TypeRc, 30 | arity: ArityInfo, 31 | loc: Location, 32 | assigned: bool, 33 | origin: String, 34 | ) { 35 | self.variables 36 | .insert(name, (_type, arity, loc, assigned, origin)); 37 | } 38 | 39 | // put_var(&mut self, &str, ArityInfo, Span, bool) -> () 40 | // Puts a variable in the current scope. 41 | pub fn put_var( 42 | &mut self, 43 | name: &str, 44 | _type: &TypeRc, 45 | arity: ArityInfo, 46 | loc: &Location, 47 | assigned: bool, 48 | origin: &str, 49 | ) { 50 | self.variables.insert( 51 | String::from(name), 52 | ( 53 | _type.clone(), 54 | arity, 55 | loc.clone(), 56 | assigned, 57 | String::from(origin), 58 | ), 59 | ); 60 | } 61 | 62 | // get_var(&self, &str) -> Option<&(Type, ArityInfo, Location, bool, String)> 63 | // Gets a variable from the stack of scopes. 
64 | pub fn get_var(&self, name: &str) -> Option<&(TypeRc, ArityInfo, Location, bool, String)> { 65 | // Set up 66 | let name = String::from(name); 67 | let mut scope = self; 68 | 69 | loop { 70 | // Return success if found 71 | if let Some(v) = scope.variables.get(&name) { 72 | return Some(v); 73 | } 74 | 75 | // Get next scope 76 | scope = match &scope.parent { 77 | Some(v) => &**v, 78 | None => break None, 79 | } 80 | } 81 | } 82 | 83 | // push_scope(&mut self, bool) -> () 84 | // Pushes a new scope to the top of the scope stack. 85 | pub fn push_scope(&mut self, new_func: bool) { 86 | use std::mem::swap; 87 | 88 | let mut scope = Scope::new(); 89 | scope.new_func = new_func; 90 | 91 | swap(&mut scope, self); 92 | self.parent = Some(Box::new(scope)); 93 | } 94 | 95 | // pop_scop(&mut self) -> () 96 | // Pops a scope from the stack if a parent scope exists. 97 | pub fn pop_scope(&mut self) { 98 | use std::mem::swap; 99 | 100 | if let Some(v) = &mut self.parent { 101 | let mut scope = Scope::new(); 102 | 103 | swap(&mut scope, v); 104 | swap(self, &mut scope); 105 | } 106 | } 107 | 108 | // is_captured(&self, &str) -> bool 109 | // Returns true if captured from a new function 110 | pub fn is_captured(&self, name: &str) -> bool { 111 | // Set up 112 | let mut scope = self; 113 | let mut last_new_func = false; 114 | let mut new_func; 115 | 116 | loop { 117 | // Global scope is not captured 118 | if scope.parent.is_none() { 119 | break false; 120 | } 121 | 122 | // Update new_func if in a new function 123 | new_func = last_new_func; 124 | if scope.new_func { 125 | last_new_func = true; 126 | } 127 | 128 | // Return success if found 129 | if scope.variables.get(name).is_some() { 130 | break new_func; 131 | } 132 | 133 | // Get next scope 134 | scope = match &scope.parent { 135 | Some(v) => &**v, 136 | None => break false, 137 | } 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/backends.rs: 
--------------------------------------------------------------------------------
/// Module for the lower level intermediate representation. This module contains functions that
/// generate the IR and manipulate it.
pub mod ir;

/// Module for aarch64 code generation.
pub mod aarch64;

/// Module for RISC-V code generation.
pub mod riscv64;

/// Module for x86_64 code generation.
pub mod x86_64;

/// Module for wasm64 code generation.
pub mod wasm64;

use std::collections::HashMap;
use std::ops::Range;

use ir::IrFunction;

// Name of the architecture the compiler itself was built for. Only the four architectures listed
// here define the constant; on any other build target it is not defined at all.
#[cfg(target_arch = "aarch64")]
pub const DEFAULT_ARCH: &str = "aarch64";
#[cfg(target_arch = "riscv64")]
pub const DEFAULT_ARCH: &str = "riscv64";
#[cfg(target_arch = "wasm64")]
pub const DEFAULT_ARCH: &str = "wasm64";
#[cfg(target_arch = "x86_64")]
pub const DEFAULT_ARCH: &str = "x86_64";

// Name of the operating system the compiler itself was built for. Only defined when building for
// Linux or macOS.
#[cfg(target_os = "linux")]
pub const DEFAULT_OS: &str = "linux";
#[cfg(target_os = "macos")]
pub const DEFAULT_OS: &str = "macos";

/// Represents generated code in some architecture.
///
/// NOTE(review): the generic arguments of the fields and of several return types below are
/// missing in this copy of the file (`HashMap>`, `HashMap,`, `Vec,`, ...). They look like they
/// were stripped when the listing was produced; restore them from the repository before building.
#[derive(Default)]
pub struct GeneratedCode {
    // Function name -> range occupied by that function inside `data`
    func_addrs: HashMap>,
    // Relocation information, exposed through `get_relocation_table`
    func_refs: HashMap,
    // The emitted machine code itself
    data: Vec,
}

impl GeneratedCode {
    /// Creates a new empty generated code: no functions, no relocations and no code.
    pub fn new() -> GeneratedCode {
        GeneratedCode {
            func_addrs: HashMap::new(),
            func_refs: HashMap::new(),
            data: Vec::new(),
        }
    }

    /// Gets the length of the generated code buffer (`data.len()`), whatever the architecture.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns true if no code has been emitted yet.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns the code as a Vec.
    pub fn data(&self) -> &Vec {
        &self.data
    }

    /// Returns the code as a mutable Vec.
    pub fn data_mut(&mut self) -> &mut Vec {
        &mut self.data
    }

    /// Returns executable code as a function, or `None` if no function called `func` was
    /// generated.
    ///
    /// `base` is the address the code has been loaded at; the returned pointer is computed
    /// relative to it.
    ///
    /// # Safety
    /// This function uses transmute to turn a pointer to raw bytes into a function, so use it with
    /// caution. Nothing here checks that `base` points at a loaded, executable copy of this code.
    pub unsafe fn get_fn(
        &self,
        func: &str,
        base: *const u8,
    ) -> Option *const u8> {
        if let Some(f) = self.func_addrs.get(func) {
            use std::mem::transmute;
            // The entry point is taken 16 bytes past the start of the function's range.
            // NOTE(review): presumably this skips a per-function header emitted by the code
            // generator - confirm against the backend before relying on it.
            Some(transmute(base.add(f.start + 16)))
        } else {
            None
        }
    }

    /// Gets the mapping from function names to ranges in code.
    pub fn get_funcs(&self) -> &HashMap> {
        &self.func_addrs
    }

    /// Gets the mutable mapping from function names to ranges in code.
    pub fn get_mut_funcs(&mut self) -> &mut HashMap> {
        &mut self.func_addrs
    }

    /// Gets the mapping used to relocate a file.
    pub fn get_relocation_table(&self) -> &HashMap {
        &self.func_refs
    }

    /// Gets the mutable mapping used to relocate a file.
    pub fn get_mut_relocation_table(&mut self) -> &mut HashMap {
        &mut self.func_refs
    }
}

/// Performs register allocation by linear scan on an IrFunction.
114 | pub fn linear_scan(func: &mut IrFunction, register_count: usize) { 115 | let mut register_lifetimes = vec![0usize; register_count]; 116 | 117 | 'a: for ssa in func.ssas.iter_mut() { 118 | for lifetime in register_lifetimes.iter_mut() { 119 | if *lifetime > 0 { 120 | *lifetime -= 1; 121 | } 122 | } 123 | 124 | if ssa.local.is_some() { 125 | for (reg, lifetime) in register_lifetimes.iter_mut().enumerate() { 126 | if *lifetime == 0 { 127 | *lifetime = ssa.local_lifetime; 128 | ssa.local_register = reg; 129 | continue 'a; 130 | } 131 | } 132 | ssa.local_register = register_lifetimes.len(); 133 | register_lifetimes.push(ssa.local_lifetime); 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /lib/src/unknown_arity.c: -------------------------------------------------------------------------------- 1 | #include "rc.h" 2 | #include "unknown_arity.h" 3 | 4 | // Applies a function with the given closed values and passed in arguments. 
//   func        - the function to call
//   called_argc - in/out: number of arguments the caller still has to hand out; reduced by the
//                 number of arguments taken from args (argc - saved)
//   saved       - how many leading arguments come from closed rather than from args
//   closed      - the values already stored in the closure
//   argc        - total number of arguments the function is called with
//   args        - the newly supplied arguments
// Returns whatever the called function left in rax.
void* apply_func(void* func, unsigned int* called_argc, unsigned int saved, void* closed[], unsigned int argc, void* args[]) {
    // Save the six argument registers on the stack
    asm(".intel_syntax noprefix");
    asm("push rdi");
    asm("push rsi");
    asm("push rdx");
    asm("push rcx");
    asm("push r8");
    asm("push r9");

    {
        // Build the complete argument list: closed-over values first, then the new arguments.
        // NOTE(review): passed_args is filled in here but never read again in this function -
        // the registers below are loaded by popping the stack, not from this array.
        void* passed_args[argc];
        for (unsigned int i = 0; i < saved; i++) {
            passed_args[i] = closed[i];
        }
        for (unsigned int i = saved; i < argc; i++) {
            passed_args[i] = args[i - saved];
        }

        // rdi, rsi, rdx, rcx, r8, r9
        // NOTE(review): presumably the + 0xf turns the odd-tagged function pointer into the
        // address of the real entry point - confirm against the code generator.
        func = (void*) (((unsigned long long) func) + 0xf);
        // NOTE(review): func is declared as an output operand ("=r") although the instruction
        // reads it; this looks like it should be an input operand.
        asm("mov rax, %0" : "=r" (func));
        // Load one argument register per argument, up to six.
        // NOTE(review): a pop returns the most recently pushed value, so the first pop hands rdi
        // the value that was pushed from r9 above - verify the intended order.
        if (argc >= 1) {
            asm("pop rdi");

            if (argc >= 2) {
                asm("pop rsi");

                if (argc >= 3) {
                    asm("pop rdx");

                    if (argc >= 4) {
                        asm("pop rcx");

                        if (argc >= 5) {
                            asm("pop r8");

                            if (argc >= 6) {
                                asm("pop r9");
                            }
                        }
                    }
                }
            }
        }

        asm("call rax");

        // Push back as many registers as were popped before the call so that the six pops at the
        // end of the function find six values on the stack again
        if (argc >= 1) {
            asm("push rdi");

            if (argc >= 2) {
                asm("push rsi");

                if (argc >= 3) {
                    asm("push rdx");

                    if (argc >= 4) {
                        asm("push rcx");

                        if (argc >= 5) {
                            asm("push r8");

                            if (argc >= 6) {
                                asm("push r9");
                            }
                        }
                    }
                }
            }
        }
    }

    // Undo the six pushes from the top of the function
    asm("pop r9");
    asm("pop r8");
    asm("pop rcx");
    asm("pop rdx");
    asm("pop rsi");
    asm("pop rdi");
    // Fetch the callee's return value from rax
    asm("mov %0, rax" : "=r" (func));
    // Only the arguments taken from args count against the caller's remaining arguments
    *called_argc -= argc - saved;
    return func;
}

// Calls a function with unknown arity.
89 | void* call_unknown_arity(void* func, unsigned int called_argc, void* args[]) { 90 | while (called_argc > 0) { 91 | if (((unsigned long long) func) & 1) { 92 | unsigned int argc = *((unsigned int*) func); 93 | 94 | if (argc <= called_argc) { 95 | func = apply_func(func, &called_argc, 0, NULL, argc, args); 96 | } else { 97 | void* array[called_argc + 1]; 98 | array[0] = func; 99 | for (unsigned int i = 1; i < called_argc + 1; i++) { 100 | array[i] = args[i]; 101 | } 102 | return rccopy(array, (called_argc + 1) * 8, (argc + 1) * 8); 103 | } 104 | } else { 105 | void** closure = (void**) func; 106 | func = *closure; 107 | unsigned int argc = *((unsigned int*) func); 108 | unsigned int saved = 0; 109 | for (unsigned int i = 1; i < argc + 1; i++) { 110 | if (closure[i] == NULL) { 111 | saved = i - 1; 112 | break; 113 | } 114 | } 115 | 116 | if (argc <= called_argc + saved) { 117 | func = apply_func(func, &called_argc, saved, closure + 1, argc, args); 118 | } else { 119 | if (!has_one_reference(closure)) { 120 | closure = rccopy(closure, (saved + 1) * 8, (argc + 1) * 8); 121 | } 122 | 123 | for (unsigned int i = saved + 1; i < saved + called_argc + 1; i++) { 124 | closure[i] = args[i - saved - 1]; 125 | rcinc(closure[i]); 126 | } 127 | 128 | return closure; 129 | } 130 | } 131 | } 132 | 133 | return func; 134 | } 135 | 136 | -------------------------------------------------------------------------------- /lib/src/rc.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "rc.h" 4 | 5 | #define PAGE_SIZE 4096 6 | 7 | struct s_rcalloc_header { 8 | void* next; 9 | size_t size; 10 | size_t rc; 11 | }; 12 | 13 | struct s_rcalloc_header* start = NULL; 14 | 15 | // Allocates something on the heap with a reference count of 1 16 | void* rcalloc(size_t size) { 17 | // NULL if size is 0 18 | if (size == 0) { 19 | return NULL; 20 | } 21 | 22 | // Create initial part of heap 23 | if (start == NULL) { 24 | // Get mmapped 
pointer 25 | start = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ANON, -1, 0); 26 | 27 | // Error 28 | if (start == (void*) -1) 29 | return NULL; 30 | 31 | // Set metadata 32 | start->next = NULL; 33 | start->size = PAGE_SIZE - sizeof(struct s_rcalloc_header); 34 | start->rc = 0; 35 | } 36 | 37 | // Get next free pointer with enough space if available 38 | struct s_rcalloc_header* p = start; 39 | struct s_rcalloc_header* last = start; 40 | while (p != NULL) { 41 | // Free pointer of appropriate size found 42 | if (!p->rc && p->size >= size) { 43 | // Shrink the pointer if it's sufficiently big 44 | if (p->size >= size * 2 + sizeof(struct s_rcalloc_header)) { 45 | struct s_rcalloc_header* q = (struct s_rcalloc_header*) (((void*) (p + 1)) + size); 46 | q->next = p->next; 47 | q->size = p->size - size - sizeof(struct s_rcalloc_header); 48 | q->rc = 0; 49 | p->next = q; 50 | p->size = size; 51 | } 52 | 53 | // Mark as used and return 54 | p->rc = 1; 55 | return (void*) (p + 1); 56 | } 57 | 58 | // Get next pointer 59 | last = p; 60 | p = p->next; 61 | } 62 | 63 | // Get new mmapped pointer 64 | p = mmap(NULL, size > PAGE_SIZE - sizeof(struct s_rcalloc_header) ? size + sizeof(struct s_rcalloc_header) : PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ANON, -1, 0); 65 | if (p == (void*) -1) 66 | return NULL; 67 | last->next = p; 68 | 69 | // Shrink if too big 70 | if (p->size >= size * 2 + sizeof(struct s_rcalloc_header)) { 71 | struct s_rcalloc_header* q = (struct s_rcalloc_header*) (((void*) (p + 1)) + size); 72 | q->next = p->next; 73 | q->size = p->size - size - sizeof(struct s_rcalloc_header); 74 | q->rc = 0; 75 | p->next = q; 76 | p->size = size; 77 | } 78 | 79 | return (void*) (p + 1); 80 | } 81 | 82 | // Copies a pointer with a given size onto the heap with a reference count of 1. 
83 | void* rccopy(void* ptr, size_t len, size_t size) { 84 | if (ptr == ((void*) 0)) 85 | return ptr; 86 | void* alloced = rcalloc(size); 87 | if (alloced == ((void*) 0)) 88 | return alloced; 89 | 90 | for (size_t i = 0; i < len; i++) { 91 | ((char*) alloced)[i] = ((char*) ptr)[i]; 92 | } 93 | 94 | for (size_t i = len; i < size; i++) { 95 | ((char*) alloced)[i] = ((char*) ptr)[i]; 96 | } 97 | 98 | return alloced; 99 | } 100 | 101 | // Increments the reference count. 102 | inline void rcinc(void* ptr) { 103 | struct s_rcalloc_header* header = ptr; 104 | header--; 105 | header->rc++; 106 | } 107 | 108 | // Returns true if there is only one reference to the pointer. 109 | bool has_one_reference(void* ptr) { 110 | struct s_rcalloc_header* header = ptr; 111 | header--; 112 | return header->rc == 1; 113 | } 114 | 115 | // Decrement the reference count. 116 | void rcfree(void* ptr) { 117 | struct s_rcalloc_header* header = ptr; 118 | header--; 119 | 120 | if (header->rc) 121 | header->rc--; 122 | } 123 | 124 | // Frees a reference counted closure structure. 
125 | void rcfuncfree(void* ptr) { 126 | if (((unsigned long long) ptr) & 1) 127 | return; 128 | 129 | struct s_rcalloc_header* header = ptr; 130 | header--; 131 | 132 | if (header->rc) { 133 | if (header->rc == 1) { 134 | unsigned long long* closure = ptr; 135 | unsigned int* func = (unsigned int*) closure[0]; 136 | unsigned int argc = *func; 137 | for (unsigned int i = 1; i < argc + 1; i++) { 138 | if (closure[i] == 0) 139 | break; 140 | rcfuncfree((void*) closure[i]); 141 | } 142 | } 143 | 144 | header->rc--; 145 | } else { 146 | *((volatile char*) 0) = 69; 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/frontend/correctness.rs: -------------------------------------------------------------------------------- 1 | use logos::Span; 2 | use std::collections::HashMap; 3 | use std::sync::Arc; 4 | 5 | use super::ir::{ArityInfo, Ir, IrFunction, IrModule, Location, SExpr, SExprMetadata}; 6 | use super::types::{arc, Type}; 7 | 8 | pub enum CorrectnessError {} 9 | 10 | fn check_sexpr( 11 | parent_func: &mut IrFunction, 12 | sexpr: &mut SExpr, 13 | module: &mut IrModule, 14 | errors: &mut Vec, 15 | ) { 16 | match sexpr { 17 | SExpr::Empty(_) => todo!(), 18 | 19 | SExpr::TypeAlias(_, _) => todo!(), 20 | 21 | SExpr::Symbol(m, s) => { 22 | if let Some((_type, arity, _, _, _)) = module.scope.get_var(s) { 23 | m._type = _type.clone(); 24 | m.arity = *arity; 25 | if module.scope.is_captured(s) && !parent_func.captured_names.contains(s) { 26 | parent_func.captured_names.push(s.clone()); 27 | parent_func.captured.insert(s.clone(), _type.clone()); 28 | } 29 | } else if let Some(func) = module.globals.get(s) { 30 | *sexpr = SExpr::Function(m.clone(), func.clone()); 31 | check_sexpr(parent_func, sexpr, module, errors); 32 | } else { 33 | panic!("variable {} not found", s); 34 | } 35 | } 36 | 37 | SExpr::Function(m, f) => { 38 | if let Some(func) = module.funcs.get(f) { 39 | if func.checked { 40 | m._type = func._type.clone(); 
41 | m.arity = ArityInfo::Known(func.args.len()); 42 | } else { 43 | let mut func = module.funcs.remove(f).unwrap(); 44 | module.scope.push_scope(true); 45 | 46 | for arg in func.args.iter() { 47 | module.scope.put_var( 48 | &arg.0, 49 | &arg.1, 50 | ArityInfo::Unknown, 51 | &Location::empty(), 52 | true, 53 | &module.name, 54 | ); 55 | } 56 | 57 | use std::mem::swap; 58 | let mut body = SExpr::Empty(SExprMetadata::empty()); 59 | swap(&mut func.body, &mut body); 60 | check_sexpr(&mut func, &mut body, module, errors); 61 | swap(&mut func.body, &mut body); 62 | 63 | module.scope.pop_scope(); 64 | 65 | let mut _type = func.body.get_metadata()._type.clone(); 66 | for arg in func.args.iter().rev() { 67 | _type = arc::new(Type::Func(arg.1.clone(), _type)); 68 | } 69 | 70 | func._type = _type; 71 | m._type = func._type.clone(); 72 | m.arity = ArityInfo::Known(func.args.len()); 73 | 74 | func.checked = true; 75 | module.funcs.insert(f.clone(), func); 76 | } 77 | } else { 78 | panic!("this shouldn't happen i believe"); 79 | } 80 | } 81 | 82 | SExpr::ExternalFunc(_, _, _) => todo!(), 83 | 84 | SExpr::Chain(_, _, _) => todo!(), 85 | 86 | SExpr::Application(m, func, args) => { 87 | check_sexpr(parent_func, func, module, errors); 88 | for arg in args.iter_mut() { 89 | check_sexpr(parent_func, arg, module, errors); 90 | } 91 | 92 | let mut ft = func.get_metadata()._type.clone(); 93 | let mut generics_map = HashMap::new(); 94 | 95 | use std::mem::swap; 96 | let mut args_temp = vec![]; 97 | swap(&mut args_temp, args); 98 | let mut arity = func.get_metadata().arity; 99 | let last_index = args_temp.len(); 100 | for (i, arg) in args_temp.into_iter().enumerate() { 101 | if let Type::Func(at, rt) = &*ft { 102 | if arg 103 | .get_metadata() 104 | ._type 105 | .is_subtype(at, &module.types, &mut generics_map) 106 | { 107 | m._type = rt.clone(); 108 | ft = rt.clone(); 109 | Arc::make_mut(&mut m._type).replace_generics(&generics_map); 110 | } else { 111 | panic!("{} is not a subtype of 
{}", arg.get_metadata()._type, at); 112 | } 113 | 114 | args.push(arg); 115 | 116 | arity = match arity { 117 | ArityInfo::Known(v) if v > 0 => ArityInfo::Known(v - 1), 118 | ArityInfo::Known(_) => ArityInfo::Unknown, 119 | ArityInfo::Unknown => ArityInfo::Unknown, 120 | }; 121 | 122 | if i != last_index - 1 && matches!(arity, ArityInfo::Known(0)) { 123 | let mut temp = vec![]; 124 | swap(&mut temp, args); 125 | **func = SExpr::Application( 126 | SExprMetadata { 127 | loc: Location::new( 128 | Span { 129 | start: m.loc.span.start, 130 | end: temp.last().unwrap().get_metadata().loc.span.end, 131 | }, 132 | &m.loc.filename, 133 | ), 134 | loc2: Location::empty(), 135 | origin: m.origin.clone(), 136 | _type: { 137 | let mut ft = ft.clone(); 138 | Arc::make_mut(&mut ft).replace_generics(&generics_map); 139 | ft 140 | }, 141 | arity, 142 | tailrec: false, 143 | impure: false, 144 | }, 145 | func.clone(), 146 | temp, 147 | ); 148 | } 149 | } else { 150 | panic!("type {} is not a function", func.get_metadata()._type); 151 | } 152 | } 153 | 154 | m._type = ft; 155 | m.arity = arity; 156 | Arc::make_mut(&mut m._type).replace_generics(&generics_map); 157 | } 158 | 159 | SExpr::Assign(m, a, v) => { 160 | check_sexpr(parent_func, v, module, errors); 161 | m._type = v.get_metadata()._type.clone(); 162 | m.arity = v.get_metadata().arity; 163 | module 164 | .scope 165 | .put_var(a, &m._type, m.arity, &m.loc, true, &module.name); 166 | } 167 | 168 | SExpr::With(_, _, _) => todo!(), 169 | 170 | SExpr::Match(_, _, _) => todo!(), 171 | } 172 | } 173 | 174 | pub fn check_correctness(ir: &mut Ir, _require_main: bool) -> Result<(), Vec> { 175 | let mut errors = vec![]; 176 | 177 | for (_, module) in ir.modules.iter_mut() { 178 | let globals = module.globals.clone(); 179 | for (_, raw) in globals { 180 | use std::mem::swap; 181 | 182 | let mut func = module.funcs.remove(&raw).unwrap(); 183 | if func.checked { 184 | module.funcs.insert(raw, func); 185 | continue; 186 | } 187 | 188 | 
module.scope.push_scope(true); 189 | for arg in func.args.iter() { 190 | module.scope.put_var( 191 | &arg.0, 192 | &arg.1, 193 | ArityInfo::Unknown, 194 | &Location::empty(), 195 | true, 196 | "", 197 | ); 198 | } 199 | 200 | let mut body = SExpr::Empty(SExprMetadata::empty()); 201 | swap(&mut func.body, &mut body); 202 | check_sexpr(&mut func, &mut body, module, &mut errors); 203 | swap(&mut func.body, &mut body); 204 | 205 | let mut _type = func.body.get_metadata()._type.clone(); 206 | for arg in func.args.iter().rev() { 207 | _type = arc::new(Type::Func(arg.1.clone(), _type)); 208 | } 209 | 210 | func._type = _type; 211 | 212 | module.scope.pop_scope(); 213 | 214 | module.funcs.insert(raw, func); 215 | } 216 | } 217 | 218 | if errors.is_empty() { 219 | Ok(()) 220 | } else { 221 | Err(errors) 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // #![deny(missing_docs)] 2 | 3 | /// Module that contains helper functions for transforming the higher level intermediate 4 | /// representation into machine code. This includes functions for lowering the IR, functions for 5 | /// manipulating the lower level IR, functions for manipulating code structures, and functions for 6 | /// emitting code. 7 | pub mod backends; 8 | 9 | /// Module that contains helper functions transforming the source text into higher level 10 | /// intermediate representation. This includes functions for parsing, functions for transforming 11 | /// the text into IR, and functions for checking the correctness of IR. 
12 | pub mod frontend; 13 | 14 | use codespan_reporting::diagnostic::{Diagnostic, Label}; 15 | use codespan_reporting::files::SimpleFiles; 16 | use codespan_reporting::term; 17 | use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; 18 | use logos::Span; 19 | use std::collections::HashMap; 20 | 21 | use crate::frontend::ir::{self, Ir, IrError}; 22 | use crate::frontend::parser; 23 | 24 | /// Determines whether the compiler should output debug information or not. 25 | static DEBUG: bool = false; 26 | 27 | /// The return type of check<>(). 28 | pub type Res<'a> = Result< 29 | (Vec>, SimpleFiles<&'a String, String>), 30 | (Vec>, SimpleFiles<&'a String, String>), 31 | >; 32 | 33 | /// Checks whether given code is valid. 34 | pub fn check<'a>( 35 | filenames: &'a [(String, bool)], 36 | codes: &[String], 37 | ir: &mut Ir, 38 | _require_main: bool, 39 | emit: bool, 40 | ) -> Res<'a> { 41 | // Set up codespan 42 | let mut files = SimpleFiles::new(); 43 | let mut file_hash = HashMap::new(); 44 | for file in filenames.iter().enumerate() { 45 | file_hash.insert(&file.1 .0, files.add(&file.1 .0, codes[file.0].clone())); 46 | } 47 | let file_hash = file_hash; 48 | 49 | let writer = StandardStream::stderr(ColorChoice::Auto); 50 | let config = term::Config::default(); 51 | let mut diagnostics = Vec::new(); 52 | let mut fail = false; 53 | 54 | for (file, code) in filenames.iter().zip(codes.iter()) { 55 | let file_id = *file_hash.get(&file.0).unwrap(); 56 | 57 | if let Some(start) = code.find("uwu") { 58 | let loc = Span { 59 | start, 60 | end: start + 3, 61 | }; 62 | let diagnostic = Diagnostic::note() 63 | .with_message("owo") 64 | .with_labels(vec![Label::primary(file_id, loc).with_message("nya")]); 65 | if emit { 66 | term::emit(&mut writer.lock(), &config, &files, &diagnostic).unwrap(); 67 | } 68 | diagnostics.push(diagnostic); 69 | } 70 | 71 | // Generate the ast 72 | if file.1 { 73 | // TODO: remove this condition 74 | todo!("this should never be 
available"); 75 | } else { 76 | let ast = match parser::parse(code) { 77 | Ok(v) => v, 78 | Err(e) => { 79 | let diagnostic = Diagnostic::error() 80 | .with_message(&e.msg) 81 | .with_labels(vec![Label::primary(file_id, e.span)]); 82 | if emit { 83 | term::emit(&mut writer.lock(), &config, &files, &diagnostic).unwrap(); 84 | } 85 | diagnostics.push(diagnostic); 86 | return Err((diagnostics, files)); 87 | } 88 | }; 89 | 90 | // Print out the ast 91 | if DEBUG { 92 | println!("{:#?}", &ast); 93 | } 94 | match ir::convert_ast_to_ir(&file.0, code, ast, ir) { 95 | Ok(_) if DEBUG => { 96 | dbg!(&ir); 97 | } 98 | Ok(_) => (), 99 | Err(e) => { 100 | for e in e { 101 | let mut diagnostic = Diagnostic::error(); 102 | match e { 103 | IrError::InvalidType(s) => { 104 | diagnostic = diagnostic 105 | .with_message("Invalid type used") 106 | .with_labels(vec![Label::primary( 107 | *file_hash.get(&s.filename).unwrap(), 108 | s.span, 109 | ) 110 | .with_message("Undeclared type")]) 111 | } 112 | 113 | IrError::DuplicateTypeInUnion(s1, s2, t) => { 114 | diagnostic = diagnostic 115 | .with_message("Duplicate type in union type declaration") 116 | .with_labels(vec![ 117 | Label::secondary( 118 | *file_hash.get(&s1.filename).unwrap(), 119 | s1.span, 120 | ) 121 | .with_message("Type used here first"), 122 | Label::primary( 123 | *file_hash.get(&s2.filename).unwrap(), 124 | s2.span, 125 | ) 126 | .with_message( 127 | format!("Type `{}` used a second time here", t), 128 | ), 129 | ]) 130 | } 131 | 132 | IrError::DoubleExport(s1, s2, e) => { 133 | diagnostic = diagnostic 134 | .with_message("Value exported twice") 135 | .with_labels(vec![ 136 | Label::secondary( 137 | *file_hash.get(&s1.filename).unwrap(), 138 | s1.span, 139 | ) 140 | .with_message("Value exported here first"), 141 | Label::primary( 142 | *file_hash.get(&s2.filename).unwrap(), 143 | s2.span, 144 | ) 145 | .with_message( 146 | format!("Value {} exported a second time here", e), 147 | ), 148 | ]) 149 | } 150 | 151 | 
IrError::RedefineImportAlias(s1, s2, a) => { 152 | diagnostic = diagnostic 153 | .with_message("Alias defined twice") 154 | .with_labels(vec![ 155 | Label::secondary( 156 | *file_hash.get(&s1.filename).unwrap(), 157 | s1.span, 158 | ) 159 | .with_message("Alias defined here first"), 160 | Label::primary( 161 | *file_hash.get(&s2.filename).unwrap(), 162 | s2.span, 163 | ) 164 | .with_message( 165 | format!("Alias {} defined a second time here", a), 166 | ), 167 | ]) 168 | } 169 | 170 | IrError::UnsupportedAnnotation(s, a) => { 171 | diagnostic = diagnostic 172 | .with_message("Unsupported annotation used") 173 | .with_labels(vec![Label::primary( 174 | *file_hash.get(&s.filename).unwrap(), 175 | s.span, 176 | ) 177 | .with_message(format!("Annotation {} is unsupported", a))]) 178 | } 179 | 180 | IrError::InvalidFFIType(s, t) => { 181 | diagnostic = diagnostic 182 | .with_message("Unsupported type used for FFI") 183 | .with_labels(vec![Label::primary( 184 | *file_hash.get(&s.filename).unwrap(), 185 | s.span, 186 | ) 187 | .with_message(format!("Type {} is unsupported by FFI", t))]) 188 | } 189 | 190 | IrError::DuplicateModule(v, _t) => { 191 | diagnostic = 192 | diagnostic.with_message(format!("Duplicate module `{}`", v)) 193 | } 194 | } 195 | if emit { 196 | term::emit(&mut writer.lock(), &config, &files, &diagnostic).unwrap(); 197 | } 198 | diagnostics.push(diagnostic); 199 | fail = true; 200 | } 201 | } 202 | } 203 | } 204 | } 205 | 206 | if fail { 207 | Err((diagnostics, files)) 208 | } else { 209 | Ok((diagnostics, files)) 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/backends/ir.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::fmt::Display; 3 | 4 | use super::super::frontend::ir::{self, ArityInfo, SExpr, SExprMetadata}; 5 | 6 | /// An instruction in the low level intermediate representation. 
/// An instruction in the low level intermediate representation.
#[derive(Copy, Clone, Debug)]
pub enum IrInstruction {
    /// Returns an optional parameter from a function.
    Ret,

    /// Loads a function or argument parameter into a local.
    Load,

    /// Applies a list of arguments to a function pointer or closure struct to form a new closure
    /// struct. If passed in a closure struct, it allocates a new closure struct if the passed in
    /// closure struct has a reference count greater than 1.
    Apply,

    /// Calls a function, function pointer, or closure struct and passes the return value into a
    /// new local value. True if the arity is known at compile time, false otherwise.
    Call(bool),

    /// Increments the reference counter for a closure struct.
    RcInc,

    /// Decrements the reference counter for a closure struct and deallocates and decrements child
    /// nodes if the reference counter reaches 0.
    RcFuncFree,
}

impl Display for IrInstruction {
    /// Writes the mnemonic of the instruction; a call of unknown arity is written as `call?`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mnemonic = match self {
            IrInstruction::Ret => "ret",
            IrInstruction::Load => "load",
            IrInstruction::Apply => "apply",
            IrInstruction::Call(true) => "call",
            IrInstruction::Call(false) => "call?",
            IrInstruction::RcInc => "rcinc",
            IrInstruction::RcFuncFree => "rcfuncfree",
        };
        f.write_str(mnemonic)
    }
}

/// An argument passed into an instruction in the low level intermediate representation.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum IrArgument {
    /// A local value.
    Local(usize),

    /// An argument passed into the function that contains the instruction. Closed values are also
    /// considered arguments.
    Argument(usize),

    /// A function address.
    Function(String),
}

impl Display for IrArgument {
    /// Writes locals as `%n`, arguments as `$n` and function addresses as `@name`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            IrArgument::Local(l) => write!(f, "%{}", l),
            IrArgument::Argument(a) => write!(f, "${}", a),
            IrArgument::Function(g) => write!(f, "@{}", g),
        }
    }
}

/// Represents a single instruction in the lower level intermediate representation.
#[derive(Debug)]
pub struct IrSsa {
    /// The local value the instruction is assigned to.
    pub local: Option<usize>,

    /// The lifetime of the local assigned in this statement.
    pub local_lifetime: usize,

    /// The register the local assigned to in this instruction is allocated in.
    pub local_register: usize,

    /// The instruction (ie opcode) being executed in this instruction.
    pub instr: IrInstruction,

    /// The arguments passed into the instruction.
    pub args: Vec<IrArgument>,
}

impl Display for IrSsa {
    /// Writes `%local = instr arg arg ...`, leaving out the assignment when there is no local.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Some(l) = self.local {
            write!(f, "%{} = ", l)?;
        }

        write!(f, "{}", self.instr)?;
        for a in self.args.iter() {
            write!(f, " {}", a)?;
        }
        Ok(())
    }
}

/// A function in the lower level intermediate representation.
#[derive(Debug)]
pub struct IrFunction {
    /// The name of the function.
    pub name: String,

    /// The number of arguments (including closed over values) that the function takes in.
    pub argc: usize,

    /// The list of all SSAs associated with this function.
    /// TODO: Replace with basic blocks.
    pub ssas: Vec<IrSsa>,
}

impl Display for IrFunction {
    /// Writes a `name(argc):` header followed by one instruction per line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}({}):", self.name, self.argc)?;
        for ssa in self.ssas.iter() {
            write!(f, "\n {}", ssa)?;
        }
        Ok(())
    }
}

impl IrFunction {
    /// Returns the local assigned by the last instruction that assigns one, if any.
    fn get_last_local(&self) -> Option<usize> {
        self.ssas.iter().rev().find_map(|ssa| ssa.local)
    }

    /// Returns the number to use for the next local: one past the last assigned local, or 0 if no
    /// instruction assigns a local yet.
    fn get_next_local(&self) -> usize {
        self.get_last_local().map_or(0, |l| l + 1)
    }
}

/// A module in lower level intermediate representation.
/// TODO: Have a higher level data structure that represents the list of all modules in the code.
#[derive(Debug)]
pub struct IrModule {
    /// The list of all functions in the module.
    pub funcs: Vec<IrFunction>,
}

impl Display for IrModule {
    /// Writes every function of the module, each followed by a blank line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for func in self.funcs.iter() {
            writeln!(f, "{}\n", func)?;
        }
        Ok(())
    }
}
func.ssas.push(IrSsa { 195 | local, 196 | local_lifetime: 0, 197 | local_register: 0, 198 | instr: IrInstruction::Load, 199 | args: vec![v], 200 | }); 201 | local 202 | } 203 | 204 | Err(SExpr::Empty(_)) => todo!(), 205 | Err(SExpr::TypeAlias(_, _)) => todo!(), 206 | 207 | Err(SExpr::ExternalFunc(_, _, _)) => todo!(), 208 | Err(SExpr::Chain(_, _, _)) => todo!(), 209 | 210 | Err(SExpr::Function(_, f)) => { 211 | use std::iter::once; 212 | let local = Some(func.get_next_local()); 213 | let args = map.get(f).unwrap().iter().map(|v| { 214 | get_arg_if_applicable( 215 | args_map, 216 | &SExpr::Symbol(SExprMetadata::empty(), v.clone()), 217 | map, 218 | ) 219 | .unwrap() 220 | }); 221 | func.ssas.push(IrSsa { 222 | local, 223 | local_lifetime: 0, 224 | local_register: 0, 225 | instr: IrInstruction::Apply, 226 | args: once(IrArgument::Function(f.clone())).chain(args).collect(), 227 | }); 228 | local 229 | } 230 | 231 | Err(SExpr::Application(m, f, a)) => { 232 | let f = match get_arg_if_applicable(args_map, &**f, map) { 233 | Ok(v) => v, 234 | Err(e) => IrArgument::Local(conversion_helper(args_map, func, e, map).unwrap()), 235 | }; 236 | 237 | let args: Vec<_> = a 238 | .iter() 239 | .map(|a| match get_arg_if_applicable(args_map, a, map) { 240 | Ok(v) => v, 241 | Err(e) => IrArgument::Local(conversion_helper(args_map, func, e, map).unwrap()), 242 | }) 243 | .collect(); 244 | 245 | use std::iter::once; 246 | let local = Some(func.get_next_local()); 247 | if matches!(m.arity, ArityInfo::Known(v) if v != 0) { 248 | func.ssas.push(IrSsa { 249 | local, 250 | local_lifetime: 0, 251 | local_register: 0, 252 | instr: IrInstruction::Apply, 253 | args: once(f).chain(args.into_iter()).collect(), 254 | }); 255 | } else { 256 | func.ssas.push(IrSsa { 257 | local, 258 | local_lifetime: 0, 259 | local_register: 0, 260 | instr: IrInstruction::Call(matches!(m.arity, ArityInfo::Known(_))), 261 | args: once(f).chain(args.into_iter()).collect(), 262 | }); 263 | } 264 | 265 | local 266 | } 
267 | 268 | Err(SExpr::Assign(_, _, _)) => todo!(), 269 | Err(SExpr::With(_, _, _)) => todo!(), 270 | Err(SExpr::Match(_, _, _)) => todo!(), 271 | 272 | Err(SExpr::Symbol(_, _)) => unreachable!(), 273 | } 274 | } 275 | 276 | fn calculate_lifetimes(func: &mut IrFunction) { 277 | let mut iter = func.ssas.iter_mut(); 278 | let mut i = 0; 279 | while let Some(ssa) = iter.next() { 280 | if ssa.local.is_none() { 281 | continue; 282 | } 283 | let local = ssa.local.unwrap(); 284 | 285 | let mut j = i + 1; 286 | for next in iter.as_slice() { 287 | for arg in next.args.iter() { 288 | if let IrArgument::Local(l) = arg { 289 | if *l == local { 290 | ssa.local_lifetime = j - i; 291 | break; 292 | } 293 | } 294 | } 295 | 296 | j += 1; 297 | } 298 | 299 | i += 1; 300 | } 301 | } 302 | 303 | fn insert_rc_instructions(func: &mut IrFunction) { 304 | let mut i = 0; 305 | let mut local_lifetimes: HashMap = HashMap::new(); 306 | while let Some(mut ssa) = func.ssas.get(i) { 307 | if let IrInstruction::Apply = ssa.instr { 308 | let mut inserts = vec![]; 309 | for arg in ssa.args.iter().skip(1) { 310 | if !matches!(arg, IrArgument::Function(_)) { 311 | inserts.push(IrSsa { 312 | local: None, 313 | local_lifetime: 0, 314 | local_register: 0, 315 | instr: IrInstruction::RcInc, 316 | args: vec![arg.clone()], 317 | }); 318 | } 319 | } 320 | 321 | for insert in inserts { 322 | func.ssas.insert(i, insert); 323 | i += 1; 324 | } 325 | 326 | ssa = func.ssas.get(i).unwrap(); 327 | if let Some(local) = ssa.local { 328 | local_lifetimes.insert(IrArgument::Local(local), ssa.local_lifetime + 1); 329 | } 330 | } else if let IrInstruction::Call(_) = ssa.instr { 331 | if let Some(local) = ssa.local { 332 | local_lifetimes.insert(IrArgument::Local(local), ssa.local_lifetime + 1); 333 | } 334 | } 335 | 336 | if let IrInstruction::Call(false) = ssa.instr { 337 | let mut befores = vec![]; 338 | let mut afters = vec![]; 339 | for arg in ssa.args.iter().skip(1) { 340 | if !matches!(arg, 
IrArgument::Function(_)) { 341 | befores.push(IrSsa { 342 | local: None, 343 | local_lifetime: 0, 344 | local_register: 0, 345 | instr: IrInstruction::RcInc, 346 | args: vec![arg.clone()], 347 | }); 348 | afters.push(IrSsa { 349 | local: None, 350 | local_lifetime: 0, 351 | local_register: 0, 352 | instr: IrInstruction::RcFuncFree, 353 | args: vec![arg.clone()], 354 | }); 355 | } 356 | } 357 | 358 | let i_inc = afters.len(); 359 | for (before, after) in befores.into_iter().zip(afters.into_iter()) { 360 | func.ssas.insert(i, before); 361 | i += 1; 362 | func.ssas.insert(i + 1, after); 363 | } 364 | i += i_inc; 365 | } 366 | 367 | for local in local_lifetimes.keys().cloned().collect::>() { 368 | if i == func.ssas.len() - 1 { 369 | break; 370 | } 371 | 372 | let lifetime = local_lifetimes.get_mut(&local).unwrap(); 373 | *lifetime -= 1; 374 | if *lifetime == 0 { 375 | local_lifetimes.remove(&local); 376 | func.ssas.insert( 377 | i + 1, 378 | IrSsa { 379 | local: None, 380 | local_lifetime: 0, 381 | local_register: 0, 382 | instr: IrInstruction::RcFuncFree, 383 | args: vec![local], 384 | }, 385 | ); 386 | i += 1; 387 | } 388 | } 389 | 390 | i += 1; 391 | } 392 | } 393 | 394 | /// Converts the frontend IR language to the backend IR language. 
395 | pub fn convert_frontend_ir_to_backend_ir(module: &ir::IrModule) -> IrModule { 396 | let mut new = IrModule { funcs: vec![] }; 397 | 398 | let map: HashMap<_, _> = module 399 | .funcs 400 | .iter() 401 | .map(|v| (v.0.clone(), v.1.captured_names.clone())) 402 | .collect(); 403 | for func in module.funcs.iter() { 404 | let mut f = IrFunction { 405 | name: func.1.name.clone(), 406 | argc: func.1.args.len() + func.1.captured.len(), 407 | ssas: vec![], 408 | }; 409 | let args_map: HashMap = func 410 | .1 411 | .captured_names 412 | .iter() 413 | .cloned() 414 | .enumerate() 415 | .chain(func.1.args.iter().map(|v| v.0.clone()).enumerate()) 416 | .map(|v| (v.1, v.0)) 417 | .collect(); 418 | 419 | conversion_helper(&args_map, &mut f, &func.1.body, &map); 420 | f.ssas.push(IrSsa { 421 | local: None, 422 | local_lifetime: 0, 423 | local_register: 0, 424 | instr: IrInstruction::Ret, 425 | args: if let Some(l) = f.get_last_local() { 426 | vec![IrArgument::Local(l)] 427 | } else { 428 | vec![] 429 | }, 430 | }); 431 | 432 | calculate_lifetimes(&mut f); 433 | insert_rc_instructions(&mut f); 434 | 435 | new.funcs.push(f); 436 | } 437 | 438 | new 439 | } 440 | -------------------------------------------------------------------------------- /src/frontend/types.rs: -------------------------------------------------------------------------------- 1 | use logos::Span; 2 | use std::collections::hash_map::DefaultHasher; 3 | use std::collections::{HashMap, HashSet}; 4 | use std::fmt::{Display, Error, Formatter}; 5 | use std::hash::{Hash, Hasher}; 6 | use std::sync::Arc; 7 | 8 | use super::ir::Location; 9 | use super::parser::Ast; 10 | 11 | #[derive(Clone, Debug)] 12 | pub struct HashSetWrapper(pub HashSet); 13 | 14 | impl PartialEq for HashSetWrapper { 15 | fn eq(&self, other: &HashSetWrapper) -> bool { 16 | self.0 == other.0 17 | } 18 | } 19 | 20 | impl Eq for HashSetWrapper {} 21 | 22 | impl Hash for HashSetWrapper { 23 | fn hash(&self, h: &mut H) { 24 | let mut hash: u64 = 0; 25 
| for v in self.0.iter() { 26 | let mut h_ = DefaultHasher::new(); 27 | v.hash(&mut h_); 28 | hash ^= h_.finish(); 29 | } 30 | hash.hash(h); 31 | } 32 | } 33 | 34 | pub type TypeRc = Arc; 35 | 36 | // Represents a type. 37 | #[derive(Debug, Clone, Hash, PartialEq, Eq)] 38 | pub enum Type { 39 | Error, 40 | UndeclaredTypeError(Location), 41 | DuplicateTypeError(Location, Location, TypeRc), 42 | Unknown, 43 | Int, 44 | Float, 45 | Bool, 46 | Word, 47 | Char, 48 | Symbol(String), 49 | Generic(String, usize), 50 | Func(TypeRc, TypeRc), 51 | Union(HashSetWrapper), 52 | } 53 | 54 | impl Display for Type { 55 | fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { 56 | match self { 57 | // Errors 58 | Type::Error => { 59 | write!(f, "{{ unknown }}")?; 60 | } 61 | Type::UndeclaredTypeError(_) => { 62 | write!(f, "UndeclaredTypeError")?; 63 | } 64 | Type::DuplicateTypeError(_, _, _) => { 65 | write!(f, "DuplicateTypeError")?; 66 | } 67 | Type::Unknown => { 68 | write!(f, "{{ unknown }}")?; 69 | } 70 | 71 | // Primitives 72 | Type::Int => { 73 | write!(f, "Int")?; 74 | } 75 | Type::Float => { 76 | write!(f, "Float")?; 77 | } 78 | Type::Bool => { 79 | write!(f, "Bool")?; 80 | } 81 | Type::Word => { 82 | write!(f, "Word")?; 83 | } 84 | Type::Char => { 85 | write!(f, "Char")?; 86 | } 87 | Type::Symbol(s) => { 88 | write!(f, "{}", s)?; 89 | } 90 | Type::Generic(g, uid) => { 91 | write!(f, "'{}${}", g, uid)?; 92 | } 93 | 94 | // Function types 95 | Type::Func(arg, ret) => { 96 | if let Type::Func(_, _) = **arg { 97 | write!(f, "({})", **arg)?; 98 | } else { 99 | write!(f, "{}", **arg)?; 100 | } 101 | write!(f, " -> {}", ret)?; 102 | } 103 | 104 | // Union types 105 | Type::Union(fields) => { 106 | let mut bar = false; 107 | for field in fields.0.iter() { 108 | if bar { 109 | write!(f, " | ")?; 110 | } else { 111 | bar = true; 112 | } 113 | 114 | if let Type::Func(_, _) = **field { 115 | write!(f, "({})", field)?; 116 | } else { 117 | write!(f, "{}", field)?; 118 | } 119 | } 
120 | } 121 | } 122 | Ok(()) 123 | } 124 | } 125 | 126 | #[derive(Clone, Hash, Eq, PartialEq)] 127 | pub struct GenericPair { 128 | generic: String, 129 | uid: usize, 130 | } 131 | 132 | impl Type { 133 | // sum_hash(&self) -> u64 134 | // Returns the hash value used by codegenned sum/union types. 135 | pub fn sum_hash(&self) -> u64 { 136 | let mut hash = DefaultHasher::new(); 137 | self.hash(&mut hash); 138 | hash.finish() 139 | } 140 | 141 | // is_subtype(&self, &Type, &HashMap) -> bool 142 | // Returns true if self is a valid subtype in respect to the passed in type. 143 | pub fn is_subtype( 144 | &self, 145 | supertype: &Type, 146 | types: &HashMap, 147 | generics_map: &mut HashMap, 148 | ) -> bool { 149 | if !matches!(self, Type::Generic(_, _)) && self == supertype { 150 | return true; 151 | } 152 | 153 | match supertype { 154 | // Primitives 155 | Type::Int => *self == Type::Int, 156 | Type::Float => *self == Type::Float, 157 | Type::Bool => *self == Type::Bool, 158 | Type::Word => *self == Type::Word, 159 | Type::Char => *self == Type::Char, 160 | 161 | // Functions 162 | Type::Func(sf, sa) => { 163 | if let Type::Func(f, a) = self { 164 | f.is_subtype(sf, types, generics_map) && a.is_subtype(sa, types, generics_map) 165 | } else { 166 | false 167 | } 168 | } 169 | 170 | // Generics 171 | Type::Generic(g, uid) => { 172 | let generic_pair = GenericPair { 173 | generic: g.clone(), 174 | uid: *uid, 175 | }; 176 | 177 | if let Some(t) = generics_map.get(&generic_pair) { 178 | if let Type::Generic(t, _) = &**t { 179 | if let Type::Generic(s, _) = self { 180 | t == s 181 | } else { 182 | false 183 | } 184 | } else { 185 | self.is_subtype(&*t.clone(), types, generics_map) 186 | } 187 | } else if !self.contains_generic(&generic_pair) { 188 | generics_map.insert(generic_pair, arc::new(self.clone())); 189 | true 190 | } else { 191 | false 192 | } 193 | } 194 | 195 | // Union types 196 | Type::Union(fields) => { 197 | // Union types mean the subtype has fields over a 
subset of fields of the supertype 198 | if let Type::Union(sub) = self { 199 | for s in sub.0.iter() { 200 | let mut is_subtype = false; 201 | for f in fields.0.iter() { 202 | if s.is_subtype(&f, types, generics_map) { 203 | is_subtype = true; 204 | break; 205 | } 206 | } 207 | 208 | if !is_subtype { 209 | return false; 210 | } 211 | } 212 | 213 | return true; 214 | } 215 | 216 | for t in fields.0.iter() { 217 | if self.is_subtype(t, types, generics_map) { 218 | return true; 219 | } 220 | } 221 | 222 | false 223 | } 224 | 225 | // Everything else is to be ignored 226 | Type::Error 227 | | Type::UndeclaredTypeError(_) 228 | | Type::DuplicateTypeError(_, _, _) 229 | | Type::Unknown 230 | | Type::Symbol(_) => false, 231 | } 232 | } 233 | 234 | fn contains_generic(&self, generic: &GenericPair) -> bool { 235 | match self { 236 | Type::Error 237 | | Type::UndeclaredTypeError(_) 238 | | Type::DuplicateTypeError(_, _, _) 239 | | Type::Unknown 240 | | Type::Int 241 | | Type::Float 242 | | Type::Bool 243 | | Type::Word 244 | | Type::Char 245 | | Type::Symbol(_) => false, 246 | 247 | Type::Generic(g, uid) => generic.generic == *g && generic.uid == *uid, 248 | 249 | Type::Func(a, r) => a.contains_generic(generic) || r.contains_generic(generic), 250 | 251 | Type::Union(_) => todo!(), 252 | } 253 | } 254 | 255 | pub fn replace_generics(&mut self, generics_map: &HashMap) { 256 | match self { 257 | // Functions 258 | Type::Func(f, a) => { 259 | Arc::make_mut(f).replace_generics(generics_map); 260 | Arc::make_mut(a).replace_generics(generics_map); 261 | } 262 | 263 | // Generics 264 | Type::Generic(g, uid) => { 265 | let generic_pair = GenericPair { 266 | generic: g.clone(), 267 | uid: *uid, 268 | }; 269 | 270 | if let Some(t) = generics_map.get(&generic_pair) { 271 | *self = (**t).clone(); 272 | } 273 | } 274 | 275 | // Union types 276 | Type::Union(_fields) => { 277 | todo!(); 278 | } 279 | 280 | // Everything else is to be ignored 281 | Type::Error 282 | | 
Type::UndeclaredTypeError(_) 283 | | Type::DuplicateTypeError(_, _, _) 284 | | Type::Unknown 285 | | Type::Int 286 | | Type::Float 287 | | Type::Bool 288 | | Type::Word 289 | | Type::Char 290 | | Type::Symbol(_) => {} 291 | } 292 | } 293 | 294 | pub fn get_generics<'a>(&'a self, v: &mut Vec<(&'a str, usize)>) { 295 | match self { 296 | Type::Error 297 | | Type::UndeclaredTypeError(_) 298 | | Type::DuplicateTypeError(_, _, _) 299 | | Type::Unknown 300 | | Type::Int 301 | | Type::Float 302 | | Type::Bool 303 | | Type::Word 304 | | Type::Char 305 | | Type::Symbol(_) => (), 306 | 307 | Type::Generic(g, uid) => v.push((g, *uid)), 308 | 309 | Type::Func(a, r) => { 310 | a.get_generics(v); 311 | r.get_generics(v); 312 | } 313 | 314 | Type::Union(_) => todo!(), 315 | } 316 | } 317 | } 318 | 319 | // ast_sum_builder_helper(Ast, &str, &mut HashMap) -> Type 320 | // Helper function for building sum/union types. 321 | fn ast_sum_builder_helper( 322 | ast: Ast, 323 | filename: &str, 324 | fields: &mut HashMap, 325 | generic_uids: &mut HashMap, 326 | last_uid: &mut usize, 327 | ) -> Type { 328 | let s = ast.get_span(); 329 | let v = convert_ast_to_type(ast, filename, generic_uids, last_uid); 330 | if let Type::Union(v) = v { 331 | for v in v.0 { 332 | if let Some(s2) = fields.remove(&v) { 333 | return Type::DuplicateTypeError( 334 | Location::new(s, filename), 335 | Location::new(s2, filename), 336 | v, 337 | ); 338 | } else { 339 | fields.insert(v, s.clone()); 340 | } 341 | } 342 | } else { 343 | let v = arc::new(v); 344 | if let Some(s2) = fields.remove(&v) { 345 | return Type::DuplicateTypeError( 346 | Location::new(s, filename), 347 | Location::new(s2, filename), 348 | v, 349 | ); 350 | } else { 351 | fields.insert(v, s); 352 | } 353 | } 354 | 355 | Type::Unknown 356 | } 357 | 358 | // convert_ast_to_type(Ast, &str, &mut HashMap, &mut usize) -> Type 359 | // Converts an ast node into a type. 
360 | pub fn convert_ast_to_type( 361 | ast: Ast, 362 | filename: &str, 363 | generic_uids: &mut HashMap, 364 | last_uid: &mut usize, 365 | ) -> Type { 366 | match ast { 367 | // Symbols 368 | Ast::Symbol(_, v) => { 369 | match v.as_str() { 370 | // Primitives 371 | "Int" => Type::Int, 372 | "Float" => Type::Float, 373 | "Bool" => Type::Bool, 374 | "Word" => Type::Word, 375 | "Char" => Type::Char, 376 | 377 | // Symbol 378 | _ => Type::Symbol(v), 379 | } 380 | } 381 | 382 | // Generics 383 | Ast::Generic(_, g) => { 384 | let uid = if generic_uids.contains_key(&g) { 385 | *generic_uids.get(&g).unwrap() 386 | } else { 387 | *last_uid += 1; 388 | generic_uids.insert(g.clone(), *last_uid); 389 | *last_uid 390 | }; 391 | 392 | Type::Generic(g, uid) 393 | } 394 | 395 | // Sum types 396 | Ast::Infix(_, op, l, r) if op == "|" => { 397 | let mut fields = HashMap::new(); 398 | let mut acc = *l; 399 | let t = ast_sum_builder_helper(*r, filename, &mut fields, generic_uids, last_uid); 400 | if t != Type::Unknown { 401 | return t; 402 | } 403 | 404 | loop { 405 | match acc { 406 | Ast::Infix(_, op, l, r) if op == "|" => { 407 | let t = ast_sum_builder_helper( 408 | *r, 409 | filename, 410 | &mut fields, 411 | generic_uids, 412 | last_uid, 413 | ); 414 | if t != Type::Unknown { 415 | return t; 416 | } 417 | 418 | acc = *l; 419 | } 420 | 421 | _ => break, 422 | } 423 | } 424 | 425 | let t = ast_sum_builder_helper(acc, filename, &mut fields, generic_uids, last_uid); 426 | if t != Type::Unknown { 427 | return t; 428 | } 429 | 430 | for f in fields.iter() { 431 | if let Type::UndeclaredTypeError(s) = &**f.0 { 432 | return Type::UndeclaredTypeError(s.clone()); 433 | } 434 | } 435 | 436 | if fields.len() == 1 { 437 | (*fields.into_iter().next().unwrap().0).clone() 438 | } else { 439 | Type::Union(HashSetWrapper(fields.into_iter().map(|v| v.0).collect())) 440 | } 441 | } 442 | 443 | // Function types 444 | Ast::Infix(_, op, l, r) if op == "->" => { 445 | let l = convert_ast_to_type(*l, 
filename, generic_uids, last_uid); 446 | let r = convert_ast_to_type(*r, filename, generic_uids, last_uid); 447 | 448 | if let Type::UndeclaredTypeError(s) = l { 449 | Type::UndeclaredTypeError(s) 450 | } else if let Type::DuplicateTypeError(a, b, c) = l { 451 | Type::DuplicateTypeError(a, b, c) 452 | } else if let Type::UndeclaredTypeError(s) = r { 453 | Type::UndeclaredTypeError(s) 454 | } else if let Type::DuplicateTypeError(a, b, c) = r { 455 | Type::DuplicateTypeError(a, b, c) 456 | } else { 457 | Type::Func(arc::new(l), arc::new(r)) 458 | } 459 | } 460 | 461 | // Error 462 | _ => Type::UndeclaredTypeError(Location::new(ast.get_span(), filename)), 463 | } 464 | } 465 | 466 | pub mod arc { 467 | use super::Type; 468 | use std::sync::Arc; 469 | 470 | pub fn new(t: Type) -> Arc { 471 | Arc::new(t) 472 | } 473 | } 474 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::{crate_version, App, Arg, SubCommand}; 2 | use faerie::{ArtifactBuilder, Decl, Link}; 3 | use goblin::Object; 4 | use rustyline::{error::ReadlineError, Editor}; 5 | use std::env; 6 | use std::fs::{self, File}; 7 | use std::process::exit; 8 | use target_lexicon::Triple; 9 | 10 | #[allow(unused_imports)] 11 | use closeyc::backends::{ 12 | aarch64, ir as backend_ir, riscv64, wasm64, x86_64, GeneratedCode, DEFAULT_ARCH, 13 | }; 14 | use closeyc::frontend::correctness; 15 | use closeyc::frontend::ir as frontend_ir; 16 | use closeyc::frontend::parser; 17 | 18 | #[cfg(all(target_os = "macos", target_arch = "aarch64"))] 19 | static MAP_JIT: i32 = 0x0800; 20 | #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))] 21 | static MAP_JIT: i32 = 0; 22 | 23 | extern "C" { 24 | fn pthread_jit_write_protect_np(_: bool); 25 | } 26 | 27 | #[derive(Debug)] 28 | enum CloseyCode<'a> { 29 | None, 30 | Exec(&'a str), 31 | Files(Vec<&'a str>), 32 | } 33 | 34 | struct Jit { 35 | 
code: GeneratedCode, 36 | mem: *const u8, 37 | } 38 | 39 | impl Jit { 40 | fn new(mut code: GeneratedCode) -> Jit { 41 | let mem = unsafe { 42 | libc::mmap( 43 | std::ptr::null_mut(), 44 | code.len(), 45 | libc::PROT_WRITE | libc::PROT_READ, 46 | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | MAP_JIT, 47 | -1, 48 | 0, 49 | ) 50 | } as *mut u8; 51 | 52 | match DEFAULT_ARCH { 53 | "aarch64" => todo!(), 54 | "riscv64" => todo!(), 55 | "wasm64" => todo!(), 56 | "x86_64" => x86_64::codegen::relocate(&mut code), 57 | _ => panic!("unsupported architecture!"), 58 | } 59 | 60 | unsafe { 61 | pthread_jit_write_protect_np(false); 62 | std::ptr::copy(code.data().as_ptr(), mem, code.len()); 63 | libc::mprotect( 64 | mem as *mut libc::c_void, 65 | code.len(), 66 | libc::PROT_READ | libc::PROT_EXEC, 67 | ); 68 | pthread_jit_write_protect_np(true); 69 | } 70 | 71 | Jit { code, mem } 72 | } 73 | 74 | unsafe fn call(&self, func: &str) -> Option<*const u8> { 75 | self.code.get_fn(func, self.mem).map(|v| v()) 76 | } 77 | } 78 | 79 | impl Drop for Jit { 80 | fn drop(&mut self) { 81 | unsafe { 82 | libc::munmap(self.mem as *mut libc::c_void, self.code.len()); 83 | } 84 | } 85 | } 86 | 87 | fn main() { 88 | let files = Arg::with_name("files") 89 | .multiple(true) 90 | .last(true) 91 | .required_unless("exec"); 92 | let exec = Arg::with_name("exec") 93 | .long("exec") 94 | .short("e") 95 | .min_values(1) 96 | .max_values(1); 97 | let app = 98 | App::new("closeyc") 99 | .version(crate_version!()) 100 | .about("Compiler for the Closey language.") 101 | .subcommand( 102 | SubCommand::with_name("build") 103 | .about("Builds Closey code and exports as an object file.") 104 | .arg( 105 | Arg::with_name("output") 106 | .long("output") 107 | .short("o") 108 | .help("The output file; by default this is a.out") 109 | .min_values(1) 110 | .max_values(1), 111 | ) 112 | .arg(files.clone().help("The Closey files to compile.")) 113 | .arg(exec.clone().help("A Closey command to compile.")), 114 | ) 115 | 
.subcommand( 116 | SubCommand::with_name("run") 117 | .about("Runs Closey code by JIT compiling it.") 118 | .arg(files.clone().help("The Closey files to run.")) 119 | .arg(exec.clone().help("A Closey command to run.")), 120 | ) 121 | .subcommand( 122 | SubCommand::with_name("analyse") 123 | .alias("analyze") 124 | .about("Runs the semantic analyser on the given Closey code") 125 | .arg( 126 | Arg::with_name("hlir") 127 | .long("hlir") 128 | .short("i") 129 | .help("Prints out the higher level IR"), 130 | ) 131 | .arg(files.clone().help("The Closey files to analyse.")) 132 | .arg(exec.clone().help("The Closey command to analyse.")), 133 | ) 134 | .subcommand( 135 | SubCommand::with_name("assembly") 136 | .alias("asm") 137 | .about("Prints out the assembly for the given Closey code") 138 | .arg( 139 | files 140 | .clone() 141 | .help("The Closey files to generate assembly for."), 142 | ) 143 | .arg( 144 | exec.clone() 145 | .help("The Closey command to generate assembly for."), 146 | ), 147 | ) 148 | .subcommand( 149 | SubCommand::with_name("llir") 150 | .about("Prints out the low level IR for the given Closey code") 151 | .arg(files.help("The Closey files to generate LLIR for.")) 152 | .arg(exec.help("The Closey command to generate LLIR for.")), 153 | ) 154 | .subcommand(SubCommand::with_name("repl").about( 155 | "Runs the Closey REPL. 
If no subcommand is provided, the REPL will still run.", 156 | )); 157 | 158 | let matches = app.get_matches(); 159 | 160 | let code = match matches.subcommand_name() { 161 | Some("repl") | None => CloseyCode::None, 162 | 163 | Some(s) => { 164 | let matches = matches.subcommand_matches(s).unwrap(); 165 | match matches.value_of("exec") { 166 | Some(v) => CloseyCode::Exec(v), 167 | None => CloseyCode::Files(matches.values_of("files").unwrap().collect()), 168 | } 169 | } 170 | }; 171 | 172 | let contents = match code { 173 | CloseyCode::Exec(s) => Some(s.to_owned()), 174 | CloseyCode::Files(v) => match fs::read_to_string(v.first().unwrap()) { 175 | Ok(s) => Some(s), 176 | Err(e) => { 177 | eprintln!("error reading file {}: {}", v.first().unwrap(), e); 178 | exit(1); 179 | } 180 | }, 181 | CloseyCode::None => None, 182 | }; 183 | 184 | match matches.subcommand_name() { 185 | Some("analyse") => { 186 | let contents = contents.unwrap(); 187 | let mut root = frontend_ir::Ir::new(); 188 | check(&contents, "Main", &mut root); 189 | print!("{}", root); 190 | } 191 | 192 | Some("assembly") => { 193 | let contents = contents.unwrap(); 194 | let mut root = frontend_ir::Ir::new(); 195 | check(&contents, "Main", &mut root); 196 | 197 | let mut module = backend_ir::convert_frontend_ir_to_backend_ir( 198 | &root.modules.iter().next().unwrap().1, 199 | ); 200 | 201 | let mut code = match compile(&mut module) { 202 | Some(v) => v, 203 | None => return, 204 | }; 205 | 206 | match DEFAULT_ARCH { 207 | "aarch64" => todo!(), 208 | "riscv64" => todo!(), 209 | "wasm64" => todo!(), 210 | "x86_64" => x86_64::codegen::relocate(&mut code), 211 | _ => panic!("unsupported architecture!"), 212 | } 213 | 214 | match DEFAULT_ARCH { 215 | "aarch64" => todo!(), 216 | "riscv64" => todo!(), 217 | "wasm64" => todo!(), 218 | "x86_64" => x86_64::disassemble(&code, std::ptr::null()), 219 | _ => panic!("unsupported architecture!"), 220 | } 221 | } 222 | 223 | Some("build") => { 224 | let contents = 
contents.unwrap(); 225 | let mut root = frontend_ir::Ir::new(); 226 | check(&contents, "Main", &mut root); 227 | 228 | let mut module = backend_ir::convert_frontend_ir_to_backend_ir( 229 | &root.modules.iter().next().unwrap().1, 230 | ); 231 | 232 | let mut code = match compile(&mut module) { 233 | Some(v) => v, 234 | None => return, 235 | }; 236 | 237 | match DEFAULT_ARCH { 238 | "aarch64" => todo!(), 239 | "riscv64" => todo!(), 240 | "wasm64" => todo!(), 241 | "x86_64" => x86_64::codegen::generate_start_func(&mut code), 242 | _ => panic!("unsupported architecture!"), 243 | } 244 | 245 | let f = matches 246 | .subcommand_matches("build") 247 | .unwrap() 248 | .value_of("output") 249 | .unwrap_or("a.o") 250 | .to_owned(); 251 | 252 | let mut artefact = ArtifactBuilder::new(Triple::host()) 253 | .name(f.clone()) 254 | .finish(); 255 | 256 | let mut funcs: Vec<_> = code.get_funcs().iter().collect(); 257 | funcs.sort_by(|a, b| a.1.start.cmp(&b.1.start)); 258 | match artefact.declarations({ 259 | funcs.iter().map(|v| { 260 | ( 261 | v.0, 262 | if v.0 == "_start" || v.0 == "main" { 263 | Decl::function().global().into() 264 | } else if v.1.start == 0 && v.1.end == 0 { 265 | Decl::function_import().into() 266 | } else { 267 | Decl::function().into() 268 | }, 269 | ) 270 | }) 271 | }) { 272 | Ok(_) => (), 273 | Err(e) => { 274 | eprintln!("Error declaring functions: {}", e); 275 | return; 276 | } 277 | } 278 | 279 | for (func, range) in funcs { 280 | if range.start == 0 && range.end == 0 { 281 | continue; 282 | } 283 | 284 | match artefact.define(func, code.data()[range.start..range.end].to_owned()) { 285 | Ok(_) => (), 286 | Err(e) => { 287 | eprintln!("Error defining function: {}", e); 288 | return; 289 | } 290 | } 291 | } 292 | 293 | for (addr, to) in code.get_relocation_table() { 294 | for (from, range) in code.get_funcs() { 295 | if range.start <= *addr && *addr < range.end { 296 | match artefact.link(Link { 297 | from, 298 | to, 299 | at: (addr - range.start) as 
u64, 300 | }) { 301 | Ok(_) => (), 302 | Err(e) => { 303 | eprintln!("Error linking: {}", e); 304 | return; 305 | } 306 | } 307 | break; 308 | } 309 | } 310 | } 311 | 312 | match artefact.write(match File::create(&f) { 313 | Ok(v) => v, 314 | Err(e) => { 315 | eprintln!("Error getting file {}: {}", f, e); 316 | exit(1); 317 | } 318 | }) { 319 | Ok(_) => (), 320 | Err(e) => { 321 | eprintln!("Error writing artefact to file: {}", e); 322 | } 323 | } 324 | } 325 | 326 | Some("llir") => { 327 | let contents = contents.unwrap(); 328 | let mut root = frontend_ir::Ir::new(); 329 | check(&contents, "Main", &mut root); 330 | 331 | let module = backend_ir::convert_frontend_ir_to_backend_ir( 332 | &root.modules.iter().next().unwrap().1, 333 | ); 334 | println!("{}", module); 335 | } 336 | 337 | Some("run") => { 338 | let contents = contents.unwrap(); 339 | let mut root = frontend_ir::Ir::new(); 340 | check(&contents, "Main", &mut root); 341 | 342 | let mut module = backend_ir::convert_frontend_ir_to_backend_ir( 343 | &root.modules.iter().next().unwrap().1, 344 | ); 345 | 346 | let code = match compile(&mut module) { 347 | Some(v) => v, 348 | None => return, 349 | }; 350 | 351 | let jit = Jit::new(code); 352 | println!("{:#x}", unsafe { jit.call("main") }.unwrap() as u64); 353 | } 354 | 355 | Some("repl") | None => repl(), 356 | 357 | _ => unreachable!("Invalid subcommand"), 358 | } 359 | } 360 | 361 | fn check(s: &str, mod_name: &str, root: &mut frontend_ir::Ir) { 362 | let ast = match parser::parse(s) { 363 | Ok(v) => v, 364 | 365 | Err(_) => { 366 | eprintln!("Error parsing!"); 367 | exit(1); 368 | } 369 | }; 370 | 371 | match frontend_ir::convert_ast_to_ir(mod_name, &s, ast, root) { 372 | Ok(v) => v, 373 | Err(_) => { 374 | eprintln!("Error creating ir!"); 375 | exit(1); 376 | } 377 | }; 378 | 379 | let _ = correctness::check_correctness(root, true); 380 | } 381 | 382 | fn compile(module: &mut backend_ir::IrModule) -> Option { 383 | match DEFAULT_ARCH { 384 | "aarch64" => 
Some(aarch64::codegen::generate_code(module)), 385 | "riscv64" => todo!(), 386 | "wasm64" => todo!(), 387 | "x86_64" => Some(x86_64::codegen::generate_code(module)), 388 | _ => panic!("unsupported architecture"), 389 | } 390 | } 391 | 392 | fn repl() { 393 | let mut rl = Editor::<()>::new(); 394 | let mut root = frontend_ir::Ir::new(); 395 | let mut i = 0; 396 | 397 | loop { 398 | let readline = rl.readline(">>> "); 399 | match readline { 400 | Ok(line) => { 401 | rl.add_history_entry(&line); 402 | 403 | let mod_name = format!("m{}", i); 404 | i += 1; 405 | check(&line, &mod_name, &mut root); 406 | let f_module = root.modules.get(&mod_name).unwrap(); 407 | 408 | let mut b_module = backend_ir::convert_frontend_ir_to_backend_ir(f_module); 409 | 410 | let code = match compile(&mut b_module) { 411 | Some(v) => v, 412 | None => return, 413 | }; 414 | 415 | let jit = Jit::new(code); 416 | println!( 417 | "{:#x}", 418 | unsafe { jit.call(f_module.funcs.iter().next().unwrap().0) }.unwrap() as u64 419 | ); 420 | } 421 | 422 | Err(ReadlineError::Interrupted) => { 423 | println!("^C"); 424 | } 425 | 426 | Err(ReadlineError::Eof) => { 427 | println!("^D"); 428 | break; 429 | } 430 | 431 | Err(err) => { 432 | println!("Error: {}", err); 433 | break; 434 | } 435 | } 436 | } 437 | } 438 | 439 | #[allow(dead_code, unused_mut)] 440 | fn load_libclosey(path: &str) -> Result, ()> { 441 | let buffer = match fs::read(path) { 442 | Ok(v) => v, 443 | Err(e) => { 444 | eprintln!("Error reading {}: {}", path, e); 445 | return Err(()); 446 | } 447 | }; 448 | 449 | let mut jits = vec![]; 450 | match Object::parse(&buffer) { 451 | Ok(Object::Elf(_)) => todo!(), 452 | Ok(Object::Mach(_)) => todo!(), 453 | Ok(Object::PE(_)) => todo!(), 454 | 455 | Ok(Object::Archive(ar)) => { 456 | for member in ar.members() { 457 | let buffer = ar.extract(member, &buffer).unwrap(); 458 | match Object::parse(buffer) { 459 | Ok(Object::Elf(elf)) => { 460 | println!("{:#?}", elf); 461 | } 462 | 463 | 
Ok(Object::Mach(_)) => todo!(), 464 | 465 | Ok(Object::PE(_)) => todo!(), 466 | 467 | Ok(Object::Archive(_)) => unreachable!("Archives cannot contain archives!"), 468 | 469 | Ok(Object::Unknown(magic)) => { 470 | eprintln!( 471 | "Error reading object file {} in {}: unknown magic number {}", 472 | member, path, magic 473 | ); 474 | return Err(()); 475 | } 476 | 477 | Err(e) => { 478 | eprintln!("Error reading object file {} in {}: {}", member, path, e); 479 | return Err(()); 480 | } 481 | } 482 | } 483 | 484 | Ok(jits) 485 | } 486 | 487 | Ok(Object::Unknown(magic)) => { 488 | println!("Error parsing {}: unknown magic number {}", path, magic); 489 | Err(()) 490 | } 491 | 492 | Err(e) => { 493 | eprintln!("Error parsing {}: {}", path, e); 494 | Err(()) 495 | } 496 | } 497 | } 498 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. 
You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. 
Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. 
However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. 
This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. 
You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. 
Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) 
Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 
251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. 
Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 
317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. 
Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. 
For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. 
The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
446 | 447 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 457 | 458 | END OF TERMS AND CONDITIONS 459 | 460 | How to Apply These Terms to Your New Libraries 461 | 462 | If you develop a new library, and you want it to be of the greatest 463 | possible use to the public, we recommend making it free software that 464 | everyone can redistribute and change. You can do so by permitting 465 | redistribution under these terms (or, alternatively, under the terms of the 466 | ordinary General Public License). 467 | 468 | To apply these terms, attach the following notices to the library. It is 469 | safest to attach them to the start of each source file to most effectively 470 | convey the exclusion of warranty; and each file should have at least the 471 | "copyright" line and a pointer to where the full notice is found. 472 | 473 | 474 | Copyright (C) 475 | 476 | This library is free software; you can redistribute it and/or 477 | modify it under the terms of the GNU Lesser General Public 478 | License as published by the Free Software Foundation; either 479 | version 2.1 of the License, or (at your option) any later version. 480 | 481 | This library is distributed in the hope that it will be useful, 482 | but WITHOUT ANY WARRANTY; without even the implied warranty of 483 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 484 | Lesser General Public License for more details. 485 | 486 | You should have received a copy of the GNU Lesser General Public 487 | License along with this library; if not, write to the Free Software 488 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 489 | USA 490 | 491 | Also add information on how to contact you by electronic and paper mail. 492 | 493 | You should also get your employer (if you work as a programmer) or your 494 | school, if any, to sign a "copyright disclaimer" for the library, if 495 | necessary. Here is a sample; alter the names: 496 | 497 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 498 | library `Frob' (a library for tweaking knobs) written by James Random 499 | Hacker. 500 | 501 | , 1 April 1990 502 | Ty Coon, President of Vice 503 | 504 | That's all there is to it! 505 | -------------------------------------------------------------------------------- /src/backends/x86_64/codegen.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | 3 | use super::super::super::backends; 4 | use super::super::ir::{IrArgument, IrInstruction, IrModule}; 5 | use super::super::GeneratedCode; 6 | 7 | const ARG_REGISTER_COUNT: usize = 6; 8 | const NONARG_REGISTER_COUNT: usize = 8; 9 | 10 | enum InstructionRegister { 11 | Bit32(u8), 12 | Bit64(u8), 13 | Spilled(usize), 14 | Arg(usize), 15 | } 16 | 17 | impl InstructionRegister { 18 | fn is_register(&self) -> bool { 19 | match self { 20 | Self::Bit32(_) | Self::Bit64(_) => true, 21 | 22 | Self::Spilled(_) | Self::Arg(_) => false, 23 | } 24 | } 25 | 26 | fn is_64_bit(&self) -> u8 { 27 | if let Self::Bit64(_) = self { 28 | 1 29 | } else { 30 | 0 31 | } 32 | } 33 | 34 | fn get_register(&self) -> u8 { 35 | match self { 36 | Self::Bit32(r) | Self::Bit64(r) => *r, 37 | 38 | Self::Spilled(_) => panic!("Spilled values are not registers!"), 39 | Self::Arg(_) => panic!("Argument 
values are not registers!"), 40 | } 41 | } 42 | } 43 | 44 | #[derive(Debug, Copy, Clone, PartialEq, Eq)] 45 | enum Register { 46 | Rax, // scratch and return register 47 | Rcx, 48 | Rdx, 49 | Rbx, 50 | Rsp, 51 | Rbp, 52 | Rsi, 53 | Rdi, 54 | R8, 55 | R9, 56 | R10, 57 | R11, 58 | R12, 59 | R13, 60 | R14, 61 | R15, 62 | Spilled(usize), 63 | Arg(usize), 64 | } 65 | 66 | impl Register { 67 | fn convert_arg_register_id(id: usize) -> Register { 68 | use Register::*; 69 | 70 | match id { 71 | 0 => Rdi, 72 | 1 => Rsi, 73 | 2 => Rdx, 74 | 3 => Rcx, 75 | 4 => R8, 76 | 5 => R9, 77 | _ => Arg(id - ARG_REGISTER_COUNT), 78 | } 79 | } 80 | 81 | fn convert_nonarg_register_id(id: usize) -> Register { 82 | use Register::*; 83 | 84 | match id { 85 | 0 => Rbx, 86 | 1 => Rdx, 87 | 2 => R10, 88 | 3 => R11, 89 | 4 => R12, 90 | 5 => R13, 91 | 6 => R14, 92 | 7 => R15, 93 | _ => Spilled(id - NONARG_REGISTER_COUNT), 94 | } 95 | } 96 | 97 | fn revert_to_nonarg_register_id(&self) -> usize { 98 | use Register::*; 99 | 100 | match self { 101 | Rbx => 0, 102 | Rdx => 1, 103 | R10 => 2, 104 | R11 => 3, 105 | R12 => 4, 106 | R13 => 5, 107 | R14 => 6, 108 | R15 => 7, 109 | Spilled(id) => id + NONARG_REGISTER_COUNT, 110 | _ => panic!("Arguments are not not arguments!"), 111 | } 112 | } 113 | 114 | fn is_callee_saved(&self) -> bool { 115 | use Register::*; 116 | matches!(self, Rbx | Rsp | Rbp | R12 | R13 | R14 | R15) 117 | } 118 | 119 | fn convert_to_instr_arg(&self) -> InstructionRegister { 120 | use InstructionRegister as IR; 121 | use Register::*; 122 | 123 | match self { 124 | Rax => IR::Bit32(0), 125 | Rcx => IR::Bit32(1), 126 | Rdx => IR::Bit32(2), 127 | Rbx => IR::Bit32(3), 128 | Rsp => IR::Bit32(4), 129 | Rbp => IR::Bit32(5), 130 | Rsi => IR::Bit32(6), 131 | Rdi => IR::Bit32(7), 132 | R8 => IR::Bit64(0), 133 | R9 => IR::Bit64(1), 134 | R10 => IR::Bit64(2), 135 | R11 => IR::Bit64(3), 136 | R12 => IR::Bit64(4), 137 | R13 => IR::Bit64(5), 138 | R14 => IR::Bit64(6), 139 | R15 => IR::Bit64(7), 140 
| Spilled(s) => IR::Spilled(*s), 141 | Arg(s) => IR::Arg(*s), 142 | } 143 | } 144 | } 145 | 146 | fn generate_mov( 147 | code: &mut GeneratedCode, 148 | dest: Register, 149 | source: Register, 150 | stack_allocated_local_count: &mut usize, 151 | ) { 152 | let dest_location = dest.convert_to_instr_arg(); 153 | let source_location = source.convert_to_instr_arg(); 154 | 155 | match (dest_location.is_register(), source_location.is_register()) { 156 | (true, true) => { 157 | // mov dest_reg, source_reg 158 | code.data 159 | .push(0x48 | dest_location.is_64_bit() | (source_location.is_64_bit() << 2)); 160 | code.data.push(0x89); 161 | code.data 162 | .push(0xc0 | dest_location.get_register() | (source_location.get_register() << 3)); 163 | } 164 | 165 | (true, false) => { 166 | // mov dest_reg, [rbp +- offset] 167 | code.data.push(0x48 | (dest_location.is_64_bit() << 2)); 168 | code.data.push(0x8b); 169 | code.data.push(0x85 | (dest_location.get_register() << 3)); 170 | 171 | let offset = if let InstructionRegister::Arg(a) = source_location { 172 | (a as u32 + 2) * 8 173 | } else if let InstructionRegister::Spilled(s) = source_location { 174 | (-(s as i32 + 1) * 8) as u32 175 | } else { 176 | unreachable!(); 177 | }; 178 | 179 | code.data.push((offset & 0xff) as u8); 180 | code.data.push(((offset >> 8) & 0xff) as u8); 181 | code.data.push(((offset >> 16) & 0xff) as u8); 182 | code.data.push(((offset >> 24) & 0xff) as u8); 183 | } 184 | 185 | (false, true) => { 186 | let offset = if let InstructionRegister::Arg(a) = dest_location { 187 | (a as u32 + 2) * 8 188 | } else if let InstructionRegister::Spilled(s) = dest_location { 189 | if s <= *stack_allocated_local_count { 190 | // push reg 191 | *stack_allocated_local_count += 1; 192 | if source_location.is_64_bit() != 0 { 193 | code.data.push(0x41); 194 | } 195 | code.data.push(0x50 | source_location.get_register()); 196 | return; 197 | } 198 | (-(s as i32 + 1) * 8) as u32 199 | } else { 200 | unreachable!(); 201 | }; 202 | 
203 | // mov [rbp +- offset], source_reg 204 | code.data.push(0x48 | (source_location.is_64_bit() << 2)); 205 | code.data.push(0x89); 206 | code.data.push(0x85 | (source_location.get_register() << 3)); 207 | code.data.push((offset & 0xff) as u8); 208 | code.data.push(((offset >> 8) & 0xff) as u8); 209 | code.data.push(((offset >> 16) & 0xff) as u8); 210 | code.data.push(((offset >> 24) & 0xff) as u8); 211 | } 212 | 213 | (false, false) => { 214 | // mov rax, [rbp +- offset] 215 | generate_mov(code, Register::Rax, source, stack_allocated_local_count); 216 | 217 | // mov [rbp +- offset], rax 218 | generate_mov(code, dest, Register::Rax, stack_allocated_local_count); 219 | } 220 | } 221 | } 222 | 223 | fn generate_lea( 224 | code: &mut GeneratedCode, 225 | dest: Register, 226 | source: &str, 227 | stack_allocated_local_count: &mut usize, 228 | ) { 229 | let dest_location = dest.convert_to_instr_arg(); 230 | if dest_location.is_register() { 231 | code.data.push(0x48 | (dest_location.is_64_bit() << 2)); 232 | code.data.push(0x8d); 233 | code.data.push(0x05 | (dest_location.get_register() << 3)); 234 | code.func_refs.insert(code.data.len(), source.to_owned()); 235 | code.data.push(0x01); 236 | code.data.push(0x00); 237 | code.data.push(0x00); 238 | code.data.push(0x00); 239 | } else { 240 | code.data.push(0x48); 241 | code.data.push(0x8d); 242 | code.data.push(0x05); 243 | code.func_refs.insert(code.data.len(), source.to_owned()); 244 | code.data.push(0x01); 245 | code.data.push(0x00); 246 | code.data.push(0x00); 247 | code.data.push(0x00); 248 | generate_mov(code, dest, Register::Rax, stack_allocated_local_count); 249 | } 250 | } 251 | 252 | /// Generates the _start function, which calls main and the exit syscall. 
253 | pub fn generate_start_func(code: &mut GeneratedCode) { 254 | code.func_addrs 255 | .insert(String::from("_start"), code.len()..code.len() + 1); 256 | code.func_addrs.insert(String::from("exit"), 0..0); 257 | 258 | // call main 259 | code.data.push(0xe8); 260 | code.func_refs.insert(code.len(), String::from("main")); 261 | code.data.push(0x10); 262 | code.data.push(0x00); 263 | code.data.push(0x00); 264 | code.data.push(0x00); 265 | 266 | // mov rdi, rax 267 | code.data.push(0x48); 268 | code.data.push(0x89); 269 | code.data.push(0xc7); 270 | 271 | // call exit 272 | code.data.push(0xe8); 273 | code.func_refs.insert(code.len(), String::from("exit")); 274 | code.data.push(0x00); 275 | code.data.push(0x00); 276 | code.data.push(0x00); 277 | code.data.push(0x00); 278 | 279 | code.func_addrs.get_mut("_start").unwrap().end = code.len(); 280 | } 281 | 282 | /// Transforms an IrModule into x86 machine code. 283 | pub fn generate_code(module: &mut IrModule) -> GeneratedCode { 284 | let mut code = GeneratedCode::new(); 285 | 286 | for func in module.funcs.iter_mut() { 287 | backends::linear_scan(func, NONARG_REGISTER_COUNT); 288 | } 289 | 290 | for func in module.funcs.iter() { 291 | // Add padding 292 | while code.data.len() % 16 != 0 { 293 | code.data.push(0); 294 | } 295 | 296 | // Add function 297 | code.func_addrs 298 | .insert(func.name.clone(), code.len()..code.len() + 1); 299 | 300 | // Offset by 1 301 | code.data.push(0x00); 302 | 303 | // Argument count 304 | code.data.push((func.argc & 0xff) as u8); 305 | code.data.push(((func.argc >> 8) & 0xff) as u8); 306 | code.data.push(((func.argc >> 16) & 0xff) as u8); 307 | code.data.push(((func.argc >> 24) & 0xff) as u8); 308 | 309 | // Padding 310 | while code.data.len() % 16 != 0 { 311 | code.data.push(0); 312 | } 313 | 314 | // push rbp 315 | code.data.push(0x55); 316 | let mut stack_allocated_local_count = 0usize; 317 | 318 | // mov rbp, rsp 319 | generate_mov( 320 | &mut code, 321 | Register::Rbp, 322 | 
Register::Rsp, 323 | &mut stack_allocated_local_count, 324 | ); 325 | 326 | let mut used_registers = HashSet::new(); 327 | for ssa in func.ssas.iter() { 328 | if ssa.local.is_some() 329 | && Register::convert_nonarg_register_id(ssa.local_register).is_callee_saved() 330 | && !used_registers.contains(&ssa.local_register) 331 | { 332 | used_registers.insert(ssa.local_register); 333 | } 334 | } 335 | 336 | // Push used registers 337 | let used_registers: Vec<_> = used_registers.into_iter().collect(); 338 | for register in used_registers.iter() { 339 | let register = Register::convert_nonarg_register_id(*register).convert_to_instr_arg(); 340 | if register.is_64_bit() != 0 { 341 | code.data.push(0x41); 342 | } 343 | code.data.push(0x50 | register.get_register()); 344 | } 345 | 346 | let mut local_to_register = HashMap::new(); 347 | let mut register_lifetimes = vec![0; NONARG_REGISTER_COUNT]; 348 | for ssa in func.ssas.iter() { 349 | for lifetime in register_lifetimes.iter_mut() { 350 | if *lifetime != 0 { 351 | *lifetime -= 1; 352 | } 353 | } 354 | 355 | if let Some(local) = ssa.local { 356 | let register = Register::convert_nonarg_register_id(ssa.local_register); 357 | 358 | if register_lifetimes.len() < ssa.local_register { 359 | register_lifetimes[ssa.local_register] = ssa.local_lifetime; 360 | } else { 361 | register_lifetimes.push(ssa.local_lifetime); 362 | } 363 | 364 | local_to_register.insert(local, register); 365 | } 366 | 367 | match ssa.instr { 368 | IrInstruction::Ret => { 369 | if let Some(IrArgument::Local(arg)) = ssa.args.first() { 370 | let register = local_to_register.get(arg).unwrap(); 371 | generate_mov( 372 | &mut code, 373 | Register::Rax, 374 | *register, 375 | &mut stack_allocated_local_count, 376 | ); 377 | } 378 | 379 | // Pop used registers 380 | for register in used_registers.iter().rev() { 381 | let register = 382 | Register::convert_nonarg_register_id(*register).convert_to_instr_arg(); 383 | if register.is_64_bit() != 0 { 384 | 
code.data.push(0x41); 385 | } 386 | code.data.push(0x58 | register.get_register()); 387 | } 388 | 389 | // mov rsp, rbp 390 | generate_mov( 391 | &mut code, 392 | Register::Rsp, 393 | Register::Rbp, 394 | &mut stack_allocated_local_count, 395 | ); 396 | 397 | // pop rbp 398 | code.data.push(0x5d); 399 | 400 | // ret 401 | code.data.push(0xc3); 402 | } 403 | 404 | IrInstruction::Load => { 405 | if let Some(local) = ssa.local { 406 | let local_reg = *local_to_register.get(&local).unwrap(); 407 | 408 | match ssa.args.first() { 409 | Some(IrArgument::Argument(arg)) => { 410 | // mov local, [rbp + offset] 411 | generate_mov( 412 | &mut code, 413 | local_reg, 414 | Register::convert_arg_register_id(*arg), 415 | &mut stack_allocated_local_count, 416 | ); 417 | } 418 | 419 | Some(IrArgument::Function(func)) => { 420 | generate_lea( 421 | &mut code, 422 | local_reg, 423 | func, 424 | &mut stack_allocated_local_count, 425 | ); 426 | } 427 | 428 | _ => (), 429 | } 430 | } 431 | } 432 | 433 | IrInstruction::Apply => { 434 | let f = ssa.args.first().unwrap(); 435 | match f { 436 | // TODO: prove or disprove this 437 | IrArgument::Local(_) => { 438 | unreachable!("Locals are either called or applied earlier") 439 | } 440 | 441 | IrArgument::Argument(_) => { 442 | unreachable!("Arguments are called with unknown arity") 443 | } 444 | 445 | IrArgument::Function(f) => { 446 | for arg in ssa.args.iter().rev() { 447 | match arg { 448 | IrArgument::Local(local) => { 449 | let local_reg = local_to_register.get(&local).unwrap(); 450 | let local_location = local_reg.convert_to_instr_arg(); 451 | if local_location.is_register() { 452 | // push local 453 | if local_location.is_64_bit() != 0 { 454 | code.data.push(0x41); 455 | } 456 | code.data.push(0x50 | local_location.get_register()); 457 | } else { 458 | // mov rax, [rbp - offset] 459 | generate_mov( 460 | &mut code, 461 | Register::Rax, 462 | *local_reg, 463 | &mut stack_allocated_local_count, 464 | ); 465 | 466 | // push rax 467 | 
code.data.push(0x50); 468 | } 469 | } 470 | 471 | IrArgument::Argument(arg) => { 472 | // mov rax, arg 473 | generate_mov( 474 | &mut code, 475 | Register::Rax, 476 | Register::convert_arg_register_id(*arg), 477 | &mut stack_allocated_local_count, 478 | ); 479 | 480 | // push rax 481 | code.data.push(0x50); 482 | } 483 | 484 | IrArgument::Function(f) => { 485 | // lea rax, [rel func] 486 | generate_lea( 487 | &mut code, 488 | Register::Rax, 489 | f, 490 | &mut stack_allocated_local_count, 491 | ); 492 | 493 | // push rax 494 | code.data.push(0x50); 495 | } 496 | } 497 | } 498 | 499 | // mov rax, rsp 500 | generate_mov( 501 | &mut code, 502 | Register::Rax, 503 | Register::Rsp, 504 | &mut stack_allocated_local_count, 505 | ); 506 | 507 | // Push arguments 508 | for i in 0..func.argc { 509 | let reg = 510 | Register::convert_arg_register_id(i).convert_to_instr_arg(); 511 | if !reg.is_register() { 512 | break; 513 | } 514 | 515 | if reg.is_64_bit() != 0 { 516 | code.data.push(0x41); 517 | } 518 | 519 | code.data.push(0x50 | reg.get_register()); 520 | } 521 | 522 | // mov rdi, rax 523 | generate_mov( 524 | &mut code, 525 | Register::Rdi, 526 | Register::Rax, 527 | &mut stack_allocated_local_count, 528 | ); 529 | 530 | // mov rsi, len 531 | let len = ssa.args.len() * 8; 532 | code.data.push(0xbe); 533 | code.data.push((len & 0xff) as u8); 534 | code.data.push(((len >> 8) & 0xff) as u8); 535 | code.data.push(((len >> 16) & 0xff) as u8); 536 | code.data.push(((len >> 24) & 0xff) as u8); 537 | 538 | // mov rdx, size 539 | let mut size = 0; 540 | for func in module.funcs.iter() { 541 | if &func.name == f { 542 | size = (func.argc + 1) * 8; 543 | break; 544 | } 545 | } 546 | code.data.push(0xba); 547 | code.data.push((size & 0xff) as u8); 548 | code.data.push(((size >> 8) & 0xff) as u8); 549 | code.data.push(((size >> 16) & 0xff) as u8); 550 | code.data.push(((size >> 24) & 0xff) as u8); 551 | 552 | // call rccopy 553 | code.data.push(0xe8); 554 | code.func_refs 555 | 
.insert(code.data.len(), String::from("rccopy")); 556 | if !code.func_addrs.contains_key("rccopy") { 557 | code.func_addrs.insert(String::from("rccopy"), 0..0); 558 | } 559 | code.data.push(0x00); 560 | code.data.push(0x00); 561 | code.data.push(0x00); 562 | code.data.push(0x00); 563 | 564 | // Pop original arguments 565 | for i in 0..func.argc { 566 | let reg = 567 | Register::convert_arg_register_id(i).convert_to_instr_arg(); 568 | if !reg.is_register() { 569 | break; 570 | } 571 | 572 | if reg.is_64_bit() != 0 { 573 | code.data.push(0x41); 574 | } 575 | 576 | code.data.push(0x58 | reg.get_register()); 577 | } 578 | 579 | // sub rsp, len 580 | code.data.push(0x48); 581 | code.data.push(0x81); 582 | code.data.push(0xec); 583 | code.data.push((len & 0xff) as u8); 584 | code.data.push(((len >> 8) & 0xff) as u8); 585 | code.data.push(((len >> 16) & 0xff) as u8); 586 | code.data.push(((len >> 24) & 0xff) as u8); 587 | 588 | if let Some(local) = ssa.local { 589 | // mov local, rax 590 | generate_mov( 591 | &mut code, 592 | *local_to_register.get(&local).unwrap(), 593 | Register::Rax, 594 | &mut stack_allocated_local_count, 595 | ); 596 | } 597 | } 598 | } 599 | } 600 | 601 | IrInstruction::Call(known_arity) => { 602 | if register_lifetimes[Register::R11.revert_to_nonarg_register_id()] != 0 { 603 | // push r11 604 | code.data.push(0x41); 605 | code.data.push(0x53); 606 | } 607 | 608 | // Push arguments 609 | for i in 0..func.argc { 610 | let reg = Register::convert_arg_register_id(i).convert_to_instr_arg(); 611 | if !reg.is_register() { 612 | break; 613 | } 614 | 615 | if reg.is_64_bit() != 0 { 616 | code.data.push(0x41); 617 | } 618 | 619 | code.data.push(0x50 | reg.get_register()); 620 | } 621 | 622 | if known_arity { 623 | // First 6 arguments are stored in registers 624 | for (i, arg) in ssa.args.iter().skip(1).enumerate() { 625 | let arg_reg = Register::convert_arg_register_id(i); 626 | 627 | match arg { 628 | IrArgument::Local(local) => { 629 | let local_reg = 
*local_to_register.get(local).unwrap(); 630 | 631 | // mov arg, local 632 | generate_mov( 633 | &mut code, 634 | arg_reg, 635 | local_reg, 636 | &mut stack_allocated_local_count, 637 | ); 638 | } 639 | 640 | IrArgument::Argument(arg) => { 641 | let local_reg = Register::convert_arg_register_id(*arg); 642 | 643 | // mov arg, local 644 | generate_mov( 645 | &mut code, 646 | arg_reg, 647 | local_reg, 648 | &mut stack_allocated_local_count, 649 | ); 650 | } 651 | 652 | IrArgument::Function(func) => { 653 | // lea arg, [rel func] 654 | generate_lea( 655 | &mut code, 656 | arg_reg, 657 | func, 658 | &mut stack_allocated_local_count, 659 | ); 660 | } 661 | } 662 | 663 | if i == ARG_REGISTER_COUNT - 1 { 664 | break; 665 | } 666 | } 667 | 668 | // Rest of the arguments are stored on the stack 669 | for arg in ssa.args.iter().skip(ARG_REGISTER_COUNT + 1).rev() { 670 | match arg { 671 | IrArgument::Local(local) => { 672 | let local_reg = *local_to_register.get(local).unwrap(); 673 | let local_location = local_reg.convert_to_instr_arg(); 674 | 675 | if local_location.is_register() { 676 | // push local 677 | if local_location.is_64_bit() != 0 { 678 | code.data.push(0x41); 679 | } 680 | code.data.push(0x50 | local_location.get_register()); 681 | } else { 682 | // mov rax, [rbp - offset] 683 | generate_mov( 684 | &mut code, 685 | Register::Rax, 686 | local_reg, 687 | &mut stack_allocated_local_count, 688 | ); 689 | 690 | // push rax 691 | code.data.push(0x50); 692 | } 693 | } 694 | 695 | IrArgument::Argument(_) => todo!(), 696 | 697 | IrArgument::Function(func) => { 698 | // lea rax, [rel func] 699 | generate_lea( 700 | &mut code, 701 | Register::Rax, 702 | func, 703 | &mut stack_allocated_local_count, 704 | ); 705 | 706 | // push rax 707 | code.data.push(0x50); 708 | } 709 | } 710 | } 711 | 712 | match ssa.args.first().unwrap() { 713 | IrArgument::Local(_) => todo!(), 714 | IrArgument::Argument(_) => todo!(), 715 | 716 | IrArgument::Function(func) => { 717 | // call func 718 | 
code.data.push(0xe8); 719 | 720 | // Insert the label 721 | code.func_refs.insert(code.data.len(), func.clone()); 722 | 723 | // Value 724 | code.data.push(0x10); 725 | code.data.push(0x00); 726 | code.data.push(0x00); 727 | code.data.push(0x00); 728 | } 729 | } 730 | } else { 731 | for arg in ssa.args.iter().skip(1).rev() { 732 | match arg { 733 | IrArgument::Local(local) => { 734 | // mov rax, local 735 | generate_mov( 736 | &mut code, 737 | Register::Rax, 738 | *local_to_register.get(local).unwrap(), 739 | &mut stack_allocated_local_count, 740 | ); 741 | } 742 | 743 | IrArgument::Argument(arg) => { 744 | // mov rax, arg 745 | generate_mov( 746 | &mut code, 747 | Register::Rax, 748 | Register::convert_arg_register_id(*arg), 749 | &mut stack_allocated_local_count, 750 | ); 751 | } 752 | 753 | IrArgument::Function(func) => { 754 | // lea rax, [rel func] 755 | generate_lea( 756 | &mut code, 757 | Register::Rax, 758 | func, 759 | &mut stack_allocated_local_count, 760 | ); 761 | } 762 | } 763 | 764 | // push rax 765 | code.data.push(0x50); 766 | } 767 | 768 | // mov rsi, called_argc 769 | let called_argc = ssa.args.len() - 1; 770 | code.data.push(0xbe); 771 | code.data.push((called_argc & 0xff) as u8); 772 | code.data.push(((called_argc >> 8) & 0xff) as u8); 773 | code.data.push(((called_argc >> 16) & 0xff) as u8); 774 | code.data.push(((called_argc >> 24) & 0xff) as u8); 775 | 776 | match ssa.args.first().unwrap() { 777 | IrArgument::Local(local) => { 778 | // mov rdi, local 779 | generate_mov( 780 | &mut code, 781 | Register::Rdi, 782 | *local_to_register.get(local).unwrap(), 783 | &mut stack_allocated_local_count, 784 | ); 785 | } 786 | 787 | IrArgument::Argument(arg) => { 788 | // mov rdi, arg 789 | generate_mov( 790 | &mut code, 791 | Register::Rdi, 792 | Register::convert_arg_register_id(*arg), 793 | &mut stack_allocated_local_count, 794 | ); 795 | } 796 | 797 | IrArgument::Function(func) => { 798 | // lea rdi, [rel func] 799 | generate_lea( 800 | &mut code, 801 
| Register::Rdi, 802 | func, 803 | &mut stack_allocated_local_count, 804 | ); 805 | } 806 | } 807 | 808 | // mov rdx, rsp 809 | generate_mov( 810 | &mut code, 811 | Register::Rdx, 812 | Register::Rsp, 813 | &mut stack_allocated_local_count, 814 | ); 815 | 816 | // call call_unknown_arity 817 | code.data.push(0xe8); 818 | code.func_refs 819 | .insert(code.data.len(), String::from("call_unknown_arity")); 820 | if !code.func_addrs.contains_key("call_unknown_arity") { 821 | code.func_addrs 822 | .insert(String::from("call_unknown_arity"), 0..0); 823 | } 824 | code.data.push(0x00); 825 | code.data.push(0x00); 826 | code.data.push(0x00); 827 | code.data.push(0x00); 828 | } 829 | 830 | // Pop arguments passed into the function and arguments saved 831 | let mut pop_count = ssa.args.len() - 1; 832 | if known_arity { 833 | if pop_count > ARG_REGISTER_COUNT { 834 | pop_count -= ARG_REGISTER_COUNT; 835 | } else { 836 | pop_count = 0; 837 | } 838 | } 839 | pop_count *= 8; 840 | if pop_count != 0 { 841 | // add rsp, pop_count 842 | code.data.push(0x48); 843 | code.data.push(0x81); 844 | code.data.push(0xc4); 845 | code.data.push((pop_count & 0xff) as u8); 846 | code.data.push(((pop_count >> 8) & 0xff) as u8); 847 | code.data.push(((pop_count >> 16) & 0xff) as u8); 848 | code.data.push(((pop_count >> 24) & 0xff) as u8); 849 | } 850 | 851 | // Pop original arguments 852 | for i in (0..func.argc).rev() { 853 | let reg = Register::convert_arg_register_id(i).convert_to_instr_arg(); 854 | if !reg.is_register() { 855 | continue; 856 | } 857 | 858 | if reg.is_64_bit() != 0 { 859 | code.data.push(0x41); 860 | } 861 | 862 | code.data.push(0x58 | reg.get_register()); 863 | } 864 | 865 | if register_lifetimes[Register::R11.revert_to_nonarg_register_id()] != 0 { 866 | // pop r11 867 | code.data.push(0x41); 868 | code.data.push(0x5b); 869 | } 870 | 871 | if let Some(local) = ssa.local { 872 | // mov local, rax 873 | let local_reg = Register::convert_nonarg_register_id(local); 874 | 
generate_mov( 875 | &mut code, 876 | local_reg, 877 | Register::Rax, 878 | &mut stack_allocated_local_count, 879 | ); 880 | } 881 | } 882 | 883 | IrInstruction::RcInc => { 884 | let mut register = Register::Rax; 885 | match ssa.args.first().unwrap() { 886 | IrArgument::Local(local) => { 887 | register = *local_to_register.get(local).unwrap(); 888 | } 889 | 890 | IrArgument::Argument(arg) => { 891 | register = Register::convert_arg_register_id(*arg); 892 | } 893 | 894 | IrArgument::Function(_) => (), 895 | } 896 | 897 | if !matches!(register, Register::Rax) { 898 | // mov rax, register 899 | generate_mov( 900 | &mut code, 901 | Register::Rax, 902 | register, 903 | &mut stack_allocated_local_count, 904 | ); 905 | 906 | // test al, 0x1 907 | code.data.push(0xa8); 908 | code.data.push(0x01); 909 | 910 | // jne rip+4 911 | code.data.push(0x75); 912 | code.data.push(0x04); 913 | 914 | // add dword ptr [rax - 8], 0x1 915 | code.data.push(0x83); 916 | code.data.push(0x40); 917 | code.data.push(0xf8); 918 | code.data.push(0x01); 919 | } 920 | } 921 | 922 | IrInstruction::RcFuncFree => { 923 | if !matches!(ssa.args.first().unwrap(), IrArgument::Function(_)) { 924 | // Push arguments 925 | for i in 0..func.argc { 926 | let reg = Register::convert_arg_register_id(i).convert_to_instr_arg(); 927 | if !reg.is_register() { 928 | break; 929 | } 930 | 931 | if reg.is_64_bit() != 0 { 932 | code.data.push(0x41); 933 | } 934 | 935 | code.data.push(0x50 | reg.get_register()); 936 | } 937 | 938 | let register; 939 | match ssa.args.first().unwrap() { 940 | IrArgument::Local(local) => { 941 | register = *local_to_register.get(local).unwrap(); 942 | } 943 | 944 | IrArgument::Argument(arg) => { 945 | register = Register::convert_arg_register_id(*arg); 946 | } 947 | 948 | &IrArgument::Function(_) => unreachable!(), 949 | } 950 | 951 | // mov rdi, register 952 | generate_mov( 953 | &mut code, 954 | Register::Rdi, 955 | register, 956 | &mut stack_allocated_local_count, 957 | ); 958 | 959 | // 
call rcfuncfree 960 | code.data.push(0xe8); 961 | code.func_refs 962 | .insert(code.data.len(), String::from("rcfuncfree")); 963 | if !code.func_addrs.contains_key("rcfuncfree") { 964 | code.func_addrs.insert(String::from("rcfuncfree"), 0..0); 965 | } 966 | code.data.push(0x00); 967 | code.data.push(0x00); 968 | code.data.push(0x00); 969 | code.data.push(0x00); 970 | 971 | // Pop arguments 972 | for i in (0..func.argc).rev() { 973 | let reg = Register::convert_arg_register_id(i).convert_to_instr_arg(); 974 | if !reg.is_register() { 975 | continue; 976 | } 977 | 978 | if reg.is_64_bit() != 0 { 979 | code.data.push(0x41); 980 | } 981 | 982 | code.data.push(0x50 | reg.get_register()); 983 | } 984 | } 985 | } 986 | } 987 | } 988 | code.func_addrs.get_mut(&func.name).unwrap().end = code.len(); 989 | } 990 | 991 | code 992 | } 993 | 994 | /// Relocates all function addresses to their offset. 995 | pub fn relocate(code: &mut GeneratedCode) { 996 | for (code_addr, func) in code.func_refs.iter() { 997 | if let Some(range) = code.func_addrs.get(func) { 998 | let addr = ((range.start as i32 - *code_addr as i32) as i64 999 | + unsafe { *(code.data.as_ptr().add(*code_addr) as *const i32) } as i64 1000 | - 4) as u64; 1001 | 1002 | for (i, byte) in code.data.iter_mut().skip(*code_addr).enumerate() { 1003 | if i >= 4 { 1004 | break; 1005 | } 1006 | 1007 | *byte = ((addr >> (i * 8)) & 0xff) as u8; 1008 | } 1009 | } 1010 | } 1011 | } 1012 | -------------------------------------------------------------------------------- /src/frontend/parser.rs: -------------------------------------------------------------------------------- 1 | use logos::{Lexer, Logos, Span}; 2 | 3 | // convert_chars(&str) -> String 4 | // Converts escaped characters into an unescaped string. 
5 | fn convert_chars(s: &str, off: usize) -> String { 6 | let mut iter = s[off..s.len() - off].chars(); 7 | let mut s = String::new(); 8 | 9 | while let Some(c) = iter.next() { 10 | if c == '\\' { 11 | match iter.next().unwrap() { 12 | '\\' => s.push('\\'), 13 | '\"' => s.push('\"'), 14 | '\'' => s.push('\''), 15 | 'n' => s.push('\n'), 16 | 'r' => s.push('\r'), 17 | 't' => s.push('\t'), 18 | '0' => s.push('\0'), 19 | c => { 20 | s.push('\\'); 21 | s.push(c) 22 | } 23 | }; 24 | } else { 25 | s.push(c); 26 | } 27 | } 28 | 29 | s 30 | } 31 | 32 | // The tokens parsed by the lexer. 33 | #[derive(Logos, PartialEq, Debug, Clone)] 34 | pub enum Token { 35 | // Brackets 36 | #[token("(")] 37 | LParen, 38 | 39 | #[token(")")] 40 | RParen, 41 | 42 | #[token("[")] 43 | LBrack, 44 | 45 | #[token("]")] 46 | RBrack, 47 | 48 | #[token("{")] 49 | LBrace, 50 | 51 | #[token("}")] 52 | RBrace, 53 | 54 | // Whitespace 55 | #[token("\n")] 56 | Newline, 57 | 58 | #[regex(r"([ \t\f\r]|\\\n)+", logos::skip)] 59 | Whitespace, 60 | 61 | #[regex(r"#[^\n]*", logos::skip)] 62 | #[regex(r"\{-([^-]*-+)+\}", logos::skip)] 63 | Comment, 64 | 65 | // Error 66 | #[error] 67 | Error, 68 | 69 | // Punctuation and symbols 70 | #[token(":")] 71 | Colon, 72 | 73 | #[token("::")] 74 | ColonColon, 75 | 76 | #[token(",")] 77 | Comma, 78 | 79 | #[token("\\")] 80 | Backslash, 81 | 82 | #[token(".")] 83 | Dot, 84 | 85 | #[token("$")] 86 | Dollar, 87 | 88 | #[token(";")] 89 | Semicolon, 90 | 91 | #[token("|")] 92 | Bar, 93 | 94 | #[token("=")] 95 | Assign, 96 | 97 | #[regex(r"(;~!\$%\^&\*\-\+|\./\?)+")] 98 | Operator, 99 | 100 | // Numbers 101 | #[regex(r"[0-9]+", |lex| lex.slice().parse())] 102 | #[regex(r"0x[0-9a-fA-F]+", |lex| i64::from_str_radix(&lex.slice()[2..], 16))] 103 | #[regex(r"0b[01]+", |lex| i64::from_str_radix(&lex.slice()[2..], 2))] 104 | Int(i64), 105 | 106 | #[regex(r"[0-9]+(\.[0-9]*([eE][+-]?[0-9]+)?|[eE][+-]?[0-9]+)", |lex| lex.slice().parse())] 107 | Float(f64), 108 | 109 | 
#[regex(r"[0-9]+u", |lex| { 110 | let v = lex.slice(); 111 | v[..v.len() - 1].parse() 112 | })] 113 | #[regex(r"[a-fA-F0-9]+h", |lex| { 114 | let v = lex.slice(); 115 | u64::from_str_radix(&v[..v.len() - 1], 16) 116 | })] 117 | #[regex(r"[01]+b", |lex| { 118 | let v = lex.slice(); 119 | u64::from_str_radix(&v[..v.len() - 1], 2) 120 | })] 121 | Word(u64), 122 | 123 | #[regex(r#"'([^\\']|\\[nrt'"0])'"#, |lex| convert_chars(lex.slice(), 1).bytes().next().unwrap())] 124 | Char(u8), 125 | 126 | #[regex(r"'[a-zA-Z_0-9]+", |lex| lex.slice()[1..].to_owned())] 127 | Generic(String), 128 | 129 | // Symbols (variables and stuff) 130 | #[regex(r"[a-zA-Z_][a-zA-Z0-9_']*")] 131 | Symbol, 132 | 133 | // Annotations 134 | #[regex(r"@[a-z_]+")] 135 | Annotation, 136 | 137 | // Strings 138 | #[regex(r#""([^\\"]|\\.)*""#, |lex| convert_chars(lex.slice(), 1))] 139 | #[regex(r##"#"([^"]|"[^#])*"#"##, |lex| convert_chars(lex.slice(), 2))] 140 | String(String), 141 | 142 | // Arrows 143 | #[token("+>")] 144 | PlusArrow, 145 | 146 | #[token("->")] 147 | RightArrow, 148 | 149 | #[token("=>")] 150 | ThiccArrow, 151 | 152 | // Keywords 153 | #[token("let")] 154 | Let, 155 | 156 | #[token("in")] 157 | In, 158 | 159 | #[token("import")] 160 | Import, 161 | 162 | #[token("module")] 163 | Module, 164 | 165 | #[token("extern")] 166 | Extern, 167 | 168 | #[token("type")] 169 | Type, 170 | 171 | #[token("ptr")] 172 | Pointer, 173 | 174 | #[token("match")] 175 | Match, 176 | 177 | #[token("to")] 178 | To, 179 | 180 | Unreachable, 181 | } 182 | 183 | // Represents a parser. 184 | struct Parser<'a> { 185 | // The lexer the parser uses internally. 186 | lexer: Lexer<'a, Token>, 187 | 188 | // The tokens already parsed 189 | tokens: Vec<(Token, Span)>, 190 | 191 | // The current position of the parser. 
192 | token_pos: usize, 193 | } 194 | 195 | impl<'a> Parser<'a> { 196 | // new(&str) -> Parser 197 | // Creates a new parser 198 | fn new(s: &str) -> Parser { 199 | Parser { 200 | lexer: Token::lexer(s), 201 | tokens: vec![], 202 | token_pos: 0, 203 | } 204 | } 205 | 206 | // next(&mut self) -> Option<&(Token, Span)> 207 | // Gets the next token. 208 | fn next(&mut self) -> Option<&(Token, Span)> { 209 | // Get token from list of already parsed tokens if it exists 210 | if self.token_pos < self.tokens.len() { 211 | let token = &self.tokens[self.token_pos]; 212 | self.token_pos += 1; 213 | Some(token) 214 | 215 | // Otherwise get token from the lexer 216 | } else { 217 | self.tokens.push((self.lexer.next()?, self.lexer.span())); 218 | self.token_pos += 1; 219 | self.tokens.last() 220 | } 221 | } 222 | 223 | // peek(&mut self) -> Option<&(Token, Span)> 224 | // Peeks at the next token. 225 | fn peek(&mut self) -> Option<(&Token, Span)> { 226 | // Get token from list of already parsed tokens if it exists 227 | if self.token_pos < self.tokens.len() { 228 | let token = &self.tokens[self.token_pos]; 229 | Some((&token.0, token.1.clone())) 230 | 231 | // Otherwise get token from lexer 232 | } else { 233 | self.tokens.push((self.lexer.next()?, self.lexer.span())); 234 | let token = self.tokens.last()?; 235 | Some((&token.0, token.1.clone())) 236 | } 237 | } 238 | 239 | // slice(&self) -> String 240 | // Returns the slice corresponding to the current token. 241 | fn slice(&mut self) -> String { 242 | if self.token_pos >= self.tokens.len() { 243 | self.peek(); 244 | } 245 | 246 | if self.token_pos < self.tokens.len() { 247 | let range = &self.tokens[self.token_pos].1; 248 | String::from(&self.lexer.source()[range.start..range.end]) 249 | } else { 250 | String::with_capacity(0) 251 | } 252 | } 253 | 254 | // span(&self) -> Span 255 | // Returns the current span. 
256 | fn span(&mut self) -> Span { 257 | if let Some((_, s)) = self.peek() { 258 | s 259 | } else { 260 | self.lexer.span() 261 | } 262 | } 263 | 264 | // save_state(&self) -> usize 265 | // Saves the current token position by returning it. 266 | fn save_state(&self) -> usize { 267 | self.token_pos 268 | } 269 | 270 | // return_state(&mut self, usize) -> () 271 | // Returns to a given state. 272 | fn return_state(&mut self, state: usize) { 273 | self.token_pos = state; 274 | } 275 | } 276 | 277 | #[derive(Debug, PartialEq, Clone)] 278 | pub enum Ast { 279 | Empty, 280 | 281 | // Numbers 282 | Int(Span, i64), 283 | Float(Span, f64), 284 | Word(Span, u64), 285 | Char(Span, u8), 286 | 287 | // String 288 | String(Span, String), 289 | 290 | // Symbol (variables and stuff) 291 | Symbol(Span, String), 292 | 293 | // Generic type ('a) 294 | Generic(Span, String), 295 | 296 | // Enum (ie, atoms) 297 | Enum(Span, String), 298 | 299 | // Annotations (@pure, @impure, @memoize, etc) 300 | Annotation(Span, String), 301 | 302 | // Lists 303 | List(Span, Vec), 304 | 305 | // Function Application 306 | Application(Span, Box, Vec), 307 | 308 | // Prefix expressions 309 | Prefix(Span, String, Box), 310 | 311 | // Infix expressions 312 | Infix(Span, String, Box, Box), 313 | 314 | // Casting 315 | As(Span, Box, Box), 316 | 317 | // Assignments 318 | Assign(Span, String, Box), 319 | 320 | // Assignments with types 321 | AssignTyped(Span, String, Box, Box), 322 | 323 | // Assignment of types 324 | AssignType(Span, String, Box), 325 | 326 | // Assignment of functions 327 | AssignFunction(Span, String, Vec<(String, Ast)>, Box), 328 | 329 | // Lambda functions 330 | Lambda(Span, Vec<(String, Ast)>, Box), 331 | 332 | // Match expressions 333 | Match(Span, Box, Vec<(Ast, Ast)>), 334 | 335 | // Scoping 336 | With(Span, Vec, Box), 337 | Walrus(Span, String, Box), 338 | 339 | // Imports 340 | Import(Span, Box, Vec), 341 | QualifiedImport(Span, Box, String), 342 | 343 | // Header 344 | 
Header(Span, Box, Vec<(Span, String, Ast)>, Vec), 345 | LibHeader(Span, Box, Vec<(Span, String, usize, bool, Ast)>), 346 | 347 | // External functions 348 | Extern(Span, String, String, Box), 349 | } 350 | 351 | impl Ast { 352 | pub fn get_span(&self) -> Span { 353 | match self { 354 | Self::Int(s, _) 355 | | Self::Float(s, _) 356 | | Self::Word(s, _) 357 | | Self::Char(s, _) 358 | | Self::String(s, _) 359 | | Self::List(s, _) 360 | | Self::Symbol(s, _) 361 | | Self::Generic(s, _) 362 | | Self::Enum(s, _) 363 | | Self::Annotation(s, _) 364 | | Self::Application(s, _, _) 365 | | Self::Prefix(s, _, _) 366 | | Self::Infix(s, _, _, _) 367 | | Self::As(s, _, _) 368 | | Self::Assign(s, _, _) 369 | | Self::AssignTyped(s, _, _, _) 370 | | Self::AssignType(s, _, _) 371 | | Self::AssignFunction(s, _, _, _) 372 | | Self::Match(s, _, _) 373 | | Self::Lambda(s, _, _) 374 | | Self::With(s, _, _) 375 | | Self::Walrus(s, _, _) 376 | | Self::Import(s, _, _) 377 | | Self::QualifiedImport(s, _, _) 378 | | Self::Header(s, _, _, _) 379 | | Self::LibHeader(s, _, _) 380 | | Self::Extern(s, _, _, _) => s.clone(), 381 | 382 | Self::Empty => panic!("uwu moment"), 383 | } 384 | } 385 | } 386 | 387 | #[derive(Debug)] 388 | pub struct ParseError { 389 | pub span: Span, 390 | pub msg: String, 391 | fatal: bool, 392 | } 393 | 394 | impl ParseError { 395 | // empty() -> Result 396 | // Creates an empty ParseError. 397 | fn empty() -> Result { 398 | Err(ParseError { 399 | span: Span { start: 0, end: 0 }, 400 | msg: String::with_capacity(0), 401 | fatal: false, 402 | }) 403 | } 404 | } 405 | 406 | // call_func(ident, ident, ident) -> Result 407 | // Calls a function and returns if an error was encountered. 408 | macro_rules! 
call_func { 409 | ($func: ident, $parser: ident, $state: ident) => { 410 | match $func($parser) { 411 | Ok(v) => v, 412 | Err(e) => { 413 | $parser.return_state($state); 414 | return Err(e); 415 | } 416 | } 417 | }; 418 | } 419 | 420 | // call_func_fatal(ident, ident, literal, literal, expr*) -> Result 421 | // Calls a function and returns a fatal error if unsuccessful. 422 | macro_rules! call_func_fatal 423 | { 424 | ($func: ident, $parser: ident, $format: literal $(,$vs: expr),*) => { 425 | match $func($parser) 426 | { 427 | Ok(v) => v, 428 | Err(e) if e.fatal => return Err(e), 429 | Err(_) => return Err(ParseError { 430 | span: $parser.span(), 431 | msg: format!($format $(,$vs),*), 432 | fatal: true 433 | }) 434 | } 435 | } 436 | } 437 | 438 | // call_optional(ident, ident) => Result 439 | // Calls a function and only returns if a fatal error is encountered. 440 | macro_rules! call_optional { 441 | ($func: ident, $parser: ident) => { 442 | match $func($parser) { 443 | Ok(v) => Ok(v), 444 | Err(e) if e.fatal => return Err(e), 445 | Err(e) => Err(e), 446 | } 447 | }; 448 | } 449 | 450 | // consume_nosave(ident, ident, ident, literal, literal, literal, expr*) -> Result 451 | // Consumes a token without saving it, returning if an error was encountered. 452 | macro_rules! consume_nosave 453 | { 454 | ($parser: ident, $token: ident, $state: ident, $fatal: literal, $format: literal $(,$vs: expr),*) => { 455 | match $parser.peek() 456 | { 457 | Some((Token::$token, _)) => { 458 | $parser.next(); 459 | } 460 | 461 | _ => { 462 | let span = $parser.span(); 463 | $parser.return_state($state); 464 | return Err(ParseError { 465 | span, 466 | msg: format!($format $(,$vs),*), 467 | fatal: $fatal 468 | }); 469 | } 470 | } 471 | } 472 | } 473 | 474 | // consume_save(ident, ident, ident, literal, literal, literal, expr*) -> Result 475 | // Consumes a token and saves it, returning if an error was encountered. 476 | macro_rules! 
consume_save 477 | { 478 | ($parser: ident, $token: ident, $state: ident, $fatal: literal, $format: literal $(,$vs: expr),*) => { 479 | match $parser.peek() 480 | { 481 | Some((Token::$token, s)) => { 482 | let v = ($parser.slice(), s); 483 | $parser.next(); 484 | v 485 | } 486 | 487 | _ => { 488 | let span = $parser.span(); 489 | $parser.return_state($state); 490 | return Err(ParseError { 491 | span, 492 | msg: format!($format $(,$vs),*), 493 | fatal: $fatal 494 | }) 495 | } 496 | }; 497 | } 498 | } 499 | 500 | // infixl_op(ident, ident, pat, pat) -> Result 501 | // Parses a left associative infix operator. 502 | macro_rules! infixl_op { 503 | ($parser: ident, $subfunc: ident, $op1: pat, $op2: pat) => {{ 504 | // Set up 505 | let state = $parser.save_state(); 506 | let mut left = call_func!($subfunc, $parser, state); 507 | 508 | loop { 509 | // Save current state 510 | let state2 = $parser.save_state(); 511 | newline($parser); 512 | 513 | // Check for operator 514 | if let Some(op) = $parser.peek() { 515 | // Get operator 516 | let op = match op.0 { 517 | $op1 | $op2 => String::from($parser.slice()), 518 | _ => { 519 | $parser.return_state(state2); 520 | break; 521 | } 522 | }; 523 | $parser.next(); 524 | newline($parser); 525 | 526 | // Get right hand side 527 | let right = 528 | call_func_fatal!($subfunc, $parser, "Expected value after infix operator"); 529 | 530 | // Build ast 531 | left = Ast::Infix( 532 | Span { 533 | start: left.get_span().start, 534 | end: right.get_span().end, 535 | }, 536 | op, 537 | Box::new(left), 538 | Box::new(right), 539 | ); 540 | 541 | // If there's no operator, break 542 | } else { 543 | break; 544 | } 545 | } 546 | 547 | Ok(left) 548 | }}; 549 | } 550 | 551 | // infixl_op(ident, ident, pat, pat) -> Result 552 | // Parses a right associative infix operator. 553 | macro_rules! 
infixr_op { 554 | ($parser: ident, $subfunc: ident, $op1: pat, $op2: pat) => {{ 555 | // Set up 556 | use std::mem::swap; 557 | let state = $parser.save_state(); 558 | let mut top = call_func!($subfunc, $parser, state); 559 | let mut acc = &mut top; 560 | let mut first = true; 561 | let mut last = Span { start: 0, end: 0 }; 562 | 563 | loop { 564 | // Save current state 565 | let state2 = $parser.save_state(); 566 | newline($parser); 567 | 568 | // Check for operator 569 | if let Some(op) = $parser.peek() { 570 | // Get operator 571 | let op = match op.0 { 572 | $op1 | $op2 => String::from($parser.slice()), 573 | _ => { 574 | $parser.return_state(state2); 575 | break; 576 | } 577 | }; 578 | $parser.next(); 579 | newline($parser); 580 | 581 | // Get right hand side 582 | let right = 583 | call_func_fatal!($subfunc, $parser, "Expected value after infix operator"); 584 | last = right.get_span(); 585 | 586 | #[allow(unused_assignments)] 587 | if first { 588 | let mut t1 = Ast::Empty; 589 | let mut t2 = Ast::Empty; 590 | acc = &mut t1; 591 | swap(&mut t2, &mut top); 592 | top = Ast::Infix( 593 | Span { 594 | start: t2.get_span().start, 595 | end: right.get_span().end, 596 | }, 597 | op, 598 | Box::new(t2), 599 | Box::new(right), 600 | ); 601 | first = false; 602 | acc = &mut top; 603 | } else { 604 | let mut t = Ast::Empty; 605 | if let Ast::Infix(_, _, _, r) = acc { 606 | let r1 = &mut **r; 607 | swap(r1, &mut t); 608 | let ast = Ast::Infix( 609 | Span { 610 | start: t.get_span().start, 611 | end: right.get_span().end, 612 | }, 613 | op, 614 | Box::new(t), 615 | Box::new(right), 616 | ); 617 | *r1 = ast; 618 | acc = r1; 619 | } 620 | } 621 | 622 | // If there's no operator, break 623 | } else { 624 | break; 625 | } 626 | } 627 | 628 | acc = &mut top; 629 | if !first { 630 | while let Ast::Infix(s, _, _, r) = acc { 631 | s.end = last.end; 632 | acc = r; 633 | } 634 | } 635 | 636 | Ok(top) 637 | }}; 638 | } 639 | 640 | // newline(&mut Parser) -> () 641 | // Optionally 
parses newlines. 642 | fn newline(parser: &mut Parser) { 643 | while let Some((Token::Newline, _)) = parser.peek() { 644 | parser.next(); 645 | } 646 | } 647 | 648 | // symbol(&mut Parser) -> Result 649 | // Parses a symbol. 650 | fn symbol(parser: &mut Parser) -> Result { 651 | let state = parser.save_state(); 652 | let (token, span) = consume_save!(parser, Symbol, state, false, ""); 653 | Ok(Ast::Symbol(span, token)) 654 | } 655 | 656 | // access_member(&mut Parser) -> Result 657 | // Parses accessing a member. 658 | fn access_member(parser: &mut Parser) -> Result { 659 | infixl_op!(parser, symbol, Token::ColonColon, Token::Unreachable) 660 | } 661 | 662 | // value(&mut Parser) -> Result 663 | // Gets the next value. 664 | fn value(parser: &mut Parser) -> Result { 665 | // Parse symbols/accessing members 666 | if let Ok(v) = call_optional!(access_member, parser) { 667 | return Ok(v); 668 | } 669 | 670 | // Get token 671 | let (token, _span) = match parser.peek() { 672 | Some(v) => v, 673 | None => return ParseError::empty(), 674 | }; 675 | 676 | /* 677 | // Check for int 678 | if let Token::Int(n) = token { 679 | let n = *n; 680 | parser.next(); 681 | Ok(Ast::Int(span, n)) 682 | 683 | // Check for float 684 | } else if let Token::Float(n) = token { 685 | let n = *n; 686 | parser.next(); 687 | Ok(Ast::Float(span, n)) 688 | 689 | // Check for word 690 | } else if let Token::Word(n) = token { 691 | let n = *n; 692 | parser.next(); 693 | Ok(Ast::Word(span, n)) 694 | 695 | // Check for char 696 | } else if let Token::Char(c) = token { 697 | let c = *c; 698 | parser.next(); 699 | Ok(Ast::Char(span, c)) 700 | 701 | // Check for string 702 | } else if let Token::String(s) = token { 703 | let s = s.clone(); 704 | parser.next(); 705 | Ok(Ast::String(span, s)) 706 | 707 | // Check for enum 708 | } else if let Token::Enum = token { 709 | let s = parser.span(); 710 | let state = parser.save_state(); 711 | parser.next(); 712 | let (t, s2) = consume_save!(parser, Symbol, state, 
true, ""); 713 | Ok(Ast::Enum( 714 | Span { 715 | start: s.start, 716 | end: s2.end, 717 | }, 718 | t, 719 | )) 720 | 721 | // True 722 | } else if let Token::True = token { 723 | parser.next(); 724 | Ok(Ast::True(span)) 725 | 726 | // False 727 | } else if let Token::False = token { 728 | parser.next(); 729 | Ok(Ast::False(span)) 730 | 731 | // Parenthesised expressions 732 | } else */ 733 | if let Token::LParen = token { 734 | // Get value 735 | let state = parser.save_state(); 736 | parser.next(); 737 | newline(parser); 738 | let value = match expression(parser) { 739 | Ok(v) => v, 740 | Err(e) => { 741 | parser.return_state(state); 742 | return Err(e); 743 | } 744 | }; 745 | 746 | // Get right parenthesis 747 | newline(parser); 748 | consume_nosave!(parser, RParen, state, true, ""); 749 | Ok(value) 750 | 751 | // Not a value 752 | } else { 753 | ParseError::empty() 754 | } 755 | } 756 | 757 | fn _as(parser: &mut Parser) -> Result { 758 | let value = value(parser)?; 759 | 760 | if let Some((Token::Colon, _)) = parser.peek() { 761 | parser.next(); 762 | let _type = call_func_fatal!(type_expr, parser, "Expected type after `:`"); 763 | 764 | Ok(Ast::As( 765 | Span { 766 | start: value.get_span().start, 767 | end: _type.get_span().end, 768 | }, 769 | Box::new(value), 770 | Box::new(_type), 771 | )) 772 | } else { 773 | Ok(value) 774 | } 775 | } 776 | 777 | // application(&mut Parser) -> Result 778 | // Parses function application. 
779 | fn application(parser: &mut Parser) -> Result { 780 | let mut func = _as(parser)?; 781 | 782 | loop { 783 | let right = match _as(parser) { 784 | Ok(v) => v, 785 | Err(e) if e.fatal => break Err(e), 786 | Err(_) => break Ok(func), 787 | }; 788 | 789 | if let Ast::Application(s, _, v) = &mut func { 790 | s.end = right.get_span().end; 791 | v.push(right); 792 | } else { 793 | func = Ast::Application( 794 | Span { 795 | start: func.get_span().start, 796 | end: right.get_span().end, 797 | }, 798 | Box::new(func), 799 | vec![right], 800 | ); 801 | } 802 | } 803 | } 804 | 805 | // list(&mut Parser) -> Result 806 | // Parses a list. 807 | fn list(parser: &mut Parser) -> Result { 808 | let state = parser.save_state(); 809 | let (_, start) = consume_save!(parser, LBrack, state, false, ""); 810 | let mut list = vec![]; 811 | 812 | loop { 813 | if !list.is_empty() { 814 | match parser.peek() { 815 | Some((Token::Comma, _)) => { 816 | parser.next(); 817 | } 818 | 819 | _ => break, 820 | } 821 | } 822 | 823 | newline(parser); 824 | list.push(match expression(parser) { 825 | Ok(v) => v, 826 | Err(e) if e.fatal => return Err(e), 827 | Err(_) => break, 828 | }); 829 | } 830 | 831 | newline(parser); 832 | let (_, end) = consume_save!( 833 | parser, 834 | RBrack, 835 | state, 836 | true, 837 | "Expected `]` after end of list" 838 | ); 839 | 840 | Ok(Ast::List( 841 | Span { 842 | start: start.start, 843 | end: end.end, 844 | }, 845 | list, 846 | )) 847 | } 848 | 849 | // lambda(&mut Parser) -> Result 850 | // Parses a lambda function. 
851 | fn lambda(parser: &mut Parser) -> Result { 852 | let state = parser.save_state(); 853 | let mut args = vec![]; 854 | let (_, span) = consume_save!(parser, Backslash, state, false, ""); 855 | 856 | // Get arguments 857 | loop { 858 | // Get comma 859 | if !args.is_empty() { 860 | match parser.peek() { 861 | Some((Token::Comma, _)) => { 862 | parser.next(); 863 | } 864 | 865 | _ => break, 866 | } 867 | } 868 | 869 | let arg = match declaration(parser) { 870 | Ok(v) => (v.1, v.2), 871 | Err(e) => { 872 | parser.return_state(state); 873 | return Err(e); 874 | } 875 | }; 876 | 877 | args.push(arg); 878 | } 879 | 880 | // Check that there is at least one argument 881 | if args.is_empty() { 882 | parser.return_state(state); 883 | return Err(ParseError { 884 | span: parser.span(), 885 | msg: String::from("Expected argument after `lambda`"), 886 | fatal: true, 887 | }); 888 | } 889 | 890 | // Get the assign operator 891 | let slice = parser.slice(); 892 | consume_nosave!(parser, Dot, state, true, "Expected `.`, got `{}`", slice); 893 | 894 | // Get the value 895 | newline(parser); 896 | let body = call_func_fatal!(apply_op, parser, "Expected function body after `=`"); 897 | 898 | Ok(Ast::Lambda( 899 | Span { 900 | start: span.start, 901 | end: body.get_span().end, 902 | }, 903 | args, 904 | Box::new(body), 905 | )) 906 | } 907 | 908 | // matchy(&mut Parser) -> Result 909 | // Parses a match expression. 
910 | fn matchy(parser: &mut Parser) -> Result { 911 | let state = parser.save_state(); 912 | let (_, span) = consume_save!(parser, Match, state, false, ""); 913 | 914 | // Get value 915 | let value = call_func_fatal!(apply_op, parser, "Expected expression after `match`"); 916 | let mut arms = vec![]; 917 | newline(parser); 918 | 919 | while let Some((Token::To, _)) = parser.peek() { 920 | parser.next(); 921 | let _type = call_func_fatal!(type_expr, parser, "Expected type after `to`"); 922 | newline(parser); 923 | consume_nosave!(parser, ThiccArrow, state, true, "Expected `=>` after type"); 924 | newline(parser); 925 | let value = call_func_fatal!(apply_op, parser, "Expected expression after `=>`"); 926 | arms.push((_type, value)); 927 | newline(parser); 928 | } 929 | 930 | // Error if no match arms 931 | if arms.is_empty() { 932 | return Err(ParseError { 933 | span: Span { 934 | start: span.start, 935 | end: value.get_span().end, 936 | }, 937 | msg: String::from("Expected `to` after match value"), 938 | fatal: true, 939 | }); 940 | } 941 | 942 | Ok(Ast::Match( 943 | Span { 944 | start: span.start, 945 | end: arms.last().unwrap().1.get_span().end, 946 | }, 947 | Box::new(value), 948 | arms, 949 | )) 950 | } 951 | 952 | // expression_values(&mut Parser) -> Result 953 | // Parses an expression. 954 | fn expression_values(parser: &mut Parser) -> Result { 955 | if let Ok(withy) = call_optional!(with, parser) { 956 | Ok(withy) 957 | } else if let Ok(lambda) = call_optional!(lambda, parser) { 958 | Ok(lambda) 959 | } else if let Ok(list) = call_optional!(list, parser) { 960 | Ok(list) 961 | } else if let Ok(matchy) = call_optional!(matchy, parser) { 962 | Ok(matchy) 963 | } else { 964 | application(parser) 965 | } 966 | } 967 | 968 | // apply_op(&mut Parser) -> Result 969 | // Gets the next infix application. 
970 | fn apply_op(parser: &mut Parser) -> Result { 971 | infixr_op!(parser, expression_values, Token::Dollar, Token::Unreachable) 972 | } 973 | 974 | // expression(&mut Parser) -> Result 975 | // Parses expressions chained by ;. 976 | fn expression(parser: &mut Parser) -> Result { 977 | infixr_op!(parser, apply_op, Token::Semicolon, Token::Unreachable) 978 | } 979 | 980 | // annotation(&mut Parser) -> Result 981 | // Parses an annotation. 982 | fn annotation(parser: &mut Parser) -> Result { 983 | let state = parser.save_state(); 984 | let (annotation, span) = consume_save!(parser, Annotation, state, false, ""); 985 | Ok(Ast::Annotation(span, annotation)) 986 | } 987 | 988 | // assignment_raw(&mut Parser) -> Result 989 | // Parses an assignment without any types or arguments. 990 | fn assignment_raw(parser: &mut Parser) -> Result { 991 | // Get the variable name 992 | let state = parser.save_state(); 993 | let (name, span) = consume_save!(parser, Symbol, state, false, ""); 994 | 995 | // Get the assign operator 996 | consume_nosave!(parser, Assign, state, false, ""); 997 | 998 | // Get the value 999 | newline(parser); 1000 | let value = call_func_fatal!(expression, parser, "Expected value after `=`"); 1001 | 1002 | Ok(Ast::Assign( 1003 | Span { 1004 | start: span.start, 1005 | end: value.get_span().end, 1006 | }, 1007 | name, 1008 | Box::new(value), 1009 | )) 1010 | } 1011 | 1012 | // type_symbol(&mut Parser) -> Result 1013 | // Parses a type symbol or parenthesised type. 
1014 | fn type_symbol(parser: &mut Parser) -> Result { 1015 | let (token, span) = match parser.peek() { 1016 | Some(v) => v, 1017 | None => return ParseError::empty(), 1018 | }; 1019 | 1020 | // Symbols 1021 | if let Token::Symbol = token { 1022 | let value = Ast::Symbol(span, parser.slice()); 1023 | parser.next(); 1024 | Ok(value) 1025 | 1026 | // Generics 1027 | } else if let Token::Generic(v) = token { 1028 | let value = Ast::Generic(span, v.to_owned()); 1029 | parser.next(); 1030 | Ok(value) 1031 | 1032 | // Parenthesised types 1033 | } else if let Token::LParen = token { 1034 | // Get value 1035 | let state = parser.save_state(); 1036 | parser.next(); 1037 | newline(parser); 1038 | 1039 | let value = match type_expr(parser) { 1040 | Ok(v) => v, 1041 | Err(e) => { 1042 | parser.return_state(state); 1043 | return Err(e); 1044 | } 1045 | }; 1046 | 1047 | // Get right parenthesis 1048 | newline(parser); 1049 | consume_nosave!(parser, RParen, state, true, "Expected right parenthesis"); 1050 | Ok(value) 1051 | 1052 | // Not a value 1053 | } else { 1054 | ParseError::empty() 1055 | } 1056 | } 1057 | 1058 | // type_tagged(&mut Parser) -> Result 1059 | // Parses a tagged type (a: T). 1060 | fn type_tagged(parser: &mut Parser) -> Result { 1061 | let state = parser.save_state(); 1062 | let s = call_func!(symbol, parser, state); 1063 | consume_nosave!(parser, Colon, state, false, ""); 1064 | let t = call_func_fatal!(type_symbol, parser, "Expected type after `:`"); 1065 | 1066 | Ok(Ast::Infix( 1067 | Span { 1068 | start: s.get_span().start, 1069 | end: t.get_span().end, 1070 | }, 1071 | String::from(":"), 1072 | Box::new(s), 1073 | Box::new(t), 1074 | )) 1075 | } 1076 | 1077 | // type_field(&mut Parser) -> Result 1078 | // Parses a field of a union or product type. 
1079 | fn type_field(parser: &mut Parser) -> Result { 1080 | if let Ok(v) = call_optional!(type_tagged, parser) { 1081 | Ok(v) 1082 | } else { 1083 | type_symbol(parser) 1084 | } 1085 | } 1086 | 1087 | // type_union(&mut Parser) -> Result 1088 | // Parses a union type declaration. 1089 | fn type_union(parser: &mut Parser) -> Result { 1090 | infixl_op!(parser, type_field, Token::Bar, Token::Unreachable) 1091 | } 1092 | 1093 | // type_expr(&mut Parser) -> Result 1094 | // Parses a type. 1095 | fn type_expr(parser: &mut Parser) -> Result { 1096 | infixr_op!(parser, type_union, Token::RightArrow, Token::Unreachable) 1097 | } 1098 | 1099 | // type_assignment(&mut Parser) -> Result 1100 | // Parses an assignment of a type. 1101 | fn type_assignment(parser: &mut Parser) -> Result { 1102 | // Get type keyword 1103 | let state = parser.save_state(); 1104 | let (_, span) = consume_save!(parser, Type, state, false, ""); 1105 | 1106 | // Get name of type 1107 | let (name, _) = consume_save!(parser, Symbol, state, true, "Expected symbol after type"); 1108 | 1109 | // Get assignment operator 1110 | consume_nosave!(parser, Assign, state, true, "Expected `=` after type name"); 1111 | newline(parser); 1112 | 1113 | // Get type 1114 | let _type = call_func_fatal!(type_expr, parser, "Expected type after `=`"); 1115 | 1116 | // Successfully return 1117 | Ok(Ast::AssignType( 1118 | Span { 1119 | start: span.start, 1120 | end: _type.get_span().end, 1121 | }, 1122 | name, 1123 | Box::new(_type), 1124 | )) 1125 | } 1126 | 1127 | // declaration(&mut Parser) -> Result<(Span, String, Ast), ParseError> 1128 | // Parses a declaration. 
1129 | fn declaration(parser: &mut Parser) -> Result<(Span, String, Ast), ParseError> { 1130 | // Get the variable name 1131 | let state = parser.save_state(); 1132 | let (name, span) = consume_save!(parser, Symbol, state, false, ""); 1133 | 1134 | // Get the colon 1135 | consume_nosave!(parser, Colon, state, false, ""); 1136 | 1137 | // Get the type 1138 | let type_val = call_func_fatal!(type_expr, parser, "Expected type after `:`"); 1139 | 1140 | Ok((span, name, type_val)) 1141 | } 1142 | 1143 | // assignment_func(&mut Parser) -> Result 1144 | // Parses an assignment for a function. 1145 | fn assignment_func(parser: &mut Parser) -> Result { 1146 | // Get the variable name 1147 | let state = parser.save_state(); 1148 | let mut args = vec![]; 1149 | let (name, span) = consume_save!(parser, Symbol, state, false, ""); 1150 | 1151 | // Get arguments 1152 | loop { 1153 | // Get comma 1154 | if !args.is_empty() { 1155 | match parser.peek() { 1156 | Some((Token::Comma, _)) => { 1157 | parser.next(); 1158 | } 1159 | 1160 | _ => break, 1161 | } 1162 | } 1163 | 1164 | let arg = match declaration(parser) { 1165 | Ok(v) => (v.1, v.2), 1166 | Err(e) => { 1167 | parser.return_state(state); 1168 | return Err(e); 1169 | } 1170 | }; 1171 | 1172 | args.push(arg); 1173 | } 1174 | 1175 | // Check that there is at least one argument 1176 | if args.is_empty() { 1177 | parser.return_state(state); 1178 | return ParseError::empty(); 1179 | } 1180 | 1181 | // Get the assign operator 1182 | let slice = parser.slice(); 1183 | consume_nosave!(parser, Assign, state, true, "Expected `=`, got `{}`", slice); 1184 | 1185 | // Get the value 1186 | newline(parser); 1187 | let value = call_func_fatal!(expression, parser, "Expected function body after `=`"); 1188 | 1189 | Ok(Ast::AssignFunction( 1190 | Span { 1191 | start: span.start, 1192 | end: value.get_span().end, 1193 | }, 1194 | name, 1195 | args, 1196 | Box::new(value), 1197 | )) 1198 | } 1199 | 1200 | // assignment(&mut Parser) -> Result 1201 
| // Parses an assignment. 1202 | fn assignment(parser: &mut Parser) -> Result { 1203 | if let Ok(typed) = call_optional!(assignment_raw, parser) { 1204 | Ok(typed) 1205 | } else { 1206 | assignment_func(parser) 1207 | } 1208 | } 1209 | 1210 | // with(&mut Parser) -> Result 1211 | // Parses a with expression (scoping). 1212 | fn with(parser: &mut Parser) -> Result { 1213 | // Get the with keyword 1214 | let state = parser.save_state(); 1215 | let span = parser.span(); 1216 | consume_nosave!(parser, Let, state, false, ""); 1217 | 1218 | // Get assignments 1219 | let mut assigns = vec![]; 1220 | loop { 1221 | let assign = match assignment(parser) { 1222 | Ok(v) => v, 1223 | Err(e) if e.fatal => return Err(e), 1224 | Err(_) => break, 1225 | }; 1226 | assigns.push(assign); 1227 | 1228 | // Newline 1229 | newline(parser); 1230 | } 1231 | 1232 | // Check that there is at least one assignment 1233 | if assigns.is_empty() { 1234 | parser.return_state(state); 1235 | return ParseError::empty(); 1236 | } 1237 | 1238 | // Get the body 1239 | consume_nosave!(parser, In, state, true, "Expected `in` after let bindings"); 1240 | newline(parser); 1241 | let body = call_func!(expression, parser, state); 1242 | 1243 | Ok(Ast::With( 1244 | Span { 1245 | start: span.start, 1246 | end: body.get_span().end, 1247 | }, 1248 | assigns, 1249 | Box::new(body), 1250 | )) 1251 | } 1252 | 1253 | /* 1254 | // import(&mut Parser) -> Result 1255 | // Parses an import statement. 
1256 | fn import(parser: &mut Parser) -> Result { 1257 | let state = parser.save_state(); 1258 | let (_, span) = consume_save!(parser, Import, state, false, ""); 1259 | let start = span.start; 1260 | 1261 | let name = call_func_fatal!(access_member, parser, "Expected module name after `import`"); 1262 | let mut end = name.get_span().end; 1263 | 1264 | let qualified = !matches!( 1265 | parser.peek(), 1266 | Some((Token::LParen, _)) 1267 | ); 1268 | 1269 | if qualified { 1270 | let mut alias = String::with_capacity(0); 1271 | if let Some((Token::As, _)) = parser.peek() { 1272 | parser.next(); 1273 | let (a, s) = consume_save!(parser, Symbol, state, true, "Expected alias after `as`"); 1274 | end = s.end; 1275 | alias = a 1276 | } 1277 | 1278 | Ok(Ast::QualifiedImport( 1279 | Span { start, end }, 1280 | Box::new(name), 1281 | alias, 1282 | )) 1283 | } else { 1284 | let mut imports = vec![]; 1285 | parser.next(); 1286 | loop { 1287 | newline(parser); 1288 | if parser.peek().is_none() { 1289 | parser.return_state(state); 1290 | return Err(ParseError { 1291 | span: parser.span(), 1292 | msg: String::from( 1293 | "Expected imported item or right parenthesis, got end of file", 1294 | ), 1295 | fatal: true, 1296 | }); 1297 | } 1298 | 1299 | if imports.is_empty() { 1300 | if let Some((Token::Mul, _)) = parser.peek() { 1301 | parser.next(); 1302 | consume_nosave!(parser, RParen, state, true, "Expected right parenthesis"); 1303 | break; 1304 | } 1305 | } else { 1306 | match parser.peek() { 1307 | Some((Token::Comma, _)) => { 1308 | parser.next(); 1309 | }, 1310 | 1311 | Some((Token::RParen, _)) => { 1312 | parser.next(); 1313 | break; 1314 | } 1315 | 1316 | _ => { 1317 | return Err(ParseError { 1318 | span: parser.span(), 1319 | msg: String::from("Expected comma or right parenthesis"), 1320 | fatal: true, 1321 | }); 1322 | } 1323 | } 1324 | } 1325 | 1326 | let (token, span) = parser.peek().unwrap(); 1327 | end = span.end; 1328 | 1329 | match token { 1330 | Token::Symbol => 
imports.push(parser.slice()), 1331 | _ => { 1332 | parser.return_state(state); 1333 | return Err(ParseError { 1334 | span: parser.span(), 1335 | msg: String::from("Expected imported item"), 1336 | fatal: true, 1337 | }); 1338 | } 1339 | } 1340 | 1341 | parser.next(); 1342 | } 1343 | 1344 | parser.next(); 1345 | Ok(Ast::Import(Span { start, end }, Box::new(name), imports)) 1346 | } 1347 | } 1348 | 1349 | // header(&mut Parser) -> Result 1350 | // Parses a header entry. 1351 | fn header(parser: &mut Parser) -> Result { 1352 | let state = parser.save_state(); 1353 | let (_, span) = consume_save!(parser, Module, state, false, ""); 1354 | let start = span.start; 1355 | let name = call_func_fatal!(access_member, parser, "Expected module name after `module`"); 1356 | let mut end = name.get_span().end; 1357 | 1358 | let mut exports = vec![]; 1359 | newline(parser); 1360 | let mut comma = false; 1361 | if let Some((Token::LParen, _)) = parser.peek() { 1362 | parser.next(); 1363 | 1364 | loop { 1365 | newline(parser); 1366 | if comma { 1367 | match parser.peek() { 1368 | Some((Token::Comma, _)) => { 1369 | parser.next(); 1370 | } 1371 | Some((Token::RParen, _)) => break, 1372 | _ => (), 1373 | } 1374 | } else { 1375 | comma = true; 1376 | } 1377 | 1378 | newline(parser); 1379 | if parser.peek().is_none() { 1380 | let span = parser.span(); 1381 | parser.return_state(state); 1382 | return Err(ParseError { 1383 | span, 1384 | msg: String::from( 1385 | "Expected exported item or right parenthesis, got end of file", 1386 | ), 1387 | fatal: true, 1388 | }); 1389 | } 1390 | 1391 | let (token, span) = parser.peek().unwrap(); 1392 | end = span.end; 1393 | 1394 | match token { 1395 | Token::RParen => break, 1396 | Token::Symbol => exports.push(match declaration(parser) { 1397 | Ok(v) => v, 1398 | Err(e) => { 1399 | parser.return_state(state); 1400 | return Err(e); 1401 | } 1402 | }), 1403 | 1404 | Token::Type => { 1405 | parser.next(); 1406 | if let Some((Token::Symbol, _)) = 
parser.peek() { 1407 | exports.push((parser.span(), parser.slice(), Ast::Empty)); 1408 | parser.next(); 1409 | } else { 1410 | return Err(ParseError { 1411 | span: parser.span(), 1412 | msg: String::from("Expected type name after `type`"), 1413 | fatal: true, 1414 | }); 1415 | } 1416 | } 1417 | 1418 | _ => { 1419 | let span = parser.span(); 1420 | parser.return_state(state); 1421 | return Err(ParseError { 1422 | span, 1423 | msg: String::from("Expected exported item or right parenthesis"), 1424 | fatal: true, 1425 | }); 1426 | } 1427 | } 1428 | } 1429 | 1430 | parser.next(); 1431 | } 1432 | 1433 | newline(parser); 1434 | 1435 | let mut imports = vec![]; 1436 | while let Ok(v) = call_optional!(import, parser) { 1437 | imports.push(v); 1438 | newline(parser); 1439 | } 1440 | 1441 | Ok(Ast::Header( 1442 | Span { start, end }, 1443 | Box::new(name), 1444 | exports, 1445 | imports, 1446 | )) 1447 | } 1448 | 1449 | // externy(&mut Parser) -> Result 1450 | // Parses an external function declaration. 
1451 | fn externy(parser: &mut Parser) -> Result { 1452 | let state = parser.save_state(); 1453 | consume_nosave!(parser, Extern, state, false, ""); 1454 | 1455 | let (c_func, s) = if let Some((Token::String(s), v)) = parser.peek() { 1456 | (s.clone(), v) 1457 | } else { 1458 | return Err(ParseError { 1459 | span: parser.span(), 1460 | msg: String::from("Expected string literal after `extern`"), 1461 | fatal: true, 1462 | }); 1463 | }; 1464 | 1465 | parser.next(); 1466 | newline(parser); 1467 | let (name, _) = consume_save!( 1468 | parser, 1469 | Symbol, 1470 | state, 1471 | true, 1472 | "Expected symbol after external function declaration" 1473 | ); 1474 | consume_nosave!( 1475 | parser, 1476 | Colon, 1477 | state, 1478 | true, 1479 | "Expected `:` after foreign function declaration" 1480 | ); 1481 | let _type = call_func_fatal!(type_expr, parser, "Expected type after `:`"); 1482 | 1483 | Ok(Ast::Extern( 1484 | Span { 1485 | start: s.start, 1486 | end: _type.get_span().end, 1487 | }, 1488 | c_func, 1489 | name, 1490 | Box::new(_type), 1491 | )) 1492 | } 1493 | */ 1494 | 1495 | // parse(&str) -> Result 1496 | // Parses curly code. 
1497 | pub fn parse(s: &str) -> Result, ParseError> { 1498 | let mut parser = Parser::new(s); 1499 | let mut lines = vec![]; 1500 | let p = &mut parser; 1501 | 1502 | newline(p); 1503 | /* 1504 | if let Ok(header) = call_optional!(header, p) { 1505 | lines.push(header); 1506 | }*/ 1507 | 1508 | while p.peek().is_some() { 1509 | // Parse one line 1510 | if let Ok(annotation) = call_optional!(annotation, p) { 1511 | lines.push(annotation); 1512 | } else if let Ok(assign) = call_optional!(assignment, p) { 1513 | lines.push(assign); 1514 | } else { 1515 | lines.push(match type_assignment(p) { 1516 | Ok(v) => v, 1517 | Err(e) if e.fatal => return Err(e), 1518 | Err(_) => { 1519 | let peeked = if p.peek().is_some() { 1520 | p.slice() 1521 | } else { 1522 | String::from("eof") 1523 | }; 1524 | return Err(ParseError { 1525 | span: p.span(), 1526 | msg: format!("Unexpected `{}`", peeked), 1527 | fatal: true, 1528 | }); 1529 | } 1530 | }); 1531 | } 1532 | /* 1533 | } else if let Ok(_type) = call_optional!(type_assignment, p) { 1534 | lines.push(_type); 1535 | } else { 1536 | lines.push(match externy(p) { 1537 | Ok(v) => v, 1538 | Err(e) if e.fatal => return Err(e), 1539 | Err(_) => { 1540 | let peeked = if p.peek().is_some() { 1541 | p.slice() 1542 | } else { 1543 | String::from("eof") 1544 | }; 1545 | return Err(ParseError { 1546 | span: p.span(), 1547 | msg: format!("Unexpected `{}`", peeked), 1548 | fatal: true, 1549 | }); 1550 | } 1551 | }); 1552 | */ 1553 | 1554 | // Skip newlines 1555 | newline(p); 1556 | } 1557 | 1558 | Ok(lines) 1559 | } 1560 | --------------------------------------------------------------------------------