├── .github └── workflows │ ├── book.yml │ └── ci.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE-APACHE ├── LICENSE-MIT ├── LICENSE.txt ├── README.md ├── blockalloc ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE.txt └── src │ └── lib.rs ├── book.toml ├── booksrc ├── 404.md ├── SUMMARY.md ├── chapter-alignment.md ├── chapter-allocation-api.md ├── chapter-allocation-impl.md ├── chapter-blocks.md ├── chapter-interp-alloc.md ├── chapter-interp-arrays.md ├── chapter-interp-bytecode.md ├── chapter-interp-compiler-design.md ├── chapter-interp-compiler-impl.md ├── chapter-interp-dicts.md ├── chapter-interp-parsing.md ├── chapter-interp-symbols-and-pairs.md ├── chapter-interp-tagged-ptrs.md ├── chapter-interp-vm-design.md ├── chapter-interp-vm-impl.md ├── chapter-managing-blocks.md ├── chapter-simple-bump.md ├── chapter-what-is-alloc.md ├── evalrus-medium.png ├── img │ ├── alignment.png │ ├── fragmented_block.png │ └── stickyimmix_block.png ├── introduction.md ├── part-allocators.md ├── part-interpreter.md └── part-stickyimmix.md ├── interpreter ├── .gitignore ├── Cargo.toml ├── LICENSE.txt ├── README.md └── src │ ├── arena.rs │ ├── array.rs │ ├── bytecode.rs │ ├── compiler.rs │ ├── containers.rs │ ├── dict.rs │ ├── error.rs │ ├── function.rs │ ├── hashable.rs │ ├── headers.rs │ ├── lexer.rs │ ├── list.rs │ ├── main.rs │ ├── memory.rs │ ├── number.rs │ ├── pair.rs │ ├── parser.rs │ ├── pointerops.rs │ ├── printer.rs │ ├── rawarray.rs │ ├── repl.rs │ ├── safeptr.rs │ ├── symbol.rs │ ├── symbolmap.rs │ ├── taggedptr.rs │ ├── text.rs │ └── vm.rs └── stickyimmix ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE.txt ├── README.md └── src ├── allocator.rs ├── blockmeta.rs ├── bumpblock.rs ├── constants.rs ├── heap.rs ├── lib.rs └── rawptr.rs /.github/workflows/book.yml: -------------------------------------------------------------------------------- 1 | name: github pages 2 | 3 | on: [push] 4 | 5 | jobs: 6 | deploy: 7 | runs-on: ubuntu-20.04 8 | steps: 9 | - 
uses: actions/checkout@v2 10 | 11 | - name: Setup mdBook 12 | uses: peaceiris/actions-mdbook@v1 13 | with: 14 | mdbook-version: '0.4.5' 15 | # mdbook-version: 'latest' 16 | 17 | - run: mdbook build 18 | 19 | - name: Deploy 20 | uses: peaceiris/actions-gh-pages@v3 21 | with: 22 | deploy_key: ${{ secrets.ACTIONS_DEPLOY_KEY }} 23 | publish_dir: ./book 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | # push: 3 | # branches: [master] 4 | # pull_request: 5 | # branches: [master] 6 | 7 | name: code-test 8 | 9 | jobs: 10 | test: 11 | name: Test Suite 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout sources 15 | uses: actions/checkout@v2 16 | 17 | - name: Install stable toolchain 18 | uses: actions-rs/toolchain@v1 19 | with: 20 | profile: minimal 21 | toolchain: stable 22 | override: true 23 | 24 | - name: Cargo fmt check blockalloc 25 | working-directory: ./blockalloc 26 | run: cargo fmt --all -- --check 27 | 28 | - name: Cargo fmt check stickyimmix 29 | working-directory: ./stickyimmix 30 | run: cargo fmt --all -- --check 31 | 32 | - name: Cargo fmt check interpreter 33 | working-directory: ./interpreter 34 | run: cargo fmt --all -- --check 35 | 36 | - name: Cargo test blockalloc 37 | working-directory: ./blockalloc 38 | run: cargo test 39 | 40 | - name: Cargo test stickyimmix 41 | working-directory: ./stickyimmix 42 | run: cargo test 43 | 44 | - name: Cargo test interpreter 45 | working-directory: ./interpreter 46 | run: cargo test 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | book 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We'd love your help! 4 | 5 | See list of [issues under discussion](https://github.com/rust-hosted-langs/runtimes-WG/labels/book), 6 | add to these discussions, suggest chapters, content. 7 | 8 | Submit a PR for anything from spelling and grammar to content accuracy and 9 | correctness to entire chapter content! Feel free to add yourself to the 10 | `contributors.txt` list. 11 | 12 | 13 | ## Licensing 14 | 15 | ### Book text 16 | 17 | Text in this repository is under the CC-BY 4.0 license - see `LICENSE.txt`. 18 | Copyrights retained by contributors. 19 | 20 | #### Contribution 21 | 22 | Unless you explicitly state otherwise, any book text contribution intentionally 23 | submitted for inclusion in the work by you shall be licensed as CC-BY 4.0 24 | without any additional terms or conditions. 25 | 26 | 27 | ### Code licenses 28 | 29 | Licensed under either of 30 | 31 | * Apache License, Version 2.0 32 | ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 33 | * MIT license 34 | ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 35 | 36 | at your option. 37 | 38 | #### Contribution 39 | 40 | Unless you explicitly state otherwise, any code contribution intentionally 41 | submitted for inclusion in the work by you, as defined in the Apache-2.0 42 | license, shall be dual licensed as above, without any additional terms or 43 | conditions. 44 | 45 | 46 | ## Code of Conduct 47 | 48 | All communication shall be conducted within the framework of the 49 | [Rust code of conduct](http://rust-lang.org/conduct.html). 
50 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Peter Liniker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Writing Interpreters in Rust: a Guide 2 | 3 | ![](https://github.com/rust-hosted-langs/book/workflows/code-test/badge.svg) 4 | 5 | This is an online book covering the lower level topics involved in writing an 6 | interpreter in Rust including: 7 | 8 | * memory management: allocation and garbage collection 9 | * compiling: expressions, functions, closures 10 | * virtual machines: bytecode, instruction dispatch 11 | 12 | 13 | ## Project vision 14 | 15 | From CPython to Ruby's YARV, V8 and SpiderMonkey, GHC to the JVM, most language 16 | runtimes are written in C/C++. 17 | 18 | We believe that Rust is eminently suitable for implementing languages and can 19 | provide significant productivity improvements over C and C++ while retaining 20 | the performance advantages and low level control of both. 21 | 22 | While there are a number of languages implemented in Rust available now, in 23 | varying states of completeness - interpreters, AOT compilers and 24 | JIT-compiled - our vision is singular: 25 | 26 | _To create a well documented reference compiler and runtime, 27 | permissively licensed, such that you can fork and morph it into your own 28 | programming language._ 29 | 30 | That is, a platform for bootstrapping other languages, written in Rust. 31 | To that end, the implementation provided here is not intended to be feature 32 | complete and cannot possibly represent every variation of programming 33 | language or local optimization. 34 | 35 | It is a lofty goal, and it certainly won't be the right approach for 36 | everybody. However, we hope it will help shift the landscape in favor of more 37 | memory-safe language implementations. 38 | 39 | 40 | ## Getting involved 41 | 42 | See `CONTRIBUTING.md` for licensing and how to get involved. 
43 | 44 | 45 | ## The contents 46 | 47 | The rendered book can be read [here](https://rust-hosted-langs.github.io/book/) 48 | while the accompanying source code can be browsed in this repository. 49 | -------------------------------------------------------------------------------- /blockalloc/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /blockalloc/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "blockalloc" 5 | version = "0.1.0" 6 | 7 | -------------------------------------------------------------------------------- /blockalloc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "blockalloc" 3 | version = "0.1.0" 4 | authors = ["Peter Liniker "] 5 | edition = "2018" 6 | license = "MIT OR Apache-2.0" 7 | 8 | -------------------------------------------------------------------------------- /blockalloc/src/lib.rs: -------------------------------------------------------------------------------- 1 | /// A block allocator for blocks of memory that must be: 2 | /// - powers of two in size 3 | /// - aligned to their size 4 | /// 5 | /// Internally this calls the stabilized std Alloc API. 6 | /// https://doc.rust-lang.org/std/alloc/index.html 7 | /// 8 | /// Usage: 9 | /// ``` 10 | /// extern crate blockalloc; 11 | /// use blockalloc::Block; 12 | /// 13 | /// let size = 4096; // must be a power of 2 14 | /// let block = Block::new(size).unwrap(); 15 | /// ``` 16 | /// 17 | /// Normal scoping rules will call Block::drop() when `block` goes out of scope 18 | /// causing the block to be fully deallocated. 
19 | use std::ptr::NonNull; 20 | 21 | // ANCHOR: DefBlockComponents 22 | pub type BlockPtr = NonNull; 23 | pub type BlockSize = usize; 24 | // ANCHOR_END: DefBlockComponents 25 | 26 | /// Set of possible block allocation failures 27 | // ANCHOR: DefBlockError 28 | #[derive(Debug, PartialEq)] 29 | pub enum BlockError { 30 | /// Usually means requested block size, and therefore alignment, wasn't a 31 | /// power of two 32 | BadRequest, 33 | /// Insufficient memory, couldn't allocate a block 34 | OOM, 35 | } 36 | // ANCHOR_END: DefBlockError 37 | 38 | /// A block-size-aligned block of memory 39 | // ANCHOR: DefBlock 40 | pub struct Block { 41 | ptr: BlockPtr, 42 | size: BlockSize, 43 | } 44 | // ANCHOR_END: DefBlock 45 | 46 | impl Block { 47 | /// Instantiate a new block of the given size. Size must be a power of two. 48 | // ANCHOR: BlockNew 49 | pub fn new(size: BlockSize) -> Result { 50 | if !size.is_power_of_two() { 51 | return Err(BlockError::BadRequest); 52 | } 53 | 54 | Ok(Block { 55 | ptr: internal::alloc_block(size)?, 56 | size, 57 | }) 58 | } 59 | // ANCHOR_END: BlockNew 60 | 61 | /// Consume and return the pointer only 62 | pub fn into_mut_ptr(self) -> BlockPtr { 63 | self.ptr 64 | } 65 | 66 | /// Return the size in bytes of the block 67 | pub fn size(&self) -> BlockSize { 68 | self.size 69 | } 70 | 71 | /// Unsafely reassemble from pointer and size 72 | pub unsafe fn from_raw_parts(ptr: BlockPtr, size: BlockSize) -> Block { 73 | Block { ptr, size } 74 | } 75 | 76 | /// Return a bare pointer to the base of the block 77 | // ANCHOR: BlockAsPtr 78 | pub fn as_ptr(&self) -> *const u8 { 79 | self.ptr.as_ptr() 80 | } 81 | // ANCHOR_END: BlockAsPtr 82 | } 83 | 84 | impl Drop for Block { 85 | fn drop(&mut self) { 86 | internal::dealloc_block(self.ptr, self.size); 87 | } 88 | } 89 | 90 | mod internal { 91 | use super::{BlockError, BlockPtr, BlockSize}; 92 | use std::alloc::{alloc, dealloc, Layout}; 93 | use std::ptr::NonNull; 94 | 95 | // ANCHOR: AllocBlock 96 | 
pub fn alloc_block(size: BlockSize) -> Result { 97 | unsafe { 98 | let layout = Layout::from_size_align_unchecked(size, size); 99 | 100 | let ptr = alloc(layout); 101 | if ptr.is_null() { 102 | Err(BlockError::OOM) 103 | } else { 104 | Ok(NonNull::new_unchecked(ptr)) 105 | } 106 | } 107 | } 108 | // ANCHOR_END: AllocBlock 109 | 110 | // ANCHOR: DeallocBlock 111 | pub fn dealloc_block(ptr: BlockPtr, size: BlockSize) { 112 | unsafe { 113 | let layout = Layout::from_size_align_unchecked(size, size); 114 | 115 | dealloc(ptr.as_ptr(), layout); 116 | } 117 | } 118 | // ANCHOR_END: DeallocBlock 119 | } 120 | 121 | #[cfg(test)] 122 | mod tests { 123 | 124 | use crate::{Block, BlockError, BlockSize}; 125 | 126 | fn alloc_dealloc(size: BlockSize) -> Result<(), BlockError> { 127 | let block = Block::new(size)?; 128 | 129 | // ANCHOR: TestAllocPointer 130 | // the block address bitwise AND the alignment bits (size - 1) should 131 | // be a mutually exclusive set of bits 132 | let mask = size - 1; 133 | assert!((block.ptr.as_ptr() as usize & mask) ^ mask == mask); 134 | // ANCHOR_END: TestAllocPointer 135 | 136 | drop(block); 137 | Ok(()) 138 | } 139 | 140 | #[test] 141 | fn test_bad_sizealign() { 142 | assert!(alloc_dealloc(999) == Err(BlockError::BadRequest)) 143 | } 144 | 145 | #[test] 146 | fn test_4k() { 147 | assert!(alloc_dealloc(4096).is_ok()) 148 | } 149 | 150 | #[test] 151 | fn test_32k() { 152 | assert!(alloc_dealloc(32768).is_ok()) 153 | } 154 | 155 | #[test] 156 | fn test_16m() { 157 | assert!(alloc_dealloc(16 * 1024 * 1024).is_ok()) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | authors = ["Peter Liniker"] 3 | multilingual = false 4 | language = "en" 5 | src = "booksrc" 6 | title = "Writing Interpreters in Rust: a Guide" 7 | 8 | 9 | [preprocess.links] 10 | 
-------------------------------------------------------------------------------- /booksrc/404.md: -------------------------------------------------------------------------------- 1 | # 404 - this chapter has not yet been written 2 | -------------------------------------------------------------------------------- /booksrc/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | - [Introduction](./introduction.md) 4 | - [Allocation](./part-allocators.md) 5 | - [Alignment](./chapter-alignment.md) 6 | - [Obtaining blocks of memory](./chapter-blocks.md) 7 | - [The type of allocation](./chapter-what-is-alloc.md) 8 | - [An allocator: Sticky Immix](./part-stickyimmix.md) 9 | - [Bump allocation](./chapter-simple-bump.md) 10 | - [Allocating into multiple blocks](./chapter-managing-blocks.md) 11 | - [Defining the allocation API](./chapter-allocation-api.md) 12 | - [Implementing the API](./chapter-allocation-impl.md) 13 | - [An interpreter: Eval-rs](./part-interpreter.md) 14 | - [Allocating objects and dereferencing safely](./chapter-interp-alloc.md) 15 | - [Tagged pointers and object headers](./chapter-interp-tagged-ptrs.md) 16 | - [Symbols and Pairs](./chapter-interp-symbols-and-pairs.md) 17 | - [Parsing s-expressions](./chapter-interp-parsing.md) 18 | - [Arrays](./chapter-interp-arrays.md) 19 | - [Bytecode](./chapter-interp-bytecode.md) 20 | - [Dicts](./chapter-interp-dicts.md) 21 | - [Virtual Machine: Design](./chapter-interp-vm-design.md) 22 | - [Virtual Machine: Implementation](./chapter-interp-vm-impl.md) 23 | - [Compiler: Design](./chapter-interp-compiler-design.md) 24 | - [Compiler: Implementation](./chapter-interp-compiler-impl.md) 25 | - [Garbage collection](./404.md) 26 | - [TODO - Tracing](./404.md) 27 | - [TODO - Sweeping](./404.md) 28 | - [TODO - Recycling blocks](./404.md) 29 | -------------------------------------------------------------------------------- /booksrc/chapter-alignment.md: 
-------------------------------------------------------------------------------- 1 | # Alignment 2 | 3 | There are subtleties in memory access alignment: 4 | 5 | - Some hardware architectures and implementations may fault on unaligned 6 | memory access. 7 | - Atomic operations require word-aligned access. 8 | - SIMD operations typically require double-word-aligned access. 9 | - In practice on 64 bit architectures, allocators align objects to 8 byte 10 | boundaries for 64 bit objects and smaller and 16 byte boundaries for larger 11 | objects for performance optimization and the above reasons. 12 | 13 | Intel 32 and 64 bit x86 architectures allow general access to be unaligned but 14 | will probably incur an access penalty. The story on 32bit ARM and aarch64 is 15 | sufficiently similar but there is a higher chance that an ARM core is 16 | configured to raise a bus error on a misaligned access. 17 | 18 | Another very important factor is atomic memory operations. 19 | Atomic access works on a whole word basis - any unaligned access by nature 20 | cannot be guaranteed to be atomic as it will probably involve more than one 21 | access. To support atomic operations, alignment must be minimally on word 22 | boundaries. 23 | 24 | SIMD operations, tending to be 128 bits wide or higher, should be 25 | aligned to 16 byte boundaries for optimal code generation and performance. 26 | Unaligned loads and stores may be allowed but normally these incur 27 | performance penalties. 28 | 29 | ![Common alignments](img/alignment.png) 30 | 31 | While Intel allows unaligned access (that is, alignment on any byte boundary), 32 | the 33 | [recommended](https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?wapkw=248966) 34 | (see section 3.6.4) alignment for objects larger than 64 bits is to 16 byte 35 | boundaries. 
36 | 37 | Apparently system `malloc()` implementations 38 | [tend to comply](http://www.erahm.org/2016/03/24/minimum-alignment-of-allocation-across-platforms/) 39 | with the 16 byte boundary. 40 | 41 | To verify the above, a rough test of both the system allocator and jemalloc 42 | on x86_64 by using `Box::new()` on a set of types (`u8`, `u16`, `u32`, `u64`, 43 | `String` and a larger `struct`) confirms a minimum of 8 byte alignment for 44 | anything word size or smaller and 16 byte alignment for everything bigger. 45 | Sample pointer printouts below are for jemalloc but Linux libc malloc produced 46 | the same pattern: 47 | 48 | ``` 49 | p=0x7fb78b421028 u8 50 | p=0x7fb78b421030 u16 51 | p=0x7fb78b421038 u32 52 | p=0x7fb78b421050 u64 53 | p=0x7fb78b420060 "spam" 54 | p=0x7fb78b4220f0 Hoge { y: 2, z: "ほげ", x: 1 } 55 | ``` 56 | 57 | Compare with `std::mem::align_of()` which, on x86_64 for example, 58 | returns alignment values: 59 | 60 | - `u8`: 1 byte 61 | - `u16`: 2 bytes 62 | - `u32`: 4 bytes 63 | - `u64`: 8 bytes 64 | - any bigger struct: 8 65 | 66 | Thus despite the value of `std::mem::align_of::<T>()`, mature allocators will 67 | do what is most pragmatic and follow recommended practice in support of optimal 68 | performance. 69 | 70 | With all that in mind, to keep things simple, we'll align everything to 71 | double-word boundaries. When we add in prepending an object header, the minimum 72 | memory required for an object will be two words anyway. 
73 | 74 | Thus, the allocated size of an object will be calculated[^1] by 75 | 76 | ```rust,ignore 77 | let alignment = size_of::<usize>() * 2; 78 | // mask out the least significant bits that correspond to the alignment - 1 79 | // then add the full alignment 80 | let size = (size_of::<T>() & !(alignment - 1)) + alignment; 81 | ``` 82 | 83 | ---- 84 | 85 | [^1]: For a more detailed explanation of alignment adjustment calculations, see 86 | [phil-opp](https://github.com/phil-opp)'s kernel 87 | [heap allocator](https://os.phil-opp.com/kernel-heap/#alignment). 88 | -------------------------------------------------------------------------------- /booksrc/chapter-allocation-impl.md: -------------------------------------------------------------------------------- 1 | # Implementing the Allocation API 2 | 3 | In this final chapter of the allocation part of the book, we'll cover the 4 | `AllocRaw` trait implementation. 5 | 6 | This trait is implemented on the `StickyImmixHeap` struct: 7 | 8 | ```rust,ignore 9 | impl AllocRaw for StickyImmixHeap { 10 | type Header = H; 11 | 12 | ... 13 | } 14 | ``` 15 | 16 | Here the associated header type is provided as the generic type `H`, leaving it 17 | up to the interpreter to define. 18 | 19 | ## Allocating objects 20 | 21 | The first function to implement is `AllocRaw::alloc()`. This function must: 22 | * calculate how much space in bytes is required by the object and header 23 | * allocate that space 24 | * instantiate an object header and write it to the first bytes of the space 25 | * copy the object itself to the remaining bytes of the space 26 | * return a pointer to where the object lives in this space 27 | 28 | Let's look at the implementation. 
29 | 30 | ```rust,ignore 31 | impl AllocRaw for StickyImmixHeap { 32 | {{#include ../stickyimmix/src/heap.rs:DefAlloc}} 33 | } 34 | ``` 35 | 36 | This, hopefully, is easy enough to follow after the previous chapters - 37 | * `self.find_space()` is the function described in the chapter 38 | [Allocating into multiple blocks](./chapter-managing-blocks.md#allocating-into-the-head-block) 39 | * `Self::Header::new()` will be implemented by the interpreter 40 | * `write(space as *mut Self::Header, header)` calls the std function 41 | `std::ptr::write` 42 | 43 | ## Allocating arrays 44 | 45 | We need a similar (but awkwardly different enough) implementation for array 46 | allocation. The key differences are that the type is fixed to a `u8` pointer 47 | and the array is initialized to zero bytes. It is up to the interpreter to 48 | write into the array itself. 49 | 50 | ```rust,ignore 51 | impl AllocRaw for StickyImmixHeap { 52 | {{#include ../stickyimmix/src/heap.rs:DefAllocArray}} 53 | } 54 | ``` 55 | 56 | ## Switching between header and object 57 | 58 | As stated in the previous chapter, these functions are essentially pointer 59 | operations that do not dereference the pointers. Thus they are not unsafe 60 | to call, but the types they operate _on_ should have a suitably unsafe API. 61 | 62 | `NonNull` is the chosen parameter and return type and the pointer arithmetic 63 | for obtaining the header from an object pointer of unknown type is shown 64 | below. 65 | 66 | For our Immix implementation, since headers are placed immediately 67 | ahead of an object, we simply subtract the header size from the object 68 | pointer. 
69 | 70 | ```rust,ignore 71 | impl AllocRaw for StickyImmixHeap { 72 | {{#include ../stickyimmix/src/heap.rs:DefGetHeader}} 73 | } 74 | ``` 75 | 76 | Getting the object from a header is the reverse - adding the header size 77 | to the header pointer results in the object pointer: 78 | 79 | ```rust,ignore 80 | impl AllocRaw for StickyImmixHeap { 81 | {{#include ../stickyimmix/src/heap.rs:DefGetObject}} 82 | } 83 | ``` 84 | 85 | ## Conclusion 86 | 87 | Thus ends the first part of our Immix implementation. In the next part of the 88 | book we will jump over the fence to the interpreter and begin using the 89 | interfaces we've defined in this part. 90 | -------------------------------------------------------------------------------- /booksrc/chapter-blocks.md: -------------------------------------------------------------------------------- 1 | # Obtaining Blocks of Memory 2 | 3 | When requesting blocks of memory at a time, one of the questions is *what 4 | is the desired block alignment?* 5 | 6 | * In deciding, one factor is that using an alignment that is a multiple of the 7 | page size can make it easier to return memory to the operating system. 8 | * Another factor is that if the block is aligned to its size, it is fast to 9 | do bitwise arithmetic on a pointer to an object in a block to compute the 10 | block boundary and therefore the location of any block metadata. 11 | 12 | With both these in mind we'll look at how to allocate blocks that are 13 | aligned to the size of the block. 14 | 15 | 16 | ## A basic crate interface 17 | 18 | A block of memory is defined as a base address and a size, so we need a struct 19 | that contains these elements. 20 | 21 | To wrap the base address pointer, we'll use the recommended type for building 22 | collections, [`std::ptr::NonNull`](https://doc.rust-lang.org/std/ptr/struct.NonNull.html), 23 | which is available on stable. 
24 | 25 | ```rust,ignore 26 | {{#include ../blockalloc/src/lib.rs:DefBlock}} 27 | ``` 28 | 29 | Where `BlockPtr` and `BlockSize` are defined as: 30 | 31 | ```rust,ignore 32 | {{#include ../blockalloc/src/lib.rs:DefBlockComponents}} 33 | ``` 34 | 35 | To obtain a `Block`, we'll create a `Block::new()` function which, along with 36 | `Block::drop()`, is implemented internally by wrapping the stabilized Rust alloc 37 | routines: 38 | 39 | ```rust,ignore 40 | {{#include ../blockalloc/src/lib.rs:BlockNew}} 41 | ``` 42 | 43 | Where parameter `size` must be a power of two, which is validated on the first 44 | line of the function. Requiring the block size to be a power of two means 45 | simple bit arithmetic can be used to find the beginning and end of a block in 46 | memory, if the block size is always the same. 47 | 48 | Errors take one of two forms, an invalid block-size or out-of-memory, both 49 | of which may be returned by `Block::new()`. 50 | 51 | ```rust,ignore 52 | {{#include ../blockalloc/src/lib.rs:DefBlockError}} 53 | ``` 54 | 55 | Now on to the platform-specific implementations. 56 | 57 | 58 | ## Custom aligned allocation on stable Rust 59 | 60 | On the stable rustc channel we have access to some features of the 61 | [Alloc](https://doc.rust-lang.org/std/alloc/index.html) API. 62 | 63 | This is the ideal option since it abstracts platform specifics for us, we do 64 | not need to write different code for Unix and Windows ourselves. 65 | 66 | Fortunately there is enough stable functionality to 67 | fully implement what we need. 68 | 69 | With an appropriate underlying implementation this code should compile and 70 | execute for any target. The allocation function, implemented in the `internal` 71 | mod, reads: 72 | 73 | ```rust,ignore 74 | {{#include ../blockalloc/src/lib.rs:AllocBlock}} 75 | ``` 76 | 77 | Once a block has been allocated, there is no safe abstraction at this level 78 | to access the memory. 
The `Block` will provide a bare pointer to the beginning 79 | of the memory and it is up to the user to avoid invalid pointer arithmetic 80 | and reading or writing outside of the block boundary. 81 | 82 | ```rust,ignore 83 | {{#include ../blockalloc/src/lib.rs:BlockAsPtr}} 84 | ``` 85 | 86 | 87 | ## Deallocation 88 | 89 | Again, using the stable Alloc functions: 90 | 91 | ```rust,ignore 92 | {{#include ../blockalloc/src/lib.rs:DeallocBlock}} 93 | ``` 94 | 95 | The implementation of `Block::drop()` calls the deallocation function 96 | for us so we can create and drop `Block` instances without leaking memory. 97 | 98 | 99 | ## Testing 100 | 101 | We want to be sure that the system level allocation APIs do indeed return 102 | block-size-aligned blocks. Checking for this is straightforward. 103 | 104 | A correctly aligned block should have its low bits 105 | set to `0` for a number of bits that represents the range of the block 106 | size - that is, the block size minus one. A bitwise XOR will highlight any 107 | bits that shouldn't be set: 108 | 109 | ```rust,ignore 110 | {{#include ../blockalloc/src/lib.rs:TestAllocPointer}} 111 | ``` 112 | -------------------------------------------------------------------------------- /booksrc/chapter-interp-arrays.md: -------------------------------------------------------------------------------- 1 | # Arrays 2 | 3 | Before we get to the basics of compilation, we need another data structure: 4 | the humble array. The first use for arrays will be to store the bytecode 5 | sequences that the compiler generates. 6 | 7 | Rust already provides `Vec` but as we're implementing everything in terms of our 8 | memory management abstraction, we cannot directly use `Vec`. Rust does not 9 | (yet) expose the ability to specify a custom allocator type as part of `Vec`, 10 | nor are we interested in replacing the global allocator. 11 | 12 | Our only option is to write our own version of `Vec`! 
Fortunately we can 13 | learn a lot from `Vec` itself and its underlying implementation. Jump over to 14 | the [Rustonomicon][1] for a primer on the internals of `Vec`. 15 | 16 | The first thing we'll learn is to split the implementation into a `RawArray` 17 | type and an `Array` type. `RawArray` will provide an unsafe abstraction 18 | while `Array` will make a safe layer over it. 19 | 20 | 21 | ## RawArray 22 | 23 | If you've just come back from _Implementing Vec_ in the Nomicon, you'll 24 | recognize what we're doing below with `RawArray`: 25 | 26 | ```rust,ignore 27 | {{#include ../interpreter/src/rawarray.rs:DefRawArray}} 28 | ``` 29 | 30 | Instead of `Unique` for the pointer, we're using `Option<NonNull<T>>`. 31 | One simple reason is that `Unique` is likely to be permanently unstable and 32 | only available internally to `std` collections. The other is that we can 33 | avoid allocating the backing store if no capacity is requested yet, setting 34 | the value of `ptr` to `None`. 35 | 36 | For when we _do_ know the desired capacity, there is 37 | `RawArray::with_capacity()`. This method, because it allocates, requires 38 | access to the `MutatorView` instance. If you'll recall from the chapter on 39 | the allocation API, the API provides an array allocation method with 40 | signature: 41 | 42 | ```rust,ignore 43 | AllocRaw::alloc_array(&self, size_bytes: ArraySize) -> Result<RawPtr<u8>, AllocError>; 44 | ``` 45 | 46 | This method is wrapped on the interpreter side by `Heap` and `MutatorView` and 47 | in both cases the return value remains, simply, `RawPtr` in the success 48 | case. It's up to `RawArray` to receive the `RawPtr` value and maintain 49 | it safely. Here's `with_capacity()`, now: 50 | 51 | ```rust,ignore 52 | {{#include ../interpreter/src/rawarray.rs:DefRawArrayWithCapacity}} 53 | ``` 54 | 55 | ### Resizing 56 | 57 | If a `RawArray`'s content will exceed its capacity, there is 58 | `RawArray::resize()`. 
It allocates a new backing array using the 59 | `MutatorView` method `alloc_array()` and copies the content of the old 60 | over to the new, finally swapping in the new backing array for the old. 61 | 62 | The code for this is straightforward but a little longer, go check it out 63 | in `interpreter/src/rawarray.rs`. 64 | 65 | ### Accessing 66 | 67 | Since `RawArray` will be wrapped by `Array`, we need a couple more 68 | methods to access the raw memory: 69 | 70 | ```rust,ignore 71 | impl RawArray { 72 | {{#include ../interpreter/src/rawarray.rs:DefRawArrayCapacity}} 73 | 74 | {{#include ../interpreter/src/rawarray.rs:DefRawArrayAsPtr}} 75 | } 76 | ``` 77 | 78 | And that's it! Now for the safe wrapper. 79 | 80 | 81 | ## Array 82 | 83 | The definition of the struct wrapping `RawArray` is as follows: 84 | 85 | ```rust,ignore 86 | {{#include ../interpreter/src/array.rs:DefArray}} 87 | ``` 88 | 89 | Here we have three members: 90 | 91 | * `length` - the length of the array 92 | * `data` - the `RawArray` being wrapped 93 | * `borrow` - a flag serving as a runtime borrow check, allowing `RefCell` 94 | runtime semantics, since we're in a world of interior mutability patterns 95 | 96 | We have a method to create a new array - `Array::alloc()` 97 | 98 | ```rust,ignore 99 | impl Array { 100 | {{#include ../interpreter/src/array.rs:DefArrayAlloc}} 101 | } 102 | ``` 103 | 104 | In fact we'll extend this pattern of a method named "alloc" to any data 105 | structure for convenience sake. 106 | 107 | There are many more methods for `Array` and it would be exhausting to be 108 | exhaustive. Let's go over the core methods used to read and write elements 109 | and then an example use case. 
110 | 111 | ### Reading and writing 112 | 113 | First of all, we need a function that takes an array index and returns a 114 | pointer to a memory location, if the index is within bounds: 115 | 116 | ```rust,ignore 117 | impl Array { 118 | {{#include ../interpreter/src/array.rs:DefArrayGetOffset}} 119 | } 120 | ``` 121 | 122 | There are two bounds checks here - firstly, the index should be within the 123 | (likely non-zero) length values; secondly, the `RawArray` instance 124 | should have a backing array allocated. If either of these checks fails, the 125 | result is an error. If these checks pass, we can be confident that there 126 | is array backing memory and that we can return a valid pointer to somewhere 127 | inside that memory block. 128 | 129 | For reading a value in an array, we need two methods: 130 | 131 | 1. one that handles move/copy semantics and returns a value 132 | 2. one that handles reference semantics and returns a reference to the original 133 | value in its location in the backing memory 134 | 135 | First, then: 136 | 137 | ```rust,ignore 138 | impl Array { 139 | {{#include ../interpreter/src/array.rs:DefArrayRead}} 140 | } 141 | ``` 142 | 143 | and secondly: 144 | 145 | ```rust,ignore 146 | impl Array { 147 | {{#include ../interpreter/src/array.rs:DefArrayReadRef}} 148 | } 149 | ``` 150 | 151 | Writing, or copying, an object to an array is implemented as simply as follows: 152 | 153 | ```rust,ignore 154 | impl Array { 155 | {{#include ../interpreter/src/array.rs:DefArrayWrite}} 156 | } 157 | ``` 158 | 159 | These simple functions should only be used internally by `Array` impl 160 | methods. We have numerous methods that wrap the above in more appropriate 161 | semantics for values of `T` in `Array`. 162 | 163 | ### The Array interfaces 164 | 165 | To define the interfaces to the Array, and other collection types, we define a 166 | number of traits. 
For example, a collection that behaves as a stack implements 167 | `StackContainer`; a numerically indexable type implements `IndexedContainer`, 168 | and so on. As we'll see, there is some nuance, though, when it comes to a 169 | difference between collections of non-pointer types and collections of pointer 170 | types. 171 | 172 | For our example, we will describe the stack interfaces of `Array`. 173 | 174 | First, the general case trait, with methods for accessing values stored in the 175 | array (non-pointer types): 176 | 177 | ```rust,ignore 178 | {{#include ../interpreter/src/containers.rs:DefStackContainer}} 179 | ``` 180 | 181 | These are unremarkable functions, by now we're familiar with the references to 182 | `MutatorScope` and `MutatorView` in method parameter lists. 183 | 184 | In any instance of `Array`, `T` need only implement `Clone` and cannot be 185 | dynamically sized. Thus `T` can be any primitive type or any straightforward 186 | struct. 187 | 188 | What if we want to store pointers to other objects? For example, if we want a 189 | heterogenous array, such as Python's `List` type, what would we provide in 190 | place of `T`? The answer is to use the `TaggedCellPtr` type. However, 191 | an `Array StackContainer for Array { 210 | {{#include ../interpreter/src/array.rs:DefStackContainerArrayPush}} 211 | } 212 | ``` 213 | 214 | In summary, the order of operations is: 215 | 216 | 1. Check that a runtime borrow isn't in progress. If it is, return an error. 217 | 1. Since we must implement interior mutability, the member `data` of the 218 | `Array` struct is a `Cell`. We have to `get()` the content in order 219 | to use it. 220 | 1. We then ask whether the array backing store needs to be grown. If so, 221 | we resize the `RawArray` and, since it's kept in a `Cell` on `Array`, 222 | we have to `set()` value back into `data` to save the change. 223 | 1. 
Now that we have a `RawArray` that has enough capacity, the length is 224 | incremented and the object to be pushed is written to the next memory 225 | location using the internal `Array::write()` method detailed earlier. 226 | 227 | Fortunately we can implement `StackAnyContainer::push()` in terms of 228 | `StackContainer::push()`: 229 | 230 | ```rust,ignore 231 | impl StackAnyContainer for Array<TaggedCellPtr> { 232 | {{#include ../interpreter/src/array.rs:DefStackAnyContainerArrayPush}} 233 | } 234 | ``` 235 | 236 | ### One last thing 237 | 238 | To more easily differentiate arrays of type `Array<T>` from arrays of type 239 | `Array<TaggedCellPtr>`, we make a type alias `List` where: 240 | 241 | ```rust,ignore 242 | pub type List = Array<TaggedCellPtr>; 243 | ``` 244 | 245 | 246 | ## In conclusion 247 | 248 | We referenced how `Vec` is implemented internally and followed the same pattern 249 | of defining a `RawArray` unsafe layer with a safe `Array` wrapper. Then 250 | we looked into the stack interface for `Array` and the implementation of 251 | `push()`. 252 | 253 | There is more to arrays, of course - indexed access the most obvious, and also 254 | a few convenience methods. See the source code in `interpreter/src/array.rs` 255 | for the full detail. 256 | 257 | In the next chapter we'll put `Array` to use in a `Bytecode` type! 258 | 259 | 260 | [1]: https://doc.rust-lang.org/nomicon/vec.html 261 | -------------------------------------------------------------------------------- /booksrc/chapter-interp-bytecode.md: -------------------------------------------------------------------------------- 1 | # Bytecode 2 | 3 | In this chapter we will look at a bytecode compilation target. We'll combine 4 | this with a section on the virtual machine interface to the bytecode data 5 | structure. 6 | 7 | We won't go much into detail on each bytecode operation, that will be more 8 | usefully covered in the compiler and virtual machine chapters. Here, we'll 9 | describe the data structures involved. 
As such, this will be one of our 10 | shorter chapters. Let's go! 11 | 12 | 13 | ## Design questions 14 | 15 | Now that we're talking bytecode, we're at the point of choosing what type of 16 | virtual machine we will be compiling for. The most common type is stack-based 17 | where operands are pushed and popped on and off the stack. This requires 18 | instructions for pushing and popping, with instructions in-between for operating 19 | on values on the stack. 20 | 21 | We'll be implementing a register-based VM though. The inspiration for this 22 | comes from Lua 5[^1] which implements a fixed-width bytecode register VM. While 23 | stack based VMs are typically claimed to be simpler, we'll see that the Lua 24 | way of allocating registers per function also has an inherent simplicity and 25 | has performance gains over a stack VM, at least for an interpreted 26 | non jit-compiled VM. 27 | 28 | Given register based, fixed-width bytecode, each opcode must reference the 29 | register numbers that it operates on. Thus, for an (untyped) addition 30 | operation `x = a + b`, each of `x`, `a` and `b` must be associated with a 31 | register. 32 | 33 | Following Lua, encoding this as a fixed width opcode typically looks like 34 | encoding the operator and operands as 8 bit values packed into a 32 bit opcode 35 | word. That implies, given 8 bits, that there can be a theoretical maximum of 36 | 256 registers for a function call. For the addition above, this encoding 37 | might look like this: 38 | 39 | ```ignore 40 | 32.....24......16.......8.......0 41 | [reg a ][reg b ][reg x ][Add ] 42 | ``` 43 | 44 | where the first 8 bits contain the operator, in this case "Add", and the 45 | other three 8 bit slots in the 32 bit word each contain a register number. 46 | 47 | For some operators, we will need to encode values larger than 8 bits. As 48 | we will still need space for an operator and a destination register, that 49 | leaves a maximum of 16 bits for larger values. 
50 | 51 | 52 | ## Opcodes 53 | 54 | We have options in how we describe opcodes in Rust. 55 | 56 | 1. Each opcode represented by a u32 57 | * Pros: encoding flexibility, it's just a set of bits 58 | * Cons: bit shift and masking operations to encode and decode operator 59 | and operands. This isn't necessarily a big deal but it doesn't allow 60 | us to leverage the Rust type system to avoid encoding mistakes 61 | 1. Each opcode represented by an enum discriminant 62 | * Pros: operators and operands baked as Rust types at compile time, type 63 | safe encoding; no bit operations needed 64 | * Cons: encoding scheme limited to what an enum can represent 65 | 66 | The ability to leverage the compiler to prevent opcode encoding errors is 67 | attractive and we won't have any need for complex encodings. We'll use an enum 68 | to represent all possible opcodes and their operands. 69 | 70 | Since a Rust enum can contain named values within each variant, this is what 71 | we use to most tightly define our opcodes. 72 | 73 | ### Opcode size 74 | 75 | Since we're using `enum` instead of a directly size-controlled type such as u32 76 | for our opcodes, we have to be more careful about making sure our opcode type 77 | doesn't take up more space than is necessary. 32 bits is ideal for reasons 78 | stated earlier (8 bits for the operator and 8 bits for three operands each.) 79 | 80 | Let's do an experiment. 81 | 82 | First, we need to define a register as an 8 bit value. We'll also define an 83 | inline literal integer as 16 bits. 
84 | 85 | ```rust,ignore 86 | type Register = u8; 87 | type LiteralInteger = i16; 88 | ``` 89 | 90 | Then we'll create an opcode enum with a few variants that might be typical: 91 | 92 | ```rust,ignore 93 | #[derive(Copy, Clone)] 94 | enum Opcode { 95 | Add { 96 | dest: Register, 97 | a: Register, 98 | b: Register 99 | }, 100 | LoadLiteral { 101 | dest: Register, 102 | value: LiteralInteger 103 | } 104 | } 105 | ``` 106 | 107 | It should be obvious that with an enum like this we can safely pass compiled 108 | bytecode from the compiler to the VM. It should also be clear that this, by 109 | allowing use of `match` statements, will be very ergonomic to work with. 110 | 111 | Theoretically, if we never have more than 256 variants, and our variants never have 112 | more than 3 `Register` values (or one `Register` and one `LiteralInteger` sized 113 | value), the compiler should be able to pack `Opcode` into 32 bits. 114 | 115 | Our test: we hope the output of the following code to be `4` - 4 bytes or 32 116 | bits. 117 | 118 | ```rust,ignore 119 | use std::mem::size_of; 120 | 121 | fn main() { 122 | println!("Size of Opcode is {}", size_of::<Opcode>()); 123 | } 124 | ``` 125 | 126 | And indeed when we run this, we get `Size of Opcode is 4`! 127 | 128 | To keep an eye on this situation, we'll put this check into a unit test: 129 | 130 | ```rust,ignore 131 | {{#include ../interpreter/src/bytecode.rs:DefTestOpcodeIs32Bits}} 132 | ``` 133 | 134 | Now, let's put these `Opcode`s into an array. 135 | 136 | 137 | ## An array of Opcode 138 | 139 | We can define this array easily, given that `Array` is a generic type: 140 | 141 | ```rust,ignore 142 | {{#include ../interpreter/src/bytecode.rs:DefArrayOpcode}} 143 | ``` 144 | 145 | Is this enough to define bytecode? Not quite. We've accommodated 16 bit 146 | literal signed integers, but all kinds of other types can be literals. 147 | We need some way of referencing any literal type in bytecode. 
For that 148 | we add a `Literals` type, which is just: 149 | 150 | ```rust,ignore 151 | {{#include ../interpreter/src/bytecode.rs:DefLiterals}} 152 | ``` 153 | 154 | Any opcode that loads a literal (other than a 16 bit signed integer) will 155 | need to reference an object in the `Literals` list. This is easy enough: 156 | just as there's a `LiteralInteger`, we have `LiteralId` defined as 157 | 158 | ```rust,ignore 159 | pub type LiteralId = u16; 160 | ``` 161 | 162 | This id is an index into the `Literals` list. This isn't the most efficient 163 | scheme or encoding, but given a preference for fixed 32 bit opcodes, it will 164 | also keep things simple. 165 | 166 | The `ByteCode` type, finally, is a composition of `ArrayOpcode` and `Literals`: 167 | 168 | ```rust,ignore 169 | {{#include ../interpreter/src/bytecode.rs:DefByteCode}} 170 | ``` 171 | 172 | 173 | ## Bytecode compiler support 174 | 175 | There are a few methods implemented for `ByteCode`: 176 | 177 | 1. `fn push<'guard>(&self, mem: &'guard MutatorView, op: Opcode) -> Result<(), RuntimeError>` 178 | This function pushes a new opcode into the `ArrayOpcode` instance. 179 | 1. ```rust,ignore 180 | fn update_jump_offset<'guard>( 181 | &self, 182 | mem: &'guard MutatorView, 183 | instruction: ArraySize, 184 | offset: JumpOffset, 185 | ) -> Result<(), RuntimeError> 186 | ``` 187 | This function, given an instruction index into the `ArrayOpcode` instance, 188 | and given that the instruction at that index is a type of jump instruction, 189 | sets the relative jump offset of the instruction to the given offset. 190 | This is necessary because forward jumps cannot be calculated until all the 191 | in-between instructions have been compiled first. 192 | 1. ```rust,ignore 193 | fn push_lit<'guard>( 194 | &self, 195 | mem: &'guard MutatorView, 196 | literal: TaggedScopedPtr<'guard> 197 | ) -> Result<LiteralId, RuntimeError> 198 | ``` 199 | This function pushes a literal on to the `Literals` list and returns the 200 | index - the id - of the item. 
201 | 1. ```rust,ignore 202 | fn push_loadlit<'guard>( 203 | &self, 204 | mem: &'guard MutatorView, 205 | dest: Register, 206 | literal_id: LiteralId, 207 | ) -> Result<(), RuntimeError> 208 | ``` 209 | After pushing a literal into the `Literals` list, the corresponding load 210 | instruction should be pushed into the `ArrayOpcode` list. 211 | 212 | `ByteCode` and its functions combined with the `Opcode` enum are enough to 213 | build a compiler for. 214 | 215 | 216 | ## Bytecode execution support 217 | 218 | The previous section described a handful of functions for our compiler to use 219 | to build a `ByteCode` structure. 220 | 221 | We'll need a different set of functions for our virtual machine to access 222 | `ByteCode` from an execution standpoint. 223 | 224 | The execution view of bytecode is of a contiguous sequence of instructions and 225 | an instruction pointer. We're going to create a separate `ByteCode` instance 226 | for each function that gets compiled, so our execution model will have to 227 | be able to jump between `ByteCode` instances. We'll need a new struct to 228 | represent that: 229 | 230 | ```rust,ignore 231 | {{#include ../interpreter/src/bytecode.rs:DefInstructionStream}} 232 | ``` 233 | 234 | In this definition, the pointer `instructions` can be updated to point at any 235 | `ByteCode` instance. This allows us to switch between functions by managing 236 | different `ByteCode` pointers as part of a stack of call frames. In support 237 | of this we have: 238 | 239 | ```rust,ignore 240 | impl InstructionStream { 241 | {{#include ../interpreter/src/bytecode.rs:DefInstructionStreamSwitchFrame}} 242 | } 243 | ``` 244 | 245 | Of course, the main function needed during execution is to retrieve the next 246 | opcode. Ideally, we can keep a pointer that points directly at the next opcode 247 | such that only a single dereference and pointer increment is needed to get 248 | the opcode and advance the instruction pointer. 
Our implementation is less 249 | efficient for now, requiring a dereference of 1. the `ByteCode` instance and 250 | then 2. the `ArrayOpcode` instance and finally 3. an indexing into the 251 | `ArrayOpcode` instance: 252 | 253 | ```rust,ignore 254 | {{#include ../interpreter/src/bytecode.rs:DefInstructionStreamGetNextOpcode}} 255 | ``` 256 | 257 | 258 | ## Conclusion 259 | 260 | The full `Opcode` definition can be found in `interpreter/src/bytecode.rs`. 261 | 262 | As we work toward implementing a compiler, the next data structure we need is 263 | a dictionary or hash map. This will also build on the foundational 264 | `RawArray` implementation. Let's go on to that now! 265 | 266 | 267 | --- 268 | 269 | [^1]: Roberto Ierusalimschy et al, [The Implementation of Lua 5.0](https://www.lua.org/doc/jucs05.pdf) 270 | -------------------------------------------------------------------------------- /booksrc/chapter-interp-dicts.md: -------------------------------------------------------------------------------- 1 | # Dicts 2 | 3 | The implementation of dicts, or hash tables, is going to combine a reuse of the 4 | [RawArray](./chapter-interp-arrays.md) 5 | type and closely follow the [Crafting Interpreters][1] design: 6 | 7 | * open addressing 8 | * linear probing 9 | * FNV hashing 10 | 11 | Go read the corresponding chapter in Crafting Interpreters and then come 12 | back here. We won't duplicate much of Bob's excellent explanation of the above 13 | terms and we'll assume you are familiar with his chapter when reading 14 | ours. 15 | 16 | 17 | ## Code design 18 | 19 | A `Dict` in our interpreter will allow any hashable value as a key and any 20 | type as a value. We'll store pointers to the key and the value together in 21 | a struct `DictItem`. 22 | 23 | Here, we'll also introduce the single diversion from 24 | Crafting Interpreters' implementation in that we'll cache the hash value and 25 | use it as part of a tombstone indicator. 
This adds an extra word 26 | per entry but we will also take the stance that if two keys have 27 | the same hash value then the keys are equal. This simplifies our implementation 28 | as we won't need to implement object equality comparisons just yet. 29 | 30 | ```rust,ignore 31 | {{#include ../interpreter/src/dict.rs:DefDictItem}} 32 | ``` 33 | 34 | The `Dict` itself mirrors Crafting Interpreters' implementation of a count of 35 | used entries and an array of entries. Since tombstones are counted as used 36 | entries, we'll add a separate `length` that excludes tombstones so we can 37 | accurately report the number of items in a dict. 38 | 39 | ```rust,ignore 40 | {{#include ../interpreter/src/dict.rs:DefDict}} 41 | ``` 42 | 43 | 44 | ## Hashing 45 | 46 | To implement our compiler we will need to be able to hash the `Symbol` type and 47 | integers (inline in tagged pointers.) 48 | 49 | The Rust standard library defines trait `std::hash::Hash` that must be 50 | implemented by types that want to be hashed. This trait requires the type to 51 | implement method `fn hash<H>(&self, state: &mut H) where H: Hasher`. 52 | 53 | This signature requires a reference to the type `&self` to access its data. 54 | In our world, this is insufficient: we also require a `&MutatorScope` 55 | lifetime to access an object. We will have to wrap `std::hash::Hash` in our 56 | own trait that extends essentially the same signature with this scope 57 | guard parameter. 
This trait is named `Hashable`: 58 | 59 | 60 | ```rust,ignore 61 | {{#include ../interpreter/src/hashable.rs:DefHashable}} 62 | ``` 63 | 64 | We can implement this trait for `Symbol` - it's a straightforward wrap of 65 | calling `Hash::hash()`: 66 | 67 | ```rust,ignore 68 | {{#include ../interpreter/src/symbol.rs:DefImplHashableForSymbol}} 69 | ``` 70 | 71 | Then finally, because this is all for a dynamically typed interpreter, we'll 72 | write a function that can take any type - a `TaggedScopedPtr` - and attempt 73 | to return a 64 bit hash value from it: 74 | 75 | ```rust,ignore 76 | {{#include ../interpreter/src/dict.rs:DefHashKey}} 77 | ``` 78 | 79 | Now we can take a `Symbol` or a tagged integer and use them as keys in our 80 | `Dict`. 81 | 82 | 83 | ## Finding an entry 84 | 85 | The methods that a dictionary typically provides, lookup, insertion and 86 | deletion, all hinge around one internal function, `find_entry()`. 87 | 88 | This function scans the internal `RawArray` array for a slot that 89 | matches the hash value argument. It may find an exact match for an existing 90 | key-value entry; if it does not, it will return the first available slot for 91 | the hash value, whether an empty never-before used slot or the tombstone 92 | entry of a formerly used slot. 93 | 94 | A tombstone, remember, is a slot that previously held a key-value pair but 95 | has been deleted. These slots must be specially marked so that when searching 96 | for an entry that generated a hash for an earlier slot but had to be inserted 97 | at a later slot, we know to keep looking rather than stop searching at the 98 | empty slot of a deleted entry. 99 | 100 | Slot | Content 101 | ------|-------- 102 | n - 1 | empty 103 | n | X: hash % capacity == n 104 | n + 1 | tombstone 105 | n + 2 | Y: hash % capacity == n 106 | n + 3 | empty 107 | 108 | For example, in the above table: 109 | 110 | * Key `X`'s hash maps to slot `n`. 
111 | * At some point another entry was inserted at slot `n + 1`. 112 | * Then `Y`, with hash mapping also to slot `n`, was inserted, but had to be 113 | bumped to slot `n + 2` because the previous two slots were occupied. 114 | * Then the entry at slot `n + 1` was deleted and marked as a tombstone. 115 | 116 | If slot `n + 1` was simply marked as `empty` after its occupant was deleted, 117 | then when searching for `Y` we wouldn't know to keep searching and find `Y` in 118 | slot `n + 2`. Hence, deleted entries are marked differently to empty slots. 119 | 120 | Here is the code for the Find Entry function: 121 | 122 | ```rust,ignore 123 | {{#include ../interpreter/src/dict.rs:DefFindEntry}} 124 | ``` 125 | 126 | To begin with, it calculates the index in the array from which to start 127 | searching. Then it iterates over the internal array, examining each entry's 128 | hash and key as it goes. 129 | 130 | * The first tombstone that is encountered is saved. This may turn out to be the 131 | entry that should be returned if an exact hash match isn't found by the time 132 | a never-before used slot is reached. We want to reuse tombstone entries, of 133 | course. 134 | * If no tombstone was found and we reach a never-before used slot, return 135 | that slot. 136 | * If an exact match is found, return that slot of course. 137 | 138 | 139 | ## The external API 140 | 141 | Just as we defined some container traits for `Array` to define access to 142 | arrays based on stack or indexed style access, we'll define a container trait 143 | for `Dict`: 144 | 145 | ```rust,ignore 146 | {{#include ../interpreter/src/containers.rs:DefHashIndexedAnyContainer}} 147 | ``` 148 | 149 | This trait contains the external API that `Dict` will expose for managing 150 | keys and values. The implementation of each of these methods will be in terms 151 | of the `find_entry()` function described above. Let's look at a couple of the 152 | more complex examples, `assoc()` and `dissoc()`. 
153 | 154 | ### assoc 155 | 156 | ```rust,ignore 157 | impl HashIndexedAnyContainer for Dict { 158 | {{#include ../interpreter/src/dict.rs:DefHashIndexedAnyContainerForDictAssoc}} 159 | } 160 | ``` 161 | 162 | ### dissoc 163 | 164 | ```rust,ignore 165 | impl HashIndexedAnyContainer for Dict { 166 | {{#include ../interpreter/src/dict.rs:DefHashIndexedAnyContainerForDictDissoc}} 167 | } 168 | ``` 169 | 170 | As you can see, once `find_entry()` is implemented as a separate function, 171 | these methods become fairly easy to comprehend. 172 | 173 | 174 | ## Conclusion 175 | 176 | If you _haven't_ read Bob Nystrom's chapter on [hash tables][1] in Crafting 177 | Interpreters we encourage you to do so: it will help make sense of this 178 | chapter. 179 | 180 | Now, we'll transition to some compiler and virtual machine design before 181 | we continue with code implementation. 182 | 183 | 184 | [1]: http://craftinginterpreters.com/hash-tables.html 185 | -------------------------------------------------------------------------------- /booksrc/chapter-interp-parsing.md: -------------------------------------------------------------------------------- 1 | # Parsing s-expressions 2 | 3 | We'll make this quick. It's not the main focus of this book and the topic is 4 | better served by seeking out other resources that can do it justice. 5 | 6 | In service of keeping it short, we're parsing s-expressions and we'll start 7 | by considering only symbols and parentheses. We could hardly make it simpler. 8 | 9 | 10 | ## The interface 11 | 12 | The interface we want should take a `&str` and return a `TaggedScopedPtr`. 13 | We want the tagged version of the scoped ptr because the return value might 14 | point to either a `Pair` or a `Symbol`. 
Examples of valid input are: 15 | 16 | * `a-symbol`: a `Symbol` with name "a-symbol" 17 | * `(this is a list)`: a linked list of `Pair`s, each with the `first` value 18 | pointing to a `Symbol` 19 | * `(this (is a nested) list)`: a linked list, as above, containing a nested 20 | linked list 21 | * `(this () is a nil symbol)`: the two characters `()` together are equivalent 22 | to the special symbol `nil`, also the value `0` in our `TaggedPtr` type 23 | * `(one . pair)`: a single `Pair` instance with `first` pointing at the `Symbol` 24 | for "one" and `second` at the `Symbol` for "pair" 25 | 26 | Our internal implementation is split into tokenizing and then parsing the 27 | token stream. Tokenizing takes the `&str` input and returns a `Vec<Token>` 28 | on success: 29 | 30 | ```rust,ignore 31 | fn tokenize(input: &str) -> Result<Vec<Token>, RuntimeError>; 32 | ``` 33 | 34 | The returned `Vec<Token>` is an intermediate, throwaway value, and does not 35 | interact with our Sticky Immix heap. Parsing takes the `Vec<Token>` and 36 | returns a `TaggedScopedPtr` on success: 37 | 38 | ```rust,ignore 39 | fn parse_tokens<'guard>( 40 | mem: &'guard MutatorView, 41 | tokens: Vec<Token>, 42 | ) -> Result<TaggedScopedPtr<'guard>, RuntimeError>; 43 | ``` 44 | 45 | 46 | ## Tokens, a short description 47 | 48 | The full set of tokens we will consider parsing is: 49 | 50 | ```rust,ignore 51 | {{#include ../interpreter/src/lexer.rs:DefTokenType}} 52 | ``` 53 | 54 | We combine this enum with a source input position indicator to compose the 55 | `Token` type. This source position is defined as: 56 | 57 | ```rust,ignore 58 | {{#include ../interpreter/src/error.rs:DefSourcePos}} 59 | ``` 60 | 61 | And whenever it is available to return as part of an error, error messages can 62 | be printed with the relevant source code line. 
63 | 64 | The `Token` type: 65 | 66 | ```rust,ignore 67 | {{#include ../interpreter/src/lexer.rs:DefToken}} 68 | ``` 69 | 70 | 71 | ## Parsing, a short description 72 | 73 | The key to quickly writing a parser in Rust is the `std::iter::Peekable` 74 | iterator which can be obtained from the `Vec<Token>` instance with 75 | `tokens.iter().peekable()`. This iterator has a `peek()` method that allows 76 | you to look at the next `Token` instance without advancing the iterator. 77 | 78 | Our parser, a hand-written recursive descent parser, uses this iterator type 79 | to look ahead to the next token to identify primarily whether the next token 80 | is valid in combination with the current token, or to know how to recurse 81 | next without consuming the token yet. 82 | 83 | For example, an open paren `(` followed by a symbol would start a new `Pair` 84 | linked list, recursing into a new parser function call, but if it is 85 | immediately followed by a close paren `)`, that is `()`, it is equivalent to 86 | the symbol `nil`, while otherwise `)` _terminates_ a `Pair` linked list and 87 | causes the current parsing function instance to return. 88 | 89 | Another case is the `.` operator, which is only valid in the following pattern: 90 | `(a b c . d)` where `a`, `b`, `c`, and `d` must be symbols or nested lists. 91 | A `.` must be followed by a single expression followed by a `)`. 92 | 93 | Tokenizing and parsing are wrapped in a function that takes the input `&str` 94 | and gives back the `TaggedScopedPtr`: 95 | 96 | ```rust,ignore 97 | {{#include ../interpreter/src/parser.rs:DefParse}} 98 | ``` 99 | 100 | Notice that this function and `parse_tokens()` require the 101 | `mem: &'guard MutatorView` parameter. Parsing creates `Symbol` and `Pair` 102 | instances in our Sticky Immix heap and so requires the scope-restricted 103 | `MutatorView` instance. 104 | 105 | This is all we'll say on parsing s-expressions. 
In the next chapter we'll do 106 | something altogether more informative with regard to memory management 107 | and it'll be necessary by the time we're ready to compile: arrays! 108 | -------------------------------------------------------------------------------- /booksrc/chapter-interp-symbols-and-pairs.md: -------------------------------------------------------------------------------- 1 | # Symbols and Pairs 2 | 3 | To bootstrap our compiler, we'll parse s-expressions into `Symbol` and `Pair` 4 | types, where a `Pair` is essentially a Lisp cons cell. 5 | 6 | The definition of `Symbol` is just the raw components of a `&str`: 7 | 8 | ```rust,ignore 9 | {{#include ../interpreter/src/symbol.rs:DefSymbol}} 10 | ``` 11 | 12 | Why this is how `Symbol` is defined and how we handle these raw components will 13 | be covered in just a bit. First though, we'll delve into the `Pair` type. 14 | 15 | 16 | ## Pairs of pointers 17 | 18 | The definition of `Pair` is 19 | 20 | ```rust,ignore 21 | {{#include ../interpreter/src/pair.rs:DefPair}} 22 | ``` 23 | 24 | The type of `first` and `second` is `TaggedCellPtr`, as seen in the previous 25 | chapter. This pointer type can point at any dynamic type. By the 26 | end of this chapter we'll be able to build a nested linked list of `Pair`s 27 | and `Symbol`s. 28 | 29 | Since this structure will be used for parsing and compiling, the `Pair` 30 | `struct` has a couple of extra members that optionally describe the source 31 | code line and character number of the values pointed at by `first` and 32 | `second`. These will be useful for reporting error messages. We'll come back 33 | to these in the chapter on parsing. 
34 | 35 | To instantiate a `Pair` with `first` and `second` set to nil, let's 36 | create a `new()` function: 37 | 38 | ```rust,ignore 39 | impl Pair { 40 | {{#include ../interpreter/src/pair.rs:DefPairNew}} 41 | } 42 | ``` 43 | 44 | That function, as it's not being allocated into the heap, doesn't require the 45 | lifetime guard. Let's look at a more interesting function: `cons()`, which 46 | assigns a value to `first` and `second` and puts the `Pair` on to the heap: 47 | 48 | ```rust,ignore 49 | {{#include ../interpreter/src/pair.rs:DefCons}} 50 | ``` 51 | 52 | Here we have the lifetime `'guard` associated with the `MutatorView` instance 53 | which grants access to the allocator `alloc_tagged()` method and the getter 54 | and setter on `TaggedScopedPtr`. 55 | 56 | The other two args, `head` and `rest`, are required to share the same `'guard` 57 | lifetime as the `MutatorView` instance, or rather, `'guard` must at least be 58 | a subtype of their lifetimes. Their values, of type `TaggedScopedPtr<'guard>`, 59 | can be written directly to the `first` and `second` members of `Pair` with 60 | the setter `TaggedCellPtr::set()`. 61 | 62 | We'll also add a couple `impl` methods for appending an object to a `Pair` 63 | in linked-list fashion: 64 | 65 | ```rust,ignore 66 | impl Pair { 67 | {{#include ../interpreter/src/pair.rs:DefPairAppend}} 68 | } 69 | ``` 70 | 71 | This method, given a value to append, creates a new `Pair` whose member `first` 72 | points at the value, then sets the `second` of the `&self` `Pair` to that new 73 | `Pair` instance. This is in support of s-expression notation `(a b)` which 74 | describes a linked-list of `Pair`s arranged, in pseudo-Rust: 75 | 76 | ``` 77 | Pair { 78 | first: a, 79 | second: Pair { 80 | first: b, 81 | second: nil, 82 | }, 83 | } 84 | ``` 85 | 86 | The second method is for directly setting the value of the `second` for 87 | s-expression dot-notation style: `(a . 
b)` is represented by `first` pointing 88 | at `a`, dotted with `b` which is pointed at by `second`. In our pseudo 89 | representation: 90 | 91 | ``` 92 | Pair { 93 | first: a, 94 | second: b, 95 | } 96 | ``` 97 | 98 | The implementation is simply: 99 | 100 | ```rust,ignore 101 | impl Pair { 102 | {{#include ../interpreter/src/pair.rs:DefPairDot}} 103 | } 104 | ``` 105 | 106 | The only other piece to add, since `Pair` must be able to be passed into 107 | our allocator API, is the `AllocObject` impl for `Pair`: 108 | 109 | ```rust,ignore 110 | impl AllocObject for Pair { 111 | const TYPE_ID: TypeList = TypeList::Pair; 112 | } 113 | ``` 114 | 115 | This impl pattern will repeat for every type in `TypeList` so it'll be a great 116 | candidate for a macro. 117 | 118 | And that's it! We have a cons-cell style `Pair` type and some elementary 119 | methods for creating and allocating them. 120 | 121 | Now, back to `Symbol`, which seems like it should be even simpler, but as we'll 122 | see has some nuance to it. 123 | 124 | 125 | ## Symbols and pointers 126 | 127 | Let's recap the definition of `Symbol` and that it is the raw members of a 128 | `&str`: 129 | 130 | ```rust,ignore 131 | {{#include ../interpreter/src/symbol.rs:DefSymbol}} 132 | ``` 133 | 134 | By this definition, a symbol has a name string, but does not own the string 135 | itself. What does this mean? 136 | 137 | Symbols are in fact pointers to interned strings. Since each symbol points 138 | to a unique string, we can identify a symbol by its pointer value rather than 139 | needing to look up the string itself. 140 | 141 | However, symbols do need to be discovered by their string name, and symbol 142 | pointers must dereference to return their string form. i.e. we need a 143 | bidirectional mapping of string to pointer and pointer to string. 
144 | 145 | In our implementation, we use a `HashMap>` to map from 146 | name strings to symbol pointers, while the `Symbol` object itself points back 147 | to the name string. 148 | 149 | This is encapsulated in a `SymbolMap` struct: 150 | 151 | ```rust,ignore 152 | {{#include ../interpreter/src/symbolmap.rs:DefSymbolMap}} 153 | ``` 154 | 155 | where we use `RefCell` to wrap operations in interior mutability, just like 156 | all other allocator functionality. 157 | 158 | The second struct member `Arena` requires further explanation: since symbols are 159 | unique strings that can be identified and compared by their pointer values, 160 | these pointer values must remain static throughout the program lifetime. 161 | Thus, `Symbol` objects cannot be managed by a heap that might perform object 162 | relocation. We need a separate heap type for objects that are never 163 | moved or freed until the program ends, the `Arena` type. 164 | 165 | The `Arena` type is simple. It, like `Heap`, wraps `StickyImmixHeap` but 166 | unlike `Heap`, it will never run garbage collection. 167 | 168 | ```rust,ignore 169 | {{#include ../interpreter/src/arena.rs:DefArena}} 170 | ``` 171 | 172 | The `ArenaHeader` is a simple object header type to fulfill the allocator 173 | API requirements but whose methods will never be needed. 174 | 175 | Allocating a `Symbol` will use the `Arena::alloc()` method which calls through 176 | to the `StickyImmixHeap` instance.
177 | 178 | We'll add a method for getting a `Symbol` from its name string to the 179 | `SymbolMap` at the allocator API level: 180 | 181 | ```rust,ignore 182 | impl SymbolMap { 183 | {{#include ../interpreter/src/symbolmap.rs:DefSymbolMapLookup}} 184 | } 185 | ``` 186 | 187 | Then we'll add wrappers to the `Heap` and `MutatorView` impls to scope-restrict 188 | access: 189 | 190 | ```rust,ignore 191 | impl Heap { 192 | {{#include ../interpreter/src/memory.rs:DefHeapLookupSym}} 193 | } 194 | ``` 195 | 196 | and 197 | 198 | ```rust,ignore 199 | impl<'memory> MutatorView<'memory> { 200 | {{#include ../interpreter/src/memory.rs:DefMutatorViewLookupSym}} 201 | } 202 | ``` 203 | 204 | This scope restriction is absolutely necessary, despite these objects never 205 | being freed or moved during runtime. This is because `Symbol`, as a standalone 206 | struct, remains unsafe to use with its raw `&str` components. These components 207 | can only safely be accessed when there is a guarantee that the backing 208 | `HashMap` is still in existence, which is only when the `MutatorView` is 209 | accessible. 210 | 211 | Two methods on `Symbol` guard access to the `&str`, one unsafe to reassemble 212 | the `&str` from raw components, the other safe when given a `MutatorScope` 213 | guard instance. 214 | 215 | ```rust,ignore 216 | impl Symbol { 217 | {{#include ../interpreter/src/symbol.rs:DefSymbolUnguardedAsStr}} 218 | 219 | {{#include ../interpreter/src/symbol.rs:DefSymbolAsStr}} 220 | } 221 | ``` 222 | 223 | Finally, to make `Symbol`s allocatable in the Sticky Immix heap, we need to 224 | implement `AllocObject` for it: 225 | 226 | ```rust,ignore 227 | impl AllocObject for Symbol { 228 | const TYPE_ID: TypeList = TypeList::Symbol; 229 | } 230 | ``` 231 | 232 | 233 | ## Moving on swiftly 234 | 235 | Now we've got the elemental pieces of s-expressions, lists and symbols, we can 236 | move on to parsing s-expression strings.
237 | 238 | Since the focus of this book is the underlying mechanisms of memory management 239 | in Rust and the details of runtime implementation, parsing will receive less 240 | attention. We'll make it quick! 241 | -------------------------------------------------------------------------------- /booksrc/chapter-interp-vm-design.md: -------------------------------------------------------------------------------- 1 | # Virtual Machine: Architecture and Design 2 | 3 | In this short chapter we will outline our virtual machine design choices. These 4 | are substantially a matter of pragmatic dynamic language implementation points 5 | and as such, borrow heavily from uncomplicated prior work such as Lua 5 and 6 | Crafting Interpreters. 7 | 8 | 9 | ## Bytecode 10 | 11 | We already discussed our Lua-inspired bytecode in a [previous 12 | chapter](./chapter-interp-bytecode.md). To recap, we are using 32 bit 13 | fixed-width opcodes with space for 8 bit register identifiers and 16 bit 14 | literals. 15 | 16 | 17 | ## The stack 18 | 19 | Following the example of [Crafting Interpreters][1] we'll maintain two separate 20 | stack data structures: 21 | 22 | * the register stack for storing stack values 23 | * the call frame stack 24 | 25 | In our case, these are best separated out because the register stack will be 26 | composed entirely of `TaggedCellPtr`s. 27 | 28 | To store call frames on the register stack we would have to either: 29 | 30 | 1. allocate every stack frame on the heap with pointers to them from the 31 | register stack 32 | 2. or coerce a call frame `struct` type into the register stack type 33 | 34 | Neither of these is attractive so we will maintain the call frame stack as an 35 | independent data structure. 36 | 37 | ### The register stack 38 | 39 | The register stack is a homogeneous array of `TaggedCellPtr`s. Thus, no object 40 | is allocated directly on the stack, all objects are heap allocated and the stack 41 | only consists of pointers to heap objects. 
The exception is literal integers 42 | that fit within the range allowed by a tagged pointer. 43 | 44 | Since this is a register virtual machine, not following stack push and pop 45 | semantics, and bytecode operands are limited to 8 bit register indexes, a 46 | function is limited to addressing a maximum of 256 contiguous registers. 47 | 48 | Due to function call nesting, the register stack may naturally grow much more 49 | than a length of 256. 50 | 51 | This requires us to implement a sliding window into the register stack which 52 | will move as functions are called and return. The call frame stack will contain 53 | the stack base pointer for each function call. We can then happily make use of a 54 | Rust slice to implement the window of 256 contiguous stack slots which a 55 | function call is limited to. 56 | 57 | ### The call frame stack 58 | 59 | A call frame needs to store three critical data points: 60 | 61 | * a pointer to the function being executed 62 | * the return instruction pointer when a nested function is called 63 | * the stack base pointer for the function call 64 | 65 | These three items can form a simple struct and we can define an 66 | `Array` type for optimum performance. 67 | 68 | 69 | ## Global values 70 | 71 | To store global values, we have all we need: the `Dict` type that maps `Symbol`s 72 | to another value. The VM will, of course, have an abstraction over the internal 73 | `Dict` to enforce `Symbol`s only as keys. 74 | 75 | 76 | ## Closures 77 | 78 | In the classic upvalues implementation from Lua 5, followed also by [Crafting 79 | Interpreters][2], a linked list of upvalues is used to map stack locations to 80 | shared variables. 81 | 82 | In every respect but one, our implementation will be similar. 83 | 84 | In our implementation, we'll use the `Dict` type that we already have available 85 | to do this mapping of stack locations to shared variables.
86 | 87 | As the language and compiler will implement lexical scoping, the compiler will 88 | have static knowledge of the _relative_ stack locations of closed-over variables 89 | and can generate the appropriate bytecode operands for the virtual machine to 90 | calculate the absolute stack locations at runtime. Thus, absolute stack 91 | locations can be mapped to `Upvalue` objects and so a `Dict` can be employed to 92 | facilitate the mapping. This obviates the need to implement a linked list data 93 | structure. 94 | 95 | The compiler must issue instructions to tell the VM when to make a closure data 96 | structure. It can do so, of course, because simple analysis shows whether 97 | a function references nonlocal bindings. A closure data structure as generated 98 | by the compiler must reference the function that will be called and the list of 99 | relative stack locations that correspond to each nonlocal binding. 100 | 101 | The VM, when executing the instruction to make a closure, will calculate the 102 | absolute stack locations for each nonlocal binding and create the closure 103 | environment - a `List`. VM instructions within the function code, as in 104 | Lua, indirectly reference nonlocal bindings by indexing into this environment. 105 | 106 | 107 | ## Partial functions 108 | 109 | Here is one point where we will introduce a less common construct in our virtual 110 | machine. Functions will be first class, that is they are objects that can be 111 | passed around as values and arguments. On top of that, we'll allow passing 112 | insufficient arguments to a function when it is called. The return value of 113 | such an operation will, instead of an error, be a `Partial` instance. This value 114 | must carry with it the arguments given and a pointer to the function waiting to 115 | be called. 
116 | 117 | This is insufficient for a fully featured currying implementation but is an 118 | interesting extension to first class functions, especially as it allows us to 119 | not _require_ lambdas to be constructed syntactically every time they might be 120 | used. 121 | 122 | By that we mean the following: if we have a function `(def mul (x y) (* x y))`, 123 | to turn that into a function that multiplies a number by 3 we'd normally have to 124 | define a second function, or lambda, `(lambda (x) (mul x 3))` and call it 125 | instead. However, with a simple partial function implementation we can avoid the 126 | lambda definition and call `(mul 3)` directly, which will collect the function 127 | pointer for `mul` and argument `3` into a `Partial` and wait for the final 128 | argument before calling into the function `mul` with both required arguments. 129 | 130 | > ***Note:*** We can use the same struct for both closures and partial 131 | > functions. A closure is a yet-to-be-called function carrying a list of 132 | > references to values, or a list of values. A partial is a yet-to-be-called 133 | > function carrying a list of arguments. They look very similar, and it's 134 | > possible, of course, to partially apply arguments to a closure. 135 | 136 | 137 | ## Instruction dispatch 138 | 139 | In dispatch, one optimal outcome is to minimize the machine code overhead 140 | between each VM instruction code. This overhead, where the next VM instruction 141 | is fetched, decoded and mapped to the entry point of the instruction code, is 142 | the dispatch code. The other axis of optimization is code ergonomics. 143 | 144 | Prior [research][3] into implementing dispatch in Rust concludes that simple 145 | switch-style dispatch is the only cross-platform construct we can reasonably 146 | make use of. Other mechanisms come with undesirable complexity or are platform 147 | dependent.
For the most part, with modern CPU branch prediction, the overhead 148 | of switch dispatch is small. 149 | 150 | What this looks like: a single `match` expression with a pattern to represent 151 | each bytecode discriminant, all wrapped in a loop. To illustrate: 152 | 153 | ```rust,ignore 154 | loop { 155 | let opcode = get_next_opcode(); 156 | match opcode { 157 | Opcode::Add(a, x, y) => { ... }, 158 | Opcode::Call(f, r, p) => { ... }, 159 | } 160 | } 161 | ``` 162 | 163 | 164 | ## That's it! 165 | 166 | Next we'll look at the counterpart of VM design - compiler design. 167 | 168 | 169 | [1]: http://craftinginterpreters.com/calls-and-functions.html#call-frames 170 | [2]: http://craftinginterpreters.com/closures.html 171 | [3]: https://pliniker.github.io/post/dispatchers/ 172 | -------------------------------------------------------------------------------- /booksrc/chapter-interp-vm-impl.md: -------------------------------------------------------------------------------- 1 | # Virtual Machine: Implementation 2 | 3 | In this chapter we'll dive into some of the more interesting and important 4 | implementation details of our virtual machine. 5 | 6 | To begin with, we'll lay out a struct for a single thread of execution. This 7 | struct should contain everything needed to execute the output of the compiler. 8 | 9 | ```rust,ignore 10 | {{#include ../interpreter/src/vm.rs:DefThread}} 11 | ``` 12 | 13 | Here we see every data structure needed to represent: 14 | 15 | - function call frames 16 | - stack values 17 | - closed-over stack values (Upvalues) 18 | - global values 19 | - bytecode to execute 20 | 21 | The VM's primary operation is to iterate through instructions, executing each 22 | in sequence. The outermost control structure is, therefore, a loop containing 23 | a `match` expression. 24 | 25 | Here is a code extract of the opening lines of this match operation. The 26 | function shown is a member of the `Thread` struct.
It evaluates the next 27 | instruction and is called in a loop by an outer function. We'll look at several 28 | extracts from this function in this chapter. 29 | 30 | ```rust,ignore 31 | {{#include ../interpreter/src/vm.rs:ThreadEvalNextInstr}} 32 | 33 | ... 34 | ``` 35 | 36 | The function obtains a slice view of the register stack, then narrows that down 37 | to a 256 register window for the current function. 38 | 39 | Then it fetches the next opcode and using `match`, decodes it. 40 | 41 | Let's take a closer look at the stack. 42 | 43 | 44 | ## The stack 45 | 46 | While some runtimes and compilers, particularly low-level languages, have a 47 | single stack that represents both function call information and local variables, 48 | our high-level runtime splits the stack into: 49 | 50 | 1. a stack of `CallFrame` objects containing function call and return 51 | information 52 | 2. and a register stack for local variables. 53 | 54 | Let's look at each in turn. 55 | 56 | ### The register stack 57 | 58 | In our `Thread` struct, the register stack is represented by the two members: 59 | 60 | ```rust,ignore 61 | pub struct Thread { 62 | ... 63 | stack: CellPtr, 64 | stack_base: Cell, 65 | ... 66 | } 67 | ``` 68 | 69 | Remember that the `List` type is defined as `Array` and is 70 | therefore an array of tagged pointers. Thus, the register stack is a homogeneous 71 | array of word sized values that are pointers to objects on the heap or values 72 | that can be inlined in the tagged pointer word. 73 | 74 | We also have a `stack_base` variable to quickly retrieve the offset into `stack` 75 | that indicates the beginning of the window of 256 registers that the current 76 | function has for its local variables. 77 | 78 | ### The call frame stack 79 | 80 | In our `Thread` struct, the call frame stack is represented by the members: 81 | 82 | ```rust,ignore 83 | pub struct Thread { 84 | ... 85 | frames: CellPtr, 86 | instr: CellPtr, 87 | ...
88 | } 89 | ``` 90 | 91 | A `CallFrame` and an array of them are defined as: 92 | 93 | ```rust,ignore 94 | {{#include ../interpreter/src/vm.rs:DefCallFrame}} 95 | 96 | {{#include ../interpreter/src/vm.rs:DefCallFrameList}} 97 | ``` 98 | 99 | A `CallFrame` contains all the information needed to resume a function when 100 | a nested function call returns: 101 | 102 | * a `Function` object, which references the `Bytecode` comprising the 103 | function 104 | * the return instruction pointer 105 | * the stack base index for the function's stack register window 106 | 107 | On every function call, a `CallFrame` instance is pushed on to the `Thread`'s 108 | `frames` stack and on every return from a function, the top `CallFrame` is 109 | popped off the stack. 110 | 111 | Additionally, we keep a pointer to the current executing function (the function 112 | represented by the top `CallFrame`) with the member `instr: 113 | CellPtr`. 114 | 115 | For a review of the definition of `InstructionStream` see the 116 | [bytecode](./chapter-interp-bytecode.md) chapter where we defined it as 117 | a pair of values - a `ByteCode` reference and a pointer to the next `Opcode` 118 | to fetch. 119 | 120 | The VM keeps the `InstructionStream` object pointing at the same `ByteCode` 121 | object as is pointed at by the `Function` in the `CallFrame` at the top of 122 | the call frame stack. Thus, when a call frame is popped off the stack, the 123 | `InstructionStream` is updated with the `ByteCode` and instruction pointer 124 | from the `CallFrame` at the new stack top; and similarly when a function 125 | is called _into_ and a new `CallFrame` is pushed on to the stack. 126 | 127 | 128 | ## Functions and function calls 129 | 130 | ### Function objects 131 | 132 | Since we've mentioned `Function` objects above, let's now have a look at the 133 | definition. 
134 | 135 | ```rust,ignore 136 | {{#include ../interpreter/src/function.rs:DefFunction}} 137 | ``` 138 | 139 | Instances of `Function` are produced by the compiler, one for each function 140 | definition that is compiled, including nested function definitions. 141 | 142 | A `Function` object is a simple collection of values, some of which may be 143 | `nil`. Any member represented by a `TaggedCellPtr` may, of course, contain 144 | a `nil` value. 145 | 146 | Thus the function may be anonymous, represented by a `nil` name value. 147 | 148 | While the function name is optional, the parameter names are always included. 149 | Though they do not need to be known in order to execute the function, they are 150 | useful for representing the function in string form if the programmer needs to 151 | introspect a function object. 152 | 153 | Members that are _required_ to execute the function are the arity, the 154 | `ByteCode` and any nonlocal references. 155 | 156 | Nonlocal references are an optional list of `(relative_stack_frame, register)` 157 | tuples, provided by the compiler, that are needed to locate nonlocal variables 158 | on the register stack. These are, of course, a key component of implementing 159 | closures. 160 | 161 | We'll talk about closures shortly, but before we do, we'll extend `Function`s 162 | with partial application of arguments. 163 | 164 | 165 | ### Partial functions 166 | 167 | A partial function application takes a subset of the arguments required to 168 | make a function call. These arguments must be stored for later. 169 | 170 | Thus, a `Partial` object references the `Function` to be called and a list 171 | of arguments to give it when the call is finally executed. 172 | 173 | Below is the definition of `Partial`. Note that it also contains a possible 174 | closure environment which, again, we'll arrive at momentarily. 
175 | 176 | ```rust,ignore 177 | {{#include ../interpreter/src/function.rs:DefPartial}} 178 | ``` 179 | 180 | The `arity` and `used` members indicate how many arguments are expected and how 181 | many have been given. These are provided directly in this struct rather than 182 | requiring dereferencing the `arity` on the `Function` object and the length of 183 | the `args` list. This is for convenience and performance. 184 | 185 | Each time more arguments are added to a `Partial`, a new `Partial` instance must 186 | be allocated and the existing arguments copied over. A `Partial` object, once 187 | created, is immutable. 188 | 189 | 190 | ### Closures 191 | 192 | Closures and partial applications have, at an abstract level, something in 193 | common: they both reference values that the function will need when it is 194 | finally called. 195 | 196 | It's also possible, of course, to have a partially applied closure. 197 | 198 | We can extend the `Partial` definition with a closure environment so that we can 199 | use the same object type everywhere to represent a function pointer, applied 200 | arguments and closure environment as needed. 201 | 202 | #### Compiling a closure 203 | 204 | The compiler, because it keeps track of variable names and scopes, knows when a 205 | `Function` references nonlocal variables. After such a function is defined, the 206 | compiler emits a `MakeClosure` instruction. 207 | 208 | #### Referencing the stack with upvalues 209 | 210 | The VM, when it executes `MakeClosure`, creates a new `Partial` object. It 211 | then iterates over the list of nonlocal references and allocates an `Upvalue` 212 | object for each, which are added to the `env` member on the `Partial` object. 213 | 214 | The below code extract is from the function `Thread::eval_next_instr()` in 215 | the `MakeClosure` instruction decode and execution block. 216 | 217 | The two operands of the `MakeClosure` operation - `dest` and `function` - are 218 | registers. 
`function` points at the `Function` to be given an environment and 219 | made into a closure `Partial` instance; the pointer to this instance will be 220 | written to the `dest` register. 221 | 222 | ```rust,ignore 223 | {{#include ../interpreter/src/vm.rs:OpcodeMakeClosure}} 224 | ``` 225 | 226 | The `Upvalue` struct itself is defined as: 227 | 228 | ```rust,ignore 229 | {{#include ../interpreter/src/vm.rs:DefUpvalue}} 230 | ``` 231 | 232 | An `Upvalue` is an object that references an absolute register stack location 233 | (that is the `location` member.) 234 | 235 | The initial value of `closed` is `false`. In this state, the location on the 236 | stack that contains the variable _must_ be a valid location. That is, the stack 237 | can not have been unwound yet. If the closure is called, `Upvalue`s in this 238 | state are simply an indirection between the function and the variable on the 239 | register stack. 240 | 241 | The compiler is able to keep track of variables and whether they are closed 242 | over. It emits bytecode instructions to close `Upvalue` objects when variables 243 | on the stack go out of scope. 244 | 245 | This instruction, `CloseUpvalues`, copies the variable from the register stack 246 | to the `value` member of the `Upvalue` object and sets `closed` to `true`. 247 | 248 | From then on, when the closure reads or writes to this variable, the value on 249 | the `Upvalue` object is modified rather than the location on the register stack. 250 | 251 | 252 | ## Global values 253 | 254 | ```rust,ignore 255 | pub struct Thread { 256 | ... 257 | globals: CellPtr, 258 | ... 259 | } 260 | ``` 261 | 262 | The outermost scope of a program's values and functions are the global values. 263 | We can manage these with an instance of a `Dict`. While a `Dict` can use any 264 | hashable value as a key, internally the VM will only allow `Symbol`s to be 265 | keys. That is, globals must be named objects. 266 | 267 | # Next... 
268 | 269 | Let's dive into the compiler! 270 | -------------------------------------------------------------------------------- /booksrc/chapter-managing-blocks.md: -------------------------------------------------------------------------------- 1 | # Allocating into Multiple Blocks 2 | 3 | Let's now zoom out of the fractal code soup one level and begin arranging multiple 4 | blocks so we can allocate - in theory - indefinitely. 5 | 6 | ## Lists of blocks 7 | 8 | We'll need a new struct for wrapping multiple blocks: 9 | 10 | ```rust,ignore 11 | {{#include ../stickyimmix/src/heap.rs:DefBlockList}} 12 | ``` 13 | 14 | Immix maintains several lists of blocks. We won't include them all in the first 15 | iteration but in short they are: 16 | 17 | * `free`: a list of blocks that contain no objects. These blocks are held at the 18 | ready to allocate into on demand 19 | * `recycle`: a list of blocks that contain some objects but also at least one 20 | line that can be allocated into 21 | * `large`: not a list of blocks, necessarily, but a list of objects larger than 22 | the block size, or some other method of accounting for large objects 23 | * `rest`: the rest of the blocks that have been allocated into but are not 24 | suitable for recycling 25 | 26 | In our first iteration we'll only keep the `rest` list of blocks and two blocks 27 | to immediately allocate into. Why two? To understand why, we need to understand 28 | how Immix thinks about object sizes. 29 | 30 | ### Immix and object sizes 31 | 32 | We've seen that there are two numbers that define granularity in Immix: the 33 | block size and the line size. 
These numbers give us the ability to categorize 34 | object sizes: 35 | 36 | * small: those that (with object header and alignment overhead) fit inside a 37 | line 38 | * medium: those that (again with object header and alignment overhead) are 39 | larger than one line but smaller than a block 40 | * large: those that are larger than a block 41 | 42 | In the previous chapter we described the basic allocation algorithm: when 43 | an object is being allocated, the current block is scanned for a hole between 44 | marked lines large enough to allocate into. This does seem like it could 45 | be inefficient. We could spend a lot of CPU cycles looking for a big enough 46 | hole, especially for a medium sized object. 47 | 48 | To avoid this, Immix maintains a second block, an overflow block, to allocate 49 | medium objects into that don't fit the first available hole in the 50 | main block being allocated into. 51 | 52 | Thus two blocks to immediately allocate into: 53 | 54 | * `head`: the current block being allocated into 55 | * `overflow`: a block kept handy for writing medium objects into that don't 56 | fit the `head` block's current hole 57 | 58 | We'll be ignoring large objects for now and attending only to allocating small 59 | and medium objects into blocks. 60 | 61 | Instead of recycling blocks with holes, or maintaining a list of pre-allocated 62 | free blocks, we'll allocate a new block on demand whenever we need more space. 63 | We'll get to identifying holes and recyclable blocks in a later chapter. 64 | 65 | ### Managing the overflow block 66 | 67 | Generally in our code for this book, we will try to default to not allocating 68 | memory unless it is needed. For example, when an array is instantiated, 69 | the backing storage will remain unallocated until a value is pushed on to 70 | it. 71 | 72 | Thus in the definition of `BlockList`, `head` and `overflow` are `Option` 73 | types and won't be instantiated except on demand. 
74 | 75 | For allocating into the overflow block we'll define a function in the 76 | `BlockList` impl: 77 | 78 | ```rust,ignore 79 | impl BlockList { 80 | fn overflow_alloc(&mut self, alloc_size: usize) -> Result<*const u8, AllocError> { 81 | ... 82 | } 83 | } 84 | ``` 85 | 86 | The input constraint is that, since overflow is for medium objects, `alloc_size` 87 | must be less than the block size. 88 | 89 | The logic inside will divide into three branches: 90 | 91 | 1. We haven't got an overflow block yet - `self.overflow` is `None`. In this 92 | case we have to instantiate a new block (since we're not maintaining 93 | a list of preinstantiated free blocks yet) and then, since that block 94 | is empty and we have a medium sized object, we can expect the allocation 95 | to succeed. 96 | ```rust,ignore 97 | match self.overflow { 98 | Some ..., 99 | None => { 100 | let mut overflow = BumpBlock::new()?; 101 | 102 | // object size < block size means we can't fail this expect 103 | let space = overflow 104 | .inner_alloc(alloc_size) 105 | .expect("We expected this object to fit!"); 106 | 107 | self.overflow = Some(overflow); 108 | 109 | space 110 | } 111 | } 112 | ``` 113 | 2. We _have_ an overflow block and the object fits. Easy. 114 | ```rust,ignore 115 | match self.overflow { 116 | // We already have an overflow block to try to use... 117 | Some(ref mut overflow) => { 118 | // This is a medium object that might fit in the current block... 119 | match overflow.inner_alloc(alloc_size) { 120 | // the block has a suitable hole 121 | Some(space) => space, 122 | ... 123 | } 124 | }, 125 | None => ... 126 | } 127 | ``` 128 | 3. We have an overflow block but the object does not fit. Now we simply 129 | instantiate a _new_ overflow block, adding the old one to the `rest` 130 | list (in future it will make a good candidate for recycling!). Again, 131 | since we're writing a medium object into a block, we can expect allocation 132 | to succeed.
133 | ```rust,ignore 134 | match self.overflow { 135 | // We already have an overflow block to try to use... 136 | Some(ref mut overflow) => { 137 | // This is a medium object that might fit in the current block... 138 | match overflow.inner_alloc(alloc_size) { 139 | Some ..., 140 | // the block does not have a suitable hole 141 | None => { 142 | let previous = replace(overflow, BumpBlock::new()?); 143 | 144 | self.rest.push(previous); 145 | 146 | overflow.inner_alloc(alloc_size).expect("Unexpected error!") 147 | } 148 | } 149 | }, 150 | None => ... 151 | } 152 | ``` 153 | 154 | In this logic, the only error can come from failing to create a new block. 155 | 156 | On success, at this level of interface we continue to return a `*const u8` 157 | pointer to the available space as we're not yet handling the type of the 158 | object being allocated. 159 | 160 | You may have noticed that the function signature for `overflow_alloc` takes a 161 | `&mut self`. This isn't compatible with the interior mutability model 162 | of allocation. We'll have to wrap the `BlockList` struct in another struct 163 | that handles this change of API model. 164 | 165 | ## The heap struct 166 | 167 | This outer struct will provide the external crate interface and some further 168 | implementation of block management. 169 | 170 | The crate interface will require us to consider object headers and so in the 171 | struct definition below there is reference to a generic type `H` that 172 | the _user_ of the heap will define as the object header. 173 | 174 | ```rust,ignore 175 | {{#include ../stickyimmix/src/heap.rs:DefStickyImmixHeap}} 176 | ``` 177 | 178 | Since object headers are not owned directly by the heap struct, we need a 179 | `PhantomData` instance to associate with `H`. We'll discuss object headers 180 | in a later chapter. 181 | 182 | Now let's focus on the use of the `BlockList`. 
183 | 184 | The instance of `BlockList` in the `StickyImmixHeap` struct is wrapped in an 185 | `UnsafeCell` because we need interior mutability. We need to be able to 186 | borrow the `BlockList` mutably while presenting an immutable interface to 187 | the outside world. Since we won't be borrowing the `BlockList` in multiple 188 | places in the same call tree, we don't need `RefCell` and we can avoid its 189 | runtime borrow checking. 190 | 191 | ### Allocating into the head block 192 | 193 | We've already taken care of the overflow block, now we'll handle allocation 194 | into the `head` block. We'll define a new function: 195 | 196 | ```rust,ignore 197 | impl StickyImmixHeap { 198 | fn find_space( 199 | &self, 200 | alloc_size: usize, 201 | size_class: SizeClass, 202 | ) -> Result<*const u8, AllocError> { 203 | let blocks = unsafe { &mut *self.blocks.get() }; 204 | ... 205 | } 206 | } 207 | ``` 208 | 209 | This function is going to look almost identical to the `overflow_alloc()` 210 | function defined earlier. It has more or less the same cases to walk through: 211 | 212 | 1. `head` block is `None`, i.e. we haven't allocated a head block yet. Allocate 213 | one and write the object into it. 214 | 2. We have `Some(ref mut head)` in `head`. At this point we divert from the 215 | `overflow_alloc()` function and query the size of the object - if this 216 | is a medium object and the current hole between marked lines in the `head` 217 | block is too small, call into `overflow_alloc()` and return. 218 | ```rust,ignore 219 | if size_class == SizeClass::Medium && alloc_size > head.current_hole_size() { 220 | return blocks.overflow_alloc(alloc_size); 221 | } 222 | ``` 223 | Otherwise, continue to allocate into `head` and return. 224 | 3. We have `Some(ref mut head)` in `head` but this block is unable to 225 | accommodate the object, whether medium or small. We must append the current 226 | head to the `rest` list and create a new `BumpBlock` to allocate into.
227 | 228 | There is one more thing to mention. What about large objects? We'll cover those 229 | in a later chapter. Right now we'll make it an error to try to allocate a large 230 | object by putting this at the beginning of the `StickyImmixHeap::inner_alloc()` 231 | function: 232 | 233 | ```rust,ignore 234 | if size_class == SizeClass::Large { 235 | return Err(AllocError::BadRequest); 236 | } 237 | 238 | ``` 239 | 240 | ## Where to next? 241 | 242 | We have a scheme for finding space in blocks for small and medium objects 243 | and so, in the next chapter, we will define the external interface to the crate. 244 | -------------------------------------------------------------------------------- /booksrc/chapter-what-is-alloc.md: -------------------------------------------------------------------------------- 1 | # The type of allocation 2 | 3 | Before we start writing objects into `Block`s, we need to know the nature of 4 | the interface in Rust terms. 5 | 6 | If we consider the global allocator in Rust, implicitly available via 7 | `Box::new()`, `Vec::new()` and so on, we'll notice that since the global 8 | allocator is available on every thread and allows the creation of new 9 | objects on the heap (that is, mutation of the heap) from any code location 10 | without needing to follow the rules of borrowing and mutable aliasing, 11 | it is essentially a container that implements `Sync` and the interior 12 | mutability pattern. 13 | 14 | We need to follow suit, but we'll leave `Sync` for advanced chapters. 15 | 16 | An interface that satisfies the interior mutability property, by borrowing 17 | the allocator instance immutably, might look like: 18 | 19 | ```rust,ignore 20 | trait AllocRaw { 21 | fn alloc<T>(&self, object: T) -> *const T; 22 | } 23 | ``` 24 | 25 | naming it `AllocRaw` because when layering on top of `Block` we'll 26 | work with raw pointers and not concern ourselves with the lifetime of 27 | allocated objects. 
28 | 29 | It will become a little more complex than this but for now, this captures 30 | the essence of the interface. 31 | -------------------------------------------------------------------------------- /booksrc/evalrus-medium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-hosted-langs/book/f4dc72f71e119ecdb87d842afa5bf65bb131ecaa/booksrc/evalrus-medium.png -------------------------------------------------------------------------------- /booksrc/img/alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-hosted-langs/book/f4dc72f71e119ecdb87d842afa5bf65bb131ecaa/booksrc/img/alignment.png -------------------------------------------------------------------------------- /booksrc/img/fragmented_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-hosted-langs/book/f4dc72f71e119ecdb87d842afa5bf65bb131ecaa/booksrc/img/fragmented_block.png -------------------------------------------------------------------------------- /booksrc/img/stickyimmix_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rust-hosted-langs/book/f4dc72f71e119ecdb87d842afa5bf65bb131ecaa/booksrc/img/stickyimmix_block.png -------------------------------------------------------------------------------- /booksrc/introduction.md: -------------------------------------------------------------------------------- 1 | # Writing Interpreters in Rust: a Guide 2 | 3 | ## Welcome! 4 | 5 | In this book we will walk through the basics of interpreted language 6 | implementation in Rust with a focus on the challenges that are specific 7 | to using Rust. 
8 | 9 | At a glance, these are: 10 | 11 | * A custom allocator for use in an interpreter 12 | * A safe-Rust wrapper over allocation 13 | * A compiler and VM that interact with the above two layers 14 | 15 | The goal of this book is not to cover a full featured language but rather to 16 | provide a solid foundation on which you can build further features. Along 17 | the way we'll implement as much as possible in terms of our own memory 18 | management abstractions rather than using Rust std collections. 19 | 20 | ### Level of difficulty 21 | 22 | Bob Nystrom's [Crafting Interpreters](http://craftinginterpreters.com/) 23 | is recommended _introductory_ reading to this book for beginners to the topic. 24 | Bob has produced a high quality, accessible work and while there is 25 | considerable overlap, in some ways this book builds on Bob's work with some 26 | additional complexity, optimizations and discussions of Rust's safe vs unsafe. 27 | 28 | **We hope you find this book to be informative!** 29 | 30 | 31 | ## Further reading and other projects to study: 32 | 33 | All the links below are acknowledged as inspiration or prior art. 34 | 35 | ### Interpreters 36 | 37 | * Bob Nystrom's [Crafting Interpreters](http://craftinginterpreters.com/) 38 | * [The Inko programming language](https://inko-lang.org/) 39 | * kyren - [luster](https://github.com/kyren/luster) and [gc-arena](https://github.com/kyren/gc-arena) 40 | 41 | ### Memory management 42 | 43 | * Richard Jones, Anthony Hosking, Elliot Moss - [The Garbage Collection Handbook](http://gchandbook.org/) 44 | * Stephen M. Blackburn & Kathryn S. 
McKinley - 45 | [Immix: A Mark-Region Garbage Collector with Space Efficiency, Fast Collection, and Mutator Performance](http://users.cecs.anu.edu.au/~steveb/pubs/papers/immix-pldi-2008.pdf) 46 | * Felix S Klock II - [GC and Rust Part 0: Garbage Collection Background](http://blog.pnkfx.org/blog/2015/10/27/gc-and-rust-part-0-how-does-gc-work/) 47 | * Felix S Klock II - [GC and Rust Part 1: Specifying the Problem](http://blog.pnkfx.org/blog/2015/11/10/gc-and-rust-part-1-specing-the-problem/) 48 | * Felix S Klock II - [GC and Rust Part 2: The Roots of the Problem](http://blog.pnkfx.org/blog/2016/01/01/gc-and-rust-part-2-roots-of-the-problem/) 49 | -------------------------------------------------------------------------------- /booksrc/part-allocators.md: -------------------------------------------------------------------------------- 1 | # Allocators 2 | 3 | This section gives an overview and implementation detail of allocating blocks 4 | of memory. 5 | 6 | _What this is not: a custom allocator to replace the global Rust allocator_ 7 | -------------------------------------------------------------------------------- /booksrc/part-interpreter.md: -------------------------------------------------------------------------------- 1 | # An interpreter: Eval-rs 2 | 3 | In this part of the book we'll dive into creating: 4 | * a safe Rust layer on top of the Sticky Immix API of the previous part 5 | * a compiler for a primitive s-expression syntax language 6 | * a bytecode based virtual machine 7 | 8 | So what kind of interpreter will we implement? This book is a guide to help 9 | you along your own journey and is not intended to provide an exhaustive 10 | language ecosystem. The direction we'll take is to support John McCarthy's 11 | classic s-expression based meta-circular evaluator[^1]. 
12 | 13 | Along the way we'll need to implement fundamental data types and structures 14 | from scratch upon our safe layer - symbols, pairs, arrays and dicts - with 15 | each chapter building upon the previous ones. 16 | 17 | While this will not result in an exhaustive language implementation, 18 | you'll see that we _will_ end up with all the building blocks for you to take 19 | it the rest of the way! 20 | 21 | We shall name our interpreter "Eval-rs", for which we have an appropriate 22 | illustration generously provided by the author's then 10-year-old daughter. 23 | 24 | ![The Evalrus](evalrus-medium.png) 25 | 26 | We'll begin by defining the safe abstraction over the Sticky Immix interface. 27 | Then we'll put that to use in parsing s-expressions into a very simple data 28 | structure. 29 | 30 | Once we've covered those basics, we'll build arrays and dicts and then 31 | use those in the compiler and virtual machine. 32 | 33 | [^1]: These days this is cliché but that is substantially to our benefit. We're 34 | not trying to create yet another Lisp, rather the fact that there is a 35 | preexisting design of some elegance and historical interest is a convenience. 36 | For a practical, accessible introduction to the topic, do see Paul 37 | Graham's [The Roots of Lisp](http://www.paulgraham.com/rootsoflisp.html) 38 | -------------------------------------------------------------------------------- /booksrc/part-stickyimmix.md: -------------------------------------------------------------------------------- 1 | # An allocator: Sticky Immix 2 | 3 | Quickly, some terminology: 4 | 5 | * Mutator: the thread of execution that writes and modifies objects on the heap. 6 | * Live objects: the graph of objects that the mutator can reach, either directly 7 | from its stack or indirectly through other reachable objects. 8 | * Dead objects: any object that is disconnected from the mutator's graph of live 9 | objects. 
10 | * Collector: the thread of execution that identifies objects that are no longer 11 | reachable by the mutator and marks them as free space that can be reused. 12 | * Fragmentation: as objects have many different sizes, after allocating and 13 | freeing many objects, gaps of unused memory appear between objects that are 14 | too small for most objects but that add up to a measurable percentage of 15 | wasted space. 16 | * Evacuation: when the collector _moves_ live objects to another block of memory 17 | so that the originating block can be _defragmented_. 18 | 19 | ## About Immix 20 | 21 | Immix is a memory management scheme that considers blocks of fixed size at a time. 22 | Each block is divided into lines. In the original paper, blocks are sized at 32k 23 | and lines at 128 bytes. Objects are allocated into blocks using bump allocation 24 | and objects can cross line boundaries. 25 | 26 | ![StickyImmix Block](img/stickyimmix_block.png) 27 | 28 | During tracing to discover live objects, objects are marked as live, but the 29 | line, or lines, that each object occupies are also marked as live. This can mean, of 30 | course, that a line may contain a dead object and a live object but the whole 31 | line is marked as live. 32 | 33 | To mark lines as live, a portion of the block is set aside for line mark bits, 34 | usually one byte per mark bit. If _any_ line is marked as live, the whole block 35 | is also marked as live. There must also, therefore, be a bit that indicates 36 | block liveness. 37 | 38 | ### Conservative marking 39 | 40 | The Immix authors found that marking _every_ line that contains a live object 41 | could be expensive. For example, many small objects might cross line boundaries, 42 | requiring two lines to be marked as live. This would require looking up the 43 | object size and calculating whether the object crosses the boundary into the 44 | next line. 
To save CPU cycles, they simplified the algorithm by saying that 45 | any object that fits in a line _might_ cross into the next line so we will 46 | conservatively _consider_ the next line marked just in case. This sped up 47 | marking at little fragmentation expense. 48 | 49 | ### Collection 50 | 51 | During collection, only lines not marked as live are considered available for 52 | re-use. Inevitably then, there is acceptance of some amount of fragmentation 53 | at this point. 54 | 55 | _Full_ Immix implements evacuating objects out of the most fragmented blocks 56 | into fresh, empty blocks, for defragmentation. 57 | 58 | For simplicity of implementation, we'll leave out this evacuation operation 59 | in this guide. This is called _Sticky_ Immix. 60 | 61 | We'll also stick to a single thread for the mutator and collector to avoid the 62 | complexity overhead of a multi-threaded implementation for now. 63 | 64 | Recommended reading: [Stephen M. Blackburn & Kathryn S. McKinley - Immix: A Mark-Region Garbage Collector with Space Efficiency, Fast Collection, and Mutator Performance][1] 65 | 66 | ## About this part of the book 67 | 68 | This section will describe a Rust crate that implements a Sticky Immix heap. 69 | As part of this implementation we will dive into the crate API details to 70 | understand how we can define an interface between the heap and the language 71 | VM that will come later. 72 | 73 | _What this is not: custom memory management to replace the global Rust 74 | allocator! 
The APIs we arrive at will be substantially incompatible with the 75 | global Rust allocator._ 76 | 77 | [1]: http://users.cecs.anu.edu.au/~steveb/pubs/papers/immix-pldi-2008.pdf 78 | -------------------------------------------------------------------------------- /interpreter/.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock 7 | Cargo.lock 8 | -------------------------------------------------------------------------------- /interpreter/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "evalrus" 3 | version = "0.0.1" 4 | authors = ["Peter Liniker "] 5 | edition = "2018" 6 | license = "MIT OR Apache-2.0" 7 | 8 | [dependencies] 9 | clap = "2.20.3" 10 | dirs = "1.0" 11 | fnv = "1.0.3" 12 | itertools = "0.9" 13 | rustyline = "6.1.2" 14 | stickyimmix = { path = "../stickyimmix" } 15 | blockalloc = { path = "../blockalloc" } 16 | -------------------------------------------------------------------------------- /interpreter/README.md: -------------------------------------------------------------------------------- 1 | # The Eval-rs 2 | 3 | A simple interpreter, built on the `stickyimmix` allocator. 4 | 5 | ![The Evalrus](https://pliniker.github.io/assets/img/evalrus-medium.png) 6 | -------------------------------------------------------------------------------- /interpreter/src/arena.rs: -------------------------------------------------------------------------------- 1 | /// A memory arena implemented as an ever growing pool of blocks. 2 | /// Currently implemented on top of stickyimmix without any gc which includes unnecessary 3 | /// overhead. 
4 | use std::ptr::NonNull; 5 | 6 | use stickyimmix::{ 7 | AllocError, AllocHeader, AllocObject, AllocRaw, ArraySize, Mark, RawPtr, SizeClass, 8 | StickyImmixHeap, 9 | }; 10 | 11 | use crate::headers::TypeList; 12 | 13 | /// Allocation header for an Arena-allocated value 14 | pub struct ArenaHeader {} 15 | 16 | /// Since we're not using this functionality in an Arena, the impl is just 17 | /// a set of no-ops. 18 | impl AllocHeader for ArenaHeader { 19 | type TypeId = TypeList; 20 | 21 | fn new>( 22 | _size: u32, 23 | _size_class: SizeClass, 24 | _mark: Mark, 25 | ) -> ArenaHeader { 26 | ArenaHeader {} 27 | } 28 | 29 | fn new_array(_size: ArraySize, _size_class: SizeClass, _mark: Mark) -> ArenaHeader { 30 | ArenaHeader {} 31 | } 32 | 33 | fn mark(&mut self) {} 34 | 35 | fn is_marked(&self) -> bool { 36 | true 37 | } 38 | 39 | fn size_class(&self) -> SizeClass { 40 | SizeClass::Small 41 | } 42 | 43 | fn size(&self) -> u32 { 44 | 1 45 | } 46 | 47 | fn type_id(&self) -> TypeList { 48 | TypeList::Symbol 49 | } 50 | } 51 | 52 | /// A non-garbage-collected pool of memory blocks for interned values. 53 | /// These values are not dropped on Arena deallocation. 54 | /// Values must be "atomic", that is, not composed of other object 55 | /// pointers that need to be traced. 
56 | // ANCHOR: DefArena 57 | pub struct Arena { 58 | heap: StickyImmixHeap, 59 | } 60 | // ANCHOR_END: DefArena 61 | 62 | impl Arena { 63 | pub fn new() -> Arena { 64 | Arena { 65 | heap: StickyImmixHeap::new(), 66 | } 67 | } 68 | } 69 | 70 | impl AllocRaw for Arena { 71 | type Header = ArenaHeader; 72 | 73 | // ANCHOR: DefArenaAlloc 74 | fn alloc(&self, object: T) -> Result, AllocError> 75 | where 76 | T: AllocObject, 77 | { 78 | self.heap.alloc(object) 79 | } 80 | // ANCHOR_END: DefArenaAlloc 81 | 82 | fn alloc_array(&self, _size_bytes: ArraySize) -> Result, AllocError> { 83 | unimplemented!() 84 | } 85 | 86 | fn get_header(_object: NonNull<()>) -> NonNull { 87 | unimplemented!() 88 | } 89 | 90 | fn get_object(_header: NonNull) -> NonNull<()> { 91 | unimplemented!() 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /interpreter/src/containers.rs: -------------------------------------------------------------------------------- 1 | /// Container traits 2 | /// 3 | /// TODO iterators/views 4 | use stickyimmix::ArraySize; 5 | 6 | use crate::error::RuntimeError; 7 | use crate::memory::MutatorView; 8 | use crate::safeptr::{MutatorScope, ScopedPtr, TaggedCellPtr, TaggedScopedPtr}; 9 | 10 | /// Base container-type trait. All container types are subtypes of `Container`. 11 | /// 12 | /// All container operations _must_ follow interior mutability only rules. 13 | /// Because there are no compile-time mutable aliasing guarantees, there can be no references 14 | /// into arrays at all, unless there can be a guarantee that the array memory will not be 15 | /// reallocated. 16 | /// 17 | /// `T` cannot be restricted to `Copy` because of the use of `Cell` for interior mutability. 18 | pub trait Container: Sized { 19 | /// Create a new, empty container instance. 20 | fn new() -> Self; 21 | /// Create a new container instance with the given capacity. 
22 | // TODO: this may not make sense for tree types 23 | fn with_capacity<'guard>( 24 | mem: &'guard MutatorView, 25 | capacity: ArraySize, 26 | ) -> Result; 27 | 28 | /// Reset the size of the container to zero - empty 29 | fn clear<'guard>(&self, mem: &'guard MutatorView) -> Result<(), RuntimeError>; 30 | 31 | /// Count of items in the container 32 | fn length(&self) -> ArraySize; 33 | } 34 | 35 | /// If implemented, the container can be filled with a set number of values in one operation 36 | pub trait FillContainer: Container { 37 | /// The `item` is an object to copy into each container memory slot. 38 | fn fill<'guard>( 39 | &self, 40 | mem: &'guard MutatorView, 41 | size: ArraySize, 42 | item: T, 43 | ) -> Result<(), RuntimeError>; 44 | } 45 | 46 | /// If implemented, the container can be filled with a set number of values in one operation 47 | pub trait FillAnyContainer: FillContainer { 48 | /// The `item` is an object to copy into each container memory slot. 49 | fn fill<'guard>( 50 | &self, 51 | mem: &'guard MutatorView, 52 | size: ArraySize, 53 | item: TaggedScopedPtr<'guard>, 54 | ) -> Result<(), RuntimeError>; 55 | } 56 | 57 | /// Generic stack trait. If implemented, the container can function as a stack 58 | // ANCHOR: DefStackContainer 59 | pub trait StackContainer: Container { 60 | /// Push can trigger an underlying array resize, hence it requires the ability to allocate 61 | fn push<'guard>(&self, mem: &'guard MutatorView, item: T) -> Result<(), RuntimeError>; 62 | 63 | /// Pop returns a bounds error if the container is empty, otherwise moves the last item of the 64 | /// array out to the caller. 65 | fn pop<'guard>(&self, _guard: &'guard dyn MutatorScope) -> Result; 66 | 67 | /// Return the value at the top of the stack without removing it 68 | fn top<'guard>(&self, _guard: &'guard dyn MutatorScope) -> Result; 69 | } 70 | // ANCHOR_END: DefStackContainer 71 | 72 | /// Specialized stack trait. 
If implemented, the container can function as a stack 73 | // ANCHOR: DefStackAnyContainer 74 | pub trait StackAnyContainer: StackContainer { 75 | /// Push can trigger an underlying array resize, hence it requires the ability to allocate 76 | fn push<'guard>( 77 | &self, 78 | mem: &'guard MutatorView, 79 | item: TaggedScopedPtr<'guard>, 80 | ) -> Result<(), RuntimeError>; 81 | 82 | /// Pop returns a bounds error if the container is empty, otherwise moves the last item of the 83 | /// array out to the caller. 84 | fn pop<'guard>( 85 | &self, 86 | _guard: &'guard dyn MutatorScope, 87 | ) -> Result, RuntimeError>; 88 | 89 | /// Return the value at the top of the stack without removing it 90 | fn top<'guard>( 91 | &self, 92 | _guard: &'guard dyn MutatorScope, 93 | ) -> Result, RuntimeError>; 94 | } 95 | // ANCHOR_END: DefStackAnyContainer 96 | 97 | /// Generic indexed-access trait. If implemented, the container can function as an indexable vector 98 | pub trait IndexedContainer: Container { 99 | /// Return a copy of the object at the given index. Bounds-checked. 100 | fn get<'guard>( 101 | &self, 102 | _guard: &'guard dyn MutatorScope, 103 | index: ArraySize, 104 | ) -> Result; 105 | 106 | /// Move an object into the array at the given index. Bounds-checked. 107 | fn set<'guard>( 108 | &self, 109 | _guard: &'guard dyn MutatorScope, 110 | index: ArraySize, 111 | item: T, 112 | ) -> Result<(), RuntimeError>; 113 | } 114 | 115 | /// A trait that is implemented for containers that can represent their contents as a slice. 116 | pub trait SliceableContainer: IndexedContainer { 117 | /// This function allows access to the interior of a container as a slice by way of a 118 | /// function, permitting direct access to the memory locations of objects in the container 119 | /// for the lifetime of the closure call. 
120 | /// 121 | /// It is important to understand that the 'guard lifetime is not the same safe duration 122 | /// as the slice lifetime - the slice may be invalidated during the 'guard lifetime 123 | /// by operations on the container that cause reallocation. 124 | /// 125 | /// To prevent the function from modifying the container outside of the slice reference, 126 | /// the implementing container must maintain a RefCell-style flag to catch runtime 127 | /// container modifications that would render the slice invalid or cause undefined 128 | /// behavior. 129 | fn access_slice<'guard, F, R>(&self, _guard: &'guard dyn MutatorScope, f: F) -> R 130 | where 131 | F: FnOnce(&mut [T]) -> R; 132 | } 133 | 134 | /// Specialized indexable interface for where TaggedCellPtr is used as T 135 | pub trait IndexedAnyContainer: IndexedContainer { 136 | /// Return a pointer to the object at the given index. Bounds-checked. 137 | fn get<'guard>( 138 | &self, 139 | guard: &'guard dyn MutatorScope, 140 | index: ArraySize, 141 | ) -> Result, RuntimeError>; 142 | 143 | /// Set the object pointer at the given index. Bounds-checked. 144 | fn set<'guard>( 145 | &self, 146 | _guard: &'guard dyn MutatorScope, 147 | index: ArraySize, 148 | item: TaggedScopedPtr<'guard>, 149 | ) -> Result<(), RuntimeError>; 150 | } 151 | 152 | /// Hashable-indexed interface. Objects used as keys must implement Hashable. 153 | // ANCHOR: DefHashIndexedAnyContainer 154 | pub trait HashIndexedAnyContainer { 155 | /// Return a pointer to the object associated with the given key. 156 | /// Absence of an association should return an error. 157 | fn lookup<'guard>( 158 | &self, 159 | guard: &'guard dyn MutatorScope, 160 | key: TaggedScopedPtr, 161 | ) -> Result, RuntimeError>; 162 | 163 | /// Associate a key with a value. 
164 | fn assoc<'guard>( 165 | &self, 166 | mem: &'guard MutatorView, 167 | key: TaggedScopedPtr<'guard>, 168 | value: TaggedScopedPtr<'guard>, 169 | ) -> Result<(), RuntimeError>; 170 | 171 | /// Remove an association by its key. 172 | fn dissoc<'guard>( 173 | &self, 174 | guard: &'guard dyn MutatorScope, 175 | key: TaggedScopedPtr, 176 | ) -> Result, RuntimeError>; 177 | 178 | /// Returns true if the key exists in the container. 179 | fn exists<'guard>( 180 | &self, 181 | guard: &'guard dyn MutatorScope, 182 | key: TaggedScopedPtr, 183 | ) -> Result; 184 | } 185 | // ANCHOR_END: DefHashIndexedAnyContainer 186 | 187 | /// Convert a Pair list to a different container 188 | pub trait AnyContainerFromPairList: Container { 189 | fn from_pair_list<'guard>( 190 | &self, 191 | mem: &'guard MutatorView, 192 | pair_list: TaggedScopedPtr<'guard>, 193 | ) -> Result<(), RuntimeError>; 194 | } 195 | 196 | /// Build a container from the values in the slice (takes no `self`, so presumably returns a new instance - confirm) 197 | pub trait ContainerFromSlice: Container { 198 | fn from_slice<'guard>( 199 | mem: &'guard MutatorView, 200 | data: &[T], 201 | ) -> Result, RuntimeError>; 202 | } 203 | 204 | /// Build a container from the tagged pointers in the slice (takes no `self`, so presumably returns a new instance - confirm) 205 | pub trait AnyContainerFromSlice: Container { 206 | fn from_slice<'guard>( 207 | mem: &'guard MutatorView, 208 | data: &[TaggedScopedPtr<'guard>], 209 | ) -> Result, RuntimeError>; 210 | } 211 | 212 | /// The implementor represents mutable changes via an internal version count 213 | /// such that the use of any references to an older version return an error 214 | pub trait VersionedContainer: Container {} 215 | 216 | pub trait ImmutableContainer: Container {} 217 | -------------------------------------------------------------------------------- /interpreter/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fmt; 3 | use std::io; 4 | 5 | use rustyline::error::ReadlineError; 6 | 
7 | use blockalloc::BlockError; 8 | use stickyimmix::AllocError; 9 | 10 | /// Source code position 11 | // ANCHOR: DefSourcePos 12 | #[derive(Copy, Clone, Debug, PartialEq)] 13 | pub struct SourcePos { 14 | pub line: u32, 15 | pub column: u32, 16 | } 17 | // ANCHOR_END: DefSourcePos 18 | 19 | impl SourcePos { 20 | fn new(line: u32, column: u32) -> SourcePos { 21 | SourcePos { line, column } 22 | } 23 | } 24 | 25 | #[derive(Debug, PartialEq)] 26 | pub enum ErrorKind { 27 | IOError(String), 28 | LexerError(String), 29 | ParseError(String), 30 | EvalError(String), 31 | BadAllocationRequest, 32 | OutOfMemory, 33 | BoundsError, 34 | KeyError, 35 | UnhashableError, 36 | MutableBorrowError, 37 | } 38 | 39 | /// An Eval-rs runtime error type 40 | #[derive(Debug, PartialEq)] 41 | pub struct RuntimeError { 42 | kind: ErrorKind, 43 | pos: Option, 44 | } 45 | 46 | impl RuntimeError { 47 | pub fn new(kind: ErrorKind) -> RuntimeError { 48 | RuntimeError { 49 | kind: kind, 50 | pos: None, 51 | } 52 | } 53 | 54 | pub fn with_pos(kind: ErrorKind, pos: SourcePos) -> RuntimeError { 55 | RuntimeError { 56 | kind: kind, 57 | pos: Some(pos), 58 | } 59 | } 60 | 61 | pub fn error_kind(&self) -> &ErrorKind { 62 | &self.kind 63 | } 64 | 65 | pub fn error_pos(&self) -> Option { 66 | self.pos 67 | } 68 | 69 | /// Given the relevant source code string, show the error in context 70 | pub fn print_with_source(&self, source: &str) { 71 | if let Some(ref pos) = self.pos { 72 | let mut iter = source.lines().enumerate(); 73 | 74 | while let Some((count, line)) = iter.next() { 75 | // count starts at 0, line numbers start at 1 76 | if count + 1 == pos.line as usize { 77 | println!("error: {}", self); 78 | println!("{:5}|{}", pos.line, line); 79 | println!("{:5}|{:width$}^", " ", " ", width = pos.column as usize); 80 | println!("{:5}|", " "); 81 | return; 82 | } 83 | } 84 | } else { 85 | println!("error: {}", self); 86 | } 87 | } 88 | } 89 | 90 | impl fmt::Display for RuntimeError { 91 | fn fmt(&self, 
f: &mut fmt::Formatter) -> fmt::Result { 92 | match self.kind { 93 | ErrorKind::IOError(ref reason) => write!(f, "IO Error: {}", reason), 94 | ErrorKind::LexerError(ref reason) => write!(f, "Parse error: {}", reason), 95 | ErrorKind::ParseError(ref reason) => write!(f, "Parse error: {}", reason), 96 | ErrorKind::EvalError(ref reason) => write!(f, "Evaluation error: {}", reason), 97 | ErrorKind::OutOfMemory => write!(f, "Out of memory!"), 98 | ErrorKind::BadAllocationRequest => { 99 | write!(f, "An invalid memory size allocation was requested!") 100 | } 101 | ErrorKind::BoundsError => write!(f, "Indexing bounds error"), 102 | ErrorKind::KeyError => write!(f, "Key does not exist in Dict"), 103 | ErrorKind::UnhashableError => write!(f, "Attempt to access Dict with unhashable key"), 104 | ErrorKind::MutableBorrowError => write!( 105 | f, 106 | "Attempt to modify a container that is already mutably borrowed" 107 | ), 108 | } 109 | } 110 | } 111 | 112 | /// Convert from io::Error 113 | impl From for RuntimeError { 114 | fn from(other: io::Error) -> RuntimeError { 115 | RuntimeError::new(ErrorKind::IOError(format!("{}", other))) 116 | } 117 | } 118 | 119 | /// Convert from ReadlineError 120 | impl From for RuntimeError { 121 | fn from(other: ReadlineError) -> RuntimeError { 122 | RuntimeError::new(ErrorKind::IOError(format!("{}", other))) 123 | } 124 | } 125 | 126 | /// Convert from BlockError 127 | impl From for RuntimeError { 128 | fn from(other: BlockError) -> RuntimeError { 129 | match other { 130 | BlockError::OOM => RuntimeError::new(ErrorKind::OutOfMemory), 131 | BlockError::BadRequest => RuntimeError::new(ErrorKind::BadAllocationRequest), 132 | } 133 | } 134 | } 135 | 136 | /// Convert from AllocError 137 | impl From for RuntimeError { 138 | fn from(other: AllocError) -> RuntimeError { 139 | match other { 140 | AllocError::OOM => RuntimeError::new(ErrorKind::OutOfMemory), 141 | AllocError::BadRequest => RuntimeError::new(ErrorKind::BadAllocationRequest), 142 | } 
143 | } 144 | } 145 | 146 | impl Error for RuntimeError { 147 | fn cause(&self) -> Option<&dyn Error> { 148 | None 149 | } 150 | } 151 | 152 | /// Convert _to_ std::fmt::Error 153 | impl From for fmt::Error { 154 | fn from(_other: RuntimeError) -> fmt::Error { 155 | // Is there anything else that can be done here? :-( 156 | fmt::Error 157 | } 158 | } 159 | 160 | /// Convenience shorthand function for building a SourcePos 161 | pub fn spos(line: u32, column: u32) -> SourcePos { 162 | SourcePos::new(line, column) 163 | } 164 | 165 | /// Convenience shorthand function for building a lexer error 166 | pub fn err_lexer(pos: SourcePos, reason: &str) -> RuntimeError { 167 | RuntimeError::with_pos(ErrorKind::LexerError(String::from(reason)), pos) 168 | } 169 | 170 | /// Convenience shorthand function for building a parser error 171 | pub fn err_parser(reason: &str) -> RuntimeError { 172 | RuntimeError::new(ErrorKind::ParseError(String::from(reason))) 173 | } 174 | 175 | /// Convenience shorthand function for building a parser error including a source position 176 | pub fn err_parser_wpos(pos: SourcePos, reason: &str) -> RuntimeError { 177 | RuntimeError::with_pos(ErrorKind::ParseError(String::from(reason)), pos) 178 | } 179 | 180 | /// Convenience shorthand function for building an evaluation error 181 | pub fn err_eval(reason: &str) -> RuntimeError { 182 | RuntimeError::new(ErrorKind::EvalError(String::from(reason))) 183 | } 184 | -------------------------------------------------------------------------------- /interpreter/src/function.rs: -------------------------------------------------------------------------------- 1 | use itertools::join; 2 | use std::fmt; 3 | 4 | use crate::array::ArrayU16; 5 | use crate::bytecode::ByteCode; 6 | use crate::containers::{Container, ContainerFromSlice, SliceableContainer, StackContainer}; 7 | use crate::error::RuntimeError; 8 | use crate::list::List; 9 | use crate::memory::MutatorView; 10 | use crate::printer::Print; 11 | use 
crate::safeptr::{CellPtr, MutatorScope, ScopedPtr, TaggedCellPtr, TaggedScopedPtr}; 12 | use crate::taggedptr::Value; 13 | 14 | /// A function object type 15 | // ANCHOR: DefFunction 16 | #[derive(Clone)] 17 | pub struct Function { 18 | /// name could be a Symbol, or nil if it is an anonymous fn 19 | name: TaggedCellPtr, 20 | /// Number of arguments required to activate the function 21 | arity: u8, 22 | /// Instructions comprising the function code 23 | code: CellPtr, 24 | /// Param names are stored for introspection of a function signature 25 | param_names: CellPtr, 26 | /// List of (CallFrame-index: u8 | Window-index: u8) relative offsets from this function's 27 | /// declaration where nonlocal variables will be found. Needed when creating a closure. May be 28 | /// nil 29 | nonlocal_refs: TaggedCellPtr, 30 | } 31 | // ANCHOR_END: DefFunction 32 | 33 | impl Function { 34 | /// Allocate a Function object on the heap. 35 | /// 36 | /// The nonlocal_refs arg must contain a list of 16 bit values composed of two 37 | /// 8 bit values: CallFrame relative offset << 8 | Window offset 38 | /// These values should follow the same order as given in param_names 39 | pub fn alloc<'guard>( 40 | mem: &'guard MutatorView, 41 | name: TaggedScopedPtr<'guard>, 42 | param_names: ScopedPtr<'guard, List>, 43 | code: ScopedPtr<'guard, ByteCode>, 44 | nonlocal_refs: Option>, 45 | ) -> Result, RuntimeError> { 46 | // Store a nil ptr if no nonlocal references are given 47 | let nonlocal_refs = if let Some(refs_ptr) = nonlocal_refs { 48 | TaggedCellPtr::new_with(refs_ptr.as_tagged(mem)) 49 | } else { 50 | TaggedCellPtr::new_nil() 51 | }; 52 | 53 | mem.alloc(Function { 54 | name: TaggedCellPtr::new_with(name), 55 | arity: param_names.length() as u8, 56 | code: CellPtr::new_with(code), 57 | param_names: CellPtr::new_with(param_names), 58 | nonlocal_refs, 59 | }) 60 | } 61 | 62 | /// Return the Function's name as a string slice 63 | pub fn name<'guard>(&self, guard: &'guard dyn MutatorScope) 
-> &'guard str { 64 | let name = self.name.get(guard); 65 | match *name { 66 | Value::Symbol(s) => s.as_str(guard), 67 | _ => "", 68 | } 69 | } 70 | 71 | /// Return the number of arguments the Function can take 72 | pub fn arity(&self) -> u8 { 73 | self.arity 74 | } 75 | 76 | /// Return the names of the parameters that the Function takes 77 | pub fn param_names<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, List> { 78 | self.param_names.get(guard) 79 | } 80 | 81 | /// Return the ByteCode object associated with the Function 82 | pub fn code<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, ByteCode> { 83 | self.code.get(guard) 84 | } 85 | 86 | /// Return true if the function is a closure - it has nonlocal variable references 87 | pub fn is_closure<'guard>(&self) -> bool { 88 | !self.nonlocal_refs.is_nil() 89 | } 90 | 91 | /// Return a list of nonlocal stack references referenced by the function. It is a panickable 92 | /// offense to call this when there are no nonlocals referenced by the function. This would 93 | /// indicate a compiler bug. 
94 | pub fn nonlocals<'guard>( 95 | &self, 96 | guard: &'guard dyn MutatorScope, 97 | ) -> ScopedPtr<'guard, ArrayU16> { 98 | match *self.nonlocal_refs.get(guard) { 99 | Value::ArrayU16(nonlocals) => nonlocals, 100 | _ => unreachable!(), 101 | } 102 | } 103 | } 104 | 105 | impl Print for Function { 106 | /// Prints a string representation of the function 107 | fn print<'guard>( 108 | &self, 109 | guard: &'guard dyn MutatorScope, 110 | f: &mut fmt::Formatter, 111 | ) -> fmt::Result { 112 | let name = self.name.get(guard); 113 | let params = self.param_names.get(guard); 114 | 115 | let mut param_string = String::new(); 116 | params.access_slice(guard, |items| { 117 | param_string = join(items.iter().map(|item| item.get(guard)), " ") 118 | }); 119 | 120 | match *name { 121 | Value::Symbol(s) => write!(f, "(Function {} ({}))", s.as_str(guard), param_string), 122 | _ => write!(f, "(Function ({}))", param_string), 123 | } 124 | } 125 | 126 | /// Prints the disassembled bytecode 127 | fn debug<'guard>( 128 | &self, 129 | guard: &'guard dyn MutatorScope, 130 | f: &mut fmt::Formatter, 131 | ) -> fmt::Result { 132 | self.print(guard, f)?; 133 | write!(f, "\nbytecode follows:\n")?; 134 | self.code(guard).debug(guard, f) 135 | } 136 | } 137 | 138 | /// A partial function application object type 139 | // ANCHOR: DefPartial 140 | #[derive(Clone)] 141 | pub struct Partial { 142 | /// Remaining number of arguments required to activate the function 143 | arity: u8, 144 | /// Number of arguments already applied 145 | used: u8, 146 | /// List of argument values already applied 147 | args: CellPtr, 148 | /// Closure environment - must be either nil or a List of Upvalues 149 | env: TaggedCellPtr, 150 | /// Function that will be activated when all arguments are applied 151 | func: CellPtr, 152 | } 153 | // ANCHOR_END: DefPartial 154 | 155 | impl Partial { 156 | /// Allocate a Partial application of a Function on the heap with the given set of arguments 157 | pub fn alloc<'guard>( 158 | 
mem: &'guard MutatorView, 159 | function: ScopedPtr<'guard, Function>, 160 | env: Option>, 161 | args: &[TaggedCellPtr], 162 | ) -> Result, RuntimeError> { 163 | let used = args.len() as u8; 164 | let arity = function.arity() - used; 165 | 166 | // Store a nil ptr if no closure env is given 167 | let env = if let Some(env_ptr) = env { 168 | TaggedCellPtr::new_with(env_ptr.as_tagged(mem)) 169 | } else { 170 | TaggedCellPtr::new_nil() 171 | }; 172 | 173 | // copy args to the Partial's own list 174 | let args_list: ScopedPtr<'guard, List> = ContainerFromSlice::from_slice(mem, &args)?; 175 | 176 | mem.alloc(Partial { 177 | arity, 178 | used, 179 | args: CellPtr::new_with(args_list), 180 | env, 181 | func: CellPtr::new_with(function), 182 | }) 183 | } 184 | 185 | /// Clone an existing Partial application, appending the given arguments to the list 186 | pub fn alloc_clone<'guard>( 187 | mem: &'guard MutatorView, 188 | partial: ScopedPtr<'guard, Partial>, 189 | new_args: &[TaggedCellPtr], 190 | ) -> Result, RuntimeError> { 191 | let used = partial.used() + new_args.len() as u8; 192 | let arity = partial.arity() - new_args.len() as u8; 193 | 194 | // clone the parent Partial's args 195 | let arg_list = List::alloc_clone(mem, partial.args(mem))?; 196 | // append any new args 197 | for arg in new_args { 198 | arg_list.push(mem, arg.clone())? 
199 | } 200 | 201 | mem.alloc(Partial { 202 | arity, 203 | used, 204 | args: CellPtr::new_with(arg_list), 205 | env: partial.env.clone(), 206 | func: partial.func.clone(), 207 | }) 208 | } 209 | 210 | /// Return the number of arguments this Partial needs before the function can be called 211 | pub fn arity(&self) -> u8 { 212 | self.arity 213 | } 214 | 215 | /// Return the count of arguments already applied 216 | pub fn used(&self) -> u8 { 217 | self.used 218 | } 219 | 220 | /// Return the arguments already supplied to the Partial 221 | pub fn args<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, List> { 222 | self.args.get(guard) 223 | } 224 | 225 | /// Return the closure environment. This will be nil if the Partial does not close over any 226 | /// variables. 227 | pub fn closure_env(&self) -> TaggedCellPtr { 228 | self.env.clone() 229 | } 230 | 231 | /// Return the Function object that the Partial will call 232 | pub fn function<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, Function> { 233 | self.func.get(guard) 234 | } 235 | } 236 | 237 | impl Print for Partial { 238 | /// Prints a string representation of the Partial object 239 | fn print<'guard>( 240 | &self, 241 | guard: &'guard dyn MutatorScope, 242 | f: &mut fmt::Formatter, 243 | ) -> fmt::Result { 244 | let function = self.func.get(guard); 245 | let name = function.name.get(guard); 246 | let params = function.param_names.get(guard); 247 | 248 | let mut param_string = String::new(); 249 | params.access_slice(guard, |items| { 250 | let start = self.used as usize; 251 | param_string = join(items[start..].iter().map(|item| item.get(guard)), " ") 252 | }); 253 | 254 | match *name { 255 | Value::Symbol(s) => write!(f, "(Partial {} ({}))", s.as_str(guard), param_string), 256 | _ => write!(f, "(Partial ({}))", param_string), 257 | } 258 | } 259 | 260 | /// Prints the associated function's disassembled bytecode 261 | fn debug<'guard>( 262 | &self, 263 | guard: &'guard dyn 
MutatorScope, 264 | f: &mut fmt::Formatter, 265 | ) -> fmt::Result { 266 | self.print(guard, f)?; 267 | write!(f, "\nbytecode follows:\n")?; 268 | self.func.get(guard).code(guard).debug(guard, f) 269 | } 270 | } 271 | 272 | /// A list of arguments to apply to functions 273 | pub struct CurriedArguments { 274 | // TODO 275 | // not sure of the mechanics of this. 276 | // The ghc runtime would push all these to the stack and then consume the stack with 277 | // function continuations 278 | } 279 | -------------------------------------------------------------------------------- /interpreter/src/hashable.rs: -------------------------------------------------------------------------------- 1 | /// Scope-guard limited Hashable trait type 2 | use std::hash::Hasher; 3 | 4 | use crate::safeptr::MutatorScope; 5 | 6 | // ANCHOR: DefHashable 7 | /// Similar to Hash but for use in a mutator lifetime-limited scope 8 | pub trait Hashable { 9 | fn hash<'guard, H: Hasher>(&self, _guard: &'guard dyn MutatorScope, hasher: &mut H); 10 | } 11 | // ANCHOR_END: DefHashable 12 | -------------------------------------------------------------------------------- /interpreter/src/headers.rs: -------------------------------------------------------------------------------- 1 | /// Defines an `ObjectHeader` type to immediately preceed each heap allocated 2 | /// object, which also contains a type tag but with space for many more types. 
3 | use stickyimmix::{ 4 | AllocHeader, AllocObject, AllocRaw, AllocTypeId, ArraySize, Mark, RawPtr, SizeClass, 5 | }; 6 | 7 | use crate::array::{ArrayU16, ArrayU32, ArrayU8}; 8 | use crate::bytecode::{ArrayOpcode, ByteCode, InstructionStream}; 9 | use crate::dict::Dict; 10 | use crate::function::{Function, Partial}; 11 | use crate::list::List; 12 | use crate::memory::HeapStorage; 13 | use crate::number::NumberObject; 14 | use crate::pair::Pair; 15 | use crate::pointerops::{AsNonNull, Tagged}; 16 | use crate::symbol::Symbol; 17 | use crate::taggedptr::FatPtr; 18 | use crate::text::Text; 19 | use crate::vm::{CallFrameList, Thread, Upvalue}; 20 | 21 | /// Recognized heap-allocated types. 22 | /// This should represent every type native to the runtime with the exception of tagged pointer inline value 23 | /// types. 24 | // ANCHOR: DefTypeList 25 | #[repr(u16)] 26 | #[derive(Debug, Copy, Clone, PartialEq)] 27 | pub enum TypeList { 28 | ArrayBackingBytes, 29 | ArrayOpcode, 30 | ArrayU8, 31 | ArrayU16, 32 | ArrayU32, 33 | ByteCode, 34 | CallFrameList, 35 | Dict, 36 | Function, 37 | InstructionStream, 38 | List, 39 | NumberObject, 40 | Pair, 41 | Partial, 42 | Symbol, 43 | Text, 44 | Thread, 45 | Upvalue, 46 | } 47 | 48 | // Mark this as a Stickyimmix type-identifier type 49 | impl AllocTypeId for TypeList {} 50 | // ANCHOR_END: DefTypeList 51 | 52 | /// A heap-allocated object header 53 | // ANCHOR: DefObjectHeader 54 | pub struct ObjectHeader { 55 | mark: Mark, 56 | size_class: SizeClass, 57 | type_id: TypeList, 58 | size_bytes: u32, 59 | } 60 | // ANCHOR_END: DefObjectHeader 61 | 62 | impl ObjectHeader { 63 | /// Convert the ObjectHeader address to a FatPtr pointing at the object itself. 
64 | // NOTE Any type that is a runtime dynamic type must be added to the below list 65 | // NOTE Be careful to match the correct TypeList discriminant with it's corresponding FatPtr discriminant 66 | // NOTE Be careful to untag the pointer before putting it into a `FatPtr` 67 | // ANCHOR: DefObjectHeaderGetObjectFatPtr 68 | pub unsafe fn get_object_fatptr(&self) -> FatPtr { 69 | let ptr_to_self = self.non_null_ptr(); 70 | let object_addr = HeapStorage::get_object(ptr_to_self); 71 | 72 | match self.type_id { 73 | TypeList::ArrayU8 => FatPtr::ArrayU8(RawPtr::untag(object_addr.cast::())), 74 | TypeList::ArrayU16 => FatPtr::ArrayU16(RawPtr::untag(object_addr.cast::())), 75 | TypeList::ArrayU32 => FatPtr::ArrayU32(RawPtr::untag(object_addr.cast::())), 76 | TypeList::Dict => FatPtr::Dict(RawPtr::untag(object_addr.cast::())), 77 | TypeList::Function => FatPtr::Function(RawPtr::untag(object_addr.cast::())), 78 | TypeList::List => FatPtr::List(RawPtr::untag(object_addr.cast::())), 79 | TypeList::NumberObject => { 80 | FatPtr::NumberObject(RawPtr::untag(object_addr.cast::())) 81 | } 82 | TypeList::Pair => FatPtr::Pair(RawPtr::untag(object_addr.cast::())), 83 | TypeList::Partial => FatPtr::Partial(RawPtr::untag(object_addr.cast::())), 84 | TypeList::Symbol => FatPtr::Symbol(RawPtr::untag(object_addr.cast::())), 85 | TypeList::Text => FatPtr::Text(RawPtr::untag(object_addr.cast::())), 86 | TypeList::Upvalue => FatPtr::Upvalue(RawPtr::untag(object_addr.cast::())), 87 | 88 | // Other types not represented by FatPtr are an error to id here 89 | _ => panic!("Invalid ObjectHeader type tag {:?}!", self.type_id), 90 | } 91 | } 92 | // ANCHOR_END: DefObjectHeaderGetObjectFatPtr 93 | } 94 | 95 | impl AsNonNull for ObjectHeader {} 96 | 97 | impl AllocHeader for ObjectHeader { 98 | type TypeId = TypeList; 99 | 100 | fn new>( 101 | size: u32, 102 | size_class: SizeClass, 103 | mark: Mark, 104 | ) -> ObjectHeader { 105 | ObjectHeader { 106 | mark, 107 | size_class, 108 | type_id: 
O::TYPE_ID, 109 | size_bytes: size, 110 | } 111 | } 112 | 113 | fn new_array(size: ArraySize, size_class: SizeClass, mark: Mark) -> ObjectHeader { 114 | ObjectHeader { 115 | mark, 116 | size_class, 117 | type_id: TypeList::ArrayBackingBytes, 118 | size_bytes: size as u32, 119 | } 120 | } 121 | 122 | fn mark(&mut self) { 123 | self.mark = Mark::Marked; 124 | } 125 | 126 | fn is_marked(&self) -> bool { 127 | self.mark == Mark::Marked 128 | } 129 | 130 | fn size_class(&self) -> SizeClass { 131 | self.size_class 132 | } 133 | 134 | fn size(&self) -> u32 { 135 | self.size_bytes 136 | } 137 | 138 | fn type_id(&self) -> TypeList { 139 | self.type_id 140 | } 141 | } 142 | 143 | /// Apply the type ID to each native type 144 | macro_rules! declare_allocobject { 145 | ($T:ty, $I:tt) => { 146 | impl AllocObject for $T { 147 | const TYPE_ID: TypeList = TypeList::$I; 148 | } 149 | }; 150 | } 151 | 152 | declare_allocobject!(ArrayOpcode, ArrayOpcode); 153 | declare_allocobject!(ArrayU8, ArrayU8); 154 | declare_allocobject!(ArrayU16, ArrayU16); 155 | declare_allocobject!(ArrayU32, ArrayU32); 156 | declare_allocobject!(ByteCode, ByteCode); 157 | declare_allocobject!(CallFrameList, CallFrameList); 158 | declare_allocobject!(Dict, Dict); 159 | declare_allocobject!(Function, Function); 160 | declare_allocobject!(InstructionStream, InstructionStream); 161 | declare_allocobject!(List, List); 162 | declare_allocobject!(NumberObject, NumberObject); 163 | declare_allocobject!(Pair, Pair); 164 | declare_allocobject!(Partial, Partial); 165 | declare_allocobject!(Symbol, Symbol); 166 | declare_allocobject!(Text, Text); 167 | declare_allocobject!(Thread, Thread); 168 | declare_allocobject!(Upvalue, Upvalue); 169 | -------------------------------------------------------------------------------- /interpreter/src/lexer.rs: -------------------------------------------------------------------------------- 1 | /// S-Expression lexer implementation. 
2 | /// 3 | /// This isn't using any look-ahead yet and so always interprets 4 | /// (.symbol) as ( DOT SYMBOL ) 5 | use crate::error::{err_lexer, spos, RuntimeError, SourcePos}; 6 | 7 | // key characters 8 | const OPEN_PAREN: char = '('; 9 | const CLOSE_PAREN: char = ')'; 10 | const SPACE: char = ' '; 11 | const TAB: char = '\t'; 12 | const CR: char = '\r'; 13 | const LF: char = '\n'; 14 | const DOT: char = '.'; 15 | const DOUBLE_QUOTE: char = '"'; 16 | const SINGLE_QUOTE: char = '\''; 17 | 18 | // ANCHOR: DefTokenType 19 | #[derive(Debug, PartialEq)] 20 | pub enum TokenType { 21 | OpenParen, 22 | CloseParen, 23 | Symbol(String), 24 | Dot, 25 | Text(String), 26 | Quote, 27 | } 28 | // ANCHOR_END: DefTokenType 29 | 30 | // ANCHOR: DefToken 31 | #[derive(Debug, PartialEq)] 32 | pub struct Token { 33 | pub pos: SourcePos, 34 | pub token: TokenType, 35 | } 36 | // ANCHOR_END: DefToken 37 | 38 | impl Token { 39 | fn new(pos: SourcePos, token: TokenType) -> Token { 40 | Token { pos, token } 41 | } 42 | } 43 | 44 | // tokenize a String 45 | pub fn tokenize(input: &str) -> Result, RuntimeError> { 46 | use self::TokenType::*; 47 | 48 | // characters that terminate a symbol 49 | let terminating = [OPEN_PAREN, CLOSE_PAREN, SPACE, TAB, CR, LF, DOUBLE_QUOTE]; 50 | let is_terminating = |c: char| terminating.iter().any(|t| c == *t); 51 | 52 | // return value 53 | let mut tokens = Vec::new(); 54 | 55 | // start line numbering at 1, the first character of each line being number 0 56 | let mut lineno = 1; 57 | let mut charno = 0; 58 | 59 | let mut chars = input.chars(); 60 | let mut current = chars.next(); 61 | 62 | loop { 63 | match current { 64 | Some(TAB) => { 65 | return Err(err_lexer( 66 | spos(lineno, charno), 67 | "tabs are not valid whitespace", 68 | )); 69 | } 70 | 71 | Some(SPACE) => current = chars.next(), 72 | 73 | Some(CR) => { 74 | current = chars.next(); 75 | 76 | // consume \n if it follows \r 77 | if let Some(LF) = current { 78 | current = chars.next(); 79 | } 80 | 
81 | lineno += 1; 82 | charno = 0; 83 | continue; 84 | } 85 | 86 | Some(LF) => { 87 | current = chars.next(); 88 | lineno += 1; 89 | charno = 0; 90 | continue; 91 | } 92 | 93 | // this is not correct because it doesn't allow for a . to begin a number 94 | // or a symbol. Will have to fix later. 95 | Some(DOT) => { 96 | tokens.push(Token::new(spos(lineno, charno), Dot)); 97 | current = chars.next(); 98 | } 99 | 100 | Some(OPEN_PAREN) => { 101 | tokens.push(Token::new(spos(lineno, charno), OpenParen)); 102 | current = chars.next(); 103 | } 104 | 105 | Some(CLOSE_PAREN) => { 106 | tokens.push(Token::new(spos(lineno, charno), CloseParen)); 107 | current = chars.next(); 108 | } 109 | 110 | Some(DOUBLE_QUOTE) => { 111 | let text_begin = charno; 112 | 113 | let mut text = String::from(""); 114 | 115 | loop { 116 | current = chars.next(); 117 | if let Some(c) = current { 118 | if c == DOUBLE_QUOTE { 119 | current = chars.next(); 120 | charno += 1; 121 | break; 122 | } else { 123 | text.push(c); 124 | charno += 1; 125 | } 126 | } else { 127 | return Err(err_lexer(spos(lineno, charno), "Unterminated string")); 128 | } 129 | } 130 | 131 | tokens.push(Token::new(spos(lineno, text_begin), Text(text))) 132 | } 133 | 134 | Some(SINGLE_QUOTE) => { 135 | tokens.push(Token::new(spos(lineno, charno), Quote)); 136 | current = chars.next(); 137 | } 138 | 139 | Some(non_terminating) => { 140 | let symbol_begin = charno; 141 | 142 | let mut symbol = String::from(""); 143 | symbol.push(non_terminating); 144 | 145 | // consume symbol 146 | loop { 147 | current = chars.next(); 148 | if let Some(c) = current { 149 | if is_terminating(c) { 150 | break; 151 | } else { 152 | symbol.push(c); 153 | charno += 1; 154 | } 155 | } else { 156 | break; 157 | } 158 | } 159 | 160 | // complete symbol 161 | tokens.push(Token::new(spos(lineno, symbol_begin), Symbol(symbol))); 162 | } 163 | 164 | // EOL 165 | None => break, 166 | } 167 | 168 | charno += 1; 169 | } 170 | 171 | Ok(tokens) 172 | } 173 | 174 | 
#[cfg(test)] 175 | mod test { 176 | use super::*; 177 | 178 | #[test] 179 | fn lexer_empty_string() { 180 | if let Ok(tokens) = tokenize("") { 181 | assert!(tokens.len() == 0); 182 | } else { 183 | assert!(false, "unexpected error"); 184 | } 185 | } 186 | 187 | #[test] 188 | fn lexer_one_line() { 189 | if let Ok(tokens) = tokenize("(foo bar baz)") { 190 | assert!(tokens.len() == 5); 191 | assert_eq!(tokens[0], Token::new(spos(1, 0), TokenType::OpenParen)); 192 | assert_eq!( 193 | tokens[1], 194 | Token::new(spos(1, 1), TokenType::Symbol(String::from("foo"))) 195 | ); 196 | assert_eq!( 197 | tokens[2], 198 | Token::new(spos(1, 5), TokenType::Symbol(String::from("bar"))) 199 | ); 200 | assert_eq!( 201 | tokens[3], 202 | Token::new(spos(1, 9), TokenType::Symbol(String::from("baz"))) 203 | ); 204 | assert_eq!(tokens[4], Token::new(spos(1, 12), TokenType::CloseParen)); 205 | } else { 206 | assert!(false, "unexpected error"); 207 | } 208 | } 209 | 210 | #[test] 211 | fn lexer_multi_line() { 212 | if let Ok(tokens) = tokenize("( foo\nbar\nbaz\n)") { 213 | assert!(tokens.len() == 5); 214 | assert_eq!(tokens[0], Token::new(spos(1, 0), TokenType::OpenParen)); 215 | assert_eq!( 216 | tokens[1], 217 | Token::new(spos(1, 2), TokenType::Symbol(String::from("foo"))) 218 | ); 219 | assert_eq!( 220 | tokens[2], 221 | Token::new(spos(2, 0), TokenType::Symbol(String::from("bar"))) 222 | ); 223 | assert_eq!( 224 | tokens[3], 225 | Token::new(spos(3, 0), TokenType::Symbol(String::from("baz"))) 226 | ); 227 | assert_eq!(tokens[4], Token::new(spos(4, 0), TokenType::CloseParen)); 228 | } else { 229 | assert!(false, "unexpected error"); 230 | } 231 | } 232 | 233 | #[test] 234 | fn lexer_bad_whitespace() { 235 | if let Err(e) = tokenize("(foo\n\t(bar))") { 236 | if let Some(SourcePos { line, column }) = e.error_pos() { 237 | assert_eq!(line, 2); 238 | assert_eq!(column, 0); 239 | } else { 240 | assert!(false, "Expected error position"); 241 | } 242 | } else { 243 | assert!(false, "expected 
ParseEvalError for tab character"); 244 | } 245 | } 246 | 247 | #[test] 248 | fn lexer_text() { 249 | if let Ok(_tokens) = tokenize("(foo \"text\" bar)") { 250 | // TODO 251 | } else { 252 | assert!(false, "unexpected error") 253 | } 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /interpreter/src/list.rs: -------------------------------------------------------------------------------- 1 | /// List is an Array type that can contain any other object 2 | use crate::array::Array; 3 | use crate::safeptr::TaggedCellPtr; 4 | 5 | /// A List can contain a mixed sequence of any type of value 6 | pub type List = Array; 7 | -------------------------------------------------------------------------------- /interpreter/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate blockalloc; 2 | extern crate clap; 3 | extern crate dirs; 4 | extern crate fnv; 5 | extern crate itertools; 6 | extern crate rustyline; 7 | extern crate stickyimmix; 8 | 9 | use std::fs::File; 10 | use std::io; 11 | use std::io::prelude::*; 12 | use std::process; 13 | 14 | use clap::{App, Arg}; 15 | 16 | use rustyline::error::ReadlineError; 17 | use rustyline::Editor; 18 | 19 | mod arena; 20 | mod array; 21 | mod bytecode; 22 | mod compiler; 23 | mod containers; 24 | mod dict; 25 | mod error; 26 | mod function; 27 | mod hashable; 28 | mod headers; 29 | mod lexer; 30 | mod list; 31 | mod memory; 32 | mod number; 33 | mod pair; 34 | mod parser; 35 | mod pointerops; 36 | mod printer; 37 | mod rawarray; 38 | mod repl; 39 | mod safeptr; 40 | mod symbol; 41 | mod symbolmap; 42 | mod taggedptr; 43 | mod text; 44 | mod vm; 45 | 46 | use crate::error::RuntimeError; 47 | use crate::memory::Memory; 48 | use crate::repl::RepMaker; 49 | 50 | /// Read a file into a String 51 | fn load_file(filename: &str) -> Result { 52 | let mut contents = String::new(); 53 | 54 | File::open(filename)?.read_to_string(&mut 
contents)?; 55 | 56 | Ok(contents) 57 | } 58 | 59 | /// Read and evaluate an entire file 60 | fn read_file(filename: &str) -> Result<(), RuntimeError> { 61 | let _contents = load_file(&filename)?; 62 | 63 | // TODO 64 | 65 | Ok(()) 66 | } 67 | 68 | /// Read a line at a time, printing the input back out 69 | fn read_print_loop() -> Result<(), RuntimeError> { 70 | // establish a repl input history file path 71 | let history_file = match dirs::home_dir() { 72 | Some(mut path) => { 73 | path.push(".evalrus_history"); 74 | Some(String::from(path.to_str().unwrap())) 75 | } 76 | None => None, 77 | }; 78 | 79 | // () means no completion support (TODO) 80 | // Another TODO - find a more suitable alternative to rustyline 81 | let mut reader = Editor::<()>::new(); 82 | 83 | // Try to load the repl history file 84 | if let Some(ref path) = history_file { 85 | if let Err(err) = reader.load_history(&path) { 86 | eprintln!("Could not read history: {}", err); 87 | } 88 | } 89 | 90 | let mem = Memory::new(); 91 | let rep_maker = RepMaker {}; 92 | let rep = mem.mutate(&rep_maker, ())?; 93 | 94 | // repl 95 | loop { 96 | let readline = reader.readline("> "); 97 | 98 | match readline { 99 | // valid input 100 | Ok(line) => { 101 | reader.add_history_entry(&line); 102 | mem.mutate(&rep, line)?; 103 | } 104 | 105 | // some kind of program termination condition 106 | Err(e) => { 107 | if let Some(ref path) = history_file { 108 | reader.save_history(&path).unwrap_or_else(|err| { 109 | eprintln!("could not save input history in {}: {}", path, err); 110 | }); 111 | } 112 | 113 | // EOF is fine 114 | if let ReadlineError::Eof = e { 115 | return Ok(()); 116 | } else { 117 | return Err(RuntimeError::from(e)); 118 | } 119 | } 120 | } 121 | } 122 | } 123 | 124 | fn main() { 125 | // parse command line argument, an optional filename 126 | let matches = App::new("Eval-R-Us") 127 | .about("Evaluate expressions") 128 | .arg( 129 | Arg::with_name("filename") 130 | .help("Optional filename to read 
in") 131 | .index(1), 132 | ) 133 | .get_matches(); 134 | 135 | if let Some(filename) = matches.value_of("filename") { 136 | // if a filename was specified, read it into a String 137 | read_file(filename).unwrap_or_else(|err| { 138 | eprintln!("Terminated: {}", err); 139 | process::exit(1); 140 | }); 141 | } else { 142 | // otherwise begin a repl 143 | read_print_loop().unwrap_or_else(|err| { 144 | eprintln!("Terminated: {}", err); 145 | process::exit(1); 146 | }); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /interpreter/src/memory.rs: -------------------------------------------------------------------------------- 1 | /// VM-level memory abstraction 2 | /// 3 | /// Defines Stack, Heap and Memory types, and a MemoryView type that gives a mutator a safe 4 | /// view into the stack and heap. 5 | use stickyimmix::{AllocObject, AllocRaw, ArraySize, RawPtr, StickyImmixHeap}; 6 | 7 | use crate::error::RuntimeError; 8 | use crate::headers::{ObjectHeader, TypeList}; 9 | use crate::pointerops::ScopedRef; 10 | use crate::safeptr::{MutatorScope, ScopedPtr, TaggedScopedPtr}; 11 | use crate::symbolmap::SymbolMap; 12 | use crate::taggedptr::{FatPtr, TaggedPtr}; 13 | 14 | /// This type describes the mutator's view into memory - the heap and symbol name/ptr lookup. 
15 | /// 16 | /// It implements `MutatorScope` such that any `TaggedScopedPtr` or `Value` instances must be lifetime- 17 | /// limited to the lifetime of this instance using `&'scope dyn MutatorScope`; 18 | // ANCHOR: DefMutatorView 19 | pub struct MutatorView<'memory> { 20 | heap: &'memory Heap, 21 | } 22 | // ANCHOR_END: DefMutatorView 23 | 24 | impl<'memory> MutatorView<'memory> { 25 | fn new(mem: &'memory Memory) -> MutatorView<'memory> { 26 | MutatorView { heap: &mem.heap } 27 | } 28 | 29 | /// Get a Symbol pointer from its name 30 | // ANCHOR: DefMutatorViewLookupSym 31 | pub fn lookup_sym(&self, name: &str) -> TaggedScopedPtr<'_> { 32 | TaggedScopedPtr::new(self, self.heap.lookup_sym(name)) 33 | } 34 | // ANCHOR_END: DefMutatorViewLookupSym 35 | 36 | /// Write an object into the heap and return a scope-limited pointer to it 37 | // ANCHOR: DefMutatorViewAlloc 38 | pub fn alloc(&self, object: T) -> Result, RuntimeError> 39 | where 40 | T: AllocObject, 41 | { 42 | Ok(ScopedPtr::new( 43 | self, 44 | self.heap.alloc(object)?.scoped_ref(self), 45 | )) 46 | } 47 | // ANCHOR_END: DefMutatorViewAlloc 48 | 49 | /// Write an object into the heap and return a scope-limited runtime-tagged pointer to it 50 | // ANCHOR: DefMutatorViewAllocTagged 51 | pub fn alloc_tagged(&self, object: T) -> Result, RuntimeError> 52 | where 53 | FatPtr: From>, 54 | T: AllocObject, 55 | { 56 | Ok(TaggedScopedPtr::new(self, self.heap.alloc_tagged(object)?)) 57 | } 58 | // ANCHOR_END: DefMutatorViewAllocTagged 59 | 60 | /// Make space for an array of bytes 61 | pub fn alloc_array(&self, capacity: ArraySize) -> Result, RuntimeError> { 62 | self.heap.alloc_array(capacity) 63 | } 64 | 65 | /// Return a nil-initialized runtime-tagged pointer 66 | pub fn nil(&self) -> TaggedScopedPtr<'_> { 67 | TaggedScopedPtr::new(self, TaggedPtr::nil()) 68 | } 69 | } 70 | 71 | impl<'memory> MutatorScope for MutatorView<'memory> {} 72 | 73 | /// The heap implementation 74 | // ANCHOR: DefHeapStorage 75 | pub type 
HeapStorage = StickyImmixHeap; 76 | // ANCHOR_END: DefHeapStorage 77 | 78 | /// Heap memory types. 79 | // ANCHOR: DefHeap 80 | struct Heap { 81 | heap: HeapStorage, 82 | syms: SymbolMap, 83 | } 84 | // ANCHOR_END: DefHeap 85 | 86 | impl Heap { 87 | fn new() -> Heap { 88 | Heap { 89 | heap: HeapStorage::new(), 90 | syms: SymbolMap::new(), 91 | } 92 | } 93 | 94 | /// Get a Symbol pointer from its name 95 | // ANCHOR: DefHeapLookupSym 96 | fn lookup_sym(&self, name: &str) -> TaggedPtr { 97 | TaggedPtr::symbol(self.syms.lookup(name)) 98 | } 99 | // ANCHOR_END: DefHeapLookupSym 100 | 101 | /// Write an object to the heap and return the raw pointer to it 102 | // ANCHOR: DefHeapAlloc 103 | fn alloc(&self, object: T) -> Result, RuntimeError> 104 | where 105 | T: AllocObject, 106 | { 107 | Ok(self.heap.alloc(object)?) 108 | } 109 | // ANCHOR_END: DefHeapAlloc 110 | 111 | /// Write an object into the heap and return a tagged pointer to it 112 | // ANCHOR: DefHeapAllocTagged 113 | fn alloc_tagged(&self, object: T) -> Result 114 | where 115 | FatPtr: From>, 116 | T: AllocObject, 117 | { 118 | Ok(TaggedPtr::from(FatPtr::from(self.heap.alloc(object)?))) 119 | } 120 | // ANCHOR_END: DefHeapAllocTagged 121 | 122 | fn alloc_array(&self, capacity: ArraySize) -> Result, RuntimeError> { 123 | Ok(self.heap.alloc_array(capacity)?) 
124 | } 125 | } 126 | 127 | /// Wraps a heap and provides scope-limited access to the heap 128 | // ANCHOR: DefMemory 129 | pub struct Memory { 130 | heap: Heap, 131 | } 132 | // ANCHOR_END: DefMemory 133 | 134 | impl Memory { 135 | /// Instantiate a new memory environment 136 | pub fn new() -> Memory { 137 | Memory { heap: Heap::new() } 138 | } 139 | 140 | /// Run a mutator process 141 | // ANCHOR: DefMemoryMutate 142 | pub fn mutate(&self, m: &M, input: M::Input) -> Result { 143 | let mut guard = MutatorView::new(self); 144 | m.run(&mut guard, input) 145 | } 146 | // ANCHOR_END: DefMemoryMutate 147 | } 148 | 149 | /// Defines the interface a heap-mutating type must use to be allowed access to the heap 150 | // ANCHOR: DefMutator 151 | pub trait Mutator: Sized { 152 | type Input; 153 | type Output; 154 | 155 | fn run(&self, mem: &MutatorView, input: Self::Input) -> Result; 156 | 157 | // TODO 158 | // function to return iterator that iterates over roots 159 | } 160 | // ANCHOR_END: DefMutator 161 | -------------------------------------------------------------------------------- /interpreter/src/number.rs: -------------------------------------------------------------------------------- 1 | /// An integer type - TODO 2 | use std::fmt; 3 | 4 | use crate::array::Array; 5 | use crate::printer::Print; 6 | use crate::safeptr::MutatorScope; 7 | 8 | /// TODO A heap-allocated number 9 | pub struct NumberObject { 10 | _value: Array, 11 | } 12 | 13 | impl Print for NumberObject { 14 | fn print<'guard>( 15 | &self, 16 | _guard: &'guard dyn MutatorScope, 17 | f: &mut fmt::Formatter, 18 | ) -> fmt::Result { 19 | // TODO 20 | write!(f, "NumberObject(nan)") 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /interpreter/src/pair.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | use std::fmt; 3 | 4 | use crate::error::{err_eval, RuntimeError, SourcePos}; 5 | use 
crate::memory::MutatorView; 6 | use crate::printer::Print; 7 | use crate::safeptr::{MutatorScope, ScopedPtr, TaggedCellPtr, TaggedScopedPtr}; 8 | use crate::taggedptr::Value; 9 | 10 | /// A Pair of pointers, like a Cons cell of old 11 | // ANCHOR: DefPair 12 | #[derive(Clone)] 13 | pub struct Pair { 14 | pub first: TaggedCellPtr, 15 | pub second: TaggedCellPtr, 16 | // Possible source code positions of the first and second values 17 | pub first_pos: Cell>, 18 | pub second_pos: Cell>, 19 | } 20 | // ANCHOR_END: DefPair 21 | 22 | impl Pair { 23 | /// Return a new empty Pair instance 24 | // ANCHOR: DefPairNew 25 | pub fn new() -> Pair { 26 | Pair { 27 | first: TaggedCellPtr::new_nil(), 28 | second: TaggedCellPtr::new_nil(), 29 | first_pos: Cell::new(None), 30 | second_pos: Cell::new(None), 31 | } 32 | } 33 | // ANCHOR_END: DefPairNew 34 | 35 | /// Set Pair.second to a new Pair with newPair.first set to the value 36 | // ANCHOR: DefPairAppend 37 | pub fn append<'guard>( 38 | &self, 39 | mem: &'guard MutatorView, 40 | value: TaggedScopedPtr<'guard>, 41 | ) -> Result, RuntimeError> { 42 | let pair = Pair::new(); 43 | pair.first.set(value); 44 | 45 | let pair = mem.alloc_tagged(pair)?; 46 | self.second.set(pair); 47 | 48 | Ok(pair) 49 | } 50 | // ANCHOR_END: DefPairAppend 51 | 52 | /// Set Pair.second to the given value 53 | // ANCHOR: DefPairDot 54 | pub fn dot<'guard>(&self, value: TaggedScopedPtr<'guard>) { 55 | self.second.set(value); 56 | } 57 | // ANCHOR_END: DefPairDot 58 | 59 | pub fn set_first_source_code_pos(&self, pos: SourcePos) { 60 | self.first_pos.set(Some(pos)); 61 | } 62 | 63 | pub fn set_second_source_code_pos(&self, pos: SourcePos) { 64 | self.second_pos.set(Some(pos)); 65 | } 66 | } 67 | 68 | impl Print for Pair { 69 | fn print<'guard>( 70 | &self, 71 | guard: &'guard dyn MutatorScope, 72 | f: &mut fmt::Formatter, 73 | ) -> fmt::Result { 74 | let mut tail = ScopedPtr::new(guard, self); 75 | 76 | write!(f, "({}", tail.first.get(guard))?; 77 | 78 | 
while let Value::Pair(next) = *tail.second.get(guard) { 79 | tail = next; 80 | write!(f, " {}", tail.first.get(guard))?; 81 | } 82 | 83 | // clunky way to print anything but nil 84 | let second = *tail.second.get(guard); 85 | match second { 86 | Value::Nil => (), 87 | _ => write!(f, " . {}", second)?, 88 | } 89 | 90 | write!(f, ")") 91 | } 92 | 93 | // In debug print, use dot notation 94 | fn debug<'guard>( 95 | &self, 96 | guard: &'guard dyn MutatorScope, 97 | f: &mut fmt::Formatter, 98 | ) -> fmt::Result { 99 | write!( 100 | f, 101 | "({:?} . {:?})", 102 | self.first.get(guard), 103 | self.second.get(guard) 104 | ) 105 | } 106 | } 107 | 108 | /// Link the two values `head` and `rest` into a Pair instance 109 | // ANCHOR: DefCons 110 | pub fn cons<'guard>( 111 | mem: &'guard MutatorView, 112 | head: TaggedScopedPtr<'guard>, 113 | rest: TaggedScopedPtr<'guard>, 114 | ) -> Result, RuntimeError> { 115 | let pair = Pair::new(); 116 | pair.first.set(head); 117 | pair.second.set(rest); 118 | mem.alloc_tagged(pair) 119 | } 120 | // ANCHOR_END: DefCons 121 | 122 | /// Unpack a list of Pair instances into a Vec 123 | pub fn vec_from_pairs<'guard>( 124 | guard: &'guard dyn MutatorScope, 125 | pair_list: TaggedScopedPtr<'guard>, 126 | ) -> Result>, RuntimeError> { 127 | match *pair_list { 128 | Value::Pair(pair) => { 129 | let mut result = Vec::new(); 130 | 131 | result.push(pair.first.get(guard)); 132 | 133 | let mut next = pair.second.get(guard); 134 | while let Value::Pair(next_pair) = *next { 135 | result.push(next_pair.first.get(guard)); 136 | next = next_pair.second.get(guard); 137 | } 138 | 139 | // we've terminated the list, but correctly? 
140 | match *next { 141 | Value::Nil => Ok(result), 142 | _ => Err(err_eval("Incorrectly terminated Pair list")), 143 | } 144 | } 145 | Value::Nil => Ok(Vec::new()), 146 | _ => Err(err_eval("Expected a Pair")), 147 | } 148 | } 149 | 150 | /// Unpack a list of Pair instances into a Vec, expecting n values 151 | pub fn vec_from_n_pairs<'guard>( 152 | guard: &'guard dyn MutatorScope, 153 | pair_list: TaggedScopedPtr<'guard>, 154 | expect_length: usize, 155 | ) -> Result>, RuntimeError> { 156 | let result = vec_from_pairs(guard, pair_list)?; 157 | 158 | if result.len() != expect_length { 159 | return Err(err_eval(&format!( 160 | "Pair list has {} items, expected {}", 161 | result.len(), 162 | expect_length 163 | ))); 164 | } 165 | 166 | Ok(result) 167 | } 168 | 169 | /// Convenience function for unpacking a list of Pair instances into one value 170 | pub fn value_from_1_pair<'guard>( 171 | guard: &'guard dyn MutatorScope, 172 | pair_list: TaggedScopedPtr<'guard>, 173 | ) -> Result, RuntimeError> { 174 | let result = vec_from_pairs(guard, pair_list)?; 175 | 176 | match result.as_slice() { 177 | [first] => Ok(*first), 178 | _ => Err(err_eval(&format!( 179 | "Pair list has {} items, expected 1", 180 | result.len() 181 | ))), 182 | } 183 | } 184 | 185 | /// Convenience function for unpacking a list of Pair instances into two values 186 | pub fn values_from_2_pairs<'guard>( 187 | guard: &'guard dyn MutatorScope, 188 | pair_list: TaggedScopedPtr<'guard>, 189 | ) -> Result<(TaggedScopedPtr<'guard>, TaggedScopedPtr<'guard>), RuntimeError> { 190 | let result = vec_from_pairs(guard, pair_list)?; 191 | 192 | match result.as_slice() { 193 | [first, second] => Ok((*first, *second)), 194 | _ => Err(err_eval(&format!( 195 | "Pair list has {} items, expected 2", 196 | result.len() 197 | ))), 198 | } 199 | } 200 | 201 | /// Convenience function for unpacking a list of Pair instances into three values 202 | pub fn values_from_3_pairs<'guard>( 203 | guard: &'guard dyn MutatorScope, 204 | 
pair_list: TaggedScopedPtr<'guard>, 205 | ) -> Result< 206 | ( 207 | TaggedScopedPtr<'guard>, 208 | TaggedScopedPtr<'guard>, 209 | TaggedScopedPtr<'guard>, 210 | ), 211 | RuntimeError, 212 | > { 213 | let result = vec_from_pairs(guard, pair_list)?; 214 | 215 | match result.as_slice() { 216 | [first, second, third] => Ok((*first, *second, *third)), 217 | _ => Err(err_eval(&format!( 218 | "Pair list has {} items, expected 3", 219 | result.len() 220 | ))), 221 | } 222 | } 223 | 224 | #[cfg(test)] 225 | mod test { 226 | use super::*; 227 | use crate::error::RuntimeError; 228 | use crate::memory::{Memory, Mutator, MutatorView}; 229 | 230 | fn test_helper(test_fn: fn(&MutatorView) -> Result<(), RuntimeError>) { 231 | let mem = Memory::new(); 232 | 233 | struct Test {} 234 | impl Mutator for Test { 235 | type Input = fn(&MutatorView) -> Result<(), RuntimeError>; 236 | type Output = (); 237 | 238 | fn run( 239 | &self, 240 | mem: &MutatorView, 241 | test_fn: Self::Input, 242 | ) -> Result { 243 | test_fn(mem) 244 | } 245 | } 246 | 247 | let test = Test {}; 248 | mem.mutate(&test, test_fn).unwrap(); 249 | } 250 | 251 | #[test] 252 | fn unpack_pair_list_bad() { 253 | fn test_inner(mem: &MutatorView) -> Result<(), RuntimeError> { 254 | // this is not a Pair, it's an error to convert it to a Vec 255 | let thing = mem.lookup_sym("nothing"); 256 | 257 | let result = vec_from_pairs(mem, thing); 258 | 259 | assert!(result.is_err()); 260 | 261 | Ok(()) 262 | } 263 | 264 | test_helper(test_inner) 265 | } 266 | 267 | #[test] 268 | fn unpack_pair_list_n_values() { 269 | fn test_inner(mem: &MutatorView) -> Result<(), RuntimeError> { 270 | let mut head = cons(mem, mem.lookup_sym("alice"), mem.nil())?; 271 | head = cons(mem, mem.lookup_sym("bob"), head)?; 272 | head = cons(mem, mem.lookup_sym("carlos"), head)?; 273 | head = cons(mem, mem.lookup_sym("dave"), head)?; 274 | head = cons(mem, mem.lookup_sym("eve"), head)?; 275 | 276 | let result = vec_from_pairs(mem, head); 277 | 278 | 
assert!(result.is_ok()); 279 | 280 | let inside = result.unwrap(); 281 | assert!( 282 | inside 283 | == vec![ 284 | mem.lookup_sym("eve"), 285 | mem.lookup_sym("dave"), 286 | mem.lookup_sym("carlos"), 287 | mem.lookup_sym("bob"), 288 | mem.lookup_sym("alice") 289 | ] 290 | ); 291 | 292 | Ok(()) 293 | } 294 | 295 | test_helper(test_inner) 296 | } 297 | 298 | #[test] 299 | fn unpack_pair_list_bad_terminator() { 300 | fn test_inner(mem: &MutatorView) -> Result<(), RuntimeError> { 301 | let mut head = cons( 302 | mem, 303 | mem.lookup_sym("alice"), 304 | mem.lookup_sym("non-terminator"), 305 | )?; 306 | head = cons(mem, mem.lookup_sym("bob"), head)?; 307 | head = cons(mem, mem.lookup_sym("carlos"), head)?; 308 | head = cons(mem, mem.lookup_sym("dave"), head)?; 309 | head = cons(mem, mem.lookup_sym("eve"), head)?; 310 | 311 | let result = vec_from_pairs(mem, head); 312 | 313 | assert!(result.is_err()); 314 | 315 | Ok(()) 316 | } 317 | 318 | test_helper(test_inner) 319 | } 320 | 321 | #[test] 322 | fn unpack_pair_list_n_values_expected() { 323 | fn test_inner(mem: &MutatorView) -> Result<(), RuntimeError> { 324 | let mut head = cons(mem, mem.lookup_sym("alice"), mem.nil())?; 325 | head = cons(mem, mem.lookup_sym("bob"), head)?; 326 | head = cons(mem, mem.lookup_sym("carlos"), head)?; 327 | head = cons(mem, mem.lookup_sym("dave"), head)?; 328 | head = cons(mem, mem.lookup_sym("eve"), head)?; 329 | 330 | let result = vec_from_n_pairs(mem, head, 5); 331 | assert!(result.is_ok()); 332 | 333 | let result = vec_from_n_pairs(mem, head, 3); 334 | assert!(result.is_err()); 335 | 336 | let result = vec_from_n_pairs(mem, head, 6); 337 | assert!(result.is_err()); 338 | 339 | Ok(()) 340 | } 341 | 342 | test_helper(test_inner) 343 | } 344 | } 345 | -------------------------------------------------------------------------------- /interpreter/src/pointerops.rs: -------------------------------------------------------------------------------- 1 | /// Miscelaneous pointer operations 2 | 
use std::ptr::NonNull; 3 | 4 | use stickyimmix::RawPtr; 5 | 6 | use crate::safeptr::MutatorScope; 7 | 8 | /// For conversion of a reference to a NonNull 9 | pub trait AsNonNull { 10 | fn non_null_ptr(&self) -> NonNull { 11 | unsafe { NonNull::new_unchecked(self as *const Self as *mut Self) } 12 | } 13 | } 14 | 15 | // Pointer tag values and masks using the lowest 2 bits 16 | // ANCHOR: TaggedPtrTags 17 | const TAG_MASK: usize = 0x3; 18 | pub const TAG_SYMBOL: usize = 0x0; 19 | pub const TAG_PAIR: usize = 0x1; 20 | pub const TAG_OBJECT: usize = 0x2; 21 | pub const TAG_NUMBER: usize = 0x3; 22 | const PTR_MASK: usize = !0x3; 23 | // ANCHOR_END: TaggedPtrTags 24 | 25 | /// Return the tag from the given word 26 | pub fn get_tag(tagged_word: usize) -> usize { 27 | tagged_word & TAG_MASK 28 | } 29 | 30 | /// Pointer tagging operations on RawPtr 31 | // ANCHOR: DefTagged 32 | pub trait Tagged { 33 | fn tag(self, tag: usize) -> NonNull; 34 | fn untag(from: NonNull) -> RawPtr; 35 | } 36 | 37 | impl Tagged for RawPtr { 38 | fn tag(self, tag: usize) -> NonNull { 39 | unsafe { NonNull::new_unchecked((self.as_word() | tag) as *mut T) } 40 | } 41 | 42 | fn untag(from: NonNull) -> RawPtr { 43 | RawPtr::new((from.as_ptr() as usize & PTR_MASK) as *const T) 44 | } 45 | } 46 | // ANCHOR_END: DefTagged 47 | 48 | /// For accessing a pointer target, given a lifetime 49 | // ANCHOR: DefScopedRef 50 | pub trait ScopedRef { 51 | fn scoped_ref<'scope>(&self, guard: &'scope dyn MutatorScope) -> &'scope T; 52 | } 53 | 54 | impl ScopedRef for RawPtr { 55 | fn scoped_ref<'scope>(&self, _guard: &'scope dyn MutatorScope) -> &'scope T { 56 | unsafe { &*self.as_ptr() } 57 | } 58 | } 59 | // ANCHOR_END: DefScopedRef 60 | -------------------------------------------------------------------------------- /interpreter/src/printer.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | //use std::io; 3 | 4 | use crate::safeptr::MutatorScope; 5 | use 
crate::taggedptr::Value; 6 | 7 | /// Trait for using a `Value` lifted pointer in the `Display` trait 8 | pub trait Print { 9 | fn print<'guard>( 10 | &self, 11 | _guard: &'guard dyn MutatorScope, 12 | f: &mut fmt::Formatter, 13 | ) -> fmt::Result; 14 | 15 | fn debug<'guard>( 16 | &self, 17 | _guard: &'guard dyn MutatorScope, 18 | f: &mut fmt::Formatter, 19 | ) -> fmt::Result { 20 | self.print(_guard, f) 21 | } 22 | 23 | //fn repr<'guard, F: fmt::Write>(&self, _guard: &'guard dyn MutatorScope, f: &mut F) -> fmt::Result; 24 | 25 | //fn output<'guard, F: io::Write>( 26 | // &self, 27 | // _guard: &'guard dyn MutatorScope, 28 | // f: &mut F, 29 | //) -> io::Result<()>; 30 | } 31 | 32 | pub fn print(value: Value) -> String { 33 | format!("{}", value) 34 | } 35 | 36 | pub fn debug(value: Value) -> String { 37 | format!("{:?}", value) 38 | } 39 | -------------------------------------------------------------------------------- /interpreter/src/rawarray.rs: -------------------------------------------------------------------------------- 1 | use std::mem::size_of; 2 | use std::ptr::NonNull; 3 | use std::slice::from_raw_parts_mut; 4 | 5 | pub use stickyimmix::ArraySize; 6 | 7 | use crate::error::{ErrorKind, RuntimeError}; 8 | use crate::memory::MutatorView; 9 | 10 | /// Arrays start out at this size by default 11 | pub const DEFAULT_ARRAY_SIZE: ArraySize = 8; 12 | 13 | /// Arrays grow at this rate by default 14 | pub fn default_array_growth(capacity: ArraySize) -> Result { 15 | if capacity == 0 { 16 | Ok(DEFAULT_ARRAY_SIZE) 17 | } else { 18 | capacity 19 | .checked_add(capacity / 2) 20 | .ok_or(RuntimeError::new(ErrorKind::BadAllocationRequest)) 21 | } 22 | } 23 | 24 | /// Fundamental array type on which other variable-length types are built. 25 | /// Analagous to RawVec. 
26 | // ANCHOR: DefRawArray 27 | pub struct RawArray { 28 | /// Count of T-sized objects that can fit in the array 29 | capacity: ArraySize, 30 | ptr: Option>, 31 | } 32 | // ANCHOR_END: DefRawArray 33 | 34 | /// Since this base array type needs to be used in an interior-mutable way by the containers 35 | /// built on top of it, the Copy+Clone traits need to be implemented for it so that it can 36 | /// be used in a Cell 37 | impl Clone for RawArray { 38 | fn clone(&self) -> Self { 39 | RawArray { 40 | capacity: self.capacity, 41 | ptr: self.ptr, 42 | } 43 | } 44 | } 45 | 46 | impl Copy for RawArray {} 47 | 48 | impl RawArray { 49 | /// Return a RawArray of capacity 0 with no array bytes allocated 50 | pub fn new() -> RawArray { 51 | RawArray { 52 | capacity: 0, 53 | ptr: None, 54 | } 55 | } 56 | 57 | /// Return a RawArray of the given capacity number of bytes allocated 58 | // ANCHOR: DefRawArrayWithCapacity 59 | pub fn with_capacity<'scope>( 60 | mem: &'scope MutatorView, 61 | capacity: u32, 62 | ) -> Result, RuntimeError> { 63 | // convert to bytes, checking for possible overflow of ArraySize limit 64 | let capacity_bytes = capacity 65 | .checked_mul(size_of::() as ArraySize) 66 | .ok_or(RuntimeError::new(ErrorKind::BadAllocationRequest))?; 67 | 68 | Ok(RawArray { 69 | capacity, 70 | ptr: NonNull::new(mem.alloc_array(capacity_bytes)?.as_ptr() as *mut T), 71 | }) 72 | } 73 | // ANCHOR_END: DefRawArrayWithCapacity 74 | 75 | /// Resize the array to the new capacity 76 | /// TODO the inner implementation of this should live in the allocator API to make 77 | /// better use of optimizations 78 | pub fn resize<'scope>( 79 | &mut self, 80 | mem: &'scope MutatorView, 81 | new_capacity: u32, 82 | ) -> Result<(), RuntimeError> { 83 | // If we're reducing the capacity to 0, simply detach the array pointer 84 | if new_capacity == 0 { 85 | self.capacity = 0; 86 | self.ptr = None; 87 | return Ok(()); 88 | } 89 | 90 | match self.ptr { 91 | // If we have capacity, create new 
capacity and copy over all bytes from the old 92 | // to the new array 93 | Some(old_ptr) => { 94 | // Convert existing capacity to bytes 95 | let old_capacity_bytes = size_of::() as ArraySize * self.capacity; 96 | let old_ptr = old_ptr.as_ptr(); 97 | 98 | // Convert new capacity to bytes but check that the number of bytes isn't 99 | // outside of ArraySize range 100 | let new_capacity_bytes = new_capacity 101 | .checked_mul(size_of::() as ArraySize) 102 | .ok_or(RuntimeError::new(ErrorKind::BadAllocationRequest))?; 103 | 104 | let new_ptr = mem.alloc_array(new_capacity_bytes)?.as_ptr() as *mut T; 105 | 106 | // create a pair of slices from the raw pointers and byte sizes 107 | let (old_slice, new_slice) = unsafe { 108 | ( 109 | from_raw_parts_mut(old_ptr as *mut u8, old_capacity_bytes as usize), 110 | from_raw_parts_mut(new_ptr as *mut u8, new_capacity_bytes as usize), 111 | ) 112 | }; 113 | 114 | // Copy content from old to new array 115 | for (src, dest) in old_slice.iter().zip(new_slice) { 116 | *dest = *src; 117 | } 118 | 119 | self.ptr = NonNull::new(new_ptr); 120 | self.capacity = new_capacity; 121 | 122 | Ok(()) 123 | } 124 | 125 | // If we have no capacity, create new blank capacity 126 | None => { 127 | *self = Self::with_capacity(mem, new_capacity)?; 128 | Ok(()) 129 | } 130 | } 131 | } 132 | 133 | /// Return the capacity of the array in the count of objects it can hold 134 | // ANCHOR: DefRawArrayCapacity 135 | pub fn capacity(&self) -> ArraySize { 136 | self.capacity 137 | } 138 | // ANCHOR_END: DefRawArrayCapacity 139 | 140 | /// Return a pointer to the array 141 | // ANCHOR: DefRawArrayAsPtr 142 | pub fn as_ptr(&self) -> Option<*const T> { 143 | match self.ptr { 144 | Some(ptr) => Some(ptr.as_ptr()), 145 | None => None, 146 | } 147 | } 148 | // ANCHOR_END: DefRawArrayAsPtr 149 | } 150 | -------------------------------------------------------------------------------- /interpreter/src/repl.rs: 
--------------------------------------------------------------------------------
use crate::compiler::compile;
use crate::error::{ErrorKind, RuntimeError};
use crate::memory::{Mutator, MutatorView};
use crate::parser::parse;
use crate::safeptr::{CellPtr, TaggedScopedPtr};
use crate::vm::Thread;

/// A mutator that returns a Repl instance
pub struct RepMaker {}

impl Mutator for RepMaker {
    type Input = ();
    type Output = ReadEvalPrint;

    fn run(&self, mem: &MutatorView, _input: ()) -> Result<ReadEvalPrint, RuntimeError> {
        ReadEvalPrint::alloc(mem)
    }
}

/// Mutator that implements the VM
pub struct ReadEvalPrint {
    main_thread: CellPtr<Thread>,
}

impl ReadEvalPrint {
    /// Allocate the main `Thread` and wrap it in a new `ReadEvalPrint`
    pub fn alloc(mem: &MutatorView) -> Result<ReadEvalPrint, RuntimeError> {
        Ok(ReadEvalPrint {
            main_thread: CellPtr::new_with(Thread::alloc(mem)?),
        })
    }
}

impl Mutator for ReadEvalPrint {
    type Input = String;
    type Output = ();

    /// Parse, compile and evaluate one line of input, printing the result.
    ///
    /// Lexer, parse and eval errors are printed alongside the source line and swallowed so
    /// the repl can carry on; any other kind of error is returned to the caller.
    fn run(&self, mem: &MutatorView, line: String) -> Result<(), RuntimeError> {
        let thread = self.main_thread.get(mem);

        // If the line starts with the 3-character prefix ":d ", then the user has requested
        // a debug representation; the prefix is not part of the expression
        let (line, debug) = if line.starts_with(":d ") {
            (&line[3..], true)
        } else {
            (line.as_str(), false)
        };

        match (|mem, line| -> Result<TaggedScopedPtr, RuntimeError> {
            let value = parse(mem, line)?;

            if debug {
                println!(
                    "# Debug\n## Input:\n```\n{}\n```\n## Parsed:\n```\n{:?}\n```",
                    line, value
                );
            }

            let function = compile(mem, value)?;

            if debug {
                println!("## Compiled:\n```\n{:?}\n```", function);
            }

            let value = thread.quick_vm_eval(mem, function)?;

            if debug {
                println!("## Evaluated:\n```\n{:?}\n```\n", value);
            }

            Ok(value)
        })(mem, &line)
        {
            Ok(value) => println!("{}", value),

            Err(e) => {
                match e.error_kind() {
                    // non-fatal repl errors
                    ErrorKind::LexerError(_)
                    | ErrorKind::ParseError(_)
                    | ErrorKind::EvalError(_) => e.print_with_source(&line),
                    _ => return Err(e),
                }
            }
        }

        Ok(())
    }
}
--------------------------------------------------------------------------------
/interpreter/src/safeptr.rs:
--------------------------------------------------------------------------------
use std::cell::Cell;
use std::fmt;
use std::ops::Deref;

use stickyimmix::{AllocObject, RawPtr};

use crate::headers::TypeList;
use crate::pointerops::ScopedRef;
use crate::printer::Print;
use crate::taggedptr::{FatPtr, TaggedPtr, Value};

/// Type that provides a generic anchor for mutator timeslice lifetimes
// ANCHOR: DefMutatorScope
pub trait MutatorScope {}
// ANCHOR_END: DefMutatorScope

// Copy On Write semantics? Maybe the below...
// TODO, add MutatorView methods that can return MutScopedPtr?
19 | // 20 | // pub trait CopyOnWrite { 21 | // fn copy_mut<'guard>(&self, _guard: &'guard MutatorView) -> MutScopedPtr<'guard, Self>; 22 | // } 23 | // 24 | // pub struct MutScopedPtr<'guard, T: Sized> { 25 | // value: &mut 'guard T 26 | // } 27 | // 28 | // impl Deref, DerefMut for MutScopedPtr 29 | // 30 | // impl<'guard, T: Sized> MutScopedPtr<'guard, T> { 31 | // pub fn into_immut(self) -> ScopedPtr<'guard, T> {} 32 | // } 33 | 34 | /// An untagged compile-time typed pointer with scope limited by `MutatorScope` 35 | // ANCHOR: DefScopedPtr 36 | pub struct ScopedPtr<'guard, T: Sized> { 37 | value: &'guard T, 38 | } 39 | // ANCHOR_END: DefScopedPtr 40 | 41 | impl<'guard, T: Sized> ScopedPtr<'guard, T> { 42 | pub fn new(_guard: &'guard dyn MutatorScope, value: &'guard T) -> ScopedPtr<'guard, T> { 43 | ScopedPtr { value } 44 | } 45 | 46 | /// Convert the compile-time type pointer to a runtime type pointer 47 | pub fn as_tagged(&self, guard: &'guard dyn MutatorScope) -> TaggedScopedPtr<'guard> 48 | where 49 | FatPtr: From>, 50 | T: AllocObject, 51 | { 52 | TaggedScopedPtr::new( 53 | guard, 54 | TaggedPtr::from(FatPtr::from(RawPtr::new(self.value))), 55 | ) 56 | } 57 | } 58 | 59 | /// Anything that _has_ a scope lifetime can pass as a scope representation 60 | impl<'scope, T: Sized> MutatorScope for ScopedPtr<'scope, T> {} 61 | 62 | impl<'guard, T: Sized> Clone for ScopedPtr<'guard, T> { 63 | fn clone(&self) -> ScopedPtr<'guard, T> { 64 | ScopedPtr { value: self.value } 65 | } 66 | } 67 | 68 | impl<'guard, T: Sized> Copy for ScopedPtr<'guard, T> {} 69 | 70 | impl<'guard, T: Sized> Deref for ScopedPtr<'guard, T> { 71 | type Target = T; 72 | 73 | fn deref(&self) -> &T { 74 | self.value 75 | } 76 | } 77 | 78 | impl<'guard, T: Sized + Print> fmt::Display for ScopedPtr<'guard, T> { 79 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 80 | self.value.print(self, f) 81 | } 82 | } 83 | 84 | impl<'guard, T: Sized + Print> fmt::Debug for ScopedPtr<'guard, T> { 85 | fn 
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 86 | self.value.print(self, f) 87 | } 88 | } 89 | 90 | impl<'guard, T: Sized + PartialEq> PartialEq for ScopedPtr<'guard, T> { 91 | fn eq(&self, rhs: &ScopedPtr<'guard, T>) -> bool { 92 | self.value == rhs.value 93 | } 94 | } 95 | 96 | /// A wrapper around untagged raw pointers for storing compile-time typed pointers in data 97 | /// structures with interior mutability, allowing pointers to be updated to point at different 98 | /// target objects. 99 | // ANCHOR: DefCellPtr 100 | #[derive(Clone)] 101 | pub struct CellPtr { 102 | inner: Cell>, 103 | } 104 | // ANCHOR_END: DefCellPtr 105 | 106 | impl CellPtr { 107 | /// Construct a new CellPtr from a ScopedPtr 108 | pub fn new_with(source: ScopedPtr) -> CellPtr { 109 | CellPtr { 110 | inner: Cell::new(RawPtr::new(source.value)), 111 | } 112 | } 113 | 114 | // ANCHOR: DefCellPtrGet 115 | pub fn get<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, T> { 116 | ScopedPtr::new(guard, self.inner.get().scoped_ref(guard)) 117 | } 118 | // ANCHOR_END: DefCellPtrGet 119 | 120 | // the explicit 'guard lifetime bound to MutatorScope is omitted here since the ScopedPtr 121 | // carries this lifetime already so we can assume that this operation is safe 122 | pub fn set(&self, source: ScopedPtr) { 123 | self.inner.set(RawPtr::new(source.value)) 124 | } 125 | } 126 | 127 | impl From> for CellPtr { 128 | fn from(ptr: ScopedPtr) -> CellPtr { 129 | CellPtr::new_with(ptr) 130 | } 131 | } 132 | 133 | /// A _tagged_ runtime typed pointer type with scope limited by `MutatorScope` such that a `Value` 134 | /// instance can safely be derived and accessed. This type is neccessary to derive `Value`s from. 
135 | // ANCHOR: DefTaggedScopedPtr 136 | #[derive(Copy, Clone)] 137 | pub struct TaggedScopedPtr<'guard> { 138 | ptr: TaggedPtr, 139 | value: Value<'guard>, 140 | } 141 | // ANCHOR_END: DefTaggedScopedPtr 142 | 143 | impl<'guard> TaggedScopedPtr<'guard> { 144 | pub fn new(guard: &'guard dyn MutatorScope, ptr: TaggedPtr) -> TaggedScopedPtr<'guard> { 145 | TaggedScopedPtr { 146 | ptr, 147 | value: FatPtr::from(ptr).as_value(guard), 148 | } 149 | } 150 | 151 | pub fn value(&self) -> Value<'guard> { 152 | self.value 153 | } 154 | 155 | pub fn get_ptr(&self) -> TaggedPtr { 156 | self.ptr 157 | } 158 | } 159 | 160 | /// Anything that _has_ a scope lifetime can pass as a scope representation. `Value` also implements 161 | /// `MutatorScope` so this is largely for consistency. 162 | impl<'scope> MutatorScope for TaggedScopedPtr<'scope> {} 163 | 164 | impl<'guard> Deref for TaggedScopedPtr<'guard> { 165 | type Target = Value<'guard>; 166 | 167 | fn deref(&self) -> &Value<'guard> { 168 | &self.value 169 | } 170 | } 171 | 172 | impl<'guard> fmt::Display for TaggedScopedPtr<'guard> { 173 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 174 | self.value.fmt(f) 175 | } 176 | } 177 | 178 | impl<'guard> fmt::Debug for TaggedScopedPtr<'guard> { 179 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 180 | self.value.fmt(f) 181 | } 182 | } 183 | 184 | impl<'guard> PartialEq for TaggedScopedPtr<'guard> { 185 | fn eq(&self, rhs: &TaggedScopedPtr<'guard>) -> bool { 186 | self.ptr == rhs.ptr 187 | } 188 | } 189 | 190 | /// A wrapper around the runtime typed `TaggedPtr` for storing pointers in data structures with 191 | /// interior mutability, allowing pointers to be updated to point at different target objects. 
192 | // ANCHOR: DefTaggedCellPtr 193 | #[derive(Clone)] 194 | pub struct TaggedCellPtr { 195 | inner: Cell, 196 | } 197 | // ANCHOR_END: DefTaggedCellPtr 198 | 199 | impl TaggedCellPtr { 200 | /// Construct a new Nil TaggedCellPtr instance 201 | pub fn new_nil() -> TaggedCellPtr { 202 | TaggedCellPtr { 203 | inner: Cell::new(TaggedPtr::nil()), 204 | } 205 | } 206 | 207 | /// Construct a new TaggedCellPtr from a TaggedScopedPtr 208 | pub fn new_with(source: TaggedScopedPtr) -> TaggedCellPtr { 209 | TaggedCellPtr { 210 | inner: Cell::new(TaggedPtr::from(source.ptr)), 211 | } 212 | } 213 | 214 | pub fn new_ptr(source: TaggedPtr) -> TaggedCellPtr { 215 | TaggedCellPtr { 216 | inner: Cell::new(source), 217 | } 218 | } 219 | 220 | /// Return the pointer as a `TaggedScopedPtr` type that carries a copy of the `TaggedPtr` and 221 | /// a `Value` type for both copying and access convenience 222 | // ANCHOR: DefTaggedCellPtrGet 223 | pub fn get<'guard>(&self, guard: &'guard dyn MutatorScope) -> TaggedScopedPtr<'guard> { 224 | TaggedScopedPtr::new(guard, self.inner.get()) 225 | } 226 | // ANCHOR_END: DefTaggedCellPtrGet 227 | 228 | /// Set this pointer to point at the same object as a given `TaggedScopedPtr` instance 229 | /// The explicit 'guard lifetime bound to MutatorScope is omitted here since the TaggedScopedPtr 230 | /// carries this lifetime already so we can assume that this operation is safe 231 | pub fn set(&self, source: TaggedScopedPtr) { 232 | self.inner.set(TaggedPtr::from(source.ptr)) 233 | } 234 | 235 | /// Take the pointer of another `TaggedCellPtr` and set this instance to point at that object too 236 | pub fn copy_from(&self, other: &TaggedCellPtr) { 237 | self.inner.set(other.inner.get()); 238 | } 239 | 240 | /// Return true if the pointer is nil 241 | pub fn is_nil(&self) -> bool { 242 | self.inner.get().is_nil() 243 | } 244 | 245 | /// Set this pointer to nil 246 | pub fn set_to_nil(&self) { 247 | self.inner.set(TaggedPtr::nil()) 248 | } 249 | 250 | /// 
Set this pointer to another TaggedPtr 251 | pub fn set_to_ptr(&self, ptr: TaggedPtr) { 252 | self.inner.set(ptr) 253 | } 254 | 255 | /// Return the raw TaggedPtr from within 256 | pub fn get_ptr(&self) -> TaggedPtr { 257 | self.inner.get() 258 | } 259 | } 260 | 261 | impl From> for TaggedCellPtr { 262 | fn from(ptr: TaggedScopedPtr) -> TaggedCellPtr { 263 | TaggedCellPtr::new_with(ptr) 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /interpreter/src/symbol.rs: -------------------------------------------------------------------------------- 1 | /// A Symbol type 2 | use std::fmt; 3 | use std::hash::{Hash, Hasher}; 4 | use std::slice; 5 | use std::str; 6 | 7 | use crate::hashable::Hashable; 8 | use crate::printer::Print; 9 | use crate::safeptr::MutatorScope; 10 | 11 | /// A Symbol is a unique object that has a unique name string. The backing storage for the 12 | /// underlying str data must have a lifetime of at least that of the Symbol instance to 13 | /// prevent use-after-free. 
14 | /// See `SymbolMap` 15 | // ANCHOR: DefSymbol 16 | #[derive(Copy, Clone)] 17 | pub struct Symbol { 18 | name_ptr: *const u8, 19 | name_len: usize, 20 | } 21 | // ANCHOR_END: DefSymbol 22 | 23 | impl Symbol { 24 | /// The originating &str must be owned by a SymbolMap hash table 25 | pub fn new(name: &str) -> Symbol { 26 | Symbol { 27 | name_ptr: name.as_ptr(), 28 | name_len: name.len(), 29 | } 30 | } 31 | 32 | /// Unsafe because Symbol does not own the &str nor can it know anything about the actual lifetime 33 | // ANCHOR: DefSymbolUnguardedAsStr 34 | pub unsafe fn unguarded_as_str<'desired_lifetime>(&self) -> &'desired_lifetime str { 35 | let slice = slice::from_raw_parts(self.name_ptr, self.name_len); 36 | str::from_utf8(slice).unwrap() 37 | } 38 | // ANCHOR_END: DefSymbolUnguardedAsStr 39 | 40 | // ANCHOR: DefSymbolAsStr 41 | pub fn as_str<'guard>(&self, _guard: &'guard dyn MutatorScope) -> &'guard str { 42 | unsafe { self.unguarded_as_str() } 43 | } 44 | // ANCHOR_END: DefSymbolAsStr 45 | } 46 | 47 | impl Print for Symbol { 48 | /// Safe because the lifetime of `MutatorScope` defines a safe-access window 49 | fn print<'guard>( 50 | &self, 51 | guard: &'guard dyn MutatorScope, 52 | f: &mut fmt::Formatter, 53 | ) -> fmt::Result { 54 | write!(f, "{}", self.as_str(guard)) 55 | } 56 | } 57 | 58 | // ANCHOR: DefImplHashableForSymbol 59 | impl Hashable for Symbol { 60 | fn hash<'guard, H: Hasher>(&self, guard: &'guard dyn MutatorScope, h: &mut H) { 61 | self.as_str(guard).hash(h) 62 | } 63 | } 64 | // ANCHOR_END: DefImplHashableForSymbol 65 | -------------------------------------------------------------------------------- /interpreter/src/symbolmap.rs: -------------------------------------------------------------------------------- 1 | /// Implements str interning for mapping Symbol names to unique pointers 2 | use std::cell::RefCell; 3 | use std::collections::HashMap; 4 | 5 | use stickyimmix::{AllocRaw, RawPtr}; 6 | 7 | use crate::arena::Arena; 8 | use 
crate::symbol::Symbol; 9 | 10 | /// A mapping of symbol names (Strings) to Symbol pointers. Only one copy of the symbol 11 | /// name String is kept; a Symbol resides in managed memory with a raw pointer to the 12 | /// String. Thus the lifetime of the SymbolMap must be at least the lifetime of the 13 | /// managed memory. This is arranged here by maintaining Symbol memory alongside the 14 | /// mapping HashMap. 15 | /// 16 | /// No Symbol is ever deleted. Symbol name strings must be immutable. 17 | // ANCHOR: DefSymbolMap 18 | pub struct SymbolMap { 19 | map: RefCell>>, 20 | arena: Arena, 21 | } 22 | // ANCHOR_END: DefSymbolMap 23 | 24 | impl SymbolMap { 25 | pub fn new() -> SymbolMap { 26 | SymbolMap { 27 | map: RefCell::new(HashMap::new()), 28 | arena: Arena::new(), 29 | } 30 | } 31 | 32 | // Can't take a map.entry(name) without providing an owned String, i.e. cloning 'name' 33 | // Can't insert a new entry with just a reference without hashing twice, and cloning 'name' 34 | // The common case, lookups, should be fast, inserts can be slower. 35 | // ANCHOR: DefSymbolMapLookup 36 | pub fn lookup(&self, name: &str) -> RawPtr { 37 | { 38 | if let Some(ptr) = self.map.borrow().get(name) { 39 | return *ptr; 40 | } 41 | } 42 | 43 | let name = String::from(name); 44 | let ptr = self.arena.alloc(Symbol::new(&name)).unwrap(); 45 | self.map.borrow_mut().insert(name, ptr); 46 | ptr 47 | } 48 | // ANCHOR_END: DefSymbolMapLookup 49 | } 50 | -------------------------------------------------------------------------------- /interpreter/src/text.rs: -------------------------------------------------------------------------------- 1 | /// A type for representing strings. Implementation is an immutable wrapper around Array. 
2 | use std::fmt; 3 | use std::hash::{Hash, Hasher}; 4 | use std::slice; 5 | use std::str; 6 | 7 | use crate::error::{ErrorKind, RuntimeError}; 8 | use crate::hashable::Hashable; 9 | use crate::memory::MutatorView; 10 | use crate::printer::Print; 11 | use crate::rawarray::{ArraySize, RawArray}; 12 | use crate::safeptr::MutatorScope; 13 | 14 | /// While Text is somewhat similar to Symbol, it is instead garbage-collected heap allocated and not interned. 15 | #[derive(Copy, Clone)] 16 | pub struct Text { 17 | content: RawArray, 18 | } 19 | 20 | impl Text { 21 | /// Create an empty Text string object 22 | pub fn new_empty() -> Text { 23 | Text { 24 | content: RawArray::new(), 25 | } 26 | } 27 | 28 | /// Initialize a Text object from a &str slice by copying its bytes into a newly allocated RawArray. Returns Err(ErrorKind::BadAllocationRequest) if the byte length exceeds ArraySize::max_value(). Panics if the new array reports no backing storage. NOTE(review): for an empty from_str this relies on RawArray::with_capacity(mem, 0) still providing storage - confirm. 29 | pub fn new_from_str<'guard>( 30 | mem: &'guard MutatorView, 31 | from_str: &str, 32 | ) -> Result { 33 | let len = from_str.len(); 34 | let from_ptr = from_str.as_ptr(); 35 | 36 | if len > (ArraySize::max_value() as usize) { 37 | return Err(RuntimeError::new(ErrorKind::BadAllocationRequest)); 38 | } 39 | 40 | let content = RawArray::with_capacity(mem, len as ArraySize)?; 41 | 42 | if let Some(to_ptr) = content.as_ptr() { 43 | unsafe { from_ptr.copy_to_nonoverlapping(to_ptr as *mut u8, len) } 44 | Ok(Text { content }) 45 | } else { 46 | panic!("Text content array expected to have backing storage") 47 | } 48 | } 49 | /// Borrow the content as a &str without requiring a scope guard. The byte length is taken from the array capacity; an array with no backing storage reads as the empty string. Panics (unwrap) if the bytes are not valid UTF-8. 50 | unsafe fn unguarded_as_str(&self) -> &str { 51 | if let Some(ptr) = self.content.as_ptr() { 52 | let slice = slice::from_raw_parts(ptr, self.content.capacity() as usize); 53 | str::from_utf8(slice).unwrap() 54 | } else { 55 | &"" 56 | } 57 | } 58 | 59 | /// Using scope guarded access, get the Text content as a &str slice. The guard value itself is not read. 60 | pub fn as_str<'guard>(&self, _guard: &'guard dyn MutatorScope) -> &str { 61 | unsafe { self.unguarded_as_str() } 62 | } 63 | } 64 | 65 | impl Print for Text { 66 | fn print<'guard>( 67 | &self, 68 | guard: &'guard dyn MutatorScope, 69 | f: &mut fmt::Formatter, 70 | ) -> fmt::Result {
71 | // Writes the content wrapped in double quotes. TODO this will need to be printed with certain string escape codes embedded 72 | write!(f, "\"{}\"", self.as_str(guard)) 73 | } 74 | } 75 | // Text hashes as its string content 76 | impl Hashable for Text { 77 | fn hash<'guard, H: Hasher>(&self, guard: &'guard dyn MutatorScope, h: &mut H) { 78 | self.as_str(guard).hash(h) 79 | } 80 | } 81 | 82 | #[cfg(test)] 83 | mod test { 84 | use super::Text; 85 | use crate::error::RuntimeError; 86 | use crate::memory::{Memory, Mutator, MutatorView}; 87 | // An empty Text must read back as the empty string 88 | #[test] 89 | fn text_empty_string() { 90 | let mem = Memory::new(); 91 | 92 | struct Test {} 93 | impl Mutator for Test { 94 | type Input = (); 95 | type Output = (); 96 | 97 | fn run( 98 | &self, 99 | view: &MutatorView, 100 | _input: Self::Input, 101 | ) -> Result { 102 | let text = Text::new_empty(); 103 | assert!(text.as_str(view) == ""); 104 | 105 | Ok(()) 106 | } 107 | } 108 | 109 | let test = Test {}; 110 | mem.mutate(&test, ()).unwrap(); 111 | } 112 | // A multi-byte UTF-8 string must read back from Text unchanged 113 | #[test] 114 | fn text_from_static_str() { 115 | let mem = Memory::new(); 116 | 117 | struct Test {} 118 | impl Mutator for Test { 119 | type Input = (); 120 | type Output = (); 121 | 122 | fn run( 123 | &self, 124 | view: &MutatorView, 125 | _input: Self::Input, 126 | ) -> Result { 127 | let expected = "こんにちは"; 128 | let text = Text::new_from_str(view, expected)?; 129 | let got = text.as_str(view); 130 | 131 | assert!(got == expected); 132 | 133 | Ok(()) 134 | } 135 | } 136 | 137 | let test = Test {}; 138 | mem.mutate(&test, ()).unwrap(); 139 | } 140 | // Formatting the tagged value of a heap-allocated Text yields the content in double quotes 141 | #[test] 142 | fn value_from_string() { 143 | let mem = Memory::new(); 144 | 145 | struct Test {} 146 | impl Mutator for Test { 147 | type Input = (); 148 | type Output = (); 149 | 150 | fn run( 151 | &self, 152 | view: &MutatorView, 153 | _input: Self::Input, 154 | ) -> Result { 155 | let input = String::from("こんにちは"); 156 | // the Value representation of the object is wrapped in quotes 157 | let expected = format!("\"{}\"", input); 158 | 159 | let text = Text::new_from_str(view, 
&input)?; 160 | let heap_text = view.alloc_tagged(text)?; 161 | 162 | let got = format!("{}", heap_text.value()); 163 | 164 | assert!(got == expected); 165 | 166 | Ok(()) 167 | } 168 | } 169 | 170 | let test = Test {}; 171 | mem.mutate(&test, ()).unwrap(); 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /stickyimmix/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /stickyimmix/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "blockalloc" 5 | version = "0.1.0" 6 | 7 | [[package]] 8 | name = "stickyimmix" 9 | version = "0.1.0" 10 | dependencies = [ 11 | "blockalloc 0.1.0", 12 | ] 13 | 14 | -------------------------------------------------------------------------------- /stickyimmix/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stickyimmix" 3 | version = "0.1.0" 4 | authors = ["Peter Liniker "] 5 | edition = "2018" 6 | license = "MIT OR Apache-2.0" 7 | 8 | [dependencies] 9 | blockalloc = { path = "../blockalloc/" } 10 | -------------------------------------------------------------------------------- /stickyimmix/README.md: -------------------------------------------------------------------------------- 1 | # Single threaded sticky immix 2 | 3 | A non-evacuating single-threaded immix implementation.
4 | -------------------------------------------------------------------------------- /stickyimmix/src/allocator.rs: -------------------------------------------------------------------------------- 1 | use std::mem::size_of; 2 | use std::ptr::NonNull; 3 | 4 | use crate::constants; 5 | use crate::rawptr::RawPtr; 6 | 7 | /// An allocation error type 8 | // ANCHOR: DefAllocError 9 | #[derive(Copy, Clone, Debug, PartialEq)] 10 | pub enum AllocError { 11 | /// Some attribute of the allocation, most likely the size requested, 12 | /// could not be fulfilled 13 | BadRequest, 14 | /// Out of memory - allocating the space failed 15 | OOM, 16 | } 17 | // ANCHOR_END: DefAllocError 18 | 19 | /// A trait that describes allocation of an object into a heap space, returning 20 | /// a bare pointer type on success 21 | // ANCHOR: DefAllocRaw 22 | pub trait AllocRaw { 23 | /// An implementation of an object header type 24 | type Header: AllocHeader; 25 | 26 | /// Allocate a single object of type T. 27 | fn alloc(&self, object: T) -> Result, AllocError> 28 | where 29 | T: AllocObject<::TypeId>; 30 | 31 | /// Allocating an array allows the client to put anything in the resulting data 32 | /// block but the type of the memory block will simply be 'Array'. No other 33 | /// type information will be stored in the object header. 34 | /// This is just a special case of alloc() for T=u8 but a count > 1 of u8 35 | /// instances. The caller is responsible for the content of the array. 36 | fn alloc_array(&self, size_bytes: ArraySize) -> Result, AllocError>; 37 | 38 | /// Given a bare pointer to an object, return the expected header address 39 | fn get_header(object: NonNull<()>) -> NonNull; 40 | 41 | /// Given a bare pointer to an object's header, return the expected object address 42 | fn get_object(header: NonNull) -> NonNull<()>; 43 | } 44 | // ANCHOR_END: DefAllocRaw 45 | 46 | /// Object size class.
47 | /// - Small objects fit inside a line 48 | /// - Medium objects span more than one line 49 | /// - Large objects span multiple blocks 50 | #[repr(u8)] 51 | #[derive(Copy, Clone, Debug, PartialEq)] 52 | pub enum SizeClass { 53 | Small, 54 | Medium, 55 | Large, 56 | } 57 | // Classify a byte size using the ranges in `constants`; a size that falls in none of the three ranges yields AllocError::BadRequest. 58 | impl SizeClass { 59 | pub fn get_for_size(object_size: usize) -> Result { 60 | match object_size { 61 | constants::SMALL_OBJECT_MIN..=constants::SMALL_OBJECT_MAX => Ok(SizeClass::Small), 62 | constants::MEDIUM_OBJECT_MIN..=constants::MEDIUM_OBJECT_MAX => Ok(SizeClass::Medium), 63 | constants::LARGE_OBJECT_MIN..=constants::LARGE_OBJECT_MAX => Ok(SizeClass::Large), 64 | _ => Err(AllocError::BadRequest), 65 | } 66 | } 67 | } 68 | 69 | /// The type that describes the bounds of array sizing 70 | pub type ArraySize = u32; 71 | 72 | /// TODO Object mark bit. 73 | /// Every object is `Allocated` on creation. 74 | #[repr(u8)] 75 | #[derive(Copy, Clone, Debug, PartialEq)] 76 | pub enum Mark { 77 | Allocated, 78 | Unmarked, 79 | Marked, 80 | } 81 | 82 | /// A managed-type type-identifier type should implement this! 83 | // ANCHOR: DefAllocTypeId 84 | pub trait AllocTypeId: Copy + Clone {} 85 | // ANCHOR_END: DefAllocTypeId 86 | 87 | /// All managed object types must implement this trait in order to be allocatable 88 | // ANCHOR: DefAllocObject 89 | pub trait AllocObject { 90 | const TYPE_ID: T; 91 | } 92 | // ANCHOR_END: DefAllocObject 93 | 94 | /// An object header struct must provide an implementation of this trait, 95 | /// providing appropriate information to the garbage collector. 96 | // TODO tracing information 97 | // e.g. 
fn tracer(&self) -> Fn() 98 | // ANCHOR: DefAllocHeader 99 | pub trait AllocHeader: Sized { 100 | /// Associated type that identifies the allocated object type 101 | type TypeId: AllocTypeId; 102 | 103 | /// Create a new header for object type O 104 | fn new>(size: u32, size_class: SizeClass, mark: Mark) -> Self; 105 | 106 | /// Create a new header for an array type 107 | fn new_array(size: ArraySize, size_class: SizeClass, mark: Mark) -> Self; 108 | 109 | /// Set the Mark value to "marked" 110 | fn mark(&mut self); 111 | 112 | /// Query the mark state as a bool (NOTE(review): presumably true when the Mark value is "marked" - confirm against implementors) 113 | fn is_marked(&self) -> bool; 114 | 115 | /// Get the size class of the object 116 | fn size_class(&self) -> SizeClass; 117 | 118 | /// Get the size of the object in bytes 119 | fn size(&self) -> u32; 120 | 121 | /// Get the type of the object 122 | fn type_id(&self) -> Self::TypeId; 123 | } 124 | // ANCHOR_END: DefAllocHeader 125 | 126 | /// Return the allocated size for an object of `object_size` bytes: its size rounded 127 | /// up to a double-word boundary. NOTE: as written, the `* 2` is commented out, so the rounding unit is a single `align` value. 128 | /// 129 | /// TODO this isn't correctly implemented, as aligning the object to a double-word 130 | /// boundary while considering header size (which is not known to this library 131 | /// until compile time) means touching numerous bump-allocation code points with 132 | /// some math and bitwise ops I haven't worked out yet 133 | pub fn alloc_size_of(object_size: usize) -> usize { 134 | let align = size_of::(); // * 2; 135 | (object_size + (align - 1)) & !(align - 1) 136 | } 137 | -------------------------------------------------------------------------------- /stickyimmix/src/blockmeta.rs: -------------------------------------------------------------------------------- 1 | use crate::constants; 2 | 3 | /// Block marking metadata. This metadata is stored at the end of a Block.
4 | // ANCHOR: DefBlockMeta 5 | pub struct BlockMeta { 6 | lines: *mut u8, 7 | } 8 | // ANCHOR_END: DefBlockMeta 9 | 10 | impl BlockMeta { 11 | /// Create the metadata for the block starting at `block_ptr`. The line mark bytes live inside the block 12 | /// itself, starting at offset LINE_MARK_START, and are all reset to unmarked here. 13 | pub fn new(block_ptr: *const u8) -> BlockMeta { 14 | let mut meta = BlockMeta { 15 | lines: unsafe { block_ptr.add(constants::LINE_MARK_START) as *mut u8 }, 16 | }; 17 | 18 | meta.reset(); 19 | 20 | meta 21 | } 22 | 23 | unsafe fn as_block_mark(&mut self) -> &mut u8 { 24 | // Use the last byte of the block because no object will occupy the line 25 | // associated with this: it's the mark bits. 26 | &mut *self.lines.add(constants::LINE_COUNT - 1) 27 | } 28 | 29 | unsafe fn as_line_mark(&mut self, line: usize) -> &mut u8 { 30 | &mut *self.lines.add(line) 31 | } 32 | 33 | /// Mark the indexed line. `index` is not bounds checked. 34 | pub fn mark_line(&mut self, index: usize) { 35 | unsafe { *self.as_line_mark(index) = 1 }; 36 | } 37 | 38 | /// Indicate the entire block as marked 39 | pub fn mark_block(&mut self) { 40 | unsafe { *self.as_block_mark() = 1 } 41 | } 42 | 43 | /// Reset all mark flags to unmarked. 44 | pub fn reset(&mut self) { 45 | unsafe { 46 | for i in 0..constants::LINE_COUNT { 47 | *self.lines.add(i) = 0; 48 | } 49 | } 50 | } 51 | 52 | // (disabled) Return an iterator over all the line mark flags 53 | //pub fn line_iter(&self) -> impl Iterator { 54 | // self.line_mark.iter() 55 | //} 56 | 57 | // ANCHOR: DefFindNextHole 58 | /// Search downward from the block-relative byte offset `starting_at` for a run of unmarked lines that can hold `alloc_size` bytes. Returns block-relative byte offsets `(cursor, limit)`, cursor being the upper end of the hole and limit the lower end, or None if there is no such hole. When it comes to finding allocatable holes, we bump-allocate downward. 59 | pub fn find_next_available_hole( 60 | &self, 61 | starting_at: usize, 62 | alloc_size: usize, 63 | ) -> Option<(usize, usize)> { 64 | // The count of consecutive available (unmarked) lines. Must take into account a conservatively marked 65 | // line at the beginning of the sequence.
66 | let mut count = 0; 67 | let starting_line = starting_at / constants::LINE_SIZE; 68 | let lines_required = (alloc_size + constants::LINE_SIZE - 1) / constants::LINE_SIZE; 69 | // Counting down from the given search start index 70 | let mut end = starting_line; 71 | 72 | for index in (0..starting_line).rev() { 73 | let marked = unsafe { *self.lines.add(index) }; 74 | 75 | if marked == 0 { 76 | // count unmarked lines 77 | count += 1; 78 | 79 | if index == 0 && count >= lines_required { 80 | let limit = index * constants::LINE_SIZE; 81 | let cursor = end * constants::LINE_SIZE; 82 | return Some((cursor, limit)); 83 | } 84 | } else { 85 | // This line is marked 86 | if count > lines_required { 87 | // But more than `lines_required` lines (so at least 2) visited before it were not marked. Return the hole, considering the 88 | // unmarked line immediately next to this one (index + 1) as conservatively marked 89 | let limit = (index + 2) * constants::LINE_SIZE; 90 | let cursor = end * constants::LINE_SIZE; 91 | return Some((cursor, limit)); 92 | } 93 | 94 | // If this line is marked and we didn't return a new cursor/limit pair by now, 95 | // reset the hole search state 96 | count = 0; 97 | end = index; 98 | } 99 | } 100 | 101 | None 102 | } 103 | // ANCHOR_END: DefFindNextHole 104 | } 105 | 106 | #[cfg(test)] 107 | mod tests { 108 | 109 | use super::*; 110 | use crate::blockalloc::Block; 111 | 112 | #[test] 113 | fn test_find_next_hole() { 114 | // A set of marked lines with a couple holes. 115 | // The first hole should be seen as conservatively marked. 116 | // The second hole should be the one selected.
117 | let block = Block::new(constants::BLOCK_SIZE).unwrap(); 118 | let mut meta = BlockMeta::new(block.as_ptr()); 119 | 120 | meta.mark_line(0); 121 | meta.mark_line(1); 122 | meta.mark_line(2); 123 | meta.mark_line(4); 124 | meta.mark_line(10); 125 | 126 | // line 5 should be conservatively marked 127 | let expect = Some((10 * constants::LINE_SIZE, 6 * constants::LINE_SIZE)); 128 | 129 | let got = meta.find_next_available_hole(10 * constants::LINE_SIZE, constants::LINE_SIZE); 130 | 131 | println!("test_find_next_hole got {:?} expected {:?}", got, expect); 132 | 133 | assert!(got == expect); 134 | } 135 | 136 | #[test] 137 | fn test_find_next_hole_at_line_zero() { 138 | // Should find the hole starting at the beginning of the block 139 | let block = Block::new(constants::BLOCK_SIZE).unwrap(); 140 | let mut meta = BlockMeta::new(block.as_ptr()); 141 | 142 | meta.mark_line(3); 143 | meta.mark_line(4); 144 | meta.mark_line(5); 145 | 146 | let expect = Some((3 * constants::LINE_SIZE, 0)); 147 | 148 | let got = meta.find_next_available_hole(3 * constants::LINE_SIZE, constants::LINE_SIZE); 149 | 150 | println!( 151 | "test_find_next_hole_at_line_zero got {:?} expected {:?}", 152 | got, expect 153 | ); 154 | 155 | assert!(got == expect); 156 | } 157 | 158 | #[test] 159 | fn test_find_next_hole_at_block_end() { 160 | // The upper half of the block's lines (halfway..LINE_COUNT) is marked. 161 | // The lower half of the block (lines 0..halfway) should be identified as a hole.
162 | let block = Block::new(constants::BLOCK_SIZE).unwrap(); 163 | let mut meta = BlockMeta::new(block.as_ptr()); 164 | 165 | let halfway = constants::LINE_COUNT / 2; 166 | 167 | for i in halfway..constants::LINE_COUNT { 168 | meta.mark_line(i); 169 | } 170 | 171 | // the hole's cursor sits at the start of the marked halfway line and its limit is offset 0 172 | let expect = Some((halfway * constants::LINE_SIZE, 0)); 173 | 174 | let got = meta.find_next_available_hole(constants::BLOCK_CAPACITY, constants::LINE_SIZE); 175 | 176 | println!( 177 | "test_find_next_hole_at_block_end got {:?} expected {:?}", 178 | got, expect 179 | ); 180 | 181 | assert!(got == expect); 182 | } 183 | 184 | #[test] 185 | fn test_find_hole_all_conservatively_marked() { 186 | // Every other line is marked. 187 | // No hole should be found. 188 | let block = Block::new(constants::BLOCK_SIZE).unwrap(); 189 | let mut meta = BlockMeta::new(block.as_ptr()); 190 | 191 | for i in 0..constants::LINE_COUNT { 192 | if i % 2 == 0 { 193 | // there is no stable step function for range 194 | meta.mark_line(i); 195 | } 196 | } 197 | 198 | let got = meta.find_next_available_hole(constants::BLOCK_CAPACITY, constants::LINE_SIZE); 199 | 200 | println!( 201 | "test_find_hole_all_conservatively_marked got {:?} expected None", 202 | got 203 | ); 204 | 205 | assert!(got == None); 206 | } 207 | 208 | #[test] 209 | fn test_find_entire_block() { 210 | // No marked lines. Entire block is available.
211 | let block = Block::new(constants::BLOCK_SIZE).unwrap(); 212 | let meta = BlockMeta::new(block.as_ptr()); 213 | 214 | let expect = Some((constants::BLOCK_CAPACITY, 0)); 215 | let got = meta.find_next_available_hole(constants::BLOCK_CAPACITY, constants::LINE_SIZE); 216 | 217 | println!("test_find_entire_block got {:?} expected {:?}", got, expect); 218 | 219 | assert!(got == expect); 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /stickyimmix/src/bumpblock.rs: -------------------------------------------------------------------------------- 1 | use std::ptr::write; 2 | 3 | use blockalloc::{Block, BlockError}; 4 | 5 | use crate::allocator::AllocError; 6 | use crate::blockmeta::BlockMeta; 7 | use crate::constants; 8 | 9 | impl From for AllocError { 10 | fn from(error: BlockError) -> AllocError { 11 | match error { 12 | BlockError::BadRequest => AllocError::BadRequest, 13 | BlockError::OOM => AllocError::OOM, 14 | } 15 | } 16 | } 17 | 18 | /// A block of heap. This maintains the bump cursor and limit per block 19 | /// and the line mark flags through the `meta` struct. 20 | /// NOTE(review): earlier wording said a pointer to `meta` is placed in the 21 | /// very first word of the block memory; `new()` writes no such pointer. 22 | /// The limit starts at the block base (offset 0) and the cursor starts at 23 | /// offset BLOCK_CAPACITY, moving downward as objects are allocated. 24 | // ANCHOR: DefBumpBlock 25 | pub struct BumpBlock { 26 | cursor: *const u8, 27 | limit: *const u8, 28 | block: Block, 29 | meta: BlockMeta, 30 | } 31 | // ANCHOR_END: DefBumpBlock 32 | 33 | impl BumpBlock { 34 | /// Create a new block of heap space and its metadata. The cursor starts at offset 35 | /// BLOCK_CAPACITY and the limit at the block base, so the whole capacity is one hole.
36 | pub fn new() -> Result { 37 | let inner_block = Block::new(constants::BLOCK_SIZE)?; 38 | let block_ptr = inner_block.as_ptr(); 39 | 40 | let block = BumpBlock { 41 | cursor: unsafe { block_ptr.add(constants::BLOCK_CAPACITY) }, 42 | limit: block_ptr, 43 | block: inner_block, 44 | meta: BlockMeta::new(block_ptr), 45 | }; 46 | 47 | Ok(block) 48 | } 49 | 50 | /// Write an object into the block at the given offset. The offset is not 51 | /// checked for overflow, hence this function is unsafe. 52 | unsafe fn write(&mut self, object: T, offset: usize) -> *const T { 53 | let p = self.block.as_ptr().add(offset) as *mut T; 54 | write(p, object); 55 | p 56 | } 57 | 58 | /// Find a hole of at least the requested size and return Some(pointer) to it, or 59 | /// None if this block doesn't have a big enough hole. Allocation bumps the cursor downward and aligns it down with ALLOC_ALIGN_MASK; when the current hole is too small, the next hole below the current limit is looked up in the line marks and the allocation is retried there. 60 | // ANCHOR: DefBumpBlockAlloc 61 | pub fn inner_alloc(&mut self, alloc_size: usize) -> Option<*const u8> { 62 | let ptr = self.cursor as usize; 63 | let limit = self.limit as usize; 64 | 65 | let next_ptr = ptr.checked_sub(alloc_size)?
& constants::ALLOC_ALIGN_MASK; 66 | 67 | if next_ptr < limit { 68 | let block_relative_limit = 69 | limit - self.block.as_ptr() as usize; // integer subtraction of addresses: offsetting the limit pointer by the base address with ptr::sub would compute an out-of-bounds pointer 70 | 71 | if block_relative_limit > 0 { 72 | if let Some((cursor, limit)) = self 73 | .meta 74 | .find_next_available_hole(block_relative_limit, alloc_size) 75 | { 76 | self.cursor = unsafe { self.block.as_ptr().add(cursor) }; 77 | self.limit = unsafe { self.block.as_ptr().add(limit) }; 78 | return self.inner_alloc(alloc_size); 79 | } 80 | } 81 | 82 | None 83 | } else { 84 | self.cursor = next_ptr as *const u8; 85 | Some(self.cursor) 86 | } 87 | } 88 | // ANCHOR_END: DefBumpBlockAlloc 89 | 90 | /// Return the size in bytes of the hole we're positioned at (cursor minus limit) 91 | pub fn current_hole_size(&self) -> usize { 92 | self.cursor as usize - self.limit as usize 93 | } 94 | } 95 | 96 | #[cfg(test)] 97 | mod tests { 98 | 99 | use super::*; 100 | 101 | const TEST_UNIT_SIZE: usize = constants::ALLOC_ALIGN_BYTES; 102 | 103 | // Helper function: given the Block, fill all holes with u32 values 104 | // and return the number of values allocated. 105 | // Also assert that all allocated values are unchanged as allocation 106 | // proceeds.
107 | fn loop_check_allocate(b: &mut BumpBlock) -> usize { 108 | let mut v = Vec::new(); 109 | let mut index = 0; 110 | 111 | loop { 112 | //println!("cursor={}, limit={}", b.cursor, b.limit); 113 | if let Some(ptr) = b.inner_alloc(TEST_UNIT_SIZE) { 114 | let u32ptr = ptr as *mut u32; 115 | // every allocation must return an address not handed out before 116 | assert!(!v.contains(&u32ptr)); 117 | 118 | v.push(u32ptr); 119 | unsafe { *u32ptr = index } 120 | 121 | index += 1; 122 | } else { 123 | break; 124 | } 125 | } 126 | // re-read every slot: later allocations must not have overwritten earlier values 127 | for (index, u32ptr) in v.iter().enumerate() { 128 | unsafe { 129 | assert!(**u32ptr == index as u32); 130 | } 131 | } 132 | 133 | index as usize 134 | } 135 | // A fresh block must accept BLOCK_CAPACITY / TEST_UNIT_SIZE allocations 136 | #[test] 137 | fn test_empty_block() { 138 | let mut b = BumpBlock::new().unwrap(); 139 | 140 | let count = loop_check_allocate(&mut b); 141 | let expect = constants::BLOCK_CAPACITY / TEST_UNIT_SIZE; 142 | 143 | println!("expect={}, count={}", expect, count); 144 | assert!(count == expect); 145 | } 146 | 147 | #[test] 148 | fn test_half_block() { 149 | // This block has an available hole as the second half of the block 150 | let mut b = BumpBlock::new().unwrap(); 151 | 152 | for i in 0..(constants::LINE_COUNT / 2) { 153 | b.meta.mark_line(i); 154 | } 155 | let occupied_bytes = (constants::LINE_COUNT / 2) * constants::LINE_SIZE; 156 | // an empty current hole (limit == cursor) forces inner_alloc to search the line marks for the next hole 157 | b.limit = b.cursor; // block is recycled 158 | 159 | let count = loop_check_allocate(&mut b); 160 | let expect = 161 | (constants::BLOCK_CAPACITY - constants::LINE_SIZE - occupied_bytes) / TEST_UNIT_SIZE; // one LINE_SIZE is subtracted for the conservatively marked line next to the marked half 162 | 163 | println!("expect={}, count={}", expect, count); 164 | assert!(count == expect); 165 | } 166 | 167 | #[test] 168 | fn test_conservatively_marked_block() { 169 | // This block has every other line marked, so the alternate lines are conservatively 170 | // marked. Nothing should be allocated in this block.
171 | 172 | let mut b = BumpBlock::new().unwrap(); 173 | 174 | for i in 0..constants::LINE_COUNT { 175 | if i % 2 == 0 { 176 | b.meta.mark_line(i); 177 | } 178 | } 179 | 180 | b.limit = b.cursor; // block is recycled 181 | 182 | let count = loop_check_allocate(&mut b); 183 | 184 | println!("count={}", count); 185 | assert!(count == 0); 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /stickyimmix/src/constants.rs: -------------------------------------------------------------------------------- 1 | // ANCHOR: ConstBlockSize 2 | pub const BLOCK_SIZE_BITS: usize = 15; 3 | pub const BLOCK_SIZE: usize = 1 << BLOCK_SIZE_BITS; // 32768 bytes 4 | // ANCHOR_END: ConstBlockSize 5 | pub const BLOCK_PTR_MASK: usize = !(BLOCK_SIZE - 1); // clears the low BLOCK_SIZE_BITS bits of a value 6 | 7 | // ANCHOR: ConstLineSize 8 | pub const LINE_SIZE_BITS: usize = 7; 9 | pub const LINE_SIZE: usize = 1 << LINE_SIZE_BITS; // 128 bytes 10 | 11 | // How many total lines are in a block (256) 12 | pub const LINE_COUNT: usize = BLOCK_SIZE / LINE_SIZE; 13 | 14 | // We need LINE_COUNT number of bytes for marking lines, so the capacity of a block 15 | // is reduced by that number of bytes (32768 - 256 = 32512). 16 | pub const BLOCK_CAPACITY: usize = BLOCK_SIZE - LINE_COUNT; 17 | // ANCHOR_END: ConstLineSize 18 | 19 | // The first line-mark offset into the block is here.
20 | pub const LINE_MARK_START: usize = BLOCK_CAPACITY; 21 | 22 | // Allocation alignment: and-ing an address with ALLOC_ALIGN_MASK rounds it down to a multiple of ALLOC_ALIGN_BYTES 23 | pub const ALLOC_ALIGN_BYTES: usize = 16; 24 | pub const ALLOC_ALIGN_MASK: usize = !(ALLOC_ALIGN_BYTES - 1); 25 | 26 | // Object size ranges, in bytes; each MIN..=MAX pair is inclusive and the three ranges are contiguous 27 | pub const MAX_ALLOC_SIZE: usize = std::u32::MAX as usize; 28 | pub const SMALL_OBJECT_MIN: usize = 1; 29 | pub const SMALL_OBJECT_MAX: usize = LINE_SIZE; 30 | pub const MEDIUM_OBJECT_MIN: usize = SMALL_OBJECT_MAX + 1; 31 | pub const MEDIUM_OBJECT_MAX: usize = BLOCK_CAPACITY; 32 | pub const LARGE_OBJECT_MIN: usize = MEDIUM_OBJECT_MAX + 1; 33 | pub const LARGE_OBJECT_MAX: usize = MAX_ALLOC_SIZE; 34 | -------------------------------------------------------------------------------- /stickyimmix/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate blockalloc; 2 | 3 | mod allocator; 4 | mod blockmeta; 5 | mod bumpblock; 6 | mod constants; 7 | mod heap; 8 | mod rawptr; 9 | 10 | pub use crate::allocator::{ 11 | AllocError, AllocHeader, AllocObject, AllocRaw, AllocTypeId, ArraySize, Mark, SizeClass, 12 | }; 13 | 14 | pub use crate::heap::StickyImmixHeap; 15 | 16 | pub use crate::rawptr::RawPtr; 17 | -------------------------------------------------------------------------------- /stickyimmix/src/rawptr.rs: -------------------------------------------------------------------------------- 1 | use std::ptr::NonNull; 2 | 3 | /// A container for a bare pointer to an object of type `T`. 4 | /// At this level, compile-time type information is still 5 | /// part of the type. 6 | // ANCHOR: DefRawPtr 7 | pub struct RawPtr { 8 | ptr: NonNull, 9 | } 10 | // ANCHOR_END: DefRawPtr 11 | 12 | impl RawPtr { 13 | /// Create a new RawPtr from a bare pointer. The pointer must not be null: it is wrapped with `NonNull::new_unchecked`, which performs no null check. 14 | pub fn new(ptr: *const T) -> RawPtr { 15 | RawPtr { 16 | ptr: unsafe { NonNull::new_unchecked(ptr as *mut T) }, 17 | } 18 | } 19 | 20 | /// Get the raw `*const` pointer to the object.
21 | pub fn as_ptr(self) -> *const T { 22 | self.ptr.as_ptr() 23 | } 24 | /// Get the pointer value as a word-sized integer 25 | pub fn as_word(self) -> usize { 26 | self.ptr.as_ptr() as usize 27 | } 28 | /// Get the pointer with its type erased, as a `NonNull<()>` 29 | pub fn as_untyped(self) -> NonNull<()> { 30 | self.ptr.cast() 31 | } 32 | 33 | /// Get a `&` reference to the object. Unsafe because there are no guarantees at this level 34 | /// about the internal pointer's validity. 35 | pub unsafe fn as_ref(&self) -> &T { 36 | self.ptr.as_ref() 37 | } 38 | 39 | /// Get a `&mut` reference to the object. Unsafe because there are no guarantees at this level 40 | /// about the internal pointer's validity. 41 | /// In addition, there can be no compile-time guarantees of mutable aliasing prevention. 42 | /// Use with caution! 43 | pub unsafe fn as_mut_ref(&mut self) -> &mut T { 44 | self.ptr.as_mut() 45 | } 46 | } 47 | // Cloning copies the pointer only; the pointed-to object is not duplicated 48 | impl Clone for RawPtr { 49 | fn clone(&self) -> RawPtr { 50 | RawPtr { ptr: self.ptr } 51 | } 52 | } 53 | 54 | impl Copy for RawPtr {} 55 | // Equality compares the pointers themselves, not the pointed-to values 56 | impl PartialEq for RawPtr { 57 | fn eq(&self, other: &RawPtr) -> bool { 58 | self.ptr == other.ptr 59 | } 60 | } 61 | --------------------------------------------------------------------------------