├── .github
    └── workflows
    │   ├── book.yml
    │   └── ci.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE-APACHE
├── LICENSE-MIT
├── LICENSE.txt
├── README.md
├── blockalloc
    ├── .gitignore
    ├── Cargo.lock
    ├── Cargo.toml
    ├── LICENSE.txt
    └── src
    │   └── lib.rs
├── book.toml
├── booksrc
    ├── 404.md
    ├── SUMMARY.md
    ├── chapter-alignment.md
    ├── chapter-allocation-api.md
    ├── chapter-allocation-impl.md
    ├── chapter-blocks.md
    ├── chapter-interp-alloc.md
    ├── chapter-interp-arrays.md
    ├── chapter-interp-bytecode.md
    ├── chapter-interp-compiler-design.md
    ├── chapter-interp-compiler-impl.md
    ├── chapter-interp-dicts.md
    ├── chapter-interp-parsing.md
    ├── chapter-interp-symbols-and-pairs.md
    ├── chapter-interp-tagged-ptrs.md
    ├── chapter-interp-vm-design.md
    ├── chapter-interp-vm-impl.md
    ├── chapter-managing-blocks.md
    ├── chapter-simple-bump.md
    ├── chapter-what-is-alloc.md
    ├── evalrus-medium.png
    ├── img
    │   ├── alignment.png
    │   ├── fragmented_block.png
    │   └── stickyimmix_block.png
    ├── introduction.md
    ├── part-allocators.md
    ├── part-interpreter.md
    └── part-stickyimmix.md
├── interpreter
    ├── .gitignore
    ├── Cargo.toml
    ├── LICENSE.txt
    ├── README.md
    └── src
    │   ├── arena.rs
    │   ├── array.rs
    │   ├── bytecode.rs
    │   ├── compiler.rs
    │   ├── containers.rs
    │   ├── dict.rs
    │   ├── error.rs
    │   ├── function.rs
    │   ├── hashable.rs
    │   ├── headers.rs
    │   ├── lexer.rs
    │   ├── list.rs
    │   ├── main.rs
    │   ├── memory.rs
    │   ├── number.rs
    │   ├── pair.rs
    │   ├── parser.rs
    │   ├── pointerops.rs
    │   ├── printer.rs
    │   ├── rawarray.rs
    │   ├── repl.rs
    │   ├── safeptr.rs
    │   ├── symbol.rs
    │   ├── symbolmap.rs
    │   ├── taggedptr.rs
    │   ├── text.rs
    │   └── vm.rs
└── stickyimmix
    ├── .gitignore
    ├── Cargo.lock
    ├── Cargo.toml
    ├── LICENSE.txt
    ├── README.md
    └── src
        ├── allocator.rs
        ├── blockmeta.rs
        ├── bumpblock.rs
        ├── constants.rs
        ├── heap.rs
        ├── lib.rs
        └── rawptr.rs


/.github/workflows/book.yml:
--------------------------------------------------------------------------------
 1 | name: github pages
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   deploy:
 7 |     runs-on: ubuntu-20.04
 8 |     steps:
 9 |       - uses: actions/checkout@v2
10 | 
11 |       - name: Setup mdBook
12 |         uses: peaceiris/actions-mdbook@v1
13 |         with:
14 |           mdbook-version: '0.4.5'
15 |           # mdbook-version: 'latest'
16 | 
17 |       - run: mdbook build
18 | 
19 |       - name: Deploy
20 |         uses: peaceiris/actions-gh-pages@v3
21 |         with:
22 |           deploy_key: ${{ secrets.ACTIONS_DEPLOY_KEY }}
23 |           publish_dir: ./book
24 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | on: [push, pull_request]
 2 | #  push: 
 3 | #    branches: [master]
 4 | #  pull_request: 
 5 | #    branches: [master]
 6 | 
 7 | name: code-test
 8 | 
 9 | jobs:
10 |   test:
11 |     name: Test Suite
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - name: Checkout sources
15 |         uses: actions/checkout@v2
16 | 
17 |       - name: Install stable toolchain
18 |         uses: actions-rs/toolchain@v1
19 |         with:
20 |           profile: minimal
21 |           toolchain: stable
22 |           override: true
23 | 
24 |       - name: Cargo fmt check blockalloc
25 |         working-directory: ./blockalloc
26 |         run: cargo fmt --all -- --check
27 | 
28 |       - name: Cargo fmt check stickyimmix
29 |         working-directory: ./stickyimmix
30 |         run: cargo fmt --all -- --check
31 | 
32 |       - name: Cargo fmt check interpreter
33 |         working-directory: ./interpreter
34 |         run: cargo fmt --all -- --check
35 | 
36 |       - name: Cargo test blockalloc
37 |         working-directory: ./blockalloc
38 |         run: cargo test
39 | 
40 |       - name: Cargo test stickyimmix
41 |         working-directory: ./stickyimmix
42 |         run: cargo test
43 | 
44 |       - name: Cargo test interpreter
45 |         working-directory: ./interpreter
46 |         run: cargo test
47 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | book
2 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | We'd love your help!
 4 | 
 5 | See list of [issues under discussion](https://github.com/rust-hosted-langs/runtimes-WG/labels/book),
 6 | add to these discussions, suggest chapters and content.
 7 | 
 8 | Submit a PR for anything from spelling and grammar to content accuracy and
 9 | correctness to entire chapter content! Feel free to add yourself to the
10 | `contributors.txt` list.
11 | 
12 | 
13 | ## Licensing
14 | 
15 | ### Book text
16 | 
17 | Text in this repository is under the CC-BY 4.0 license - see `LICENSE.txt`.
18 | Copyrights retained by contributors.
19 | 
20 | #### Contribution
21 | 
22 | Unless you explicitly state otherwise, any book text contribution intentionally
23 | submitted for inclusion in the work by you shall be licensed as CC-BY 4.0 
24 | without any additional terms or conditions.
25 | 
26 | 
27 | ### Code licenses
28 | 
29 | Licensed under either of
30 | 
31 |  * Apache License, Version 2.0
32 |    ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
33 |  * MIT license
34 |    ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
35 | 
36 | at your option.
37 | 
38 | #### Contribution
39 | 
40 | Unless you explicitly state otherwise, any code contribution intentionally 
41 | submitted for inclusion in the work by you, as defined in the Apache-2.0 
42 | license, shall be dual licensed as above, without any additional terms or 
43 | conditions.
44 | 
45 | 
46 | ## Code of Conduct
47 | 
48 | All communication shall be conducted within the framework of the
49 | [Rust code of conduct](http://rust-lang.org/conduct.html).
50 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Peter Liniker
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Writing Interpreters in Rust: a Guide
 2 | 
 3 | ![](https://github.com/rust-hosted-langs/book/workflows/code-test/badge.svg)
 4 | 
 5 | This is an online book covering the lower level topics involved in writing an
 6 | interpreter in Rust including:
 7 | 
 8 | * memory management: allocation and garbage collection
 9 | * compiling: expressions, functions, closures
10 | * virtual machines: bytecode, instruction dispatch
11 | 
12 | 
13 | ## Project vision
14 | 
15 | From CPython to Ruby's YARV, V8 and SpiderMonkey, GHC to the JVM, most language
16 | runtimes are written in C/C++.
17 | 
18 | We believe that Rust is eminently suitable for implementing languages and can
19 | provide significant productivity improvements over C and C++ while retaining
20 | the performance advantages and low level control of both.
21 | 
22 | While there are a number of languages implemented in Rust available now, in
23 | varying states of completeness - interpreters, AOT compilers and
24 | JIT-compiled - our vision is singular:
25 | 
26 | _To create a well documented reference compiler and runtime,
27 | permissively licensed, such that you can fork and morph it into your own
28 | programming language._
29 | 
30 | That is, a platform for bootstrapping other languages, written in Rust.
31 | To that end, the implementation provided here is not intended to be feature
32 | complete and cannot possibly represent every variation of programming
33 | language or local optimization.
34 | 
35 | It is a lofty goal, and it certainly won't be the right approach for
36 | everybody. However, we hope it will help shift the landscape in favor of more
37 | memory-safe language implementations.
38 | 
39 | 
40 | ## Getting involved
41 | 
42 | See `CONTRIBUTING.md` for licensing and how to get involved.
43 | 
44 | 
45 | ## The contents
46 | 
47 | The rendered book can be read [here](https://rust-hosted-langs.github.io/book/)
48 | while the accompanying source code can be browsed in this repository.
49 | 


--------------------------------------------------------------------------------
/blockalloc/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/blockalloc/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | [[package]]
4 | name = "blockalloc"
5 | version = "0.1.0"
6 | 
7 | 


--------------------------------------------------------------------------------
/blockalloc/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "blockalloc"
3 | version = "0.1.0"
4 | authors = ["Peter Liniker <peter.liniker@gmail.com>"]
5 | edition = "2018"
6 | license = "MIT OR Apache-2.0"
7 | 
8 | 


--------------------------------------------------------------------------------
/blockalloc/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! A block allocator for blocks of memory that must be:
  2 | //!  - powers of two in size
  3 | //!  - aligned to their size
  4 | //!
  5 | //! Internally this calls the stabilized std Alloc API.
  6 | //! https://doc.rust-lang.org/std/alloc/index.html
  7 | //!
  8 | //! Usage:
  9 | //! ```
 10 | //! extern crate blockalloc;
 11 | //! use blockalloc::Block;
 12 | //!
 13 | //! let size = 4096;  // must be a power of 2
 14 | //! let block = Block::new(size).unwrap();
 15 | //! ```
 16 | //!
 17 | //! Normal scoping rules will call Block::drop() when `block` goes out of scope
 18 | //! causing the block to be fully deallocated.
 19 | use std::ptr::NonNull;
 20 | 
 21 | // ANCHOR: DefBlockComponents
 22 | pub type BlockPtr = NonNull<u8>; // non-null pointer to the first byte of a block
 23 | pub type BlockSize = usize; // size of a block in bytes
 24 | // ANCHOR_END: DefBlockComponents
 25 | 
 26 | /// The ways in which allocating a block can fail, as returned by `Block::new()`
 27 | // ANCHOR: DefBlockError
 28 | #[derive(Debug, PartialEq)]
 29 | pub enum BlockError {
 30 |     /// The request cannot be satisfied - usually the requested block size, which
 31 |     /// is also used as the alignment, wasn't a power of two
 32 |     BadRequest,
 33 |     /// Insufficient memory: the allocator returned a null pointer for the block
 34 |     OOM,
 35 | }
 36 | // ANCHOR_END: DefBlockError
 37 | 
 38 | /// An owned block of memory aligned to its own size; the memory is freed on drop
 39 | // ANCHOR: DefBlock
 40 | pub struct Block {
 41 |     ptr: BlockPtr, // base address of the block
 42 |     size: BlockSize, // size in bytes, also used as the alignment
 43 | }
 44 | // ANCHOR_END: DefBlock
 45 | 
 46 | impl Block {
 47 |     /// Allocate a new block of `size` bytes aligned to `size`. Size must be a power of two.
 48 |     // ANCHOR: BlockNew
 49 |     pub fn new(size: BlockSize) -> Result<Block, BlockError> {
 50 |         if !size.is_power_of_two() {
 51 |             return Err(BlockError::BadRequest);
 52 |         }
 53 | 
 54 |         Ok(Block {
 55 |             ptr: internal::alloc_block(size)?,
 56 |             size,
 57 |         })
 58 |     }
 59 |     // ANCHOR_END: BlockNew
 60 | 
 61 |     /// Consume the block and return its pointer, leaving the memory allocated
 62 |     pub fn into_mut_ptr(self) -> BlockPtr {
 63 |         std::mem::ManuallyDrop::new(self).ptr // no Drop: the caller now owns the memory
 64 |     }
 65 | 
 66 |     /// Return the size in bytes of the block
 67 |     pub fn size(&self) -> BlockSize {
 68 |         self.size
 69 |     }
 70 | 
 71 |     /// Unsafely reassemble from a pointer and size obtained from `into_mut_ptr()` and `size()`
 72 |     pub unsafe fn from_raw_parts(ptr: BlockPtr, size: BlockSize) -> Block {
 73 |         Block { ptr, size }
 74 |     }
 75 | 
 76 |     /// Return a bare pointer to the base of the block
 77 |     // ANCHOR: BlockAsPtr
 78 |     pub fn as_ptr(&self) -> *const u8 {
 79 |         self.ptr.as_ptr()
 80 |     }
 81 |     // ANCHOR_END: BlockAsPtr
 82 | }
 83 | 
 84 | impl Drop for Block {
 85 |     fn drop(&mut self) {
 86 |         internal::dealloc_block(self.ptr, self.size); // hand the memory back to the allocator
 87 |     }
 88 | }
 89 | 
 90 | mod internal {
 91 |     use super::{BlockError, BlockPtr, BlockSize};
 92 |     use std::alloc::{alloc, dealloc, Layout};
 93 |     use std::ptr::NonNull;
 94 | 
 95 |     // ANCHOR: AllocBlock
 96 |     pub fn alloc_block(size: BlockSize) -> Result<BlockPtr, BlockError> {
 97 |         // The checked constructor rejects a size that is zero, not a power of two or too
 98 |         // large for `isize`, any of which is undefined behavior with the unchecked variant
 99 |         let checked = Layout::from_size_align(size, size);
100 |         let layout = checked.map_err(|_| BlockError::BadRequest)?;
101 | 
102 |         // SAFETY: the layout is valid and, as its size equals its non-zero alignment,
103 |         // is never zero-sized
104 |         let ptr = unsafe { alloc(layout) };
105 |         // a null pointer signals that the allocator could not provide the memory
106 |         NonNull::new(ptr).ok_or(BlockError::OOM)
107 |     }
108 |     // ANCHOR_END: AllocBlock
109 | 
110 |     // ANCHOR: DeallocBlock
111 |     pub fn dealloc_block(ptr: BlockPtr, size: BlockSize) {
112 |         // SAFETY: relies on `ptr` and `size` describing a block from `alloc_block()`
113 |         unsafe {
114 |             let layout = Layout::from_size_align_unchecked(size, size);
115 |             dealloc(ptr.as_ptr(), layout);
116 |         }
117 |     }
118 |     // ANCHOR_END: DeallocBlock
119 | }
120 | 
121 | #[cfg(test)]
122 | mod tests {
123 | 
124 |     use crate::{Block, BlockError, BlockSize};
125 | 
126 |     fn alloc_dealloc(size: BlockSize) -> Result<(), BlockError> {
127 |         let block = Block::new(size)?;
128 | 
129 |         // ANCHOR: TestAllocPointer
130 |         // the block address bitwise AND the alignment bits (size - 1) should
131 |         // be a mutually exclusive set of bits
132 |         let mask = size - 1;
133 |         assert!((block.ptr.as_ptr() as usize & mask) ^ mask == mask);
134 |         // ANCHOR_END: TestAllocPointer
135 | 
136 |         drop(block);
137 |         Ok(())
138 |     }
139 | 
140 |     #[test]
141 |     fn test_bad_sizealign() {
142 |         assert_eq!(alloc_dealloc(999), Err(BlockError::BadRequest));
143 |     }
144 | 
145 |     #[test]
146 |     fn test_4k() {
147 |         assert_eq!(alloc_dealloc(4 * 1024), Ok(()));
148 |     }
149 | 
150 |     #[test]
151 |     fn test_32k() {
152 |         assert_eq!(alloc_dealloc(32 * 1024), Ok(()));
153 |     }
154 | 
155 |     #[test]
156 |     fn test_16m() {
157 |         assert_eq!(alloc_dealloc(16 * 1024 * 1024), Ok(()));
158 |     }
159 | }
160 | 


--------------------------------------------------------------------------------
/book.toml:
--------------------------------------------------------------------------------
 1 | [book]
 2 | authors = ["Peter Liniker"]
 3 | multilingual = false
 4 | language = "en"
 5 | src = "booksrc"
 6 | title = "Writing Interpreters in Rust: a Guide"
 7 | 
 8 | 
 9 | [preprocess.links]
10 | 


--------------------------------------------------------------------------------
/booksrc/404.md:
--------------------------------------------------------------------------------
1 | # 404 - this chapter has not yet been written
2 | 


--------------------------------------------------------------------------------
/booksrc/SUMMARY.md:
--------------------------------------------------------------------------------
 1 | # Summary
 2 | 
 3 | - [Introduction](./introduction.md)
 4 | - [Allocation](./part-allocators.md)
 5 |   - [Alignment](./chapter-alignment.md)
 6 |   - [Obtaining blocks of memory](./chapter-blocks.md)
 7 |   - [The type of allocation](./chapter-what-is-alloc.md)
 8 | - [An allocator: Sticky Immix](./part-stickyimmix.md)
 9 |   - [Bump allocation](./chapter-simple-bump.md)
10 |   - [Allocating into multiple blocks](./chapter-managing-blocks.md)
11 |   - [Defining the allocation API](./chapter-allocation-api.md)
12 |   - [Implementing the API](./chapter-allocation-impl.md)
13 | - [An interpreter: Eval-rs](./part-interpreter.md)
14 |   - [Allocating objects and dereferencing safely](./chapter-interp-alloc.md)
15 |   - [Tagged pointers and object headers](./chapter-interp-tagged-ptrs.md)
16 |   - [Symbols and Pairs](./chapter-interp-symbols-and-pairs.md)
17 |   - [Parsing s-expressions](./chapter-interp-parsing.md)
18 |   - [Arrays](./chapter-interp-arrays.md)
19 |   - [Bytecode](./chapter-interp-bytecode.md)
20 |   - [Dicts](./chapter-interp-dicts.md)
21 |   - [Virtual Machine: Design](./chapter-interp-vm-design.md)
22 |   - [Virtual Machine: Implementation](./chapter-interp-vm-impl.md)
23 |   - [Compiler: Design](./chapter-interp-compiler-design.md)
24 |   - [Compiler: Implementation](./chapter-interp-compiler-impl.md)
25 | - [Garbage collection](./404.md)
26 |   - [TODO - Tracing](./404.md)
27 |   - [TODO - Sweeping](./404.md)
28 |   - [TODO - Recycling blocks](./404.md)
29 | 


--------------------------------------------------------------------------------
/booksrc/chapter-alignment.md:
--------------------------------------------------------------------------------
 1 | # Alignment
 2 | 
 3 | There are subtleties in memory access alignment:
 4 | 
 5 | - Some hardware architectures and implementations may fault on unaligned
 6 |   memory access.
 7 | - Atomic operations require word-aligned access.
 8 | - SIMD operations typically require double-word-aligned access.
 9 | - In practice on 64 bit architectures, allocators align objects to 8 byte
10 |   boundaries for 64 bit objects and smaller and 16 byte boundaries for larger
11 |   objects for performance optimization and the above reasons.
12 | 
13 | Intel 32 and 64 bit x86 architectures allow general access to be unaligned but
14 | will probably incur an access penalty. The story on 32bit ARM and aarch64 is
15 | sufficiently similar but there is a higher chance that an ARM core is
16 | configured to raise a bus error on a misaligned access.
17 | 
18 | Another very important factor is atomic memory operations.
19 | Atomic access works on a whole word basis - any unaligned access by nature
20 | cannot be guaranteed to be atomic as it will probably involve more than one
21 | access.  To support atomic operations, alignment must be minimally on word
22 | boundaries.
23 | 
24 | SIMD operations, tending to be 128 bits wide or higher, should be
25 | aligned to 16 byte boundaries for optimal code generation and performance.
26 | Unaligned loads and stores may be allowed but normally these incur
27 | performance penalties.
28 | 
29 | ![Common alignments](img/alignment.png)
30 | 
31 | While Intel allows unaligned access (that is, alignment on any byte boundary),
32 | the
33 | [recommended](https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?wapkw=248966)
34 | (see section 3.6.4) alignment for objects larger than 64 bits is to 16 byte
35 | boundaries.
36 | 
37 | Apparently system `malloc()` implementations
38 | [tend to comply](http://www.erahm.org/2016/03/24/minimum-alignment-of-allocation-across-platforms/)
39 | with the 16 byte boundary.
40 | 
41 | To verify the above, a rough test of both the system allocator and jemalloc
42 | on x86_64 by using `Box::new()` on a set of types (`u8`, `u16`, `u32`, `u64`,
43 | `String` and a larger `struct`) confirms a minimum of 8 byte alignment for
44 | anything word size or smaller and 16 byte alignment for everything bigger.
45 | Sample pointer printouts below are for jemalloc but Linux libc malloc produced
46 | the same pattern:
47 | 
48 | ```
49 | p=0x7fb78b421028 u8
50 | p=0x7fb78b421030 u16
51 | p=0x7fb78b421038 u32
52 | p=0x7fb78b421050 u64
53 | p=0x7fb78b420060 "spam"
54 | p=0x7fb78b4220f0 Hoge { y: 2, z: "ほげ", x: 1 }
55 | ```
56 | 
57 | Compare with `std::mem::align_of::<T>()` which, on x86_64 for example,
58 | returns alignment values:
59 | 
60 | - `u8`: 1 byte
61 | - `u16`: 2 bytes
62 | - `u32`: 4 bytes
63 | - `u64`: 8 bytes
64 | - any bigger struct: 8
65 | 
66 | Thus despite the value of `std::mem::align_of::<T>()`, mature allocators will
67 | do what is most pragmatic and follow recommended practice in support of optimal
68 | performance.
69 | 
70 | With all that in mind, to keep things simple, we'll align everything to
71 | double-word boundaries. When we add in prepending an object header, the minimum
72 | memory required for an object will be two words anyway.
73 | 
74 | Thus, the allocated size of an object will be calculated[^1] by
75 | 
76 | ```rust,ignore
77 | let alignment = size_of::<usize>() * 2;
78 | // mask out the least significant bits that correspond to the alignment - 1
79 | // then add the full alignment
80 | let size = (size_of::<T>() & !(alignment - 1)) + alignment;
81 | ```
82 | 
83 | ----
84 | 
85 | [^1]: For a more detailed explanation of alignment adjustment calculations, see
86 | [phil-opp](https://github.com/phil-opp)'s kernel
87 | [heap allocator](https://os.phil-opp.com/kernel-heap/#alignment).
88 | 


--------------------------------------------------------------------------------
/booksrc/chapter-allocation-impl.md:
--------------------------------------------------------------------------------
 1 | # Implementing the Allocation API
 2 | 
 3 | In this final chapter of the allocation part of the book, we'll cover the
 4 | `AllocRaw` trait implementation.
 5 | 
 6 | This trait is implemented on the `StickyImmixHeap` struct:
 7 | 
 8 | ```rust,ignore
 9 | impl<H: AllocHeader> AllocRaw for StickyImmixHeap<H> {
10 |     type Header = H;
11 | 
12 |     ...
13 | }
14 | ```
15 | 
16 | Here the associated header type is provided as the generic type `H`, leaving it
17 | up to the interpreter to define.
18 | 
19 | ## Allocating objects
20 | 
21 | The first function to implement is `AllocRaw::alloc<T>()`. This function must:
22 | * calculate how much space in bytes is required by the object and header
23 | * allocate that space
24 | * instantiate an object header and write it to the first bytes of the space
25 | * copy the object itself to the remaining bytes of the space
26 | * return a pointer to where the object lives in this space
27 | 
28 | Let's look at the implementation.
29 | 
30 | ```rust,ignore
31 | impl<H: AllocHeader> AllocRaw for StickyImmixHeap<H> {
32 | {{#include ../stickyimmix/src/heap.rs:DefAlloc}}
33 | }
34 | ```
35 | 
36 | This, hopefully, is easy enough to follow after the previous chapters -
37 | * `self.find_space()` is the function described in the chapter
38 |   [Allocating into multiple blocks](./chapter-managing-blocks.md#allocating-into-the-head-block)
39 | * `Self::Header::new()` will be implemented by the interpreter
40 | * `write(space as *mut Self::Header, header)` calls the std function
41 |   `std::ptr::write`
42 | 
43 | ## Allocating arrays
44 | 
45 | We need a similar (but awkwardly different enough) implementation for array
46 | allocation. The key differences are that the type is fixed to a `u8` pointer
47 | and the array is initialized to zero bytes. It is up to the interpreter to
48 | write into the array itself.
49 | 
50 | ```rust,ignore
51 | impl<H: AllocHeader> AllocRaw for StickyImmixHeap<H> {
52 | {{#include ../stickyimmix/src/heap.rs:DefAllocArray}}
53 | }
54 | ```
55 | 
56 | ## Switching between header and object
57 | 
58 | As stated in the previous chapter, these functions are essentially pointer
59 | operations that do not dereference the pointers. Thus they are not unsafe
60 | to call, but the types they operate _on_ should have a suitably unsafe API.
61 | 
62 | `NonNull` is the chosen parameter and return type and the pointer arithmetic
63 | for obtaining the header from an object pointer of unknown type is shown
64 | below.
65 | 
66 | For our Immix implementation, since headers are placed immediately
67 | ahead of an object, we simply subtract the header size from the object
68 | pointer.
69 | 
70 | ```rust,ignore
71 | impl<H: AllocHeader> AllocRaw for StickyImmixHeap<H> {
72 | {{#include ../stickyimmix/src/heap.rs:DefGetHeader}}
73 | }
74 | ```
75 | 
76 | Getting the object from a header is the reverse - adding the header size
77 | to the header pointer results in the object pointer:
78 | 
79 | ```rust,ignore
80 | impl<H: AllocHeader> AllocRaw for StickyImmixHeap<H> {
81 | {{#include ../stickyimmix/src/heap.rs:DefGetObject}}
82 | }
83 | ```
84 | 
85 | ## Conclusion
86 | 
87 | Thus ends the first part of our Immix implementation. In the next part of the
88 | book we will jump over the fence to the interpreter and begin using the
89 | interfaces we've defined in this part.
90 | 


--------------------------------------------------------------------------------
/booksrc/chapter-blocks.md:
--------------------------------------------------------------------------------
  1 | # Obtaining Blocks of Memory
  2 | 
  3 | When requesting memory one block at a time, one of the questions is *what
  4 | is the desired block alignment?*
  5 | 
  6 | * In deciding, one factor is that using an alignment that is a multiple of the
  7 |   page size can make it easier to return memory to the operating system.
  8 | * Another factor is that if the block is aligned to its size, it is fast to
  9 |   do bitwise arithmetic on a pointer to an object in a block to compute the
 10 |   block boundary and therefore the location of any block metadata.
 11 | 
 12 | With both these in mind we'll look at how to allocate blocks that are
 13 | aligned to the size of the block.
 14 | 
 15 | 
 16 | ## A basic crate interface
 17 | 
 18 | A block of memory is defined as a base address and a size, so we need a struct
 19 | that contains these elements.
 20 | 
 21 | To wrap the base address pointer, we'll use the recommended type for building
 22 | collections, [`std::ptr::NonNull<T>`](https://doc.rust-lang.org/std/ptr/struct.NonNull.html),
 23 | which is available on stable.
 24 | 
 25 | ```rust,ignore
 26 | {{#include ../blockalloc/src/lib.rs:DefBlock}}
 27 | ```
 28 | 
 29 | Where `BlockPtr` and `BlockSize` are defined as:
 30 | 
 31 | ```rust,ignore
 32 | {{#include ../blockalloc/src/lib.rs:DefBlockComponents}}
 33 | ```
 34 | 
 35 | To obtain a `Block`, we'll create a `Block::new()` function which, along with
 36 | `Block::drop()`, is implemented internally by wrapping the stabilized Rust alloc
 37 | routines:
 38 | 
 39 | ```rust,ignore
 40 | {{#include ../blockalloc/src/lib.rs:BlockNew}}
 41 | ```
 42 | 
 43 | Where parameter `size` must be a power of two, which is validated on the first
 44 | line of the function.  Requiring the block size to be a power of two means
 45 | simple bit arithmetic can be used to find the beginning and end of a block in
 46 | memory, if the block size is always the same.
 47 | 
 48 | Errors take one of two forms, an invalid block-size or out-of-memory, both
 49 | of which may be returned by `Block::new()`.
 50 | 
 51 | ```rust,ignore
 52 | {{#include ../blockalloc/src/lib.rs:DefBlockError}}
 53 | ```
 54 | 
 55 | Now on to the platform-specific implementations.
 56 | 
 57 | 
 58 | ## Custom aligned allocation on stable Rust
 59 | 
 60 | On the stable rustc channel we have access to some features of the
 61 | [Alloc](https://doc.rust-lang.org/std/alloc/index.html) API.
 62 | 
 63 | This is the ideal option since it abstracts platform specifics for us, we do
 64 | not need to write different code for Unix and Windows ourselves.
 65 | 
 66 | Fortunately there is enough stable functionality to
 67 | fully implement what we need.
 68 | 
 69 | With an appropriate underlying implementation this code should compile and
 70 | execute for any target. The allocation function, implemented in the `internal`
 71 | mod, reads:
 72 | 
 73 | ```rust,ignore
 74 | {{#include ../blockalloc/src/lib.rs:AllocBlock}}
 75 | ```
 76 | 
 77 | Once a block has been allocated, there is no safe abstraction at this level
 78 | to access the memory. The `Block` will provide a bare pointer to the beginning
 79 | of the memory and it is up to the user to avoid invalid pointer arithmetic
 80 | and reading or writing outside of the block boundary.
 81 | 
 82 | ```rust,ignore
 83 | {{#include ../blockalloc/src/lib.rs:BlockAsPtr}}
 84 | ```
 85 | 
 86 | 
 87 | ## Deallocation
 88 | 
 89 | Again, using the stable Alloc functions:
 90 | 
 91 | ```rust,ignore
 92 | {{#include ../blockalloc/src/lib.rs:DeallocBlock}}
 93 | ```
 94 | 
 95 | The implementation of `Block::drop()` calls the deallocation function
 96 | for us so we can create and drop `Block` instances without leaking memory.
 97 | 
 98 | 
 99 | ## Testing
100 | 
101 | We want to be sure that the system level allocation APIs do indeed return
102 | block-size-aligned blocks. Checking for this is straightforward.
103 | 
104 | A correctly aligned block should have its low bits
105 | set to `0` for a number of bits that represents the range of the block
106 | size - that is, the block size minus one. A bitwise XOR will highlight any
107 | bits that shouldn't be set:
108 | 
109 | ```rust,ignore
110 | {{#include ../blockalloc/src/lib.rs:TestAllocPointer}}
111 | ```
112 | 


--------------------------------------------------------------------------------
/booksrc/chapter-interp-arrays.md:
--------------------------------------------------------------------------------
  1 | # Arrays
  2 | 
  3 | Before we get to the basics of compilation, we need another data structure:
  4 | the humble array. The first use for arrays will be to store the bytecode
  5 | sequences that the compiler generates.
  6 | 
  7 | Rust already provides `Vec` but as we're implementing everything in terms of our
  8 | memory management abstraction, we cannot directly use `Vec`. Rust does not
  9 | (yet) expose the ability to specify a custom allocator type as part of `Vec`,
 10 | nor are we interested in replacing the global allocator.
 11 | 
 12 | Our only option is to write our own version of `Vec`! Fortunately we can
 13 | learn a lot from `Vec` itself and its underlying implementation. Jump over to
 14 | the [Rustonomicon][1] for a primer on the internals of `Vec`.
 15 | 
 16 | The first thing we'll learn is to split the implementation into a `RawArray<T>`
 17 | type and an `Array<T>` type. `RawArray<T>` will provide an unsafe abstraction
 18 | while `Array<T>` will make a safe layer over it.
 19 | 
 20 | 
 21 | ## RawArray
 22 | 
 23 | If you've just come back from _Implementing Vec_ in the Nomicon, you'll
 24 | recognize what we're doing below with `RawArray<T>`:
 25 | 
 26 | ```rust,ignore
 27 | {{#include ../interpreter/src/rawarray.rs:DefRawArray}}
 28 | ```
 29 | 
 30 | Instead of `Unique<T>` for the pointer, we're using `Option<NonNull<T>>`.
 31 | One simple reason is that `Unique<T>` is likely to be permanently unstable and
 32 | only available internally to `std` collections. The other is that we can
 33 | avoid allocating the backing store if no capacity is requested yet, setting
 34 | the value of `ptr` to `None`.
 35 | 
 36 | For when we _do_ know the desired capacity, there is
 37 | `RawArray<T>::with_capacity()`. This method, because it allocates, requires
 38 | access to the `MutatorView` instance. If you'll recall from the chapter on
 39 | the allocation API, the API provides an array allocation method with
 40 | signature:
 41 | 
 42 | ```rust,ignore
 43 | AllocRaw::alloc_array(&self, size_bytes: ArraySize) -> Result<RawPtr<u8>, AllocError>;
 44 | ```
 45 | 
 46 | This method is wrapped on the interpreter side by `Heap` and `MutatorView` and
 47 | in both cases the return value remains, simply, `RawPtr<u8>` in the success
 48 | case. It's up to `RawArray<T>` to receive the `RawPtr<u8>` value and maintain
 49 | it safely. Here's `with_capacity()`, now:
 50 | 
 51 | ```rust,ignore
 52 | {{#include ../interpreter/src/rawarray.rs:DefRawArrayWithCapacity}}
 53 | ```
 54 | 
 55 | ### Resizing
 56 | 
 57 | If a `RawArray<T>`'s content will exceed its capacity, there is
 58 | `RawArray<T>::resize()`. It allocates a new backing array using the
 59 | `MutatorView` method `alloc_array()` and copies the content of the old
 60 | over to the new, finally swapping in the new backing array for the old.
 61 | 
 62 | The code for this is straightforward but a little longer, go check it out
 63 | in `interpreter/src/rawarray.rs`.
 64 | 
 65 | ### Accessing
 66 | 
 67 | Since `RawArray<T>` will be wrapped by `Array<T>`, we need a couple more
 68 | methods to access the raw memory:
 69 | 
 70 | ```rust,ignore
 71 | impl<T: Sized> RawArray<T> {
 72 | {{#include ../interpreter/src/rawarray.rs:DefRawArrayCapacity}}
 73 | 
 74 | {{#include ../interpreter/src/rawarray.rs:DefRawArrayAsPtr}}
 75 | }
 76 | ```
 77 | 
 78 | And that's it! Now for the safe wrapper.
 79 | 
 80 | 
 81 | ## Array
 82 | 
 83 | The definition of the struct wrapping `RawArray<T>` is as follows:
 84 | 
 85 | ```rust,ignore
 86 | {{#include ../interpreter/src/array.rs:DefArray}}
 87 | ```
 88 | 
 89 | Here we have three members:
 90 | 
 91 | * `length` - the length of the array
 92 | * `data` - the `RawArray<T>` being wrapped
 93 | * `borrow` - a flag serving as a runtime borrow check, allowing `RefCell`
 94 |   runtime semantics, since we're in a world of interior mutability patterns
 95 | 
 96 | We have a method to create a new array - `Array::alloc()`
 97 | 
 98 | ```rust,ignore
 99 | impl<T: Sized + Clone> Array<T> {
100 | {{#include ../interpreter/src/array.rs:DefArrayAlloc}}
101 | }
102 | ```
103 | 
104 | In fact we'll extend this pattern of a method named "alloc" to any data
105 | structure for convenience's sake.
106 | 
107 | There are many more methods for `Array<T>` and it would be exhausting to be
108 | exhaustive. Let's go over the core methods used to read and write elements
109 | and then an example use case.
110 | 
111 | ### Reading and writing
112 | 
113 | First of all, we need a function that takes an array index and returns a
114 | pointer to a memory location, if the index is within bounds:
115 | 
116 | ```rust,ignore
117 | impl<T: Sized + Clone> Array<T> {
118 | {{#include ../interpreter/src/array.rs:DefArrayGetOffset}}
119 | }
120 | ```
121 | 
122 | There are two bounds checks here - firstly, the index should be within the
123 | (likely non-zero) length values; secondly, the `RawArray<T>` instance
124 | should have a backing array allocated. If either of these checks fail, the
125 | result is an error. If these checks pass, we can be confident that there
126 | is array backing memory and that we can return a valid pointer to somewhere
127 | inside that memory block.
128 | 
129 | For reading a value in an array, we need two methods:
130 | 
131 | 1. one that handles move/copy semantics and returns a value
132 | 2. one that handles reference semantics and returns a reference to the original
133 |    value in its location in the backing memory
134 | 
135 | First, then:
136 | 
137 | ```rust,ignore
138 | impl<T: Sized + Clone> Array<T> {
139 | {{#include ../interpreter/src/array.rs:DefArrayRead}}
140 | }
141 | ```
142 | 
143 | and secondly:
144 | 
145 | ```rust,ignore
146 | impl<T: Sized + Clone> Array<T> {
147 | {{#include ../interpreter/src/array.rs:DefArrayReadRef}}
148 | }
149 | ```
150 | 
151 | Writing, or copying, an object to an array is implemented as simply as follows:
152 | 
153 | ```rust,ignore
154 | impl<T: Sized + Clone> Array<T> {
155 | {{#include ../interpreter/src/array.rs:DefArrayWrite}}
156 | }
157 | ```
158 | 
159 | These simple functions should only be used internally by `Array<T>` impl
160 | methods. We have numerous methods that wrap the above in more appropriate
161 | semantics for values of `T` in `Array<T>`.
162 | 
163 | ### The Array interfaces
164 | 
165 | To define the interfaces to the Array, and other collection types, we define a
166 | number of traits. For example, a collection that behaves as a stack implements
167 | `StackContainer`; a numerically indexable type implements `IndexedContainer`,
168 | and so on. As we'll see, there is some nuance, though, when it comes to a
169 | difference between collections of non-pointer types and collections of pointer
170 | types.
171 | 
172 | For our example, we will describe the stack interfaces of `Array<T>`.
173 | 
174 | First, the general case trait, with methods for accessing values stored in the
175 | array (non-pointer types):
176 | 
177 | ```rust,ignore
178 | {{#include ../interpreter/src/containers.rs:DefStackContainer}}
179 | ```
180 | 
181 | These are unremarkable functions, by now we're familiar with the references to
182 | `MutatorScope` and `MutatorView` in method parameter lists.
183 | 
184 | In any instance of `Array<T>`, `T` need only implement `Clone` and cannot be
185 | dynamically sized. Thus `T` can be any primitive type or any straightforward
186 | struct.
187 | 
188 | What if we want to store pointers to other objects? For example, if we want a
189 | heterogeneous array, such as Python's `List` type, what would we provide in
190 | place of `T`? The answer is to use the `TaggedCellPtr` type. However,
191 | an `Array<TaggedCellPtr>`, because we want to interface with pointers and
192 | use the memory access abstractions provided, can be made a little more
193 | ergonomic. For that reason, we have separate traits for containers of type
194 | `Container<TaggedCellPtr>`. In the case of the stack interface this looks like:
195 | 
196 | ```rust,ignore
197 | {{#include ../interpreter/src/containers.rs:DefStackAnyContainer}}
198 | ```
199 | 
200 | As you can see, these methods, while for `T = TaggedCellPtr`, provide an
201 | interface based on passing and returning `TaggedScopedPtr`.
202 | 
203 | Let's look at the implementation of one of these methods - `push()`  - for
204 | both `StackContainer` and `StackAnyContainer`.
205 | 
206 | Here's the code for `StackContainer::push()`:
207 | 
208 | ```rust,ignore
209 | impl<T: Sized + Clone> StackContainer<T> for Array<T> {
210 | {{#include ../interpreter/src/array.rs:DefStackContainerArrayPush}}
211 | }
212 | ```
213 | 
214 | In summary, the order of operations is:
215 | 
216 | 1. Check that a runtime borrow isn't in progress. If it is, return an error.
217 | 1. Since we must implement interior mutability, the member `data` of the
218 |    `Array<T>` struct is a `Cell`. We have to `get()` the content in order
219 |    to use it.
220 | 1. We then ask whether the array backing store needs to be grown. If so,
221 |    we resize the `RawArray<T>` and, since it's kept in a `Cell` on `Array<T>`,
222 |    we have to `set()` the value back into `data` to save the change.
223 | 1. Now that we have a `RawArray<T>` that has enough capacity, the length is
224 |    incremented and the object to be pushed is written to the next memory
225 |    location using the internal `Array<T>::write()` method detailed earlier.
226 | 
227 | Fortunately we can implement `StackAnyContainer::push()` in terms of
228 | `StackContainer::push()`:
229 | 
230 | ```rust,ignore
231 | impl StackAnyContainer for Array<TaggedCellPtr> {
232 | {{#include ../interpreter/src/array.rs:DefStackAnyContainerArrayPush}}
233 | }
234 | ```
235 | 
236 | ### One last thing
237 | 
238 | To more easily differentiate arrays of type `Array<T>` from arrays of type
239 | `Array<TaggedCellPtr>`, we make a type alias `List` where:
240 | 
241 | ```rust,ignore
242 | pub type List = Array<TaggedCellPtr>;
243 | ```
244 | 
245 | 
246 | ## In conclusion
247 | 
248 | We referenced how `Vec` is implemented internally and followed the same pattern
249 | of defining a `RawArray<T>` unsafe layer with a safe `Array<T>` wrapper. Then
250 | we looked into the stack interface for `Array<T>` and the implementation of
251 | `push()`.
252 | 
253 | There is more to arrays, of course - indexed access the most obvious, and also
254 | a few convenience methods. See the source code in `interpreter/src/array.rs`
255 | for the full detail.
256 | 
257 | In the next chapter we'll put `Array<T>` to use in a `Bytecode` type!
258 | 
259 | 
260 | [1]: https://doc.rust-lang.org/nomicon/vec.html
261 | 


--------------------------------------------------------------------------------
/booksrc/chapter-interp-bytecode.md:
--------------------------------------------------------------------------------
  1 | # Bytecode
  2 | 
  3 | In this chapter we will look at a bytecode compilation target. We'll combine
  4 | this with a section on the virtual machine interface to the bytecode data
  5 | structure.
  6 | 
  7 | We won't go much into detail on each bytecode operation, that will be more
  8 | usefully covered in the compiler and virtual machine chapters. Here, we'll
  9 | describe the data structures involved. As such, this will be one of our
 10 | shorter chapters. Let's go!
 11 | 
 12 | 
 13 | ## Design questions
 14 | 
 15 | Now that we're talking bytecode, we're at the point of choosing what type of
 16 | virtual machine we will be compiling for. The most common type is stack-based
 17 | where operands are pushed and popped on and off the stack. This requires
 18 | instructions for pushing and popping, with instructions in-between for operating
 19 | on values on the stack.
 20 | 
 21 | We'll be implementing a register-based VM though. The inspiration for this
 22 | comes from Lua 5[^1] which implements a fixed-width bytecode register VM. While
 23 | stack based VMs are typically claimed to be simpler, we'll see that the Lua
 24 | way of allocating registers per function also has an inherent simplicity and
 25 | has performance gains over a stack VM, at least for an interpreted
 26 | non jit-compiled VM.
 27 | 
 28 | Given register based, fixed-width bytecode, each opcode must reference the
 29 | register numbers that it operates on. Thus, for an (untyped) addition
 30 | operation `x = a + b`, each of `x`, `a` and `b` must be associated with a
 31 | register.
 32 | 
 33 | Following Lua, encoding this as a fixed width opcode typically looks like
 34 | encoding the operator and operands as 8 bit values packed into a 32 bit opcode
 35 | word. That implies, given 8 bits, that there can be a theoretical maximum of
 36 | 256 registers for a function call. For the addition above, this encoding
 37 | might look like this:
 38 | 
 39 | ```ignore
 40 |    32.....24......16.......8.......0
 41 |     [reg a ][reg b ][reg x ][Add   ]
 42 | ```
 43 | 
 44 | where the first 8 bits contain the operator, in this case "Add", and the
 45 | other three 8 bit slots in the 32 bit word each contain a register number.
 46 | 
 47 | For some operators, we will need to encode values larger than 8 bits. As
 48 | we will still need space for an operator and a destination register, that
 49 | leaves a maximum of 16 bits for larger values.
 50 | 
 51 | 
 52 | ## Opcodes
 53 | 
 54 | We have options in how we describe opcodes in Rust.
 55 | 
 56 | 1. Each opcode represented by a u32
 57 |     * Pros: encoding flexibility, it's just a set of bits
 58 |     * Cons: bit shift and masking operations to encode and decode operator
 59 |       and operands. This isn't necessarily a big deal but it doesn't allow
 60 |       us to leverage the Rust type system to avoid encoding mistakes
 61 | 1. Each opcode represented by an enum discriminant
 62 |     * Pros: operators and operands baked as Rust types at compile time, type
 63 |       safe encoding; no bit operations needed
 64 |     * Cons: encoding scheme limited to what an enum can represent
 65 | 
 66 | The ability to leverage the compiler to prevent opcode encoding errors is
 67 | attractive and we won't have any need for complex encodings. We'll use an enum
 68 | to represent all possible opcodes and their operands.
 69 | 
 70 | Since a Rust enum can contain named values within each variant, this is what
 71 | we use to most tightly define our opcodes.
 72 | 
 73 | ### Opcode size
 74 | 
 75 | Since we're using `enum` instead of a directly size-controlled type such as u32
 76 | for our opcodes, we have to be more careful about making sure our opcode type
 77 | doesn't take up more space than is necessary.  32 bits is ideal for reasons
 78 | stated earlier (8 bits for the operator and 8 bits for each of three operands.)
 79 | 
 80 | Let's do an experiment.
 81 | 
 82 | First, we need to define a register as an 8 bit value. We'll also define an
 83 | inline literal integer as 16 bits.
 84 | 
 85 | ```rust,ignore
 86 | type Register = u8;
 87 | type LiteralInteger = i16;
 88 | ```
 89 | 
 90 | Then we'll create an opcode enum with a few variants that might be typical:
 91 | 
 92 | ```rust,ignore
 93 | #[derive(Copy, Clone)]
 94 | enum Opcode {
 95 |     Add {
 96 |         dest: Register,
 97 |         a: Register,
 98 |         b: Register
 99 |     },
100 |     LoadLiteral {
101 |         dest: Register,
102 |         value: LiteralInteger
103 |     }
104 | }
105 | ```
106 | 
107 | It should be obvious that with an enum like this we can safely pass compiled
108 | bytecode from the compiler to the VM. It should also be clear that this, by
109 | allowing use of `match` statements, will be very ergonomic to work with.
110 | 
111 | Theoretically, if we never have more than 256 variants, our variants never have
112 | more than 3 `Register` values (or one `Register` and one `LiteralInteger` sized
113 | value), the compiler should be able to pack `Opcode` into 32 bits.
114 | 
115 | Our test: we hope the output of the following code to be `4` - 4 bytes or 32
116 | bits.
117 | 
118 | ```rust,ignore
119 | use std::mem::size_of;
120 | 
121 | fn main() {
122 |     println!("Size of Opcode is {}", size_of::<Opcode>());
123 | }
124 | ```
125 | 
126 | And indeed when we run this, we get `Size of Opcode is 4`!
127 | 
128 | To keep an eye on this situation, we'll put this check into a unit test:
129 | 
130 | ```rust,ignore
131 | {{#include ../interpreter/src/bytecode.rs:DefTestOpcodeIs32Bits}}
132 | ```
133 | 
134 | Now, let's put these `Opcode`s into an array.
135 | 
136 | 
137 | ## An array of Opcode
138 | 
139 | We can define this array easily, given that `Array<T>` is a generic type:
140 | 
141 | ```rust,ignore
142 | {{#include ../interpreter/src/bytecode.rs:DefArrayOpcode}}
143 | ```
144 | 
145 | Is this enough to define bytecode? Not quite. We've accommodated 16 bit
146 | literal signed integers, but all kinds of other types can be literals.
147 | We need some way of referencing any literal type in bytecode. For that
148 | we add a `Literals` type, which is just:
149 | 
150 | ```rust,ignore
151 | {{#include ../interpreter/src/bytecode.rs:DefLiterals}}
152 | ```
153 | 
154 | Any opcode that loads a literal (other than a 16 bit signed integer) will
155 | need to reference an object in the `Literals` list. This is easy enough:
156 | just as there's a `LiteralInteger`, we have `LiteralId` defined as
157 | 
158 | ```rust,ignore
159 | pub type LiteralId = u16;
160 | ```
161 | 
162 | This id is an index into the `Literals` list.  This isn't the most efficient
163 | scheme or encoding, but given a preference for fixed 32 bit opcodes, it will
164 | also keep things simple.
165 | 
166 | The `ByteCode` type, finally, is a composition of `ArrayOpcode` and `Literals`:
167 | 
168 | ```rust,ignore
169 | {{#include ../interpreter/src/bytecode.rs:DefByteCode}}
170 | ```
171 | 
172 | 
173 | ## Bytecode compiler support
174 | 
175 | There are a few methods implemented for `ByteCode`:
176 | 
177 | 1. `fn push<'guard>(&self, mem: &'guard MutatorView, op: Opcode) -> Result<(), RuntimeError>`
178 |    This function pushes a new opcode into the `ArrayOpcode` instance.
179 | 1. ```rust,ignore
180 |    fn update_jump_offset<'guard>(
181 |        &self,
182 |        mem: &'guard MutatorView,
183 |        instruction: ArraySize,
184 |        offset: JumpOffset,
185 |    ) -> Result<(), RuntimeError>
186 |    ```
187 |    This function, given an instruction index into the `ArrayOpcode` instance,
188 |    and given that the instruction at that index is a type of jump instruction,
189 |    sets the relative jump offset of the instruction to the given offset.
190 |    This is necessary because forward jumps cannot be calculated until all the
191 |    in-between instructions have been compiled first.
192 | 1. ```rust,ignore
193 |    fn push_lit<'guard>(
194 |        &self,
195 |        mem: &'guard MutatorView,
196 |        literal: TaggedScopedPtr
197 |    ) -> Result<LiteralId, RuntimeError>
198 |    ```
199 |    This function pushes a literal on to the `Literals` list and returns the
200 |    index - the id - of the item.
201 | 1. ```rust,ignore
202 |    fn push_loadlit<'guard>(
203 |        &self,
204 |        mem: &'guard MutatorView,
205 |        dest: Register,
206 |        literal_id: LiteralId,
207 |    ) -> Result<(), RuntimeError>
208 |    ```
209 |    After pushing a literal into the `Literals` list, the corresponding load
210 |    instruction should be pushed into the `ArrayOpcode` list.
211 | 
212 | `ByteCode` and its functions combined with the `Opcode` enum are enough to
213 | build a compiler for.
214 | 
215 | 
216 | ## Bytecode execution support
217 | 
218 | The previous section described a handful of functions for our compiler to use
219 | to build a `ByteCode` structure.
220 | 
221 | We'll need a different set of functions for our virtual machine to access
222 | `ByteCode` from an execution standpoint.
223 | 
224 | The execution view of bytecode is of a contiguous sequence of instructions and
225 | an instruction pointer. We're going to create a separate `ByteCode` instance
226 | for each function that gets compiled, so our execution model will have to
227 | be able to jump between `ByteCode` instances. We'll need a new struct to
228 | represent that:
229 | 
230 | ```rust,ignore
231 | {{#include ../interpreter/src/bytecode.rs:DefInstructionStream}}
232 | ```
233 | 
234 | In this definition, the pointer `instructions` can be updated to point at any
235 | `ByteCode` instance. This allows us to switch between functions by managing
236 | different `ByteCode` pointers as part of a stack of call frames. In support
237 | of this we have:
238 | 
239 | ```rust,ignore
240 | impl InstructionStream {
241 | {{#include ../interpreter/src/bytecode.rs:DefInstructionStreamSwitchFrame}}
242 | }
243 | ```
244 | 
245 | Of course, the main function needed during execution is to retrieve the next
246 | opcode. Ideally, we can keep a pointer that points directly at the next opcode
247 | such that only a single dereference and pointer increment is needed to get
248 | the opcode and advance the instruction pointer. Our implementation is less
249 | efficient for now, requiring a dereference of 1. the `ByteCode` instance and
250 | then 2. the `ArrayOpcode` instance and finally 3. an indexing into the
251 | `ArrayOpcode` instance:
252 | 
253 | ```rust,ignore
254 | {{#include ../interpreter/src/bytecode.rs:DefInstructionStreamGetNextOpcode}}
255 | ```
256 | 
257 | 
258 | ## Conclusion
259 | 
260 | The full `Opcode` definition can be found in `interpreter/src/bytecode.rs`.
261 | 
262 | As we work toward implementing a compiler, the next data structure we need is
263 | a dictionary or hash map. This will also build on the foundational
264 | `RawArray<T>` implementation. Let's go on to that now!
265 | 
266 | 
267 | ---
268 | 
269 | [^1]: Roberto Ierusalimschy et al, [The Implementation of Lua 5.0](https://www.lua.org/doc/jucs05.pdf)
270 | 


--------------------------------------------------------------------------------
/booksrc/chapter-interp-dicts.md:
--------------------------------------------------------------------------------
  1 | # Dicts
  2 | 
  3 | The implementation of dicts, or hash tables, is going to combine a reuse of the
  4 | [RawArray](./chapter-interp-arrays.md)
  5 | type and closely follow the [Crafting Interpreters][1] design:
  6 | 
  7 | * open addressing
  8 | * linear probing
  9 | * FNV hashing
 10 | 
 11 | Go read the corresponding chapter in Crafting Interpreters and then come
 12 | back here. We won't duplicate much of Bob's excellent explanation of the above
 13 | terms and we'll assume you are familiar with his chapter when reading
 14 | ours.
 15 | 
 16 | 
 17 | ## Code design
 18 | 
 19 | A `Dict` in our interpreter will allow any hashable value as a key and any
 20 | type as a value. We'll store pointers to the key and the value together in
 21 | a struct `DictItem`.
 22 | 
 23 | Here, we'll also introduce the single diversion from
 24 | Crafting Interpreters' implementation in that we'll cache the hash value and
 25 | use it as part of a tombstone indicator. This adds an extra word
 26 | per entry but we will also take the stance that if two keys have
 27 | the same hash value then the keys are equal. This simplifies our implementation
 28 | as we won't need to implement object equality comparisons just yet.
 29 | 
 30 | ```rust,ignore
 31 | {{#include ../interpreter/src/dict.rs:DefDictItem}}
 32 | ```
 33 | 
 34 | The `Dict` itself mirrors Crafting Interpreters' implementation of a count of
 35 | used entries and an array of entries. Since tombstones are counted as used
 36 | entries, we'll add a separate `length` that excludes tombstones so we can
 37 | accurately report the number of items in a dict.
 38 | 
 39 | ```rust,ignore
 40 | {{#include ../interpreter/src/dict.rs:DefDict}}
 41 | ```
 42 | 
 43 | 
 44 | ## Hashing
 45 | 
 46 | To implement our compiler we will need to be able to hash the `Symbol` type and
 47 | integers (inline in tagged pointers.)
 48 | 
 49 | The Rust standard library defines trait `std::hash::Hash` that must be
 50 | implemented by types that want to be hashed. This trait requires the type to
 51 | implement method `fn hash<H>(&self, state: &mut H) where H: Hasher`.
 52 | 
 53 | This signature requires a reference to the type `&self` to access its data.
 54 | In our world, this is insufficient: we also require a `&MutatorScope`
 55 | lifetime to access an object. We will have to wrap `std::hash::Hash` in our
 56 | own trait that extends, essentially the same signature, with this scope
 57 | guard parameter. This trait is named `Hashable`:
 58 | 
 59 | 
 60 | ```rust,ignore
 61 | {{#include ../interpreter/src/hashable.rs:DefHashable}}
 62 | ```
 63 | 
 64 | We can implement this trait for `Symbol` - it's a straightforward wrap of
 65 | calling `Hash::hash()`:
 66 | 
 67 | ```rust,ignore
 68 | {{#include ../interpreter/src/symbol.rs:DefImplHashableForSymbol}}
 69 | ```
 70 | 
 71 | Then finally, because this is all for a dynamically typed interpreter, we'll
 72 | write a function that can take any type - a `TaggedScopedPtr` - and attempt
 73 | to return a 64 bit hash value from it:
 74 | 
 75 | ```rust,ignore
 76 | {{#include ../interpreter/src/dict.rs:DefHashKey}}
 77 | ```
 78 | 
 79 | Now we can take a `Symbol` or a tagged integer and use them as keys in our
 80 | `Dict`.
 81 | 
 82 | 
 83 | ## Finding an entry
 84 | 
 85 | The methods that a dictionary typically provides, lookup, insertion and
 86 | deletion, all hinge around one internal function, `find_entry()`.
 87 | 
 88 | This function scans the internal `RawArray<DictItem>` array for a slot that
 89 | matches the hash value argument. It may find an exact match for an existing
 90 | key-value entry; if it does not, it will return the first available slot for
 91 | the hash value, whether an empty never-before used slot or the tombstone
 92 | entry of a formerly used slot.
 93 | 
 94 | A tombstone, remember, is a slot that previously held a key-value pair but
 95 | has been deleted. These slots must be specially marked so that when searching
 96 | for an entry that generated a hash for an earlier slot but had to be inserted
 97 | at a later slot, we know to keep looking rather than stop searching at the
 98 | empty slot of a deleted entry.
 99 | 
100 | Slot  | Content
101 | ------|--------
102 | n - 1 | empty
103 | n     | X: hash % capacity == n
104 | n + 1 | tombstone
105 | n + 2 | Y: hash % capacity == n
106 | n + 3 | empty
107 | 
108 | For example, in the above table:
109 | 
110 | * Key `X`'s hash maps to slot `n`.
111 | * At some point another entry was inserted at slot `n + 1`.
112 | * Then `Y`, with hash mapping also to slot `n`, was inserted, but had to be
113 |   bumped to slot `n + 2` because the previous two slots were occupied.
114 | * Then the entry at slot `n + 1` was deleted and marked as a tombstone.
115 | 
116 | If slot `n + 1` was simply marked as `empty` after its occupant was deleted,
117 | then when searching for `Y` we wouldn't know to keep searching and find `Y` in
118 | slot `n + 2`. Hence, deleted entries are marked differently to empty slots.
119 | 
120 | Here is the code for the Find Entry function:
121 | 
122 | ```rust,ignore
123 | {{#include ../interpreter/src/dict.rs:DefFindEntry}}
124 | ```
125 | 
126 | To begin with, it calculates the index in the array from which to start
127 | searching. Then it iterates over the internal array, examining each entry's
128 | hash and key as it goes.
129 | 
130 | * The first tombstone that is encountered is saved. This may turn out to be the
131 |   entry that should be returned if an exact hash match isn't found by the time
132 |   a never-before used slot is reached. We want to reuse tombstone entries, of
133 |   course.
134 | * If no tombstone was found and we reach a never-before used slot, return
135 |   that slot.
136 | * If an exact match is found, return that slot of course.
137 | 
138 | 
139 | ## The external API
140 | 
141 | Just as we defined some container traits for `Array<T>` to define access to
142 | arrays based on stack or indexed style access, we'll define a container trait
143 | for `Dict`:
144 | 
145 | ```rust,ignore
146 | {{#include ../interpreter/src/containers.rs:DefHashIndexedAnyContainer}}
147 | ```
148 | 
149 | This trait contains the external API that `Dict` will expose for managing
150 | keys and values. The implementation of each of these methods will be in terms
151 | of the `find_entry()` function described above. Let's look at a couple of the
152 | more complex examples, `assoc()` and `dissoc()`.
153 | 
154 | ### assoc
155 | 
156 | ```rust,ignore
157 | impl HashIndexedAnyContainer for Dict {
158 | {{#include ../interpreter/src/dict.rs:DefHashIndexedAnyContainerForDictAssoc}}
159 | }
160 | ```
161 | 
162 | ### dissoc
163 | 
164 | ```rust,ignore
165 | impl HashIndexedAnyContainer for Dict {
166 | {{#include ../interpreter/src/dict.rs:DefHashIndexedAnyContainerForDictDissoc}}
167 | }
168 | ```
169 | 
170 | As you can see, once `find_entry()` is implemented as a separate function,
171 | these methods become fairly easy to comprehend.
172 | 
173 | 
174 | ## Conclusion
175 | 
176 | If you _haven't_ read Bob Nystrom's chapter on [hash tables][1] in Crafting 
177 | Interpreters we encourage you to do so: it will help make sense of this 
178 | chapter.
179 | 
180 | Now, we'll transition to some compiler and virtual machine design before
181 | we continue with code implementation.
182 | 
183 |  
184 | [1]: http://craftinginterpreters.com/hash-tables.html
185 | 


--------------------------------------------------------------------------------
/booksrc/chapter-interp-parsing.md:
--------------------------------------------------------------------------------
  1 | # Parsing s-expressions
  2 | 
  3 | We'll make this quick. It's not the main focus of this book and the topic is
  4 | better served by seeking out other resources that can do it justice.
  5 | 
  6 | In service of keeping it short, we're parsing s-expressions and we'll start
  7 | by considering only symbols and parentheses. We could hardly make it simpler.
  8 | 
  9 | 
 10 | ## The interface
 11 | 
 12 | The interface we want should take a `&str` and return a `TaggedScopedPtr`.
 13 | We want the tagged version of the scoped ptr because the return value might
 14 | point to either a `Pair` or a `Symbol`. Examples of valid input are:
 15 | 
 16 | * `a-symbol`: a `Symbol` with name "a-symbol"
 17 | * `(this is a list)`: a linked list of `Pair`s, each with the `first` value
 18 |   pointing to a `Symbol`
 19 | * `(this (is a nested) list)`: a linked list, as above, containing a nested
 20 |   linked list
 21 | * `(this () is a nil symbol)`: the two characters `()` together are equivalent
 22 |   to the special symbol `nil`, also the value `0` in our `TaggedPtr` type
 23 | * `(one . pair)`: a single `Pair` instance with `first` pointing at the `Symbol`
 24 |   for "one" and `second` at the `Symbol` for "pair"
 25 | 
 26 | Our internal implementation is split into tokenizing and then parsing the
 27 | token stream. Tokenizing takes the `&str` input and returns a `Vec<Token>`
 28 | on success:
 29 | 
 30 | ```rust,ignore
 31 | fn tokenize(input: &str) -> Result<Vec<Token>, RuntimeError>;
 32 | ```
 33 | 
 34 | The return `Vec<Token>` is an intermediate, throwaway value, and does not
 35 | interact with our Sticky Immix heap. Parsing takes the `Vec<Token>` and
 36 | returns a `TaggedScopedPtr` on success:
 37 | 
 38 | ```rust,ignore
 39 | fn parse_tokens<'guard>(
 40 |     mem: &'guard MutatorView,
 41 |     tokens: Vec<Token>,
 42 | ) -> Result<TaggedScopedPtr<'guard>, RuntimeError>;
 43 | ```
 44 | 
 45 | 
 46 | ## Tokens, a short description
 47 | 
 48 | The full set of tokens we will consider parsing is:
 49 | 
 50 | ```rust,ignore
 51 | {{#include ../interpreter/src/lexer.rs:DefTokenType}}
 52 | ```
 53 | 
 54 | We combine this enum with a source input position indicator to compose the
 55 | `Token` type. This source position is defined as:
 56 | 
 57 | ```rust,ignore
 58 | {{#include ../interpreter/src/error.rs:DefSourcePos}}
 59 | ```
 60 | 
 61 | And whenever it is available to return as part of an error, error messages can
 62 | be printed with the relevant source code line.
 63 | 
 64 | The `Token` type:
 65 | 
 66 | ```rust,ignore
 67 | {{#include ../interpreter/src/lexer.rs:DefToken}}
 68 | ```
 69 | 
 70 | 
 71 | ## Parsing, a short description
 72 | 
 73 | The key to quickly writing a parser in Rust is the `std::iter::Peekable`
 74 | iterator which can be obtained from the `Vec<Token>` instance with
 75 | `tokens.iter().peekable()`. This iterator has a `peek()` method that allows
 76 | you to look at the next `Token` instance without advancing the iterator.
 77 | 
 78 | Our parser, a hand-written recursive descent parser, uses this iterator type
 79 | to look ahead to the next token to identify primarily whether the next token
 80 | is valid in combination with the current token, or to know how to recurse
 81 | next without consuming the token yet.
 82 | 
 83 | For example, an open paren `(` followed by a symbol would start a new `Pair`
 84 | linked list, recursing into a new parser function call, but if it is
 85 | immediately followed by a close paren `)`, that is `()`, it is equivalent to
 86 | the symbol `nil`, while otherwise `)` _terminates_ a `Pair` linked list and
 87 | causes the current parsing function instance to return.
 88 | 
 89 | Another case is the `.` operator, which is only valid in the following pattern:
 90 | `(a b c . d)` where `a`, `b`, `c`, and `d` must be symbols or nested lists.
 91 | A `.` must be followed by a single expression followed by a `)`.
 92 | 
 93 | Tokenizing and parsing are wrapped in a function that takes the input `&str`
 94 | and gives back the `TaggedScopedPtr`:
 95 | 
 96 | ```rust,ignore
 97 | {{#include ../interpreter/src/parser.rs:DefParse}}
 98 | ```
 99 | 
100 | Notice that this function and `parse_tokens()` require the
101 | `mem: &'guard MutatorView` parameter. Parsing creates `Symbol` and `Pair`
102 | instances in our Sticky Immix heap and so requires the scope-restricted
103 | `MutatorView` instance.
104 | 
105 | This is all we'll say on parsing s-expressions. In the next chapter we'll do
106 | something altogether more informative with regards to memory management
107 | and it'll be necessary by the time we're ready to compile: arrays!
108 | 


--------------------------------------------------------------------------------
/booksrc/chapter-interp-symbols-and-pairs.md:
--------------------------------------------------------------------------------
  1 | # Symbols and Pairs
  2 | 
  3 | To bootstrap our compiler, we'll parse s-expressions into `Symbol` and `Pair`
  4 | types, where a `Pair` is essentially a Lisp cons cell.
  5 | 
  6 | The definition of `Symbol` is just the raw components of a `&str`:
  7 | 
  8 | ```rust,ignore
  9 | {{#include ../interpreter/src/symbol.rs:DefSymbol}}
 10 | ```
 11 | 
 12 | Why this is how `Symbol` is defined and how we handle these raw components will
 13 | be covered in just a bit. First though, we'll delve into the `Pair` type.
 14 | 
 15 | 
 16 | ## Pairs of pointers
 17 | 
 18 | The definition of `Pair` is
 19 | 
 20 | ```rust,ignore
 21 | {{#include ../interpreter/src/pair.rs:DefPair}}
 22 | ```
 23 | 
 24 | The type of `first` and `second` is `TaggedCellPtr`, as seen in the previous
 25 | chapter. This pointer type can point at any dynamic type. By the
 26 | end of this chapter we'll be able to build a nested linked list of `Pair`s
 27 | and `Symbol`s.
 28 | 
 29 | Since this structure will be used for parsing and compiling, the `Pair`
 30 | `struct` has a couple of extra members that optionally describe the source
 31 | code line and character number of the values pointed at by `first` and
 32 | `second`. These will be useful for reporting error messages. We'll come back
 33 | to these in the chapter on parsing.
 34 | 
 35 | To instantiate a `Pair` with `first` and `second` set to nil, let's
 36 | create a `new()` function:
 37 | 
 38 | ```rust,ignore
 39 | impl Pair {
 40 | {{#include ../interpreter/src/pair.rs:DefPairNew}}
 41 | }
 42 | ```
 43 | 
 44 | That function, as it's not being allocated into the heap, doesn't require the
 45 | lifetime guard. Let's look at a more interesting function: `cons()`, which
 46 | assigns a value to `first` and `second` and puts the `Pair` on to the heap:
 47 | 
 48 | ```rust,ignore
 49 | {{#include ../interpreter/src/pair.rs:DefCons}}
 50 | ```
 51 | 
 52 | Here we have the lifetime `'guard` associated with the `MutatorView` instance
 53 | which grants access to the allocator `alloc_tagged()` method and the getter
 54 | and setter on `TaggedScopedPtr`.
 55 | 
 56 | The other two args, `head` and `rest` are required to share the same `'guard`
 57 | lifetime as the `MutatorView` instance, or rather, `'guard` must at least be
 58 | a subtype of their lifetimes. Their values, of type `TaggedScopedPtr<'guard>`,
 59 | can be written directly to the `first` and `second` members of `Pair` with
 60 | the setter `TaggedCellPtr::set()`.
 61 | 
 62 | We'll also add a couple `impl` methods for appending an object to a `Pair`
 63 | in linked-list fashion:
 64 | 
 65 | ```rust,ignore
 66 | impl Pair {
 67 | {{#include ../interpreter/src/pair.rs:DefPairAppend}}
 68 | }
 69 | ```
 70 | 
 71 | This method, given a value to append, creates a new `Pair` whose member `first`
 72 | points at the value, then sets the `second` of the `&self` `Pair` to that new
 73 | `Pair` instance. This is in support of s-expression notation `(a b)` which
 74 | describes a linked-list of `Pair`s arranged, in pseudo-Rust:
 75 | 
 76 | ```
 77 | Pair {
 78 |     first: a,
 79 |     second: Pair {
 80 |         first: b,
 81 |         second: nil,
 82 |     },
 83 | }
 84 | ```
 85 | 
 86 | The second method is for directly setting the value of the `second` for
 87 | s-expression dot-notation style: `(a . b)` is represented by `first` pointing
 88 | at `a`, dotted with `b` which is pointed at by `second`. In our pseudo
 89 | representation:
 90 | 
 91 | ```
 92 | Pair {
 93 |     first: a,
 94 |     second: b,
 95 | }
 96 | ```
 97 | 
 98 | The implementation is simply:
 99 | 
100 | ```rust,ignore
101 | impl Pair {
102 | {{#include ../interpreter/src/pair.rs:DefPairDot}}
103 | }
104 | ```
105 | 
106 | The only other piece to add, since `Pair` must be able to be passed into
107 | our allocator API, is the `AllocObject` impl for `Pair`:
108 | 
109 | ```rust,ignore
110 | impl AllocObject<TypeList> for Pair {
111 |     const TYPE_ID: TypeList = TypeList::Pair;
112 | }
113 | ```
114 | 
115 | This impl pattern will repeat for every type in `TypeList` so it'll be a great
116 | candidate for a macro.
117 | 
118 | And that's it! We have a cons-cell style `Pair` type and some elementary
119 | methods for creating and allocating them.
120 | 
121 | Now, back to `Symbol`, which seems like it should be even simpler, but as we'll
122 | see has some nuance to it.
123 | 
124 | 
125 | ## Symbols and pointers
126 | 
127 | Let's recap the definition of `Symbol` and that it is the raw members of a
128 | `&str`:
129 | 
130 | ```rust,ignore
131 | {{#include ../interpreter/src/symbol.rs:DefSymbol}}
132 | ```
133 | 
134 | By this definition, a symbol has a name string, but does not own the string
135 | itself. What does this mean?
136 | 
137 | Symbols are in fact pointers to interned strings. Since each symbol points
138 | to a unique string, we can identify a symbol by its pointer value rather than
139 | needing to look up the string itself.
140 | 
141 | However, symbols do need to be discovered by their string name, and symbol
142 | pointers must dereference to return their string form. i.e. we need a
143 | bidirectional mapping of string to pointer and pointer to string.
144 | 
145 | In our implementation, we use a `HashMap<String, RawPtr<Symbol>>` to map from
146 | name strings to symbol pointers, while the `Symbol` object itself points back
147 | to the name string.
148 | 
149 | This is encapsulated in a `SymbolMap` struct:
150 | 
151 | ```rust,ignore
152 | {{#include ../interpreter/src/symbolmap.rs:DefSymbolMap}}
153 | ```
154 | 
155 | where we use `RefCell` to wrap operations in interior mutability, just like
156 | all other allocator functionality.
157 | 
158 | The second struct member `Arena` requires further explanation: since symbols are
159 | unique strings that can be identified and compared by their pointer values,
160 | these pointer values must remain static throughout the program lifetime.
161 | Thus, `Symbol` objects cannot be managed by a heap that might perform object
162 | relocation. We need a separate heap type for objects that are never
163 | moved or freed until the program ends, the `Arena` type.
164 | 
165 | The `Arena` type is simple. It, like `Heap`, wraps `StickyImmixHeap` but
166 | unlike `Heap`, it will never run garbage collection.
167 | 
168 | ```rust,ignore
169 | {{#include ../interpreter/src/arena.rs:DefArena}}
170 | ```
171 | 
172 | The `ArenaHeader` is a simple object header type to fulfill the allocator
173 | API requirements but whose methods will never be needed.
174 | 
175 | Allocating a `Symbol` will use the `Arena::alloc()` method which calls through
176 | to the `StickyImmixHeap` instance.
177 | 
178 | We'll add a method for getting a `Symbol` from its name string to the
179 | `SymbolMap` at the allocator API level:
180 | 
181 | ```rust,ignore
182 | impl SymbolMap {
183 | {{#include ../interpreter/src/symbolmap.rs:DefSymbolMapLookup}}
184 | }
185 | ```
186 | 
187 | Then we'll add wrappers to the `Heap` and `MutatorView` impls to scope-restrict
188 | access:
189 | 
190 | ```rust,ignore
191 | impl Heap {
192 | {{#include ../interpreter/src/memory.rs:DefHeapLookupSym}}
193 | }
194 | ```
195 | 
196 | and
197 | 
198 | ```rust,ignore
199 | impl<'memory> MutatorView<'memory> {
200 | {{#include ../interpreter/src/memory.rs:DefMutatorViewLookupSym}}
201 | }
202 | ```
203 | 
204 | This scope restriction is absolutely necessary, despite these objects never
205 | being freed or moved during runtime. This is because `Symbol`, as a standalone
206 | struct, remains unsafe to use with its raw `&str` components. These components
207 | can only safely be accessed when there is a guarantee that the backing
208 | `HashMap` is still in existence, which is only when the `MutatorView` is
209 | accessible.
210 | 
211 | Two methods on `Symbol` guard access to the `&str`, one unsafe to reassemble
212 | the `&str` from raw components, the other safe when given a `MutatorScope`
213 | guard instance.
214 | 
215 | ```rust,ignore
216 | impl Symbol {
217 | {{#include ../interpreter/src/symbol.rs:DefSymbolUnguardedAsStr}}
218 | 
219 | {{#include ../interpreter/src/symbol.rs:DefSymbolAsStr}}
220 | }
221 | ```
222 | 
223 | Finally, to make `Symbol`s allocatable in the Sticky Immix heap, we need to
224 | implement `AllocObject` for it:
225 | 
226 | ```rust,ignore
227 | impl AllocObject<TypeList> for Symbol {
228 |     const TYPE_ID: TypeList = TypeList::Symbol;
229 | }
230 | ```
231 | 
232 | 
233 | ## Moving on swiftly
234 | 
235 | Now we've got the elemental pieces of s-expressions, lists and symbols, we can
236 | move on to parsing s-expression strings.
237 | 
238 | Since the focus of this book is the underlying mechanisms of memory management
239 | in Rust and the details of runtime implementation, parsing will receive less
240 | attention. We'll make it quick!
241 | 


--------------------------------------------------------------------------------
/booksrc/chapter-interp-vm-design.md:
--------------------------------------------------------------------------------
  1 | # Virtual Machine: Architecture and Design
  2 | 
  3 | In this short chapter we will outline our virtual machine design choices. These
  4 | are substantially a matter of pragmatic dynamic language implementation points
  5 | and as such, borrow heavily from uncomplicated prior work such as Lua 5 and 
  6 | Crafting Interpreters.
  7 | 
  8 | 
  9 | ## Bytecode
 10 | 
 11 | We already discussed our Lua-inspired bytecode in a [previous
 12 | chapter](./chapter-interp-bytecode.md). To recap, we are using 32 bit
 13 | fixed-width opcodes with space for 8 bit register identifiers and 16 bit
 14 | literals.
 15 | 
 16 | 
 17 | ## The stack
 18 | 
 19 | Following the example of [Crafting Interpreters][1] we'll maintain two separate
 20 | stack data structures:
 21 | 
 22 | * the register stack for storing stack values
 23 | * the call frame stack
 24 | 
 25 | In our case, these are best separated out because the register stack will be
 26 | composed entirely of `TaggedCellPtr`s.
 27 | 
 28 | To store call frames on the register stack we would have to either:
 29 | 
 30 | 1. allocate every stack frame on the heap with pointers to them from the
 31 |    register stack 
 32 | 2. or coerce a call frame `struct` type into the register stack type
 33 | 
 34 | Neither of these is attractive so we will maintain the call frame stack as an
 35 | independent data structure.
 36 | 
 37 | ### The register stack
 38 | 
 39 | The register stack is a homogeneous array of `TaggedCellPtr`s. Thus, no object
 40 | is allocated directly on the stack, all objects are heap allocated and the stack
 41 | only consists of pointers to heap objects. The exception is literal integers
 42 | that fit within the range allowed by a tagged pointer.
 43 | 
 44 | Since this is a register virtual machine, not following stack push and pop
 45 | semantics, and bytecode operands are limited to 8 bit register indexes, a
 46 | function is limited to addressing a maximum of 256 contiguous registers. 
 47 | 
 48 | Due to function call nesting, the register stack may naturally grow much more
 49 | than a length of 256. 
 50 | 
 51 | This requires us to implement a sliding window into the register stack which
 52 | will move as functions are called and return. The call frame stack will contain
 53 | the stack base pointer for each function call. We can then happily make use of a
 54 | Rust slice to implement the window of 256 contiguous stack slots which a
 55 | function call is limited to.
 56 | 
 57 | ### The call frame stack
 58 | 
 59 | A call frame needs to store three critical data points:
 60 | 
 61 | * a pointer to the function being executed
 62 | * the return instruction pointer when a nested function is called
 63 | * the stack base pointer for the function call
 64 | 
 65 | These three items can form a simple struct and we can define an
 66 | `Array<CallFrame>` type for optimum performance.
 67 | 
 68 | 
 69 | ## Global values
 70 | 
 71 | To store global values, we have all we need: the `Dict` type that maps `Symbol`s
 72 | to another value. The VM will, of course, have an abstraction over the internal
 73 | `Dict` to enforce `Symbol`s only as keys.
 74 | 
 75 | 
 76 | ## Closures
 77 | 
 78 | In the classic upvalues implementation from Lua 5, followed also by [Crafting
 79 | Interpreters][2], a linked list of upvalues is used to map stack locations to
 80 | shared variables.
 81 | 
 82 | In every respect but one, our implementation will be similar.
 83 | 
 84 | In our implementation, we'll use the `Dict` type that we already have available
 85 | to do this mapping of stack locations to shared variables. 
 86 | 
 87 | As the language and compiler will implement lexical scoping, the compiler will
 88 | have static knowledge of the _relative_ stack locations of closed-over variables
 89 | and can generate the appropriate bytecode operands for the virtual machine to
 90 | calculate the absolute stack locations at runtime. Thus, absolute stack
 91 | locations can be mapped to `Upvalue` objects and so a `Dict` can be employed to
 92 | facilitate the mapping. This obviates the need to implement a linked list data
 93 | structure.
 94 | 
 95 | The compiler must issue instructions to tell the VM when to make a closure data
 96 | structure. It can do so, of course, because simple analysis shows whether
 97 | a function references nonlocal bindings. A closure data structure as generated
 98 | by the compiler must reference the function that will be called and the list of
 99 | relative stack locations that correspond to each nonlocal binding. 
100 | 
101 | The VM, when executing the instruction to make a closure, will calculate the
102 | absolute stack locations for each nonlocal binding and create the closure
103 | environment - a `List<Upvalue>`. VM instructions within the function code, as in
104 | Lua, indirectly reference nonlocal bindings by indexing into this environment.
105 | 
106 | 
107 | ## Partial functions
108 | 
109 | Here is one point where we will introduce a less common construct in our virtual
110 | machine. Functions will be first class, that is they are objects that can be
111 | passed around as values and arguments. On top of that, we'll allow passing
112 | insufficient arguments to a function when it is called. The return value of
113 | such an operation will, instead of an error, be a `Partial` instance. This value
114 | must carry with it the arguments given and a pointer to the function waiting to
115 | be called.
116 | 
117 | This is insufficient for a fully featured currying implementation but is an
118 | interesting extension to first class functions, especially as it allows us to
119 | not _require_ lambdas to be constructed syntactically every time they might be
120 | used.
121 | 
122 | By that we mean the following: if we have a function `(def mul (x y) (* x y))`,
123 | to turn that into a function that multiplies a number by 3 we'd normally have to
124 | define a second function, or lambda, `(lambda (x) (mul x 3))` and call it
125 | instead. However, with a simple partial function implementation we can avoid the
126 | lambda definition and call `(mul 3)` directly, which will collect the function
127 | pointer for `mul` and argument `3` into a `Partial` and wait for the final
128 | argument before calling into the function `mul` with both required arguments.
129 | 
130 | > ***Note:*** We can use the same struct for both closures and partial
131 | > functions. A closure is a yet-to-be-called function carrying a list of
132 | > references to values, or a list of values. A partial is a yet-to-be-called
133 | > function carrying a list of arguments. They look very similar, and it's
134 | > possible, of course, to partially apply arguments to a closure.
135 | 
136 | 
137 | ## Instruction dispatch
138 | 
139 | In dispatch, one optimal outcome is to minimize the machine code overhead
140 | between each VM instruction code.  This overhead, where the next VM instruction
141 | is fetched, decoded and mapped to the entry point of the instruction code, is
142 | the dispatch code.  The other axis of optimization is code ergonomics.
143 | 
144 | Prior [research][3] into implementing dispatch in Rust concludes that simple
145 | switch-style dispatch is the only cross-platform construct we can reasonably
146 | make use of. Other mechanisms come with undesirable complexity or are platform
147 | dependent. For the most part, with modern CPU branch prediction, the overhead
148 | of switch dispatch is small.
149 | 
150 | What this looks like: a single `match` expression with a pattern to represent
151 | each bytecode discriminant, all wrapped in a loop. To illustrate:
152 | 
153 | ```rust,ignore
154 | loop {
155 |     let opcode = get_next_opcode();
156 |     match opcode {
157 |         Opcode::Add(a, x, y) => { ... },
158 |         Opcode::Call(f, r, p) => { ... },
159 |     }
160 | }
161 | ```
162 | 
163 | 
164 | ## That's it!
165 | 
166 | Next we'll look at the counterpart of VM design - compiler design.
167 | 
168 | 
169 | [1]: http://craftinginterpreters.com/calls-and-functions.html#call-frames
170 | [2]: http://craftinginterpreters.com/closures.html
171 | [3]: https://pliniker.github.io/post/dispatchers/
172 | 


--------------------------------------------------------------------------------
/booksrc/chapter-interp-vm-impl.md:
--------------------------------------------------------------------------------
  1 | # Virtual Machine: Implementation
  2 | 
  3 | In this chapter we'll dive into some of the more interesting and important
  4 | implementation details of our virtual machine.
  5 | 
  6 | To begin with, we'll lay out a struct for a single thread of execution. This
  7 | struct should contain everything needed to execute the output of the compiler.
  8 | 
  9 | ```rust,ignore
 10 | {{#include ../interpreter/src/vm.rs:DefThread}}
 11 | ```
 12 | 
 13 | Here we see every data structure needed to represent:
 14 | 
 15 | - function call frames
 16 | - stack values
 17 | - closed-over stack values (Upvalues)
 18 | - global values
 19 | - bytecode to execute
 20 | 
 21 | The VM's primary operation is to iterate through instructions, executing each
 22 | in sequence. The outermost control structure is, therefore, a loop containing
 23 | a `match` expression.
 24 | 
 25 | Here is a code extract of the opening lines of this match operation. The
 26 | function shown is a member of the `Thread` struct. It evaluates the next
 27 | instruction and is called in a loop by an outer function. We'll look at several
 28 | extracts from this function in this chapter.
 29 | 
 30 | ```rust,ignore
 31 | {{#include ../interpreter/src/vm.rs:ThreadEvalNextInstr}}
 32 | 
 33 |                 ...
 34 | ```
 35 | 
 36 | The function obtains a slice view of the register stack, then narrows that down
 37 | to a 256 register window for the current function.
 38 | 
 39 | Then it fetches the next opcode and using `match`, decodes it.
 40 | 
 41 | Let's take a closer look at the stack.
 42 | 
 43 | 
 44 | ## The stack
 45 | 
 46 | While some runtimes and compilers, particularly low-level languages, have a
 47 | single stack that represents both function call information and local variables,
 48 | our high-level runtime splits the stack into:
 49 | 
 50 | 1. a stack of `CallFrame` objects containing function call and return
 51 |    information
 52 | 2. and a register stack for local variables.
 53 | 
 54 | Let's look at each in turn.
 55 | 
 56 | ### The register stack
 57 | 
 58 | In our `Thread` struct, the register stack is represented by the two members:
 59 | 
 60 | ```rust,ignore
 61 | pub struct Thread {
 62 |     ...
 63 |     stack: CellPtr<List>,
 64 |     stack_base: Cell<ArraySize>,
 65 |     ...
 66 | }
 67 | ```
 68 | 
 69 | Remember that the `List` type is defined as `Array<TaggedCellPtr>` and is
 70 | therefore an array of tagged pointers. Thus, the register stack is a homogeneous
 71 | array of word sized values that are pointers to objects on the heap or values
 72 | that can be inlined in the tagged pointer word.
 73 | 
 74 | We also have a `stack_base` variable to quickly retrieve the offset into `stack`
 75 | that indicates the beginning of the window of 256 registers that the current
 76 | function has for its local variables.
 77 | 
 78 | ### The call frame stack
 79 | 
 80 | In our `Thread` struct, the call frame stack is represented by the members:
 81 | 
 82 | ```rust,ignore
 83 | pub struct Thread {
 84 |     ...
 85 |     frames: CellPtr<CallFrameList>,
 86 |     instr: CellPtr<InstructionStream>,
 87 |     ...
 88 | }
 89 | ```
 90 | 
 91 | A `CallFrame` and an array of them are defined as:
 92 | 
 93 | ```rust,ignore
 94 | {{#include ../interpreter/src/vm.rs:DefCallFrame}}
 95 | 
 96 | {{#include ../interpreter/src/vm.rs:DefCallFrameList}}
 97 | ```
 98 | 
 99 | A `CallFrame` contains all the information needed to resume a function when
100 | a nested function call returns:
101 | 
102 | * a `Function` object, which references the `ByteCode` comprising the
103 |   function
104 | * the return instruction pointer
105 | * the stack base index for the function's stack register window
106 | 
107 | On every function call, a `CallFrame` instance is pushed on to the `Thread`'s
108 | `frames` stack and on every return from a function, the top `CallFrame` is
109 | popped off the stack.
110 | 
111 | Additionally, we keep a pointer to the current executing function (the function
112 | represented by the top `CallFrame`) with the member `instr:
113 | CellPtr<InstructionStream>`.
114 | 
115 | For a review of the definition of `InstructionStream` see the
116 | [bytecode](./chapter-interp-bytecode.md) chapter where we defined it as
117 | a pair of values - a `ByteCode` reference and a pointer to the next `Opcode`
118 | to fetch.
119 | 
120 | The VM keeps the `InstructionStream` object pointing at the same `ByteCode`
121 | object as is pointed at by the `Function` in the `CallFrame` at the top of
122 | the call frame stack. Thus, when a call frame is popped off the stack, the
123 | `InstructionStream` is updated with the `ByteCode` and instruction pointer
124 | from the `CallFrame` at the new stack top; and similarly when a function
125 | is called _into_ and a new `CallFrame` is pushed on to the stack.
126 | 
127 | 
128 | ## Functions and function calls
129 | 
130 | ### Function objects
131 | 
132 | Since we've mentioned `Function` objects above, let's now have a look at the
133 | definition.
134 | 
135 | ```rust,ignore
136 | {{#include ../interpreter/src/function.rs:DefFunction}}
137 | ```
138 | 
139 | Instances of `Function` are produced by the compiler, one for each function
140 | definition that is compiled, including nested function definitions.
141 | 
142 | A `Function` object is a simple collection of values, some of which may be
143 | `nil`. Any member represented by a `TaggedCellPtr` may, of course, contain
144 | a `nil` value.
145 | 
146 | Thus the function may be anonymous, represented by a `nil` name value.
147 | 
148 | While the function name is optional, the parameter names are always included.
149 | Though they do not need to be known in order to execute the function, they are
150 | useful for representing the function in string form if the programmer needs to
151 | introspect a function object.
152 | 
153 | Members that are _required_ to execute the function are the arity, the
154 | `ByteCode` and any nonlocal references.
155 | 
156 | Nonlocal references are an optional list of `(relative_stack_frame, register)`
157 | tuples, provided by the compiler, that are needed to locate nonlocal variables
158 | on the register stack. These are, of course, a key component of implementing
159 | closures.
160 | 
161 | We'll talk about closures shortly, but before we do, we'll extend `Function`s
162 | with partial application of arguments.
163 | 
164 | 
165 | ### Partial functions
166 | 
167 | A partial function application takes a subset of the arguments required to
168 | make a function call. These arguments must be stored for later.
169 | 
170 | Thus, a `Partial` object references the `Function` to be called and a list
171 | of arguments to give it when the call is finally executed.
172 | 
173 | Below is the definition of `Partial`. Note that it also contains a possible
174 | closure environment which, again, we'll arrive at momentarily.
175 | 
176 | ```rust,ignore
177 | {{#include ../interpreter/src/function.rs:DefPartial}}
178 | ```
179 | 
180 | The `arity` and `used` members indicate how many arguments are expected and how
181 | many have been given. These are provided directly in this struct rather than
182 | requiring dereferencing the `arity` on the `Function` object and the length of
183 | the `args` list. This is for convenience and performance.
184 | 
185 | Each time more arguments are added to a `Partial`, a new `Partial` instance must
186 | be allocated and the existing arguments copied over. A `Partial` object, once
187 | created, is immutable.
188 | 
189 | 
190 | ### Closures
191 | 
192 | Closures and partial applications have, at an abstract level, something in
193 | common: they both reference values that the function will need when it is
194 | finally called.
195 | 
196 | It's also possible, of course, to have a partially applied closure.
197 | 
198 | We can extend the `Partial` definition with a closure environment so that we can
199 | use the same object type everywhere to represent a function pointer, applied
200 | arguments and closure environment as needed.
201 | 
202 | #### Compiling a closure
203 | 
204 | The compiler, because it keeps track of variable names and scopes, knows when a
205 | `Function` references nonlocal variables. After such a function is defined, the
206 | compiler emits a `MakeClosure` instruction.
207 | 
208 | #### Referencing the stack with upvalues
209 | 
210 | The VM, when it executes `MakeClosure`, creates a new `Partial` object.  It
211 | then iterates over the list of nonlocal references and allocates an `Upvalue`
212 | object for each, which are added to the `env` member on the `Partial` object.
213 | 
214 | The below code extract is from the function `Thread::eval_next_instr()` in
215 | the `MakeClosure` instruction decode and execution block.
216 | 
217 | The two operands of the `MakeClosure` operation - `dest` and `function` - are
218 | registers. `function` points at the `Function` to be given an environment and
219 | made into a closure `Partial` instance; the pointer to this instance will be
220 | written to the `dest` register.
221 | 
222 | ```rust,ignore
223 | {{#include ../interpreter/src/vm.rs:OpcodeMakeClosure}}
224 | ```
225 | 
226 | The `Upvalue` struct itself is defined as:
227 | 
228 | ```rust,ignore
229 | {{#include ../interpreter/src/vm.rs:DefUpvalue}}
230 | ```
231 | 
232 | An `Upvalue` is an object that references an absolute register stack location
233 | (that is the `location` member.)
234 | 
235 | The initial value of `closed` is `false`. In this state, the location on the
236 | stack that contains the variable _must_ be a valid location. That is, the stack
237 | can not have been unwound yet. If the closure is called, `Upvalue`s in this
238 | state are simply an indirection between the function and the variable on the
239 | register stack.
240 | 
241 | The compiler is able to keep track of variables and whether they are closed
242 | over. It emits bytecode instructions to close `Upvalue` objects when variables
243 | on the stack go out of scope.
244 | 
245 | This instruction, `CloseUpvalues`, copies the variable from the register stack
246 | to the `value` member of the `Upvalue` object and sets `closed` to `true`.
247 | 
248 | From then on, when the closure reads or writes to this variable, the value on
249 | the `Upvalue` object is modified rather than the location on the register stack.
250 | 
251 | 
252 | ## Global values
253 | 
254 | ```rust,ignore
255 | pub struct Thread {
256 |     ...
257 |     globals: CellPtr<Dict>,
258 |     ...
259 | }
260 | ```
261 | 
262 | The values and functions in the outermost scope of a program are the global values.
263 | We can manage these with an instance of a `Dict`. While a `Dict` can use any
264 | hashable value as a key, internally the VM will only allow `Symbol`s to be
265 | keys. That is, globals must be named objects.
266 | 
267 | ## Next...
268 | 
269 | Let's dive into the compiler!
270 | 


--------------------------------------------------------------------------------
/booksrc/chapter-managing-blocks.md:
--------------------------------------------------------------------------------
  1 | # Allocating into Multiple Blocks
  2 | 
  3 | Let's now zoom out of the fractal code soup one level and begin arranging multiple
  4 | blocks so we can allocate - in theory - indefinitely.
  5 | 
  6 | ## Lists of blocks
  7 | 
  8 | We'll need a new struct for wrapping multiple blocks:
  9 | 
 10 | ```rust,ignore
 11 | {{#include ../stickyimmix/src/heap.rs:DefBlockList}}
 12 | ```
 13 | 
 14 | Immix maintains several lists of blocks. We won't include them all in the first
 15 | iteration but in short they are:
 16 | 
 17 | * `free`: a list of blocks that contain no objects. These blocks are held at the
 18 |   ready to allocate into on demand
 19 | * `recycle`: a list of blocks that contain some objects but also at least one
 20 |   line that can be allocated into
 21 | * `large`: not a list of blocks, necessarily, but a list of objects larger than
 22 |   the block size, or some other method of accounting for large objects
 23 | * `rest`: the rest of the blocks that have been allocated into but are not
 24 |   suitable for recycling
 25 | 
 26 | In our first iteration we'll only keep the `rest` list of blocks and two blocks
 27 | to immediately allocate into. Why two? To understand why, we need to understand
 28 | how Immix thinks about object sizes.
 29 | 
 30 | ### Immix and object sizes
 31 | 
 32 | We've seen that there are two numbers that define granularity in Immix: the
 33 | block size and the line size.  These numbers give us the ability to categorize
 34 | object sizes:
 35 | 
 36 | * small: those that (with object header and alignment overhead) fit inside a
 37 |   line
 38 | * medium: those that (again with object header and alignment overhead) are
 39 |   larger than one line but smaller than a block
 40 | * large: those that are larger than a block
 41 | 
 42 | In the previous chapter we described the basic allocation algorithm: when
 43 | an object is being allocated, the current block is scanned for a hole between
 44 | marked lines large enough to allocate into. This does seem like it could
 45 | be inefficient. We could spend a lot of CPU cycles looking for a big enough
 46 | hole, especially for a medium sized object.
 47 | 
 48 | To avoid this, Immix maintains a second block, an overflow block, to allocate
 49 | medium objects into that don't fit the first available hole in the
 50 | main block being allocated into.
 51 | 
 52 | Thus two blocks to immediately allocate into:
 53 | 
 54 | * `head`: the current block being allocated into
 55 | * `overflow`: a block kept handy for writing medium objects into that don't
 56 |   fit the `head` block's current hole
 57 | 
 58 | We'll be ignoring large objects for now and attending only to allocating small
 59 | and medium objects into blocks.
 60 | 
 61 | Instead of recycling blocks with holes, or maintaining a list of pre-allocated
 62 | free blocks, we'll allocate a new block on demand whenever we need more space.
 63 | We'll get to identifying holes and recyclable blocks in a later chapter.
 64 | 
 65 | ### Managing the overflow block
 66 | 
 67 | Generally in our code for this book, we will try to default to not allocating
 68 | memory unless it is needed. For example, when an array is instantiated,
 69 | the backing storage will remain unallocated until a value is pushed on to
 70 | it.
 71 | 
 72 | Thus in the definition of `BlockList`, `head` and `overflow` are `Option`
 73 | types and won't be instantiated except on demand.
 74 | 
 75 | For allocating into the overflow block we'll define a function in the
 76 | `BlockList` impl:
 77 | 
 78 | ```rust,ignore
 79 | impl BlockList {
 80 |     fn overflow_alloc(&mut self, alloc_size: usize) -> Result<*const u8, AllocError> {
 81 |         ...
 82 |     }
 83 | }
 84 | ```
 85 | 
 86 | The input constraint is that, since overflow is for medium objects, `alloc_size`
 87 | must be less than the block size.
 88 | 
 89 | The logic inside will divide into three branches:
 90 | 
 91 | 1. We haven't got an overflow block yet - `self.overflow` is `None`. In this
 92 |    case we have to instantiate a new block (since we're not maintaining
 93 |    a list of preinstantiated free blocks yet) and then, since that block
 94 |    is empty and we have a medium sized object, we can expect the allocation
 95 |    to succeed.
 96 |    ```rust,ignore
 97 |        match self.overflow {
 98 |            Some ...,
 99 |            None => {
100 |                 let mut overflow = BumpBlock::new()?;
101 | 
102 |                 // object size < block size means we can't fail this expect
103 |                 let space = overflow
104 |                     .inner_alloc(alloc_size)
105 |                     .expect("We expected this object to fit!");
106 | 
107 |                 self.overflow = Some(overflow);
108 | 
109 |                 space
110 |             }
111 |        }
112 |    ```
113 | 2. We _have_ an overflow block and the object fits. Easy.
114 |    ```rust,ignore
115 |         match self.overflow {
116 |             // We already have an overflow block to try to use...
117 |             Some(ref mut overflow) => {
118 |                 // This is a medium object that might fit in the current block...
119 |                 match overflow.inner_alloc(alloc_size) {
120 |                     // the block has a suitable hole
121 |                     Some(space) => space,
122 |                     ...
123 |                 }
124 |             },
125 |             None => ...
126 |         }
127 |    ```
128 | 3. We have an overflow block but the object does not fit. Now we simply
129 |    instantiate a _new_ overflow block, adding the old one to the `rest`
130 |    list (in future it will make a good candidate for recycling!). Again,
131 |    since we're writing a medium object into a block, we can expect allocation
132 |    to succeed.
133 |    ```rust,ignore
134 |         match self.overflow {
135 |             // We already have an overflow block to try to use...
136 |             Some(ref mut overflow) => {
137 |                 // This is a medium object that might fit in the current block...
138 |                 match overflow.inner_alloc(alloc_size) {
139 |                     Some ...,
140 |                     // the block does not have a suitable hole
141 |                     None => {
142 |                         let previous = replace(overflow, BumpBlock::new()?);
143 | 
144 |                         self.rest.push(previous);
145 | 
146 |                         overflow.inner_alloc(alloc_size).expect("Unexpected error!")
147 |                     }
148 |                 }
149 |             },
150 |             None => ...
151 |         }
152 |    ```
153 | 
154 | In this logic, the only error can come from failing to create a new block.
155 | 
156 | On success, at this level of interface we continue to return a `*const u8`
157 | pointer to the available space as we're not yet handling the type of the
158 | object being allocated.
159 | 
160 | You may have noticed that the function signature for `overflow_alloc` takes a
161 | `&mut self`.  This isn't compatible with the interior mutability model
162 | of allocation.  We'll have to wrap the `BlockList` struct in another struct
163 | that handles this change of API model.
164 | 
165 | ## The heap struct
166 | 
167 | This outer struct will provide the external crate interface and some further
168 | implementation of block management.
169 | 
170 | The crate interface will require us to consider object headers and so in the
171 | struct definition below there is reference to a generic type `H` that
172 | the _user_ of the heap will define as the object header.
173 | 
174 | ```rust,ignore
175 | {{#include ../stickyimmix/src/heap.rs:DefStickyImmixHeap}}
176 | ```
177 | 
178 | Since object headers are not owned directly by the heap struct, we need a
179 | `PhantomData` instance to associate with `H`.  We'll discuss object headers
180 | in a later chapter.
181 | 
182 | Now let's focus on the use of the `BlockList`.
183 | 
184 | The instance of `BlockList` in the `StickyImmixHeap` struct is wrapped in an
185 | `UnsafeCell` because we need interior mutability. We need to be able to
186 | borrow the `BlockList` mutably while presenting an immutable interface to
187 | the outside world.  Since we won't be borrowing the `BlockList` in multiple
188 | places in the same call tree, we don't need `RefCell` and we can avoid its
189 | runtime borrow checking.
190 | 
191 | ### Allocating into the head block
192 | 
193 | We've already taken care of the overflow block, now we'll handle allocation
194 | into the `head` block. We'll define a new function:
195 | 
196 | ```rust,ignore
197 | impl StickyImmixHeap {
198 |     fn find_space(
199 |         &self,
200 |         alloc_size: usize,
201 |         size_class: SizeClass,
202 |     ) -> Result<*const u8, AllocError> {
203 |         let blocks = unsafe { &mut *self.blocks.get() };
204 |         ...
205 |     }
206 | }
207 | ```
208 | 
209 | This function is going to look almost identical to the `overflow_alloc()`
210 | function defined earlier. It has more or less the same cases to walk through:
211 | 
212 | 1. `head` block is `None`, i.e. we haven't allocated a head block yet. Allocate
213 |    one and write the object into it.
214 | 2. We have `Some(ref mut head)` in `head`.  At this point we divert from the
215 |    `overflow_alloc()` function and query the size of the object - if this
216 |    is a medium object and the current hole between marked lines in the `head`
217 |    block is too small, call into `overflow_alloc()` and return.
218 |    ```rust,ignore
219 |                 if size_class == SizeClass::Medium && alloc_size > head.current_hole_size() {
220 |                     return blocks.overflow_alloc(alloc_size);
221 |                 }
222 |    ```
223 |    Otherwise, continue to allocate into `head` and return.
224 | 3. We have `Some(ref mut head)` in `head` but this block is unable to
225 |    accommodate the object, whether medium or small. We must append the current
226 |    head to the `rest` list and create a new `BumpBlock` to allocate into.
227 | 
228 | There is one more thing to mention. What about large objects? We'll cover those
229 | in a later chapter. Right now we'll make it an error to try to allocate a large
230 | object by putting this at the beginning of the `StickyImmixHeap::inner_alloc()`
231 | function:
232 | 
233 | ```rust,ignore
234 |         if size_class == SizeClass::Large {
235 |             return Err(AllocError::BadRequest);
236 |         }
237 | 
238 | ```
239 | 
240 | ## Where to next?
241 | 
242 | We have a scheme for finding space in blocks for small and medium objects
243 | and so, in the next chapter we will define the external interface to the crate.
244 | 


--------------------------------------------------------------------------------
/booksrc/chapter-what-is-alloc.md:
--------------------------------------------------------------------------------
 1 | # The type of allocation
 2 | 
 3 | Before we start writing objects into `Block`s, we need to know the nature of
 4 | the interface in Rust terms.
 5 | 
 6 | If we consider the global allocator in Rust, implicitly available via
 7 | `Box::new()`, `Vec::new()` and so on, we'll notice that since the global
 8 | allocator is available on every thread and allows the creation of new
 9 | objects on the heap (that is, mutation of the heap) from any code location
10 | without needing to follow the rules of borrowing and mutable aliasing,
11 | it is essentially a container that implements `Sync` and the interior
12 | mutability pattern.
13 | 
14 | We need to follow suit, but we'll leave `Sync` for advanced chapters.
15 | 
16 | An interface that satisfies the interior mutability property, by borrowing
17 | the allocator instance immutably, might look like:
18 | 
19 | ```rust,ignore
20 | trait AllocRaw {
21 |     fn alloc<T>(&self, object: T) -> *const T;
22 | }
23 | ```
24 | 
25 | naming it `AllocRaw` because when layering on top of `Block` we'll
26 | work with raw pointers and not concern ourselves with the lifetime of
27 | allocated objects.
28 | 
29 | It will become a little more complex than this but for now, this captures
30 | the essence of the interface.
31 | 


--------------------------------------------------------------------------------
/booksrc/evalrus-medium.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rust-hosted-langs/book/f4dc72f71e119ecdb87d842afa5bf65bb131ecaa/booksrc/evalrus-medium.png


--------------------------------------------------------------------------------
/booksrc/img/alignment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rust-hosted-langs/book/f4dc72f71e119ecdb87d842afa5bf65bb131ecaa/booksrc/img/alignment.png


--------------------------------------------------------------------------------
/booksrc/img/fragmented_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rust-hosted-langs/book/f4dc72f71e119ecdb87d842afa5bf65bb131ecaa/booksrc/img/fragmented_block.png


--------------------------------------------------------------------------------
/booksrc/img/stickyimmix_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rust-hosted-langs/book/f4dc72f71e119ecdb87d842afa5bf65bb131ecaa/booksrc/img/stickyimmix_block.png


--------------------------------------------------------------------------------
/booksrc/introduction.md:
--------------------------------------------------------------------------------
 1 | # Writing Interpreters in Rust: a Guide
 2 | 
 3 | ## Welcome!
 4 | 
 5 | In this book we will walk through the basics of interpreted language
 6 | implementation in Rust with a focus on the challenges that are specific
 7 | to using Rust.
 8 | 
 9 | At a glance, these are:
10 | 
11 | * A custom allocator for use in an interpreter
12 | * A safe-Rust wrapper over allocation
13 | * A compiler and VM that interact with the above two layers
14 | 
15 | The goal of this book is not to cover a full featured language but rather to
16 | provide a solid foundation on which you can build further features. Along
17 | the way we'll implement as much as possible in terms of our own memory
18 | management abstractions rather than using Rust std collections.
19 | 
20 | ### Level of difficulty
21 | 
22 | Bob Nystrom's [Crafting Interpreters](http://craftinginterpreters.com/)
23 | is recommended _introductory_ reading to this book for beginners to the topic.
24 | Bob has produced a high quality, accessible work and while there is
25 | considerable overlap, in some ways this book builds on Bob's work with some
26 | additional complexity, optimizations and discussions of Rust's safe vs unsafe.
27 | 
28 | **We hope you find this book to be informative!**
29 | 
30 | 
31 | ## Further reading and other projects to study:
32 | 
33 | All the links below are acknowledged as inspiration or prior art.
34 | 
35 | ### Interpreters
36 | 
37 | * Bob Nystrom's [Crafting Interpreters](http://craftinginterpreters.com/)
38 | * [The Inko programming language](https://inko-lang.org/)
39 | * kyren - [luster](https://github.com/kyren/luster) and [gc-arena](https://github.com/kyren/gc-arena)
40 | 
41 | ### Memory management
42 | 
43 | * Richard Jones, Anthony Hosking, Elliot Moss - [The Garbage Collection Handbook](http://gchandbook.org/)
44 | * Stephen M. Blackburn & Kathryn S. McKinley -
45 |   [Immix: A Mark-Region Garbage Collector with Space Efficiency, Fast Collection, and Mutator Performance](http://users.cecs.anu.edu.au/~steveb/pubs/papers/immix-pldi-2008.pdf)
46 | * Felix S Klock II - [GC and Rust Part 0: Garbage Collection Background](http://blog.pnkfx.org/blog/2015/10/27/gc-and-rust-part-0-how-does-gc-work/)
47 | * Felix S Klock II - [GC and Rust Part 1: Specifying the Problem](http://blog.pnkfx.org/blog/2015/11/10/gc-and-rust-part-1-specing-the-problem/)
48 | * Felix S Klock II - [GC and Rust Part 2: The Roots of the Problem](http://blog.pnkfx.org/blog/2016/01/01/gc-and-rust-part-2-roots-of-the-problem/)
49 | 


--------------------------------------------------------------------------------
/booksrc/part-allocators.md:
--------------------------------------------------------------------------------
1 | # Allocators
2 | 
3 | This section gives an overview and implementation detail of allocating blocks
4 | of memory.
5 | 
6 | _What this is not: a custom allocator to replace the global Rust allocator_
7 | 


--------------------------------------------------------------------------------
/booksrc/part-interpreter.md:
--------------------------------------------------------------------------------
 1 | # An interpreter: Eval-rs
 2 | 
 3 | In this part of the book we'll dive into creating:
 4 | * a safe Rust layer on top of the Sticky Immix API of the previous part
 5 | * a compiler for a primitive s-expression syntax language
 6 | * a bytecode based virtual machine
 7 | 
 8 | So what kind of interpreter will we implement? This book is a guide to help
 9 | you along your own journey and is not intended to provide an exhaustive
10 | language ecosystem. The direction we'll take is to support John McCarthy's
11 | classic s-expression based meta-circular evaluator[^1].
12 | 
13 | Along the way we'll need to implement fundamental data types and structures
14 | from scratch upon our safe layer - symbols, pairs, arrays and dicts - with
15 | each chapter building upon the previous ones.
16 | 
17 | While this will not result in an exhaustive language implementation,
18 | you'll see that we _will_ end up with all the building blocks for you to take
19 | it the rest of the way!
20 | 
21 | We shall name our interpreter "Eval-rs", for which we have an appropriate
22 | illustration generously provided by the author's then 10 year old daughter.
23 | 
24 | ![The Evalrus](evalrus-medium.png)
25 | 
26 | We'll begin by defining the safe abstraction over the Sticky Immix interface.
27 | Then we'll put that to use in parsing s-expressions into a very simple data
28 | structure.
29 | 
30 | Once we've covered those basics, we'll build arrays and dicts and then
31 | use those in the compiler and virtual machine.
32 | 
33 | [^1]: These days this is cliché but that is substantially to our benefit. We're
34 | not trying to create yet another Lisp, rather the fact that there is a
35 | preexisting design of some elegance and historical interest is a convenience.
36 | For a practical, accessible introduction to the topic, do see Paul
37 | Graham's [The Roots of Lisp](http://www.paulgraham.com/rootsoflisp.html)
38 | 


--------------------------------------------------------------------------------
/booksrc/part-stickyimmix.md:
--------------------------------------------------------------------------------
 1 | # An allocator: Sticky Immix
 2 | 
 3 | Quickly, some terminology:
 4 | 
 5 | * Mutator: the thread of execution that writes and modifies objects on the heap.
 6 | * Live objects: the graph of objects that the mutator can reach, either directly
 7 |   from its stack or indirectly through other reachable objects.
 8 | * Dead objects: any object that is disconnected from the mutator's graph of live
 9 |   objects.
10 | * Collector: the thread of execution that identifies objects that are no longer
11 |   reachable by the mutator and marks them as free space that can be reused
12 | * Fragmentation: as objects have many different sizes, after allocating and
13 |   freeing many objects, gaps of unused memory appear between objects that are
14 |   too small for most objects but that add up to a measurable percentage of
15 |   wasted space.
16 | * Evacuation: when the collector _moves_ live objects to another block of memory
17 |   so that the originating block can be _defragmented_
18 | 
19 | ## About Immix
20 | 
21 | Immix is a memory management scheme that considers blocks of fixed size at a time.
22 | Each block is divided into lines. In the original paper, blocks are sized at 32k
23 | and lines at 128 bytes.  Objects are allocated into blocks using bump allocation
24 | and objects can cross line boundaries.
25 | 
26 | ![StickyImmix Block](img/stickyimmix_block.png)
27 | 
28 | During tracing to discover live objects, objects are marked as live, but the
29 | line, or lines, that each object occupies are also marked as live. This can mean, of
30 | course, that a line may contain a dead object and a live object but the whole
31 | line is marked as live.
32 | 
33 | To mark lines as live, a portion of the block is set aside for line mark bits,
34 | usually one byte per mark bit. If _any_ line is marked as live, the whole block
35 | is also marked as live. There must also, therefore, be a bit that indicates
36 | block liveness.
37 | 
38 | ### Conservative marking
39 | 
40 | The Immix authors found that marking _every_ line that contains a live object
41 | could be expensive. For example, many small objects might cross line boundaries,
42 | requiring two lines to be marked as live. This would require looking up the
43 | object size and calculating whether the object crosses the boundary into the
44 | next line. To save CPU cycles, they simplified the algorithm by saying that
45 | any object that fits in a line _might_ cross into the next line so we will
46 | conservatively _consider_ the next line marked just in case. This sped up
47 | marking at little fragmentation expense.
48 | 
49 | ### Collection
50 | 
51 | During collection, only lines not marked as live are considered available for
52 | re-use. Inevitably then, there is acceptance of some amount of fragmentation
53 | at this point.
54 | 
55 | _Full_ Immix implements evacuating objects out of the most fragmented blocks
56 | into fresh, empty blocks, for defragmentation.
57 | 
58 | For simplicity of implementation, we'll leave out this evacuation operation
59 | in this guide. This is called _Sticky_ Immix.
60 | 
61 | We'll also stick to a single thread for the mutator and collector to avoid the
62 | complexity overhead of a multi-threaded implementation for now.
63 | 
64 | Recommended reading: [Stephen M. Blackburn & Kathryn S. McKinley - Immix: A Mark-Region Garbage Collector with Space Efficiency, Fast Collection, and Mutator Performance][1]
65 | 
66 | ## About this part of the book
67 | 
68 | This section will describe a Rust crate that implements a Sticky Immix heap.
69 | As part of this implementation we will dive into the crate API details to
70 | understand how we can define an interface between the heap and the language
71 | VM that will come later.
72 | 
73 | _What this is not: custom memory management to replace the global Rust
74 | allocator! The APIs we arrive at will be substantially incompatible with the
75 | global Rust allocator._
76 | 
77 | [1]: http://users.cecs.anu.edu.au/~steveb/pubs/papers/immix-pldi-2008.pdf
78 | 


--------------------------------------------------------------------------------
/interpreter/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated by Cargo
2 | # will have compiled files and executables
3 | /target/
4 | 
5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
6 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock
7 | Cargo.lock
8 | 


--------------------------------------------------------------------------------
/interpreter/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "evalrus"
 3 | version = "0.0.1"
 4 | authors = ["Peter Liniker <peter.liniker+github@gmail.com>"]
 5 | edition = "2018"
 6 | license = "MIT OR Apache-2.0"
 7 | 
 8 | [dependencies]
 9 | clap = "2.20.3"
10 | dirs = "1.0"
11 | fnv = "1.0.3"
12 | itertools = "0.9"
13 | rustyline = "6.1.2"
14 | stickyimmix = { path = "../stickyimmix" }
15 | blockalloc = { path = "../blockalloc" }
16 | 


--------------------------------------------------------------------------------
/interpreter/README.md:
--------------------------------------------------------------------------------
1 | # The Eval-rs
2 | 
3 | A simple interpreter, built on the `stickyimmix` allocator.
4 | 
5 | ![The Evalrus](https://pliniker.github.io/assets/img/evalrus-medium.png)
6 | 


--------------------------------------------------------------------------------
/interpreter/src/arena.rs:
--------------------------------------------------------------------------------
 1 | /// A memory arena implemented as an ever growing pool of blocks.
 2 | /// Currently implemented on top of stickyimmix without any gc which includes unnecessary
 3 | /// overhead.
 4 | use std::ptr::NonNull;
 5 | 
 6 | use stickyimmix::{
 7 |     AllocError, AllocHeader, AllocObject, AllocRaw, ArraySize, Mark, RawPtr, SizeClass,
 8 |     StickyImmixHeap,
 9 | };
10 | 
11 | use crate::headers::TypeList;
12 | 
13 | /// Allocation header for an Arena-allocated value. The struct has no fields.
14 | pub struct ArenaHeader {}
15 | 
16 | /// Since an Arena does not use this functionality, the impl is a stub: `mark()` is a
17 | /// no-op and the accessors return fixed placeholder values.
18 | impl AllocHeader for ArenaHeader {
19 |     type TypeId = TypeList;
20 | 
21 |     fn new<O: AllocObject<Self::TypeId>>(
22 |         _size: u32,
23 |         _size_class: SizeClass,
24 |         _mark: Mark,
25 |     ) -> ArenaHeader {
26 |         ArenaHeader {}
27 |     }
28 | 
29 |     fn new_array(_size: ArraySize, _size_class: SizeClass, _mark: Mark) -> ArenaHeader {
30 |         ArenaHeader {}
31 |     }
32 | 
33 |     fn mark(&mut self) {}
34 | 
35 |     fn is_marked(&self) -> bool {
36 |         true
37 |     }
38 | 
39 |     fn size_class(&self) -> SizeClass {
40 |         SizeClass::Small
41 |     }
42 | 
43 |     fn size(&self) -> u32 {
44 |         1
45 |     }
46 | 
47 |     fn type_id(&self) -> TypeList {
48 |         TypeList::Symbol
49 |     }
50 | }
51 | 
52 | /// A non-garbage-collected pool of memory blocks for interned values.
53 | /// Wraps a `StickyImmixHeap`; these values are not dropped on Arena deallocation.
54 | /// Values must be "atomic", that is, not composed of other object
55 | /// pointers that need to be traced.
56 | // ANCHOR: DefArena
57 | pub struct Arena {
58 |     heap: StickyImmixHeap<ArenaHeader>,
59 | }
60 | // ANCHOR_END: DefArena
61 | 
62 | impl Arena {
63 |     pub fn new() -> Arena {
64 |         Arena {
65 |             heap: StickyImmixHeap::new(),
66 |         }
67 |     }
68 | }
69 | 
70 | impl AllocRaw for Arena {
71 |     type Header = ArenaHeader;
72 | 
73 |     // ANCHOR: DefArenaAlloc
74 |     fn alloc<T>(&self, object: T) -> Result<RawPtr<T>, AllocError>
75 |     where
76 |         T: AllocObject<TypeList>,
77 |     {
78 |         self.heap.alloc(object)
79 |     }
80 |     // ANCHOR_END: DefArenaAlloc
81 | 
82 |     fn alloc_array(&self, _size_bytes: ArraySize) -> Result<RawPtr<u8>, AllocError> {
83 |         unimplemented!() // not implemented for Arena; calling this panics
84 |     }
85 | 
86 |     fn get_header(_object: NonNull<()>) -> NonNull<Self::Header> {
87 |         unimplemented!() // not implemented for Arena; calling this panics
88 |     }
89 | 
90 |     fn get_object(_header: NonNull<Self::Header>) -> NonNull<()> {
91 |         unimplemented!() // not implemented for Arena; calling this panics
92 |     }
93 | }
94 | 


--------------------------------------------------------------------------------
/interpreter/src/containers.rs:
--------------------------------------------------------------------------------
  1 | /// Container traits
  2 | ///
  3 | /// TODO iterators/views
  4 | use stickyimmix::ArraySize;
  5 | 
  6 | use crate::error::RuntimeError;
  7 | use crate::memory::MutatorView;
  8 | use crate::safeptr::{MutatorScope, ScopedPtr, TaggedCellPtr, TaggedScopedPtr};
  9 | 
 10 | /// Base container-type trait. All container types are subtypes of `Container`.
 11 | ///
 12 | /// All container operations _must_ follow interior-mutability-only rules: methods take `&self`.
 13 | /// Because there are no compile-time mutable aliasing guarantees, there can be no references
 14 | /// into arrays at all, unless there can be a guarantee that the array memory will not be
 15 | /// reallocated.
 16 | ///
 17 | /// `T` cannot be restricted to `Copy` because of the use of `Cell` for interior mutability.
 18 | pub trait Container<T: Sized + Clone>: Sized {
 19 |     /// Create a new, empty container instance.
 20 |     fn new() -> Self;
 21 |     /// Create a new container instance with the given capacity.
 22 |     // TODO: this may not make sense for tree types
 23 |     fn with_capacity<'guard>(
 24 |         mem: &'guard MutatorView,
 25 |         capacity: ArraySize,
 26 |     ) -> Result<Self, RuntimeError>;
 27 | 
 28 |     /// Reset the size of the container to zero, i.e. make it empty.
 29 |     fn clear<'guard>(&self, mem: &'guard MutatorView) -> Result<(), RuntimeError>;
 30 | 
 31 |     /// Return the count of items in the container.
 32 |     fn length(&self) -> ArraySize;
 33 | }
 34 | 
 35 | /// If implemented, the container can be filled with a set number of values in one operation
 36 | pub trait FillContainer<T: Sized + Clone>: Container<T> {
 37 |     /// The `item` is an object to copy into each container memory slot; `size` is presumably the slot count.
 38 |     fn fill<'guard>(
 39 |         &self,
 40 |         mem: &'guard MutatorView,
 41 |         size: ArraySize,
 42 |         item: T,
 43 |     ) -> Result<(), RuntimeError>;
 44 | }
 45 | 
 46 | /// Specialized fill trait for `TaggedCellPtr` containers; the fill value is a `TaggedScopedPtr`
 47 | pub trait FillAnyContainer: FillContainer<TaggedCellPtr> {
 48 |     /// The `item` is a pointer to copy into each container memory slot.
 49 |     fn fill<'guard>(
 50 |         &self,
 51 |         mem: &'guard MutatorView,
 52 |         size: ArraySize,
 53 |         item: TaggedScopedPtr<'guard>,
 54 |     ) -> Result<(), RuntimeError>;
 55 | }
 56 | 
 57 | /// Generic stack trait. If implemented, the container can function as a stack
 58 | // ANCHOR: DefStackContainer
 59 | pub trait StackContainer<T: Sized + Clone>: Container<T> {
 60 |     /// Push can trigger an underlying array resize, hence it requires `mem` in order to allocate
 61 |     fn push<'guard>(&self, mem: &'guard MutatorView, item: T) -> Result<(), RuntimeError>;
 62 | 
 63 |     /// Pop returns a bounds error if the container is empty, otherwise moves the last item of the
 64 |     /// array out to the caller.
 65 |     fn pop<'guard>(&self, _guard: &'guard dyn MutatorScope) -> Result<T, RuntimeError>;
 66 | 
 67 |     /// Return the value at the top of the stack without removing it. Fallible, like `pop`.
 68 |     fn top<'guard>(&self, _guard: &'guard dyn MutatorScope) -> Result<T, RuntimeError>;
 69 | }
 70 | // ANCHOR_END: DefStackContainer
 71 | 
 72 | /// Specialized stack trait for `TaggedCellPtr` containers; items pass in and out as `TaggedScopedPtr`
 73 | // ANCHOR: DefStackAnyContainer
 74 | pub trait StackAnyContainer: StackContainer<TaggedCellPtr> {
 75 |     /// Push can trigger an underlying array resize, hence it requires `mem` in order to allocate
 76 |     fn push<'guard>(
 77 |         &self,
 78 |         mem: &'guard MutatorView,
 79 |         item: TaggedScopedPtr<'guard>,
 80 |     ) -> Result<(), RuntimeError>;
 81 | 
 82 |     /// Pop returns a bounds error if the container is empty, otherwise moves the last item of the
 83 |     /// array out to the caller.
 84 |     fn pop<'guard>(
 85 |         &self,
 86 |         _guard: &'guard dyn MutatorScope,
 87 |     ) -> Result<TaggedScopedPtr<'guard>, RuntimeError>;
 88 | 
 89 |     /// Return the value at the top of the stack without removing it. Fallible, like `pop`.
 90 |     fn top<'guard>(
 91 |         &self,
 92 |         _guard: &'guard dyn MutatorScope,
 93 |     ) -> Result<TaggedScopedPtr<'guard>, RuntimeError>;
 94 | }
 95 | // ANCHOR_END: DefStackAnyContainer
 96 | 
 97 | /// Generic indexed-access trait. If implemented, the container can function as an indexable vector.
 98 | pub trait IndexedContainer<T: Sized + Clone>: Container<T> {
 99 |     /// Return a copy of the object at the given index. Bounds-checked; fallible with `RuntimeError`.
100 |     fn get<'guard>(
101 |         &self,
102 |         _guard: &'guard dyn MutatorScope,
103 |         index: ArraySize,
104 |     ) -> Result<T, RuntimeError>;
105 | 
106 |     /// Move an object into the array at the given index. Bounds-checked; fallible with `RuntimeError`.
107 |     fn set<'guard>(
108 |         &self,
109 |         _guard: &'guard dyn MutatorScope,
110 |         index: ArraySize,
111 |         item: T,
112 |     ) -> Result<(), RuntimeError>;
113 | }
114 | 
115 | /// A trait that is implemented for containers that can represent their contents as a slice.
116 | pub trait SliceableContainer<T: Sized + Clone>: IndexedContainer<T> {
117 |     /// This function allows access to the interior of a container as a slice by way of a
118 |     /// closure `f`, permitting direct access to the memory locations of objects in the container
119 |     /// for the lifetime of the closure call.
120 |     ///
121 |     /// It is important to understand that the `'guard` lifetime is not the same safe duration
122 |     /// as the slice lifetime - the slice may be invalidated during the `'guard` lifetime
123 |     /// by operations on the container that cause reallocation.
124 |     ///
125 |     /// To prevent the function from modifying the container outside of the slice reference,
126 |     /// the implementing container must maintain a `RefCell`-style flag to catch runtime
127 |     /// container modifications that would render the slice invalid or cause undefined
128 |     /// behavior.
129 |     fn access_slice<'guard, F, R>(&self, _guard: &'guard dyn MutatorScope, f: F) -> R
130 |     where
131 |         F: FnOnce(&mut [T]) -> R;
132 | }
133 | 
/// Specialized indexable interface for where TaggedCellPtr is used as T
///
/// The container stores `TaggedCellPtr` values, while these methods hand out and accept
/// `TaggedScopedPtr<'guard>` values whose lifetime is tied to the guard.
pub trait IndexedAnyContainer: IndexedContainer<TaggedCellPtr> {
    /// Return a pointer to the object at the given index. Bounds-checked.
    ///
    /// The returned pointer carries the `'guard` lifetime of the `guard` argument.
    fn get<'guard>(
        &self,
        guard: &'guard dyn MutatorScope,
        index: ArraySize,
    ) -> Result<TaggedScopedPtr<'guard>, RuntimeError>;

    /// Set the object pointer at the given index. Bounds-checked.
    ///
    /// `item` must have been obtained under the same `'guard` lifetime as `_guard`.
    fn set<'guard>(
        &self,
        _guard: &'guard dyn MutatorScope,
        index: ArraySize,
        item: TaggedScopedPtr<'guard>,
    ) -> Result<(), RuntimeError>;
}
151 | 
/// Hashable-indexed interface. Objects used as keys must implement Hashable.
// ANCHOR: DefHashIndexedAnyContainer
pub trait HashIndexedAnyContainer {
    /// Return a pointer to the object associated with the given key.
    /// Absence of an association should return an error.
    fn lookup<'guard>(
        &self,
        guard: &'guard dyn MutatorScope,
        key: TaggedScopedPtr,
    ) -> Result<TaggedScopedPtr<'guard>, RuntimeError>;

    /// Associate a key with a value.
    ///
    /// Unlike the other methods this one takes a `MutatorView` rather than a bare
    /// `MutatorScope`.
    /// NOTE(review): presumably because adding an association may need to allocate - confirm
    /// against the implementation.
    fn assoc<'guard>(
        &self,
        mem: &'guard MutatorView,
        key: TaggedScopedPtr<'guard>,
        value: TaggedScopedPtr<'guard>,
    ) -> Result<(), RuntimeError>;

    /// Remove an association by its key.
    ///
    /// NOTE(review): the returned pointer is presumably the value that was associated with
    /// the key - confirm against the implementation.
    fn dissoc<'guard>(
        &self,
        guard: &'guard dyn MutatorScope,
        key: TaggedScopedPtr,
    ) -> Result<TaggedScopedPtr<'guard>, RuntimeError>;

    /// Returns true if the key exists in the container.
    ///
    /// The lookup itself is fallible, so the boolean is wrapped in a `Result`.
    fn exists<'guard>(
        &self,
        guard: &'guard dyn MutatorScope,
        key: TaggedScopedPtr,
    ) -> Result<bool, RuntimeError>;
}
185 | // ANCHOR_END: DefHashIndexedAnyContainer
186 | 
/// Convert a Pair list to a different container
pub trait AnyContainerFromPairList: Container<TaggedCellPtr> {
    /// Populate this container from `pair_list`.
    ///
    /// The method takes `&self` and returns `()` on success, so the receiver is the
    /// destination of the conversion rather than a newly created container.
    /// NOTE(review): whether existing contents are cleared first is not visible here - confirm
    /// against the implementations.
    fn from_pair_list<'guard>(
        &self,
        mem: &'guard MutatorView,
        pair_list: TaggedScopedPtr<'guard>,
    ) -> Result<(), RuntimeError>;
}
195 | 
/// Create a container holding the values in a slice
pub trait ContainerFromSlice<T: Sized + Clone>: Container<T> {
    /// Build a new container from `data`.
    ///
    /// There is no `self` receiver: the result is a pointer to a new `Self`, scoped to the
    /// `'guard` lifetime of `mem`.
    fn from_slice<'guard>(
        mem: &'guard MutatorView,
        data: &[T],
    ) -> Result<ScopedPtr<'guard, Self>, RuntimeError>;
}
203 | 
/// Create a container of TaggedCellPtr holding the pointers in a slice
pub trait AnyContainerFromSlice: Container<TaggedCellPtr> {
    /// Build a new container from the scoped pointers in `data`.
    ///
    /// There is no `self` receiver: the result is a pointer to a new `Self`, scoped to the
    /// `'guard` lifetime of `mem`.
    fn from_slice<'guard>(
        mem: &'guard MutatorView,
        data: &[TaggedScopedPtr<'guard>],
    ) -> Result<ScopedPtr<'guard, Self>, RuntimeError>;
}
211 | 
/// The implementor represents mutable changes via an internal version count
/// such that the use of any references to an older version return an error
///
/// This is a marker trait: it declares no methods of its own.
pub trait VersionedContainer<T: Sized + Clone>: Container<T> {}

/// Marker trait with no methods of its own.
///
/// NOTE(review): going by the name, this presumably tags containers whose contents do not
/// change after construction - confirm intended semantics.
pub trait ImmutableContainer<T: Sized + Clone>: Container<T> {}
217 | 


--------------------------------------------------------------------------------
/interpreter/src/error.rs:
--------------------------------------------------------------------------------
  1 | use std::error::Error;
  2 | use std::fmt;
  3 | use std::io;
  4 | 
  5 | use rustyline::error::ReadlineError;
  6 | 
  7 | use blockalloc::BlockError;
  8 | use stickyimmix::AllocError;
  9 | 
 10 | /// Source code position
 11 | // ANCHOR: DefSourcePos
 12 | #[derive(Copy, Clone, Debug, PartialEq)]
 13 | pub struct SourcePos {
 14 |     pub line: u32,
 15 |     pub column: u32,
 16 | }
 17 | // ANCHOR_END: DefSourcePos
 18 | 
 19 | impl SourcePos {
 20 |     fn new(line: u32, column: u32) -> SourcePos {
 21 |         SourcePos { line, column }
 22 |     }
 23 | }
 24 | 
 25 | #[derive(Debug, PartialEq)]
 26 | pub enum ErrorKind {
 27 |     IOError(String),
 28 |     LexerError(String),
 29 |     ParseError(String),
 30 |     EvalError(String),
 31 |     BadAllocationRequest,
 32 |     OutOfMemory,
 33 |     BoundsError,
 34 |     KeyError,
 35 |     UnhashableError,
 36 |     MutableBorrowError,
 37 | }
 38 | 
 39 | /// An Eval-rs runtime error type
 40 | #[derive(Debug, PartialEq)]
 41 | pub struct RuntimeError {
 42 |     kind: ErrorKind,
 43 |     pos: Option<SourcePos>,
 44 | }
 45 | 
 46 | impl RuntimeError {
 47 |     pub fn new(kind: ErrorKind) -> RuntimeError {
 48 |         RuntimeError {
 49 |             kind: kind,
 50 |             pos: None,
 51 |         }
 52 |     }
 53 | 
 54 |     pub fn with_pos(kind: ErrorKind, pos: SourcePos) -> RuntimeError {
 55 |         RuntimeError {
 56 |             kind: kind,
 57 |             pos: Some(pos),
 58 |         }
 59 |     }
 60 | 
 61 |     pub fn error_kind(&self) -> &ErrorKind {
 62 |         &self.kind
 63 |     }
 64 | 
 65 |     pub fn error_pos(&self) -> Option<SourcePos> {
 66 |         self.pos
 67 |     }
 68 | 
 69 |     /// Given the relevant source code string, show the error in context
 70 |     pub fn print_with_source(&self, source: &str) {
 71 |         if let Some(ref pos) = self.pos {
 72 |             let mut iter = source.lines().enumerate();
 73 | 
 74 |             while let Some((count, line)) = iter.next() {
 75 |                 // count starts at 0, line numbers start at 1
 76 |                 if count + 1 == pos.line as usize {
 77 |                     println!("error: {}", self);
 78 |                     println!("{:5}|{}", pos.line, line);
 79 |                     println!("{:5}|{:width$}^", " ", " ", width = pos.column as usize);
 80 |                     println!("{:5}|", " ");
 81 |                     return;
 82 |                 }
 83 |             }
 84 |         } else {
 85 |             println!("error: {}", self);
 86 |         }
 87 |     }
 88 | }
 89 | 
 90 | impl fmt::Display for RuntimeError {
 91 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 92 |         match self.kind {
 93 |             ErrorKind::IOError(ref reason) => write!(f, "IO Error: {}", reason),
 94 |             ErrorKind::LexerError(ref reason) => write!(f, "Parse error: {}", reason),
 95 |             ErrorKind::ParseError(ref reason) => write!(f, "Parse error: {}", reason),
 96 |             ErrorKind::EvalError(ref reason) => write!(f, "Evaluation error: {}", reason),
 97 |             ErrorKind::OutOfMemory => write!(f, "Out of memory!"),
 98 |             ErrorKind::BadAllocationRequest => {
 99 |                 write!(f, "An invalid memory size allocation was requested!")
100 |             }
101 |             ErrorKind::BoundsError => write!(f, "Indexing bounds error"),
102 |             ErrorKind::KeyError => write!(f, "Key does not exist in Dict"),
103 |             ErrorKind::UnhashableError => write!(f, "Attempt to access Dict with unhashable key"),
104 |             ErrorKind::MutableBorrowError => write!(
105 |                 f,
106 |                 "Attempt to modify a container that is already mutably borrowed"
107 |             ),
108 |         }
109 |     }
110 | }
111 | 
112 | /// Convert from io::Error
113 | impl From<io::Error> for RuntimeError {
114 |     fn from(other: io::Error) -> RuntimeError {
115 |         RuntimeError::new(ErrorKind::IOError(format!("{}", other)))
116 |     }
117 | }
118 | 
119 | /// Convert from ReadlineError
120 | impl From<ReadlineError> for RuntimeError {
121 |     fn from(other: ReadlineError) -> RuntimeError {
122 |         RuntimeError::new(ErrorKind::IOError(format!("{}", other)))
123 |     }
124 | }
125 | 
126 | /// Convert from BlockError
127 | impl From<BlockError> for RuntimeError {
128 |     fn from(other: BlockError) -> RuntimeError {
129 |         match other {
130 |             BlockError::OOM => RuntimeError::new(ErrorKind::OutOfMemory),
131 |             BlockError::BadRequest => RuntimeError::new(ErrorKind::BadAllocationRequest),
132 |         }
133 |     }
134 | }
135 | 
136 | /// Convert from AllocError
137 | impl From<AllocError> for RuntimeError {
138 |     fn from(other: AllocError) -> RuntimeError {
139 |         match other {
140 |             AllocError::OOM => RuntimeError::new(ErrorKind::OutOfMemory),
141 |             AllocError::BadRequest => RuntimeError::new(ErrorKind::BadAllocationRequest),
142 |         }
143 |     }
144 | }
145 | 
146 | impl Error for RuntimeError {
147 |     fn cause(&self) -> Option<&dyn Error> {
148 |         None
149 |     }
150 | }
151 | 
152 | /// Convert _to_ std::fmt::Error
153 | impl From<RuntimeError> for fmt::Error {
154 |     fn from(_other: RuntimeError) -> fmt::Error {
155 |         // Is there anything else that can be done here? :-(
156 |         fmt::Error
157 |     }
158 | }
159 | 
160 | /// Convenience shorthand function for building a SourcePos
161 | pub fn spos(line: u32, column: u32) -> SourcePos {
162 |     SourcePos::new(line, column)
163 | }
164 | 
165 | /// Convenience shorthand function for building a lexer error
166 | pub fn err_lexer(pos: SourcePos, reason: &str) -> RuntimeError {
167 |     RuntimeError::with_pos(ErrorKind::LexerError(String::from(reason)), pos)
168 | }
169 | 
170 | /// Convenience shorthand function for building a parser error
171 | pub fn err_parser(reason: &str) -> RuntimeError {
172 |     RuntimeError::new(ErrorKind::ParseError(String::from(reason)))
173 | }
174 | 
175 | /// Convenience shorthand function for building a parser error including a source position
176 | pub fn err_parser_wpos(pos: SourcePos, reason: &str) -> RuntimeError {
177 |     RuntimeError::with_pos(ErrorKind::ParseError(String::from(reason)), pos)
178 | }
179 | 
180 | /// Convenience shorthand function for building an evaluation error
181 | pub fn err_eval(reason: &str) -> RuntimeError {
182 |     RuntimeError::new(ErrorKind::EvalError(String::from(reason)))
183 | }
184 | 


--------------------------------------------------------------------------------
/interpreter/src/function.rs:
--------------------------------------------------------------------------------
  1 | use itertools::join;
  2 | use std::fmt;
  3 | 
  4 | use crate::array::ArrayU16;
  5 | use crate::bytecode::ByteCode;
  6 | use crate::containers::{Container, ContainerFromSlice, SliceableContainer, StackContainer};
  7 | use crate::error::RuntimeError;
  8 | use crate::list::List;
  9 | use crate::memory::MutatorView;
 10 | use crate::printer::Print;
 11 | use crate::safeptr::{CellPtr, MutatorScope, ScopedPtr, TaggedCellPtr, TaggedScopedPtr};
 12 | use crate::taggedptr::Value;
 13 | 
/// A function object type
///
/// Holds a name, the parameter names, the compiled bytecode and, for closures, the
/// locations of the nonlocal variables the code refers to.
// ANCHOR: DefFunction
#[derive(Clone)]
pub struct Function {
    /// name could be a Symbol, or nil if it is an anonymous fn
    name: TaggedCellPtr,
    /// Number of arguments required to activate the function. `Function::alloc` derives this
    /// from the length of `param_names`.
    arity: u8,
    /// Instructions comprising the function code
    code: CellPtr<ByteCode>,
    /// Param names are stored for introspection of a function signature
    param_names: CellPtr<List>,
    /// List of (CallFrame-index: u8 | Window-index: u8) relative offsets from this function's
    /// declaration where nonlocal variables will be found. Needed when creating a closure. May be
    /// nil
    nonlocal_refs: TaggedCellPtr,
}
 31 | // ANCHOR_END: DefFunction
 32 | 
 33 | impl Function {
 34 |     /// Allocate a Function object on the heap.
 35 |     ///
 36 |     /// The nonlocal_refs arg must contain a list of 16 bit values composed of two
 37 |     /// 8 bit values: CallFrame relative offset << 8 | Window offset
 38 |     /// These values should follow the same order as given in param_names
 39 |     pub fn alloc<'guard>(
 40 |         mem: &'guard MutatorView,
 41 |         name: TaggedScopedPtr<'guard>,
 42 |         param_names: ScopedPtr<'guard, List>,
 43 |         code: ScopedPtr<'guard, ByteCode>,
 44 |         nonlocal_refs: Option<ScopedPtr<'guard, ArrayU16>>,
 45 |     ) -> Result<ScopedPtr<'guard, Function>, RuntimeError> {
 46 |         // Store a nil ptr if no nonlocal references are given
 47 |         let nonlocal_refs = if let Some(refs_ptr) = nonlocal_refs {
 48 |             TaggedCellPtr::new_with(refs_ptr.as_tagged(mem))
 49 |         } else {
 50 |             TaggedCellPtr::new_nil()
 51 |         };
 52 | 
 53 |         mem.alloc(Function {
 54 |             name: TaggedCellPtr::new_with(name),
 55 |             arity: param_names.length() as u8,
 56 |             code: CellPtr::new_with(code),
 57 |             param_names: CellPtr::new_with(param_names),
 58 |             nonlocal_refs,
 59 |         })
 60 |     }
 61 | 
 62 |     /// Return the Function's name as a string slice
 63 |     pub fn name<'guard>(&self, guard: &'guard dyn MutatorScope) -> &'guard str {
 64 |         let name = self.name.get(guard);
 65 |         match *name {
 66 |             Value::Symbol(s) => s.as_str(guard),
 67 |             _ => "<lambda>",
 68 |         }
 69 |     }
 70 | 
 71 |     /// Return the number of arguments the Function can take
 72 |     pub fn arity(&self) -> u8 {
 73 |         self.arity
 74 |     }
 75 | 
 76 |     /// Return the names of the parameters that the Function takes
 77 |     pub fn param_names<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, List> {
 78 |         self.param_names.get(guard)
 79 |     }
 80 | 
 81 |     /// Return the ByteCode object associated with the Function
 82 |     pub fn code<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, ByteCode> {
 83 |         self.code.get(guard)
 84 |     }
 85 | 
 86 |     /// Return true if the function is a closure - it has nonlocal variable references
 87 |     pub fn is_closure<'guard>(&self) -> bool {
 88 |         !self.nonlocal_refs.is_nil()
 89 |     }
 90 | 
 91 |     /// Return a list of nonlocal stack references referenced by the function. It is a panickable
 92 |     /// offense to call this when there are no nonlocals referenced by the function. This would
 93 |     /// indicate a compiler bug.
 94 |     pub fn nonlocals<'guard>(
 95 |         &self,
 96 |         guard: &'guard dyn MutatorScope,
 97 |     ) -> ScopedPtr<'guard, ArrayU16> {
 98 |         match *self.nonlocal_refs.get(guard) {
 99 |             Value::ArrayU16(nonlocals) => nonlocals,
100 |             _ => unreachable!(),
101 |         }
102 |     }
103 | }
104 | 
105 | impl Print for Function {
106 |     /// Prints a string representation of the function
107 |     fn print<'guard>(
108 |         &self,
109 |         guard: &'guard dyn MutatorScope,
110 |         f: &mut fmt::Formatter,
111 |     ) -> fmt::Result {
112 |         let name = self.name.get(guard);
113 |         let params = self.param_names.get(guard);
114 | 
115 |         let mut param_string = String::new();
116 |         params.access_slice(guard, |items| {
117 |             param_string = join(items.iter().map(|item| item.get(guard)), " ")
118 |         });
119 | 
120 |         match *name {
121 |             Value::Symbol(s) => write!(f, "(Function {} ({}))", s.as_str(guard), param_string),
122 |             _ => write!(f, "(Function ({}))", param_string),
123 |         }
124 |     }
125 | 
126 |     /// Prints the disassembled bytecode
127 |     fn debug<'guard>(
128 |         &self,
129 |         guard: &'guard dyn MutatorScope,
130 |         f: &mut fmt::Formatter,
131 |     ) -> fmt::Result {
132 |         self.print(guard, f)?;
133 |         write!(f, "\nbytecode follows:\n")?;
134 |         self.code(guard).debug(guard, f)
135 |     }
136 | }
137 | 
/// A partial function application object type
///
/// Pairs a Function with the arguments applied to it so far and an optional closure
/// environment.
// ANCHOR: DefPartial
#[derive(Clone)]
pub struct Partial {
    /// Remaining number of arguments required to activate the function
    arity: u8,
    /// Number of arguments already applied
    used: u8,
    /// List of argument values already applied
    args: CellPtr<List>,
    /// Closure environment - must be either nil or a List of Upvalues
    env: TaggedCellPtr,
    /// Function that will be activated when all arguments are applied
    func: CellPtr<Function>,
}
153 | // ANCHOR_END: DefPartial
154 | 
155 | impl Partial {
156 |     /// Allocate a Partial application of a Function on the heap with the given set of arguments
157 |     pub fn alloc<'guard>(
158 |         mem: &'guard MutatorView,
159 |         function: ScopedPtr<'guard, Function>,
160 |         env: Option<ScopedPtr<'guard, List>>,
161 |         args: &[TaggedCellPtr],
162 |     ) -> Result<ScopedPtr<'guard, Partial>, RuntimeError> {
163 |         let used = args.len() as u8;
164 |         let arity = function.arity() - used;
165 | 
166 |         // Store a nil ptr if no closure env is given
167 |         let env = if let Some(env_ptr) = env {
168 |             TaggedCellPtr::new_with(env_ptr.as_tagged(mem))
169 |         } else {
170 |             TaggedCellPtr::new_nil()
171 |         };
172 | 
173 |         // copy args to the Partial's own list
174 |         let args_list: ScopedPtr<'guard, List> = ContainerFromSlice::from_slice(mem, &args)?;
175 | 
176 |         mem.alloc(Partial {
177 |             arity,
178 |             used,
179 |             args: CellPtr::new_with(args_list),
180 |             env,
181 |             func: CellPtr::new_with(function),
182 |         })
183 |     }
184 | 
185 |     /// Clone an existing Partial application, appending the given arguments to the list
186 |     pub fn alloc_clone<'guard>(
187 |         mem: &'guard MutatorView,
188 |         partial: ScopedPtr<'guard, Partial>,
189 |         new_args: &[TaggedCellPtr],
190 |     ) -> Result<ScopedPtr<'guard, Partial>, RuntimeError> {
191 |         let used = partial.used() + new_args.len() as u8;
192 |         let arity = partial.arity() - new_args.len() as u8;
193 | 
194 |         // clone the parent Partial's args
195 |         let arg_list = List::alloc_clone(mem, partial.args(mem))?;
196 |         // append any new args
197 |         for arg in new_args {
198 |             arg_list.push(mem, arg.clone())?
199 |         }
200 | 
201 |         mem.alloc(Partial {
202 |             arity,
203 |             used,
204 |             args: CellPtr::new_with(arg_list),
205 |             env: partial.env.clone(),
206 |             func: partial.func.clone(),
207 |         })
208 |     }
209 | 
210 |     /// Return the number of arguments this Partial needs before the function can be called
211 |     pub fn arity(&self) -> u8 {
212 |         self.arity
213 |     }
214 | 
215 |     /// Return the count of arguments already applied
216 |     pub fn used(&self) -> u8 {
217 |         self.used
218 |     }
219 | 
220 |     /// Return the arguments already supplied to the Partial
221 |     pub fn args<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, List> {
222 |         self.args.get(guard)
223 |     }
224 | 
225 |     /// Return the closure environment. This will be nil if the Partial does not close over any
226 |     /// variables.
227 |     pub fn closure_env(&self) -> TaggedCellPtr {
228 |         self.env.clone()
229 |     }
230 | 
231 |     /// Return the Function object that the Partial will call
232 |     pub fn function<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, Function> {
233 |         self.func.get(guard)
234 |     }
235 | }
236 | 
237 | impl Print for Partial {
238 |     /// Prints a string representation of the Partial object
239 |     fn print<'guard>(
240 |         &self,
241 |         guard: &'guard dyn MutatorScope,
242 |         f: &mut fmt::Formatter,
243 |     ) -> fmt::Result {
244 |         let function = self.func.get(guard);
245 |         let name = function.name.get(guard);
246 |         let params = function.param_names.get(guard);
247 | 
248 |         let mut param_string = String::new();
249 |         params.access_slice(guard, |items| {
250 |             let start = self.used as usize;
251 |             param_string = join(items[start..].iter().map(|item| item.get(guard)), " ")
252 |         });
253 | 
254 |         match *name {
255 |             Value::Symbol(s) => write!(f, "(Partial {} ({}))", s.as_str(guard), param_string),
256 |             _ => write!(f, "(Partial ({}))", param_string),
257 |         }
258 |     }
259 | 
260 |     /// Prints the associated function's disassembled bytecode
261 |     fn debug<'guard>(
262 |         &self,
263 |         guard: &'guard dyn MutatorScope,
264 |         f: &mut fmt::Formatter,
265 |     ) -> fmt::Result {
266 |         self.print(guard, f)?;
267 |         write!(f, "\nbytecode follows:\n")?;
268 |         self.func.get(guard).code(guard).debug(guard, f)
269 |     }
270 | }
271 | 
/// A list of arguments to apply to functions
///
/// Currently an empty placeholder: the struct has no fields yet.
pub struct CurriedArguments {
    // TODO
    // not sure of the mechanics of this.
    // The ghc runtime would push all these to the stack and then consume the stack with
    // function continuations
}
279 | 


--------------------------------------------------------------------------------
/interpreter/src/hashable.rs:
--------------------------------------------------------------------------------
//! Scope-guard limited Hashable trait type
 2 | use std::hash::Hasher;
 3 | 
 4 | use crate::safeptr::MutatorScope;
 5 | 
 6 | // ANCHOR: DefHashable
/// Similar to Hash but for use in a mutator lifetime-limited scope
pub trait Hashable {
    /// Feed this value into `hasher`.
    ///
    /// Mirrors the shape of `std::hash::Hash::hash` with an extra `MutatorScope` guard
    /// argument.
    fn hash<'guard, H: Hasher>(&self, _guard: &'guard dyn MutatorScope, hasher: &mut H);
}
11 | // ANCHOR_END: DefHashable
12 | 


--------------------------------------------------------------------------------
/interpreter/src/headers.rs:
--------------------------------------------------------------------------------
//! Defines an `ObjectHeader` type to immediately precede each heap allocated
//! object, which also contains a type tag but with space for many more types.
  3 | use stickyimmix::{
  4 |     AllocHeader, AllocObject, AllocRaw, AllocTypeId, ArraySize, Mark, RawPtr, SizeClass,
  5 | };
  6 | 
  7 | use crate::array::{ArrayU16, ArrayU32, ArrayU8};
  8 | use crate::bytecode::{ArrayOpcode, ByteCode, InstructionStream};
  9 | use crate::dict::Dict;
 10 | use crate::function::{Function, Partial};
 11 | use crate::list::List;
 12 | use crate::memory::HeapStorage;
 13 | use crate::number::NumberObject;
 14 | use crate::pair::Pair;
 15 | use crate::pointerops::{AsNonNull, Tagged};
 16 | use crate::symbol::Symbol;
 17 | use crate::taggedptr::FatPtr;
 18 | use crate::text::Text;
 19 | use crate::vm::{CallFrameList, Thread, Upvalue};
 20 | 
/// Recognized heap-allocated types.
/// This should represent every type native to the runtime with the exception of tagged pointer inline value
/// types.
///
/// The discriminant is represented as a `u16`. Each variant other than `ArrayBackingBytes` is
/// named after the runtime type it is assigned to by `declare_allocobject!`.
// ANCHOR: DefTypeList
#[repr(u16)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum TypeList {
    /// Type id given to headers created through `AllocHeader::new_array`
    ArrayBackingBytes,
    ArrayOpcode,
    ArrayU8,
    ArrayU16,
    ArrayU32,
    ByteCode,
    CallFrameList,
    Dict,
    Function,
    InstructionStream,
    List,
    NumberObject,
    Pair,
    Partial,
    Symbol,
    Text,
    Thread,
    Upvalue,
}

// Mark this as a Stickyimmix type-identifier type
impl AllocTypeId for TypeList {}
 50 | // ANCHOR_END: DefTypeList
 51 | 
/// A heap-allocated object header
// ANCHOR: DefObjectHeader
pub struct ObjectHeader {
    /// Mark state of the object; `AllocHeader::mark` sets it to `Mark::Marked`
    mark: Mark,
    /// Size class of the object, as given when the header was created
    size_class: SizeClass,
    /// Identifies the type of the object that this header belongs to
    type_id: TypeList,
    /// Size as given when the header was created; reported by `AllocHeader::size`
    size_bytes: u32,
}
 60 | // ANCHOR_END: DefObjectHeader
 61 | 
impl ObjectHeader {
    /// Convert the ObjectHeader address to a FatPtr pointing at the object itself.
    ///
    /// The object address is obtained from `HeapStorage::get_object` and cast to the concrete
    /// type selected by `type_id`.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `self` must be a live header that the allocator placed
    /// immediately before an object of the type recorded in `type_id` - confirm the exact
    /// contract against `HeapStorage::get_object`.
    ///
    /// # Panics
    ///
    /// Panics if `type_id` is a type that has no `FatPtr` representation handled below.
    // NOTE Any type that is a runtime dynamic type must be added to the below list
    // NOTE Be careful to match the correct TypeList discriminant with its corresponding FatPtr discriminant
    // NOTE Be careful to untag the pointer before putting it into a `FatPtr`
    // ANCHOR: DefObjectHeaderGetObjectFatPtr
    pub unsafe fn get_object_fatptr(&self) -> FatPtr {
        let ptr_to_self = self.non_null_ptr();
        let object_addr = HeapStorage::get_object(ptr_to_self);

        match self.type_id {
            TypeList::ArrayU8 => FatPtr::ArrayU8(RawPtr::untag(object_addr.cast::<ArrayU8>())),
            TypeList::ArrayU16 => FatPtr::ArrayU16(RawPtr::untag(object_addr.cast::<ArrayU16>())),
            TypeList::ArrayU32 => FatPtr::ArrayU32(RawPtr::untag(object_addr.cast::<ArrayU32>())),
            TypeList::Dict => FatPtr::Dict(RawPtr::untag(object_addr.cast::<Dict>())),
            TypeList::Function => FatPtr::Function(RawPtr::untag(object_addr.cast::<Function>())),
            TypeList::List => FatPtr::List(RawPtr::untag(object_addr.cast::<List>())),
            TypeList::NumberObject => {
                FatPtr::NumberObject(RawPtr::untag(object_addr.cast::<NumberObject>()))
            }
            TypeList::Pair => FatPtr::Pair(RawPtr::untag(object_addr.cast::<Pair>())),
            TypeList::Partial => FatPtr::Partial(RawPtr::untag(object_addr.cast::<Partial>())),
            TypeList::Symbol => FatPtr::Symbol(RawPtr::untag(object_addr.cast::<Symbol>())),
            TypeList::Text => FatPtr::Text(RawPtr::untag(object_addr.cast::<Text>())),
            TypeList::Upvalue => FatPtr::Upvalue(RawPtr::untag(object_addr.cast::<Upvalue>())),

            // Other types not represented by FatPtr are an error to id here
            _ => panic!("Invalid ObjectHeader type tag {:?}!", self.type_id),
        }
    }
    // ANCHOR_END: DefObjectHeaderGetObjectFatPtr
}
 94 | 
// Uses the trait's provided methods only.
// NOTE(review): presumably this is what supplies `non_null_ptr()` as called in
// `get_object_fatptr` - confirm in pointerops.
impl AsNonNull for ObjectHeader {}
 96 | 
 97 | impl AllocHeader for ObjectHeader {
 98 |     type TypeId = TypeList;
 99 | 
100 |     fn new<O: AllocObject<Self::TypeId>>(
101 |         size: u32,
102 |         size_class: SizeClass,
103 |         mark: Mark,
104 |     ) -> ObjectHeader {
105 |         ObjectHeader {
106 |             mark,
107 |             size_class,
108 |             type_id: O::TYPE_ID,
109 |             size_bytes: size,
110 |         }
111 |     }
112 | 
113 |     fn new_array(size: ArraySize, size_class: SizeClass, mark: Mark) -> ObjectHeader {
114 |         ObjectHeader {
115 |             mark,
116 |             size_class,
117 |             type_id: TypeList::ArrayBackingBytes,
118 |             size_bytes: size as u32,
119 |         }
120 |     }
121 | 
122 |     fn mark(&mut self) {
123 |         self.mark = Mark::Marked;
124 |     }
125 | 
126 |     fn is_marked(&self) -> bool {
127 |         self.mark == Mark::Marked
128 |     }
129 | 
130 |     fn size_class(&self) -> SizeClass {
131 |         self.size_class
132 |     }
133 | 
134 |     fn size(&self) -> u32 {
135 |         self.size_bytes
136 |     }
137 | 
138 |     fn type_id(&self) -> TypeList {
139 |         self.type_id
140 |     }
141 | }
142 | 
143 | /// Apply the type ID to each native type
144 | macro_rules! declare_allocobject {
145 |     ($T:ty, $I:tt) => {
146 |         impl AllocObject<TypeList> for $T {
147 |             const TYPE_ID: TypeList = TypeList::$I;
148 |         }
149 |     };
150 | }
151 | 
152 | declare_allocobject!(ArrayOpcode, ArrayOpcode);
153 | declare_allocobject!(ArrayU8, ArrayU8);
154 | declare_allocobject!(ArrayU16, ArrayU16);
155 | declare_allocobject!(ArrayU32, ArrayU32);
156 | declare_allocobject!(ByteCode, ByteCode);
157 | declare_allocobject!(CallFrameList, CallFrameList);
158 | declare_allocobject!(Dict, Dict);
159 | declare_allocobject!(Function, Function);
160 | declare_allocobject!(InstructionStream, InstructionStream);
161 | declare_allocobject!(List, List);
162 | declare_allocobject!(NumberObject, NumberObject);
163 | declare_allocobject!(Pair, Pair);
164 | declare_allocobject!(Partial, Partial);
165 | declare_allocobject!(Symbol, Symbol);
166 | declare_allocobject!(Text, Text);
167 | declare_allocobject!(Thread, Thread);
168 | declare_allocobject!(Upvalue, Upvalue);
169 | 


--------------------------------------------------------------------------------
/interpreter/src/lexer.rs:
--------------------------------------------------------------------------------
//! S-Expression lexer implementation.
//!
//! This isn't using any look-ahead yet and so always interprets
//! (.symbol) as ( DOT SYMBOL )
  5 | use crate::error::{err_lexer, spos, RuntimeError, SourcePos};
  6 | 
// key characters: the characters that the lexer gives a special meaning to
const OPEN_PAREN: char = '(';
const CLOSE_PAREN: char = ')';
const SPACE: char = ' ';
// tabs are rejected by the lexer as invalid whitespace
const TAB: char = '\t';
// line endings: a lone CR, a lone LF and a CR LF pair each count as one line break
const CR: char = '\r';
const LF: char = '\n';
const DOT: char = '.';
// delimits a text literal
const DOUBLE_QUOTE: char = '"';
// produces a Quote token
const SINGLE_QUOTE: char = '\'';
 17 | 
/// The kinds of token that the lexer produces
// ANCHOR: DefTokenType
#[derive(Debug, PartialEq)]
pub enum TokenType {
    /// A `(` character
    OpenParen,
    /// A `)` character
    CloseParen,
    /// A run of characters that is not any of the other token types
    Symbol(String),
    /// A `.` character
    Dot,
    /// The contents of a double-quoted string, without the quotes
    Text(String),
    /// A `'` character
    Quote,
}
 28 | // ANCHOR_END: DefTokenType
 29 | 
 30 | // ANCHOR: DefToken
 31 | #[derive(Debug, PartialEq)]
 32 | pub struct Token {
 33 |     pub pos: SourcePos,
 34 |     pub token: TokenType,
 35 | }
 36 | // ANCHOR_END: DefToken
 37 | 
 38 | impl Token {
 39 |     fn new(pos: SourcePos, token: TokenType) -> Token {
 40 |         Token { pos, token }
 41 |     }
 42 | }
 43 | 
 44 | // tokenize a String
 45 | pub fn tokenize(input: &str) -> Result<Vec<Token>, RuntimeError> {
 46 |     use self::TokenType::*;
 47 | 
 48 |     // characters that terminate a symbol
 49 |     let terminating = [OPEN_PAREN, CLOSE_PAREN, SPACE, TAB, CR, LF, DOUBLE_QUOTE];
 50 |     let is_terminating = |c: char| terminating.iter().any(|t| c == *t);
 51 | 
 52 |     // return value
 53 |     let mut tokens = Vec::new();
 54 | 
 55 |     // start line numbering at 1, the first character of each line being number 0
 56 |     let mut lineno = 1;
 57 |     let mut charno = 0;
 58 | 
 59 |     let mut chars = input.chars();
 60 |     let mut current = chars.next();
 61 | 
 62 |     loop {
 63 |         match current {
 64 |             Some(TAB) => {
 65 |                 return Err(err_lexer(
 66 |                     spos(lineno, charno),
 67 |                     "tabs are not valid whitespace",
 68 |                 ));
 69 |             }
 70 | 
 71 |             Some(SPACE) => current = chars.next(),
 72 | 
 73 |             Some(CR) => {
 74 |                 current = chars.next();
 75 | 
 76 |                 // consume \n if it follows \r
 77 |                 if let Some(LF) = current {
 78 |                     current = chars.next();
 79 |                 }
 80 | 
 81 |                 lineno += 1;
 82 |                 charno = 0;
 83 |                 continue;
 84 |             }
 85 | 
 86 |             Some(LF) => {
 87 |                 current = chars.next();
 88 |                 lineno += 1;
 89 |                 charno = 0;
 90 |                 continue;
 91 |             }
 92 | 
 93 |             // this is not correct because it doesn't allow for a . to begin a number
 94 |             // or a symbol. Will have to fix later.
 95 |             Some(DOT) => {
 96 |                 tokens.push(Token::new(spos(lineno, charno), Dot));
 97 |                 current = chars.next();
 98 |             }
 99 | 
100 |             Some(OPEN_PAREN) => {
101 |                 tokens.push(Token::new(spos(lineno, charno), OpenParen));
102 |                 current = chars.next();
103 |             }
104 | 
105 |             Some(CLOSE_PAREN) => {
106 |                 tokens.push(Token::new(spos(lineno, charno), CloseParen));
107 |                 current = chars.next();
108 |             }
109 | 
110 |             Some(DOUBLE_QUOTE) => {
111 |                 let text_begin = charno;
112 | 
113 |                 let mut text = String::from("");
114 | 
115 |                 loop {
116 |                     current = chars.next();
117 |                     if let Some(c) = current {
118 |                         if c == DOUBLE_QUOTE {
119 |                             current = chars.next();
120 |                             charno += 1;
121 |                             break;
122 |                         } else {
123 |                             text.push(c);
124 |                             charno += 1;
125 |                         }
126 |                     } else {
127 |                         return Err(err_lexer(spos(lineno, charno), "Unterminated string"));
128 |                     }
129 |                 }
130 | 
131 |                 tokens.push(Token::new(spos(lineno, text_begin), Text(text)))
132 |             }
133 | 
134 |             Some(SINGLE_QUOTE) => {
135 |                 tokens.push(Token::new(spos(lineno, charno), Quote));
136 |                 current = chars.next();
137 |             }
138 | 
139 |             Some(non_terminating) => {
140 |                 let symbol_begin = charno;
141 | 
142 |                 let mut symbol = String::from("");
143 |                 symbol.push(non_terminating);
144 | 
145 |                 // consume symbol
146 |                 loop {
147 |                     current = chars.next();
148 |                     if let Some(c) = current {
149 |                         if is_terminating(c) {
150 |                             break;
151 |                         } else {
152 |                             symbol.push(c);
153 |                             charno += 1;
154 |                         }
155 |                     } else {
156 |                         break;
157 |                     }
158 |                 }
159 | 
160 |                 // complete symbol
161 |                 tokens.push(Token::new(spos(lineno, symbol_begin), Symbol(symbol)));
162 |             }
163 | 
164 |             // EOL
165 |             None => break,
166 |         }
167 | 
168 |         charno += 1;
169 |     }
170 | 
171 |     Ok(tokens)
172 | }
173 | 
#[cfg(test)]
mod test {
    use super::*;

    /// Tokenize `input`, failing the calling test if the lexer reports an error
    fn lex(input: &str) -> Vec<Token> {
        tokenize(input).unwrap_or_else(|_| panic!("unexpected error"))
    }

    #[test]
    fn lexer_empty_string() {
        assert!(lex("").is_empty());
    }

    #[test]
    fn lexer_one_line() {
        let tokens = lex("(foo bar baz)");

        assert_eq!(tokens.len(), 5);
        assert_eq!(tokens[0], Token::new(spos(1, 0), TokenType::OpenParen));
        assert_eq!(
            tokens[1],
            Token::new(spos(1, 1), TokenType::Symbol(String::from("foo")))
        );
        assert_eq!(
            tokens[2],
            Token::new(spos(1, 5), TokenType::Symbol(String::from("bar")))
        );
        assert_eq!(
            tokens[3],
            Token::new(spos(1, 9), TokenType::Symbol(String::from("baz")))
        );
        assert_eq!(tokens[4], Token::new(spos(1, 12), TokenType::CloseParen));
    }

    #[test]
    fn lexer_multi_line() {
        let tokens = lex("( foo\nbar\nbaz\n)");

        assert_eq!(tokens.len(), 5);
        assert_eq!(tokens[0], Token::new(spos(1, 0), TokenType::OpenParen));
        assert_eq!(
            tokens[1],
            Token::new(spos(1, 2), TokenType::Symbol(String::from("foo")))
        );
        assert_eq!(
            tokens[2],
            Token::new(spos(2, 0), TokenType::Symbol(String::from("bar")))
        );
        assert_eq!(
            tokens[3],
            Token::new(spos(3, 0), TokenType::Symbol(String::from("baz")))
        );
        assert_eq!(tokens[4], Token::new(spos(4, 0), TokenType::CloseParen));
    }

    #[test]
    fn lexer_bad_whitespace() {
        let err = match tokenize("(foo\n\t(bar))") {
            Err(e) => e,
            Ok(_) => panic!("expected ParseEvalError for tab character"),
        };

        // the tab is the first character of the second line
        match err.error_pos() {
            Some(SourcePos { line, column }) => {
                assert_eq!(line, 2);
                assert_eq!(column, 0);
            }
            None => panic!("Expected error position"),
        }
    }

    #[test]
    fn lexer_text() {
        let tokens = lex("(foo \"text\" bar)");

        assert_eq!(tokens.len(), 5);
        assert_eq!(tokens[0], Token::new(spos(1, 0), TokenType::OpenParen));
        assert_eq!(
            tokens[1],
            Token::new(spos(1, 1), TokenType::Symbol(String::from("foo")))
        );
        // a text token is positioned at its opening quote and excludes both quotes
        assert_eq!(
            tokens[2],
            Token::new(spos(1, 5), TokenType::Text(String::from("text")))
        );
        assert_eq!(
            tokens[3],
            Token::new(spos(1, 12), TokenType::Symbol(String::from("bar")))
        );
        assert_eq!(tokens[4], Token::new(spos(1, 15), TokenType::CloseParen));
    }
}
256 | 


--------------------------------------------------------------------------------
/interpreter/src/list.rs:
--------------------------------------------------------------------------------
1 | /// List is an Array type that can contain any other object
2 | use crate::array::Array;
3 | use crate::safeptr::TaggedCellPtr;
4 | 
/// A List can contain a mixed sequence of any type of value
///
/// It is an alias for an `Array` whose element type is `TaggedCellPtr`.
pub type List = Array<TaggedCellPtr>;
7 | 


--------------------------------------------------------------------------------
/interpreter/src/main.rs:
--------------------------------------------------------------------------------
  1 | extern crate blockalloc;
  2 | extern crate clap;
  3 | extern crate dirs;
  4 | extern crate fnv;
  5 | extern crate itertools;
  6 | extern crate rustyline;
  7 | extern crate stickyimmix;
  8 | 
  9 | use std::fs::File;
 10 | use std::io;
 11 | use std::io::prelude::*;
 12 | use std::process;
 13 | 
 14 | use clap::{App, Arg};
 15 | 
 16 | use rustyline::error::ReadlineError;
 17 | use rustyline::Editor;
 18 | 
 19 | mod arena;
 20 | mod array;
 21 | mod bytecode;
 22 | mod compiler;
 23 | mod containers;
 24 | mod dict;
 25 | mod error;
 26 | mod function;
 27 | mod hashable;
 28 | mod headers;
 29 | mod lexer;
 30 | mod list;
 31 | mod memory;
 32 | mod number;
 33 | mod pair;
 34 | mod parser;
 35 | mod pointerops;
 36 | mod printer;
 37 | mod rawarray;
 38 | mod repl;
 39 | mod safeptr;
 40 | mod symbol;
 41 | mod symbolmap;
 42 | mod taggedptr;
 43 | mod text;
 44 | mod vm;
 45 | 
 46 | use crate::error::RuntimeError;
 47 | use crate::memory::Memory;
 48 | use crate::repl::RepMaker;
 49 | 
 50 | /// Read a file into a String
 51 | fn load_file(filename: &str) -> Result<String, io::Error> {
 52 |     let mut contents = String::new();
 53 | 
 54 |     File::open(filename)?.read_to_string(&mut contents)?;
 55 | 
 56 |     Ok(contents)
 57 | }
 58 | 
 59 | /// Read and evaluate an entire file
 60 | fn read_file(filename: &str) -> Result<(), RuntimeError> {
 61 |     let _contents = load_file(&filename)?;
 62 | 
 63 |     // TODO
 64 | 
 65 |     Ok(())
 66 | }
 67 | 
 68 | /// Read a line at a time, printing the input back out
 69 | fn read_print_loop() -> Result<(), RuntimeError> {
 70 |     // establish a repl input history file path
 71 |     let history_file = match dirs::home_dir() {
 72 |         Some(mut path) => {
 73 |             path.push(".evalrus_history");
 74 |             Some(String::from(path.to_str().unwrap()))
 75 |         }
 76 |         None => None,
 77 |     };
 78 | 
 79 |     // () means no completion support (TODO)
 80 |     // Another TODO - find a more suitable alternative to rustyline
 81 |     let mut reader = Editor::<()>::new();
 82 | 
 83 |     // Try to load the repl history file
 84 |     if let Some(ref path) = history_file {
 85 |         if let Err(err) = reader.load_history(&path) {
 86 |             eprintln!("Could not read history: {}", err);
 87 |         }
 88 |     }
 89 | 
 90 |     let mem = Memory::new();
 91 |     let rep_maker = RepMaker {};
 92 |     let rep = mem.mutate(&rep_maker, ())?;
 93 | 
 94 |     // repl
 95 |     loop {
 96 |         let readline = reader.readline("> ");
 97 | 
 98 |         match readline {
 99 |             // valid input
100 |             Ok(line) => {
101 |                 reader.add_history_entry(&line);
102 |                 mem.mutate(&rep, line)?;
103 |             }
104 | 
105 |             // some kind of program termination condition
106 |             Err(e) => {
107 |                 if let Some(ref path) = history_file {
108 |                     reader.save_history(&path).unwrap_or_else(|err| {
109 |                         eprintln!("could not save input history in {}: {}", path, err);
110 |                     });
111 |                 }
112 | 
113 |                 // EOF is fine
114 |                 if let ReadlineError::Eof = e {
115 |                     return Ok(());
116 |                 } else {
117 |                     return Err(RuntimeError::from(e));
118 |                 }
119 |             }
120 |         }
121 |     }
122 | }
123 | 
124 | fn main() {
125 |     // parse command line argument, an optional filename
126 |     let matches = App::new("Eval-R-Us")
127 |         .about("Evaluate expressions")
128 |         .arg(
129 |             Arg::with_name("filename")
130 |                 .help("Optional filename to read in")
131 |                 .index(1),
132 |         )
133 |         .get_matches();
134 | 
135 |     if let Some(filename) = matches.value_of("filename") {
136 |         // if a filename was specified, read it into a String
137 |         read_file(filename).unwrap_or_else(|err| {
138 |             eprintln!("Terminated: {}", err);
139 |             process::exit(1);
140 |         });
141 |     } else {
142 |         // otherwise begin a repl
143 |         read_print_loop().unwrap_or_else(|err| {
144 |             eprintln!("Terminated: {}", err);
145 |             process::exit(1);
146 |         });
147 |     }
148 | }
149 | 


--------------------------------------------------------------------------------
/interpreter/src/memory.rs:
--------------------------------------------------------------------------------
  1 | /// VM-level memory abstraction
  2 | ///
  3 | /// Defines Stack, Heap and Memory types, and a MemoryView type that gives a mutator a safe
  4 | /// view into the stack and heap.
  5 | use stickyimmix::{AllocObject, AllocRaw, ArraySize, RawPtr, StickyImmixHeap};
  6 | 
  7 | use crate::error::RuntimeError;
  8 | use crate::headers::{ObjectHeader, TypeList};
  9 | use crate::pointerops::ScopedRef;
 10 | use crate::safeptr::{MutatorScope, ScopedPtr, TaggedScopedPtr};
 11 | use crate::symbolmap::SymbolMap;
 12 | use crate::taggedptr::{FatPtr, TaggedPtr};
 13 | 
/// This type describes the mutator's view into memory - the heap and symbol name/ptr lookup.
///
/// It implements `MutatorScope` such that any `TaggedScopedPtr` or `Value` instances must be lifetime-
/// limited to the lifetime of this instance using `&'scope dyn MutatorScope`.
// ANCHOR: DefMutatorView
pub struct MutatorView<'memory> {
    heap: &'memory Heap,
}
// ANCHOR_END: DefMutatorView

impl<'memory> MutatorView<'memory> {
    /// Create a view that borrows the heap owned by `mem`
    fn new(mem: &'memory Memory) -> MutatorView<'memory> {
        MutatorView { heap: &mem.heap }
    }

    /// Get a Symbol pointer from its name
    ///
    /// The returned pointer is scope-limited to the borrow of this view.
    // ANCHOR: DefMutatorViewLookupSym
    pub fn lookup_sym(&self, name: &str) -> TaggedScopedPtr<'_> {
        TaggedScopedPtr::new(self, self.heap.lookup_sym(name))
    }
    // ANCHOR_END: DefMutatorViewLookupSym

    /// Write an object into the heap and return a scope-limited pointer to it
    ///
    /// Returns an error if the underlying heap allocation fails.
    // ANCHOR: DefMutatorViewAlloc
    pub fn alloc<T>(&self, object: T) -> Result<ScopedPtr<'_, T>, RuntimeError>
    where
        T: AllocObject<TypeList>,
    {
        Ok(ScopedPtr::new(
            self,
            self.heap.alloc(object)?.scoped_ref(self),
        ))
    }
    // ANCHOR_END: DefMutatorViewAlloc

    /// Write an object into the heap and return a scope-limited runtime-tagged pointer to it
    ///
    /// Returns an error if the underlying heap allocation fails.
    // ANCHOR: DefMutatorViewAllocTagged
    pub fn alloc_tagged<T>(&self, object: T) -> Result<TaggedScopedPtr<'_>, RuntimeError>
    where
        FatPtr: From<RawPtr<T>>,
        T: AllocObject<TypeList>,
    {
        Ok(TaggedScopedPtr::new(self, self.heap.alloc_tagged(object)?))
    }
    // ANCHOR_END: DefMutatorViewAllocTagged

    /// Make space for an array of bytes
    ///
    /// Unlike the other allocation methods this returns the raw pointer from the heap
    /// rather than a scope-limited pointer.
    pub fn alloc_array(&self, capacity: ArraySize) -> Result<RawPtr<u8>, RuntimeError> {
        self.heap.alloc_array(capacity)
    }

    /// Return a nil-initialized runtime-tagged pointer
    pub fn nil(&self) -> TaggedScopedPtr<'_> {
        TaggedScopedPtr::new(self, TaggedPtr::nil())
    }
}

// No methods are implemented here; the impl lets a `&MutatorView` be passed wherever a
// `&dyn MutatorScope` guard is expected.
impl<'memory> MutatorScope for MutatorView<'memory> {}
 72 | 
/// The heap implementation
// ANCHOR: DefHeapStorage
pub type HeapStorage = StickyImmixHeap<ObjectHeader>;
// ANCHOR_END: DefHeapStorage

/// Heap memory types.
///
/// Pairs the object heap with the symbol map used to look up Symbols by name.
// ANCHOR: DefHeap
struct Heap {
    heap: HeapStorage,
    syms: SymbolMap,
}
// ANCHOR_END: DefHeap

impl Heap {
    /// Create a Heap from a new `HeapStorage` and a new `SymbolMap`
    fn new() -> Heap {
        Heap {
            heap: HeapStorage::new(),
            syms: SymbolMap::new(),
        }
    }

    /// Get a Symbol pointer from its name
    // ANCHOR: DefHeapLookupSym
    fn lookup_sym(&self, name: &str) -> TaggedPtr {
        TaggedPtr::symbol(self.syms.lookup(name))
    }
    // ANCHOR_END: DefHeapLookupSym

    /// Write an object to the heap and return the raw pointer to it
    ///
    /// An allocation failure is converted into a `RuntimeError` by `?`.
    // ANCHOR: DefHeapAlloc
    fn alloc<T>(&self, object: T) -> Result<RawPtr<T>, RuntimeError>
    where
        T: AllocObject<TypeList>,
    {
        Ok(self.heap.alloc(object)?)
    }
    // ANCHOR_END: DefHeapAlloc

    /// Write an object into the heap and return a tagged pointer to it
    ///
    /// The raw pointer is converted to a `FatPtr` and from that to a `TaggedPtr`.
    // ANCHOR: DefHeapAllocTagged
    fn alloc_tagged<T>(&self, object: T) -> Result<TaggedPtr, RuntimeError>
    where
        FatPtr: From<RawPtr<T>>,
        T: AllocObject<TypeList>,
    {
        Ok(TaggedPtr::from(FatPtr::from(self.heap.alloc(object)?)))
    }
    // ANCHOR_END: DefHeapAllocTagged

    /// Make space for an array of bytes and return the raw pointer to it
    ///
    /// An allocation failure is converted into a `RuntimeError` by `?`.
    fn alloc_array(&self, capacity: ArraySize) -> Result<RawPtr<u8>, RuntimeError> {
        Ok(self.heap.alloc_array(capacity)?)
    }
}
126 | 
127 | /// Wraps a heap and provides scope-limited access to the heap
128 | // ANCHOR: DefMemory
129 | pub struct Memory {
130 |     heap: Heap,
131 | }
132 | // ANCHOR_END: DefMemory
133 | 
134 | impl Memory {
135 |     /// Instantiate a new memory environment
136 |     pub fn new() -> Memory {
137 |         Memory { heap: Heap::new() }
138 |     }
139 | 
140 |     /// Run a mutator process
141 |     // ANCHOR: DefMemoryMutate
142 |     pub fn mutate<M: Mutator>(&self, m: &M, input: M::Input) -> Result<M::Output, RuntimeError> {
143 |         let mut guard = MutatorView::new(self);
144 |         m.run(&mut guard, input)
145 |     }
146 |     // ANCHOR_END: DefMemoryMutate
147 | }
148 | 
/// Defines the interface a heap-mutating type must use to be allowed access to the heap
///
/// An implementation is executed by passing it to `Memory::mutate()`, which supplies
/// the `MutatorView` argument to `run()`.
// ANCHOR: DefMutator
pub trait Mutator: Sized {
    /// The type of the value passed in to `run()`
    type Input;
    /// The type of the value `run()` returns on success
    type Output;

    /// Do the work of the mutator, accessing memory only through `mem`
    fn run(&self, mem: &MutatorView, input: Self::Input) -> Result<Self::Output, RuntimeError>;

    // TODO
    // function to return iterator that iterates over roots
}
// ANCHOR_END: DefMutator
161 | 


--------------------------------------------------------------------------------
/interpreter/src/number.rs:
--------------------------------------------------------------------------------
 1 | /// An integer type - TODO
 2 | use std::fmt;
 3 | 
 4 | use crate::array::Array;
 5 | use crate::printer::Print;
 6 | use crate::safeptr::MutatorScope;
 7 | 
 8 | /// TODO A heap-allocated number
 9 | pub struct NumberObject {
10 |     _value: Array<u64>,
11 | }
12 | 
13 | impl Print for NumberObject {
14 |     fn print<'guard>(
15 |         &self,
16 |         _guard: &'guard dyn MutatorScope,
17 |         f: &mut fmt::Formatter,
18 |     ) -> fmt::Result {
19 |         // TODO
20 |         write!(f, "NumberObject(nan)")
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/interpreter/src/pair.rs:
--------------------------------------------------------------------------------
  1 | use std::cell::Cell;
  2 | use std::fmt;
  3 | 
  4 | use crate::error::{err_eval, RuntimeError, SourcePos};
  5 | use crate::memory::MutatorView;
  6 | use crate::printer::Print;
  7 | use crate::safeptr::{MutatorScope, ScopedPtr, TaggedCellPtr, TaggedScopedPtr};
  8 | use crate::taggedptr::Value;
  9 | 
/// A Pair of pointers, like a Cons cell of old
// ANCHOR: DefPair
#[derive(Clone)]
pub struct Pair {
    pub first: TaggedCellPtr,
    pub second: TaggedCellPtr,
    // Possible source code positions of the first and second values
    pub first_pos: Cell<Option<SourcePos>>,
    pub second_pos: Cell<Option<SourcePos>>,
}
// ANCHOR_END: DefPair

impl Pair {
    /// Return a new empty Pair instance
    ///
    /// Both pointers are nil and neither source code position is set.
    // ANCHOR: DefPairNew
    pub fn new() -> Pair {
        Pair {
            first: TaggedCellPtr::new_nil(),
            second: TaggedCellPtr::new_nil(),
            first_pos: Cell::new(None),
            second_pos: Cell::new(None),
        }
    }
    // ANCHOR_END: DefPairNew

    /// Set Pair.second to a new Pair with newPair.first set to the value
    ///
    /// The new Pair is allocated on the heap through `mem` and a pointer to it is
    /// returned. Returns an error if that allocation fails.
    // ANCHOR: DefPairAppend
    pub fn append<'guard>(
        &self,
        mem: &'guard MutatorView,
        value: TaggedScopedPtr<'guard>,
    ) -> Result<TaggedScopedPtr<'guard>, RuntimeError> {
        let pair = Pair::new();
        pair.first.set(value);

        let pair = mem.alloc_tagged(pair)?;
        self.second.set(pair);

        Ok(pair)
    }
    // ANCHOR_END: DefPairAppend

    /// Set Pair.second to the given value
    // ANCHOR: DefPairDot
    pub fn dot<'guard>(&self, value: TaggedScopedPtr<'guard>) {
        self.second.set(value);
    }
    // ANCHOR_END: DefPairDot

    /// Record the source code position of the `first` value
    pub fn set_first_source_code_pos(&self, pos: SourcePos) {
        self.first_pos.set(Some(pos));
    }

    /// Record the source code position of the `second` value
    pub fn set_second_source_code_pos(&self, pos: SourcePos) {
        self.second_pos.set(Some(pos));
    }
}
 67 | 
 68 | impl Print for Pair {
 69 |     fn print<'guard>(
 70 |         &self,
 71 |         guard: &'guard dyn MutatorScope,
 72 |         f: &mut fmt::Formatter,
 73 |     ) -> fmt::Result {
 74 |         let mut tail = ScopedPtr::new(guard, self);
 75 | 
 76 |         write!(f, "({}", tail.first.get(guard))?;
 77 | 
 78 |         while let Value::Pair(next) = *tail.second.get(guard) {
 79 |             tail = next;
 80 |             write!(f, " {}", tail.first.get(guard))?;
 81 |         }
 82 | 
 83 |         // clunky way to print anything but nil
 84 |         let second = *tail.second.get(guard);
 85 |         match second {
 86 |             Value::Nil => (),
 87 |             _ => write!(f, " . {}", second)?,
 88 |         }
 89 | 
 90 |         write!(f, ")")
 91 |     }
 92 | 
 93 |     // In debug print, use dot notation
 94 |     fn debug<'guard>(
 95 |         &self,
 96 |         guard: &'guard dyn MutatorScope,
 97 |         f: &mut fmt::Formatter,
 98 |     ) -> fmt::Result {
 99 |         write!(
100 |             f,
101 |             "({:?} . {:?})",
102 |             self.first.get(guard),
103 |             self.second.get(guard)
104 |         )
105 |     }
106 | }
107 | 
/// Link the two values `head` and `rest` into a Pair instance
///
/// A new Pair is created with `first` set to `head` and `second` set to `rest`, then
/// allocated on the heap through `mem`. Returns a tagged pointer to the new Pair, or an
/// error if the allocation fails.
// ANCHOR: DefCons
pub fn cons<'guard>(
    mem: &'guard MutatorView,
    head: TaggedScopedPtr<'guard>,
    rest: TaggedScopedPtr<'guard>,
) -> Result<TaggedScopedPtr<'guard>, RuntimeError> {
    let pair = Pair::new();
    pair.first.set(head);
    pair.second.set(rest);
    mem.alloc_tagged(pair)
}
// ANCHOR_END: DefCons
121 | 
122 | /// Unpack a list of Pair instances into a Vec
123 | pub fn vec_from_pairs<'guard>(
124 |     guard: &'guard dyn MutatorScope,
125 |     pair_list: TaggedScopedPtr<'guard>,
126 | ) -> Result<Vec<TaggedScopedPtr<'guard>>, RuntimeError> {
127 |     match *pair_list {
128 |         Value::Pair(pair) => {
129 |             let mut result = Vec::new();
130 | 
131 |             result.push(pair.first.get(guard));
132 | 
133 |             let mut next = pair.second.get(guard);
134 |             while let Value::Pair(next_pair) = *next {
135 |                 result.push(next_pair.first.get(guard));
136 |                 next = next_pair.second.get(guard);
137 |             }
138 | 
139 |             // we've terminated the list, but correctly?
140 |             match *next {
141 |                 Value::Nil => Ok(result),
142 |                 _ => Err(err_eval("Incorrectly terminated Pair list")),
143 |             }
144 |         }
145 |         Value::Nil => Ok(Vec::new()),
146 |         _ => Err(err_eval("Expected a Pair")),
147 |     }
148 | }
149 | 
150 | /// Unpack a list of Pair instances into a Vec, expecting n values
151 | pub fn vec_from_n_pairs<'guard>(
152 |     guard: &'guard dyn MutatorScope,
153 |     pair_list: TaggedScopedPtr<'guard>,
154 |     expect_length: usize,
155 | ) -> Result<Vec<TaggedScopedPtr<'guard>>, RuntimeError> {
156 |     let result = vec_from_pairs(guard, pair_list)?;
157 | 
158 |     if result.len() != expect_length {
159 |         return Err(err_eval(&format!(
160 |             "Pair list has {} items, expected {}",
161 |             result.len(),
162 |             expect_length
163 |         )));
164 |     }
165 | 
166 |     Ok(result)
167 | }
168 | 
169 | /// Convenience function for unpacking a list of Pair instances into one value
170 | pub fn value_from_1_pair<'guard>(
171 |     guard: &'guard dyn MutatorScope,
172 |     pair_list: TaggedScopedPtr<'guard>,
173 | ) -> Result<TaggedScopedPtr<'guard>, RuntimeError> {
174 |     let result = vec_from_pairs(guard, pair_list)?;
175 | 
176 |     match result.as_slice() {
177 |         [first] => Ok(*first),
178 |         _ => Err(err_eval(&format!(
179 |             "Pair list has {} items, expected 1",
180 |             result.len()
181 |         ))),
182 |     }
183 | }
184 | 
185 | /// Convenience function for unpacking a list of Pair instances into two values
186 | pub fn values_from_2_pairs<'guard>(
187 |     guard: &'guard dyn MutatorScope,
188 |     pair_list: TaggedScopedPtr<'guard>,
189 | ) -> Result<(TaggedScopedPtr<'guard>, TaggedScopedPtr<'guard>), RuntimeError> {
190 |     let result = vec_from_pairs(guard, pair_list)?;
191 | 
192 |     match result.as_slice() {
193 |         [first, second] => Ok((*first, *second)),
194 |         _ => Err(err_eval(&format!(
195 |             "Pair list has {} items, expected 2",
196 |             result.len()
197 |         ))),
198 |     }
199 | }
200 | 
201 | /// Convenience function for unpacking a list of Pair instances into three values
202 | pub fn values_from_3_pairs<'guard>(
203 |     guard: &'guard dyn MutatorScope,
204 |     pair_list: TaggedScopedPtr<'guard>,
205 | ) -> Result<
206 |     (
207 |         TaggedScopedPtr<'guard>,
208 |         TaggedScopedPtr<'guard>,
209 |         TaggedScopedPtr<'guard>,
210 |     ),
211 |     RuntimeError,
212 | > {
213 |     let result = vec_from_pairs(guard, pair_list)?;
214 | 
215 |     match result.as_slice() {
216 |         [first, second, third] => Ok((*first, *second, *third)),
217 |         _ => Err(err_eval(&format!(
218 |             "Pair list has {} items, expected 3",
219 |             result.len()
220 |         ))),
221 |     }
222 | }
223 | 
#[cfg(test)]
mod test {
    use super::*;
    use crate::error::RuntimeError;
    use crate::memory::{Memory, Mutator, MutatorView};

    /// Run `test_fn` as a mutator against a new `Memory`, panicking if it returns an error
    fn test_helper(test_fn: fn(&MutatorView) -> Result<(), RuntimeError>) {
        struct Test {}

        impl Mutator for Test {
            type Input = fn(&MutatorView) -> Result<(), RuntimeError>;
            type Output = ();

            fn run(
                &self,
                mem: &MutatorView,
                test_fn: Self::Input,
            ) -> Result<Self::Output, RuntimeError> {
                test_fn(mem)
            }
        }

        let mem = Memory::new();
        mem.mutate(&Test {}, test_fn).unwrap();
    }

    #[test]
    fn unpack_pair_list_bad() {
        fn test_inner(mem: &MutatorView) -> Result<(), RuntimeError> {
            // this is not a Pair, it's an error to convert it to a Vec
            let thing = mem.lookup_sym("nothing");

            assert!(vec_from_pairs(mem, thing).is_err());

            Ok(())
        }

        test_helper(test_inner)
    }

    #[test]
    fn unpack_pair_list_n_values() {
        fn test_inner(mem: &MutatorView) -> Result<(), RuntimeError> {
            // each name is prepended, so the finished list reads eve .. alice
            let mut head = mem.nil();
            for name in &["alice", "bob", "carlos", "dave", "eve"] {
                head = cons(mem, mem.lookup_sym(name), head)?;
            }

            let result = vec_from_pairs(mem, head);
            assert!(result.is_ok());

            let inside = result.unwrap();
            let expected = vec![
                mem.lookup_sym("eve"),
                mem.lookup_sym("dave"),
                mem.lookup_sym("carlos"),
                mem.lookup_sym("bob"),
                mem.lookup_sym("alice"),
            ];
            assert!(inside == expected);

            Ok(())
        }

        test_helper(test_inner)
    }

    #[test]
    fn unpack_pair_list_bad_terminator() {
        fn test_inner(mem: &MutatorView) -> Result<(), RuntimeError> {
            // the innermost Pair ends in a symbol rather than nil
            let mut head = cons(
                mem,
                mem.lookup_sym("alice"),
                mem.lookup_sym("non-terminator"),
            )?;
            for name in &["bob", "carlos", "dave", "eve"] {
                head = cons(mem, mem.lookup_sym(name), head)?;
            }

            assert!(vec_from_pairs(mem, head).is_err());

            Ok(())
        }

        test_helper(test_inner)
    }

    #[test]
    fn unpack_pair_list_n_values_expected() {
        fn test_inner(mem: &MutatorView) -> Result<(), RuntimeError> {
            let mut head = mem.nil();
            for name in &["alice", "bob", "carlos", "dave", "eve"] {
                head = cons(mem, mem.lookup_sym(name), head)?;
            }

            // the list holds exactly five values
            assert!(vec_from_n_pairs(mem, head, 5).is_ok());
            assert!(vec_from_n_pairs(mem, head, 3).is_err());
            assert!(vec_from_n_pairs(mem, head, 6).is_err());

            Ok(())
        }

        test_helper(test_inner)
    }
}
345 | 


--------------------------------------------------------------------------------
/interpreter/src/pointerops.rs:
--------------------------------------------------------------------------------
 1 | /// Miscellaneous pointer operations
 2 | use std::ptr::NonNull;
 3 | 
 4 | use stickyimmix::RawPtr;
 5 | 
 6 | use crate::safeptr::MutatorScope;
 7 | 
 8 | /// For conversion of a reference to a NonNull<T>
 9 | pub trait AsNonNull {
10 |     fn non_null_ptr(&self) -> NonNull<Self> {
11 |         unsafe { NonNull::new_unchecked(self as *const Self as *mut Self) }
12 |     }
13 | }
14 | 
// Pointer tag values and masks using the lowest 2 bits
//
// TAG_MASK selects the two tag bits of a word and PTR_MASK selects every other bit.
// Each of the four possible tag values names one kind of pointer.
// ANCHOR: TaggedPtrTags
const TAG_MASK: usize = 0x3;
pub const TAG_SYMBOL: usize = 0x0;
pub const TAG_PAIR: usize = 0x1;
pub const TAG_OBJECT: usize = 0x2;
pub const TAG_NUMBER: usize = 0x3;
const PTR_MASK: usize = !0x3;
// ANCHOR_END: TaggedPtrTags

/// Return the tag from the given word
///
/// The result is the lowest 2 bits of `tagged_word`, for comparison with the `TAG_*`
/// constants.
pub fn get_tag(tagged_word: usize) -> usize {
    tagged_word & TAG_MASK
}
29 | 
/// Pointer tagging operations on RawPtr<T>
///
/// `tag()` combines a pointer with a tag value and `untag()` recovers the pointer by
/// masking the tag bits off again.
// ANCHOR: DefTagged
pub trait Tagged<T> {
    fn tag(self, tag: usize) -> NonNull<T>;
    fn untag(from: NonNull<T>) -> RawPtr<T>;
}

impl<T> Tagged<T> for RawPtr<T> {
    fn tag(self, tag: usize) -> NonNull<T> {
        // NOTE(review): `new_unchecked` requires a non-zero word - presumably a RawPtr is
        // never null; confirm. The tag is ORed in as given, it is not masked here.
        unsafe { NonNull::new_unchecked((self.as_word() | tag) as *mut T) }
    }

    fn untag(from: NonNull<T>) -> RawPtr<T> {
        // clear the lowest 2 bits to get the pointer back
        RawPtr::new((from.as_ptr() as usize & PTR_MASK) as *const T)
    }
}
// ANCHOR_END: DefTagged
47 | 
/// For accessing a pointer target, given a lifetime
///
/// The returned reference borrows for the `'scope` lifetime of the guard that is
/// passed in.
// ANCHOR: DefScopedRef
pub trait ScopedRef<T> {
    fn scoped_ref<'scope>(&self, guard: &'scope dyn MutatorScope) -> &'scope T;
}

impl<T> ScopedRef<T> for RawPtr<T> {
    fn scoped_ref<'scope>(&self, _guard: &'scope dyn MutatorScope) -> &'scope T {
        // The guard is used only for its lifetime. NOTE(review): this dereference
        // presumably relies on the pointer target staying valid for as long as the guard
        // is alive - confirm.
        unsafe { &*self.as_ptr() }
    }
}
// ANCHOR_END: DefScopedRef
60 | 


--------------------------------------------------------------------------------
/interpreter/src/printer.rs:
--------------------------------------------------------------------------------
 1 | use std::fmt;
 2 | //use std::io;
 3 | 
 4 | use crate::safeptr::MutatorScope;
 5 | use crate::taggedptr::Value;
 6 | 
/// Trait for using a `Value` lifted pointer in the `Display` trait
pub trait Print {
    /// Write the display representation of `self` to the formatter `f`.
    /// The guard provides the scope within which the object may be accessed.
    fn print<'guard>(
        &self,
        _guard: &'guard dyn MutatorScope,
        f: &mut fmt::Formatter,
    ) -> fmt::Result;

    /// Write the debug representation of `self` to the formatter `f`.
    /// Unless an implementor overrides it, this is identical to `print`.
    fn debug<'guard>(
        &self,
        _guard: &'guard dyn MutatorScope,
        f: &mut fmt::Formatter,
    ) -> fmt::Result {
        self.print(_guard, f)
    }

    //fn repr<'guard, F: fmt::Write>(&self, _guard: &'guard dyn MutatorScope, f: &mut F) -> fmt::Result;

    //fn output<'guard, F: io::Write>(
    //    &self,
    //    _guard: &'guard dyn MutatorScope,
    //    f: &mut F,
    //) -> io::Result<()>;
}
31 | 
32 | pub fn print(value: Value) -> String {
33 |     format!("{}", value)
34 | }
35 | 
36 | pub fn debug(value: Value) -> String {
37 |     format!("{:?}", value)
38 | }
39 | 


--------------------------------------------------------------------------------
/interpreter/src/rawarray.rs:
--------------------------------------------------------------------------------
  1 | use std::mem::size_of;
  2 | use std::ptr::NonNull;
  3 | use std::slice::from_raw_parts_mut;
  4 | 
  5 | pub use stickyimmix::ArraySize;
  6 | 
  7 | use crate::error::{ErrorKind, RuntimeError};
  8 | use crate::memory::MutatorView;
  9 | 
/// Arrays start out at this size by default: `default_array_growth` returns this value
/// when asked to grow from a capacity of 0
pub const DEFAULT_ARRAY_SIZE: ArraySize = 8;
 12 | 
 13 | /// Arrays grow at this rate by default
 14 | pub fn default_array_growth(capacity: ArraySize) -> Result<ArraySize, RuntimeError> {
 15 |     if capacity == 0 {
 16 |         Ok(DEFAULT_ARRAY_SIZE)
 17 |     } else {
 18 |         capacity
 19 |             .checked_add(capacity / 2)
 20 |             .ok_or(RuntimeError::new(ErrorKind::BadAllocationRequest))
 21 |     }
 22 | }
 23 | 
/// Fundamental array type on which other variable-length types are built.
/// Analogous to RawVec.
// ANCHOR: DefRawArray
pub struct RawArray<T: Sized> {
    /// Count of T-sized objects that can fit in the array
    capacity: ArraySize,
    /// Pointer to the backing storage, or `None` when no storage is attached
    ptr: Option<NonNull<T>>,
}
 32 | // ANCHOR_END: DefRawArray
 33 | 
 34 | /// Since this base array type needs to be used in an interior-mutable way by the containers
 35 | /// built on top of it, the Copy+Clone traits need to be implemented for it so that it can
 36 | /// be used in a Cell
 37 | impl<T: Sized> Clone for RawArray<T> {
 38 |     fn clone(&self) -> Self {
 39 |         RawArray {
 40 |             capacity: self.capacity,
 41 |             ptr: self.ptr,
 42 |         }
 43 |     }
 44 | }
 45 | 
 46 | impl<T: Sized> Copy for RawArray<T> {}
 47 | 
 48 | impl<T: Sized> RawArray<T> {
 49 |     /// Return a RawArray of capacity 0 with no array bytes allocated
 50 |     pub fn new() -> RawArray<T> {
 51 |         RawArray {
 52 |             capacity: 0,
 53 |             ptr: None,
 54 |         }
 55 |     }
 56 | 
 57 |     /// Return a RawArray of the given capacity number of bytes allocated
 58 |     // ANCHOR: DefRawArrayWithCapacity
 59 |     pub fn with_capacity<'scope>(
 60 |         mem: &'scope MutatorView,
 61 |         capacity: u32,
 62 |     ) -> Result<RawArray<T>, RuntimeError> {
 63 |         // convert to bytes, checking for possible overflow of ArraySize limit
 64 |         let capacity_bytes = capacity
 65 |             .checked_mul(size_of::<T>() as ArraySize)
 66 |             .ok_or(RuntimeError::new(ErrorKind::BadAllocationRequest))?;
 67 | 
 68 |         Ok(RawArray {
 69 |             capacity,
 70 |             ptr: NonNull::new(mem.alloc_array(capacity_bytes)?.as_ptr() as *mut T),
 71 |         })
 72 |     }
 73 |     // ANCHOR_END: DefRawArrayWithCapacity
 74 | 
 75 |     /// Resize the array to the new capacity
 76 |     /// TODO the inner implementation of this should live in the allocator API to make
 77 |     /// better use of optimizations
 78 |     pub fn resize<'scope>(
 79 |         &mut self,
 80 |         mem: &'scope MutatorView,
 81 |         new_capacity: u32,
 82 |     ) -> Result<(), RuntimeError> {
 83 |         // If we're reducing the capacity to 0, simply detach the array pointer
 84 |         if new_capacity == 0 {
 85 |             self.capacity = 0;
 86 |             self.ptr = None;
 87 |             return Ok(());
 88 |         }
 89 | 
 90 |         match self.ptr {
 91 |             // If we have capacity, create new capacity and copy over all bytes from the old
 92 |             // to the new array
 93 |             Some(old_ptr) => {
 94 |                 // Convert existing capacity to bytes
 95 |                 let old_capacity_bytes = size_of::<T>() as ArraySize * self.capacity;
 96 |                 let old_ptr = old_ptr.as_ptr();
 97 | 
 98 |                 // Convert new capacity to bytes but check that the number of bytes isn't
 99 |                 // outside of ArraySize range
100 |                 let new_capacity_bytes = new_capacity
101 |                     .checked_mul(size_of::<T>() as ArraySize)
102 |                     .ok_or(RuntimeError::new(ErrorKind::BadAllocationRequest))?;
103 | 
104 |                 let new_ptr = mem.alloc_array(new_capacity_bytes)?.as_ptr() as *mut T;
105 | 
106 |                 // create a pair of slices from the raw pointers and byte sizes
107 |                 let (old_slice, new_slice) = unsafe {
108 |                     (
109 |                         from_raw_parts_mut(old_ptr as *mut u8, old_capacity_bytes as usize),
110 |                         from_raw_parts_mut(new_ptr as *mut u8, new_capacity_bytes as usize),
111 |                     )
112 |                 };
113 | 
114 |                 // Copy content from old to new array
115 |                 for (src, dest) in old_slice.iter().zip(new_slice) {
116 |                     *dest = *src;
117 |                 }
118 | 
119 |                 self.ptr = NonNull::new(new_ptr);
120 |                 self.capacity = new_capacity;
121 | 
122 |                 Ok(())
123 |             }
124 | 
125 |             // If we have no capacity, create new blank capacity
126 |             None => {
127 |                 *self = Self::with_capacity(mem, new_capacity)?;
128 |                 Ok(())
129 |             }
130 |         }
131 |     }
132 | 
133 |     /// Return the capacity of the array in the count of objects it can hold
134 |     // ANCHOR: DefRawArrayCapacity
135 |     pub fn capacity(&self) -> ArraySize {
136 |         self.capacity
137 |     }
138 |     // ANCHOR_END: DefRawArrayCapacity
139 | 
140 |     /// Return a pointer to the array
141 |     // ANCHOR: DefRawArrayAsPtr
142 |     pub fn as_ptr(&self) -> Option<*const T> {
143 |         match self.ptr {
144 |             Some(ptr) => Some(ptr.as_ptr()),
145 |             None => None,
146 |         }
147 |     }
148 |     // ANCHOR_END: DefRawArrayAsPtr
149 | }
150 | 


--------------------------------------------------------------------------------
/interpreter/src/repl.rs:
--------------------------------------------------------------------------------
 1 | use crate::compiler::compile;
 2 | use crate::error::{ErrorKind, RuntimeError};
 3 | use crate::memory::{Mutator, MutatorView};
 4 | use crate::parser::parse;
 5 | use crate::safeptr::{CellPtr, TaggedScopedPtr};
 6 | use crate::vm::Thread;
 7 | 
/// A mutator that returns a Repl instance: its `Mutator::run` implementation allocates a
/// new `ReadEvalPrint`
pub struct RepMaker {}
10 | 
11 | impl Mutator for RepMaker {
12 |     type Input = ();
13 |     type Output = ReadEvalPrint;
14 | 
15 |     fn run(&self, mem: &MutatorView, _input: ()) -> Result<ReadEvalPrint, RuntimeError> {
16 |         ReadEvalPrint::alloc(mem)
17 |     }
18 | }
19 | 
/// Mutator that implements the VM
pub struct ReadEvalPrint {
    /// The thread on which every line given to `run` is evaluated
    main_thread: CellPtr<Thread>,
}
24 | 
25 | impl ReadEvalPrint {
26 |     pub fn alloc(mem: &MutatorView) -> Result<ReadEvalPrint, RuntimeError> {
27 |         Ok(ReadEvalPrint {
28 |             main_thread: CellPtr::new_with(Thread::alloc(mem)?),
29 |         })
30 |     }
31 | }
32 | 
33 | impl Mutator for ReadEvalPrint {
34 |     type Input = String;
35 |     type Output = ();
36 | 
37 |     fn run(&self, mem: &MutatorView, line: String) -> Result<(), RuntimeError> {
38 |         let thread = self.main_thread.get(mem);
39 | 
40 |         // If the first 2 chars of the line are ":d", then the user has requested a debug
41 |         // representation
42 |         let (line, debug) = if line.starts_with(":d ") {
43 |             (&line[3..], true)
44 |         } else {
45 |             (line.as_str(), false)
46 |         };
47 | 
48 |         match (|mem, line| -> Result<TaggedScopedPtr, RuntimeError> {
49 |             let value = parse(mem, line)?;
50 | 
51 |             if debug {
52 |                 println!(
53 |                     "# Debug\n## Input:\n```\n{}\n```\n## Parsed:\n```\n{:?}\n```",
54 |                     line, value
55 |                 );
56 |             }
57 | 
58 |             let function = compile(mem, value)?;
59 | 
60 |             if debug {
61 |                 println!("## Compiled:\n```\n{:?}\n```", function);
62 |             }
63 | 
64 |             let value = thread.quick_vm_eval(mem, function)?;
65 | 
66 |             if debug {
67 |                 println!("## Evaluated:\n```\n{:?}\n```\n", value);
68 |             }
69 | 
70 |             Ok(value)
71 |         })(mem, &line)
72 |         {
73 |             Ok(value) => println!("{}", value),
74 | 
75 |             Err(e) => {
76 |                 match e.error_kind() {
77 |                     // non-fatal repl errors
78 |                     ErrorKind::LexerError(_) => e.print_with_source(&line),
79 |                     ErrorKind::ParseError(_) => e.print_with_source(&line),
80 |                     ErrorKind::EvalError(_) => e.print_with_source(&line),
81 |                     _ => return Err(e),
82 |                 }
83 |             }
84 |         }
85 | 
86 |         Ok(())
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/interpreter/src/safeptr.rs:
--------------------------------------------------------------------------------
  1 | use std::cell::Cell;
  2 | use std::fmt;
  3 | use std::ops::Deref;
  4 | 
  5 | use stickyimmix::{AllocObject, RawPtr};
  6 | 
  7 | use crate::headers::TypeList;
  8 | use crate::pointerops::ScopedRef;
  9 | use crate::printer::Print;
 10 | use crate::taggedptr::{FatPtr, TaggedPtr, Value};
 11 | 
/// Type that provides a generic anchor for mutator timeslice lifetimes. The trait has no
/// methods: a `&'guard dyn MutatorScope` is passed around only for its lifetime.
// ANCHOR: DefMutatorScope
pub trait MutatorScope {}
 15 | // ANCHOR_END: DefMutatorScope
 16 | 
 17 | // Copy On Write semantics? Maybe the below...
 18 | // TODO, add MutatorView methods that can return MutScopedPtr?
 19 | //
 20 | // pub trait CopyOnWrite {
 21 | //     fn copy_mut<'guard>(&self, _guard: &'guard MutatorView) -> MutScopedPtr<'guard, Self>;
 22 | // }
 23 | //
 24 | // pub struct MutScopedPtr<'guard, T: Sized> {
 25 | //     value: &mut 'guard T
 26 | // }
 27 | //
 28 | // impl Deref, DerefMut for MutScopedPtr
 29 | //
 30 | // impl<'guard, T: Sized> MutScopedPtr<'guard, T> {
 31 | //    pub fn into_immut(self) -> ScopedPtr<'guard, T> {}
 32 | // }
 33 | 
 34 | /// An untagged compile-time typed pointer with scope limited by `MutatorScope`
 35 | // ANCHOR: DefScopedPtr
pub struct ScopedPtr<'guard, T: Sized> {
    // Reference to the target object, limited to the 'guard lifetime
    value: &'guard T,
}
 39 | // ANCHOR_END: DefScopedPtr
 40 | 
 41 | impl<'guard, T: Sized> ScopedPtr<'guard, T> {
 42 |     pub fn new(_guard: &'guard dyn MutatorScope, value: &'guard T) -> ScopedPtr<'guard, T> {
 43 |         ScopedPtr { value }
 44 |     }
 45 | 
 46 |     /// Convert the compile-time type pointer to a runtime type pointer
 47 |     pub fn as_tagged(&self, guard: &'guard dyn MutatorScope) -> TaggedScopedPtr<'guard>
 48 |     where
 49 |         FatPtr: From<RawPtr<T>>,
 50 |         T: AllocObject<TypeList>,
 51 |     {
 52 |         TaggedScopedPtr::new(
 53 |             guard,
 54 |             TaggedPtr::from(FatPtr::from(RawPtr::new(self.value))),
 55 |         )
 56 |     }
 57 | }
 58 | 
/// Anything that _has_ a scope lifetime can pass as a scope representation, so a
/// `ScopedPtr` can itself be handed to functions that expect a `&dyn MutatorScope`
impl<'scope, T: Sized> MutatorScope for ScopedPtr<'scope, T> {}
 61 | 
 62 | impl<'guard, T: Sized> Clone for ScopedPtr<'guard, T> {
 63 |     fn clone(&self) -> ScopedPtr<'guard, T> {
 64 |         ScopedPtr { value: self.value }
 65 |     }
 66 | }
 67 | 
 68 | impl<'guard, T: Sized> Copy for ScopedPtr<'guard, T> {}
 69 | 
 70 | impl<'guard, T: Sized> Deref for ScopedPtr<'guard, T> {
 71 |     type Target = T;
 72 | 
 73 |     fn deref(&self) -> &T {
 74 |         self.value
 75 |     }
 76 | }
 77 | 
 78 | impl<'guard, T: Sized + Print> fmt::Display for ScopedPtr<'guard, T> {
 79 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 80 |         self.value.print(self, f)
 81 |     }
 82 | }
 83 | 
 84 | impl<'guard, T: Sized + Print> fmt::Debug for ScopedPtr<'guard, T> {
 85 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 86 |         self.value.print(self, f)
 87 |     }
 88 | }
 89 | 
 90 | impl<'guard, T: Sized + PartialEq> PartialEq for ScopedPtr<'guard, T> {
 91 |     fn eq(&self, rhs: &ScopedPtr<'guard, T>) -> bool {
 92 |         self.value == rhs.value
 93 |     }
 94 | }
 95 | 
 96 | /// A wrapper around untagged raw pointers for storing compile-time typed pointers in data
 97 | /// structures with interior mutability, allowing pointers to be updated to point at different
 98 | /// target objects.
 99 | // ANCHOR: DefCellPtr
#[derive(Clone)]
pub struct CellPtr<T: Sized> {
    // The untagged raw pointer, held in a Cell so that it can be replaced through &self
    inner: Cell<RawPtr<T>>,
}
104 | // ANCHOR_END: DefCellPtr
105 | 
106 | impl<T: Sized> CellPtr<T> {
107 |     /// Construct a new CellPtr from a ScopedPtr
108 |     pub fn new_with(source: ScopedPtr<T>) -> CellPtr<T> {
109 |         CellPtr {
110 |             inner: Cell::new(RawPtr::new(source.value)),
111 |         }
112 |     }
113 | 
114 |     // ANCHOR: DefCellPtrGet
115 |     pub fn get<'guard>(&self, guard: &'guard dyn MutatorScope) -> ScopedPtr<'guard, T> {
116 |         ScopedPtr::new(guard, self.inner.get().scoped_ref(guard))
117 |     }
118 |     // ANCHOR_END: DefCellPtrGet
119 | 
120 |     // the explicit 'guard lifetime bound to MutatorScope is omitted here since the ScopedPtr
121 |     // carries this lifetime already so we can assume that this operation is safe
122 |     pub fn set(&self, source: ScopedPtr<T>) {
123 |         self.inner.set(RawPtr::new(source.value))
124 |     }
125 | }
126 | 
127 | impl<T: Sized> From<ScopedPtr<'_, T>> for CellPtr<T> {
128 |     fn from(ptr: ScopedPtr<T>) -> CellPtr<T> {
129 |         CellPtr::new_with(ptr)
130 |     }
131 | }
132 | 
133 | /// A _tagged_ runtime typed pointer type with scope limited by `MutatorScope` such that a `Value`
134 | /// instance can safely be derived and accessed. This type is neccessary to derive `Value`s from.
135 | // ANCHOR: DefTaggedScopedPtr
#[derive(Copy, Clone)]
pub struct TaggedScopedPtr<'guard> {
    // The tagged pointer itself
    ptr: TaggedPtr,
    // The `Value` derived from `ptr` when the instance was constructed
    value: Value<'guard>,
}
141 | // ANCHOR_END: DefTaggedScopedPtr
142 | 
143 | impl<'guard> TaggedScopedPtr<'guard> {
144 |     pub fn new(guard: &'guard dyn MutatorScope, ptr: TaggedPtr) -> TaggedScopedPtr<'guard> {
145 |         TaggedScopedPtr {
146 |             ptr,
147 |             value: FatPtr::from(ptr).as_value(guard),
148 |         }
149 |     }
150 | 
151 |     pub fn value(&self) -> Value<'guard> {
152 |         self.value
153 |     }
154 | 
155 |     pub fn get_ptr(&self) -> TaggedPtr {
156 |         self.ptr
157 |     }
158 | }
159 | 
/// Anything that _has_ a scope lifetime can pass as a scope representation. `Value` also
/// implements `MutatorScope`, so this impl is largely for consistency.
impl<'scope> MutatorScope for TaggedScopedPtr<'scope> {}
163 | 
164 | impl<'guard> Deref for TaggedScopedPtr<'guard> {
165 |     type Target = Value<'guard>;
166 | 
167 |     fn deref(&self) -> &Value<'guard> {
168 |         &self.value
169 |     }
170 | }
171 | 
/// Display is delegated to the contained `Value`
impl<'guard> fmt::Display for TaggedScopedPtr<'guard> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // NOTE(review): which `fmt` this resolves to depends on `Value`'s definition,
        // which is not visible here - presumably its Display formatting; confirm
        self.value.fmt(f)
    }
}
177 | 
/// Debug is delegated to the contained `Value`
impl<'guard> fmt::Debug for TaggedScopedPtr<'guard> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // NOTE(review): this is the same call expression as in the Display impl; confirm
        // that it reaches `Value`'s Debug formatting rather than its Display formatting
        self.value.fmt(f)
    }
}
183 | 
184 | impl<'guard> PartialEq for TaggedScopedPtr<'guard> {
185 |     fn eq(&self, rhs: &TaggedScopedPtr<'guard>) -> bool {
186 |         self.ptr == rhs.ptr
187 |     }
188 | }
189 | 
190 | /// A wrapper around the runtime typed `TaggedPtr` for storing pointers in data structures with
191 | /// interior mutability, allowing pointers to be updated to point at different target objects.
192 | // ANCHOR: DefTaggedCellPtr
#[derive(Clone)]
pub struct TaggedCellPtr {
    // The tagged pointer, held in a Cell so that it can be replaced through &self
    inner: Cell<TaggedPtr>,
}
197 | // ANCHOR_END: DefTaggedCellPtr
198 | 
199 | impl TaggedCellPtr {
200 |     /// Construct a new Nil TaggedCellPtr instance
201 |     pub fn new_nil() -> TaggedCellPtr {
202 |         TaggedCellPtr {
203 |             inner: Cell::new(TaggedPtr::nil()),
204 |         }
205 |     }
206 | 
207 |     /// Construct a new TaggedCellPtr from a TaggedScopedPtr
208 |     pub fn new_with(source: TaggedScopedPtr) -> TaggedCellPtr {
209 |         TaggedCellPtr {
210 |             inner: Cell::new(TaggedPtr::from(source.ptr)),
211 |         }
212 |     }
213 | 
214 |     pub fn new_ptr(source: TaggedPtr) -> TaggedCellPtr {
215 |         TaggedCellPtr {
216 |             inner: Cell::new(source),
217 |         }
218 |     }
219 | 
220 |     /// Return the pointer as a `TaggedScopedPtr` type that carries a copy of the `TaggedPtr` and
221 |     /// a `Value` type for both copying and access convenience
222 |     // ANCHOR: DefTaggedCellPtrGet
223 |     pub fn get<'guard>(&self, guard: &'guard dyn MutatorScope) -> TaggedScopedPtr<'guard> {
224 |         TaggedScopedPtr::new(guard, self.inner.get())
225 |     }
226 |     // ANCHOR_END: DefTaggedCellPtrGet
227 | 
228 |     /// Set this pointer to point at the same object as a given `TaggedScopedPtr` instance
229 |     /// The explicit 'guard lifetime bound to MutatorScope is omitted here since the TaggedScopedPtr
230 |     /// carries this lifetime already so we can assume that this operation is safe
231 |     pub fn set(&self, source: TaggedScopedPtr) {
232 |         self.inner.set(TaggedPtr::from(source.ptr))
233 |     }
234 | 
235 |     /// Take the pointer of another `TaggedCellPtr` and set this instance to point at that object too
236 |     pub fn copy_from(&self, other: &TaggedCellPtr) {
237 |         self.inner.set(other.inner.get());
238 |     }
239 | 
240 |     /// Return true if the pointer is nil
241 |     pub fn is_nil(&self) -> bool {
242 |         self.inner.get().is_nil()
243 |     }
244 | 
245 |     /// Set this pointer to nil
246 |     pub fn set_to_nil(&self) {
247 |         self.inner.set(TaggedPtr::nil())
248 |     }
249 | 
250 |     /// Set this pointer to another TaggedPtr
251 |     pub fn set_to_ptr(&self, ptr: TaggedPtr) {
252 |         self.inner.set(ptr)
253 |     }
254 | 
255 |     /// Return the raw TaggedPtr from within
256 |     pub fn get_ptr(&self) -> TaggedPtr {
257 |         self.inner.get()
258 |     }
259 | }
260 | 
261 | impl From<TaggedScopedPtr<'_>> for TaggedCellPtr {
262 |     fn from(ptr: TaggedScopedPtr) -> TaggedCellPtr {
263 |         TaggedCellPtr::new_with(ptr)
264 |     }
265 | }
266 | 


--------------------------------------------------------------------------------
/interpreter/src/symbol.rs:
--------------------------------------------------------------------------------
 1 | /// A Symbol type
 2 | use std::fmt;
 3 | use std::hash::{Hash, Hasher};
 4 | use std::slice;
 5 | use std::str;
 6 | 
 7 | use crate::hashable::Hashable;
 8 | use crate::printer::Print;
 9 | use crate::safeptr::MutatorScope;
10 | 
11 | /// A Symbol is a unique object that has a unique name string. The backing storage for the
12 | /// underlying str data must have a lifetime of at least that of the Symbol instance to
13 | /// prevent use-after-free.
14 | /// See `SymbolMap`
15 | // ANCHOR: DefSymbol
#[derive(Copy, Clone)]
pub struct Symbol {
    // Address of the first byte of the name str; the bytes are not owned by the Symbol
    name_ptr: *const u8,
    // Length of the name in bytes
    name_len: usize,
}
21 | // ANCHOR_END: DefSymbol
22 | 
23 | impl Symbol {
24 |     /// The originating &str must be owned by a SymbolMap hash table
25 |     pub fn new(name: &str) -> Symbol {
26 |         Symbol {
27 |             name_ptr: name.as_ptr(),
28 |             name_len: name.len(),
29 |         }
30 |     }
31 | 
32 |     /// Unsafe because Symbol does not own the &str nor can it know anything about the actual lifetime
33 |     // ANCHOR: DefSymbolUnguardedAsStr
34 |     pub unsafe fn unguarded_as_str<'desired_lifetime>(&self) -> &'desired_lifetime str {
35 |         let slice = slice::from_raw_parts(self.name_ptr, self.name_len);
36 |         str::from_utf8(slice).unwrap()
37 |     }
38 |     // ANCHOR_END: DefSymbolUnguardedAsStr
39 | 
40 |     // ANCHOR: DefSymbolAsStr
41 |     pub fn as_str<'guard>(&self, _guard: &'guard dyn MutatorScope) -> &'guard str {
42 |         unsafe { self.unguarded_as_str() }
43 |     }
44 |     // ANCHOR_END: DefSymbolAsStr
45 | }
46 | 
47 | impl Print for Symbol {
48 |     /// Safe because the lifetime of `MutatorScope` defines a safe-access window
49 |     fn print<'guard>(
50 |         &self,
51 |         guard: &'guard dyn MutatorScope,
52 |         f: &mut fmt::Formatter,
53 |     ) -> fmt::Result {
54 |         write!(f, "{}", self.as_str(guard))
55 |     }
56 | }
57 | 
58 | // ANCHOR: DefImplHashableForSymbol
59 | impl Hashable for Symbol {
60 |     fn hash<'guard, H: Hasher>(&self, guard: &'guard dyn MutatorScope, h: &mut H) {
61 |         self.as_str(guard).hash(h)
62 |     }
63 | }
64 | // ANCHOR_END: DefImplHashableForSymbol
65 | 


--------------------------------------------------------------------------------
/interpreter/src/symbolmap.rs:
--------------------------------------------------------------------------------
 1 | /// Implements str interning for mapping Symbol names to unique pointers
 2 | use std::cell::RefCell;
 3 | use std::collections::HashMap;
 4 | 
 5 | use stickyimmix::{AllocRaw, RawPtr};
 6 | 
 7 | use crate::arena::Arena;
 8 | use crate::symbol::Symbol;
 9 | 
10 | /// A mapping of symbol names (Strings) to Symbol pointers. Only one copy of the symbol
11 | /// name String is kept; a Symbol resides in managed memory with a raw pointer to the
12 | /// String. Thus the lifetime of the SymbolMap must be at least the lifetime of the
13 | /// managed memory. This is arranged here by maintaining Symbol memory alongside the
14 | /// mapping HashMap.
15 | ///
16 | /// No Symbol is ever deleted. Symbol name strings must be immutable.
17 | // ANCHOR: DefSymbolMap
pub struct SymbolMap {
    // Symbol name -> pointer to the Symbol object; the key owns the name String
    map: RefCell<HashMap<String, RawPtr<Symbol>>>,
    // The memory in which the Symbol objects themselves are allocated
    arena: Arena,
}
22 | // ANCHOR_END: DefSymbolMap
23 | 
24 | impl SymbolMap {
25 |     pub fn new() -> SymbolMap {
26 |         SymbolMap {
27 |             map: RefCell::new(HashMap::new()),
28 |             arena: Arena::new(),
29 |         }
30 |     }
31 | 
32 |     // Can't take a map.entry(name) without providing an owned String, i.e. cloning 'name'
33 |     // Can't insert a new entry with just a reference without hashing twice, and cloning 'name'
34 |     // The common case, lookups, should be fast, inserts can be slower.
35 |     // ANCHOR: DefSymbolMapLookup
36 |     pub fn lookup(&self, name: &str) -> RawPtr<Symbol> {
37 |         {
38 |             if let Some(ptr) = self.map.borrow().get(name) {
39 |                 return *ptr;
40 |             }
41 |         }
42 | 
43 |         let name = String::from(name);
44 |         let ptr = self.arena.alloc(Symbol::new(&name)).unwrap();
45 |         self.map.borrow_mut().insert(name, ptr);
46 |         ptr
47 |     }
48 |     // ANCHOR_END: DefSymbolMapLookup
49 | }
50 | 


--------------------------------------------------------------------------------
/interpreter/src/text.rs:
--------------------------------------------------------------------------------
  1 | /// A type for representing strings. Implementation is an immutable wrapper around Array<u8>.
  2 | use std::fmt;
  3 | use std::hash::{Hash, Hasher};
  4 | use std::slice;
  5 | use std::str;
  6 | 
  7 | use crate::error::{ErrorKind, RuntimeError};
  8 | use crate::hashable::Hashable;
  9 | use crate::memory::MutatorView;
 10 | use crate::printer::Print;
 11 | use crate::rawarray::{ArraySize, RawArray};
 12 | use crate::safeptr::MutatorScope;
 13 | 
 14 | /// While Text is somewhat similar to Symbol, it is instead garbage-collected heap allocated and not interned.
#[derive(Copy, Clone)]
pub struct Text {
    // The bytes of the string; the array's capacity is used as the string's length in bytes
    content: RawArray<u8>,
}
 19 | 
 20 | impl Text {
 21 |     /// Create an empty Text string object
 22 |     pub fn new_empty() -> Text {
 23 |         Text {
 24 |             content: RawArray::new(),
 25 |         }
 26 |     }
 27 | 
 28 |     /// Initialize a Text object from a &str slice
 29 |     pub fn new_from_str<'guard>(
 30 |         mem: &'guard MutatorView,
 31 |         from_str: &str,
 32 |     ) -> Result<Text, RuntimeError> {
 33 |         let len = from_str.len();
 34 |         let from_ptr = from_str.as_ptr();
 35 | 
 36 |         if len > (ArraySize::max_value() as usize) {
 37 |             return Err(RuntimeError::new(ErrorKind::BadAllocationRequest));
 38 |         }
 39 | 
 40 |         let content = RawArray::with_capacity(mem, len as ArraySize)?;
 41 | 
 42 |         if let Some(to_ptr) = content.as_ptr() {
 43 |             unsafe { from_ptr.copy_to_nonoverlapping(to_ptr as *mut u8, len) }
 44 |             Ok(Text { content })
 45 |         } else {
 46 |             panic!("Text content array expected to have backing storage")
 47 |         }
 48 |     }
 49 | 
 50 |     unsafe fn unguarded_as_str(&self) -> &str {
 51 |         if let Some(ptr) = self.content.as_ptr() {
 52 |             let slice = slice::from_raw_parts(ptr, self.content.capacity() as usize);
 53 |             str::from_utf8(slice).unwrap()
 54 |         } else {
 55 |             &""
 56 |         }
 57 |     }
 58 | 
 59 |     /// Using scope guarded access, get the Text content as a &str slice
 60 |     pub fn as_str<'guard>(&self, _guard: &'guard dyn MutatorScope) -> &str {
 61 |         unsafe { self.unguarded_as_str() }
 62 |     }
 63 | }
 64 | 
 65 | impl Print for Text {
 66 |     fn print<'guard>(
 67 |         &self,
 68 |         guard: &'guard dyn MutatorScope,
 69 |         f: &mut fmt::Formatter,
 70 |     ) -> fmt::Result {
 71 |         // TODO this will need to be printed with certain string escape codes embedded
 72 |         write!(f, "\"{}\"", self.as_str(guard))
 73 |     }
 74 | }
 75 | 
 76 | impl Hashable for Text {
 77 |     fn hash<'guard, H: Hasher>(&self, guard: &'guard dyn MutatorScope, h: &mut H) {
 78 |         self.as_str(guard).hash(h)
 79 |     }
 80 | }
 81 | 
 82 | #[cfg(test)]
 83 | mod test {
 84 |     use super::Text;
 85 |     use crate::error::RuntimeError;
 86 |     use crate::memory::{Memory, Mutator, MutatorView};
 87 | 
 88 |     #[test]
 89 |     fn text_empty_string() {
 90 |         let mem = Memory::new();
 91 | 
 92 |         struct Test {}
 93 |         impl Mutator for Test {
 94 |             type Input = ();
 95 |             type Output = ();
 96 | 
 97 |             fn run(
 98 |                 &self,
 99 |                 view: &MutatorView,
100 |                 _input: Self::Input,
101 |             ) -> Result<Self::Output, RuntimeError> {
102 |                 let text = Text::new_empty();
103 |                 assert!(text.as_str(view) == "");
104 | 
105 |                 Ok(())
106 |             }
107 |         }
108 | 
109 |         let test = Test {};
110 |         mem.mutate(&test, ()).unwrap();
111 |     }
112 | 
113 |     #[test]
114 |     fn text_from_static_str() {
115 |         let mem = Memory::new();
116 | 
117 |         struct Test {}
118 |         impl Mutator for Test {
119 |             type Input = ();
120 |             type Output = ();
121 | 
122 |             fn run(
123 |                 &self,
124 |                 view: &MutatorView,
125 |                 _input: Self::Input,
126 |             ) -> Result<Self::Output, RuntimeError> {
127 |                 let expected = "こんにちは";
128 |                 let text = Text::new_from_str(view, expected)?;
129 |                 let got = text.as_str(view);
130 | 
131 |                 assert!(got == expected);
132 | 
133 |                 Ok(())
134 |             }
135 |         }
136 | 
137 |         let test = Test {};
138 |         mem.mutate(&test, ()).unwrap();
139 |     }
140 | 
141 |     #[test]
142 |     fn value_from_string() {
143 |         let mem = Memory::new();
144 | 
145 |         struct Test {}
146 |         impl Mutator for Test {
147 |             type Input = ();
148 |             type Output = ();
149 | 
150 |             fn run(
151 |                 &self,
152 |                 view: &MutatorView,
153 |                 _input: Self::Input,
154 |             ) -> Result<Self::Output, RuntimeError> {
155 |                 let input = String::from("こんにちは");
156 |                 // the Value representation of the object is wrapped in quotes
157 |                 let expected = format!("\"{}\"", input);
158 | 
159 |                 let text = Text::new_from_str(view, &input)?;
160 |                 let heap_text = view.alloc_tagged(text)?;
161 | 
162 |                 let got = format!("{}", heap_text.value());
163 | 
164 |                 assert!(got == expected);
165 | 
166 |                 Ok(())
167 |             }
168 |         }
169 | 
170 |         let test = Test {};
171 |         mem.mutate(&test, ()).unwrap();
172 |     }
173 | }
174 | 


--------------------------------------------------------------------------------
/stickyimmix/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | 


--------------------------------------------------------------------------------
/stickyimmix/Cargo.lock:
--------------------------------------------------------------------------------
 1 | # This file is automatically @generated by Cargo.
 2 | # It is not intended for manual editing.
 3 | [[package]]
 4 | name = "blockalloc"
 5 | version = "0.1.0"
 6 | 
 7 | [[package]]
 8 | name = "stickyimmix"
 9 | version = "0.1.0"
10 | dependencies = [
11 |  "blockalloc 0.1.0",
12 | ]
13 | 
14 | 


--------------------------------------------------------------------------------
/stickyimmix/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "stickyimmix"
 3 | version = "0.1.0"
 4 | authors = ["Peter Liniker <peter.liniker@gmail.com>"]
 5 | edition = "2018"
 6 | license = "MIT OR Apache-2.0"
 7 | 
 8 | [dependencies]
 9 | blockalloc = { path = "../blockalloc/" }
10 | 


--------------------------------------------------------------------------------
/stickyimmix/README.md:
--------------------------------------------------------------------------------
1 | # Single threaded sticky immix
2 | 
3 | A non-evacuating single-threaded immix implementation.
4 | 


--------------------------------------------------------------------------------
/stickyimmix/src/allocator.rs:
--------------------------------------------------------------------------------
  1 | use std::mem::size_of;
  2 | use std::ptr::NonNull;
  3 | 
  4 | use crate::constants;
  5 | use crate::rawptr::RawPtr;
  6 | 
/// The ways in which an allocation request can fail.
// ANCHOR: DefAllocError
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum AllocError {
    /// Some attribute of the allocation, most likely the size requested,
    /// could not be fulfilled
    BadRequest,
    /// Out of memory - allocating the space failed
    OOM,
}
// ANCHOR_END: DefAllocError
 18 | 
/// A type that describes allocation of an object into a heap space, returning
/// a bare pointer type on success
// ANCHOR: DefAllocRaw
pub trait AllocRaw {
    /// An implementation of an object header type
    type Header: AllocHeader;

    /// Allocate a single object of type T.
    ///
    /// Takes ownership of `object` and returns a `RawPtr<T>` on success or an
    /// `AllocError` on failure. `T` must be an `AllocObject` whose type id is
    /// the one used by this allocator's `Header`.
    fn alloc<T>(&self, object: T) -> Result<RawPtr<T>, AllocError>
    where
        T: AllocObject<<Self::Header as AllocHeader>::TypeId>;

    /// Allocating an array allows the client to put anything in the resulting data
    /// block but the type of the memory block will simply be 'Array'. No other
    /// type information will be stored in the object header.
    /// This is just a special case of alloc<T>() for T=u8 but a count > 1 of u8
    /// instances.  The caller is responsible for the content of the array.
    ///
    /// `size_bytes` is the requested size of the array in bytes.
    fn alloc_array(&self, size_bytes: ArraySize) -> Result<RawPtr<u8>, AllocError>;

    /// Given a bare pointer to an object, return the expected header address
    fn get_header(object: NonNull<()>) -> NonNull<Self::Header>;

    /// Given a bare pointer to an object's header, return the expected object address
    fn get_object(header: NonNull<Self::Header>) -> NonNull<()>;
}
// ANCHOR_END: DefAllocRaw
 45 | 
/// Object size class, as assigned by `SizeClass::get_for_size`.
/// - Small objects fit inside a line (up to `constants::SMALL_OBJECT_MAX` bytes)
/// - Medium objects span more than one line but no more than
///   `constants::MEDIUM_OBJECT_MAX` bytes
/// - Large objects are bigger than that, up to `constants::LARGE_OBJECT_MAX` bytes
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum SizeClass {
    Small,
    Medium,
    Large,
}
 57 | 
 58 | impl SizeClass {
 59 |     pub fn get_for_size(object_size: usize) -> Result<SizeClass, AllocError> {
 60 |         match object_size {
 61 |             constants::SMALL_OBJECT_MIN..=constants::SMALL_OBJECT_MAX => Ok(SizeClass::Small),
 62 |             constants::MEDIUM_OBJECT_MIN..=constants::MEDIUM_OBJECT_MAX => Ok(SizeClass::Medium),
 63 |             constants::LARGE_OBJECT_MIN..=constants::LARGE_OBJECT_MAX => Ok(SizeClass::Large),
 64 |             _ => Err(AllocError::BadRequest),
 65 |         }
 66 |     }
 67 | }
 68 | 
/// The type that describes the bounds of array sizing: array sizes are
/// expressed as a 32-bit unsigned number.
pub type ArraySize = u32;
 71 | 
/// TODO Object mark bit.
/// Every object is `Allocated` on creation.
///
/// Represented as a single byte (`repr(u8)`).
// NOTE(review): when an object moves between `Unmarked` and `Marked` is not
// visible in this file - confirm against the heap/collector implementation.
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Mark {
    Allocated,
    Unmarked,
    Marked,
}
 81 | 
/// Marker trait for the type-identifier type of managed objects. It has no
/// methods; implementors only need to be `Copy + Clone`.
// ANCHOR: DefAllocTypeId
pub trait AllocTypeId: Copy + Clone {}
// ANCHOR_END: DefAllocTypeId
 86 | 
/// All managed object types must implement this trait in order to be allocatable
// ANCHOR: DefAllocObject
pub trait AllocObject<T: AllocTypeId> {
    /// The type identifier value associated with the implementing object type.
    const TYPE_ID: T;
}
// ANCHOR_END: DefAllocObject
 93 | 
/// An object header struct must provide an implementation of this trait,
/// providing appropriate information to the garbage collector.
// TODO tracing information
// e.g. fn tracer(&self) -> Fn()
// ANCHOR: DefAllocHeader
pub trait AllocHeader: Sized {
    /// Associated type that identifies the allocated object type
    type TypeId: AllocTypeId;

    /// Create a new header for object type O, given the object's size, its
    /// size class and its initial mark state.
    // NOTE(review): `size` is presumably in bytes, matching `size()` - confirm.
    fn new<O: AllocObject<Self::TypeId>>(size: u32, size_class: SizeClass, mark: Mark) -> Self;

    /// Create a new header for an array type, given the array's size, its
    /// size class and its initial mark state.
    fn new_array(size: ArraySize, size_class: SizeClass, mark: Mark) -> Self;

    /// Set the Mark value to "marked"
    fn mark(&mut self);

    /// Report the mark state as a boolean
    // NOTE(review): presumably true only after `mark()` has been called - the
    // exact mapping from `Mark` values is up to the implementor.
    fn is_marked(&self) -> bool;

    /// Get the size class of the object
    fn size_class(&self) -> SizeClass;

    /// Get the size of the object in bytes
    fn size(&self) -> u32;

    /// Get the type of the object
    fn type_id(&self) -> Self::TypeId;
}
// ANCHOR_END: DefAllocHeader
125 | 
/// Return the allocated size for an object of `object_size` bytes: the size
/// rounded up to the next multiple of the machine word size
/// (`size_of::<usize>()`).
///
/// TODO the intended alignment is a double-word boundary. That is not
/// implemented yet: aligning the object while also accounting for the header
/// size (which this library only learns at compile time of the client crate)
/// means touching numerous bump-allocation code points with math and bitwise
/// ops that have not been worked out.
pub fn alloc_size_of(object_size: usize) -> usize {
    // The word size is a power of two, so `word - 1` is a mask of the low bits
    // that must be zero in an aligned size. (Double-word would be `word * 2 - 1`.)
    let low_bits = size_of::<usize>() - 1;
    let bumped = object_size + low_bits;
    bumped & !low_bits
}
137 | 


--------------------------------------------------------------------------------
/stickyimmix/src/blockmeta.rs:
--------------------------------------------------------------------------------
  1 | use crate::constants;
  2 | 
/// Block marking metadata. This metadata is stored at the end of a Block.
// ANCHOR: DefBlockMeta
pub struct BlockMeta {
    // Pointer to the first line-mark byte: the block start plus
    // `constants::LINE_MARK_START`. One byte per line follows.
    lines: *mut u8,
}
// ANCHOR_END: DefBlockMeta
  9 | 
 10 | impl BlockMeta {
 11 |     /// Heap allocate a metadata instance so that it doesn't move so we can store pointers
 12 |     /// to it.
 13 |     pub fn new(block_ptr: *const u8) -> BlockMeta {
 14 |         let mut meta = BlockMeta {
 15 |             lines: unsafe { block_ptr.add(constants::LINE_MARK_START) as *mut u8 },
 16 |         };
 17 | 
 18 |         meta.reset();
 19 | 
 20 |         meta
 21 |     }
 22 | 
 23 |     unsafe fn as_block_mark(&mut self) -> &mut u8 {
 24 |         // Use the last byte of the block because no object will occupy the line
 25 |         // associated with this: it's the mark bits.
 26 |         &mut *self.lines.add(constants::LINE_COUNT - 1)
 27 |     }
 28 | 
 29 |     unsafe fn as_line_mark(&mut self, line: usize) -> &mut u8 {
 30 |         &mut *self.lines.add(line)
 31 |     }
 32 | 
 33 |     /// Mark the indexed line
 34 |     pub fn mark_line(&mut self, index: usize) {
 35 |         unsafe { *self.as_line_mark(index) = 1 };
 36 |     }
 37 | 
 38 |     /// Indicate the entire block as marked
 39 |     pub fn mark_block(&mut self) {
 40 |         unsafe { *self.as_block_mark() = 1 }
 41 |     }
 42 | 
 43 |     /// Reset all mark flags to unmarked.
 44 |     pub fn reset(&mut self) {
 45 |         unsafe {
 46 |             for i in 0..constants::LINE_COUNT {
 47 |                 *self.lines.add(i) = 0;
 48 |             }
 49 |         }
 50 |     }
 51 | 
 52 |     /// Return an iterator over all the line mark flags
 53 |     //pub fn line_iter(&self) -> impl Iterator<Item = &'_ bool> {
 54 |     //    self.line_mark.iter()
 55 |     //}
 56 | 
 57 |     // ANCHOR: DefFindNextHole
 58 |     /// When it comes to finding allocatable holes, we bump-allocate downward.
 59 |     pub fn find_next_available_hole(
 60 |         &self,
 61 |         starting_at: usize,
 62 |         alloc_size: usize,
 63 |     ) -> Option<(usize, usize)> {
 64 |         // The count of consecutive avaliable holes. Must take into account a conservatively marked
 65 |         // hole at the beginning of the sequence.
 66 |         let mut count = 0;
 67 |         let starting_line = starting_at / constants::LINE_SIZE;
 68 |         let lines_required = (alloc_size + constants::LINE_SIZE - 1) / constants::LINE_SIZE;
 69 |         // Counting down from the given search start index
 70 |         let mut end = starting_line;
 71 | 
 72 |         for index in (0..starting_line).rev() {
 73 |             let marked = unsafe { *self.lines.add(index) };
 74 | 
 75 |             if marked == 0 {
 76 |                 // count unmarked lines
 77 |                 count += 1;
 78 | 
 79 |                 if index == 0 && count >= lines_required {
 80 |                     let limit = index * constants::LINE_SIZE;
 81 |                     let cursor = end * constants::LINE_SIZE;
 82 |                     return Some((cursor, limit));
 83 |                 }
 84 |             } else {
 85 |                 // This block is marked
 86 |                 if count > lines_required {
 87 |                     // But at least 2 previous blocks were not marked. Return the hole, considering the
 88 |                     // immediately preceding block as conservatively marked
 89 |                     let limit = (index + 2) * constants::LINE_SIZE;
 90 |                     let cursor = end * constants::LINE_SIZE;
 91 |                     return Some((cursor, limit));
 92 |                 }
 93 | 
 94 |                 // If this line is marked and we didn't return a new cursor/limit pair by now,
 95 |                 // reset the hole search state
 96 |                 count = 0;
 97 |                 end = index;
 98 |             }
 99 |         }
100 | 
101 |         None
102 |     }
103 |     // ANCHOR_END: DefFindNextHole
104 | }
105 | 
#[cfg(test)]
mod tests {

    use super::*;
    use crate::blockalloc::Block;

    #[test]
    fn test_find_next_hole() {
        // A set of marked lines with a couple of holes: line 3 and lines 5-9.
        // Searching downward from line 10 reaches the hole at lines 5-9 first.
        // Line 5 sits directly above marked line 4, so it is treated as
        // conservatively marked and excluded from the hole.
        let block = Block::new(constants::BLOCK_SIZE).unwrap();
        let mut meta = BlockMeta::new(block.as_ptr());

        for &line in &[0, 1, 2, 4, 10] {
            meta.mark_line(line);
        }

        // line 5 should be conservatively marked
        let expect = Some((10 * constants::LINE_SIZE, 6 * constants::LINE_SIZE));

        let got = meta.find_next_available_hole(10 * constants::LINE_SIZE, constants::LINE_SIZE);

        assert_eq!(got, expect);
    }

    #[test]
    fn test_find_next_hole_at_line_zero() {
        // Should find the hole starting at the beginning of the block
        let block = Block::new(constants::BLOCK_SIZE).unwrap();
        let mut meta = BlockMeta::new(block.as_ptr());

        for line in 3..=5 {
            meta.mark_line(line);
        }

        let expect = Some((3 * constants::LINE_SIZE, 0));

        let got = meta.find_next_available_hole(3 * constants::LINE_SIZE, constants::LINE_SIZE);

        assert_eq!(got, expect);
    }

    #[test]
    fn test_find_next_hole_at_block_end() {
        // The upper half of the block (lines halfway..LINE_COUNT) is marked.
        // The lower half, from line zero up to the halfway line, should be
        // identified as a hole.
        let block = Block::new(constants::BLOCK_SIZE).unwrap();
        let mut meta = BlockMeta::new(block.as_ptr());

        let halfway = constants::LINE_COUNT / 2;

        for i in halfway..constants::LINE_COUNT {
            meta.mark_line(i);
        }

        // The hole runs down to line zero, so no line in it is conservatively
        // marked: it spans from the first marked line down to offset 0.
        let expect = Some((halfway * constants::LINE_SIZE, 0));

        let got = meta.find_next_available_hole(constants::BLOCK_CAPACITY, constants::LINE_SIZE);

        assert_eq!(got, expect);
    }

    #[test]
    fn test_find_hole_all_conservatively_marked() {
        // Every other line is marked.
        // No hole should be found.
        let block = Block::new(constants::BLOCK_SIZE).unwrap();
        let mut meta = BlockMeta::new(block.as_ptr());

        for i in (0..constants::LINE_COUNT).step_by(2) {
            meta.mark_line(i);
        }

        let got = meta.find_next_available_hole(constants::BLOCK_CAPACITY, constants::LINE_SIZE);

        assert_eq!(got, None);
    }

    #[test]
    fn test_find_entire_block() {
        // No marked lines. Entire block is available.
        let block = Block::new(constants::BLOCK_SIZE).unwrap();
        let meta = BlockMeta::new(block.as_ptr());

        let expect = Some((constants::BLOCK_CAPACITY, 0));
        let got = meta.find_next_available_hole(constants::BLOCK_CAPACITY, constants::LINE_SIZE);

        assert_eq!(got, expect);
    }
}
222 | 


--------------------------------------------------------------------------------
/stickyimmix/src/bumpblock.rs:
--------------------------------------------------------------------------------
  1 | use std::ptr::write;
  2 | 
  3 | use blockalloc::{Block, BlockError};
  4 | 
  5 | use crate::allocator::AllocError;
  6 | use crate::blockmeta::BlockMeta;
  7 | use crate::constants;
  8 | 
  9 | impl From<BlockError> for AllocError {
 10 |     fn from(error: BlockError) -> AllocError {
 11 |         match error {
 12 |             BlockError::BadRequest => AllocError::BadRequest,
 13 |             BlockError::OOM => AllocError::OOM,
 14 |         }
 15 |     }
 16 | }
 17 | 
/// A block of heap. This maintains the bump cursor and limit per block
/// and the mark flags in a separate `meta` struct.
///
/// `cursor` and `limit` bound the hole currently being allocated into;
/// allocation moves `cursor` downward toward `limit`.
// NOTE(review): this comment used to state that a pointer to `meta` is placed
// in the first word of the block and that allocation therefore does not start
// at offset 0. Nothing in this file does that - `BumpBlock::new` sets `limit`
// to the very start of the block - so the statement has been dropped. Confirm.
// ANCHOR: DefBumpBlock
pub struct BumpBlock {
    cursor: *const u8,
    limit: *const u8,
    block: Block,
    meta: BlockMeta,
}
// ANCHOR_END: DefBumpBlock
 32 | 
 33 | impl BumpBlock {
 34 |     /// Create a new block of heap space and it's metadata, placing a
 35 |     /// pointer to the metadata in the first word of the block.
 36 |     pub fn new() -> Result<BumpBlock, AllocError> {
 37 |         let inner_block = Block::new(constants::BLOCK_SIZE)?;
 38 |         let block_ptr = inner_block.as_ptr();
 39 | 
 40 |         let block = BumpBlock {
 41 |             cursor: unsafe { block_ptr.add(constants::BLOCK_CAPACITY) },
 42 |             limit: block_ptr,
 43 |             block: inner_block,
 44 |             meta: BlockMeta::new(block_ptr),
 45 |         };
 46 | 
 47 |         Ok(block)
 48 |     }
 49 | 
 50 |     /// Write an object into the block at the given offset. The offset is not
 51 |     /// checked for overflow, hence this function is unsafe.
 52 |     unsafe fn write<T>(&mut self, object: T, offset: usize) -> *const T {
 53 |         let p = self.block.as_ptr().add(offset) as *mut T;
 54 |         write(p, object);
 55 |         p
 56 |     }
 57 | 
 58 |     /// Find a hole of at least the requested size and return Some(pointer) to it, or
 59 |     /// None if this block doesn't have a big enough hole.
 60 |     // ANCHOR: DefBumpBlockAlloc
 61 |     pub fn inner_alloc(&mut self, alloc_size: usize) -> Option<*const u8> {
 62 |         let ptr = self.cursor as usize;
 63 |         let limit = self.limit as usize;
 64 | 
 65 |         let next_ptr = ptr.checked_sub(alloc_size)? & constants::ALLOC_ALIGN_MASK;
 66 | 
 67 |         if next_ptr < limit {
 68 |             let block_relative_limit =
 69 |                 unsafe { self.limit.sub(self.block.as_ptr() as usize) } as usize;
 70 | 
 71 |             if block_relative_limit > 0 {
 72 |                 if let Some((cursor, limit)) = self
 73 |                     .meta
 74 |                     .find_next_available_hole(block_relative_limit, alloc_size)
 75 |                 {
 76 |                     self.cursor = unsafe { self.block.as_ptr().add(cursor) };
 77 |                     self.limit = unsafe { self.block.as_ptr().add(limit) };
 78 |                     return self.inner_alloc(alloc_size);
 79 |                 }
 80 |             }
 81 | 
 82 |             None
 83 |         } else {
 84 |             self.cursor = next_ptr as *const u8;
 85 |             Some(self.cursor)
 86 |         }
 87 |     }
 88 |     // ANCHOR_END: DefBumpBlockAlloc
 89 | 
 90 |     /// Return the size of the hole we're positioned at
 91 |     pub fn current_hole_size(&self) -> usize {
 92 |         self.cursor as usize - self.limit as usize
 93 |     }
 94 | }
 95 | 
 96 | #[cfg(test)]
 97 | mod tests {
 98 | 
 99 |     use super::*;
100 | 
101 |     const TEST_UNIT_SIZE: usize = constants::ALLOC_ALIGN_BYTES;
102 | 
103 |     // Helper function: given the Block, fill all holes with u32 values
104 |     // and return the number of values allocated.
105 |     // Also assert that all allocated values are unchanged as allocation
106 |     // proceeds.
107 |     fn loop_check_allocate(b: &mut BumpBlock) -> usize {
108 |         let mut v = Vec::new();
109 |         let mut index = 0;
110 | 
111 |         loop {
112 |             //println!("cursor={}, limit={}", b.cursor, b.limit);
113 |             if let Some(ptr) = b.inner_alloc(TEST_UNIT_SIZE) {
114 |                 let u32ptr = ptr as *mut u32;
115 | 
116 |                 assert!(!v.contains(&u32ptr));
117 | 
118 |                 v.push(u32ptr);
119 |                 unsafe { *u32ptr = index }
120 | 
121 |                 index += 1;
122 |             } else {
123 |                 break;
124 |             }
125 |         }
126 | 
127 |         for (index, u32ptr) in v.iter().enumerate() {
128 |             unsafe {
129 |                 assert!(**u32ptr == index as u32);
130 |             }
131 |         }
132 | 
133 |         index as usize
134 |     }
135 | 
136 |     #[test]
137 |     fn test_empty_block() {
138 |         let mut b = BumpBlock::new().unwrap();
139 | 
140 |         let count = loop_check_allocate(&mut b);
141 |         let expect = constants::BLOCK_CAPACITY / TEST_UNIT_SIZE;
142 | 
143 |         println!("expect={}, count={}", expect, count);
144 |         assert!(count == expect);
145 |     }
146 | 
147 |     #[test]
148 |     fn test_half_block() {
149 |         // This block has an available hole as the second half of the block
150 |         let mut b = BumpBlock::new().unwrap();
151 | 
152 |         for i in 0..(constants::LINE_COUNT / 2) {
153 |             b.meta.mark_line(i);
154 |         }
155 |         let occupied_bytes = (constants::LINE_COUNT / 2) * constants::LINE_SIZE;
156 | 
157 |         b.limit = b.cursor; // block is recycled
158 | 
159 |         let count = loop_check_allocate(&mut b);
160 |         let expect =
161 |             (constants::BLOCK_CAPACITY - constants::LINE_SIZE - occupied_bytes) / TEST_UNIT_SIZE;
162 | 
163 |         println!("expect={}, count={}", expect, count);
164 |         assert!(count == expect);
165 |     }
166 | 
167 |     #[test]
168 |     fn test_conservatively_marked_block() {
169 |         // This block has every other line marked, so the alternate lines are conservatively
170 |         // marked. Nothing should be allocated in this block.
171 | 
172 |         let mut b = BumpBlock::new().unwrap();
173 | 
174 |         for i in 0..constants::LINE_COUNT {
175 |             if i % 2 == 0 {
176 |                 b.meta.mark_line(i);
177 |             }
178 |         }
179 | 
180 |         b.limit = b.cursor; // block is recycled
181 | 
182 |         let count = loop_check_allocate(&mut b);
183 | 
184 |         println!("count={}", count);
185 |         assert!(count == 0);
186 |     }
187 | }
188 | 


--------------------------------------------------------------------------------
/stickyimmix/src/constants.rs:
--------------------------------------------------------------------------------
// Blocks are 2^15 = 32768 bytes in size.
// ANCHOR: ConstBlockSize
pub const BLOCK_SIZE_BITS: usize = 15;
pub const BLOCK_SIZE: usize = 1 << BLOCK_SIZE_BITS;
// ANCHOR_END: ConstBlockSize
// Mask that clears the low BLOCK_SIZE_BITS bits of an address.
// NOTE(review): presumably used to find a block's base address from an
// interior pointer, which requires block-size-aligned blocks - confirm.
pub const BLOCK_PTR_MASK: usize = !(BLOCK_SIZE - 1);

// Lines are 2^7 = 128 bytes in size, giving 256 lines per block.
// ANCHOR: ConstLineSize
pub const LINE_SIZE_BITS: usize = 7;
pub const LINE_SIZE: usize = 1 << LINE_SIZE_BITS;

// How many total lines are in a block
pub const LINE_COUNT: usize = BLOCK_SIZE / LINE_SIZE;

// We need LINE_COUNT number of bytes for marking lines, so the capacity of a block
// is reduced by that number of bytes.
pub const BLOCK_CAPACITY: usize = BLOCK_SIZE - LINE_COUNT;
// ANCHOR_END: ConstLineSize

// The first line-mark offset into the block is here: the mark bytes occupy the
// tail of the block, directly after the allocatable capacity.
pub const LINE_MARK_START: usize = BLOCK_CAPACITY;

// Allocation alignment: the mask clears the low bits of an address so that it
// becomes a multiple of ALLOC_ALIGN_BYTES.
pub const ALLOC_ALIGN_BYTES: usize = 16;
pub const ALLOC_ALIGN_MASK: usize = !(ALLOC_ALIGN_BYTES - 1);

// Object size ranges, in bytes. The three ranges are contiguous: each MIN is
// the previous MAX plus one. Sizes outside 1..=MAX_ALLOC_SIZE have no class.
pub const MAX_ALLOC_SIZE: usize = std::u32::MAX as usize;
pub const SMALL_OBJECT_MIN: usize = 1;
pub const SMALL_OBJECT_MAX: usize = LINE_SIZE;
pub const MEDIUM_OBJECT_MIN: usize = SMALL_OBJECT_MAX + 1;
pub const MEDIUM_OBJECT_MAX: usize = BLOCK_CAPACITY;
pub const LARGE_OBJECT_MIN: usize = MEDIUM_OBJECT_MAX + 1;
pub const LARGE_OBJECT_MAX: usize = MAX_ALLOC_SIZE;
34 | 


--------------------------------------------------------------------------------
/stickyimmix/src/lib.rs:
--------------------------------------------------------------------------------
 1 | extern crate blockalloc;
 2 | 
 3 | mod allocator;
 4 | mod blockmeta;
 5 | mod bumpblock;
 6 | mod constants;
 7 | mod heap;
 8 | mod rawptr;
 9 | 
10 | pub use crate::allocator::{
11 |     AllocError, AllocHeader, AllocObject, AllocRaw, AllocTypeId, ArraySize, Mark, SizeClass,
12 | };
13 | 
14 | pub use crate::heap::StickyImmixHeap;
15 | 
16 | pub use crate::rawptr::RawPtr;
17 | 


--------------------------------------------------------------------------------
/stickyimmix/src/rawptr.rs:
--------------------------------------------------------------------------------
 1 | use std::ptr::NonNull;
 2 | 
 3 | /// A container for a bare pointer to an object of type `T`.
 4 | /// At this level, compile-time type information is still
 5 | /// part of the type.
 6 | // ANCHOR: DefRawPtr
 7 | pub struct RawPtr<T: Sized> {
 8 |     ptr: NonNull<T>,
 9 | }
10 | // ANCHOR_END: DefRawPtr
11 | 
12 | impl<T: Sized> RawPtr<T> {
13 |     /// Create a new RawPtr from a bare pointer
14 |     pub fn new(ptr: *const T) -> RawPtr<T> {
15 |         RawPtr {
16 |             ptr: unsafe { NonNull::new_unchecked(ptr as *mut T) },
17 |         }
18 |     }
19 | 
20 |     /// Get the raw `*const` pointer to the object.
21 |     pub fn as_ptr(self) -> *const T {
22 |         self.ptr.as_ptr()
23 |     }
24 |     /// Get the pointer value as a word-sized integer
25 |     pub fn as_word(self) -> usize {
26 |         self.ptr.as_ptr() as usize
27 |     }
28 | 
29 |     pub fn as_untyped(self) -> NonNull<()> {
30 |         self.ptr.cast()
31 |     }
32 | 
33 |     /// Get a `&` reference to the object. Unsafe because there are no guarantees at this level
34 |     /// about the internal pointer's validity.
35 |     pub unsafe fn as_ref(&self) -> &T {
36 |         self.ptr.as_ref()
37 |     }
38 | 
39 |     /// Get a `&mut` reference to the object. Unsafe because there are no guarantees at this level
40 |     /// about the internal pointer's validity.
41 |     /// In addition, there can be no compile-time guarantees of mutable aliasing prevention.
42 |     /// Use with caution!
43 |     pub unsafe fn as_mut_ref(&mut self) -> &mut T {
44 |         self.ptr.as_mut()
45 |     }
46 | }
47 | 
48 | impl<T: Sized> Clone for RawPtr<T> {
49 |     fn clone(&self) -> RawPtr<T> {
50 |         RawPtr { ptr: self.ptr }
51 |     }
52 | }
53 | 
54 | impl<T: Sized> Copy for RawPtr<T> {}
55 | 
56 | impl<T: Sized> PartialEq for RawPtr<T> {
57 |     fn eq(&self, other: &RawPtr<T>) -> bool {
58 |         self.ptr == other.ptr
59 |     }
60 | }
61 | 


--------------------------------------------------------------------------------