├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── TODO.md ├── doc ├── Implementation-Notes.md └── Project-RFC.md ├── examples ├── balloon_animals.rs ├── correctnesstest.rs ├── low_allocation_rate.rs └── small_objects_stress.rs └── src ├── appthread.rs ├── constants.rs ├── gcthread.rs ├── heap.rs ├── journal.rs ├── lib.rs ├── parheap.rs ├── statistics.rs ├── trace.rs └── youngheap.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: rust 4 | rust: nightly 5 | 6 | after_success: | 7 | [ $TRAVIS_BRANCH = master ] && 8 | [ $TRAVIS_PULL_REQUEST = false ] && 9 | cargo doc && 10 | echo '' > target/doc/index.html && 11 | pip install --user ghp-import && 12 | ghp-import -n target/doc && 13 | git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mo-gc" 3 | description = "MO, a pauseless, concurrent, generational, parallel mark-and-sweep garbage collector" 4 | keywords = ["mo", "gc", "garbage", "collector"] 5 | homepage = "https://github.com/pliniker/mo-gc" 6 | repository = "https://github.com/pliniker/mo-gc" 7 | documentation = "https://crates.fyi/crates/mo-gc" 8 | version = "0.1.0" 9 | license = "MIT/Apache-2.0" 10 | authors = ["Peter Liniker "] 11 | 12 | [dependencies] 13 | bitmaptrie = { git = "https://github.com/pliniker/bitmaptrie-rs" } 14 | scoped-pool = "0.1" 15 | num_cpus = "0.2" 16 | time = "0.1" 17 | 18 | [dev-dependencies] 19 | stopwatch = "0.0.6" 20 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Peter Liniker 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## An experimental garbage collector in Rust 2 | 3 | This is a very experimental garbage collector primarily built to research the viability of a 4 | write barrier mechanism that does not depend on compiler GC support. 5 | 6 | 7 | * [![Build Status](https://travis-ci.org/pliniker/mo-gc.svg?branch=master)](https://travis-ci.org/pliniker/mo-gc) 8 | 9 | ### Further information 10 | 11 | Please read the [Introduction to mo-gc](http://pliniker.github.io/mo-gc-intro.html) first. 12 | 13 | * [Ideas](http://pliniker.github.io/mo-gc-ideas.html) expands on the further direction in the introduction. 14 | * [API Documentation](https://pliniker.github.io/mo-gc/), but also see the examples. 15 | * [Implementation Notes](https://github.com/pliniker/mo-gc/blob/master/doc/Implementation-Notes.md) 16 | * [Original draft design outline](https://github.com/pliniker/mo-gc/blob/master/doc/Project-RFC.md) 17 | * [Original discussion issue](https://github.com/pliniker/mo-gc/issues/1) on the original design. 18 | 19 | ### See also 20 | 21 | * [rust-gc](https://github.com/manishearth/rust-gc) 22 | * [crossbeam](https://github.com/aturon/crossbeam/) 23 | * [bacon-rajan-cc](https://github.com/fitzgen/bacon-rajan-cc) 24 | 25 | ### About this Project 26 | 27 | * Copyright © 2015 Peter Liniker 28 | * Licensed under dual MIT/Apache-2.0 29 | * Named after [M-O](http://pixar.wikia.com/wiki/M-O). 30 | 31 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | * integration tests 4 | * benchmarks 5 | 6 | # Examples 7 | 8 | * build some data structures, esp concurrent data structures 9 | * see crossbeam for treiber stack example 10 | 11 | # Issues 12 | 13 | ## Race condition 14 | 15 | There is currently a race condition where a pointer is read from the heap, rooted and then that 16 | pointer value on the heap is overwritten during the mark/sweep phase of collection. 
The 17 | rooting should ensure that the referenced object is marked, but the journal is not being 18 | read at this point and the reference count increment is too late to stop the object from being 19 | swept. 20 | 21 | This race condition means that the mutator threads cannot currently use this GC as fully general 22 | purpose, or rather that data structures must be persistent. 23 | 24 | The sequence of events causing the race condition is: 25 | 26 | * GC stops reading journal, enters mark phase 27 | * mutator reads pointer to object A from heap, roots A, writing to journal 28 | * mutator overwrites pointer on heap with new object B reference 29 | * GC traces heap, marking new object B but not previously referenced object A 30 | * GC sweeps, dropping A even though A was rooted 31 | 32 | The benefit of fixing this issue is that this GC design becomes general purpose. 33 | 34 | ### Additional write barrier 35 | 36 | This race condition might be avoided by an additional synchronous write barrier: if a pointer A 37 | on the heap is going to be replaced by pointer B, the object A might be marked as "pinned" 38 | to prevent the sweep phase from dropping it. The sweep phase would unpin the object, after 39 | which if it has been rooted, the reference count increment will be picked up from the journal 40 | before the next mark phase. 41 | 42 | This solution has the downside of adding a word to the size of every object, 43 | the cost of an atomic store on the app-thread side and the cost of an atomic load and store 44 | on the sweep phase. It would also make programs that use this GC less fork-friendly, as 45 | pinning objects would incur copy-on-write costs for memory pages that might otherwise remain 46 | read-only. 47 | 48 | Question: just how atomic would the pinning operation need to be? It only needs to take effect 49 | during the mark phase but the pin flag would need to be readable by the sweep phase. 50 | 51 | Experimentation will determine if this mechanism is worth the cost. There may be alternative 52 | implementation options that are more efficient: perhaps using a shared data structure to 53 | write pinned object pointers to that is consumed by a phase between mark and sweep that 54 | sets the marked flag on those objects? 55 | 56 | ### Use the journal 57 | 58 | The journal contains the rooting information needed to avoid this problem. Another possible 59 | solution may be to read the journal in the mark phase, _after_ marking any new roots, before 60 | moving on to the sweep phase. 61 | 62 | This needs further thought. 63 | 64 | ## Performance Bottlenecks 65 | 66 | ### Journal processing 67 | 68 | `Trie::set()` is the bottleneck in `YoungHeap::read_journals()`. This is a single-threaded 69 | function and consumes most of the GC linear time. It is the single greatest throughput limiter. 70 | If insertion into `bitmaptrie::Trie` could be parallelized, throughput would improve. 71 | 72 | One option is to process each mutator journal on a separate thread but defer new-object 73 | insertion to a single thread. This way some parallelism is gained for processing reference 74 | count increments. This is still not optimal though. 75 | 76 | ### The Allocator 77 | 78 | Building on the generic allocator: jemalloc maintains a radix trie for allocation so there 79 | are two tries, increasing CPU and memory requirements. A custom allocator would 80 | solve this problem, but would introduce the problem of writing a scalable, fragmentation- 81 | minimizing allocator. 
82 | 83 | ## Collection Scheduling 84 | 85 | This is currently very simple and has not been tuned at all. 86 | See `gcthread::gc_thread()` and `constants::*` for tuning. 87 | -------------------------------------------------------------------------------- /doc/Implementation-Notes.md: -------------------------------------------------------------------------------- 1 | 2 | * Date: 2016-03-13 3 | 4 | # Implementation Notes 5 | 6 | The current implementation has been tested on x86 and x86_64. It has not been tested on 7 | ARM, though ARM's weaker memory model may highlight some flaws. 8 | 9 | ## The journal 10 | 11 | The journal is designed to never block the mutator. Each mutator thread allocates a buffer to 12 | write reference count adjustments to. When the buffer is full, a new buffer is allocated. 13 | The GC thread consumes the buffers. Thus the journal behaves like an infinitely sized 14 | SPSC queue. Each mutator gets its own journal. 15 | 16 | The values written by the mutator to a buffer are essentially `TraitObject`s that describe 17 | a pointer to an object and the `Trace` trait virtual table. The virtual table pointer is 18 | required to provide the `drop()` and `Trace::trace()` methods, as the GC thread does not 19 | know concrete types at runtime. 20 | 21 | Because heap allocations are word aligned, a pointer's two least significant bits can be used 22 | as bit flags. 23 | 24 | The object address has four possible values in its LSBs: 25 | 26 | * 0: reference count decrement 27 | * 1: reference count increment 28 | * 2: new object allocated, no reference count adjustment 29 | * 3: new object allocated, reference count increment 30 | 31 | The object vtable has one flag value that can be set: 32 | 33 | * 2: the object is a container of other GC-managed objects and must be traced. This flag saves 34 | the mark phase from making virtual function calls for scalar objects. 35 | 36 | ### Advantages 37 | 38 | The mutator thread will never be blocked on writing to the journal unless the application hits 39 | out-of-memory, thus providing a basic pauselessness guarantee. 40 | 41 | The journal is very fast, not requiring atomics on the x86/64 TSO-memory-model architecture. 42 | 43 | ### Disadvantages 44 | 45 | If the GC thread cannot keep up with the mutator(s), the journal will continue to allocate 46 | new buffers faster than the GC thread can consume them, contributing to the OOM death march. 47 | 48 | ## Young generation heap and root reference counts 49 | 50 | A young-generation heap map is implemented using a bitmapped vector trie, whose indices are 51 | word-sized: keys are object addresses, values are a composition of root reference count, the object 52 | vtable and a word of flags for marking and sweeping. 53 | 54 | The addresses used as keys are right-shifted to eliminate the least significant bits that are 55 | always zero because heap allocations are word aligned. 56 | 57 | The flags set on the object address have been processed at this point and the heap updated 58 | accordingly. Reference count decrements are written to a deferred buffer for processing later. 59 | 60 | For new objects, the heap map flags for the object are marked as `NEW`. These are the young 61 | generation objects. Other entries in the map not marked as `NEW` are stack roots only. 62 | 63 | Thus the young generation heap map combines pure stack-root references and new object references.
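To make the journal encoding concrete, the sketch below decodes a single journal entry into its meaning. The flag values mirror those in `src/constants.rs` (`DEC`, `INC`, `NEW`, `NEW_INC`, `TRAVERSE_BIT`), but the decoder itself is illustrative rather than the crate's actual journal-processing code, and the two-bit key shift is only an example.

```rust
// Flag values mirroring src/constants.rs; the decoder itself is illustrative.
const FLAGS_MASK: usize = 3;   // low two bits of the object address word
const DEC: usize = 0;          // reference count decrement
const INC: usize = 1;          // reference count increment
const NEW: usize = 2;          // new object, not rooted
const NEW_INC: usize = 3;      // new object, rooted (refcount increment)
const TRAVERSE_BIT: usize = 2; // set on the vtable word for container objects

enum Entry {
    Decrement { key: usize },
    Increment { key: usize },
    NewObject { key: usize, rooted: bool, traverse: bool },
}

fn decode(ptr_word: usize, vtable_word: usize) -> Entry {
    // Word alignment guarantees the low address bits are zero, so the trie key
    // is the address with the flag bits stripped and shifted away.
    let key = (ptr_word & !FLAGS_MASK) >> 2;
    let traverse = vtable_word & TRAVERSE_BIT != 0;
    match ptr_word & FLAGS_MASK {
        DEC => Entry::Decrement { key: key },
        INC => Entry::Increment { key: key },
        NEW => Entry::NewObject { key: key, rooted: false, traverse: traverse },
        NEW_INC => Entry::NewObject { key: key, rooted: true, traverse: traverse },
        _ => unreachable!(),
    }
}
```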
64 | 65 | A typical generational GC keeps a data structure such as a card table to discover pointers from 66 | the mature object heap into the young generation heap. Write barriers are required to update the 67 | card table when mature objects are written to. In our case, the non-`NEW` stack-root 68 | references act as the set of mature objects that may have references to young generation objects. 69 | Essentially, the journal is a type of write barrier. 70 | 71 | When the young generation heap enters a mark phase, all objects that have a non-zero reference 72 | count are considered potential roots. Only `NEW` objects are considered during sweeping. 73 | 74 | Both marking and sweeping are done in parallel: during the mark phase, the heap map is sharded across 75 | multiple threads for scanning for roots while each thread can look up entries in the whole map for 76 | marking; during the sweep phase, the heap map is sharded across multiple threads for sweeping. 77 | 78 | ### Advantages 79 | 80 | This combined roots and new-objects map makes for a straightforwardly parallelizable mark and 81 | sweep implementation. The trie can be sharded into sub-tries and each sub-trie can be processed 82 | independently and mutated independently of the others while remaining thread safe without 83 | requiring locks or atomic access. 84 | 85 | ### Disadvantages 86 | 87 | Inserting into the trie is currently not parallelizable, making reading the journal into the trie 88 | a single-threaded affair, impacting GC throughput. 89 | 90 | On high rates of new object allocation, the GC thread currently cannot keep up with the 91 | mutators' rate of writing to the journal. The cause of this is not the journal itself: reading 92 | and writing the journal can be done very fast. However, inserting and updating the heap map 93 | causes the GC thread to process the journal at half the rate at which a single mutator thread 94 | can allocate new objects. 95 | 96 | If journal processing (trie insertion) can be parallelized, the GC throughput will improve greatly. 97 | 98 | One part-way step may be to parallelize reference count updates while still processing new 99 | objects in sequence. 100 | 101 | ## The mature object heap 102 | 103 | This heap map is similar to the young generation heap but does not consider reference counts 104 | or new objects. Marking and sweeping are parallelized similarly. 105 | 106 | A mature heap collection is triggered when the young generation heap reaches a threshold count of 107 | `NEW` objects that it is managing. `NEW` object data is copied to the mature heap trie and 108 | the original entries in the young generation are unmarked as `NEW`. They become plain stack 109 | root entries. 110 | -------------------------------------------------------------------------------- /doc/Project-RFC.md: -------------------------------------------------------------------------------- 1 | 2 | * Date: 2015-08-24 3 | * Discussion issue: [pliniker/mo-gc#1](https://github.com/pliniker/mo-gc/issues/1) 4 | 5 | # Summary 6 | 7 | Mutator threads maintain precise-rooted GC-managed objects through smart 8 | pointers on the stack that write reference-count increments and decrements to a 9 | journal. 10 | 11 | The reference-count journal is read by a GC thread that 12 | maintains the actual reference count numbers in a cache of roots. When a 13 | reference count reaches zero, the GC thread moves the pointer to a heap cache 14 | data structure that is used by a tracing collector.
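As a rough illustration of the scheme (simplified; the crate's actual rooting type is `GcRoot` in `src/appthread.rs`, which journals tagged trait-object words rather than bare addresses and deltas):

```rust
use std::cell::RefCell;

// Illustrative thread-local journal: (object address, +1 or -1) adjustments.
thread_local!(
    static JOURNAL: RefCell<Vec<(usize, isize)>> = RefCell::new(Vec::new())
);

fn journal_write(addr: usize, adj: isize) {
    JOURNAL.with(|j| j.borrow_mut().push((addr, adj)));
}

/// A stack-rooting smart pointer: creating or cloning it journals +1,
/// dropping it journals -1. The GC thread sums the adjustments to find roots.
struct Root<T> {
    ptr: *mut T,
}

impl<T> Root<T> {
    fn new(value: T) -> Root<T> {
        let ptr = Box::into_raw(Box::new(value));
        journal_write(ptr as usize, 1);
        Root { ptr: ptr }
    }
}

impl<T> Clone for Root<T> {
    fn clone(&self) -> Root<T> {
        journal_write(self.ptr as usize, 1);
        Root { ptr: self.ptr }
    }
}

impl<T> Drop for Root<T> {
    fn drop(&mut self) {
        journal_write(self.ptr as usize, -1);
    }
}
```

Only creating, cloning and dropping a root touch the journal; borrowing a root costs nothing.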
15 | 16 | Because the GC thread runs concurrently with the mutator threads without 17 | stopping them to scan stacks or trace, all GC-managed data structures that refer to 18 | other GC-managed objects must provide a safe concurrent trace function. 19 | 20 | Data structures' trace functions can implement any transactional 21 | mechanism that provides the GC a snapshot of the data structure's 22 | nested pointers for the duration of the trace function call. 23 | 24 | # Why 25 | 26 | Many languages and runtimes are hosted in the inherently unsafe languages 27 | C and/or C++, from Python to GHC. 28 | 29 | My interest in this project is in building a foundation, written in Rust, for 30 | language runtimes on top of Rust. Since Rust is a modern 31 | language for expressing low-level interactions with hardware, it is an 32 | ideal alternative to C/C++ while providing the opportunity to avoid classes 33 | of bugs common to C/C++ by default. 34 | 35 | With the brilliant, notable exception of Rust, a garbage collector is an 36 | essential luxury for most styles of programming. But how memory is managed in 37 | a language can be an asset or a liability that becomes so intertwined with 38 | the language semantics itself that it can become a huge undertaking to 39 | modernize years later. 40 | 41 | With that in mind, this GC is designed from the ground up to be concurrent 42 | and never stop the world. The caveat is that data structures 43 | need to be designed for concurrent reads and writes. In this world, 44 | the GC is just another thread, reading data structures and freeing any that 45 | are no longer live. 46 | 47 | That seems a reasonable tradeoff in a time when scaling out by adding 48 | processors rather than up through increased clock speed is now the status quo. 49 | 50 | # What this is not 51 | 52 | This is not particularly intended to be a general purpose GC, providing 53 | a near drop-in replacement for `Rc`, though it may be possible. 54 | For that, I recommend looking at 55 | [rust-gc](https://github.com/manishearth/rust-gc) or 56 | [bacon-rajan-cc](https://github.com/fitzgen/bacon-rajan-cc). 57 | 58 | This is also not primarily intended to be an ergonomic, native GC for all 59 | concurrent data structures in Rust. For that, I recommend a first look at 60 | [crossbeam](https://github.com/aturon/crossbeam/). 61 | 62 | # Assumptions 63 | 64 | This RFC assumes the use of the default Rust allocator, jemalloc, throughout 65 | the GC. No custom allocator is described here at this time. Correspondingly, 66 | the performance characteristics of jemalloc should be assumed. 67 | 68 | # Journal Implementation 69 | 70 | ## Mutator Threads 71 | 72 | The purpose of using a journal is to minimize the burden on the mutator 73 | threads as much as possible, pushing as much workload as possible over to the 74 | GC thread, while avoiding pauses if that is possible. 75 | 76 | In the most straightforward implementation, the journal can simply be a 77 | MPSC channel shared between mutator threads and sending 78 | reference count adjustments to the GC thread, that is, +1 and -1 for pointer 79 | clone and drop respectively. 80 | 81 | Performance for multiple mutator threads writing to an MPSC, with each 82 | write causing an allocation, can be improved on based on the 83 | [single writer principle][9] by 1) giving each mutator thread its own 84 | channel and 2) buffering journal entries and passing a reference to the buffer 85 | through the channel. 
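A minimal sketch of that arrangement, using a std `mpsc` channel as a stand-in for whatever channel type is ultimately chosen (the buffer capacity here is an arbitrary example value):

```rust
use std::mem;
use std::sync::mpsc::Sender;

const BUFFER_SIZE: usize = 32 * 1024; // arbitrary capacity for the sketch

/// One reference-count adjustment: +1 or -1 against a single object address.
struct Adjustment {
    addr: usize,
    delta: isize,
}

/// One writer per mutator thread. Adjustments are appended to a local buffer,
/// and only a full buffer crosses the channel, so the common case is a plain
/// push with no synchronization.
struct JournalWriter {
    buf: Vec<Adjustment>,
    tx: Sender<Vec<Adjustment>>,
}

impl JournalWriter {
    fn write(&mut self, addr: usize, delta: isize) {
        self.buf.push(Adjustment { addr: addr, delta: delta });
        if self.buf.len() >= BUFFER_SIZE {
            // Hand the full buffer to the GC thread and start a fresh one.
            let full = mem::replace(&mut self.buf, Vec::with_capacity(BUFFER_SIZE));
            let _ = self.tx.send(full);
        }
    }
}
```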
86 | 87 | Buffering journal entries should reduce the number of extra allocations per 88 | object created compared with a non-blocking MPSC channel. 89 | 90 | A typical problem of reference counted objects is locality: every reference 91 | count update requires a write to the object itself, making very inefficient 92 | spatial memory access. The journal, being a series of buffers, each 93 | of which is a contiguous block of memory, should give an efficiency gain 94 | for the mutator threads. 95 | 96 | It should be noted that the root smart-pointers shouldn't necessarily 97 | be churning out reference count adjustments. This is Rust: prefer to borrow 98 | a root smart-pointer before cloning it. This is one of the main features that 99 | makes implementing this in Rust so attractive. 100 | 101 | ### Implementation Notes 102 | 103 | When newly rooting a pointer to the stack, the current buffer must be accessed. 104 | One solution is to use Thread Local Storage so that each thread will be able 105 | to access its own buffer at any time. The overhead of looking up the TLS 106 | pointer is a couple of extra instructions in a release build to check that 107 | the buffer data has been initialized 108 | 109 | A journal buffer maintains a count at offset 0 to indicate how many words of 110 | adjustment data have been written. This count might be written to using 111 | [release](https://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html) ordering 112 | while the GC thread might read the count using acquire ordering. 113 | 114 | ## Garbage Collection Thread 115 | 116 | In the basic MPSC use case, the GC thread reads reference count adjustments 117 | from the channel. For each inc/dec adjustment, it must look up the 118 | associated pointer in the cache of root pointers and update the total reference 119 | count for that pointer. 120 | 121 | In the case of multiple channels, each sending a buffer of adjustments at a 122 | time, there will naturally be an ordering problem: 123 | 124 | Thread A may, for a pointer, write the following to its journal: 125 | 126 | |Action|adjustment| | 127 | | --- | --- | --- | 128 | |new pointer|+1|| 129 | |clone pointer|+1|(move cloned pointer to Thread B)| 130 | |drop pointer|-1|| 131 | 132 | Thread B may do the following a brief moment later after receiving the 133 | cloned pointer: 134 | 135 | |Action|adjustment| | 136 | | --- | --- | --- | 137 | |drop pointer|-1|(drop cloned pointer)| 138 | 139 | The order in which these adjustments are processed by the GC thread may well 140 | be out of order, and there is no information available to restore the correct 141 | order. The decrement from Thread B might be processed first, followed by the 142 | first increment from Thread A, giving a momentary reference count of 0. The 143 | collector may kick in at that point, freeing the object and resulting in a 144 | possible use-after-free and possibly a double-free. 145 | 146 | Here, learning from [Bacon2003][1], decrement adjustments should be 147 | buffered by an amount of time sufficient to clear all increment adjustments 148 | that occurred prior to those decrements. An appropriate amount of time might 149 | be provided by scanning the mutator threads' 150 | buffers one further iteration before applying the buffered decrements. 151 | 152 | Increment adjustments can be applied immediately, always. 153 | 154 | # Tracing 155 | 156 | While more advanced or efficient algorithms might be applied here, this section 157 | will describe how two-colour mark and sweep can be applied. 
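In outline, and using `HashMap`s in place of the tries described below plus a simplified trace signature, the collection cycle looks roughly like this:

```rust
use std::collections::HashMap;

// Simplified stand-ins: the crate uses bitmapped tries and a richer Trace trait.
trait Trace {
    /// Push the addresses of any GC-managed objects this object refers to.
    fn trace(&self, out: &mut Vec<usize>);
}

struct HeapEntry {
    object: Box<dyn Trace>,
    marked: bool,
}

/// roots: object address -> reference count; heap: object address -> entry.
fn collect(roots: &HashMap<usize, usize>, heap: &mut HashMap<usize, HeapEntry>) {
    // Mark phase: everything reachable from a root with a non-zero count is live.
    let mut pending: Vec<usize> = roots.iter()
        .filter(|&(_, &count)| count > 0)
        .map(|(&addr, _)| addr)
        .collect();

    while let Some(addr) = pending.pop() {
        if let Some(entry) = heap.get_mut(&addr) {
            if !entry.marked {
                entry.marked = true;
                entry.object.trace(&mut pending);
            }
        }
    }

    // Sweep phase: drop unmarked entries, clear the mark bit on survivors.
    heap.retain(|_, entry| {
        let live = entry.marked;
        entry.marked = false;
        live
    });
}
```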
158 | 159 | As in [rust-gc][4], all types participating in GC must implement 160 | a trait that allows that type to be traced. (This is an inconvenience that 161 | a compiler plugin may be able to alleviate for many cases.) 162 | 163 | The GC thread maintains two trie structures: one to map from roots to 164 | reference counts; a second to map from heap objects to any metadata needed to 165 | run `drop()` against them, and bits for marking objects as live. 166 | 167 | The roots trie is traversed, calling the trace function for each. Every visited 168 | object is marked in the heap trie. 169 | 170 | Then the heap trie is traversed and every unmarked entry is `drop()`ped and 171 | the live objects unmarked. 172 | 173 | It is worth noting that by using a separate data structure for the heap and 174 | root caches that this GC scheme remains `fork()` memory friendly: the act 175 | of updating reference counts and marking heap objects does not force a 176 | page copy-on-write for every counted and marked object location. 177 | 178 | # Concurrent Data Structures 179 | 180 | To prevent data races between the mutator threads and the GC thread, all 181 | GC-managed data structures that contain pointers to other GC-managed objects 182 | must be transactional in updates to those relationships. That is, a 183 | `GcRoot>` can contain mutable data where the mutability follows only 184 | the Rust static analysis rules, but a `GcRoot>>` must be 185 | reimplemented additionally with a transactional runtime nature. 186 | 187 | The `Vec::trace()` method has to be able to provide a readonly 188 | snapshot of its contents to the GC thread and atomic updates to its 189 | contents. 190 | 191 | Applying a compile-time distinction between these may be possible using the 192 | type system. Indeed, presenting a safe API is one of the challenges in 193 | implementing this. 194 | 195 | As the `trace()` method is part of the data structure code itself, data 196 | structures should be free to implement any method of atomic update without the 197 | GC code or thread needing to be aware of transactions or their mechanism. 198 | 199 | The `trace()` method may, depending on the data structure characteristics, 200 | opt to return immediately with an "defer" status, meaning that at the time 201 | of calling, it isn't expedient to obtain a readonly snapshot of the data 202 | structure for tracing. In that case, the GC thread will requeue the object 203 | for a later attempt. 204 | 205 | Fortunately, concurrent data structures are fairly widely researched and 206 | in use by 2015 and I will not go into implementation details here. 207 | 208 | # Tradeoffs 209 | 210 | How throughput compares to other GC algorithms is left to 211 | readers more experienced in the field to say. My guess is that with the overhead 212 | of the journal while doing mostly new-generation collections that this 213 | algorithm should be competitive for multiple threads on multiprocessing 214 | machines. The single-threaded case will suffer from the concurrent data 215 | structure overhead. 216 | 217 | Non-atomic objects must be transactional, adding the runtime and complexity 218 | cost associated with concurrent data structures: the garbage generated. In some 219 | circumstances there could be enormous amounts of garbage generated, raising the 220 | overall overhead of using the GC to where the GC thread affects throughput. 
221 | 222 | Jemalloc is said to give low fragmentation rates compared to other malloc 223 | implementations, but fragmentation is likely nonetheless. 224 | 225 | At least this one language/compiler safety issue remains: referencing 226 | GC-managed pointers in a `drop()` is currently considered safe by the compiler 227 | as it has no awareness of the GC, but doing so is of course unsafe as the order 228 | of collection is non-deterministic leading to possible use-after-free in custom 229 | `drop()` functions. 230 | 231 | # Rust Library Compatibility 232 | 233 | As the GC takes over the lifetime management of any objects put under its 234 | control - and that transfer of control is completely under the control of 235 | the programmer - any Rust libraries should work with it, including low-level 236 | libraries such as [coroutine-rs](https://github.com/rustcc/coroutine-rs) and 237 | by extension [mioco](https://github.com/dpc/mioco). 238 | 239 | This GC will never interfere with any code that uses only the native Rust 240 | memory management. 241 | 242 | # Improvements 243 | 244 | ## Compiler Plugin 245 | 246 | It is possible to give the compiler some degree of awareness of GC requirements 247 | through custom plugins, as implemented in [rust-gc][4] and [servo][13]. The same 248 | may be applicable here. 249 | 250 | In the future, this implementation would surely benefit from aspects of the 251 | planned [tracing hooks][5]. 252 | 253 | ## Generational Optimization 254 | 255 | Since the mutator threads write a journal of all root pointers, all 256 | pointers that the mutator uses will be recorded. It may be possible 257 | for the GC thread to use that fact to process batches of journal changes 258 | in a generational manner, rather than having to trace the entire heap 259 | on every iteration. This needs further investigation. 260 | 261 | ## Parallel Collection 262 | 263 | The tries used in the GC should be amenable to parallelizing tracing which 264 | may be particularly beneficial in conjunction with tracing the whole heap. 265 | 266 | ## Copying Collector 267 | 268 | Any form of copying or moving collector would require a custom allocator and 269 | probably a Baker-style read barrier. The barrier could be implemented on the 270 | root smart pointers with the added expense of the mutator threads having to 271 | check whether the pointer must be updated on every dereference. There are 272 | pitfalls here though as the Rust compiler may optimize dereferences with 273 | pointers taking temporary but hard-to-discover root in CPU registers. It may 274 | be necessary to use the future tracing hooks to discover all roots to avoid 275 | Bad Things happening. 276 | 277 | # Patent Issues 278 | 279 | I have read through the patents granted to IBM and David F. Bacon that cover 280 | reference counting and have come to the conclusion that nothing described here 281 | infringes. 282 | 283 | I have not read further afield though. My assumption has been that there is 284 | prior art for most garbage collection methods at this point. 
285 | 286 | # References 287 | 288 | * [Bacon2003][1] Bacon et al, A Pure Reference Counting Garbage Collector 289 | * [Bacon2004][2] Bacon et al, A Unified Theory of Garbage Collection 290 | * [Oxischeme][3] Nick Fitzgerald, Memory Management in Oxischeme 291 | * [Manishearth/rust-gc][4] Manish Goregaokar, rust-gc project 292 | * [Rust blog][5] Rust in 2016 293 | * [rust-lang/rust#11399][6] Add garbage collector to std::gc 294 | * [rust-lang/rfcs#415][7] Garbage collection 295 | * [rust-lang/rust#2997][8] Tracing GC in rust 296 | * [Mechanical Sympathy][9] Martin Thompson, Single Writer Principle 297 | * [michaelwoerister/rs-persistent-datastructures][10] Michael Woerister, HAMT in Rust 298 | * [crossbeam][11] Aaron Turon, Lock-freedom without garbage collection 299 | * [Shenandoah][12] Shenandoah, a low-pause GC for the JVM 300 | * [Servo][13] Servo blog, JavaScript: Servo’s only garbage collector 301 | 302 | [1]: http://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon03Pure.pdf 303 | [2]: http://www.cs.virginia.edu/~cs415/reading/bacon-garbage.pdf 304 | [3]: http://fitzgeraldnick.com/weblog/60/ 305 | [4]: https://github.com/Manishearth/rust-gc 306 | [5]: http://blog.rust-lang.org/2015/08/14/Next-year.html 307 | [6]: https://github.com/rust-lang/rust/pull/11399 308 | [7]: https://github.com/rust-lang/rfcs/issues/415 309 | [8]: https://github.com/rust-lang/rust/issues/2997 310 | [9]: http://mechanical-sympathy.blogspot.co.uk/2011/09/single-writer-principle.html 311 | [10]: https://github.com/michaelwoerister/rs-persistent-datastructures 312 | [11]: http://aturon.github.io/blog/2015/08/27/epoch/ 313 | [12]: https://www.youtube.com/watch?v=QcwyKLlmXeY 314 | [13]: https://blog.mozilla.org/research/2014/08/26/javascript-servos-only-garbage-collector/ 315 | -------------------------------------------------------------------------------- /examples/balloon_animals.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | extern crate mo_gc; 4 | use mo_gc::{Gc, GcRoot, GcThread, StatsLogger, Trace, TraceOps, TraceStack}; 5 | 6 | 7 | struct Segment { 8 | next: Gc, 9 | } 10 | 11 | 12 | impl Segment { 13 | fn new() -> Segment { 14 | Segment { 15 | next: Gc::null() 16 | } 17 | } 18 | 19 | fn join_to(&mut self, to: Gc) { 20 | self.next = to; 21 | } 22 | } 23 | 24 | 25 | unsafe impl Trace for Segment { 26 | fn traversible(&self) -> bool { 27 | true 28 | } 29 | 30 | unsafe fn trace(&self, heap: &mut TraceStack) { 31 | if let Some(ptr) = self.next.as_raw() { 32 | heap.push_to_trace(&*ptr); 33 | } 34 | } 35 | } 36 | 37 | 38 | struct Balloon { 39 | head: Gc, 40 | tail: Gc, 41 | } 42 | 43 | 44 | impl Balloon { 45 | fn inflate() -> Balloon { 46 | let body = Gc::new(Segment::new()); 47 | Balloon { 48 | head: body, 49 | tail: body, 50 | } 51 | } 52 | 53 | fn twist(&mut self) { 54 | let mut new_seg = Gc::new(Segment::new()); 55 | new_seg.join_to(self.head); 56 | self.head = new_seg; 57 | } 58 | 59 | fn complete(&mut self) { 60 | self.tail.next = self.head; 61 | } 62 | 63 | fn count(&mut self) { 64 | let mut count = 0; 65 | let mut current = self.head; 66 | 67 | loop { 68 | current = current.next; 69 | count += 1; 70 | 71 | if current.is(self.tail) { 72 | break; 73 | } 74 | } 75 | 76 | if count != 1000 { 77 | println!("snake is short - only {} segments", count); 78 | } 79 | } 80 | } 81 | 82 | 83 | unsafe impl Trace for Balloon { 84 | fn traversible(&self) -> bool { 85 | true 86 | } 87 | 88 | unsafe fn trace(&self, heap: &mut TraceStack) { 89 | 
heap.push_to_trace(&*self.head as &Trace); 90 | } 91 | } 92 | 93 | 94 | fn snake() { 95 | // this many snake balloons 96 | for _snake in 0..5000 { 97 | let mut balloon = GcRoot::new(Balloon::inflate()); 98 | 99 | // with this many segments each 100 | for _segment in 0..1000 { 101 | balloon.twist(); 102 | } 103 | 104 | balloon.complete(); 105 | balloon.count(); 106 | } 107 | } 108 | 109 | 110 | fn main() { 111 | let gc = GcThread::spawn_gc(); 112 | 113 | let snake_handle = gc.spawn(|| snake()); 114 | 115 | let logger = gc.join().expect("gc failed"); 116 | logger.dump_to_stdout(); 117 | 118 | snake_handle.join().expect("snake failed"); 119 | } 120 | -------------------------------------------------------------------------------- /examples/correctnesstest.rs: -------------------------------------------------------------------------------- 1 | #![feature(alloc_system)] 2 | extern crate alloc_system; 3 | 4 | 5 | extern crate mo_gc; 6 | 7 | use mo_gc::{GcThread, GcRoot, StatsLogger, Trace}; 8 | 9 | 10 | struct Thing { 11 | value: [usize; 4] 12 | } 13 | 14 | 15 | unsafe impl Trace for Thing {} 16 | 17 | 18 | impl Thing { 19 | fn new() -> Thing { 20 | Thing { 21 | value: [42; 4] 22 | } 23 | } 24 | } 25 | 26 | 27 | impl Drop for Thing { 28 | fn drop(&mut self) { 29 | // any heap corruption might be evident here 30 | assert!(self.value[0] == 42); 31 | assert!(self.value[1] == 42); 32 | assert!(self.value[2] == 42); 33 | assert!(self.value[3] == 42); 34 | } 35 | } 36 | 37 | 38 | fn app() { 39 | for _ in 0..10000000 { 40 | let _new = GcRoot::new(Thing::new()); 41 | } 42 | } 43 | 44 | 45 | fn main() { 46 | let gc = GcThread::spawn_gc(); 47 | 48 | let app_handle = gc.spawn(|| app()); 49 | 50 | let logger = gc.join().expect("gc failed"); 51 | logger.dump_to_stdout(); 52 | 53 | app_handle.join().expect("app failed"); 54 | } 55 | -------------------------------------------------------------------------------- /examples/low_allocation_rate.rs: -------------------------------------------------------------------------------- 1 | 2 | extern crate stopwatch; 3 | use stopwatch::Stopwatch; 4 | 5 | extern crate mo_gc; 6 | 7 | use std::thread; 8 | use std::time::Duration; 9 | 10 | use mo_gc::{GcThread, GcRoot, Trace, StatsLogger}; 11 | 12 | 13 | const THING_SIZE: usize = 8; 14 | const THING_COUNT: i64 = 2500000; 15 | 16 | 17 | struct Thing { 18 | _data: [u64; THING_SIZE], 19 | } 20 | 21 | 22 | impl Thing { 23 | fn new() -> Thing { 24 | Thing { _data: [0; THING_SIZE] } 25 | } 26 | } 27 | 28 | 29 | unsafe impl Trace for Thing {} 30 | 31 | 32 | fn app() { 33 | let sw = Stopwatch::start_new(); 34 | 35 | thread::sleep(Duration::from_millis(100)); 36 | 37 | for count in 0..THING_COUNT { 38 | let _new = GcRoot::new(Thing::new()); 39 | 40 | if count & 0xfff == 0 { 41 | thread::sleep(Duration::from_millis(50)); 42 | } 43 | } 44 | 45 | let per_second = (THING_COUNT * 1000) / sw.elapsed_ms(); 46 | println!("app allocated {} objects at {} objects per second", THING_COUNT, per_second); 47 | println!("app finished in {}ms", sw.elapsed_ms()); 48 | } 49 | 50 | 51 | fn main() { 52 | let gc = GcThread::spawn_gc(); 53 | 54 | let app_handle = gc.spawn(|| app()); 55 | 56 | let logger = gc.join().expect("gc failed"); 57 | logger.dump_to_stdout(); 58 | 59 | app_handle.join().expect("app failed"); 60 | } 61 | -------------------------------------------------------------------------------- /examples/small_objects_stress.rs: -------------------------------------------------------------------------------- 1 | 2 | extern crate stopwatch; 3 | 
use stopwatch::Stopwatch; 4 | 5 | extern crate mo_gc; 6 | 7 | use mo_gc::{GcThread, GcRoot, Trace, StatsLogger}; 8 | 9 | 10 | const THING_SIZE: usize = 8; 11 | const THING_COUNT: i64 = 2500000; 12 | 13 | 14 | struct Thing { 15 | _data: [u64; THING_SIZE], 16 | } 17 | 18 | 19 | impl Thing { 20 | fn new() -> Thing { 21 | Thing { _data: [0; THING_SIZE] } 22 | } 23 | } 24 | 25 | 26 | unsafe impl Trace for Thing {} 27 | 28 | 29 | fn app() { 30 | let sw = Stopwatch::start_new(); 31 | 32 | for _ in 0..THING_COUNT { 33 | let _new = GcRoot::new(Thing::new()); 34 | } 35 | 36 | let per_second = (THING_COUNT * 1000) / sw.elapsed_ms(); 37 | println!("app allocated {} objects at {} objects per second", THING_COUNT, per_second); 38 | println!("app finished in {}ms", sw.elapsed_ms()); 39 | } 40 | 41 | 42 | fn main() { 43 | let gc = GcThread::spawn_gc(); 44 | 45 | let app_handle1 = gc.spawn(|| app()); 46 | let app_handle2 = gc.spawn(|| app()); 47 | 48 | let logger = gc.join().expect("gc failed"); 49 | logger.dump_to_stdout(); 50 | 51 | app_handle1.join().expect("app failed"); 52 | app_handle2.join().expect("app failed"); 53 | } 54 | -------------------------------------------------------------------------------- /src/appthread.rs: -------------------------------------------------------------------------------- 1 | //! Types for the mutator to use to build data structures 2 | 3 | 4 | use std::cell::Cell; 5 | use std::mem::transmute; 6 | use std::ops::{Deref, DerefMut}; 7 | use std::ptr::{null, null_mut}; 8 | use std::raw::TraitObject; 9 | use std::sync::atomic::{AtomicPtr, Ordering}; 10 | use std::thread; 11 | 12 | use constants::{INC_BIT, JOURNAL_BUFFER_SIZE, NEW_BIT, TRAVERSE_BIT}; 13 | use gcthread::{JournalSender, EntrySender}; 14 | use heap::{Object, TraceStack}; 15 | use journal; 16 | use trace::Trace; 17 | 18 | 19 | /// Each thread gets it's own EntrySender 20 | thread_local!( 21 | static GC_JOURNAL: Cell<*const EntrySender> = Cell::new(null()) 22 | ); 23 | 24 | 25 | /// GcBox struct and traits: a boxed object that is GC managed 26 | pub struct GcBox { 27 | value: T, 28 | } 29 | 30 | 31 | /// Root smart pointer, sends reference count changes to the journal. 32 | /// 33 | /// Whenever a reference to an object on the heap must be retained on the stack, this type must be 34 | /// used. It's use will ensure that the object will be seen as a root. 35 | pub struct GcRoot { 36 | ptr: *mut GcBox, 37 | } 38 | 39 | 40 | /// Non-atomic pointer type. This type is `!Sync` and thus is useful for presenting a Rust-ish 41 | /// API to a data structure where aliasing and mutability must follow the standard rules: there 42 | /// can be only one mutator. 43 | /// 44 | /// *Important note:* even though this type is `!Sync`, any data structures that are composed of 45 | /// `Gc` pointers must still be designed with the awareness that the GC thread will call `trace()` 46 | /// at any point and so, must still be thread safe! 47 | /// 48 | /// This is not a root pointer type. It should be used inside data structures to reference other 49 | /// GC-managed objects. 50 | pub struct Gc { 51 | ptr: *mut GcBox, 52 | } 53 | 54 | 55 | /// Atomic pointer type that points at a traceable object. This type is `Sync` and can be used to 56 | /// build concurrent data structures. 57 | /// 58 | /// This type should be used inside data structures to reference other GC-managed objects, but 59 | /// provides interior mutability and atomic methods. 
60 | /// 61 | /// TODO: cas, swap etc for GcRoot and Gc 62 | pub struct GcAtomic { 63 | ptr: AtomicPtr>, 64 | } 65 | 66 | 67 | /// An Application Thread, manages a thread-local reference to a tx channel 68 | /// 69 | /// TODO: a version of `spawn()` is required that can be called from an existing mutator thread. 70 | pub struct AppThread; 71 | 72 | 73 | impl AppThread { 74 | /// As thread::spawn but takes a journal Sender to initialize the thread_local instance with. 75 | pub fn spawn_from_gc(tx: JournalSender, f: F) -> thread::JoinHandle 76 | where F: FnOnce() -> T, 77 | F: Send + 'static, 78 | T: Send + 'static 79 | { 80 | thread::spawn(move || { 81 | let (jtx, jrx) = journal::make_journal(JOURNAL_BUFFER_SIZE); 82 | 83 | tx.send(jrx).expect("Failed to send a new Journal to the GC thread!"); 84 | 85 | GC_JOURNAL.with(|j| { 86 | j.set(&jtx); 87 | }); 88 | 89 | f() 90 | }) 91 | } 92 | } 93 | 94 | // Reference count functions. Only new-objects need to specify the traverse bit. 95 | 96 | #[inline] 97 | fn as_traitobject(object: &T) -> TraitObject { 98 | let trace: &Trace = object; 99 | unsafe { transmute(trace) } 100 | } 101 | 102 | 103 | /// Write a reference count increment to the journal for a newly allocated object 104 | #[inline] 105 | fn write(object: &T, is_new: bool, flags: usize) { 106 | GC_JOURNAL.with(|j| { 107 | let tx = unsafe { &*j.get() }; 108 | 109 | let tobj = as_traitobject(object); 110 | 111 | // set the refcount-increment bit 112 | let ptr = (tobj.data as usize) | flags; 113 | 114 | // set the traversible bit 115 | let mut vtable = tobj.vtable as usize; 116 | if is_new && object.traversible() { 117 | vtable |= TRAVERSE_BIT; 118 | } 119 | 120 | tx.send(Object { 121 | ptr: ptr, 122 | vtable: vtable, 123 | }); 124 | }); 125 | } 126 | 127 | // GcBox implementation 128 | 129 | impl GcBox { 130 | fn new(value: T) -> GcBox { 131 | GcBox { 132 | value: value, 133 | } 134 | } 135 | } 136 | 137 | 138 | unsafe impl Trace for GcBox { 139 | #[inline] 140 | fn traversible(&self) -> bool { 141 | self.value.traversible() 142 | } 143 | 144 | #[inline] 145 | unsafe fn trace(&self, heap: &mut TraceStack) { 146 | self.value.trace(heap); 147 | } 148 | } 149 | 150 | // GcRoot implementation 151 | 152 | impl GcRoot { 153 | /// Put a new object on the heap and hand ownership to the GC, writing a reference count 154 | /// increment to the journal. 
155 | pub fn new(value: T) -> GcRoot { 156 | let boxed = Box::new(GcBox::new(value)); 157 | write(&*boxed, true, NEW_BIT | INC_BIT); 158 | 159 | GcRoot { 160 | ptr: Box::into_raw(boxed) 161 | } 162 | } 163 | 164 | fn from_raw(ptr: *mut GcBox) -> GcRoot { 165 | let root = GcRoot { ptr: ptr }; 166 | write(&*root, false, INC_BIT); 167 | root 168 | } 169 | 170 | fn ptr(&self) -> *mut GcBox { 171 | self.ptr 172 | } 173 | 174 | fn value(&self) -> &T { 175 | unsafe { &(*self.ptr).value } 176 | } 177 | 178 | fn value_mut(&mut self) -> &mut T { 179 | unsafe { &mut (*self.ptr).value } 180 | } 181 | } 182 | 183 | 184 | impl Drop for GcRoot { 185 | fn drop(&mut self) { 186 | write(&**self, false, 0); 187 | } 188 | } 189 | 190 | 191 | impl Deref for GcRoot { 192 | type Target = T; 193 | 194 | fn deref(&self) -> &T { 195 | self.value() 196 | } 197 | } 198 | 199 | 200 | impl DerefMut for GcRoot { 201 | fn deref_mut(&mut self) -> &mut T { 202 | self.value_mut() 203 | } 204 | } 205 | 206 | 207 | impl Clone for GcRoot { 208 | fn clone(&self) -> Self { 209 | GcRoot::from_raw(self.ptr()) 210 | } 211 | } 212 | 213 | // Gc implementation 214 | 215 | impl Gc { 216 | /// Creates a new null pointer. 217 | pub fn null() -> Gc { 218 | Gc { 219 | ptr: null_mut(), 220 | } 221 | } 222 | 223 | /// Move a value to the heap and create a pointer to it. 224 | pub fn new(value: T) -> Gc { 225 | let boxed = Box::new(GcBox::new(value)); 226 | write(&*boxed, true, NEW_BIT); 227 | 228 | Gc { 229 | ptr: Box::into_raw(boxed) 230 | } 231 | } 232 | 233 | /// Return the raw pointer value, or None if it is a null pointer. 234 | pub fn as_raw(&self) -> Option<*mut GcBox> { 235 | if self.ptr.is_null() { 236 | None 237 | } else { 238 | Some(self.ptr) 239 | } 240 | } 241 | 242 | /// Pointer equality comparison. 243 | pub fn is(&self, other: Gc) -> bool { 244 | self.ptr == other.ptr 245 | } 246 | 247 | fn from_raw(ptr: *mut GcBox) -> Gc { 248 | Gc { 249 | ptr: ptr, 250 | } 251 | } 252 | 253 | fn ptr(&self) -> *mut GcBox { 254 | self.ptr 255 | } 256 | 257 | fn value(&self) -> &T { 258 | unsafe { &(*self.ptr).value } 259 | } 260 | 261 | fn value_mut(&mut self) -> &mut T { 262 | unsafe { &mut (*self.ptr).value } 263 | } 264 | } 265 | 266 | 267 | impl Deref for Gc { 268 | type Target = T; 269 | 270 | fn deref(&self) -> &T { 271 | self.value() 272 | } 273 | } 274 | 275 | 276 | impl DerefMut for Gc { 277 | fn deref_mut(&mut self) -> &mut T { 278 | self.value_mut() 279 | } 280 | } 281 | 282 | 283 | impl Clone for Gc { 284 | fn clone(&self) -> Self { 285 | Gc { 286 | ptr: self.ptr, 287 | } 288 | } 289 | } 290 | 291 | 292 | impl Copy for Gc {} 293 | 294 | // GcAtomic implementation 295 | 296 | impl GcAtomic { 297 | /// Instantiate a new null pointer 298 | pub fn null() -> GcAtomic { 299 | GcAtomic { 300 | ptr: AtomicPtr::new(null_mut()) 301 | } 302 | } 303 | 304 | /// Instantiate a new pointer, moving `value` to the heap. Writes to the journal. 305 | pub fn new(value: T) -> GcAtomic { 306 | let boxed = Box::new(GcBox::new(value)); 307 | write(&*boxed, true, NEW_BIT); 308 | 309 | GcAtomic { 310 | ptr: AtomicPtr::new(Box::into_raw(boxed)), 311 | } 312 | } 313 | 314 | /// Root the pointer by loading it into a `GcRoot` 315 | /// 316 | /// Panics if `order` is `Release` or `AcqRel`. 317 | pub fn load_into_root(&self, order: Ordering) -> GcRoot { 318 | let root = GcRoot { 319 | ptr: self.ptr.load(order), 320 | }; 321 | 322 | write(&*root, false, INC_BIT); 323 | root 324 | } 325 | 326 | /// Copy the pointer into a new `Gc` instance. 
327 |     ///
328 |     /// Panics if `order` is `Release` or `AcqRel`.
329 |     pub fn load_into_gc(&self, order: Ordering) -> Gc<T> {
330 |         Gc::from_raw(self.ptr.load(order))
331 |     }
332 | 
333 |     /// Fetch the current raw pointer value
334 |     ///
335 |     /// Panics if `order` is `Release` or `AcqRel`.
336 |     pub fn load_raw(&self, order: Ordering) -> *mut GcBox<T> {
337 |         self.ptr.load(order)
338 |     }
339 | 
340 |     /// Replace the current pointer value with the pointer from the given `GcRoot`.
341 |     ///
342 |     /// Panics if `order` is `Acquire` or `AcqRel`.
343 |     pub fn store_from_root(&self, root: GcRoot<T>, order: Ordering) {
344 |         self.ptr.store(root.ptr(), order);
345 |     }
346 | 
347 |     /// Replace the current pointer value with the pointer from the given `Gc`.
348 |     ///
349 |     /// Panics if `order` is `Acquire` or `AcqRel`.
350 |     pub fn store_from_gc(&self, gc: Gc<T>, order: Ordering) {
351 |         self.ptr.store(gc.ptr(), order);
352 |     }
353 | 
354 |     /// Replace the current pointer value with the given raw pointer
355 |     ///
356 |     /// Panics if `order` is `Acquire` or `AcqRel`.
357 |     pub fn store_raw(&self, ptr: *mut GcBox<T>, order: Ordering) {
358 |         self.ptr.store(ptr, order);
359 |     }
360 | }
361 | 
--------------------------------------------------------------------------------
/src/constants.rs:
--------------------------------------------------------------------------------
1 | //! Numerous constants used as parameters to GC behavior
2 | //!
3 | //! These journal and GC parameters should become runtime-configurable rather than compile-time constants.
4 | 
5 | 
6 | // Journal and GC parameters
7 | pub const JOURNAL_BUFFER_SIZE: usize = 32768;
8 | pub const BUFFER_RUN: usize = 1024;
9 | pub const JOURNAL_RUN: usize = 32;
10 | pub const MAX_SLEEP_DUR: usize = 100; // milliseconds
11 | pub const MIN_SLEEP_DUR: usize = 1; // milliseconds
12 | pub const MAJOR_COLLECT_THRESHOLD: usize = 1 << 20;
13 | 
14 | // Cache line size in bytes
15 | pub const CACHE_LINE: usize = 64;
16 | 
17 | // Bits and masks
18 | pub const PTR_MASK: usize = !3;
19 | pub const MARK_BIT: usize = 1;
20 | pub const MARK_MASK: usize = !1;
21 | pub const TRAVERSE_BIT: usize = 2;
22 | 
23 | // mask for the flag bits carried in the low bits of an object's address through the journal
24 | pub const FLAGS_MASK: usize = 3;
25 | 
26 | // bit flag indicating that a reference count is being incremented
27 | pub const INC_BIT: usize = 1;
28 | // bit flag indicating that an object is newly allocated
29 | pub const NEW_BIT: usize = 2;
30 | pub const NEW_MASK: usize = !2;
31 | 
32 | // Values found in the 2 bits masked by FLAGS_MASK
33 | // new object, increment refcount
34 | pub const NEW_INC: usize = 3;
35 | // new object, not rooted
36 | pub const NEW: usize = 2;
37 | // old object, increment refcount
38 | pub const INC: usize = 1;
39 | // decrement refcount
40 | pub const DEC: usize = 0;
41 | 
--------------------------------------------------------------------------------
/src/gcthread.rs:
--------------------------------------------------------------------------------
1 | //! Garbage collection thread
2 | 
3 | 
4 | use std::any::Any;
5 | use std::cmp::min;
6 | use std::mem::size_of;
7 | use std::sync::mpsc;
8 | use std::thread;
9 | use std::time::Duration;
10 | 
11 | use num_cpus;
12 | use scoped_pool::Pool;
13 | 
14 | use appthread::AppThread;
15 | use constants::{MAJOR_COLLECT_THRESHOLD, MAX_SLEEP_DUR, MIN_SLEEP_DUR};
16 | use heap::{CollectOps, Object};
17 | use journal;
18 | use parheap::ParHeap;
19 | use statistics::{StatsLogger, DefaultLogger};
20 | use youngheap::YoungHeap;
21 | 
22 | 
23 | pub type EntryReceiver = journal::Receiver<Object>;
24 | pub type EntrySender = journal::Sender<Object>;
25 | 
26 | pub type JournalReceiver = mpsc::Receiver<EntryReceiver>;
27 | pub type JournalSender = mpsc::Sender<EntryReceiver>;
28 | 
29 | pub type JournalList = Vec<EntryReceiver>;
30 | 
31 | 
32 | /// The Garbage Collection thread handle.
33 | pub struct GcThread<S: StatsLogger> {
34 |     /// This is cloned and given to app threads.
35 |     tx_chan: JournalSender,
36 | 
37 |     /// The GC thread's handle to join on.
38 |     handle: thread::JoinHandle<S>,
39 | }
40 | 
41 | 
42 | impl GcThread<DefaultLogger> {
43 |     /// Spawn a GC thread with default parameters: a `ParHeap` and a `DefaultLogger` parallelized
44 |     /// across all available CPUs.
45 |     pub fn spawn_gc() -> GcThread<DefaultLogger> {
46 |         let cores = num_cpus::get();
47 |         Self::spawn_gc_with(cores, ParHeap::new(cores), DefaultLogger::new())
48 |     }
49 | }
50 | 
51 | 
52 | impl<S: StatsLogger + 'static> GcThread<S> {
53 |     /// Spawn the GC on its own thread, parallelized across `num_threads` workers, and return a
54 |     /// `GcThread` handle for spawning app threads and joining on the GC. The caller provides a
55 |     /// custom `StatsLogger` implementation and a `CollectOps` heap implementation.
56 |     pub fn spawn_gc_with<T>(num_threads: usize, mature: T, logger: S) -> GcThread<S>
57 |         where T: CollectOps + Send + 'static
58 |     {
59 |         let (tx, rx) = mpsc::channel();
60 | 
61 |         let handle = thread::spawn(move || gc_thread(num_threads, rx, mature, logger));
62 | 
63 |         GcThread {
64 |             tx_chan: tx,
65 |             handle: handle,
66 |         }
67 |     }
68 | 
69 |     /// Spawn an app thread that journals to the GC thread.
70 |     pub fn spawn<F, T>(&self, f: F) -> thread::JoinHandle<T>
71 |         where F: FnOnce() -> T,
72 |               F: Send + 'static,
73 |               T: Send + 'static
74 |     {
75 |         AppThread::spawn_from_gc(self.tx_chan.clone(), f)
76 |     }
77 | 
78 |     /// Wait for the GC thread to finish. On success, returns the object that implements
79 |     /// `StatsLogger` for the calling thread to examine.
80 |     pub fn join(self) -> Result<S, Box<Any + Send + 'static>> {
81 |         self.handle.join()
82 |     }
83 | }
84 | 
85 | 
86 | /// Main GC thread loop.
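///
/// For orientation, a minimal sketch of how an application drives this loop through the public
/// API (illustrative only; it uses only items defined in this crate):
///
/// ```ignore
/// use mo_gc::{GcRoot, GcThread, StatsLogger};
///
/// let gc = GcThread::spawn_gc();
///
/// let app = gc.spawn(|| {
///     // allocations and roots on this thread are journaled to the GC thread
///     let _root = GcRoot::new(42usize);
/// });
///
/// app.join().unwrap();
///
/// // once every app journal has disconnected, the loop below finishes and returns its logger
/// let stats = gc.join().unwrap();
/// stats.dump_to_stdout();
/// ```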
87 | fn gc_thread<T, S>(num_threads: usize, rx_chan: JournalReceiver, mature: T, logger: S) -> S
88 |     where S: StatsLogger,
89 |           T: CollectOps + Send
90 | {
91 |     let mut pool = Pool::new(num_threads);
92 | 
93 |     let mut gc = YoungHeap::new(num_threads, mature, logger);
94 | 
95 |     // block, wait for first journal
96 |     gc.add_journal(rx_chan.recv().expect("Failed to receive first app journal!"));
97 | 
98 |     gc.logger().mark_start_time();
99 | 
100 |     // next duration to sleep if all journals are empty
101 |     let mut sleep_dur: usize = 0;
102 | 
103 |     // loop until all journals are disconnected
104 |     while gc.num_journals() > 0 {
105 | 
106 |         // new appthread connected
107 |         if let Ok(journal) = rx_chan.try_recv() {
108 |             gc.add_journal(journal);
109 |         }
110 | 
111 |         let entries_read = gc.read_journals();
112 | 
113 |         // sleep if nothing read from journal
114 |         if entries_read == 0 {
115 |             thread::sleep(Duration::from_millis(sleep_dur as u64));
116 | 
117 |             gc.logger().add_sleep(sleep_dur);
118 | 
119 |             // back off exponentially up to the max
120 |             sleep_dur = min(sleep_dur * 2, MAX_SLEEP_DUR);
121 |         } else {
122 |             // entries were read: reset the sleep duration to the minimum
123 |             sleep_dur = MIN_SLEEP_DUR;
124 |         }
125 | 
126 |         // TODO: base this call on a duration since last call?
127 |         let young_count = gc.minor_collection(&mut pool);
128 | 
129 |         // do a major collection if the young count reaches a threshold and we're not just trying
130 |         // to keep up with the app threads
131 |         // TODO: force a major collection every n minutes
132 |         if sleep_dur != MIN_SLEEP_DUR && young_count >= MAJOR_COLLECT_THRESHOLD {
133 |             gc.major_collection(&mut pool);
134 |         }
135 |     }
136 | 
137 |     // do a final collection where all roots should be unrooted
138 |     gc.minor_collection(&mut pool);
139 |     gc.major_collection(&mut pool);
140 | 
141 |     // return logger to calling thread
142 |     gc.logger().mark_end_time();
143 |     gc.shutdown()
144 | }
145 | 
146 | 
147 | /// Pointers are word-aligned, meaning the least-significant 2 or 3 bits are always 0, depending
148 | /// on the word size.
149 | #[inline]
150 | pub fn ptr_shift() -> i32 {
151 |     if size_of::<usize>() == 4 {
152 |         2
153 |     } else {
154 |         3
155 |     }
156 | }
157 | 
--------------------------------------------------------------------------------
/src/heap.rs:
--------------------------------------------------------------------------------
1 | //! Core heap traits and data types
2 | //!
3 | //! TODO: RootMeta and ObjectMeta have some things in common, perhaps use traits to abstract
4 | //! the differences, then perhaps YoungHeap and ParHeap can share more code?
5 | 
6 | 
7 | use std::cell::Cell;
8 | use std::mem::transmute;
9 | use std::raw::TraitObject;
10 | use std::sync::atomic::{AtomicUsize, Ordering};
11 | 
12 | use bitmaptrie::Trie;
13 | use scoped_pool::Pool;
14 | 
15 | use constants::{MARK_BIT, MARK_MASK, NEW_BIT, NEW_MASK, PTR_MASK, TRAVERSE_BIT};
16 | use gcthread::ptr_shift;
17 | use trace::Trace;
18 | 
19 | 
20 | pub type ObjectBuf = Vec<Object>;
21 | pub type RootMap = Trie<RootMeta>;
22 | pub type HeapMap = Trie<ObjectMeta>;
23 | 
24 | 
25 | /// A trait that describes Trace operations on a Heap
26 | pub trait TraceOps {
27 |     /// Buffer the given object for future tracing on the trace stack. This method should be called
28 |     /// by objects that implement the Trace trait, from the Trace::trace() method.
29 |     fn push_to_trace(&mut self, object: &Trace);
30 | }
31 | 
32 | 
33 | /// A trait that describes collection operations on a Heap
34 | pub trait CollectOps {
35 |     /// Add an object directly to the heap.
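    ///
    /// Note that `ptr` is the trie key for the object rather than its raw address: callers pass
    /// the object address right-shifted by `ptr_shift()`, as `ParHeap::add_object` and
    /// `YoungHeap::major_collection` do, and `vtable` is the object's `Trace` vtable pointer.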
36 | fn add_object(&mut self, ptr: usize, vtable: usize); 37 | 38 | /// Run a collection iteration on the heap. Return the total heap size and the number of 39 | /// dropped objects. 40 | fn collect(&mut self, thread_pool: &mut Pool, roots: &mut RootMap) -> (usize, usize); 41 | } 42 | 43 | 44 | /// A journal item. Essentially just a Send-able TraitObject 45 | #[derive(Copy, Clone)] 46 | pub struct Object { 47 | pub ptr: usize, 48 | pub vtable: usize, 49 | } 50 | 51 | 52 | /// Root pointer metadata 53 | pub struct RootMeta { 54 | /// the root reference count. This gets decremented by multiple threads and thus must be 55 | /// thread safe. 56 | pub refcount: AtomicUsize, 57 | /// the Trace trait vtable pointer 58 | pub vtable: usize, 59 | /// bits for flags 60 | pub flags: Cell, 61 | } 62 | 63 | 64 | /// A GC-managed pointer's metadata 65 | pub struct ObjectMeta { 66 | /// Using bit 0 as the mark bit (MARK_BIT) 67 | /// Using bit 1 to indicate traversibility (TRAVERSE_BIT) 68 | /// Normally we'd use an AtomicUsize, but since the operations on the value are one-way, 69 | /// i.e. setting a mark bit in parallel, or unsetting it in parallel, we don't need to worry 70 | /// about data races. The worst that will happen is that two threads will try to trace the 71 | /// same object concurrently. 72 | pub vtable: Cell, 73 | } 74 | 75 | 76 | /// A type that contains a stack of objects to trace into. This type is separated out from the 77 | /// main Heap type so that different collection strategies can be implemented without affecting 78 | /// the client code. The `Trace` trait depends only this type, then, and not the whole Heap 79 | /// type. 80 | pub struct TraceStack { 81 | stack: ObjectBuf, 82 | } 83 | 84 | 85 | unsafe impl Send for Object {} 86 | 87 | unsafe impl Send for RootMeta {} 88 | unsafe impl Sync for RootMeta {} 89 | 90 | unsafe impl Send for ObjectMeta {} 91 | // We're using a Cell and not an Atomic in ObjectMeta but that is ok for how we are using it. 
92 | unsafe impl Sync for ObjectMeta {} 93 | 94 | 95 | impl Object { 96 | pub fn from_trie_ptr(ptr: usize, vtable: usize) -> Object { 97 | Object { 98 | ptr: ptr << ptr_shift(), 99 | vtable: vtable, 100 | } 101 | } 102 | 103 | // Return this object as a Trace trait object reference 104 | pub fn as_trace(&self) -> &Trace { 105 | let tobj: TraitObject = Object::into(*self); 106 | unsafe { transmute(tobj) } 107 | } 108 | } 109 | 110 | 111 | impl From for Object { 112 | fn from(tobj: TraitObject) -> Object { 113 | Object { 114 | ptr: tobj.data as usize, 115 | vtable: tobj.vtable as usize, 116 | } 117 | } 118 | } 119 | 120 | 121 | impl Into for Object { 122 | fn into(self) -> TraitObject { 123 | TraitObject { 124 | data: self.ptr as *mut (), 125 | // make sure traverse and mark bits are cleared 126 | vtable: (self.vtable & PTR_MASK) as *mut (), 127 | } 128 | } 129 | } 130 | 131 | 132 | impl RootMeta { 133 | pub fn new(refcount: usize, vtable: usize, flags: usize) -> RootMeta { 134 | RootMeta { 135 | refcount: AtomicUsize::new(refcount), 136 | vtable: vtable, 137 | flags: Cell::new(flags), 138 | } 139 | } 140 | 141 | // Initialize with a reference count of 1 142 | pub fn one(vtable: usize, flags: usize) -> RootMeta { 143 | Self::new(1, vtable, flags) 144 | } 145 | 146 | // Initialize with a reference count of 0 147 | pub fn zero(vtable: usize, flags: usize) -> RootMeta { 148 | Self::new(0, vtable, flags) 149 | } 150 | 151 | // Increment the reference count by 1 152 | #[inline] 153 | pub fn inc(&self) { 154 | self.refcount.fetch_add(1, Ordering::SeqCst); 155 | } 156 | 157 | // Decrement the reference count by 1 158 | #[inline] 159 | pub fn dec(&self) { 160 | self.refcount.fetch_sub(1, Ordering::SeqCst); 161 | } 162 | 163 | // Increment the reference count by 1, thread unsafe 164 | #[inline] 165 | pub fn unsync_inc(&self) { 166 | let refcount = self.unsync_refcount(); 167 | refcount.set(refcount.get() + 1); 168 | } 169 | 170 | // Decrement the reference count by 1, thread unsafe 171 | #[inline] 172 | pub fn unsync_dec(&self) { 173 | let refcount = self.unsync_refcount(); 174 | refcount.set(refcount.get() - 1); 175 | } 176 | 177 | // Return true if this object has a zero reference count, thread unsafe 178 | #[inline] 179 | pub fn unsync_is_unrooted(&self) -> bool { 180 | let refcount = self.unsync_refcount(); 181 | refcount.get() == 0 182 | } 183 | 184 | // Return true if this is a new object 185 | #[inline] 186 | pub fn is_new(&self) -> bool { 187 | self.flags.get() & NEW_BIT != 0 188 | } 189 | 190 | // Return true if this is a new object and the mark bit is unset 191 | #[inline] 192 | pub fn is_new_and_unmarked(&self) -> bool { 193 | self.flags.get() & (MARK_BIT | NEW_BIT) == NEW_BIT 194 | } 195 | 196 | #[inline] 197 | pub fn set_not_new(&self) { 198 | self.flags.set(self.flags.get() & NEW_MASK); 199 | } 200 | 201 | // Mark this object and return true if it needs to be traced into 202 | #[inline] 203 | pub fn mark_and_needs_trace(&self) -> bool { 204 | let flags = self.flags.get(); 205 | 206 | let was_unmarked = flags & MARK_BIT == 0; 207 | if was_unmarked { 208 | self.flags.set(flags | MARK_BIT); 209 | } 210 | 211 | was_unmarked && flags & TRAVERSE_BIT != 0 212 | } 213 | 214 | // Reset the mark bit back to 0 215 | #[inline] 216 | pub fn unmark(&self) { 217 | self.flags.set(self.flags.get() & MARK_MASK); 218 | } 219 | 220 | // Returns the vtable without any flags set 221 | #[inline] 222 | pub fn vtable(&self) -> usize { 223 | self.vtable & PTR_MASK 224 | } 225 | 226 | // oh the horror, to save a 
few clock cycles
227 |     #[inline]
228 |     fn unsync_refcount(&self) -> &Cell<usize> {
229 |         let refcount: &Cell<usize> = unsafe { transmute(&self.refcount) };
230 |         refcount
231 |     }
232 | }
233 | 
234 | 
235 | impl ObjectMeta {
236 |     pub fn new(vtable: usize) -> ObjectMeta {
237 |         ObjectMeta { vtable: Cell::new(vtable) }
238 |     }
239 | 
240 |     // Mark this object and return true if it needs to be traced into
241 |     #[inline]
242 |     pub fn mark_and_needs_trace(&self) -> bool {
243 |         let vtable = self.vtable.get();
244 | 
245 |         let was_unmarked = vtable & MARK_BIT == 0;
246 |         if was_unmarked {
247 |             self.vtable.set(vtable | MARK_BIT);
248 |         }
249 | 
250 |         was_unmarked && vtable & TRAVERSE_BIT != 0
251 |     }
252 | 
253 |     // Query the mark bit
254 |     #[inline]
255 |     pub fn is_marked(&self) -> bool {
256 |         self.vtable.get() & MARK_BIT != 0
257 |     }
258 | 
259 |     // Unset the mark bit
260 |     #[inline]
261 |     pub fn unmark(&self) {
262 |         let vtable = self.vtable.get();
263 |         self.vtable.set(vtable & MARK_MASK);
264 |     }
265 | 
266 |     // Get the vtable ptr without mark or traverse bits set
267 |     #[inline]
268 |     pub fn vtable(&self) -> usize {
269 |         self.vtable.get() & PTR_MASK
270 |     }
271 | }
272 | 
273 | 
274 | impl TraceStack {
275 |     pub fn new() -> TraceStack {
276 |         TraceStack { stack: ObjectBuf::new() }
277 |     }
278 | 
279 |     pub fn push(&mut self, obj: Object) {
280 |         self.stack.push(obj);
281 |     }
282 | 
283 |     pub fn pop(&mut self) -> Option<Object> {
284 |         self.stack.pop()
285 |     }
286 | 
287 |     // Create initial contents from a slice of Objects
288 |     pub fn from_roots(&mut self, slice: &[Object]) {
289 |         self.stack.extend_from_slice(slice);
290 |     }
291 | }
292 | 
293 | 
294 | impl TraceOps for TraceStack {
295 |     fn push_to_trace(&mut self, object: &Trace) {
296 |         let tobj: TraitObject = unsafe { transmute(object) };
297 |         self.stack.push(Object::from(tobj));
298 |     }
299 | }
300 | 
--------------------------------------------------------------------------------
/src/journal.rs:
--------------------------------------------------------------------------------
1 | //! An SPSC queue implemented internally as a sequence of SPSC buffers.
2 | //!
3 | //! This queue will allocate new buffers indefinitely and eat up memory if the receiver doesn't
4 | //! keep up. Performance is better if the receiver keeps up as the allocator will likely reuse
5 | //! the same set of memory for each buffer.
6 | //!
7 | //! Because of TSO on x86, the store order by the sender means that the receiver can load values
8 | //! from the buffer without worrying that it'll read invalid data ahead of the sender.
9 | //! On other architectures, we use atomics with the associated performance penalty.
10 | 
11 | 
12 | use std::cell::Cell;
13 | use std::intrinsics::{needs_drop, abort};
14 | use std::mem::{align_of, size_of};
15 | use std::ptr::{null_mut, read, write, Unique};
16 | use std::sync::Arc;
17 | use std::sync::atomic::{AtomicPtr, Ordering};
18 | 
19 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
20 | use std::sync::atomic::AtomicUsize;
21 | 
22 | extern crate alloc;
23 | use self::alloc::heap::{allocate, deallocate};
24 | 
25 | use constants::CACHE_LINE;
26 | 
27 | 
28 | /// TSO means that we don't need atomics on x86 and that will speed things up.
29 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
30 | struct MaybeAtomicUsize {
31 |     value: Cell<usize>,
32 | }
33 | 
34 | 
35 | /// On weaker memory model platforms, default to atomics.
36 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] 37 | struct MaybeAtomicUsize { 38 | value: AtomicUsize, 39 | } 40 | 41 | 42 | /// A one-shot spsc buffer: once it's full and has been read, it is disposed of and a new Buffer 43 | /// is allocated. 44 | struct Buffer { 45 | data: Unique, 46 | 47 | capacity: usize, 48 | 49 | head: MaybeAtomicUsize, 50 | 51 | _cachepadding: [u8; CACHE_LINE], 52 | 53 | tail: MaybeAtomicUsize, 54 | tail_max: MaybeAtomicUsize, 55 | 56 | next: AtomicPtr>, 57 | } 58 | 59 | 60 | /// Since the buffers are linked together by raw pointers, this struct assumes ownership of that 61 | /// unsafe relationship, presenting it as safe. 62 | struct BufferQueue { 63 | // this pointer is only accessed by the Receiver 64 | head: Cell<*mut Buffer>, 65 | 66 | _cachepadding: [u8; CACHE_LINE], 67 | 68 | // this pointer is only accessed by the Sender 69 | tail: Cell<*mut Buffer>, 70 | // this value only written once by the Sender, read by the Receiver 71 | hup: Cell, 72 | } 73 | 74 | 75 | /// An iterator type that iters until the receiver returns empty. 76 | pub struct EmptyIter<'a, T: 'a> { 77 | receiver: &'a mut Receiver, 78 | } 79 | 80 | 81 | /// Similar to std::sync::mpsc::TryRecvError 82 | pub enum RecvResult { 83 | Empty, 84 | Disconnected, 85 | } 86 | 87 | 88 | /// A journal reader type which can be sent to another thread 89 | pub struct Receiver { 90 | buffer: Arc>, 91 | } 92 | 93 | 94 | /// A journal writer type which can be sent to another thread 95 | pub struct Sender { 96 | buffer: Arc>, 97 | } 98 | 99 | 100 | unsafe impl Send for Sender {} 101 | unsafe impl Send for Receiver {} 102 | 103 | 104 | impl BufferQueue { 105 | fn new(capacity: usize) -> BufferQueue { 106 | let first_buffer = Box::new(Buffer::new(capacity)); 107 | let ptr = Box::into_raw(first_buffer); 108 | 109 | BufferQueue { 110 | head: Cell::new(ptr), 111 | _cachepadding: [0; CACHE_LINE], 112 | tail: Cell::new(ptr), 113 | hup: Cell::new(false), 114 | } 115 | } 116 | 117 | /// use by Sender only 118 | fn tail(&self) -> *mut Buffer { 119 | self.tail.get() 120 | } 121 | 122 | /// use by Receiver only 123 | fn head(&self) -> *mut Buffer { 124 | self.head.get() 125 | } 126 | 127 | /// use by Receiver only 128 | fn replace_head(&self, next_head: *mut Buffer) { 129 | unsafe { Box::from_raw(self.head.get()) }; 130 | self.head.set(next_head); 131 | } 132 | 133 | /// use by Receiver only 134 | fn head_is_completed(&self) -> bool { 135 | unsafe { &*self.head() }.is_completed() 136 | } 137 | 138 | /// use by Receiver only 139 | fn next_head(&self) -> Option<*mut Buffer> { 140 | unsafe { &*self.head() }.next_buffer() 141 | } 142 | } 143 | 144 | 145 | impl Drop for BufferQueue { 146 | /// Drop all unread buffers. 147 | fn drop(&mut self) { 148 | let mut head = Some(self.head.get()); 149 | 150 | loop { 151 | let mut next = None; 152 | if let Some(head) = head { 153 | next = unsafe { &*head }.next_buffer(); 154 | 155 | unsafe { 156 | let owned = Box::from_raw(head); 157 | drop(owned); 158 | }; 159 | 160 | if let None = next { 161 | break; 162 | } 163 | } 164 | head = next; 165 | } 166 | } 167 | } 168 | 169 | 170 | impl Sender { 171 | /// Send a value to the Receiver. TODO this should probably return some kind of error on 172 | /// receiver hup. 
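    ///
    /// `send` never blocks: when the current buffer is full, a fresh buffer is allocated and
    /// linked in for the receiver to follow. A minimal round-trip sketch (illustrative only):
    ///
    /// ```ignore
    /// use mo_gc::make_journal;
    ///
    /// let (tx, rx) = make_journal::<usize>(8);
    /// tx.send(1);
    /// match rx.try_recv() {
    ///     Ok(value) => assert_eq!(value, 1),
    ///     _ => unreachable!(),
    /// }
    /// ```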
173 | pub fn send(&self, item: T) { 174 | let result = unsafe { &*self.buffer.tail() }.write(item); 175 | 176 | if let Some(new_tail) = result { 177 | self.buffer.tail.set(new_tail); 178 | } 179 | } 180 | } 181 | 182 | 183 | impl Drop for Sender { 184 | fn drop(&mut self) { 185 | // mark the last buffer as completed and set the HUP flag 186 | unsafe { &*self.buffer.tail() }.mark_completed(); 187 | self.buffer.hup.set(true); 188 | } 189 | } 190 | 191 | 192 | impl Receiver { 193 | /// Read a value from the queue if there is one available, otherwise return without blocking 194 | pub fn try_recv(&self) -> Result { 195 | let head = unsafe { &*self.buffer.head() }; 196 | let result = head.try_read(); 197 | 198 | match result { 199 | Some(value) => Ok(value), 200 | 201 | None => { 202 | // is this buffer completed by the sender? 203 | if self.buffer.head_is_completed() { 204 | 205 | // is there a next buffer? 206 | if let Some(next_head) = self.buffer.next_head() { 207 | self.buffer.replace_head(next_head); 208 | 209 | // peek at next buffer for a value befure returning empty 210 | let new_head = unsafe { &*self.buffer.head() }; 211 | if let Some(value) = new_head.try_read() { 212 | Ok(value) 213 | } else { 214 | Err(RecvResult::Empty) 215 | } 216 | 217 | } else { 218 | // no further buffer, did we get hung-up on? 219 | if self.buffer.hup.get() { 220 | Err(RecvResult::Disconnected) 221 | } else { 222 | Err(RecvResult::Empty) 223 | } 224 | } 225 | } else { 226 | Err(RecvResult::Empty) 227 | } 228 | } 229 | } 230 | } 231 | 232 | 233 | /// Make an Iterator that returns values until the queue is empty or disconnected. 234 | pub fn iter_until_empty(&mut self) -> EmptyIter { 235 | EmptyIter { receiver: self } 236 | } 237 | 238 | /// Has the Sender hung up? 239 | pub fn is_disconnected(&self) -> bool { 240 | if self.buffer.hup.get() { 241 | if let None = self.buffer.next_head() { 242 | return unsafe { &*self.buffer.head() }.is_empty(); 243 | } 244 | } 245 | 246 | false 247 | } 248 | } 249 | 250 | 251 | /// Return a Sender/Receiver pair that can be handed over to other threads. The capacity is the 252 | /// requested size of each internal buffer and will be rounded to the next power of two. 253 | pub fn make_journal(capacity: usize) -> (Sender, Receiver) { 254 | let buffer = Arc::new(BufferQueue::new(capacity)); 255 | 256 | (Sender { buffer: buffer.clone() }, 257 | Receiver { buffer: buffer }) 258 | } 259 | 260 | 261 | impl Buffer { 262 | /// Create a new Buffer instance, rounding the capacity up to the nearest power of two. 263 | fn new(requested_capacity: usize) -> Buffer { 264 | let rounded_capacity = requested_capacity.next_power_of_two(); 265 | 266 | let data = unsafe { 267 | let array = allocate(rounded_capacity * size_of::(), align_of::()); 268 | if array.is_null() { 269 | abort() 270 | }; 271 | Unique::new(array as *mut T) 272 | }; 273 | 274 | Buffer { 275 | data: data, 276 | capacity: rounded_capacity, 277 | head: MaybeAtomicUsize::new(0), 278 | _cachepadding: [0; CACHE_LINE], 279 | tail: MaybeAtomicUsize::new(0), 280 | tail_max: MaybeAtomicUsize::new(rounded_capacity as usize), 281 | next: AtomicPtr::new(null_mut()), 282 | } 283 | } 284 | 285 | /// Write to the buffer, returning Some(new_buffer) if the current one was full. 
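    /// The item is written before `tail` is advanced with a `Release` store (a plain store on
    /// x86), which pairs with the `Acquire` load of `tail` in `try_read`; similarly, a freshly
    /// allocated buffer is published through `next` with `Release` and picked up with `Acquire`
    /// in `next_buffer`.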
286 | fn write(&self, item: T) -> Option<*mut Buffer> { 287 | let tail = self.tail.load(Ordering::Relaxed); 288 | 289 | if tail < self.tail_max.load(Ordering::Relaxed) { 290 | // write to this buffer 291 | unsafe { write(self.data.offset(tail as isize), item) }; 292 | self.tail.fetch_add(1, Ordering::Release); 293 | None 294 | } else { 295 | // allocate a new buffer and write to that 296 | let buffer = Box::new(Buffer::new(self.capacity)); 297 | buffer.write(item); 298 | 299 | // save the pointer to the new buffer for the receiver 300 | let ptr = Box::into_raw(buffer); 301 | self.next.store(ptr, Ordering::Release); 302 | 303 | Some(ptr) 304 | } 305 | } 306 | 307 | /// Read the next item from the buffer, returning None if the buffer is full or if the contents 308 | /// thus far have been consumed. 309 | fn try_read(&self) -> Option { 310 | let head = self.head.load(Ordering::Relaxed); 311 | 312 | if head < self.tail.load(Ordering::Acquire) { 313 | // read from this buffer 314 | let item = unsafe { read(self.data.offset(head as isize)) }; 315 | self.head.fetch_add(1, Ordering::Relaxed); 316 | Some(item) 317 | } else { 318 | None 319 | } 320 | } 321 | 322 | /// Check the completion status. 323 | fn is_completed(&self) -> bool { 324 | self.tail_max.load(Ordering::Relaxed) == self.tail.load(Ordering::Acquire) 325 | } 326 | 327 | /// Mark this buffer as full. 328 | fn mark_completed(&self) { 329 | self.tail_max.store(self.tail.load(Ordering::Relaxed), Ordering::Relaxed); 330 | } 331 | 332 | /// Check for contents. 333 | fn is_empty(&self) -> bool { 334 | self.head.load(Ordering::Relaxed) == self.tail_max.load(Ordering::Relaxed) 335 | } 336 | 337 | /// Fetch the pointer to the next buffer if the Sender has written one. 338 | fn next_buffer(&self) -> Option<*mut Buffer> { 339 | let ptr = self.next.load(Ordering::Acquire); 340 | 341 | if ptr.is_null() { 342 | None 343 | } else { 344 | Some(ptr) 345 | } 346 | } 347 | } 348 | 349 | 350 | impl Drop for Buffer { 351 | fn drop(&mut self) { 352 | unsafe { 353 | // pop any remaining items if they need to be officially dropped 354 | if needs_drop::() { 355 | loop { 356 | match self.try_read() { 357 | None => break, 358 | _ => (), 359 | } 360 | } 361 | } 362 | 363 | deallocate(self.data.get_mut() as *mut T as *mut u8, 364 | self.capacity * size_of::(), 365 | align_of::()); 366 | } 367 | } 368 | } 369 | 370 | 371 | impl<'a, T> Iterator for EmptyIter<'a, T> { 372 | type Item = T; 373 | 374 | /// Ignores disconnected state 375 | fn next(&mut self) -> Option { 376 | if let Ok(item) = self.receiver.try_recv() { 377 | Some(item) 378 | } else { 379 | None 380 | } 381 | } 382 | } 383 | 384 | 385 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 386 | impl MaybeAtomicUsize { 387 | fn new(value: usize) -> MaybeAtomicUsize { 388 | MaybeAtomicUsize { value: Cell::new(value) } 389 | } 390 | 391 | #[inline] 392 | fn load(&self, _ordering: Ordering) -> usize { 393 | self.value.get() 394 | } 395 | 396 | #[inline] 397 | fn store(&self, value: usize, _ordering: Ordering) { 398 | self.value.set(value); 399 | } 400 | 401 | #[inline] 402 | fn fetch_add(&self, value: usize, _ordering: Ordering) -> usize { 403 | let old = self.value.get(); 404 | self.value.set(old + value); 405 | old 406 | } 407 | } 408 | 409 | 410 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] 411 | impl MaybeAtomicUsize { 412 | fn new(value: usize) -> MaybeAtomicUsize { 413 | MaybeAtomicUsize { value: AtomicUsize::new(value) } 414 | } 415 | 416 | #[inline] 417 | fn load(&self, 
ordering: Ordering) -> usize { 418 | self.value.load(ordering) 419 | } 420 | 421 | #[inline] 422 | fn store(&self, value: usize, ordering: Ordering) { 423 | self.value.store(value, ordering); 424 | } 425 | 426 | #[inline] 427 | fn fetch_add(&self, value: usize, ordering: Ordering) -> usize { 428 | self.value.fetch_add(value, ordering) 429 | } 430 | } 431 | 432 | 433 | #[cfg(test)] 434 | mod tests { 435 | 436 | use super::{make_journal, RecvResult}; 437 | 438 | 439 | const TEST_COUNT: usize = 12345; 440 | const TEST_BUFFER_SIZE: usize = 32; 441 | 442 | 443 | #[test] 444 | fn test_rx_tx() { 445 | let (tx, rx) = make_journal::(TEST_BUFFER_SIZE); 446 | 447 | for i in 0..TEST_COUNT { 448 | tx.send(i); 449 | 450 | let mut value = None; 451 | 452 | while let None = value { 453 | match rx.try_recv() { 454 | Ok(packet) => { 455 | assert!(packet == i); 456 | value = Some(packet); 457 | } 458 | 459 | // may get Empty on transitioning from one buffer to the next 460 | Err(RecvResult::Empty) => continue, 461 | Err(RecvResult::Disconnected) => assert!(false), 462 | } 463 | } 464 | } 465 | } 466 | 467 | #[test] 468 | fn test_disconnect() { 469 | let (tx, rx) = make_journal::(TEST_BUFFER_SIZE); 470 | 471 | drop(tx); 472 | 473 | match rx.try_recv() { 474 | Err(RecvResult::Disconnected) => (), 475 | _ => assert!(false), 476 | } 477 | } 478 | 479 | #[test] 480 | fn test_running_disconnect_tx() { 481 | let (tx, rx) = make_journal::(TEST_BUFFER_SIZE); 482 | 483 | // buffer up some values 484 | for i in 0..TEST_COUNT { 485 | tx.send(i); 486 | } 487 | 488 | drop(tx); 489 | 490 | // should still be able to receive all buffered values 491 | for i in 0..TEST_COUNT { 492 | let mut value = None; 493 | 494 | while let None = value { 495 | match rx.try_recv() { 496 | Ok(packet) => { 497 | assert!(packet == i); 498 | value = Some(packet); 499 | } 500 | 501 | // may get Empty on transitioning from one buffer to the next 502 | Err(RecvResult::Empty) => continue, 503 | Err(RecvResult::Disconnected) => assert!(false), 504 | } 505 | } 506 | } 507 | 508 | // should be disconnected 509 | match rx.try_recv() { 510 | Err(RecvResult::Disconnected) => (), 511 | _ => assert!(false), 512 | } 513 | } 514 | 515 | #[test] 516 | fn test_disconnect_rx() { 517 | let (tx, rx) = make_journal::(TEST_BUFFER_SIZE); 518 | 519 | drop(rx); 520 | 521 | tx.send(42); 522 | 523 | // TODO: tx.send() should return a Result with a disconnected status 524 | } 525 | } 526 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(alloc)] 2 | #![feature(core_intrinsics)] 3 | #![feature(heap_api)] 4 | #![feature(raw)] 5 | #![feature(unique)] 6 | 7 | 8 | //! # mo-gc 9 | //! 10 | //! A pauseless, concurrent, generational, parallel mark-and-sweep garbage collector. 11 | //! 12 | //! This is an experimental design to research an idea into a pauseless garbage collector. 13 | //! 14 | //! The GC handles multiple OS thread mutators without stopping their worlds. It does this by 15 | //! deferring reference counting of stack-rooted pointers to the GC thread through a journal 16 | //! of stack root changes. The journal itself is fast to write to, adding an amortized 25% to 17 | //! the cost of `Box::new()` using jemalloc for a 64 byte object. 18 | //! 19 | //! Thus the mutator never needs to be stopped for it's stack to be scanned or for any collection 20 | //! phase. 21 | //! 22 | //! 
See [project TODO](https://github.com/pliniker/mo-gc/blob/master/TODO.md) for limitations. 23 | //! 24 | //! ## Usage 25 | //! 26 | //! Usage is best illustrated by the examples provided. 27 | 28 | 29 | extern crate bitmaptrie; 30 | extern crate num_cpus; 31 | extern crate scoped_pool; 32 | extern crate time; 33 | 34 | 35 | mod appthread; 36 | mod constants; 37 | mod gcthread; 38 | mod heap; 39 | mod journal; 40 | mod parheap; 41 | mod statistics; 42 | mod trace; 43 | mod youngheap; 44 | 45 | 46 | pub use appthread::{AppThread, Gc, GcAtomic, GcBox, GcRoot}; 47 | pub use constants::*; 48 | pub use gcthread::GcThread; 49 | pub use heap::{CollectOps, TraceOps, TraceStack}; 50 | pub use journal::{make_journal, Receiver, Sender}; 51 | pub use parheap::ParHeap; 52 | pub use statistics::StatsLogger; 53 | pub use trace::Trace; 54 | pub use youngheap::YoungHeap; 55 | -------------------------------------------------------------------------------- /src/parheap.rs: -------------------------------------------------------------------------------- 1 | //! A parallel collector for the entire heap. 2 | 3 | 4 | use std::mem::transmute; 5 | use std::raw::TraitObject; 6 | use std::sync::Arc; 7 | use std::sync::atomic::{AtomicUsize, Ordering}; 8 | 9 | use scoped_pool::Pool; 10 | 11 | use gcthread::ptr_shift; 12 | use heap::{CollectOps, HeapMap, Object, ObjectMeta, RootMap, TraceStack}; 13 | use trace::Trace; 14 | 15 | 16 | /// This references all known GC-managed objects and handles marking and sweeping; parallel mark 17 | /// and sweep version. 18 | pub struct ParHeap { 19 | num_threads: usize, 20 | objects: HeapMap, 21 | } 22 | 23 | 24 | unsafe impl Send for ParHeap {} 25 | 26 | 27 | impl ParHeap { 28 | /// In this heap implementation, work is split out into a thread pool. There is no knowing, 29 | /// though, how much work each split actually represents. One thread may receive a 30 | /// disproportionate amount of tracing or sweeping. 31 | pub fn new(num_threads: usize) -> ParHeap { 32 | ParHeap { 33 | num_threads: num_threads, 34 | objects: HeapMap::new(), 35 | } 36 | } 37 | 38 | /// A parallel mark implementation: 39 | /// * shares a borrow of the main HeapMap among the thread pool 40 | /// * divides the roots among the thread pool 41 | /// * each thread traces from it's own slice of roots 42 | fn mark(&mut self, thread_pool: &mut Pool, roots: &mut RootMap) { 43 | // divide the roots among threads and trace 44 | let mut sharded_roots = roots.borrow_sharded(self.num_threads); 45 | 46 | thread_pool.scoped(|scope| { 47 | 48 | // borrow the main HeapMap for the duration of this scope 49 | let shared_objects = self.objects.borrow_sync(); 50 | 51 | // split roots into a slice for each thread and hand a slice and an new-object 52 | // HeapMap to each job 53 | for roots in sharded_roots.drain() { 54 | 55 | // make a thread-local trace stack and reference to the heap 56 | let objects = shared_objects.clone(); 57 | 58 | // mark using the thread-local slice of roots 59 | scope.execute(move || { 60 | 61 | let mut stack = TraceStack::new(); 62 | 63 | for (root_ptr, root_meta) in roots.iter() { 64 | if !root_meta.unsync_is_unrooted() && root_meta.mark_and_needs_trace() { 65 | // read the shard to find roots, which are all positive-refcount 66 | // entries. Trace the roots if they need it. 
67 | 68 | let obj = Object::from_trie_ptr(root_ptr, root_meta.vtable()); 69 | 70 | let object = obj.as_trace(); 71 | unsafe { object.trace(&mut stack) }; 72 | 73 | // now there may be some child objects on the trace stack: pull 74 | // them off and mark them too 75 | while let Some(obj) = stack.pop() { 76 | 77 | let ptr = obj.ptr >> ptr_shift(); 78 | if let Some(meta) = objects.get(ptr) { 79 | 80 | if meta.mark_and_needs_trace() { 81 | let object = obj.as_trace(); 82 | unsafe { object.trace(&mut stack) }; 83 | } 84 | } 85 | } 86 | } 87 | } 88 | }); // execute 89 | } 90 | }); // scope 91 | } 92 | 93 | /// A parallel sweep implementation: 94 | /// * the main HeapMap tree is split into subtrees and each thread is given a separate subtree 95 | /// to sweep 96 | /// Returns a tuple of (heap_object_count, dropped_object_count) 97 | fn sweep(&mut self, thread_pool: &mut Pool) -> (usize, usize) { 98 | // set counters 99 | let collect_heap_size = Arc::new(AtomicUsize::new(0)); 100 | let collect_drop_count = Arc::new(AtomicUsize::new(0)); 101 | 102 | // shard the heap 103 | let mut sharded_objects = self.objects.borrow_sharded(self.num_threads); 104 | 105 | thread_pool.scoped(|scope| { 106 | 107 | for mut shard in sharded_objects.drain() { 108 | 109 | // pass a reference to each counter to each thread 110 | let heap_size = collect_heap_size.clone(); 111 | let drop_count = collect_drop_count.clone(); 112 | 113 | // each thread sweeps a sub-trie 114 | scope.execute(move || { 115 | 116 | let mut heap_counter = 0; 117 | let mut drop_counter = 0; 118 | 119 | shard.retain_if(|ptr, meta| { 120 | heap_counter += 1; 121 | 122 | if !meta.is_marked() { 123 | drop_counter += 1; 124 | 125 | // if not marked, drop the object 126 | let tobj = TraitObject { 127 | data: (ptr << ptr_shift()) as *mut (), 128 | vtable: meta.vtable() as *mut (), 129 | }; 130 | 131 | unsafe { 132 | let fatptr: *mut Trace = transmute(tobj); 133 | let owned = Box::from_raw(fatptr); 134 | drop(owned); 135 | } 136 | 137 | false 138 | 139 | } else { 140 | // unmark the object 141 | meta.unmark(); 142 | true 143 | } 144 | }); 145 | 146 | // write out the counters 147 | heap_size.fetch_add(heap_counter, Ordering::SeqCst); 148 | drop_count.fetch_add(drop_counter, Ordering::SeqCst); 149 | }); 150 | } 151 | }); 152 | 153 | // return the counters 154 | (collect_heap_size.load(Ordering::Acquire), 155 | collect_drop_count.load(Ordering::Acquire)) 156 | } 157 | } 158 | 159 | 160 | impl CollectOps for ParHeap { 161 | /// Add an object directly to the heap. `ptr` is assumed to already be right-shift adjusted 162 | fn add_object(&mut self, ptr: usize, vtable: usize) { 163 | self.objects.set(ptr, ObjectMeta::new(vtable)); 164 | } 165 | 166 | /// Run a collection iteration on the heap. Return the total heap size and the number of 167 | /// dropped objects. 168 | fn collect(&mut self, thread_pool: &mut Pool, roots: &mut RootMap) -> (usize, usize) { 169 | self.mark(thread_pool, roots); 170 | self.sweep(thread_pool) 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/statistics.rs: -------------------------------------------------------------------------------- 1 | //! Performance counters and statistics 2 | 3 | 4 | use std::cmp::max; 5 | 6 | use time::{get_time, Timespec}; 7 | 8 | 9 | /// Type that provides counters for the GC to gain some measure of performance. 
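///
/// A custom implementation can be handed to `GcThread::spawn_gc_with`. A minimal sketch that
/// discards all statistics (`NullLogger` is a hypothetical name, not part of this crate):
///
/// ```ignore
/// use mo_gc::StatsLogger;
///
/// struct NullLogger;
///
/// impl StatsLogger for NullLogger {
///     fn mark_start_time(&mut self) {}
///     fn mark_end_time(&mut self) {}
///     fn add_sleep(&mut self, _ms: usize) {}
///     fn add_dropped(&mut self, _count: usize) {}
///     fn current_heap_size(&mut self, _size: usize) {}
///     fn dump_to_stdout(&self) {}
/// }
/// ```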
10 | pub trait StatsLogger: Send { 11 | /// mark start of time 12 | fn mark_start_time(&mut self); 13 | /// mark end of time 14 | fn mark_end_time(&mut self); 15 | /// add a number of milliseconds that the GcThread was asleep 16 | fn add_sleep(&mut self, ms: usize); 17 | 18 | /// add a count of dropped objects 19 | fn add_dropped(&mut self, count: usize); 20 | /// give the current heap object count 21 | fn current_heap_size(&mut self, size: usize); 22 | 23 | /// print statistics 24 | fn dump_to_stdout(&self); 25 | 26 | /// log something to stdout 27 | fn log(&self, string: &str) { 28 | println!("{}", string); 29 | } 30 | } 31 | 32 | 33 | pub struct DefaultLogger { 34 | max_heap_size: usize, 35 | 36 | total_dropped: usize, 37 | drop_iterations: usize, 38 | 39 | start_time: Timespec, 40 | stop_time: Timespec, 41 | sleep_time: u64, 42 | } 43 | 44 | 45 | unsafe impl Send for DefaultLogger {} 46 | 47 | 48 | impl DefaultLogger { 49 | pub fn new() -> DefaultLogger { 50 | DefaultLogger { 51 | max_heap_size: 0, 52 | total_dropped: 0, 53 | drop_iterations: 0, 54 | start_time: Timespec::new(0, 0), 55 | stop_time: Timespec::new(0, 0), 56 | sleep_time: 0, 57 | } 58 | } 59 | } 60 | 61 | 62 | impl StatsLogger for DefaultLogger { 63 | fn mark_start_time(&mut self) { 64 | self.start_time = get_time(); 65 | } 66 | 67 | fn mark_end_time(&mut self) { 68 | self.stop_time = get_time(); 69 | } 70 | 71 | fn add_sleep(&mut self, ms: usize) { 72 | self.sleep_time += ms as u64; 73 | } 74 | 75 | fn add_dropped(&mut self, count: usize) { 76 | self.total_dropped += count; 77 | self.drop_iterations += 1; 78 | } 79 | 80 | fn current_heap_size(&mut self, size: usize) { 81 | self.max_heap_size = max(self.max_heap_size, size); 82 | } 83 | 84 | fn dump_to_stdout(&self) { 85 | // calculate timing 86 | let total_time = max((self.stop_time - self.start_time).num_milliseconds(), 1); 87 | let active_time = total_time - self.sleep_time as i64; 88 | let percent_active_time = active_time * 100 / total_time; 89 | 90 | // calculate drop rate 91 | let dropped_per_second = self.total_dropped as i64 * 1000 / active_time; 92 | 93 | println!("max-heap {}; dropped {} (per second {}); active {}/{}ms ({}%)", 94 | self.max_heap_size, 95 | self.total_dropped, 96 | dropped_per_second, 97 | active_time, 98 | total_time, 99 | percent_active_time); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/trace.rs: -------------------------------------------------------------------------------- 1 | //! The Trace trait must be implemented by every type that can be GC managed. 2 | 3 | 4 | use heap::TraceStack; 5 | 6 | 7 | /// Trace trait. Every type that can be managed by the GC must implement this trait. 8 | /// This trait is unsafe in that incorrectly implementing it can cause Undefined Behavior. 9 | pub unsafe trait Trace { 10 | /// If the type can contain GC managed pointers, this must return true 11 | fn traversible(&self) -> bool { 12 | false 13 | } 14 | 15 | /// If the type can contain GC managed pointers, this must visit each pointer. 16 | /// 17 | /// This function must be thread-safe! 18 | /// 19 | /// It must read a snapshot of the data structure it is implemented for. 
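    ///
    /// A hedged sketch of an implementation for a type holding a `Gc` pointer (`Node` is a
    /// hypothetical type; the child is pushed as its `GcBox`, which is the address the journal
    /// registered for it):
    ///
    /// ```ignore
    /// use mo_gc::{Gc, Trace, TraceOps, TraceStack};
    ///
    /// struct Node {
    ///     next: Gc<Node>,
    /// }
    ///
    /// unsafe impl Trace for Node {
    ///     fn traversible(&self) -> bool {
    ///         true
    ///     }
    ///
    ///     unsafe fn trace(&self, stack: &mut TraceStack) {
    ///         if let Some(child) = self.next.as_raw() {
    ///             // &GcBox<Node> implements Trace, so it can be buffered for the collector
    ///             stack.push_to_trace(&*child);
    ///         }
    ///     }
    /// }
    /// ```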
20 | unsafe fn trace(&self, _stack: &mut TraceStack) {} 21 | } 22 | 23 | 24 | unsafe impl Trace for usize {} 25 | unsafe impl Trace for isize {} 26 | unsafe impl Trace for i8 {} 27 | unsafe impl Trace for u8 {} 28 | unsafe impl Trace for i16 {} 29 | unsafe impl Trace for u16 {} 30 | unsafe impl Trace for i32 {} 31 | unsafe impl Trace for u32 {} 32 | unsafe impl Trace for i64 {} 33 | unsafe impl Trace for u64 {} 34 | unsafe impl Trace for f32 {} 35 | unsafe impl Trace for f64 {} 36 | unsafe impl<'a> Trace for &'a str {} 37 | unsafe impl Trace for String {} 38 | -------------------------------------------------------------------------------- /src/youngheap.rs: -------------------------------------------------------------------------------- 1 | //! A partially-parallel young generation collector. 2 | //! 3 | //! Reading the journal into the root map is single-threaded. 4 | //! 5 | //! This is similar in construction to ParHeap, except that this object map must deal 6 | //! with reference counts from the journal. 7 | 8 | 9 | use std::cmp::max; 10 | use std::mem::transmute; 11 | use std::raw::TraitObject; 12 | use std::sync::Arc; 13 | use std::sync::atomic::{AtomicUsize, Ordering}; 14 | 15 | use scoped_pool::Pool; 16 | 17 | use constants::{BUFFER_RUN, DEC, FLAGS_MASK, INC, JOURNAL_RUN, NEW, NEW_BIT, NEW_INC}; 18 | use heap::{CollectOps, Object, ObjectBuf, RootMap, RootMeta, TraceStack}; 19 | use gcthread::{EntryReceiver, JournalList, ptr_shift}; 20 | use statistics::StatsLogger; 21 | use trace::Trace; 22 | 23 | 24 | /// Type that composes all the things we need to run garbage collection on young generation 25 | /// objects. 26 | /// 27 | /// The roots trie maps object addresses to their reference counts, vtables and `NEW` object 28 | /// flags. 29 | /// 30 | /// During tracing, positive reference count objects and non-`NEW` objects are considered 31 | /// possible roots and only `NEW` objects are considered for marking and sweeping. Entries 32 | /// can be both roots and `NEW`. 33 | /// 34 | /// Collection is run in a thread pool across all CPUs by default by sharding the root trie 35 | /// across threads. 36 | pub struct YoungHeap { 37 | /// Size of the thread pool 38 | num_threads: usize, 39 | 40 | /// A list of AppThread journals to read from 41 | journals: JournalList, 42 | 43 | /// Map of object addresses to reference counts and other data 44 | roots: RootMap, 45 | 46 | /// Buffer of deferred negative reference count adjustments 47 | deferred: ObjectBuf, 48 | 49 | /// The mature object space 50 | mature: T, 51 | 52 | /// Something that implements statistics logging 53 | logger: S, 54 | } 55 | 56 | 57 | impl YoungHeap { 58 | /// Create a new young generation heap and roots reference count tracker 59 | pub fn new(num_threads: usize, mature: T, logger: S) -> YoungHeap { 60 | YoungHeap { 61 | num_threads: num_threads, 62 | journals: JournalList::new(), 63 | roots: RootMap::new(), 64 | deferred: ObjectBuf::new(), 65 | mature: mature, 66 | logger: logger, 67 | } 68 | } 69 | 70 | /// Add a new journal to the list of journals to read 71 | pub fn add_journal(&mut self, recv: EntryReceiver) { 72 | self.journals.push(recv); 73 | } 74 | 75 | /// Returns the number of journals currently connected to the GC 76 | pub fn num_journals(&self) -> usize { 77 | self.journals.len() 78 | } 79 | 80 | /// Read all journals for a number of iterations, updating the roots and keeping a reference 81 | /// count increment for each, and putting decrements into the deferred buffer. 
82 | /// 83 | /// This function is single-threaded and is the biggest GC throughput bottleneck. Setting a 84 | /// value in the trie is slow compared to allocation and writing/reading the journal. 85 | /// 86 | /// Easily consumes 80% of linear GC time. TODO: parallelize this function. 87 | /// 88 | /// Returns the number of journal entries read. 89 | pub fn read_journals(&mut self) -> usize { 90 | let mut entry_count = 0; 91 | 92 | // read through the journals a few times 93 | for _ in 0..JOURNAL_RUN { 94 | 95 | // for each journal 96 | for journal in self.journals.iter_mut() { 97 | 98 | 99 | // read the journal until empty or a limited number of entries have been pulled 100 | for entry in journal.iter_until_empty().take(BUFFER_RUN) { 101 | 102 | entry_count += 1; 103 | 104 | match entry.ptr & FLAGS_MASK { 105 | NEW_INC => { 106 | let ptr = entry.ptr >> ptr_shift(); 107 | self.roots.set(ptr, RootMeta::one(entry.vtable, NEW_BIT)); 108 | } 109 | 110 | NEW => { 111 | let ptr = entry.ptr >> ptr_shift(); 112 | self.roots.set(ptr, RootMeta::zero(entry.vtable, NEW_BIT)); 113 | } 114 | 115 | INC => { 116 | let ptr = entry.ptr >> ptr_shift(); 117 | 118 | let meta = self.roots.get_default_mut(ptr, || { 119 | RootMeta::zero(entry.vtable, 0) 120 | }); 121 | 122 | meta.inc(); 123 | } 124 | 125 | DEC => self.deferred.push(entry), 126 | 127 | _ => unreachable!(), 128 | } 129 | } 130 | } 131 | } 132 | 133 | // remove any disconnected journals 134 | self.journals.retain(|ref j| !j.is_disconnected()); 135 | 136 | entry_count 137 | } 138 | 139 | /// Do a young generation collection. Returns the number of new objects in the young generation 140 | /// heap. 141 | pub fn minor_collection(&mut self, pool: &mut Pool) -> usize { 142 | self.mark(pool); 143 | let (young_size, drop_count) = self.sweep(pool); 144 | self.merge_deferred(pool); 145 | 146 | self.logger.add_dropped(drop_count); 147 | 148 | young_size 149 | } 150 | 151 | /// Do a major collection, moving `NEW` objects to the mature heap and tracing the mature heap 152 | pub fn major_collection(&mut self, pool: &mut Pool) { 153 | // first move any new-objects into the mature heap by copying and unsetting the new-object 154 | // flag in the roots 155 | for (ptr, meta) in self.roots.iter_mut() { 156 | if !meta.unsync_is_unrooted() && meta.is_new() { 157 | // object must have a positive reference count and be marked as new-object to be 158 | // moved to the mature set 159 | self.mature.add_object(ptr, meta.vtable()); 160 | // unset the new-object bit. This object will now be treated as a simple reference 161 | // counted root and won't be dropped from here. 
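                // From here on, dropping the object is the responsibility of the mature heap's
                // sweep; the entry left in `roots` continues to serve as a reference-counted
                // root for marking.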
162 | meta.set_not_new(); 163 | } 164 | } 165 | 166 | let (heap_size, drop_count) = self.mature.collect(pool, &mut self.roots); 167 | 168 | self.logger.current_heap_size(heap_size); 169 | self.logger.add_dropped(drop_count); 170 | } 171 | 172 | /// Use >0 refcount objects and 0-refcount non-new objects to mark new objects 173 | fn mark(&mut self, pool: &mut Pool) { 174 | 175 | let shared_objects = self.roots.borrow_sync(); 176 | let sharded_objects = shared_objects.borrow_sharded(self.num_threads); 177 | 178 | pool.scoped(|scope| { 179 | 180 | for shard in sharded_objects.iter() { 181 | let objects = shared_objects.clone(); 182 | // here there is a shard of the heap and a shared reference to the whole 183 | // heap (objects) for each thread 184 | 185 | scope.execute(move || { 186 | let mut stack = TraceStack::new(); 187 | 188 | for (root_ptr, root_meta) in shard.iter() { 189 | if !root_meta.unsync_is_unrooted() || !root_meta.is_new() { 190 | // read the shard to find roots, which are non-zero-refcount 191 | // entries. Also consider non-new entries as possible roots of new 192 | // objects: this is our equivalent of searching a card table 193 | 194 | if root_meta.mark_and_needs_trace() { 195 | // mark the root, and if it needs tracing then look into it 196 | let obj = Object::from_trie_ptr(root_ptr, root_meta.vtable()); 197 | 198 | let object = obj.as_trace(); 199 | unsafe { object.trace(&mut stack) }; 200 | 201 | // now there may be some child objects on the trace stack: pull 202 | // them off and mark them too 203 | while let Some(obj) = stack.pop() { 204 | 205 | let ptr = obj.ptr >> ptr_shift(); 206 | if let Some(meta) = objects.get(ptr) { 207 | 208 | if meta.mark_and_needs_trace() { 209 | let object = obj.as_trace(); 210 | unsafe { object.trace(&mut stack) }; 211 | } 212 | } 213 | } 214 | } 215 | } 216 | } 217 | }); 218 | } 219 | }); 220 | } 221 | 222 | /// Drop unmarked new objects and remove unrooted objects. 
223 | /// Returns tuple (young_object_count, dropped_count) 224 | fn sweep(&mut self, pool: &mut Pool) -> (usize, usize) { 225 | // set counters 226 | let collect_young_count= Arc::new(AtomicUsize::new(0)); 227 | let collect_drop_count = Arc::new(AtomicUsize::new(0)); 228 | 229 | let mut split_objects = self.roots.borrow_sharded(self.num_threads); 230 | 231 | pool.scoped(|scope| { 232 | 233 | for mut node in split_objects.drain() { 234 | 235 | // pass a reference to each counter to each thread 236 | let young_count = collect_young_count.clone(); 237 | let drop_count = collect_drop_count.clone(); 238 | 239 | scope.execute(move || { 240 | 241 | let mut young_counter = 0; 242 | let mut drop_counter = 0; 243 | 244 | node.retain_if(|ptr, meta| { 245 | 246 | if meta.is_new_and_unmarked() { 247 | drop_counter += 1; 248 | 249 | // unmarked new-object (implies zero-refcount) 250 | let obj = Object::from_trie_ptr(ptr, meta.vtable); 251 | let tobj: TraitObject = Object::into(obj); 252 | 253 | unsafe { 254 | let fatptr: *mut Trace = transmute(tobj); 255 | let owned = Box::from_raw(fatptr); 256 | drop(owned); 257 | } 258 | 259 | false 260 | 261 | } else if !meta.is_new() && meta.unsync_is_unrooted() { 262 | false 263 | 264 | } else { 265 | if meta.is_new() { 266 | young_counter += 1; 267 | } 268 | 269 | meta.unmark(); 270 | true 271 | } 272 | }); 273 | 274 | // write out the counters 275 | young_count.fetch_add(young_counter, Ordering::SeqCst); 276 | drop_count.fetch_add(drop_counter, Ordering::SeqCst); 277 | }); 278 | } 279 | }); 280 | 281 | // return the counters 282 | (collect_young_count.load(Ordering::Acquire), 283 | collect_drop_count.load(Ordering::Acquire)) 284 | } 285 | 286 | /// Move the deferred refcount decrements into the root set's reference counts. 287 | fn merge_deferred(&mut self, pool: &mut Pool) { 288 | let chunk_size = max(1, self.deferred.len() / self.num_threads); 289 | 290 | { 291 | let shared_roots = self.roots.borrow_sync(); 292 | let chunks = self.deferred.chunks(chunk_size); 293 | 294 | pool.scoped(|scope| { 295 | 296 | for chunk in chunks { 297 | 298 | let roots = shared_roots.clone(); 299 | 300 | scope.execute(move || { 301 | for object in chunk { 302 | let ptr = object.ptr >> ptr_shift(); 303 | 304 | if let Some(ref mut meta) = roots.get(ptr) { 305 | // this is the only place where the reference count needs to 306 | // be thread-safely adjusted 307 | meta.dec(); 308 | } else { 309 | // there should never be something in the deferred buffer that 310 | // isn't in the heap 311 | unreachable!(); 312 | } 313 | } 314 | }); 315 | } 316 | }); 317 | } 318 | 319 | self.deferred.clear(); 320 | } 321 | 322 | /// Return a reference to the logger 323 | pub fn logger(&mut self) -> &mut S { 324 | &mut self.logger 325 | } 326 | 327 | /// Call to return the logger on shutdown 328 | pub fn shutdown(self) -> S { 329 | self.logger 330 | } 331 | } 332 | --------------------------------------------------------------------------------