├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── TODO.md ├── doc ├── Implementation-Notes.md └── Project-RFC.md ├── examples ├── balloon_animals.rs ├── correctnesstest.rs ├── low_allocation_rate.rs └── small_objects_stress.rs └── src ├── appthread.rs ├── constants.rs ├── gcthread.rs ├── heap.rs ├── journal.rs ├── lib.rs ├── parheap.rs ├── statistics.rs ├── trace.rs └── youngheap.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: rust 4 | rust: nightly 5 | 6 | after_success: | 7 | [ $TRAVIS_BRANCH = master ] && 8 | [ $TRAVIS_PULL_REQUEST = false ] && 9 | cargo doc && 10 | echo '' > target/doc/index.html && 11 | pip install --user ghp-import && 12 | ghp-import -n target/doc && 13 | git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mo-gc" 3 | description = "MO, a pauseless, concurrent, generational, parallel mark-and-sweep garbage collector" 4 | keywords = ["mo", "gc", "garbage", "collector"] 5 | homepage = "https://github.com/pliniker/mo-gc" 6 | repository = "https://github.com/pliniker/mo-gc" 7 | documentation = "https://crates.fyi/crates/mo-gc" 8 | version = "0.1.0" 9 | license = "MIT/Apache-2.0" 10 | authors = ["Peter Liniker "] 11 | 12 | [dependencies] 13 | bitmaptrie = { git = "https://github.com/pliniker/bitmaptrie-rs" } 14 | scoped-pool = "0.1" 15 | num_cpus = "0.2" 16 | time = "0.1" 17 | 18 | [dev-dependencies] 19 | stopwatch = "0.0.6" 20 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Peter Liniker 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## An experimental garbage collector in Rust 2 | 3 | This is a very experimental garbage collector primarily built to research the viability of a 4 | write barrier mechanism that does not depend on compiler GC support. 5 | 6 | 7 | * [![Build Status](https://travis-ci.org/pliniker/mo-gc.svg?branch=master)](https://travis-ci.org/pliniker/mo-gc) 8 | 9 | ### Further information 10 | 11 | Please read the [Introduction to mo-gc](http://pliniker.github.io/mo-gc-intro.html) first. 12 | 13 | * [Ideas](http://pliniker.github.io/mo-gc-ideas.html) expands on the further direction in the introduction. 14 | * [API Documentation](https://pliniker.github.io/mo-gc/), but also see the examples. 15 | * [Implementation Notes](https://github.com/pliniker/mo-gc/blob/master/doc/Implementation-Notes.md) 16 | * [Original draft design outline](https://github.com/pliniker/mo-gc/blob/master/doc/Project-RFC.md) 17 | * [Original discussion issue](https://github.com/pliniker/mo-gc/issues/1) on the original design. 18 | 19 | ### See also 20 | 21 | * [rust-gc](https://github.com/manishearth/rust-gc) 22 | * [crossbeam](https://github.com/aturon/crossbeam/) 23 | * [bacon-rajan-cc](https://github.com/fitzgen/bacon-rajan-cc) 24 | 25 | ### About this Project 26 | 27 | * Copyright © 2015 Peter Liniker 28 | * Licensed under dual MIT/Apache-2.0 29 | * Named after [M-O](http://pixar.wikia.com/wiki/M-O). 30 | 31 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | * integration tests 4 | * benchmarks 5 | 6 | # Examples 7 | 8 | * build some data structures, esp concurrent data structures 9 | * see crossbeam for treiber stack example 10 | 11 | # Issues 12 | 13 | ## Race condition 14 | 15 | There is currently a race condition where a pointer is read from the heap, rooted and then that 16 | pointer value on the heap is overwritten during the mark/sweep phase of collection. 
The 17 | rooting should ensure that the referenced object is marked, but the journal is not being 18 | read at this point and the reference count increment is too late to stop the object from being 19 | swept. 20 | 21 | This race condition means that the mutator threads cannot currently use this GC as fully general 22 | purpose, or rather that data structures must be persistent. 23 | 24 | The sequence of events causing the race condition is: 25 | 26 | * GC stops reading journal, enters mark phase 27 | * mutator reads pointer to object A from heap, roots A, writing to journal 28 | * mutator overwrites pointer on heap with new object B reference 29 | * GC traces heap, marking new object B but not previously referenced object A 30 | * GC sweeps, dropping A even though A was rooted 31 | 32 | The benefit of fixing this issue is that this GC design becomes general purpose. 33 | 34 | ### Additional write barrier 35 | 36 | This race condition might be avoided by an additional synchronous write barrier: if a pointer A 37 | on the heap is going to be replaced by pointer B, the object A might be marked as "pinned" 38 | to prevent the sweep phase from dropping it. The sweep phase would unpin the object, after 39 | which if it has been rooted, the reference count increment will be picked up from the journal 40 | before the next mark phase. 41 | 42 | This solution has the downside of adding a word to the size of every object, 43 | the cost of an atomic store on the app-thread side and the cost of an atomic load and store 44 | on the sweep phase. It would also make programs that use this GC less fork-friendly, as 45 | pinning objects would incur copy-on-write costs for memory pages that might otherwise remain 46 | read-only. 47 | 48 | Question: just how atomic would the pinning operation need to be? It only needs to take effect 49 | during the mark phase but the pin flag would need to be readable by the sweep phase. 50 | 51 | Experimentation will determine if this mechanism is worth the cost. There may be alternative 52 | implementation options that are more efficient: perhaps using a shared data structure to 53 | write pinned object pointers to that is consumed by a phase between mark and sweep that 54 | sets the marked flag on those objects? 55 | 56 | ### Use the journal 57 | 58 | The journal contains the rooting information needed to avoid this problem. Another possible 59 | solution may be to read the journal in the mark phase, _after_ marking any new roots, before 60 | moving on to the sweep phase. 61 | 62 | This needs further thought. 63 | 64 | ## Performance Bottlenecks 65 | 66 | ### Journal processing 67 | 68 | `Trie::set()` is the bottleneck in `YoungHeap::read_journals()`. This is a single-threaded 69 | function and consumes most of the GC linear time. It is the single greatest throughput limiter. 70 | If insertion into `bitmaptrie::Trie` could be parallelized, throughput would improve. 71 | 72 | One option is to process each mutator journal on a separate thread but defer new-object 73 | insertion to a single thread. This way some parallelism is gained for processing reference 74 | count increments. This is still not optimal though. 75 | 76 | ### The Allocator 77 | 78 | Building on the generic allocator: jemalloc maintains a radix trie for allocation so there 79 | are two tries, increasing CPU and memory requirements. A custom allocator would 80 | solve this problem, but would introduce the problem of writing a scalable, fragmentation- 81 | minimizing allocator. 
82 | 83 | ## Collection Scheduling 84 | 85 | This is currently very simple and has not been tuned at all. 86 | See `gcthread::gc_thread()` and `constants::*` for tuning. 87 | -------------------------------------------------------------------------------- /doc/Implementation-Notes.md: -------------------------------------------------------------------------------- 1 | 2 | * Date: 2016-03-13 3 | 4 | # Implementation Notes 5 | 6 | The current implementation has been tested on x86 and x86_64. It has not been tested on 7 | ARM, though ARM's weaker memory model may highlight some flaws. 8 | 9 | ## The journal 10 | 11 | The journal is designed to never block the mutator. Each mutator thread allocates a buffer to 12 | write reference count adjustments to. When the buffer is full, a new buffer is allocated. 13 | The GC thread consumes the buffers. Thus the journal behaves like an infinitely sized 14 | SPSC queue. Each mutator gets its own journal. 15 | 16 | The values written by the mutator to a buffer are essentially `TraitObject`s that describe 17 | a pointer to an object and the `Trace` trait virtual table. The virtual table pointer is 18 | required to provide the `drop()` and `Trace::trace()` methods, as the GC thread does not 19 | know concrete types at runtime. 20 | 21 | Because heap allocations are word aligned, a pointer's two least significant bits can be used 22 | as bit flags. 23 | 24 | The object address has four possible values in its LSBs: 25 | 26 | * 0: reference count decrement 27 | * 1: reference count increment 28 | * 2: new object allocated, no reference count adjustment 29 | * 3: new object allocated, reference count increment 30 | 31 | The object vtable has one flag value that can be set: 32 | 33 | * 2: the object is a container of other GC-managed objects and must be traced. This flag saves 34 | the mark phase from making virtual function calls for scalar objects. 35 | 36 | ### Advantages 37 | 38 | The mutator thread will never be blocked on writing to the journal unless the application hits 39 | out-of-memory, thus providing a basic pauselessness guarantee. 40 | 41 | The journal is very fast, not requiring atomics on the x86/64 TSO-memory-model architecture. 42 | 43 | ### Disadvantages 44 | 45 | If the GC thread cannot keep up with the mutator(s), the journal will continue to allocate 46 | new buffers faster than the GC thread can consume them, contributing to the OOM death march. 47 | 48 | ## Young generation heap and root reference counts 49 | 50 | A young-generation heap map is implemented using a bitmapped vector trie, whose indices are 51 | word-sized: keys are object addresses, values are a composition of root reference count, the object 52 | vtable and a word of flags for marking and sweeping. 53 | 54 | The addresses used as keys are right-shifted to eliminate the least significant bits that are 55 | always zero because heap allocations are word aligned. 56 | 57 | The flags set on the object address have been processed at this point and the heap updated 58 | accordingly. Reference count decrements are written to a deferred buffer for processing later. 59 | 60 | For new objects, the heap map flags for the object are marked as `NEW`. These are the young 61 | generation objects. Other entries in the map not marked as `NEW` are stack roots only. 62 | 63 | Thus the young generation heap map combines pure stack-root references and new object references.
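To make the journal encoding concrete, the sketch below decodes a single journal entry into its meaning. The flag values mirror those in `src/constants.rs` (`DEC`, `INC`, `NEW`, `NEW_INC`, `TRAVERSE_BIT`), but the decoder itself is illustrative rather than the crate's actual journal-processing code, and the two-bit key shift is only an example.

```rust
// Flag values mirroring src/constants.rs; the decoder itself is illustrative.
const FLAGS_MASK: usize = 3;   // low two bits of the object address word
const DEC: usize = 0;          // reference count decrement
const INC: usize = 1;          // reference count increment
const NEW: usize = 2;          // new object, not rooted
const NEW_INC: usize = 3;      // new object, rooted (refcount increment)
const TRAVERSE_BIT: usize = 2; // set on the vtable word for container objects

enum Entry {
    Decrement { key: usize },
    Increment { key: usize },
    NewObject { key: usize, rooted: bool, traverse: bool },
}

fn decode(ptr_word: usize, vtable_word: usize) -> Entry {
    // Word alignment guarantees the low address bits are zero, so the trie key
    // is the address with the flag bits stripped and shifted away.
    let key = (ptr_word & !FLAGS_MASK) >> 2;
    let traverse = vtable_word & TRAVERSE_BIT != 0;
    match ptr_word & FLAGS_MASK {
        DEC => Entry::Decrement { key: key },
        INC => Entry::Increment { key: key },
        NEW => Entry::NewObject { key: key, rooted: false, traverse: traverse },
        NEW_INC => Entry::NewObject { key: key, rooted: true, traverse: traverse },
        _ => unreachable!(),
    }
}
```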
64 | 65 | A typical generational GC keeps a data structure such as a card table to discover pointers from 66 | the mature object heap into the young generation heap. Write barriers are required to update the 67 | card table when mature objects are written to. In our case, the non-`NEW` stack-root 68 | references act as the set of mature objects that may have references to young generation objects. 69 | Essentially, the journal is a type of write barrier. 70 | 71 | When the young generation heap enters a mark phase, all objects that have a non-zero reference 72 | count are considered potential roots. Only `NEW` objects are considered during sweeping. 73 | 74 | Both marking and sweeping are done in parallel: during the mark phase, the heap map is sharded across 75 | multiple threads for scanning for roots while each thread can look up entries in the whole map for 76 | marking; during the sweep phase, the heap map is sharded across multiple threads for sweeping. 77 | 78 | ### Advantages 79 | 80 | This combined roots and new-objects map makes for a straightforwardly parallelizable mark and 81 | sweep implementation. The trie can be sharded into sub-tries and each sub-trie can be processed 82 | independently and mutated independently of the others while remaining thread safe without 83 | requiring locks or atomic access. 84 | 85 | ### Disadvantages 86 | 87 | Inserting into the trie is currently not parallelizable, making reading the journal into the trie 88 | a single-threaded affair, impacting GC throughput. 89 | 90 | On high rates of new object allocation, the GC thread currently cannot keep up with the 91 | mutators' rate of writing to the journal. The cause of this is not the journal itself: reading 92 | and writing the journal can be done very fast. However, inserting and updating the heap map 93 | causes the GC thread to process the journal at half the rate at which a single mutator thread 94 | can allocate new objects. 95 | 96 | If journal processing (trie insertion) can be parallelized, the GC throughput will improve greatly. 97 | 98 | One part-way step may be to parallelize reference count updates while still processing new 99 | objects in sequence. 100 | 101 | ## The mature object heap 102 | 103 | This heap map is similar to the young generation heap but does not consider reference counts 104 | or new objects. Marking and sweeping are parallelized similarly. 105 | 106 | A mature heap collection is triggered when the young generation heap reaches a threshold count of 107 | `NEW` objects that it is managing. `NEW` object data is copied to the mature heap trie and 108 | the original entries in the young generation are unmarked as `NEW`. They become plain stack 109 | root entries. 110 | -------------------------------------------------------------------------------- /doc/Project-RFC.md: -------------------------------------------------------------------------------- 1 | 2 | * Date: 2015-08-24 3 | * Discussion issue: [pliniker/mo-gc#1](https://github.com/pliniker/mo-gc/issues/1) 4 | 5 | # Summary 6 | 7 | Mutator threads maintain precise-rooted GC-managed objects through smart 8 | pointers on the stack that write reference-count increments and decrements to a 9 | journal. 10 | 11 | The reference-count journal is read by a GC thread that 12 | maintains the actual reference count numbers in a cache of roots. When a 13 | reference count reaches zero, the GC thread moves the pointer to a heap cache 14 | data structure that is used by a tracing collector.
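As a rough illustration of the scheme (simplified; the crate's actual rooting type is `GcRoot` in `src/appthread.rs`, which journals tagged trait-object words rather than bare addresses and deltas):

```rust
use std::cell::RefCell;

// Illustrative thread-local journal: (object address, +1 or -1) adjustments.
thread_local!(
    static JOURNAL: RefCell<Vec<(usize, isize)>> = RefCell::new(Vec::new())
);

fn journal_write(addr: usize, adj: isize) {
    JOURNAL.with(|j| j.borrow_mut().push((addr, adj)));
}

/// A stack-rooting smart pointer: creating or cloning it journals +1,
/// dropping it journals -1. The GC thread sums the adjustments to find roots.
struct Root<T> {
    ptr: *mut T,
}

impl<T> Root<T> {
    fn new(value: T) -> Root<T> {
        let ptr = Box::into_raw(Box::new(value));
        journal_write(ptr as usize, 1);
        Root { ptr: ptr }
    }
}

impl<T> Clone for Root<T> {
    fn clone(&self) -> Root<T> {
        journal_write(self.ptr as usize, 1);
        Root { ptr: self.ptr }
    }
}

impl<T> Drop for Root<T> {
    fn drop(&mut self) {
        journal_write(self.ptr as usize, -1);
    }
}
```

Only creating, cloning and dropping a root touch the journal; borrowing a root costs nothing.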
15 | 16 | Because the GC thread runs concurrently with the mutator threads without 17 | stopping them to scan stacks or trace, all GC-managed data structures that refer to 18 | other GC-managed objects must provide a safe concurrent trace function. 19 | 20 | Data structures' trace functions can implement any transactional 21 | mechanism that provides the GC a snapshot of the data structure's 22 | nested pointers for the duration of the trace function call. 23 | 24 | # Why 25 | 26 | Many languages and runtimes are hosted in the inherently unsafe languages 27 | C and/or C++, from Python to GHC. 28 | 29 | My interest in this project is in building a foundation, written in Rust, for 30 | language runtimes on top of Rust. Since Rust is a modern 31 | language for expressing low-level interactions with hardware, it is an 32 | ideal alternative to C/C++ while providing the opportunity to avoid classes 33 | of bugs common to C/C++ by default. 34 | 35 | With the brilliant, notable exception of Rust, a garbage collector is an 36 | essential luxury for most styles of programming. But how memory is managed in 37 | a language can be an asset or a liability that becomes so intertwined with 38 | the language semantics itself that it can become a huge undertaking to 39 | modernize years later. 40 | 41 | With that in mind, this GC is designed from the ground up to be concurrent 42 | and never stop the world. The caveat is that data structures 43 | need to be designed for concurrent reads and writes. In this world, 44 | the GC is just another thread, reading data structures and freeing any that 45 | are no longer live. 46 | 47 | That seems a reasonable tradeoff in a time when scaling out by adding 48 | processors rather than up through increased clock speed is now the status quo. 49 | 50 | # What this is not 51 | 52 | This is not particularly intended to be a general purpose GC, providing 53 | a near drop-in replacement for `Rc`, though it may be possible. 54 | For that, I recommend looking at 55 | [rust-gc](https://github.com/manishearth/rust-gc) or 56 | [bacon-rajan-cc](https://github.com/fitzgen/bacon-rajan-cc). 57 | 58 | This is also not primarily intended to be an ergonomic, native GC for all 59 | concurrent data structures in Rust. For that, I recommend a first look at 60 | [crossbeam](https://github.com/aturon/crossbeam/). 61 | 62 | # Assumptions 63 | 64 | This RFC assumes the use of the default Rust allocator, jemalloc, throughout 65 | the GC. No custom allocator is described here at this time. Correspondingly, 66 | the performance characteristics of jemalloc should be assumed. 67 | 68 | # Journal Implementation 69 | 70 | ## Mutator Threads 71 | 72 | The purpose of using a journal is to minimize the burden on the mutator 73 | threads as much as possible, pushing as much workload as possible over to the 74 | GC thread, while avoiding pauses if that is possible. 75 | 76 | In the most straightforward implementation, the journal can simply be a 77 | MPSC channel shared between mutator threads and sending 78 | reference count adjustments to the GC thread, that is, +1 and -1 for pointer 79 | clone and drop respectively. 80 | 81 | Performance for multiple mutator threads writing to an MPSC, with each 82 | write causing an allocation, can be improved on based on the 83 | [single writer principle][9] by 1) giving each mutator thread its own 84 | channel and 2) buffering journal entries and passing a reference to the buffer 85 | through the channel. 
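A minimal sketch of that arrangement, using a std `mpsc` channel as a stand-in for whatever channel type is ultimately chosen (the buffer capacity here is an arbitrary example value):

```rust
use std::mem;
use std::sync::mpsc::Sender;

const BUFFER_SIZE: usize = 32 * 1024; // arbitrary capacity for the sketch

/// One reference-count adjustment: +1 or -1 against a single object address.
struct Adjustment {
    addr: usize,
    delta: isize,
}

/// One writer per mutator thread. Adjustments are appended to a local buffer,
/// and only a full buffer crosses the channel, so the common case is a plain
/// push with no synchronization.
struct JournalWriter {
    buf: Vec<Adjustment>,
    tx: Sender<Vec<Adjustment>>,
}

impl JournalWriter {
    fn write(&mut self, addr: usize, delta: isize) {
        self.buf.push(Adjustment { addr: addr, delta: delta });
        if self.buf.len() >= BUFFER_SIZE {
            // Hand the full buffer to the GC thread and start a fresh one.
            let full = mem::replace(&mut self.buf, Vec::with_capacity(BUFFER_SIZE));
            let _ = self.tx.send(full);
        }
    }
}
```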
86 | 87 | Buffering journal entries should reduce the number of extra allocations per 88 | object created compared with a non-blocking MPSC channel. 89 | 90 | A typical problem of reference counted objects is locality: every reference 91 | count update requires a write to the object itself, making very inefficient 92 | spatial memory access. The journal, being a series of buffers, each 93 | of which is a contiguous block of memory, should give an efficiency gain 94 | for the mutator threads. 95 | 96 | It should be noted that the root smart-pointers shouldn't necessarily 97 | be churning out reference count adjustments. This is Rust: prefer to borrow 98 | a root smart-pointer before cloning it. This is one of the main features that 99 | makes implementing this in Rust so attractive. 100 | 101 | ### Implementation Notes 102 | 103 | When newly rooting a pointer to the stack, the current buffer must be accessed. 104 | One solution is to use Thread Local Storage so that each thread will be able 105 | to access its own buffer at any time. The overhead of looking up the TLS 106 | pointer is a couple of extra instructions in a release build to check that 107 | the buffer data has been initialized 108 | 109 | A journal buffer maintains a count at offset 0 to indicate how many words of 110 | adjustment data have been written. This count might be written to using 111 | [release](https://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html) ordering 112 | while the GC thread might read the count using acquire ordering. 113 | 114 | ## Garbage Collection Thread 115 | 116 | In the basic MPSC use case, the GC thread reads reference count adjustments 117 | from the channel. For each inc/dec adjustment, it must look up the 118 | associated pointer in the cache of root pointers and update the total reference 119 | count for that pointer. 120 | 121 | In the case of multiple channels, each sending a buffer of adjustments at a 122 | time, there will naturally be an ordering problem: 123 | 124 | Thread A may, for a pointer, write the following to its journal: 125 | 126 | |Action|adjustment| | 127 | | --- | --- | --- | 128 | |new pointer|+1|| 129 | |clone pointer|+1|(move cloned pointer to Thread B)| 130 | |drop pointer|-1|| 131 | 132 | Thread B may do the following a brief moment later after receiving the 133 | cloned pointer: 134 | 135 | |Action|adjustment| | 136 | | --- | --- | --- | 137 | |drop pointer|-1|(drop cloned pointer)| 138 | 139 | The order in which these adjustments are processed by the GC thread may well 140 | be out of order, and there is no information available to restore the correct 141 | order. The decrement from Thread B might be processed first, followed by the 142 | first increment from Thread A, giving a momentary reference count of 0. The 143 | collector may kick in at that point, freeing the object and resulting in a 144 | possible use-after-free and possibly a double-free. 145 | 146 | Here, learning from [Bacon2003][1], decrement adjustments should be 147 | buffered by an amount of time sufficient to clear all increment adjustments 148 | that occurred prior to those decrements. An appropriate amount of time might 149 | be provided by scanning the mutator threads' 150 | buffers one further iteration before applying the buffered decrements. 151 | 152 | Increment adjustments can be applied immediately, always. 153 | 154 | # Tracing 155 | 156 | While more advanced or efficient algorithms might be applied here, this section 157 | will describe how two-colour mark and sweep can be applied. 
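In outline, and using `HashMap`s in place of the tries described below plus a simplified trace signature, the collection cycle looks roughly like this:

```rust
use std::collections::HashMap;

// Simplified stand-ins: the crate uses bitmapped tries and a richer Trace trait.
trait Trace {
    /// Push the addresses of any GC-managed objects this object refers to.
    fn trace(&self, out: &mut Vec<usize>);
}

struct HeapEntry {
    object: Box<dyn Trace>,
    marked: bool,
}

/// roots: object address -> reference count; heap: object address -> entry.
fn collect(roots: &HashMap<usize, usize>, heap: &mut HashMap<usize, HeapEntry>) {
    // Mark phase: everything reachable from a root with a non-zero count is live.
    let mut pending: Vec<usize> = roots.iter()
        .filter(|&(_, &count)| count > 0)
        .map(|(&addr, _)| addr)
        .collect();

    while let Some(addr) = pending.pop() {
        if let Some(entry) = heap.get_mut(&addr) {
            if !entry.marked {
                entry.marked = true;
                entry.object.trace(&mut pending);
            }
        }
    }

    // Sweep phase: drop unmarked entries, clear the mark bit on survivors.
    heap.retain(|_, entry| {
        let live = entry.marked;
        entry.marked = false;
        live
    });
}
```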
158 | 159 | As in [rust-gc][4], all types participating in GC must implement 160 | a trait that allows that type to be traced. (This is an inconvenience that 161 | a compiler plugin may be able to alleviate for many cases.) 162 | 163 | The GC thread maintains two trie structures: one to map from roots to 164 | reference counts; a second to map from heap objects to any metadata needed to 165 | run `drop()` against them, and bits for marking objects as live. 166 | 167 | The roots trie is traversed, calling the trace function for each. Every visited 168 | object is marked in the heap trie. 169 | 170 | Then the heap trie is traversed and every unmarked entry is `drop()`ped and 171 | the live objects unmarked. 172 | 173 | It is worth noting that by using a separate data structure for the heap and 174 | root caches that this GC scheme remains `fork()` memory friendly: the act 175 | of updating reference counts and marking heap objects does not force a 176 | page copy-on-write for every counted and marked object location. 177 | 178 | # Concurrent Data Structures 179 | 180 | To prevent data races between the mutator threads and the GC thread, all 181 | GC-managed data structures that contain pointers to other GC-managed objects 182 | must be transactional in updates to those relationships. That is, a 183 | `GcRoot>` can contain mutable data where the mutability follows only 184 | the Rust static analysis rules, but a `GcRoot>>` must be 185 | reimplemented additionally with a transactional runtime nature. 186 | 187 | The `Vec::trace()` method has to be able to provide a readonly 188 | snapshot of its contents to the GC thread and atomic updates to its 189 | contents. 190 | 191 | Applying a compile-time distinction between these may be possible using the 192 | type system. Indeed, presenting a safe API is one of the challenges in 193 | implementing this. 194 | 195 | As the `trace()` method is part of the data structure code itself, data 196 | structures should be free to implement any method of atomic update without the 197 | GC code or thread needing to be aware of transactions or their mechanism. 198 | 199 | The `trace()` method may, depending on the data structure characteristics, 200 | opt to return immediately with an "defer" status, meaning that at the time 201 | of calling, it isn't expedient to obtain a readonly snapshot of the data 202 | structure for tracing. In that case, the GC thread will requeue the object 203 | for a later attempt. 204 | 205 | Fortunately, concurrent data structures are fairly widely researched and 206 | in use by 2015 and I will not go into implementation details here. 207 | 208 | # Tradeoffs 209 | 210 | How throughput compares to other GC algorithms is left to 211 | readers more experienced in the field to say. My guess is that with the overhead 212 | of the journal while doing mostly new-generation collections that this 213 | algorithm should be competitive for multiple threads on multiprocessing 214 | machines. The single-threaded case will suffer from the concurrent data 215 | structure overhead. 216 | 217 | Non-atomic objects must be transactional, adding the runtime and complexity 218 | cost associated with concurrent data structures: the garbage generated. In some 219 | circumstances there could be enormous amounts of garbage generated, raising the 220 | overall overhead of using the GC to where the GC thread affects throughput. 
221 | 222 | Jemalloc is said to give low fragmentation rates compared to other malloc 223 | implementations, but fragmentation is likely nonetheless. 224 | 225 | At least this one language/compiler safety issue remains: referencing 226 | GC-managed pointers in a `drop()` is currently considered safe by the compiler 227 | as it has no awareness of the GC, but doing so is of course unsafe as the order 228 | of collection is non-deterministic leading to possible use-after-free in custom 229 | `drop()` functions. 230 | 231 | # Rust Library Compatibility 232 | 233 | As the GC takes over the lifetime management of any objects put under its 234 | control - and that transfer of control is completely under the control of 235 | the programmer - any Rust libraries should work with it, including low-level 236 | libraries such as [coroutine-rs](https://github.com/rustcc/coroutine-rs) and 237 | by extension [mioco](https://github.com/dpc/mioco). 238 | 239 | This GC will never interfere with any code that uses only the native Rust 240 | memory management. 241 | 242 | # Improvements 243 | 244 | ## Compiler Plugin 245 | 246 | It is possible to give the compiler some degree of awareness of GC requirements 247 | through custom plugins, as implemented in [rust-gc][4] and [servo][13]. The same 248 | may be applicable here. 249 | 250 | In the future, this implementation would surely benefit from aspects of the 251 | planned [tracing hooks][5]. 252 | 253 | ## Generational Optimization 254 | 255 | Since the mutator threads write a journal of all root pointers, all 256 | pointers that the mutator uses will be recorded. It may be possible 257 | for the GC thread to use that fact to process batches of journal changes 258 | in a generational manner, rather than having to trace the entire heap 259 | on every iteration. This needs further investigation. 260 | 261 | ## Parallel Collection 262 | 263 | The tries used in the GC should be amenable to parallelizing tracing which 264 | may be particularly beneficial in conjunction with tracing the whole heap. 265 | 266 | ## Copying Collector 267 | 268 | Any form of copying or moving collector would require a custom allocator and 269 | probably a Baker-style read barrier. The barrier could be implemented on the 270 | root smart pointers with the added expense of the mutator threads having to 271 | check whether the pointer must be updated on every dereference. There are 272 | pitfalls here though as the Rust compiler may optimize dereferences with 273 | pointers taking temporary but hard-to-discover root in CPU registers. It may 274 | be necessary to use the future tracing hooks to discover all roots to avoid 275 | Bad Things happening. 276 | 277 | # Patent Issues 278 | 279 | I have read through the patents granted to IBM and David F. Bacon that cover 280 | reference counting and have come to the conclusion that nothing described here 281 | infringes. 282 | 283 | I have not read further afield though. My assumption has been that there is 284 | prior art for most garbage collection methods at this point. 
285 | 286 | # References 287 | 288 | * [Bacon2003][1] Bacon et al, A Pure Reference Counting Garbage Collector 289 | * [Bacon2004][2] Bacon et al, A Unified Theory of Garbage Collection 290 | * [Oxischeme][3] Nick Fitzgerald, Memory Management in Oxischeme 291 | * [Manishearth/rust-gc][4] Manish Goregaokar, rust-gc project 292 | * [Rust blog][5] Rust in 2016 293 | * [rust-lang/rust#11399][6] Add garbage collector to std::gc 294 | * [rust-lang/rfcs#415][7] Garbage collection 295 | * [rust-lang/rust#2997][8] Tracing GC in rust 296 | * [Mechanical Sympathy][9] Martin Thompson, Single Writer Principle 297 | * [michaelwoerister/rs-persistent-datastructures][10] Michael Woerister, HAMT in Rust 298 | * [crossbeam][11] Aaron Turon, Lock-freedom without garbage collection 299 | * [Shenandoah][12] Shenandoah, a low-pause GC for the JVM 300 | * [Servo][13] Servo blog, JavaScript: Servo’s only garbage collector 301 | 302 | [1]: http://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon03Pure.pdf 303 | [2]: http://www.cs.virginia.edu/~cs415/reading/bacon-garbage.pdf 304 | [3]: http://fitzgeraldnick.com/weblog/60/ 305 | [4]: https://github.com/Manishearth/rust-gc 306 | [5]: http://blog.rust-lang.org/2015/08/14/Next-year.html 307 | [6]: https://github.com/rust-lang/rust/pull/11399 308 | [7]: https://github.com/rust-lang/rfcs/issues/415 309 | [8]: https://github.com/rust-lang/rust/issues/2997 310 | [9]: http://mechanical-sympathy.blogspot.co.uk/2011/09/single-writer-principle.html 311 | [10]: https://github.com/michaelwoerister/rs-persistent-datastructures 312 | [11]: http://aturon.github.io/blog/2015/08/27/epoch/ 313 | [12]: https://www.youtube.com/watch?v=QcwyKLlmXeY 314 | [13]: https://blog.mozilla.org/research/2014/08/26/javascript-servos-only-garbage-collector/ 315 | -------------------------------------------------------------------------------- /examples/balloon_animals.rs: -------------------------------------------------------------------------------- 1 | 2 | 3 | extern crate mo_gc; 4 | use mo_gc::{Gc, GcRoot, GcThread, StatsLogger, Trace, TraceOps, TraceStack}; 5 | 6 | 7 | struct Segment { 8 | next: Gc, 9 | } 10 | 11 | 12 | impl Segment { 13 | fn new() -> Segment { 14 | Segment { 15 | next: Gc::null() 16 | } 17 | } 18 | 19 | fn join_to(&mut self, to: Gc) { 20 | self.next = to; 21 | } 22 | } 23 | 24 | 25 | unsafe impl Trace for Segment { 26 | fn traversible(&self) -> bool { 27 | true 28 | } 29 | 30 | unsafe fn trace(&self, heap: &mut TraceStack) { 31 | if let Some(ptr) = self.next.as_raw() { 32 | heap.push_to_trace(&*ptr); 33 | } 34 | } 35 | } 36 | 37 | 38 | struct Balloon { 39 | head: Gc, 40 | tail: Gc, 41 | } 42 | 43 | 44 | impl Balloon { 45 | fn inflate() -> Balloon { 46 | let body = Gc::new(Segment::new()); 47 | Balloon { 48 | head: body, 49 | tail: body, 50 | } 51 | } 52 | 53 | fn twist(&mut self) { 54 | let mut new_seg = Gc::new(Segment::new()); 55 | new_seg.join_to(self.head); 56 | self.head = new_seg; 57 | } 58 | 59 | fn complete(&mut self) { 60 | self.tail.next = self.head; 61 | } 62 | 63 | fn count(&mut self) { 64 | let mut count = 0; 65 | let mut current = self.head; 66 | 67 | loop { 68 | current = current.next; 69 | count += 1; 70 | 71 | if current.is(self.tail) { 72 | break; 73 | } 74 | } 75 | 76 | if count != 1000 { 77 | println!("snake is short - only {} segments", count); 78 | } 79 | } 80 | } 81 | 82 | 83 | unsafe impl Trace for Balloon { 84 | fn traversible(&self) -> bool { 85 | true 86 | } 87 | 88 | unsafe fn trace(&self, heap: &mut TraceStack) { 89 | 
heap.push_to_trace(&*self.head as &Trace); 90 | } 91 | } 92 | 93 | 94 | fn snake() { 95 | // this many snake balloons 96 | for _snake in 0..5000 { 97 | let mut balloon = GcRoot::new(Balloon::inflate()); 98 | 99 | // with this many segments each 100 | for _segment in 0..1000 { 101 | balloon.twist(); 102 | } 103 | 104 | balloon.complete(); 105 | balloon.count(); 106 | } 107 | } 108 | 109 | 110 | fn main() { 111 | let gc = GcThread::spawn_gc(); 112 | 113 | let snake_handle = gc.spawn(|| snake()); 114 | 115 | let logger = gc.join().expect("gc failed"); 116 | logger.dump_to_stdout(); 117 | 118 | snake_handle.join().expect("snake failed"); 119 | } 120 | -------------------------------------------------------------------------------- /examples/correctnesstest.rs: -------------------------------------------------------------------------------- 1 | #![feature(alloc_system)] 2 | extern crate alloc_system; 3 | 4 | 5 | extern crate mo_gc; 6 | 7 | use mo_gc::{GcThread, GcRoot, StatsLogger, Trace}; 8 | 9 | 10 | struct Thing { 11 | value: [usize; 4] 12 | } 13 | 14 | 15 | unsafe impl Trace for Thing {} 16 | 17 | 18 | impl Thing { 19 | fn new() -> Thing { 20 | Thing { 21 | value: [42; 4] 22 | } 23 | } 24 | } 25 | 26 | 27 | impl Drop for Thing { 28 | fn drop(&mut self) { 29 | // any heap corruption might be evident here 30 | assert!(self.value[0] == 42); 31 | assert!(self.value[1] == 42); 32 | assert!(self.value[2] == 42); 33 | assert!(self.value[3] == 42); 34 | } 35 | } 36 | 37 | 38 | fn app() { 39 | for _ in 0..10000000 { 40 | let _new = GcRoot::new(Thing::new()); 41 | } 42 | } 43 | 44 | 45 | fn main() { 46 | let gc = GcThread::spawn_gc(); 47 | 48 | let app_handle = gc.spawn(|| app()); 49 | 50 | let logger = gc.join().expect("gc failed"); 51 | logger.dump_to_stdout(); 52 | 53 | app_handle.join().expect("app failed"); 54 | } 55 | -------------------------------------------------------------------------------- /examples/low_allocation_rate.rs: -------------------------------------------------------------------------------- 1 | 2 | extern crate stopwatch; 3 | use stopwatch::Stopwatch; 4 | 5 | extern crate mo_gc; 6 | 7 | use std::thread; 8 | use std::time::Duration; 9 | 10 | use mo_gc::{GcThread, GcRoot, Trace, StatsLogger}; 11 | 12 | 13 | const THING_SIZE: usize = 8; 14 | const THING_COUNT: i64 = 2500000; 15 | 16 | 17 | struct Thing { 18 | _data: [u64; THING_SIZE], 19 | } 20 | 21 | 22 | impl Thing { 23 | fn new() -> Thing { 24 | Thing { _data: [0; THING_SIZE] } 25 | } 26 | } 27 | 28 | 29 | unsafe impl Trace for Thing {} 30 | 31 | 32 | fn app() { 33 | let sw = Stopwatch::start_new(); 34 | 35 | thread::sleep(Duration::from_millis(100)); 36 | 37 | for count in 0..THING_COUNT { 38 | let _new = GcRoot::new(Thing::new()); 39 | 40 | if count & 0xfff == 0 { 41 | thread::sleep(Duration::from_millis(50)); 42 | } 43 | } 44 | 45 | let per_second = (THING_COUNT * 1000) / sw.elapsed_ms(); 46 | println!("app allocated {} objects at {} objects per second", THING_COUNT, per_second); 47 | println!("app finished in {}ms", sw.elapsed_ms()); 48 | } 49 | 50 | 51 | fn main() { 52 | let gc = GcThread::spawn_gc(); 53 | 54 | let app_handle = gc.spawn(|| app()); 55 | 56 | let logger = gc.join().expect("gc failed"); 57 | logger.dump_to_stdout(); 58 | 59 | app_handle.join().expect("app failed"); 60 | } 61 | -------------------------------------------------------------------------------- /examples/small_objects_stress.rs: -------------------------------------------------------------------------------- 1 | 2 | extern crate stopwatch; 3 | 
use stopwatch::Stopwatch; 4 | 5 | extern crate mo_gc; 6 | 7 | use mo_gc::{GcThread, GcRoot, Trace, StatsLogger}; 8 | 9 | 10 | const THING_SIZE: usize = 8; 11 | const THING_COUNT: i64 = 2500000; 12 | 13 | 14 | struct Thing { 15 | _data: [u64; THING_SIZE], 16 | } 17 | 18 | 19 | impl Thing { 20 | fn new() -> Thing { 21 | Thing { _data: [0; THING_SIZE] } 22 | } 23 | } 24 | 25 | 26 | unsafe impl Trace for Thing {} 27 | 28 | 29 | fn app() { 30 | let sw = Stopwatch::start_new(); 31 | 32 | for _ in 0..THING_COUNT { 33 | let _new = GcRoot::new(Thing::new()); 34 | } 35 | 36 | let per_second = (THING_COUNT * 1000) / sw.elapsed_ms(); 37 | println!("app allocated {} objects at {} objects per second", THING_COUNT, per_second); 38 | println!("app finished in {}ms", sw.elapsed_ms()); 39 | } 40 | 41 | 42 | fn main() { 43 | let gc = GcThread::spawn_gc(); 44 | 45 | let app_handle1 = gc.spawn(|| app()); 46 | let app_handle2 = gc.spawn(|| app()); 47 | 48 | let logger = gc.join().expect("gc failed"); 49 | logger.dump_to_stdout(); 50 | 51 | app_handle1.join().expect("app failed"); 52 | app_handle2.join().expect("app failed"); 53 | } 54 | -------------------------------------------------------------------------------- /src/appthread.rs: -------------------------------------------------------------------------------- 1 | //! Types for the mutator to use to build data structures 2 | 3 | 4 | use std::cell::Cell; 5 | use std::mem::transmute; 6 | use std::ops::{Deref, DerefMut}; 7 | use std::ptr::{null, null_mut}; 8 | use std::raw::TraitObject; 9 | use std::sync::atomic::{AtomicPtr, Ordering}; 10 | use std::thread; 11 | 12 | use constants::{INC_BIT, JOURNAL_BUFFER_SIZE, NEW_BIT, TRAVERSE_BIT}; 13 | use gcthread::{JournalSender, EntrySender}; 14 | use heap::{Object, TraceStack}; 15 | use journal; 16 | use trace::Trace; 17 | 18 | 19 | /// Each thread gets it's own EntrySender 20 | thread_local!( 21 | static GC_JOURNAL: Cell<*const EntrySender> = Cell::new(null()) 22 | ); 23 | 24 | 25 | /// GcBox struct and traits: a boxed object that is GC managed 26 | pub struct GcBox { 27 | value: T, 28 | } 29 | 30 | 31 | /// Root smart pointer, sends reference count changes to the journal. 32 | /// 33 | /// Whenever a reference to an object on the heap must be retained on the stack, this type must be 34 | /// used. It's use will ensure that the object will be seen as a root. 35 | pub struct GcRoot { 36 | ptr: *mut GcBox, 37 | } 38 | 39 | 40 | /// Non-atomic pointer type. This type is `!Sync` and thus is useful for presenting a Rust-ish 41 | /// API to a data structure where aliasing and mutability must follow the standard rules: there 42 | /// can be only one mutator. 43 | /// 44 | /// *Important note:* even though this type is `!Sync`, any data structures that are composed of 45 | /// `Gc` pointers must still be designed with the awareness that the GC thread will call `trace()` 46 | /// at any point and so, must still be thread safe! 47 | /// 48 | /// This is not a root pointer type. It should be used inside data structures to reference other 49 | /// GC-managed objects. 50 | pub struct Gc { 51 | ptr: *mut GcBox, 52 | } 53 | 54 | 55 | /// Atomic pointer type that points at a traceable object. This type is `Sync` and can be used to 56 | /// build concurrent data structures. 57 | /// 58 | /// This type should be used inside data structures to reference other GC-managed objects, but 59 | /// provides interior mutability and atomic methods. 
60 | /// 61 | /// TODO: cas, swap etc for GcRoot and Gc 62 | pub struct GcAtomic { 63 | ptr: AtomicPtr>, 64 | } 65 | 66 | 67 | /// An Application Thread, manages a thread-local reference to a tx channel 68 | /// 69 | /// TODO: a version of `spawn()` is required that can be called from an existing mutator thread. 70 | pub struct AppThread; 71 | 72 | 73 | impl AppThread { 74 | /// As thread::spawn but takes a journal Sender to initialize the thread_local instance with. 75 | pub fn spawn_from_gc(tx: JournalSender, f: F) -> thread::JoinHandle 76 | where F: FnOnce() -> T, 77 | F: Send + 'static, 78 | T: Send + 'static 79 | { 80 | thread::spawn(move || { 81 | let (jtx, jrx) = journal::make_journal(JOURNAL_BUFFER_SIZE); 82 | 83 | tx.send(jrx).expect("Failed to send a new Journal to the GC thread!"); 84 | 85 | GC_JOURNAL.with(|j| { 86 | j.set(&jtx); 87 | }); 88 | 89 | f() 90 | }) 91 | } 92 | } 93 | 94 | // Reference count functions. Only new-objects need to specify the traverse bit. 95 | 96 | #[inline] 97 | fn as_traitobject(object: &T) -> TraitObject { 98 | let trace: &Trace = object; 99 | unsafe { transmute(trace) } 100 | } 101 | 102 | 103 | /// Write a reference count increment to the journal for a newly allocated object 104 | #[inline] 105 | fn write(object: &T, is_new: bool, flags: usize) { 106 | GC_JOURNAL.with(|j| { 107 | let tx = unsafe { &*j.get() }; 108 | 109 | let tobj = as_traitobject(object); 110 | 111 | // set the refcount-increment bit 112 | let ptr = (tobj.data as usize) | flags; 113 | 114 | // set the traversible bit 115 | let mut vtable = tobj.vtable as usize; 116 | if is_new && object.traversible() { 117 | vtable |= TRAVERSE_BIT; 118 | } 119 | 120 | tx.send(Object { 121 | ptr: ptr, 122 | vtable: vtable, 123 | }); 124 | }); 125 | } 126 | 127 | // GcBox implementation 128 | 129 | impl GcBox { 130 | fn new(value: T) -> GcBox { 131 | GcBox { 132 | value: value, 133 | } 134 | } 135 | } 136 | 137 | 138 | unsafe impl Trace for GcBox { 139 | #[inline] 140 | fn traversible(&self) -> bool { 141 | self.value.traversible() 142 | } 143 | 144 | #[inline] 145 | unsafe fn trace(&self, heap: &mut TraceStack) { 146 | self.value.trace(heap); 147 | } 148 | } 149 | 150 | // GcRoot implementation 151 | 152 | impl GcRoot { 153 | /// Put a new object on the heap and hand ownership to the GC, writing a reference count 154 | /// increment to the journal. 
155 | pub fn new(value: T) -> GcRoot { 156 | let boxed = Box::new(GcBox::new(value)); 157 | write(&*boxed, true, NEW_BIT | INC_BIT); 158 | 159 | GcRoot { 160 | ptr: Box::into_raw(boxed) 161 | } 162 | } 163 | 164 | fn from_raw(ptr: *mut GcBox) -> GcRoot { 165 | let root = GcRoot { ptr: ptr }; 166 | write(&*root, false, INC_BIT); 167 | root 168 | } 169 | 170 | fn ptr(&self) -> *mut GcBox { 171 | self.ptr 172 | } 173 | 174 | fn value(&self) -> &T { 175 | unsafe { &(*self.ptr).value } 176 | } 177 | 178 | fn value_mut(&mut self) -> &mut T { 179 | unsafe { &mut (*self.ptr).value } 180 | } 181 | } 182 | 183 | 184 | impl Drop for GcRoot { 185 | fn drop(&mut self) { 186 | write(&**self, false, 0); 187 | } 188 | } 189 | 190 | 191 | impl Deref for GcRoot { 192 | type Target = T; 193 | 194 | fn deref(&self) -> &T { 195 | self.value() 196 | } 197 | } 198 | 199 | 200 | impl DerefMut for GcRoot { 201 | fn deref_mut(&mut self) -> &mut T { 202 | self.value_mut() 203 | } 204 | } 205 | 206 | 207 | impl Clone for GcRoot { 208 | fn clone(&self) -> Self { 209 | GcRoot::from_raw(self.ptr()) 210 | } 211 | } 212 | 213 | // Gc implementation 214 | 215 | impl Gc { 216 | /// Creates a new null pointer. 217 | pub fn null() -> Gc { 218 | Gc { 219 | ptr: null_mut(), 220 | } 221 | } 222 | 223 | /// Move a value to the heap and create a pointer to it. 224 | pub fn new(value: T) -> Gc { 225 | let boxed = Box::new(GcBox::new(value)); 226 | write(&*boxed, true, NEW_BIT); 227 | 228 | Gc { 229 | ptr: Box::into_raw(boxed) 230 | } 231 | } 232 | 233 | /// Return the raw pointer value, or None if it is a null pointer. 234 | pub fn as_raw(&self) -> Option<*mut GcBox> { 235 | if self.ptr.is_null() { 236 | None 237 | } else { 238 | Some(self.ptr) 239 | } 240 | } 241 | 242 | /// Pointer equality comparison. 243 | pub fn is(&self, other: Gc) -> bool { 244 | self.ptr == other.ptr 245 | } 246 | 247 | fn from_raw(ptr: *mut GcBox) -> Gc { 248 | Gc { 249 | ptr: ptr, 250 | } 251 | } 252 | 253 | fn ptr(&self) -> *mut GcBox { 254 | self.ptr 255 | } 256 | 257 | fn value(&self) -> &T { 258 | unsafe { &(*self.ptr).value } 259 | } 260 | 261 | fn value_mut(&mut self) -> &mut T { 262 | unsafe { &mut (*self.ptr).value } 263 | } 264 | } 265 | 266 | 267 | impl Deref for Gc { 268 | type Target = T; 269 | 270 | fn deref(&self) -> &T { 271 | self.value() 272 | } 273 | } 274 | 275 | 276 | impl DerefMut for Gc { 277 | fn deref_mut(&mut self) -> &mut T { 278 | self.value_mut() 279 | } 280 | } 281 | 282 | 283 | impl Clone for Gc { 284 | fn clone(&self) -> Self { 285 | Gc { 286 | ptr: self.ptr, 287 | } 288 | } 289 | } 290 | 291 | 292 | impl Copy for Gc {} 293 | 294 | // GcAtomic implementation 295 | 296 | impl GcAtomic { 297 | /// Instantiate a new null pointer 298 | pub fn null() -> GcAtomic { 299 | GcAtomic { 300 | ptr: AtomicPtr::new(null_mut()) 301 | } 302 | } 303 | 304 | /// Instantiate a new pointer, moving `value` to the heap. Writes to the journal. 305 | pub fn new(value: T) -> GcAtomic { 306 | let boxed = Box::new(GcBox::new(value)); 307 | write(&*boxed, true, NEW_BIT); 308 | 309 | GcAtomic { 310 | ptr: AtomicPtr::new(Box::into_raw(boxed)), 311 | } 312 | } 313 | 314 | /// Root the pointer by loading it into a `GcRoot` 315 | /// 316 | /// Panics if `order` is `Release` or `AcqRel`. 317 | pub fn load_into_root(&self, order: Ordering) -> GcRoot { 318 | let root = GcRoot { 319 | ptr: self.ptr.load(order), 320 | }; 321 | 322 | write(&*root, false, INC_BIT); 323 | root 324 | } 325 | 326 | /// Copy the pointer into a new `Gc` instance. 
327 |     ///
328 |     /// Panics if `order` is `Release` or `AcqRel`.
329 |     pub fn load_into_gc(&self, order: Ordering) -> Gc<T> {
330 |         Gc::from_raw(self.ptr.load(order))
331 |     }
332 | 
333 |     /// Fetch the current raw pointer value
334 |     ///
335 |     /// Panics if `order` is `Release` or `AcqRel`.
336 |     pub fn load_raw(&self, order: Ordering) -> *mut GcBox<T> {
337 |         self.ptr.load(order)
338 |     }
339 | 
340 |     /// Replace the current pointer value with the pointer from the given `GcRoot`.
341 |     ///
342 |     /// Panics if `order` is `Acquire` or `AcqRel`.
343 |     pub fn store_from_root(&self, root: GcRoot<T>, order: Ordering) {
344 |         self.ptr.store(root.ptr(), order);
345 |     }
346 | 
347 |     /// Replace the current pointer value with the pointer from the given `Gc`.
348 |     ///
349 |     /// Panics if `order` is `Acquire` or `AcqRel`.
350 |     pub fn store_from_gc(&self, gc: Gc<T>, order: Ordering) {
351 |         self.ptr.store(gc.ptr(), order);
352 |     }
353 | 
354 |     /// Replace the current pointer value with the given raw pointer
355 |     ///
356 |     /// Panics if `order` is `Acquire` or `AcqRel`.
357 |     pub fn store_raw(&self, ptr: *mut GcBox<T>, order: Ordering) {
358 |         self.ptr.store(ptr, order);
359 |     }
360 | }
361 | 
--------------------------------------------------------------------------------
/src/constants.rs:
--------------------------------------------------------------------------------
1 | //! Numerous constants used as parameters to GC behavior
2 | //!
3 | //! These journal and GC parameters should become runtime-configurable rather than compile-time constants.
4 | 
5 | 
6 | // Journal and GC parameters
7 | pub const JOURNAL_BUFFER_SIZE: usize = 32768;
8 | pub const BUFFER_RUN: usize = 1024;
9 | pub const JOURNAL_RUN: usize = 32;
10 | pub const MAX_SLEEP_DUR: usize = 100; // milliseconds
11 | pub const MIN_SLEEP_DUR: usize = 1; // milliseconds
12 | pub const MAJOR_COLLECT_THRESHOLD: usize = 1 << 20;
13 | 
14 | // Cache line size in bytes
15 | pub const CACHE_LINE: usize = 64;
16 | 
17 | // Bits and masks
18 | pub const PTR_MASK: usize = !3;
19 | pub const MARK_BIT: usize = 1;
20 | pub const MARK_MASK: usize = !1;
21 | pub const TRAVERSE_BIT: usize = 2;
22 | 
23 | // mask for the flag bits carried in the low bits of an object's address through the journal
24 | pub const FLAGS_MASK: usize = 3;
25 | 
26 | // bit flag indicating that a reference count is being incremented
27 | pub const INC_BIT: usize = 1;
28 | // bit flag indicating that an object is newly allocated
29 | pub const NEW_BIT: usize = 2;
30 | pub const NEW_MASK: usize = !2;
31 | 
32 | // Values found in the 2 bits masked by FLAGS_MASK
33 | // new object, increment refcount
34 | pub const NEW_INC: usize = 3;
35 | // new object, not rooted
36 | pub const NEW: usize = 2;
37 | // old object, increment refcount
38 | pub const INC: usize = 1;
39 | // decrement refcount
40 | pub const DEC: usize = 0;
41 | 
--------------------------------------------------------------------------------
/src/gcthread.rs:
--------------------------------------------------------------------------------
1 | //! Garbage collection thread
2 | 
3 | 
4 | use std::any::Any;
5 | use std::cmp::min;
6 | use std::mem::size_of;
7 | use std::sync::mpsc;
8 | use std::thread;
9 | use std::time::Duration;
10 | 
11 | use num_cpus;
12 | use scoped_pool::Pool;
13 | 
14 | use appthread::AppThread;
15 | use constants::{MAJOR_COLLECT_THRESHOLD, MAX_SLEEP_DUR, MIN_SLEEP_DUR};
16 | use heap::{CollectOps, Object};
17 | use journal;
18 | use parheap::ParHeap;
19 | use statistics::{StatsLogger, DefaultLogger};
20 | use youngheap::YoungHeap;
21 | 
22 | 
23 | pub type EntryReceiver = journal::Receiver<Object>;
24 | pub type EntrySender = journal::Sender<Object>;
25 | 
26 | pub type JournalReceiver = mpsc::Receiver<EntryReceiver>;
27 | pub type JournalSender = mpsc::Sender<EntryReceiver>;
28 | 
29 | pub type JournalList = Vec<EntryReceiver>;
30 | 
31 | 
32 | /// The Garbage Collection thread handle.
33 | pub struct GcThread<S: StatsLogger> {
34 |     /// This is cloned and given to app threads.
35 |     tx_chan: JournalSender,
36 | 
37 |     /// The GC thread's handle to join on.
38 |     handle: thread::JoinHandle<S>,
39 | }
40 | 
41 | 
42 | impl GcThread<DefaultLogger> {
43 |     /// Spawn a GC thread with default parameters: a `ParHeap` and a `DefaultLogger` parallelized
44 |     /// across all available CPUs.
45 |     pub fn spawn_gc() -> GcThread<DefaultLogger> {
46 |         let cores = num_cpus::get();
47 |         Self::spawn_gc_with(cores, ParHeap::new(cores), DefaultLogger::new())
48 |     }
49 | }
50 | 
51 | 
52 | impl<S: StatsLogger + 'static> GcThread<S> {
53 |     /// Spawn the GC on its own thread, parallelized across `num_threads` workers, and return a
54 |     /// `GcThread` handle for spawning app threads and joining on the GC. The caller provides a
55 |     /// custom `StatsLogger` implementation and a `CollectOps` heap implementation.
56 |     pub fn spawn_gc_with<T>(num_threads: usize, mature: T, logger: S) -> GcThread<S>
57 |         where T: CollectOps + Send + 'static
58 |     {
59 |         let (tx, rx) = mpsc::channel();
60 | 
61 |         let handle = thread::spawn(move || gc_thread(num_threads, rx, mature, logger));
62 | 
63 |         GcThread {
64 |             tx_chan: tx,
65 |             handle: handle,
66 |         }
67 |     }
68 | 
69 |     /// Spawn an app thread that journals to the GC thread.
70 |     pub fn spawn<F, T>(&self, f: F) -> thread::JoinHandle<T>
71 |         where F: FnOnce() -> T,
72 |               F: Send + 'static,
73 |               T: Send + 'static
74 |     {
75 |         AppThread::spawn_from_gc(self.tx_chan.clone(), f)
76 |     }
77 | 
78 |     /// Wait for the GC thread to finish. On success, returns the object that implements
79 |     /// `StatsLogger` for the calling thread to examine.
80 |     pub fn join(self) -> Result<S, Box<Any + Send + 'static>> {
81 |         self.handle.join()
82 |     }
83 | }
84 | 
85 | 
86 | /// Main GC thread loop.
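///
/// For orientation, a minimal sketch of how an application drives this loop through the public
/// API (illustrative only; it uses only items defined in this crate):
///
/// ```ignore
/// use mo_gc::{GcRoot, GcThread, StatsLogger};
///
/// let gc = GcThread::spawn_gc();
///
/// let app = gc.spawn(|| {
///     // allocations and roots on this thread are journaled to the GC thread
///     let _root = GcRoot::new(42usize);
/// });
///
/// app.join().unwrap();
///
/// // once every app journal has disconnected, the loop below finishes and returns its logger
/// let stats = gc.join().unwrap();
/// stats.dump_to_stdout();
/// ```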
87 | fn gc_thread<T, S>(num_threads: usize, rx_chan: JournalReceiver, mature: T, logger: S) -> S
88 |     where S: StatsLogger,
89 |           T: CollectOps + Send
90 | {
91 |     let mut pool = Pool::new(num_threads);
92 | 
93 |     let mut gc = YoungHeap::new(num_threads, mature, logger);
94 | 
95 |     // block, wait for first journal
96 |     gc.add_journal(rx_chan.recv().expect("Failed to receive first app journal!"));
97 | 
98 |     gc.logger().mark_start_time();
99 | 
100 |     // next duration to sleep if all journals are empty
101 |     let mut sleep_dur: usize = 0;
102 | 
103 |     // loop until all journals are disconnected
104 |     while gc.num_journals() > 0 {
105 | 
106 |         // new appthread connected
107 |         if let Ok(journal) = rx_chan.try_recv() {
108 |             gc.add_journal(journal);
109 |         }
110 | 
111 |         let entries_read = gc.read_journals();
112 | 
113 |         // sleep if nothing read from journal
114 |         if entries_read == 0 {
115 |             thread::sleep(Duration::from_millis(sleep_dur as u64));
116 | 
117 |             gc.logger().add_sleep(sleep_dur);
118 | 
119 |             // back off exponentially up to the max
120 |             sleep_dur = min(sleep_dur * 2, MAX_SLEEP_DUR);
121 |         } else {
122 |             // entries were read: reset the sleep duration to the minimum
123 |             sleep_dur = MIN_SLEEP_DUR;
124 |         }
125 | 
126 |         // TODO: base this call on a duration since last call?
127 |         let young_count = gc.minor_collection(&mut pool);
128 | 
129 |         // do a major collection if the young count reaches a threshold and we're not just trying
130 |         // to keep up with the app threads
131 |         // TODO: force a major collection every n minutes
132 |         if sleep_dur != MIN_SLEEP_DUR && young_count >= MAJOR_COLLECT_THRESHOLD {
133 |             gc.major_collection(&mut pool);
134 |         }
135 |     }
136 | 
137 |     // do a final collection where all roots should be unrooted
138 |     gc.minor_collection(&mut pool);
139 |     gc.major_collection(&mut pool);
140 | 
141 |     // return logger to calling thread
142 |     gc.logger().mark_end_time();
143 |     gc.shutdown()
144 | }
145 | 
146 | 
147 | /// Pointers are word-aligned, meaning the least-significant 2 or 3 bits are always 0, depending
148 | /// on the word size.
149 | #[inline]
150 | pub fn ptr_shift() -> i32 {
151 |     if size_of::<usize>() == 4 {
152 |         2
153 |     } else {
154 |         3
155 |     }
156 | }
157 | 
--------------------------------------------------------------------------------
/src/heap.rs:
--------------------------------------------------------------------------------
1 | //! Core heap traits and data types
2 | //!
3 | //! TODO: RootMeta and ObjectMeta have some things in common, perhaps use traits to abstract
4 | //! the differences, then perhaps YoungHeap and ParHeap can share more code?
5 | 
6 | 
7 | use std::cell::Cell;
8 | use std::mem::transmute;
9 | use std::raw::TraitObject;
10 | use std::sync::atomic::{AtomicUsize, Ordering};
11 | 
12 | use bitmaptrie::Trie;
13 | use scoped_pool::Pool;
14 | 
15 | use constants::{MARK_BIT, MARK_MASK, NEW_BIT, NEW_MASK, PTR_MASK, TRAVERSE_BIT};
16 | use gcthread::ptr_shift;
17 | use trace::Trace;
18 | 
19 | 
20 | pub type ObjectBuf = Vec<Object>;
21 | pub type RootMap = Trie<RootMeta>;
22 | pub type HeapMap = Trie<ObjectMeta>;
23 | 
24 | 
25 | /// A trait that describes Trace operations on a Heap
26 | pub trait TraceOps {
27 |     /// Buffer the given object for future tracing on the trace stack. This method should be called
28 |     /// by objects that implement the Trace trait, from the Trace::trace() method.
29 |     fn push_to_trace(&mut self, object: &Trace);
30 | }
31 | 
32 | 
33 | /// A trait that describes collection operations on a Heap
34 | pub trait CollectOps {
35 |     /// Add an object directly to the heap.
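    ///
    /// Note that `ptr` is the trie key for the object rather than its raw address: callers pass
    /// the object address right-shifted by `ptr_shift()`, as `ParHeap::add_object` and
    /// `YoungHeap::major_collection` do, and `vtable` is the object's `Trace` vtable pointer.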
36 | fn add_object(&mut self, ptr: usize, vtable: usize); 37 | 38 | /// Run a collection iteration on the heap. Return the total heap size and the number of 39 | /// dropped objects. 40 | fn collect(&mut self, thread_pool: &mut Pool, roots: &mut RootMap) -> (usize, usize); 41 | } 42 | 43 | 44 | /// A journal item. Essentially just a Send-able TraitObject 45 | #[derive(Copy, Clone)] 46 | pub struct Object { 47 | pub ptr: usize, 48 | pub vtable: usize, 49 | } 50 | 51 | 52 | /// Root pointer metadata 53 | pub struct RootMeta { 54 | /// the root reference count. This gets decremented by multiple threads and thus must be 55 | /// thread safe. 56 | pub refcount: AtomicUsize, 57 | /// the Trace trait vtable pointer 58 | pub vtable: usize, 59 | /// bits for flags 60 | pub flags: Cell, 61 | } 62 | 63 | 64 | /// A GC-managed pointer's metadata 65 | pub struct ObjectMeta { 66 | /// Using bit 0 as the mark bit (MARK_BIT) 67 | /// Using bit 1 to indicate traversibility (TRAVERSE_BIT) 68 | /// Normally we'd use an AtomicUsize, but since the operations on the value are one-way, 69 | /// i.e. setting a mark bit in parallel, or unsetting it in parallel, we don't need to worry 70 | /// about data races. The worst that will happen is that two threads will try to trace the 71 | /// same object concurrently. 72 | pub vtable: Cell, 73 | } 74 | 75 | 76 | /// A type that contains a stack of objects to trace into. This type is separated out from the 77 | /// main Heap type so that different collection strategies can be implemented without affecting 78 | /// the client code. The `Trace` trait depends only this type, then, and not the whole Heap 79 | /// type. 80 | pub struct TraceStack { 81 | stack: ObjectBuf, 82 | } 83 | 84 | 85 | unsafe impl Send for Object {} 86 | 87 | unsafe impl Send for RootMeta {} 88 | unsafe impl Sync for RootMeta {} 89 | 90 | unsafe impl Send for ObjectMeta {} 91 | // We're using a Cell and not an Atomic in ObjectMeta but that is ok for how we are using it. 
92 | unsafe impl Sync for ObjectMeta {} 93 | 94 | 95 | impl Object { 96 | pub fn from_trie_ptr(ptr: usize, vtable: usize) -> Object { 97 | Object { 98 | ptr: ptr << ptr_shift(), 99 | vtable: vtable, 100 | } 101 | } 102 | 103 | // Return this object as a Trace trait object reference 104 | pub fn as_trace(&self) -> &Trace { 105 | let tobj: TraitObject = Object::into(*self); 106 | unsafe { transmute(tobj) } 107 | } 108 | } 109 | 110 | 111 | impl From for Object { 112 | fn from(tobj: TraitObject) -> Object { 113 | Object { 114 | ptr: tobj.data as usize, 115 | vtable: tobj.vtable as usize, 116 | } 117 | } 118 | } 119 | 120 | 121 | impl Into for Object { 122 | fn into(self) -> TraitObject { 123 | TraitObject { 124 | data: self.ptr as *mut (), 125 | // make sure traverse and mark bits are cleared 126 | vtable: (self.vtable & PTR_MASK) as *mut (), 127 | } 128 | } 129 | } 130 | 131 | 132 | impl RootMeta { 133 | pub fn new(refcount: usize, vtable: usize, flags: usize) -> RootMeta { 134 | RootMeta { 135 | refcount: AtomicUsize::new(refcount), 136 | vtable: vtable, 137 | flags: Cell::new(flags), 138 | } 139 | } 140 | 141 | // Initialize with a reference count of 1 142 | pub fn one(vtable: usize, flags: usize) -> RootMeta { 143 | Self::new(1, vtable, flags) 144 | } 145 | 146 | // Initialize with a reference count of 0 147 | pub fn zero(vtable: usize, flags: usize) -> RootMeta { 148 | Self::new(0, vtable, flags) 149 | } 150 | 151 | // Increment the reference count by 1 152 | #[inline] 153 | pub fn inc(&self) { 154 | self.refcount.fetch_add(1, Ordering::SeqCst); 155 | } 156 | 157 | // Decrement the reference count by 1 158 | #[inline] 159 | pub fn dec(&self) { 160 | self.refcount.fetch_sub(1, Ordering::SeqCst); 161 | } 162 | 163 | // Increment the reference count by 1, thread unsafe 164 | #[inline] 165 | pub fn unsync_inc(&self) { 166 | let refcount = self.unsync_refcount(); 167 | refcount.set(refcount.get() + 1); 168 | } 169 | 170 | // Decrement the reference count by 1, thread unsafe 171 | #[inline] 172 | pub fn unsync_dec(&self) { 173 | let refcount = self.unsync_refcount(); 174 | refcount.set(refcount.get() - 1); 175 | } 176 | 177 | // Return true if this object has a zero reference count, thread unsafe 178 | #[inline] 179 | pub fn unsync_is_unrooted(&self) -> bool { 180 | let refcount = self.unsync_refcount(); 181 | refcount.get() == 0 182 | } 183 | 184 | // Return true if this is a new object 185 | #[inline] 186 | pub fn is_new(&self) -> bool { 187 | self.flags.get() & NEW_BIT != 0 188 | } 189 | 190 | // Return true if this is a new object and the mark bit is unset 191 | #[inline] 192 | pub fn is_new_and_unmarked(&self) -> bool { 193 | self.flags.get() & (MARK_BIT | NEW_BIT) == NEW_BIT 194 | } 195 | 196 | #[inline] 197 | pub fn set_not_new(&self) { 198 | self.flags.set(self.flags.get() & NEW_MASK); 199 | } 200 | 201 | // Mark this object and return true if it needs to be traced into 202 | #[inline] 203 | pub fn mark_and_needs_trace(&self) -> bool { 204 | let flags = self.flags.get(); 205 | 206 | let was_unmarked = flags & MARK_BIT == 0; 207 | if was_unmarked { 208 | self.flags.set(flags | MARK_BIT); 209 | } 210 | 211 | was_unmarked && flags & TRAVERSE_BIT != 0 212 | } 213 | 214 | // Reset the mark bit back to 0 215 | #[inline] 216 | pub fn unmark(&self) { 217 | self.flags.set(self.flags.get() & MARK_MASK); 218 | } 219 | 220 | // Returns the vtable without any flags set 221 | #[inline] 222 | pub fn vtable(&self) -> usize { 223 | self.vtable & PTR_MASK 224 | } 225 | 226 | // oh the horror, to save a 
few clock cycles
227 |     #[inline]
228 |     fn unsync_refcount(&self) -> &Cell<usize> {
229 |         let refcount: &Cell<usize> = unsafe { transmute(&self.refcount) };
230 |         refcount
231 |     }
232 | }
233 | 
234 | 
235 | impl ObjectMeta {
236 |     pub fn new(vtable: usize) -> ObjectMeta {
237 |         ObjectMeta { vtable: Cell::new(vtable) }
238 |     }
239 | 
240 |     // Mark this object and return true if it needs to be traced into
241 |     #[inline]
242 |     pub fn mark_and_needs_trace(&self) -> bool {
243 |         let vtable = self.vtable.get();
244 | 
245 |         let was_unmarked = vtable & MARK_BIT == 0;
246 |         if was_unmarked {
247 |             self.vtable.set(vtable | MARK_BIT);
248 |         }
249 | 
250 |         was_unmarked && vtable & TRAVERSE_BIT != 0
251 |     }
252 | 
253 |     // Query the mark bit
254 |     #[inline]
255 |     pub fn is_marked(&self) -> bool {
256 |         self.vtable.get() & MARK_BIT != 0
257 |     }
258 | 
259 |     // Unset the mark bit
260 |     #[inline]
261 |     pub fn unmark(&self) {
262 |         let vtable = self.vtable.get();
263 |         self.vtable.set(vtable & MARK_MASK);
264 |     }
265 | 
266 |     // Get the vtable ptr without mark or traverse bits set
267 |     #[inline]
268 |     pub fn vtable(&self) -> usize {
269 |         self.vtable.get() & PTR_MASK
270 |     }
271 | }
272 | 
273 | 
274 | impl TraceStack {
275 |     pub fn new() -> TraceStack {
276 |         TraceStack { stack: ObjectBuf::new() }
277 |     }
278 | 
279 |     pub fn push(&mut self, obj: Object) {
280 |         self.stack.push(obj);
281 |     }
282 | 
283 |     pub fn pop(&mut self) -> Option<Object> {
284 |         self.stack.pop()
285 |     }
286 | 
287 |     // Create initial contents from a slice of Objects
288 |     pub fn from_roots(&mut self, slice: &[Object]) {
289 |         self.stack.extend_from_slice(slice);
290 |     }
291 | }
292 | 
293 | 
294 | impl TraceOps for TraceStack {
295 |     fn push_to_trace(&mut self, object: &Trace) {
296 |         let tobj: TraitObject = unsafe { transmute(object) };
297 |         self.stack.push(Object::from(tobj));
298 |     }
299 | }
300 | 
--------------------------------------------------------------------------------
/src/journal.rs:
--------------------------------------------------------------------------------
1 | //! An SPSC queue implemented internally as a sequence of SPSC buffers.
2 | //!
3 | //! This queue will allocate new buffers indefinitely and eat up memory if the receiver doesn't
4 | //! keep up. Performance is better if the receiver keeps up as the allocator will likely reuse
5 | //! the same set of memory for each buffer.
6 | //!
7 | //! Because of TSO on x86, the store order by the sender means that the receiver can load values
8 | //! from the buffer without worrying that it'll read invalid data ahead of the sender.
9 | //! On other architectures, we use atomics with the associated performance penalty.
10 | 
11 | 
12 | use std::cell::Cell;
13 | use std::intrinsics::{needs_drop, abort};
14 | use std::mem::{align_of, size_of};
15 | use std::ptr::{null_mut, read, write, Unique};
16 | use std::sync::Arc;
17 | use std::sync::atomic::{AtomicPtr, Ordering};
18 | 
19 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
20 | use std::sync::atomic::AtomicUsize;
21 | 
22 | extern crate alloc;
23 | use self::alloc::heap::{allocate, deallocate};
24 | 
25 | use constants::CACHE_LINE;
26 | 
27 | 
28 | /// TSO means that we don't need atomics on x86 and that will speed things up.
29 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
30 | struct MaybeAtomicUsize {
31 |     value: Cell<usize>,
32 | }
33 | 
34 | 
35 | /// On weaker memory model platforms, default to atomics.
36 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] 37 | struct MaybeAtomicUsize { 38 | value: AtomicUsize, 39 | } 40 | 41 | 42 | /// A one-shot spsc buffer: once it's full and has been read, it is disposed of and a new Buffer 43 | /// is allocated. 44 | struct Buffer { 45 | data: Unique, 46 | 47 | capacity: usize, 48 | 49 | head: MaybeAtomicUsize, 50 | 51 | _cachepadding: [u8; CACHE_LINE], 52 | 53 | tail: MaybeAtomicUsize, 54 | tail_max: MaybeAtomicUsize, 55 | 56 | next: AtomicPtr>, 57 | } 58 | 59 | 60 | /// Since the buffers are linked together by raw pointers, this struct assumes ownership of that 61 | /// unsafe relationship, presenting it as safe. 62 | struct BufferQueue { 63 | // this pointer is only accessed by the Receiver 64 | head: Cell<*mut Buffer>, 65 | 66 | _cachepadding: [u8; CACHE_LINE], 67 | 68 | // this pointer is only accessed by the Sender 69 | tail: Cell<*mut Buffer>, 70 | // this value only written once by the Sender, read by the Receiver 71 | hup: Cell, 72 | } 73 | 74 | 75 | /// An iterator type that iters until the receiver returns empty. 76 | pub struct EmptyIter<'a, T: 'a> { 77 | receiver: &'a mut Receiver, 78 | } 79 | 80 | 81 | /// Similar to std::sync::mpsc::TryRecvError 82 | pub enum RecvResult { 83 | Empty, 84 | Disconnected, 85 | } 86 | 87 | 88 | /// A journal reader type which can be sent to another thread 89 | pub struct Receiver { 90 | buffer: Arc>, 91 | } 92 | 93 | 94 | /// A journal writer type which can be sent to another thread 95 | pub struct Sender { 96 | buffer: Arc>, 97 | } 98 | 99 | 100 | unsafe impl Send for Sender {} 101 | unsafe impl Send for Receiver {} 102 | 103 | 104 | impl BufferQueue { 105 | fn new(capacity: usize) -> BufferQueue { 106 | let first_buffer = Box::new(Buffer::new(capacity)); 107 | let ptr = Box::into_raw(first_buffer); 108 | 109 | BufferQueue { 110 | head: Cell::new(ptr), 111 | _cachepadding: [0; CACHE_LINE], 112 | tail: Cell::new(ptr), 113 | hup: Cell::new(false), 114 | } 115 | } 116 | 117 | /// use by Sender only 118 | fn tail(&self) -> *mut Buffer { 119 | self.tail.get() 120 | } 121 | 122 | /// use by Receiver only 123 | fn head(&self) -> *mut Buffer { 124 | self.head.get() 125 | } 126 | 127 | /// use by Receiver only 128 | fn replace_head(&self, next_head: *mut Buffer) { 129 | unsafe { Box::from_raw(self.head.get()) }; 130 | self.head.set(next_head); 131 | } 132 | 133 | /// use by Receiver only 134 | fn head_is_completed(&self) -> bool { 135 | unsafe { &*self.head() }.is_completed() 136 | } 137 | 138 | /// use by Receiver only 139 | fn next_head(&self) -> Option<*mut Buffer> { 140 | unsafe { &*self.head() }.next_buffer() 141 | } 142 | } 143 | 144 | 145 | impl Drop for BufferQueue { 146 | /// Drop all unread buffers. 147 | fn drop(&mut self) { 148 | let mut head = Some(self.head.get()); 149 | 150 | loop { 151 | let mut next = None; 152 | if let Some(head) = head { 153 | next = unsafe { &*head }.next_buffer(); 154 | 155 | unsafe { 156 | let owned = Box::from_raw(head); 157 | drop(owned); 158 | }; 159 | 160 | if let None = next { 161 | break; 162 | } 163 | } 164 | head = next; 165 | } 166 | } 167 | } 168 | 169 | 170 | impl Sender { 171 | /// Send a value to the Receiver. TODO this should probably return some kind of error on 172 | /// receiver hup. 
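    ///
    /// `send` never blocks: when the current buffer is full, a fresh buffer is allocated and
    /// linked in for the receiver to follow. A minimal round-trip sketch (illustrative only):
    ///
    /// ```ignore
    /// use mo_gc::make_journal;
    ///
    /// let (tx, rx) = make_journal::<usize>(8);
    /// tx.send(1);
    /// match rx.try_recv() {
    ///     Ok(value) => assert_eq!(value, 1),
    ///     _ => unreachable!(),
    /// }
    /// ```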
173 | pub fn send(&self, item: T) { 174 | let result = unsafe { &*self.buffer.tail() }.write(item); 175 | 176 | if let Some(new_tail) = result { 177 | self.buffer.tail.set(new_tail); 178 | } 179 | } 180 | } 181 | 182 | 183 | impl Drop for Sender { 184 | fn drop(&mut self) { 185 | // mark the last buffer as completed and set the HUP flag 186 | unsafe { &*self.buffer.tail() }.mark_completed(); 187 | self.buffer.hup.set(true); 188 | } 189 | } 190 | 191 | 192 | impl Receiver { 193 | /// Read a value from the queue if there is one available, otherwise return without blocking 194 | pub fn try_recv(&self) -> Result { 195 | let head = unsafe { &*self.buffer.head() }; 196 | let result = head.try_read(); 197 | 198 | match result { 199 | Some(value) => Ok(value), 200 | 201 | None => { 202 | // is this buffer completed by the sender? 203 | if self.buffer.head_is_completed() { 204 | 205 | // is there a next buffer? 206 | if let Some(next_head) = self.buffer.next_head() { 207 | self.buffer.replace_head(next_head); 208 | 209 | // peek at next buffer for a value befure returning empty 210 | let new_head = unsafe { &*self.buffer.head() }; 211 | if let Some(value) = new_head.try_read() { 212 | Ok(value) 213 | } else { 214 | Err(RecvResult::Empty) 215 | } 216 | 217 | } else { 218 | // no further buffer, did we get hung-up on? 219 | if self.buffer.hup.get() { 220 | Err(RecvResult::Disconnected) 221 | } else { 222 | Err(RecvResult::Empty) 223 | } 224 | } 225 | } else { 226 | Err(RecvResult::Empty) 227 | } 228 | } 229 | } 230 | } 231 | 232 | 233 | /// Make an Iterator that returns values until the queue is empty or disconnected. 234 | pub fn iter_until_empty(&mut self) -> EmptyIter { 235 | EmptyIter { receiver: self } 236 | } 237 | 238 | /// Has the Sender hung up? 239 | pub fn is_disconnected(&self) -> bool { 240 | if self.buffer.hup.get() { 241 | if let None = self.buffer.next_head() { 242 | return unsafe { &*self.buffer.head() }.is_empty(); 243 | } 244 | } 245 | 246 | false 247 | } 248 | } 249 | 250 | 251 | /// Return a Sender/Receiver pair that can be handed over to other threads. The capacity is the 252 | /// requested size of each internal buffer and will be rounded to the next power of two. 253 | pub fn make_journal(capacity: usize) -> (Sender, Receiver) { 254 | let buffer = Arc::new(BufferQueue::new(capacity)); 255 | 256 | (Sender { buffer: buffer.clone() }, 257 | Receiver { buffer: buffer }) 258 | } 259 | 260 | 261 | impl Buffer { 262 | /// Create a new Buffer instance, rounding the capacity up to the nearest power of two. 263 | fn new(requested_capacity: usize) -> Buffer { 264 | let rounded_capacity = requested_capacity.next_power_of_two(); 265 | 266 | let data = unsafe { 267 | let array = allocate(rounded_capacity * size_of::(), align_of::()); 268 | if array.is_null() { 269 | abort() 270 | }; 271 | Unique::new(array as *mut T) 272 | }; 273 | 274 | Buffer { 275 | data: data, 276 | capacity: rounded_capacity, 277 | head: MaybeAtomicUsize::new(0), 278 | _cachepadding: [0; CACHE_LINE], 279 | tail: MaybeAtomicUsize::new(0), 280 | tail_max: MaybeAtomicUsize::new(rounded_capacity as usize), 281 | next: AtomicPtr::new(null_mut()), 282 | } 283 | } 284 | 285 | /// Write to the buffer, returning Some(new_buffer) if the current one was full. 
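    /// The item is written before `tail` is advanced with a `Release` store (a plain store on
    /// x86), which pairs with the `Acquire` load of `tail` in `try_read`; similarly, a freshly
    /// allocated buffer is published through `next` with `Release` and picked up with `Acquire`
    /// in `next_buffer`.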
286 | fn write(&self, item: T) -> Option<*mut Buffer> { 287 | let tail = self.tail.load(Ordering::Relaxed); 288 | 289 | if tail < self.tail_max.load(Ordering::Relaxed) { 290 | // write to this buffer 291 | unsafe { write(self.data.offset(tail as isize), item) }; 292 | self.tail.fetch_add(1, Ordering::Release); 293 | None 294 | } else { 295 | // allocate a new buffer and write to that 296 | let buffer = Box::new(Buffer::new(self.capacity)); 297 | buffer.write(item); 298 | 299 | // save the pointer to the new buffer for the receiver 300 | let ptr = Box::into_raw(buffer); 301 | self.next.store(ptr, Ordering::Release); 302 | 303 | Some(ptr) 304 | } 305 | } 306 | 307 | /// Read the next item from the buffer, returning None if the buffer is full or if the contents 308 | /// thus far have been consumed. 309 | fn try_read(&self) -> Option { 310 | let head = self.head.load(Ordering::Relaxed); 311 | 312 | if head < self.tail.load(Ordering::Acquire) { 313 | // read from this buffer 314 | let item = unsafe { read(self.data.offset(head as isize)) }; 315 | self.head.fetch_add(1, Ordering::Relaxed); 316 | Some(item) 317 | } else { 318 | None 319 | } 320 | } 321 | 322 | /// Check the completion status. 323 | fn is_completed(&self) -> bool { 324 | self.tail_max.load(Ordering::Relaxed) == self.tail.load(Ordering::Acquire) 325 | } 326 | 327 | /// Mark this buffer as full. 328 | fn mark_completed(&self) { 329 | self.tail_max.store(self.tail.load(Ordering::Relaxed), Ordering::Relaxed); 330 | } 331 | 332 | /// Check for contents. 333 | fn is_empty(&self) -> bool { 334 | self.head.load(Ordering::Relaxed) == self.tail_max.load(Ordering::Relaxed) 335 | } 336 | 337 | /// Fetch the pointer to the next buffer if the Sender has written one. 338 | fn next_buffer(&self) -> Option<*mut Buffer> { 339 | let ptr = self.next.load(Ordering::Acquire); 340 | 341 | if ptr.is_null() { 342 | None 343 | } else { 344 | Some(ptr) 345 | } 346 | } 347 | } 348 | 349 | 350 | impl Drop for Buffer { 351 | fn drop(&mut self) { 352 | unsafe { 353 | // pop any remaining items if they need to be officially dropped 354 | if needs_drop::() { 355 | loop { 356 | match self.try_read() { 357 | None => break, 358 | _ => (), 359 | } 360 | } 361 | } 362 | 363 | deallocate(self.data.get_mut() as *mut T as *mut u8, 364 | self.capacity * size_of::(), 365 | align_of::()); 366 | } 367 | } 368 | } 369 | 370 | 371 | impl<'a, T> Iterator for EmptyIter<'a, T> { 372 | type Item = T; 373 | 374 | /// Ignores disconnected state 375 | fn next(&mut self) -> Option { 376 | if let Ok(item) = self.receiver.try_recv() { 377 | Some(item) 378 | } else { 379 | None 380 | } 381 | } 382 | } 383 | 384 | 385 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 386 | impl MaybeAtomicUsize { 387 | fn new(value: usize) -> MaybeAtomicUsize { 388 | MaybeAtomicUsize { value: Cell::new(value) } 389 | } 390 | 391 | #[inline] 392 | fn load(&self, _ordering: Ordering) -> usize { 393 | self.value.get() 394 | } 395 | 396 | #[inline] 397 | fn store(&self, value: usize, _ordering: Ordering) { 398 | self.value.set(value); 399 | } 400 | 401 | #[inline] 402 | fn fetch_add(&self, value: usize, _ordering: Ordering) -> usize { 403 | let old = self.value.get(); 404 | self.value.set(old + value); 405 | old 406 | } 407 | } 408 | 409 | 410 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] 411 | impl MaybeAtomicUsize { 412 | fn new(value: usize) -> MaybeAtomicUsize { 413 | MaybeAtomicUsize { value: AtomicUsize::new(value) } 414 | } 415 | 416 | #[inline] 417 | fn load(&self, 
ordering: Ordering) -> usize { 418 | self.value.load(ordering) 419 | } 420 | 421 | #[inline] 422 | fn store(&self, value: usize, ordering: Ordering) { 423 | self.value.store(value, ordering); 424 | } 425 | 426 | #[inline] 427 | fn fetch_add(&self, value: usize, ordering: Ordering) -> usize { 428 | self.value.fetch_add(value, ordering) 429 | } 430 | } 431 | 432 | 433 | #[cfg(test)] 434 | mod tests { 435 | 436 | use super::{make_journal, RecvResult}; 437 | 438 | 439 | const TEST_COUNT: usize = 12345; 440 | const TEST_BUFFER_SIZE: usize = 32; 441 | 442 | 443 | #[test] 444 | fn test_rx_tx() { 445 | let (tx, rx) = make_journal::(TEST_BUFFER_SIZE); 446 | 447 | for i in 0..TEST_COUNT { 448 | tx.send(i); 449 | 450 | let mut value = None; 451 | 452 | while let None = value { 453 | match rx.try_recv() { 454 | Ok(packet) => { 455 | assert!(packet == i); 456 | value = Some(packet); 457 | } 458 | 459 | // may get Empty on transitioning from one buffer to the next 460 | Err(RecvResult::Empty) => continue, 461 | Err(RecvResult::Disconnected) => assert!(false), 462 | } 463 | } 464 | } 465 | } 466 | 467 | #[test] 468 | fn test_disconnect() { 469 | let (tx, rx) = make_journal::(TEST_BUFFER_SIZE); 470 | 471 | drop(tx); 472 | 473 | match rx.try_recv() { 474 | Err(RecvResult::Disconnected) => (), 475 | _ => assert!(false), 476 | } 477 | } 478 | 479 | #[test] 480 | fn test_running_disconnect_tx() { 481 | let (tx, rx) = make_journal::(TEST_BUFFER_SIZE); 482 | 483 | // buffer up some values 484 | for i in 0..TEST_COUNT { 485 | tx.send(i); 486 | } 487 | 488 | drop(tx); 489 | 490 | // should still be able to receive all buffered values 491 | for i in 0..TEST_COUNT { 492 | let mut value = None; 493 | 494 | while let None = value { 495 | match rx.try_recv() { 496 | Ok(packet) => { 497 | assert!(packet == i); 498 | value = Some(packet); 499 | } 500 | 501 | // may get Empty on transitioning from one buffer to the next 502 | Err(RecvResult::Empty) => continue, 503 | Err(RecvResult::Disconnected) => assert!(false), 504 | } 505 | } 506 | } 507 | 508 | // should be disconnected 509 | match rx.try_recv() { 510 | Err(RecvResult::Disconnected) => (), 511 | _ => assert!(false), 512 | } 513 | } 514 | 515 | #[test] 516 | fn test_disconnect_rx() { 517 | let (tx, rx) = make_journal::(TEST_BUFFER_SIZE); 518 | 519 | drop(rx); 520 | 521 | tx.send(42); 522 | 523 | // TODO: tx.send() should return a Result with a disconnected status 524 | } 525 | } 526 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(alloc)] 2 | #![feature(core_intrinsics)] 3 | #![feature(heap_api)] 4 | #![feature(raw)] 5 | #![feature(unique)] 6 | 7 | 8 | //! # mo-gc 9 | //! 10 | //! A pauseless, concurrent, generational, parallel mark-and-sweep garbage collector. 11 | //! 12 | //! This is an experimental design to research an idea into a pauseless garbage collector. 13 | //! 14 | //! The GC handles multiple OS thread mutators without stopping their worlds. It does this by 15 | //! deferring reference counting of stack-rooted pointers to the GC thread through a journal 16 | //! of stack root changes. The journal itself is fast to write to, adding an amortized 25% to 17 | //! the cost of `Box::new()` using jemalloc for a 64 byte object. 18 | //! 19 | //! Thus the mutator never needs to be stopped for it's stack to be scanned or for any collection 20 | //! phase. 21 | //! 22 | //! 
See [project TODO](https://github.com/pliniker/mo-gc/blob/master/TODO.md) for limitations. 23 | //! 24 | //! ## Usage 25 | //! 26 | //! Usage is best illustrated by the examples provided. 27 | 28 | 29 | extern crate bitmaptrie; 30 | extern crate num_cpus; 31 | extern crate scoped_pool; 32 | extern crate time; 33 | 34 | 35 | mod appthread; 36 | mod constants; 37 | mod gcthread; 38 | mod heap; 39 | mod journal; 40 | mod parheap; 41 | mod statistics; 42 | mod trace; 43 | mod youngheap; 44 | 45 | 46 | pub use appthread::{AppThread, Gc, GcAtomic, GcBox, GcRoot}; 47 | pub use constants::*; 48 | pub use gcthread::GcThread; 49 | pub use heap::{CollectOps, TraceOps, TraceStack}; 50 | pub use journal::{make_journal, Receiver, Sender}; 51 | pub use parheap::ParHeap; 52 | pub use statistics::StatsLogger; 53 | pub use trace::Trace; 54 | pub use youngheap::YoungHeap; 55 | -------------------------------------------------------------------------------- /src/parheap.rs: -------------------------------------------------------------------------------- 1 | //! A parallel collector for the entire heap. 2 | 3 | 4 | use std::mem::transmute; 5 | use std::raw::TraitObject; 6 | use std::sync::Arc; 7 | use std::sync::atomic::{AtomicUsize, Ordering}; 8 | 9 | use scoped_pool::Pool; 10 | 11 | use gcthread::ptr_shift; 12 | use heap::{CollectOps, HeapMap, Object, ObjectMeta, RootMap, TraceStack}; 13 | use trace::Trace; 14 | 15 | 16 | /// This references all known GC-managed objects and handles marking and sweeping; parallel mark 17 | /// and sweep version. 18 | pub struct ParHeap { 19 | num_threads: usize, 20 | objects: HeapMap, 21 | } 22 | 23 | 24 | unsafe impl Send for ParHeap {} 25 | 26 | 27 | impl ParHeap { 28 | /// In this heap implementation, work is split out into a thread pool. There is no knowing, 29 | /// though, how much work each split actually represents. One thread may receive a 30 | /// disproportionate amount of tracing or sweeping. 31 | pub fn new(num_threads: usize) -> ParHeap { 32 | ParHeap { 33 | num_threads: num_threads, 34 | objects: HeapMap::new(), 35 | } 36 | } 37 | 38 | /// A parallel mark implementation: 39 | /// * shares a borrow of the main HeapMap among the thread pool 40 | /// * divides the roots among the thread pool 41 | /// * each thread traces from it's own slice of roots 42 | fn mark(&mut self, thread_pool: &mut Pool, roots: &mut RootMap) { 43 | // divide the roots among threads and trace 44 | let mut sharded_roots = roots.borrow_sharded(self.num_threads); 45 | 46 | thread_pool.scoped(|scope| { 47 | 48 | // borrow the main HeapMap for the duration of this scope 49 | let shared_objects = self.objects.borrow_sync(); 50 | 51 | // split roots into a slice for each thread and hand a slice and an new-object 52 | // HeapMap to each job 53 | for roots in sharded_roots.drain() { 54 | 55 | // make a thread-local trace stack and reference to the heap 56 | let objects = shared_objects.clone(); 57 | 58 | // mark using the thread-local slice of roots 59 | scope.execute(move || { 60 | 61 | let mut stack = TraceStack::new(); 62 | 63 | for (root_ptr, root_meta) in roots.iter() { 64 | if !root_meta.unsync_is_unrooted() && root_meta.mark_and_needs_trace() { 65 | // read the shard to find roots, which are all positive-refcount 66 | // entries. Trace the roots if they need it. 
67 | 68 | let obj = Object::from_trie_ptr(root_ptr, root_meta.vtable()); 69 | 70 | let object = obj.as_trace(); 71 | unsafe { object.trace(&mut stack) }; 72 | 73 | // now there may be some child objects on the trace stack: pull 74 | // them off and mark them too 75 | while let Some(obj) = stack.pop() { 76 | 77 | let ptr = obj.ptr >> ptr_shift(); 78 | if let Some(meta) = objects.get(ptr) { 79 | 80 | if meta.mark_and_needs_trace() { 81 | let object = obj.as_trace(); 82 | unsafe { object.trace(&mut stack) }; 83 | } 84 | } 85 | } 86 | } 87 | } 88 | }); // execute 89 | } 90 | }); // scope 91 | } 92 | 93 | /// A parallel sweep implementation: 94 | /// * the main HeapMap tree is split into subtrees and each thread is given a separate subtree 95 | /// to sweep 96 | /// Returns a tuple of (heap_object_count, dropped_object_count) 97 | fn sweep(&mut self, thread_pool: &mut Pool) -> (usize, usize) { 98 | // set counters 99 | let collect_heap_size = Arc::new(AtomicUsize::new(0)); 100 | let collect_drop_count = Arc::new(AtomicUsize::new(0)); 101 | 102 | // shard the heap 103 | let mut sharded_objects = self.objects.borrow_sharded(self.num_threads); 104 | 105 | thread_pool.scoped(|scope| { 106 | 107 | for mut shard in sharded_objects.drain() { 108 | 109 | // pass a reference to each counter to each thread 110 | let heap_size = collect_heap_size.clone(); 111 | let drop_count = collect_drop_count.clone(); 112 | 113 | // each thread sweeps a sub-trie 114 | scope.execute(move || { 115 | 116 | let mut heap_counter = 0; 117 | let mut drop_counter = 0; 118 | 119 | shard.retain_if(|ptr, meta| { 120 | heap_counter += 1; 121 | 122 | if !meta.is_marked() { 123 | drop_counter += 1; 124 | 125 | // if not marked, drop the object 126 | let tobj = TraitObject { 127 | data: (ptr << ptr_shift()) as *mut (), 128 | vtable: meta.vtable() as *mut (), 129 | }; 130 | 131 | unsafe { 132 | let fatptr: *mut Trace = transmute(tobj); 133 | let owned = Box::from_raw(fatptr); 134 | drop(owned); 135 | } 136 | 137 | false 138 | 139 | } else { 140 | // unmark the object 141 | meta.unmark(); 142 | true 143 | } 144 | }); 145 | 146 | // write out the counters 147 | heap_size.fetch_add(heap_counter, Ordering::SeqCst); 148 | drop_count.fetch_add(drop_counter, Ordering::SeqCst); 149 | }); 150 | } 151 | }); 152 | 153 | // return the counters 154 | (collect_heap_size.load(Ordering::Acquire), 155 | collect_drop_count.load(Ordering::Acquire)) 156 | } 157 | } 158 | 159 | 160 | impl CollectOps for ParHeap { 161 | /// Add an object directly to the heap. `ptr` is assumed to already be right-shift adjusted 162 | fn add_object(&mut self, ptr: usize, vtable: usize) { 163 | self.objects.set(ptr, ObjectMeta::new(vtable)); 164 | } 165 | 166 | /// Run a collection iteration on the heap. Return the total heap size and the number of 167 | /// dropped objects. 168 | fn collect(&mut self, thread_pool: &mut Pool, roots: &mut RootMap) -> (usize, usize) { 169 | self.mark(thread_pool, roots); 170 | self.sweep(thread_pool) 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/statistics.rs: -------------------------------------------------------------------------------- 1 | //! Performance counters and statistics 2 | 3 | 4 | use std::cmp::max; 5 | 6 | use time::{get_time, Timespec}; 7 | 8 | 9 | /// Type that provides counters for the GC to gain some measure of performance. 
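///
/// A custom implementation can be handed to `GcThread::spawn_gc_with`. A minimal sketch that
/// discards all statistics (`NullLogger` is a hypothetical name, not part of this crate):
///
/// ```ignore
/// use mo_gc::StatsLogger;
///
/// struct NullLogger;
///
/// impl StatsLogger for NullLogger {
///     fn mark_start_time(&mut self) {}
///     fn mark_end_time(&mut self) {}
///     fn add_sleep(&mut self, _ms: usize) {}
///     fn add_dropped(&mut self, _count: usize) {}
///     fn current_heap_size(&mut self, _size: usize) {}
///     fn dump_to_stdout(&self) {}
/// }
/// ```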
10 | pub trait StatsLogger: Send { 11 | /// mark start of time 12 | fn mark_start_time(&mut self); 13 | /// mark end of time 14 | fn mark_end_time(&mut self); 15 | /// add a number of milliseconds that the GcThread was asleep 16 | fn add_sleep(&mut self, ms: usize); 17 | 18 | /// add a count of dropped objects 19 | fn add_dropped(&mut self, count: usize); 20 | /// give the current heap object count 21 | fn current_heap_size(&mut self, size: usize); 22 | 23 | /// print statistics 24 | fn dump_to_stdout(&self); 25 | 26 | /// log something to stdout 27 | fn log(&self, string: &str) { 28 | println!("{}", string); 29 | } 30 | } 31 | 32 | 33 | pub struct DefaultLogger { 34 | max_heap_size: usize, 35 | 36 | total_dropped: usize, 37 | drop_iterations: usize, 38 | 39 | start_time: Timespec, 40 | stop_time: Timespec, 41 | sleep_time: u64, 42 | } 43 | 44 | 45 | unsafe impl Send for DefaultLogger {} 46 | 47 | 48 | impl DefaultLogger { 49 | pub fn new() -> DefaultLogger { 50 | DefaultLogger { 51 | max_heap_size: 0, 52 | total_dropped: 0, 53 | drop_iterations: 0, 54 | start_time: Timespec::new(0, 0), 55 | stop_time: Timespec::new(0, 0), 56 | sleep_time: 0, 57 | } 58 | } 59 | } 60 | 61 | 62 | impl StatsLogger for DefaultLogger { 63 | fn mark_start_time(&mut self) { 64 | self.start_time = get_time(); 65 | } 66 | 67 | fn mark_end_time(&mut self) { 68 | self.stop_time = get_time(); 69 | } 70 | 71 | fn add_sleep(&mut self, ms: usize) { 72 | self.sleep_time += ms as u64; 73 | } 74 | 75 | fn add_dropped(&mut self, count: usize) { 76 | self.total_dropped += count; 77 | self.drop_iterations += 1; 78 | } 79 | 80 | fn current_heap_size(&mut self, size: usize) { 81 | self.max_heap_size = max(self.max_heap_size, size); 82 | } 83 | 84 | fn dump_to_stdout(&self) { 85 | // calculate timing 86 | let total_time = max((self.stop_time - self.start_time).num_milliseconds(), 1); 87 | let active_time = total_time - self.sleep_time as i64; 88 | let percent_active_time = active_time * 100 / total_time; 89 | 90 | // calculate drop rate 91 | let dropped_per_second = self.total_dropped as i64 * 1000 / active_time; 92 | 93 | println!("max-heap {}; dropped {} (per second {}); active {}/{}ms ({}%)", 94 | self.max_heap_size, 95 | self.total_dropped, 96 | dropped_per_second, 97 | active_time, 98 | total_time, 99 | percent_active_time); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/trace.rs: -------------------------------------------------------------------------------- 1 | //! The Trace trait must be implemented by every type that can be GC managed. 2 | 3 | 4 | use heap::TraceStack; 5 | 6 | 7 | /// Trace trait. Every type that can be managed by the GC must implement this trait. 8 | /// This trait is unsafe in that incorrectly implementing it can cause Undefined Behavior. 9 | pub unsafe trait Trace { 10 | /// If the type can contain GC managed pointers, this must return true 11 | fn traversible(&self) -> bool { 12 | false 13 | } 14 | 15 | /// If the type can contain GC managed pointers, this must visit each pointer. 16 | /// 17 | /// This function must be thread-safe! 18 | /// 19 | /// It must read a snapshot of the data structure it is implemented for. 
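    ///
    /// A hedged sketch of an implementation for a type holding a `Gc` pointer (`Node` is a
    /// hypothetical type; the child is pushed as its `GcBox`, which is the address the journal
    /// registered for it):
    ///
    /// ```ignore
    /// use mo_gc::{Gc, Trace, TraceOps, TraceStack};
    ///
    /// struct Node {
    ///     next: Gc<Node>,
    /// }
    ///
    /// unsafe impl Trace for Node {
    ///     fn traversible(&self) -> bool {
    ///         true
    ///     }
    ///
    ///     unsafe fn trace(&self, stack: &mut TraceStack) {
    ///         if let Some(child) = self.next.as_raw() {
    ///             // &GcBox<Node> implements Trace, so it can be buffered for the collector
    ///             stack.push_to_trace(&*child);
    ///         }
    ///     }
    /// }
    /// ```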
20 | unsafe fn trace(&self, _stack: &mut TraceStack) {} 21 | } 22 | 23 | 24 | unsafe impl Trace for usize {} 25 | unsafe impl Trace for isize {} 26 | unsafe impl Trace for i8 {} 27 | unsafe impl Trace for u8 {} 28 | unsafe impl Trace for i16 {} 29 | unsafe impl Trace for u16 {} 30 | unsafe impl Trace for i32 {} 31 | unsafe impl Trace for u32 {} 32 | unsafe impl Trace for i64 {} 33 | unsafe impl Trace for u64 {} 34 | unsafe impl Trace for f32 {} 35 | unsafe impl Trace for f64 {} 36 | unsafe impl<'a> Trace for &'a str {} 37 | unsafe impl Trace for String {} 38 | -------------------------------------------------------------------------------- /src/youngheap.rs: -------------------------------------------------------------------------------- 1 | //! A partially-parallel young generation collector. 2 | //! 3 | //! Reading the journal into the root map is single-threaded. 4 | //! 5 | //! This is similar in construction to ParHeap, except that this object map must deal 6 | //! with reference counts from the journal. 7 | 8 | 9 | use std::cmp::max; 10 | use std::mem::transmute; 11 | use std::raw::TraitObject; 12 | use std::sync::Arc; 13 | use std::sync::atomic::{AtomicUsize, Ordering}; 14 | 15 | use scoped_pool::Pool; 16 | 17 | use constants::{BUFFER_RUN, DEC, FLAGS_MASK, INC, JOURNAL_RUN, NEW, NEW_BIT, NEW_INC}; 18 | use heap::{CollectOps, Object, ObjectBuf, RootMap, RootMeta, TraceStack}; 19 | use gcthread::{EntryReceiver, JournalList, ptr_shift}; 20 | use statistics::StatsLogger; 21 | use trace::Trace; 22 | 23 | 24 | /// Type that composes all the things we need to run garbage collection on young generation 25 | /// objects. 26 | /// 27 | /// The roots trie maps object addresses to their reference counts, vtables and `NEW` object 28 | /// flags. 29 | /// 30 | /// During tracing, positive reference count objects and non-`NEW` objects are considered 31 | /// possible roots and only `NEW` objects are considered for marking and sweeping. Entries 32 | /// can be both roots and `NEW`. 33 | /// 34 | /// Collection is run in a thread pool across all CPUs by default by sharding the root trie 35 | /// across threads. 36 | pub struct YoungHeap { 37 | /// Size of the thread pool 38 | num_threads: usize, 39 | 40 | /// A list of AppThread journals to read from 41 | journals: JournalList, 42 | 43 | /// Map of object addresses to reference counts and other data 44 | roots: RootMap, 45 | 46 | /// Buffer of deferred negative reference count adjustments 47 | deferred: ObjectBuf, 48 | 49 | /// The mature object space 50 | mature: T, 51 | 52 | /// Something that implements statistics logging 53 | logger: S, 54 | } 55 | 56 | 57 | impl YoungHeap { 58 | /// Create a new young generation heap and roots reference count tracker 59 | pub fn new(num_threads: usize, mature: T, logger: S) -> YoungHeap { 60 | YoungHeap { 61 | num_threads: num_threads, 62 | journals: JournalList::new(), 63 | roots: RootMap::new(), 64 | deferred: ObjectBuf::new(), 65 | mature: mature, 66 | logger: logger, 67 | } 68 | } 69 | 70 | /// Add a new journal to the list of journals to read 71 | pub fn add_journal(&mut self, recv: EntryReceiver) { 72 | self.journals.push(recv); 73 | } 74 | 75 | /// Returns the number of journals currently connected to the GC 76 | pub fn num_journals(&self) -> usize { 77 | self.journals.len() 78 | } 79 | 80 | /// Read all journals for a number of iterations, updating the roots and keeping a reference 81 | /// count increment for each, and putting decrements into the deferred buffer. 
82 | /// 83 | /// This function is single-threaded and is the biggest GC throughput bottleneck. Setting a 84 | /// value in the trie is slow compared to allocation and writing/reading the journal. 85 | /// 86 | /// Easily consumes 80% of linear GC time. TODO: parallelize this function. 87 | /// 88 | /// Returns the number of journal entries read. 89 | pub fn read_journals(&mut self) -> usize { 90 | let mut entry_count = 0; 91 | 92 | // read through the journals a few times 93 | for _ in 0..JOURNAL_RUN { 94 | 95 | // for each journal 96 | for journal in self.journals.iter_mut() { 97 | 98 | 99 | // read the journal until empty or a limited number of entries have been pulled 100 | for entry in journal.iter_until_empty().take(BUFFER_RUN) { 101 | 102 | entry_count += 1; 103 | 104 | match entry.ptr & FLAGS_MASK { 105 | NEW_INC => { 106 | let ptr = entry.ptr >> ptr_shift(); 107 | self.roots.set(ptr, RootMeta::one(entry.vtable, NEW_BIT)); 108 | } 109 | 110 | NEW => { 111 | let ptr = entry.ptr >> ptr_shift(); 112 | self.roots.set(ptr, RootMeta::zero(entry.vtable, NEW_BIT)); 113 | } 114 | 115 | INC => { 116 | let ptr = entry.ptr >> ptr_shift(); 117 | 118 | let meta = self.roots.get_default_mut(ptr, || { 119 | RootMeta::zero(entry.vtable, 0) 120 | }); 121 | 122 | meta.inc(); 123 | } 124 | 125 | DEC => self.deferred.push(entry), 126 | 127 | _ => unreachable!(), 128 | } 129 | } 130 | } 131 | } 132 | 133 | // remove any disconnected journals 134 | self.journals.retain(|ref j| !j.is_disconnected()); 135 | 136 | entry_count 137 | } 138 | 139 | /// Do a young generation collection. Returns the number of new objects in the young generation 140 | /// heap. 141 | pub fn minor_collection(&mut self, pool: &mut Pool) -> usize { 142 | self.mark(pool); 143 | let (young_size, drop_count) = self.sweep(pool); 144 | self.merge_deferred(pool); 145 | 146 | self.logger.add_dropped(drop_count); 147 | 148 | young_size 149 | } 150 | 151 | /// Do a major collection, moving `NEW` objects to the mature heap and tracing the mature heap 152 | pub fn major_collection(&mut self, pool: &mut Pool) { 153 | // first move any new-objects into the mature heap by copying and unsetting the new-object 154 | // flag in the roots 155 | for (ptr, meta) in self.roots.iter_mut() { 156 | if !meta.unsync_is_unrooted() && meta.is_new() { 157 | // object must have a positive reference count and be marked as new-object to be 158 | // moved to the mature set 159 | self.mature.add_object(ptr, meta.vtable()); 160 | // unset the new-object bit. This object will now be treated as a simple reference 161 | // counted root and won't be dropped from here. 
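                // From here on, dropping the object is the responsibility of the mature heap's
                // sweep; the entry left in `roots` continues to serve as a reference-counted
                // root for marking.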
162 | meta.set_not_new(); 163 | } 164 | } 165 | 166 | let (heap_size, drop_count) = self.mature.collect(pool, &mut self.roots); 167 | 168 | self.logger.current_heap_size(heap_size); 169 | self.logger.add_dropped(drop_count); 170 | } 171 | 172 | /// Use >0 refcount objects and 0-refcount non-new objects to mark new objects 173 | fn mark(&mut self, pool: &mut Pool) { 174 | 175 | let shared_objects = self.roots.borrow_sync(); 176 | let sharded_objects = shared_objects.borrow_sharded(self.num_threads); 177 | 178 | pool.scoped(|scope| { 179 | 180 | for shard in sharded_objects.iter() { 181 | let objects = shared_objects.clone(); 182 | // here there is a shard of the heap and a shared reference to the whole 183 | // heap (objects) for each thread 184 | 185 | scope.execute(move || { 186 | let mut stack = TraceStack::new(); 187 | 188 | for (root_ptr, root_meta) in shard.iter() { 189 | if !root_meta.unsync_is_unrooted() || !root_meta.is_new() { 190 | // read the shard to find roots, which are non-zero-refcount 191 | // entries. Also consider non-new entries as possible roots of new 192 | // objects: this is our equivalent of searching a card table 193 | 194 | if root_meta.mark_and_needs_trace() { 195 | // mark the root, and if it needs tracing then look into it 196 | let obj = Object::from_trie_ptr(root_ptr, root_meta.vtable()); 197 | 198 | let object = obj.as_trace(); 199 | unsafe { object.trace(&mut stack) }; 200 | 201 | // now there may be some child objects on the trace stack: pull 202 | // them off and mark them too 203 | while let Some(obj) = stack.pop() { 204 | 205 | let ptr = obj.ptr >> ptr_shift(); 206 | if let Some(meta) = objects.get(ptr) { 207 | 208 | if meta.mark_and_needs_trace() { 209 | let object = obj.as_trace(); 210 | unsafe { object.trace(&mut stack) }; 211 | } 212 | } 213 | } 214 | } 215 | } 216 | } 217 | }); 218 | } 219 | }); 220 | } 221 | 222 | /// Drop unmarked new objects and remove unrooted objects. 
223 | /// Returns tuple (young_object_count, dropped_count) 224 | fn sweep(&mut self, pool: &mut Pool) -> (usize, usize) { 225 | // set counters 226 | let collect_young_count= Arc::new(AtomicUsize::new(0)); 227 | let collect_drop_count = Arc::new(AtomicUsize::new(0)); 228 | 229 | let mut split_objects = self.roots.borrow_sharded(self.num_threads); 230 | 231 | pool.scoped(|scope| { 232 | 233 | for mut node in split_objects.drain() { 234 | 235 | // pass a reference to each counter to each thread 236 | let young_count = collect_young_count.clone(); 237 | let drop_count = collect_drop_count.clone(); 238 | 239 | scope.execute(move || { 240 | 241 | let mut young_counter = 0; 242 | let mut drop_counter = 0; 243 | 244 | node.retain_if(|ptr, meta| { 245 | 246 | if meta.is_new_and_unmarked() { 247 | drop_counter += 1; 248 | 249 | // unmarked new-object (implies zero-refcount) 250 | let obj = Object::from_trie_ptr(ptr, meta.vtable); 251 | let tobj: TraitObject = Object::into(obj); 252 | 253 | unsafe { 254 | let fatptr: *mut Trace = transmute(tobj); 255 | let owned = Box::from_raw(fatptr); 256 | drop(owned); 257 | } 258 | 259 | false 260 | 261 | } else if !meta.is_new() && meta.unsync_is_unrooted() { 262 | false 263 | 264 | } else { 265 | if meta.is_new() { 266 | young_counter += 1; 267 | } 268 | 269 | meta.unmark(); 270 | true 271 | } 272 | }); 273 | 274 | // write out the counters 275 | young_count.fetch_add(young_counter, Ordering::SeqCst); 276 | drop_count.fetch_add(drop_counter, Ordering::SeqCst); 277 | }); 278 | } 279 | }); 280 | 281 | // return the counters 282 | (collect_young_count.load(Ordering::Acquire), 283 | collect_drop_count.load(Ordering::Acquire)) 284 | } 285 | 286 | /// Move the deferred refcount decrements into the root set's reference counts. 287 | fn merge_deferred(&mut self, pool: &mut Pool) { 288 | let chunk_size = max(1, self.deferred.len() / self.num_threads); 289 | 290 | { 291 | let shared_roots = self.roots.borrow_sync(); 292 | let chunks = self.deferred.chunks(chunk_size); 293 | 294 | pool.scoped(|scope| { 295 | 296 | for chunk in chunks { 297 | 298 | let roots = shared_roots.clone(); 299 | 300 | scope.execute(move || { 301 | for object in chunk { 302 | let ptr = object.ptr >> ptr_shift(); 303 | 304 | if let Some(ref mut meta) = roots.get(ptr) { 305 | // this is the only place where the reference count needs to 306 | // be thread-safely adjusted 307 | meta.dec(); 308 | } else { 309 | // there should never be something in the deferred buffer that 310 | // isn't in the heap 311 | unreachable!(); 312 | } 313 | } 314 | }); 315 | } 316 | }); 317 | } 318 | 319 | self.deferred.clear(); 320 | } 321 | 322 | /// Return a reference to the logger 323 | pub fn logger(&mut self) -> &mut S { 324 | &mut self.logger 325 | } 326 | 327 | /// Call to return the logger on shutdown 328 | pub fn shutdown(self) -> S { 329 | self.logger 330 | } 331 | } 332 | --------------------------------------------------------------------------------