├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── autoref-specialization └── README.md ├── bitfield-assertion ├── README.md └── demo │ ├── Cargo.toml │ ├── bitfield │ ├── Cargo.toml │ └── src │ │ └── lib.rs │ ├── impl │ ├── Cargo.toml │ └── src │ │ └── lib.rs │ └── main.rs ├── callable-types ├── README.md └── demo │ ├── Cargo.toml │ └── main.rs ├── function-epilogue ├── README.md └── demo │ ├── Cargo.toml │ └── main.rs ├── integer-match ├── README.md └── demo │ ├── Cargo.toml │ └── main.rs ├── readonly-fields ├── README.md └── demo │ ├── Cargo.toml │ └── main.rs └── unit-type-parameters ├── README.md └── demo ├── Cargo.toml └── main.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: dtolnay 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | schedule: [cron: "40 1 * * *"] 8 | 9 | permissions: 10 | contents: read 11 | 12 | env: 13 | RUSTFLAGS: -Dwarnings 14 | 15 | jobs: 16 | pre_ci: 17 | uses: dtolnay/.github/.github/workflows/pre_ci.yml@master 18 | 19 | check: 20 | name: Rust ${{matrix.rust}} 21 | needs: pre_ci 22 | if: needs.pre_ci.outputs.continue 23 | runs-on: ubuntu-latest 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | rust: [nightly, beta, stable] 28 | timeout-minutes: 45 29 | steps: 30 | - uses: actions/checkout@v4 31 | - uses: dtolnay/rust-toolchain@master 32 | with: 33 | toolchain: ${{matrix.rust}} 34 | - run: cargo check --workspace --exclude case-study-bitfield-assertion --exclude case-study-readonly-fields 35 | - uses: actions/upload-artifact@v4 36 | if: matrix.rust == 'nightly' && always() 37 | with: 38 | name: Cargo.lock 39 | path: Cargo.lock 40 | continue-on-error: true 41 | 
42 | outdated: 43 | name: Outdated 44 | runs-on: ubuntu-latest 45 | if: github.event_name != 'pull_request' 46 | timeout-minutes: 45 47 | steps: 48 | - uses: actions/checkout@v4 49 | - uses: dtolnay/rust-toolchain@stable 50 | - uses: dtolnay/install@cargo-outdated 51 | - run: cargo outdated --workspace --exit-code 1 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | target/ 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "bitfield-assertion/demo", 4 | "bitfield-assertion/demo/bitfield", 5 | "bitfield-assertion/demo/impl", 6 | "callable-types/demo", 7 | "function-epilogue/demo", 8 | "integer-match/demo", 9 | "readonly-fields/demo", 10 | "unit-type-parameters/demo", 11 | ] 12 | resolver = "2" 13 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Case studies 2 | 3 | This repository showcases some examples of tricky Rust code that I have 4 | encountered during my years working with a variety of advanced macro libraries 5 | in Rust (my own and others'). 6 | 7 |
8 | 9 | 10 | # [:postal\_horn:](#the-point) The point 11 | 12 | This project is dedicated to the one profound insight about Rust macro 13 | development: the difference between someone who is competent with macros vs an 14 | expert at macros mostly has nothing to do with how good they are "at macros". 15 | 16 | 90% of what enables people to push the limits of possibility in pursuit of a 17 | powerful and user-friendly macro library API is in their mastery of everything 18 | else about Rust outside of macros, and their creativity to put together ordinary 19 | language features in interesting ways that may not occur in handwritten code. 20 | 21 | You may occasionally come across Rust macros that you feel are really advanced 22 | or magical. If you ever feel this way, I encourage you to take a closer look and 23 | you'll discover that as far as the macro implementation itself is concerned, 24 | none of those libraries are doing anything remotely interesting. If it is a 25 | procedural macro, they always just parse some input in a boring way, crawl some 26 | syntax trees in a boring way to find out about the input, and paste together 27 | some output code in a boring way exactly like what you would learn in a few 28 | hours by working through any part of my [procedural macro workshop][workshop]. 29 | If it is a macro\_rules macro, everything is conceptually just as boring but 30 | when stretched to its limits it becomes a write-only syntax that poses a 31 | challenge for even the author to follow and understand later, let alone someone 32 | else not already fluent in the basics of macro\_rules. 33 | 34 | To the extent that there are any tricks to macro development, all of them 35 | revolve around *what* code the macros emit, not *how* the macros emit the code. 
36 | This realization can be surprising to people who entered into macro development 37 | with a vague notion of procedural macros as a "compiler plugin" which they 38 | imagine must imply all sorts of complicated APIs for *how* to integrate with the 39 | rest of the compiler. That's not how it works. The only thing macros do is emit 40 | code that could have been written by hand. If you couldn't have come up with 41 | some piece of tricky code from one of those magical macros, learning more "about 42 | macros" won't change that; but learning more about every other part of Rust 43 | will. Inversely, once you come up with what code you want to generate, writing 44 | the macro to generate it is generally the easy part. 45 | 46 | [workshop]: https://github.com/dtolnay/proc-macro-workshop 47 | 48 |
49 | 50 | 51 | # [:boot:](#focus) Focus 52 | 53 | Yes, these case studies are drawn from use cases that arise from work on macros, 54 | but the macros are never the interesting part. The ingenuity and sophistication 55 | always lie in what Rust code ultimately gets emitted by the macro, and I think 56 | you will find that those are fully possible to appreciate even if you know 57 | nothing about macros. 58 | 59 | To that end, I make an effort to minimize the role of macros in these case 60 | studies. For each one I give only enough context about the relevant macro to 61 | explain a set of constraints that the generated code will need to comply with. 62 | The focus is on the generated code, which somehow solves the constraints using a 63 | clever combination of Rust language features unrelated to macros. Lastly and 64 | least importantly, I tie it back to the macro to point out that making a macro 65 | produce the generated code we came up with would be the easy part. 66 | 67 | Read and enjoy; I hope you find these an enlightening window into this corner of 68 | Rust that has so far not been put into words. 69 | 70 |
71 | 72 | 73 | # [:jack\_o\_lantern:](#case-studies) Case studies 74 | 75 |
76 | 77 | [Function epilogue](function-epilogue/README.md)
78 | Topics: borrow checker, no_std, closures, lifetime elision
79 |
80 | 81 |
82 | 83 | [Multiple of 8 const assertion](bitfield-assertion/README.md)
84 | Topics: diagnostics, name resolution, const evaluation, traits
85 |
86 | 87 |
88 | 89 | [Unit struct with type parameters](unit-type-parameters/README.md)
90 | Topics: namespaces, glob imports, layout optimization, autotraits, documentation
91 |
92 | 93 |
94 | 95 | [Read-only fields of mutable struct](readonly-fields/README.md)
96 | Topics: deref coercion, borrow checker, repr, unsafe code, documentation
97 |
98 | 99 |
100 | 101 | [Consecutive integer match patterns](integer-match/README.md)
102 | Topics: macro_rules, const
103 |
104 | 105 |
106 | 107 | [User-defined callable types](callable-types/README.md)
108 | Topics: deref coercion, closures, trait objects, repr, unsafe code
109 |
110 | 111 |
112 | 113 | [Autoref-based stable specialization](autoref-specialization/README.md)
114 | Topics: traits, method resolution
115 |
116 | 117 |
118 | 119 | #### License 120 | 121 | 122 | Licensed under either of Apache License, Version 123 | 2.0 or MIT license at your option. 124 | 125 | 126 |
127 | 128 | 129 | Unless you explicitly state otherwise, any contribution intentionally submitted 130 | for inclusion in this project by you, as defined in the Apache-2.0 license, 131 | shall be dual licensed as above, without any additional terms or conditions. 132 | 133 | -------------------------------------------------------------------------------- /autoref-specialization/README.md: -------------------------------------------------------------------------------- 1 | ## Autoref-based stable specialization 2 | 3 | "Specialization" refers to permitting overlapping impls in Rust's trait system 4 | so long as for every possible type, one of the applicable impls is "more 5 | specific" than the others for some intuitive but precisely defined notion of 6 | specific. Discussions about a specialization language feature have been ongoing 7 | for 4.5 years ([RFC 1210], [rust-lang/rust#31844]). Today the feature is 8 | partially implemented in rustc but is not yet sound when mixed with lifetimes 9 | ([rust-lang/rust#40582]) and requires more language design work and compiler 10 | work before it could be stabilized. 11 | 12 | [RFC 1210]: https://github.com/rust-lang/rfcs/pull/1210 13 | [rust-lang/rust#31844]: https://github.com/rust-lang/rust/issues/31844 14 | [rust-lang/rust#40582]: https://github.com/rust-lang/rust/issues/40582 15 | 16 | This page covers a stable, safe, generalizable technique for solving some of the 17 | use cases that would otherwise be blocked on specialization. 18 | 19 | The technique was originally developed for use by macros in the [Anyhow] crate. 20 | 21 | [Anyhow]: https://github.com/dtolnay/anyhow 22 | 23 |
24 | 25 | ### Context 26 | 27 | I'll explain the technique as applied to two use cases, one simpler to start 28 | with and then a more elaborate realistic one. 29 | 30 | The first use case is going to be a truly canonical application of 31 | specialization — a blanket impl with a separate fast path for some 32 | concrete type(s). The equivalent nightly-only specialized blanket impl would be 33 | like this: 34 | 35 | ```rust 36 | #![feature(specialization)] 37 | 38 | use std::fmt::{Display, Write}; 39 | 40 | pub trait MyToString { 41 | fn my_to_string(&self) -> String; 42 | } 43 | 44 | // General impl that applies to any T with a Display impl. 45 | impl<T: Display> MyToString for T { 46 | default fn my_to_string(&self) -> String { 47 | let mut buf = String::new(); 48 | buf.write_fmt(format_args!("{}", self)).unwrap(); 49 | buf.shrink_to_fit(); 50 | buf 51 | } 52 | } 53 | 54 | // Specialized impl to bypass the relatively expensive std::fmt machinery. 55 | impl MyToString for String { 56 | fn my_to_string(&self) -> String { 57 | self.clone() 58 | } 59 | } 60 | ``` 61 | 62 | Then the second use case will be closer to the real-life usage of this technique 63 | in Anyhow. We have an error type, and we want it to be constructible from any 64 | underlying type that has a `Display` impl. But if the underlying type *also* has 65 | a `std::error::Error` impl, we'd like to know about that by invoking a different 66 | constructor which will propagate the original error's source() and backtrace() 67 | information correctly. 68 | 69 | Ultimately we want both of the following to compile: 70 | 71 | ```rust 72 | fn demo1() -> Result<(), anyhow::Error> { 73 | // Turn a &str into an error. 74 | // &str implements Display but not std::error::Error. 75 | return Err(anyhow!("oh no!")); 76 | } 77 | 78 | fn demo2() -> Result<(), anyhow::Error> { 79 | // Turn an existing std::error::Error value into our error without 80 | // losing its source() and backtrace() if there is one. 
81 | let io_error = fs::read("/tmp/nonexist").unwrap_err(); 82 | return Err(anyhow!(io_error)); 83 | } 84 | ``` 85 | 86 | Recall that `std::error::Error` has `Display` as a supertrait so the impl for 87 | `std::error::Error` is strictly more specific than the general impl that covers 88 | all `Display` types. 89 | 90 | ```rust 91 | #![feature(specialization)] 92 | 93 | use std::error::Error as StdError; 94 | use std::fmt::Display; 95 | 96 | pub struct Error(/* ... */); 97 | 98 | impl Error { 99 | pub(crate) fn from_fmt<T: Display>(error: T) -> Self {...} 100 | pub(crate) fn from_std_error<T: StdError>(error: T) -> Self {...} 101 | } 102 | 103 | pub(crate) trait AnyhowNew { 104 | fn new(self) -> Error; 105 | } 106 | 107 | impl<T: Display> AnyhowNew for T { 108 | default fn new(self) -> Error { 109 | // no std error impl 110 | Error::from_fmt(self) 111 | } 112 | } 113 | 114 | impl<T: StdError> AnyhowNew for T { 115 | fn new(self) -> Error { 116 | // able to use std error's source() and backtrace() 117 | Error::from_std_error(self) 118 | } 119 | } 120 | ``` 121 | 122 | 
123 | 124 | ### Background: autoref 125 | 126 | To do specialization using only 100% stable and 100% safe code, we'll need some 127 | other mechanism to accomplish compile-time fallback through a prioritized 128 | sequence of behaviors. That is, we need some way to define a general impl and a 129 | tree of more specific impls where any invocation will resolve to the most 130 | specific applicable impl at compile time. 131 | 132 | Outside of `feature(specialization)`, Rust has at least one other language 133 | feature capable of doing this, which is method resolution autoref. 134 | 135 | As an introduction to autoref let's consider this program: 136 | 137 | ```rust 138 | struct Value(i32); 139 | 140 | impl Value { 141 | fn print(&self) { 142 | println!("it worked! {}", self.0); 143 | } 144 | } 145 | 146 | fn main() { 147 | let v = Value(0); 148 | v.print(); 149 | } 150 | ``` 151 | 152 | We make a variable `v` of type `Value` and call a method on it. If you've 153 | written any Rust code it will be obvious to you *that* this code works, but I'd 154 | like to dig into *why* it works. In particular, we have a value of type `Value` 155 | but the method `print` takes an argument of type `&Value`. Where is the code 156 | that turns `Value` into `&Value`? 157 | 158 | This is autoref — the compiler is inserting the required reference for you 159 | as part of resolving the method call. In effect, the code that executes is 160 | equivalent to if we had written `(&v).print()` or more explicitly 161 | `Value::print(&v)`, but it is "auto" because we never had to write `&` in the 162 | call. 163 | 164 | Note: autoref is not the same as deref, which is a different thing that method 165 | resolution does. In a way they are opposites; autoref is about *adding* a layer 166 | of reference to resolve a call; deref is about *removing* a layer of reference. 167 | Both are ubiquitous but invisible. 168 | 169 |
170 | 171 | ### Background: method resolution 172 | 173 | How does autoref get us stable specialization? To answer that, let's look at 174 | what happens if the same method name could be dispatched either with or without 175 | autoref. 176 | 177 | ```rust 178 | struct Value; 179 | 180 | trait Print { 181 | fn print(self); 182 | } 183 | 184 | impl Print for Value { 185 | fn print(self) { 186 | println!("called on Value"); 187 | } 188 | } 189 | 190 | impl Print for &Value { 191 | fn print(self) { 192 | println!("called on &Value"); 193 | } 194 | } 195 | 196 | fn main() { 197 | let v = Value; 198 | v.print(); 199 | } 200 | ``` 201 | 202 | Here `print` could refer to either `<Value as Print>::print` which takes an 203 | argument of type `Value`, or to `<&Value as Print>::print` which takes an 204 | argument of type `&Value`. If you run this program you'll see it prints "called 205 | on Value". But if the first impl were removed, it would then print "called on 206 | &Value". In some sense the first impl is more specific from the point of 207 | view of the call we wrote; exactly what we'll need! 208 | 209 | To define the compiler's behavior more precisely, the rule is that if a method 210 | can be dispatched without autoref then it will be. Only if a method cannot be 211 | dispatched without autoref, the compiler will insert an autoref and attempt to 212 | resolve it again. 213 | 214 | This and some creativity should be all we need to solve the use cases that we 215 | saw up top. 216 | 217 | 
218 | 219 | ### Simple application 220 | 221 | Recall that we have a String conversion that we wanted to implement in one way 222 | for any `T: Display` and in a more performant specialized way for specifically 223 | `String`. 224 | 225 | Here is the full implementation: 226 | 227 | ```rust 228 | use std::fmt::{Display, Write}; 229 | 230 | pub trait DisplayToString { 231 | fn my_to_string(&self) -> String; 232 | } 233 | 234 | // General impl that applies to any T with a Display impl. 235 | // 236 | // Note that the Self type of this impl is &T and so the method argument 237 | // is actually &&T! That makes this impl lower priority during method 238 | // resolution if the impl that accepts &String would also apply. 239 | impl<T: Display> DisplayToString for &T { 240 | fn my_to_string(&self) -> String { 241 | println!("called blanket impl"); 242 | 243 | let mut buf = String::new(); 244 | buf.write_fmt(format_args!("{}", self)).unwrap(); 245 | buf.shrink_to_fit(); 246 | buf 247 | } 248 | } 249 | 250 | pub trait StringToString { 251 | fn my_to_string(&self) -> String; 252 | } 253 | 254 | // Specialized impl to bypass the relatively expensive std::fmt machinery. 255 | // 256 | // The method argument is typed &String. 257 | impl StringToString for String { 258 | fn my_to_string(&self) -> String { 259 | println!("called specialized impl"); 260 | 261 | self.clone() 262 | } 263 | } 264 | 265 | macro_rules! convert_to_strings { 266 | ($($e:expr),*) => { 267 | [$( 268 | (&$e).my_to_string() 269 | ),*] 270 | }; 271 | } 272 | 273 | fn main() { 274 | let owned_string = "hacks".to_owned(); 275 | let strings = convert_to_strings![1, "&str", owned_string]; 276 | println!("{:?}", strings); 277 | } 278 | ``` 279 | 280 | If we run this program the output shows that our specialization works! 281 | 282 | ```console 283 | called blanket impl 284 | called blanket impl 285 | called specialized impl 286 | ["1", "&str", "hacks"] 287 | ``` 288 | 289 | 
290 | 291 | ### Realistic application 292 | 293 | Recall that we have an Error type that we'd like to construct from any `T` that 294 | implements `Display`, but using a different constructor if `T` also implements 295 | `std::error::Error`. 296 | 297 | The reason this is more complicated than the previous use case is that my Error 298 | constructors want to receive the argument *by value*! That's bad news if we are 299 | relying on autoref because autoref is all about inserting a layer of reference. 300 | 301 | Instead we'll use a tagged dispatch strategy with a pair of method calls, the 302 | first using autoref-based specialization with a reference argument to select a 303 | tag, and the second based on that tag which takes ownership of the original 304 | argument. 305 | 306 | ```rust 307 | use std::error::Error as StdError; 308 | use std::fmt::Display; 309 | 310 | pub struct Error(/* ... */); 311 | 312 | // Our two constructors. The first is more general. 313 | impl Error { 314 | pub(crate) fn from_fmt<T: Display>(error: T) -> Self { 315 | println!("called Error::from_fmt"); 316 | Error {} 317 | } 318 | pub(crate) fn from_std_error<T: StdError>(error: T) -> Self { 319 | _ = error.source(); // it works! 320 | println!("called Error::from_std_error"); 321 | Error {} 322 | } 323 | } 324 | 325 | macro_rules! anyhow { 326 | ($err:expr) => ({ 327 | #[allow(unused_imports)] 328 | use $crate::{DisplayKind, StdErrorKind}; 329 | match $err { 330 | error => (&error).anyhow_kind().new(error), 331 | } 332 | }); 333 | } 334 | 335 | // If the arg implements Display but not StdError, anyhow_kind() will 336 | // return this tag. 337 | struct DisplayTag; 338 | 339 | trait DisplayKind { 340 | #[inline] 341 | fn anyhow_kind(&self) -> DisplayTag { 342 | DisplayTag 343 | } 344 | } 345 | 346 | // Requires one extra autoref to call! Lower priority than StdErrorKind. 
347 | impl<T: Display> DisplayKind for &T {} 348 | 349 | impl DisplayTag { 350 | #[inline] 351 | fn new<M: Display>(self, message: M) -> Error { 352 | Error::from_fmt(message) 353 | } 354 | } 355 | 356 | // If the arg implements StdError (and thus also Display), anyhow_kind() 357 | // will return this tag. 358 | struct StdErrorTag; 359 | 360 | trait StdErrorKind { 361 | #[inline] 362 | fn anyhow_kind(&self) -> StdErrorTag { 363 | StdErrorTag 364 | } 365 | } 366 | 367 | // Does not require any autoref if called as (&error).anyhow_kind(). 368 | impl<T: StdError> StdErrorKind for T {} 369 | 370 | impl StdErrorTag { 371 | #[inline] 372 | fn new<E: StdError>(self, error: E) -> Error { 373 | Error::from_std_error(error) 374 | } 375 | } 376 | 377 | fn main() { 378 | // Turn a &str into an error. 379 | // &str implements Display but not std::error::Error. 380 | let _err = anyhow!("oh no!"); 381 | 382 | // Turn an existing std::error::Error value into our error without 383 | // losing its source() and backtrace() if there is one. 384 | let io_error = std::fs::read("/tmp/nonexist").unwrap_err(); 385 | let _err = anyhow!(io_error); 386 | } 387 | ``` 388 | 389 | 
390 | 391 | ### Limitations 392 | 393 | The way that this technique applies method resolution cannot be described by a 394 | trait bound, so for practical purposes you should think of this technique as 395 | working in macros only. 396 | 397 | That is, we can't do: 398 | 399 | ```rust 400 | pub fn demo<T: ???>(value: T) -> String { 401 | (&value).my_to_string() 402 | } 403 | ``` 404 | 405 | and get the specialized behavior. If we put `T: Display` in the trait bound, 406 | method resolution will use the impl for `T: Display` even if `T` happened to be 407 | instantiated as `String`. 408 | 409 | Depending on your use case, this is honestly fine! If you are a macro already 410 | then you're all set. If you can be made a macro, that's good too (like I did for 411 | `anyhow!` (though it was good for that to be a macro anyway so that it can 412 | accept format args the way println does)). If you can't possibly be a macro then 413 | this won't help you. 414 | 415 | I am excited to hear other people's experience applying this technique and I 416 | expect it to generalize quite well. 417 | -------------------------------------------------------------------------------- /bitfield-assertion/README.md: -------------------------------------------------------------------------------- 1 | ## Multiple of 8 const assertion 2 | 3 | We need a macro that will fail to compile if some expression is not a multiple 4 | of 8, without knowing the value of the expression until after name resolution 5 | which happens after macro expansion. 6 | 7 | This came up in the context of bitfields where sizes of fields are specified in 8 | bits but the application would like to require that the total size is an exact 9 | number of bytes. 
10 | 11 | ```rust 12 | trait Field { 13 | const BITS: usize; 14 | } 15 | 16 | enum B3 {} 17 | impl Field for B3 { 18 | const BITS: usize = 3; 19 | } 20 | 21 | enum B5 {} 22 | impl Field for B5 { 23 | const BITS: usize = 5; 24 | } 25 | 26 | fn main() { 27 | require_multiple_of_eight!(B3::BITS + B5::BITS); 28 | } 29 | ``` 30 | 31 | As always, we would like the error message to be as precise and useful as 32 | possible even though in this case the macro does not control the exact message 33 | because this error can only be detected after name resolution. 34 | 35 |
36 | 37 | ### First attempt 38 | 39 | The two main ways a macro can trigger compile-time errors after macro expansion 40 | are in const evaluation and in type checking. 41 | 42 | Let's look at const evaluation first by writing a `const` that can be 43 | successfully computed if and only if the input expression is a multiple of 8. 44 | There are many ways to do this but one way is to use `$e % 8` as an index into 45 | an array where the only legal index would be 0. 46 | 47 | ```rust 48 | macro_rules! require_multiple_of_eight { 49 | ($e:expr) => { 50 | const REQUIRE_MULTIPLE_OF_EIGHT: () = [()][$e % 8]; 51 | _ = REQUIRE_MULTIPLE_OF_EIGHT; 52 | }; 53 | } 54 | ``` 55 | 56 | This seems like it should get the job done but it doesn't quite. There are some 57 | weird optimizations around const evaluation. In particular a `cargo check` would 58 | not need to evaluate this constant. It does a simple type check only which 59 | determines that *if* the constant does evaluate successfully then its type would 60 | be `()` which matches the declared type so everything is okay. On the other hand 61 | `cargo build` does need to perform the evaluation. We end up in a situation 62 | where `cargo check` can succeed at the same time as `cargo build` fails, which 63 | is not good. 64 | 65 | Separately, this approach does not give us any opportunity to control the 66 | message part of the error. If the same macro needed to evaluate multiple 67 | assertions, the caller couldn't tell which one was failing. 68 | 69 | The message looks like: 70 | 71 | ```console 72 | error[E0080]: erroneous constant used 73 | --> src/main.rs:8:10 74 | | 75 | 8 | #[derive(Bitfield)] 76 | | ^^^^^^^^ referenced constant has errors 77 | ``` 78 | 79 |
80 | 81 | ### Second attempt 82 | 83 | Let's use `$e` to produce something that only type checks if the given 84 | expression is a multiple of 8. 85 | 86 | Currently the only place that expressions can appear in the type grammar is in 87 | the length of a fixed sized array, so we will rely on that. 88 | 89 | ```rust 90 | macro_rules! require_multiple_of_eight { 91 | ($e:expr) => { 92 | _ = <[(); $e % 8] as $crate::MultipleOfEight>::check(); 93 | }; 94 | } 95 | 96 | trait MultipleOfEight { 97 | fn check() {} 98 | } 99 | 100 | impl MultipleOfEight for [(); 0] {} 101 | ``` 102 | 103 | This is pretty good! The array type `[(); $e % 8]` only implements the required 104 | trait if `$e % 8` is zero. The trait solver's error message mentions 105 | "MultipleOfEight" which adequately indicates to the user what went wrong. 106 | 107 | ```console 108 | error[E0277]: the trait bound `[(); 6]: MultipleOfEight` is not satisfied 109 | --> src/main.rs:8:10 110 | | 111 | 8 | #[derive(Bitfield)] 112 | | ^^^^^^^^ the trait `MultipleOfEight` is not implemented for `[(); 6]` 113 | | 114 | = help: the following implementations were found: 115 | <[(); 0] as MultipleOfEight> 116 | = note: required by `MultipleOfEight::check` 117 | ``` 118 | 119 | There are some things to improve upon though. The error message includes this 120 | distracting array type `[(); 6]` that is not obviously related to what the 121 | caller might have written. Also the note mentioning the method 122 | `MultipleOfEight::check` is just noise as far as the caller would be concerned. 123 | 124 |
125 | 126 | ### Solution 127 | 128 | Let's solve this without a method call and without the array type being the 129 | thing with a missing trait impl. 130 | 131 | ```rust 132 | macro_rules! require_multiple_of_eight { 133 | ($e:expr) => { 134 | let _: $crate::MultipleOfEight<[(); $e % 8]>; 135 | }; 136 | } 137 | 138 | type MultipleOfEight<T> = <<T as Array>::Marker as TotalSizeIsMultipleOfEightBits>::Check; 139 | 140 | enum ZeroMod8 {} 141 | enum OneMod8 {} 142 | enum TwoMod8 {} 143 | enum ThreeMod8 {} 144 | enum FourMod8 {} 145 | enum FiveMod8 {} 146 | enum SixMod8 {} 147 | enum SevenMod8 {} 148 | 149 | trait Array { 150 | type Marker; 151 | } 152 | 153 | impl Array for [(); 0] { 154 | type Marker = ZeroMod8; 155 | } 156 | 157 | impl Array for [(); 1] { 158 | type Marker = OneMod8; 159 | } 160 | 161 | impl Array for [(); 2] { 162 | type Marker = TwoMod8; 163 | } 164 | 165 | impl Array for [(); 3] { 166 | type Marker = ThreeMod8; 167 | } 168 | 169 | impl Array for [(); 4] { 170 | type Marker = FourMod8; 171 | } 172 | 173 | impl Array for [(); 5] { 174 | type Marker = FiveMod8; 175 | } 176 | 177 | impl Array for [(); 6] { 178 | type Marker = SixMod8; 179 | } 180 | 181 | impl Array for [(); 7] { 182 | type Marker = SevenMod8; 183 | } 184 | 185 | trait TotalSizeIsMultipleOfEightBits { 186 | type Check; 187 | } 188 | 189 | impl TotalSizeIsMultipleOfEightBits for ZeroMod8 { 190 | type Check = (); 191 | } 192 | ``` 193 | 194 | In this code the `<T as Array>::Marker` always resolves to one of `ZeroMod8` 195 | through `SevenMod8`. But then only `ZeroMod8` implements 196 | `TotalSizeIsMultipleOfEightBits`. 197 | 198 | Here is the error message, pretty helpful and free of the distractions from the 199 | second attempt. 
200 | 201 | ```console 202 | error[E0277]: the trait bound `SixMod8: TotalSizeIsMultipleOfEightBits` is not satisfied 203 | --> src/main.rs:8:10 204 | | 205 | 8 | #[derive(Bitfield)] 206 | | ^^^^^^^^ the trait `TotalSizeIsMultipleOfEightBits` is not implemented for `SixMod8` 207 | ``` 208 | 209 |
210 | 211 | ### Future 212 | 213 | Someone should write an RFC for const\_assert. Something like: 214 | 215 | ```rust 216 | const_assert!($e % 8 == 0, "total size is required to be a multiple of 8 bits"); 217 | ``` 218 | 219 | Having this provided by the compiler would let us give better error messages 220 | with less effort than the solution above. 221 | -------------------------------------------------------------------------------- /bitfield-assertion/demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "case-study-bitfield-assertion" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [[bin]] 9 | name = "case-study" 10 | path = "main.rs" 11 | 12 | [dependencies] 13 | bitfield = { path = "bitfield" } 14 | -------------------------------------------------------------------------------- /bitfield-assertion/demo/bitfield/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bitfield" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [dependencies] 9 | bitfield-impl = { path = "../impl" } 10 | -------------------------------------------------------------------------------- /bitfield-assertion/demo/bitfield/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub use bitfield_impl::bitfield; 2 | 3 | pub trait Field { 4 | const BITS: usize; 5 | } 6 | 7 | bitfield_impl::generate_specifiers!(); 8 | 9 | pub type MultipleOfEight<T> = <<T as Array>::Marker as TotalSizeIsMultipleOfEightBits>::Check; 10 | 11 | pub enum ZeroMod8 {} 12 | pub enum OneMod8 {} 13 | pub enum TwoMod8 {} 14 | pub enum ThreeMod8 {} 15 | pub enum FourMod8 {} 16 | pub enum FiveMod8 {} 17 | pub enum SixMod8 {} 18 | pub enum SevenMod8 {} 19 | 20 | pub trait Array { 21 | type Marker; 22 | } 23 | 24 | impl Array for [(); 0] { 25 | type Marker 
= ZeroMod8; 26 | } 27 | 28 | impl Array for [(); 1] { 29 | type Marker = OneMod8; 30 | } 31 | 32 | impl Array for [(); 2] { 33 | type Marker = TwoMod8; 34 | } 35 | 36 | impl Array for [(); 3] { 37 | type Marker = ThreeMod8; 38 | } 39 | 40 | impl Array for [(); 4] { 41 | type Marker = FourMod8; 42 | } 43 | 44 | impl Array for [(); 5] { 45 | type Marker = FiveMod8; 46 | } 47 | 48 | impl Array for [(); 6] { 49 | type Marker = SixMod8; 50 | } 51 | 52 | impl Array for [(); 7] { 53 | type Marker = SevenMod8; 54 | } 55 | 56 | pub trait TotalSizeIsMultipleOfEightBits { 57 | type Check; 58 | } 59 | 60 | impl TotalSizeIsMultipleOfEightBits for ZeroMod8 { 61 | type Check = (); 62 | } 63 | -------------------------------------------------------------------------------- /bitfield-assertion/demo/impl/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bitfield-impl" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [lib] 9 | proc-macro = true 10 | 11 | [dependencies] 12 | quote = "1.0" 13 | syn = "2.0" 14 | -------------------------------------------------------------------------------- /bitfield-assertion/demo/impl/src/lib.rs: -------------------------------------------------------------------------------- 1 | use proc_macro::TokenStream; 2 | use quote::{format_ident, quote}; 3 | use syn::{parse_macro_input, Data, DeriveInput}; 4 | 5 | #[proc_macro_attribute] 6 | pub fn bitfield(_args: TokenStream, input: TokenStream) -> TokenStream { 7 | let input = parse_macro_input!(input as DeriveInput); 8 | 9 | let fields = match &input.data { 10 | Data::Struct(data) => data.fields.iter().map(|field| &field.ty), 11 | _ => unimplemented!(), 12 | }; 13 | 14 | TokenStream::from(quote! 
{ 15 | fn __bitfield() { 16 | let _: bitfield::MultipleOfEight< 17 | [(); (0 #(+ <#fields as bitfield::Field>::BITS)*) % 8] 18 | >; 19 | } 20 | }) 21 | } 22 | 23 | #[proc_macro] 24 | pub fn generate_specifiers(_input: TokenStream) -> TokenStream { 25 | (0usize..=64usize) 26 | .map(|width| { 27 | let name = format_ident!("B{}", width); 28 | TokenStream::from(quote! { 29 | pub enum #name {} 30 | 31 | impl Field for #name { 32 | const BITS: usize = #width; 33 | } 34 | }) 35 | }) 36 | .collect() 37 | } 38 | -------------------------------------------------------------------------------- /bitfield-assertion/demo/main.rs: -------------------------------------------------------------------------------- 1 | use bitfield::*; 2 | 3 | #[bitfield] // (1+3+4+23)%8 != 0 4 | struct NotQuiteFourBytes { 5 | a: B1, 6 | b: B3, 7 | c: B4, 8 | d: B23, 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /callable-types/README.md: -------------------------------------------------------------------------------- 1 | ## User-defined callable types 2 | 3 | Various languages have ways of making user-defined objects callable with 4 | function call syntax: C++'s [`operator ()`][cpp], Python's [`__call__`][python], 5 | Swift's [`@dynamicCallable`][swift], Kotlin's [`invoke`][kotlin], PHP's 6 | [`__invoke`][php], Scala's [`apply`][scala], etc. 
7 | 8 | [cpp]: https://en.cppreference.com/w/cpp/language/operators#Function_call_operator 9 | [python]: https://docs.python.org/3/reference/datamodel.html#object.__call__ 10 | [swift]: https://docs.swift.org/swift-book/ReferenceManual/Attributes.html 11 | [kotlin]: https://kotlinlang.org/docs/reference/operator-overloading.html#invoke 12 | [php]: https://www.php.net/manual/en/language.oop5.magic.php#object.invoke 13 | [scala]: https://scala-lang.org/files/archive/spec/2.12/06-expressions.html#function-applications 14 | 15 | Something along these lines exists in Rust in the form of the [`std::ops::Fn`] 16 | trait. When you write a closure expression, under the hood it becomes a struct 17 | with some unique type that captures the necessary state from the closure's 18 | environment and provides an implementation of this `Fn` trait to make it 19 | callable. This isn't quite like the examples cited from other languages because 20 | the trait can only be implemented by the compiler, not by the user for their own 21 | data structures. 22 | 23 | [`std::ops::Fn`]: https://doc.rust-lang.org/nightly/std/ops/trait.Fn.html 24 | 25 | I was playing around with this functionality involving closures to stretch the 26 | possibilities a bit. Mainly I wondered whether there is anything that can be 27 | written in the gap in the code below to make our data structure work like a 28 | callable function object *on a stable compiler* despite this not being a feature 29 | of the language. 30 | 31 | ```rust 32 | /// Function object that adds some number to its input. 33 | struct Plus { 34 | n: u32, 35 | } 36 | 37 | impl Plus { 38 | fn call(&self, arg: u32) -> u32 { 39 | self.n + arg 40 | } 41 | } 42 | 43 | // [Something special here ...] 44 | 45 | fn main() { 46 | let one_plus = Plus { n: 1 }; 47 | let sum = one_plus(2); 48 | assert_eq!(sum, 1 + 2); 49 | } 50 | ``` 51 | 52 | It turns out that yes, it is possible to make this work (with caveats). 53 | 54 |
55 | 56 | ### Background 57 | 58 | We will use an interesting combination of `Deref`, closures, trait objects, and 59 | unsafe code. 60 | 61 | We will stick to functions with the signature `fn(&self, u32) -> u32` to get the 62 | simplest thing working, but everything generalizes to other signatures. 63 | 64 | To explain the relevance of `Deref`, observe that the function call operator 65 | performs deref coercions to find a `Fn` impl. In the following code we write 66 | `f(2)` to call an object `f` of type `&Callable`, which does not itself 67 | implement the `Fn` trait. But `&Callable` dereferences to `&fn(u32) -> u32` 68 | which does, so that is what gets called. 69 | 70 | ```rust 71 | use std::ops::Deref; 72 | 73 | struct Callable; 74 | 75 | impl Deref for Callable { 76 | type Target = fn(u32) -> u32; 77 | 78 | fn deref(&self) -> &'static Self::Target { 79 | &(one_plus as fn(u32) -> u32) 80 | } 81 | } 82 | 83 | fn one_plus(arg: u32) -> u32 { 84 | 1 + arg 85 | } 86 | 87 | fn main() { 88 | let f = &Callable; 89 | assert_eq!(f(2), 1 + 2); 90 | } 91 | ``` 92 | 93 |
94 | 95 | ### First attempt 96 | 97 | The code under Background is syntactically on the right track because it enables 98 | writing parentheses for function call notation on a value of user-defined type. 99 | But since the thing being called in that code after deref coercion is just a 100 | function pointer, the value of `self` (the object being invoked as a function) 101 | is not accessible to the function body, which makes this severely limited in 102 | usefulness. 103 | 104 | What we want conceptually is this kind of thing: 105 | 106 | ```rust 107 | impl Callable { 108 | fn call(&self, arg: u32) -> u32 { 109 | // Function body 110 | } 111 | } 112 | 113 | impl Deref for Callable { 114 | type Target = ???; 115 | 116 | fn deref(&self) -> &Self::Target { 117 | &|arg| self.call(arg) 118 | } 119 | } 120 | ``` 121 | 122 | That is, the thing being called after deref coercion would be a closure that has 123 | captured `self` and receives all the non-`self` args to set up a call to the 124 | intended function body. 125 | 126 | We can even spell out a type for `Target` that makes this look correctly typed. 127 | 128 | ```rust 129 | impl Deref for Callable { 130 | type Target = dyn Fn(u32) -> u32; 131 | 132 | fn deref(&self) -> &Self::Target { 133 | &|arg| self.call(arg) 134 | } 135 | } 136 | ``` 137 | 138 | The borrow checker explains (not that clearly in this case) that this 139 | implementation would not be sound. The reference being returned by `deref` is 140 | dangling because it refers to a closure object on the stack frame of the `deref` 141 | call that is destroyed during the return. 142 | 143 | ```console 144 | error[E0495]: cannot infer an appropriate lifetime due to conflicting requirements 145 | --> src/main.rs:15:10 146 | | 147 | 15 | &|arg| self.call(arg) 148 | | ^^^^^^^^^^^^^^^^^^^^ 149 | | 150 | note: first, the lifetime cannot outlive the anonymous lifetime #1 defined on the method body at 14:5... 
151 | --> src/main.rs:14:5 152 | | 153 | 14 | / fn deref(&self) -> &Self::Target { 154 | 15 | | &|arg| self.call(arg) 155 | 16 | | } 156 | | |_____^ 157 | = note: ...so that the types are compatible: 158 | expected &&Callable 159 | found &&Callable 160 | = note: but, the lifetime must be valid for the static lifetime... 161 | = note: ...so that the expression is assignable: 162 | expected &(dyn std::ops::Fn(u32) -> u32 + 'static) 163 | found &dyn std::ops::Fn(u32) -> u32 164 | ``` 165 | 166 | To see it more clearly, this closure would have desugared to something like the 167 | following: 168 | 169 | ```rust 170 | impl Deref for Callable { 171 | type Target = dyn Fn(u32) -> u32; 172 | 173 | fn deref(&self) -> &Self::Target { 174 | // Generated by the compiler as the memory representation 175 | // of `|arg| self.call(arg)`. 176 | struct GeneratedClosure<'a> { 177 | self_: &'a Callable, 178 | } 179 | 180 | // Also generated by the compiler. 181 | impl<'a> Fn(u32) -> u32 for GeneratedClosure<'a> { 182 | fn call(&self, arg: u32) -> u32 { 183 | let self_ = self.self_; 184 | 185 | // Body of `|arg| self.call(arg)`. 186 | self_.call(arg) 187 | } 188 | } 189 | 190 | // Expanded view of `&|arg| self.call(arg)`. 191 | let generated_closure = GeneratedClosure { self_: self }; 192 | let reference_to_closure: &GeneratedClosure = &generated_closure; 193 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32; 194 | reference_to_trait_object 195 | } 196 | } 197 | ``` 198 | 199 |
200 | 201 | ### Second attempt 202 | 203 | If we temporarily conflate the types `GeneratedClosure` and `&Callable`, notice 204 | how in the desugared code from the first attempt we have `deref` returning 205 | `&&Callable` (as a reference to trait object) and `GeneratedClosure::call` 206 | accepting `&&Callable` as its first argument. The inner reference lives long 207 | enough to match deref's signature but the outer reference does not; the outer 208 | reference points to the inner reference which exists on `deref`'s stack frame 209 | and goes out of scope. 210 | 211 | What we would love to trick the compiler into doing is something more like: 212 | 213 | ```rust 214 | impl Deref for Callable { 215 | type Target = dyn Fn(u32) -> u32; 216 | 217 | fn deref(&self) -> &Self::Target { 218 | // Generated by the compiler (???) 219 | #[repr(transparent)] 220 | struct GeneratedClosure { 221 | self_: Callable, 222 | } 223 | 224 | // Also generated by the compiler (???) 225 | impl Fn(u32) -> u32 for GeneratedClosure { 226 | fn call(&self, arg: u32) -> u32 { 227 | let self_ = &self.self_; 228 | 229 | // Body of the closure we would write. 230 | self_.call(arg) 231 | } 232 | } 233 | 234 | let reference_to_closure = &GeneratedClosure { self_: *self }; 235 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32; 236 | reference_to_trait_object 237 | } 238 | } 239 | ``` 240 | 241 | Here instead we have `deref` returning `&Callable` (as a reference to trait 242 | object) and `GeneratedClosure::call` accepting `&Callable`. The conversion from 243 | `&Callable` to `&GeneratedClosure` is sound as long as `Callable` and 244 | `GeneratedClosure` have the same memory representation, which would be 245 | guaranteed by `#[repr(transparent)]`. 
That conversion results in a reference 246 | pointing to the caller's `Callable` rather than to anything on `deref`'s stack 247 | frame, so it lives long enough that this would be a safe and working 248 | implementation of the intended functionality. 249 | 250 | Let's think about what closure we would need to write in order for the compiler 251 | to come up with the above data structure and `Fn` trait impl. 252 | 253 | We know it would need to capture a value of type `Callable` by value. This 254 | begins to sound problematic because there would never exist an owned value of 255 | type `Callable` accessible to the `Deref` impl, only as a borrowed `&Callable`. 256 | 257 | But an imaginary uninitialized `Callable` gets the job done: 258 | 259 | ```rust 260 | let uninit_callable: Callable = unsafe { mem::uninitialized() }; 261 | let uninit_closure = move |arg: u32| Callable::call(&uninit_callable, arg); 262 | mem::forget(uninit_closure); 263 | ``` 264 | 265 | This code makes an uninitialized owned `Callable`, moves ownership of it into a 266 | closure that captures a `Callable` by value and nothing else, and then prevents 267 | a `Drop` call on the closure because we must not drop its uninitialized 268 | contents. At runtime this would all be noop but it gets the compiler to generate 269 | the right data structure and `Fn` trait impl shown above. 270 | 271 | The remaining part is to turn `self` into a trait object based on this `Fn` 272 | impl, the equivalent of `&GeneratedClosure { self_: *self } as &dyn Fn(u32) -> 273 | u32`. 274 | 275 | Ordinarily we would reach for a `mem::transmute::<&Callable, 276 | &GeneratedClosure>(self)` or `&*(self as *const Callable as *const 277 | GeneratedClosure)`, but in this case that won't work because the closure's real 278 | type is generated and does not have a name that we can refer to. 
A different 279 | technique is needed: 280 | 281 | ```rust 282 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T { 283 | b 284 | } 285 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) }); 286 | ``` 287 | 288 | This uses generic type inference to deduce the return type of the transmute as 289 | identical to a reference to the closure's type, whatever that might be. 290 | 291 | At this point we have a closure to make into a trait object. 292 | 293 | ```rust 294 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32; 295 | ``` 296 | 297 | The impl all at once looks like: 298 | 299 | ```rust 300 | impl Deref for Callable { 301 | type Target = dyn Fn(u32) -> u32; 302 | 303 | fn deref(&self) -> &Self::Target { 304 | let uninit_callable: Self = unsafe { mem::uninitialized() }; 305 | let uninit_closure = move |arg: u32| Self::call(&uninit_callable, arg); 306 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T { 307 | b 308 | } 309 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) }); 310 | mem::forget(uninit_closure); 311 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32; 312 | reference_to_trait_object 313 | } 314 | } 315 | ``` 316 | 317 |
318 | 319 | ### Third attempt 320 | 321 | I called out `#[repr(transparent)]` earlier on, but then didn't bring it up 322 | again in the context of the closure-based implementation. We have written a 323 | closure that captures a type `Callable` by value so it makes sense why it would 324 | be represented like `struct GeneratedClosure { captured: Callable }` but: 325 | 326 | - it is not a guarantee made by the language that a closure capturing `Callable` 327 | by value is represented in memory the same as `struct { Callable }`; 328 | 329 | - nor is it a guarantee that `struct { Callable }` would be represented the same 330 | as `Callable`. 331 | 332 | So this is the big caveat; don't count on this to work now or continue working 333 | in the future. Nothing on this page is a robust solution, only interesting. For 334 | now I think this is the closest we get, by adding an assertion as a basic smoke 335 | test that the closure matches the expected size: 336 | 337 | ```rust 338 | use std::mem; 339 | use std::ops::Deref; 340 | 341 | /// Function object that adds some number to its input. 
342 | struct Plus { 343 | n: u32, 344 | } 345 | 346 | impl Plus { 347 | fn call(&self, arg: u32) -> u32 { 348 | self.n + arg 349 | } 350 | } 351 | 352 | impl Deref for Plus { 353 | type Target = dyn Fn(u32) -> u32; 354 | 355 | fn deref(&self) -> &Self::Target { 356 | let uninit_callable: Self = unsafe { mem::uninitialized() }; 357 | let uninit_closure = move |arg: u32| Self::call(&uninit_callable, arg); 358 | let size_of_closure = mem::size_of_val(&uninit_closure); 359 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T { 360 | b 361 | } 362 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) }); 363 | mem::forget(uninit_closure); 364 | assert_eq!(size_of_closure, mem::size_of::<Self>()); 365 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32; 366 | reference_to_trait_object 367 | } 368 | } 369 | 370 | fn main() { 371 | let one_plus = Plus { n: 1 }; 372 | let sum = one_plus(2); 373 | assert_eq!(sum, 1 + 2); 374 | } 375 | ``` 376 | 377 |
378 | 379 | ### Fourth attempt 380 | 381 | There is one remaining problem to sort out. The following line from the third 382 | attempt may contain undefined behavior: 383 | 384 | ```rust 385 | let uninit_callable: Self = unsafe { mem::uninitialized() }; 386 | ``` 387 | 388 | Usually the most common way that creating an uninitialized value of an unknown 389 | type in generic code causes undefined behavior is if an expression like 390 | `mem::uninitialized::<T>()` might be instantiated with a choice of `T` that is 391 | uninhabited, such as the `!` type. When that happens, the compiler is free to 392 | turn the `mem::uninitialized` call into [`unreachable_unchecked`] and plummet 393 | off the end of your function, even though you intended for this line to be a 394 | noop. 395 | 396 | [`unreachable_unchecked`]: https://doc.rust-lang.org/std/hint/fn.unreachable_unchecked.html 397 | 398 | As used here, that's not a concern -- we know `Self` is inhabited at runtime 399 | because there exists a `&Self` in scope that was passed in by the caller. If 400 | `Self` were uninhabited, it would be impossible for the caller to have an 401 | instance of `Self` on which to borrow (`&self`) and call `deref`. 402 | 403 | Instead we need to worry about the second most common way that creating 404 | uninitialized values of an unknown type causes undefined behavior, and that's if 405 | the uninitialized type has nontrivial validity invariants. In our case if the 406 | memory representation of `Self` contains a bool, char, `&`, `&mut`, Box, 407 | NonZero, or any other type where not all possible values are valid, then 408 | `mem::uninitialized::<Self>()` is immediate UB. 409 | 410 | The correct way to manipulate uninitialized memory of generic type is through 411 | [`MaybeUninit`]. 
412 | 413 | [`MaybeUninit`]: https://doc.rust-lang.org/std/mem/union.MaybeUninit.html 414 | 415 | ```rust 416 | let uninit_callable = MaybeUninit::<Self>::uninit(); 417 | let uninit_closure = move |arg: u32| Self::call( 418 | unsafe { &*uninit_callable.as_ptr() }, 419 | arg, 420 | ); 421 | ``` 422 | 423 | The final expanded code all together is: 424 | 425 | ```rust 426 | use std::mem::{self, MaybeUninit}; 427 | use std::ops::Deref; 428 | 429 | /// Function object that adds some number to its input. 430 | struct Plus { 431 | n: u32, 432 | } 433 | 434 | impl Plus { 435 | fn call(&self, arg: u32) -> u32 { 436 | self.n + arg 437 | } 438 | } 439 | 440 | impl Deref for Plus { 441 | type Target = dyn Fn(u32) -> u32; 442 | 443 | fn deref(&self) -> &Self::Target { 444 | let uninit_callable = MaybeUninit::<Self>::uninit(); 445 | let uninit_closure = move |arg: u32| Self::call( 446 | unsafe { &*uninit_callable.as_ptr() }, 447 | arg, 448 | ); 449 | let size_of_closure = mem::size_of_val(&uninit_closure); 450 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T { 451 | b 452 | } 453 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) }); 454 | mem::forget(uninit_closure); 455 | assert_eq!(size_of_closure, mem::size_of::<Self>()); 456 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32; 457 | reference_to_trait_object 458 | } 459 | } 460 | 461 | fn main() { 462 | let one_plus = Plus { n: 1 }; 463 | let sum = one_plus(2); 464 | assert_eq!(sum, 1 + 2); 465 | } 466 | ``` 467 | 468 |
469 | 470 | ### Implementation 471 | 472 | Packaging this up into a macro is the easy part. We would most likely want an 473 | attribute macro on an impl block that turns the block's one method into the fake 474 | `Fn` impl. 475 | 476 | ```rust 477 | /// Function object that adds some number to its input. 478 | struct Plus { 479 | n: u32, 480 | } 481 | 482 | #[hackfn] 483 | impl Plus { 484 | fn call(&self, arg: u32) -> u32 { 485 | self.n + arg 486 | } 487 | } 488 | 489 | fn main() { 490 | let one_plus = Plus { n: 1 }; 491 | let sum = one_plus(2); 492 | assert_eq!(sum, 1 + 2); 493 | } 494 | ``` 495 | 496 |
497 | 498 | End note: I feel that the technique of returning trait objects from 499 | `&`-returning trait methods like `Deref`, `Index`, `Borrow` etc is underexplored 500 | and there are major impactful applications waiting to be discovered in that 501 | area. [This StackOverflow answer][hashmap] demonstrates one amazing example in 502 | the context of *How to implement HashMap with two keys?*. A more basic one is 503 | the [slice of a multidimensional array][refcast] example from RefCast; this 504 | involves a dynamically sized slice rather than a trait object but the underlying 505 | idea is similar. I think that these two and the case study are scratching the 506 | surface of something bigger with exciting applications. Note that those two 507 | links are all safe code; unsafe is not inherent to this technique. 508 | 509 | [hashmap]: https://stackoverflow.com/a/45795699/6086311 510 | [refcast]: https://github.com/dtolnay/ref-cast#realistic-example 511 | -------------------------------------------------------------------------------- /callable-types/demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "case-study-callable-types" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [[bin]] 9 | name = "case-study" 10 | path = "main.rs" 11 | -------------------------------------------------------------------------------- /callable-types/demo/main.rs: -------------------------------------------------------------------------------- 1 | use std::mem::{self, MaybeUninit}; 2 | use std::ops::Deref; 3 | 4 | /// Function object that adds some number to its input. 
5 | struct Plus { 6 | n: u32, 7 | } 8 | 9 | impl Plus { 10 | fn call(&self, arg: u32) -> u32 { 11 | self.n + arg 12 | } 13 | } 14 | 15 | impl Deref for Plus { 16 | type Target = dyn Fn(u32) -> u32; 17 | 18 | fn deref(&self) -> &Self::Target { 19 | let uninit_callable = MaybeUninit::<Self>::uninit(); 20 | let uninit_closure = move |arg: u32| Self::call(unsafe { &*uninit_callable.as_ptr() }, arg); 21 | let size_of_closure = mem::size_of_val(&uninit_closure); 22 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T { 23 | b 24 | } 25 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) }); 26 | mem::forget(uninit_closure); 27 | assert_eq!(size_of_closure, mem::size_of::<Self>()); 28 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32; 29 | reference_to_trait_object 30 | } 31 | } 32 | 33 | fn main() { 34 | let one_plus = Plus { n: 1 }; 35 | let sum = one_plus(2); 36 | assert_eq!(sum, 1 + 2); 37 | dbg!(one_plus(2)); 38 | } 39 | -------------------------------------------------------------------------------- /function-epilogue/README.md: -------------------------------------------------------------------------------- 1 | ## Function epilogue 2 | 3 | For the [`#[no_panic]`][no-panic] macro I needed the ability to have some piece 4 | of code invoked during all *panicking* exit paths out of a function. 5 | 6 | [no-panic]: https://github.com/dtolnay/no-panic 7 | 8 |
9 | 10 | ### First attempt 11 | 12 | Having something execute on *all* exit paths is reasonably simple -- place a 13 | guard object in a local variable and its `Drop` impl will run whether the 14 | function body succeeds or panics. This may be a good approach for something like 15 | instrumenting functions with tracing on entry and exit. 16 | 17 | ```rust 18 | // Before 19 | fn f(a: Arg1, b: Arg2) -> Ret { 20 | // (Original function body) 21 | } 22 | 23 | // After; insert guard object 24 | fn f(a: Arg1, b: Arg2) -> Ret { 25 | struct Guard; 26 | impl Drop for Guard { 27 | fn drop(&mut self) { 28 | // Do the thing 29 | } 30 | } 31 | let _guard = Guard; 32 | 33 | // (Original function body) 34 | } 35 | ``` 36 | 37 | From here we can have the guard's `Drop` impl check 38 | [`std::thread::panicking`][panicking] to determine whether the call is taking 39 | place during a panicking exit path. 40 | 41 | [panicking]: https://doc.rust-lang.org/std/thread/fn.panicking.html 42 | 43 | ```rust 44 | impl Drop for Guard { 45 | fn drop(&mut self) { 46 | if std::thread::panicking() { 47 | // Do the thing 48 | } 49 | } 50 | } 51 | ``` 52 | 53 | Two things made this not suitable for my case: 54 | 55 | - There is no equivalent in libcore, so this only works if my caller's crate is 56 | using the standard library. 57 | 58 | - The code inside of `if std::thread::panicking() { ... }` gets linked whether 59 | or not a panic is possible. The implementation of the panicking check is based 60 | on reading a panic counter out of a thread\_local and cannot be optimized out. 61 | In the case of `#[no_panic]`, the whole macro is based on using the 62 | information of whether something gets linked to tell whether a panic is 63 | possible so I needed the linking to behave well. 64 | 65 |
66 | 67 | ### Second attempt 68 | 69 | Let's evaluate the body of the function and then make the guard not get dropped 70 | if the function produces a value as opposed to panicking. 71 | 72 | ```rust 73 | fn f(a: Arg1, b: Arg2) -> Ret { 74 | struct Guard; 75 | impl Drop for Guard { 76 | fn drop(&mut self) { 77 | // Do the thing 78 | } 79 | } 80 | let guard = Guard; 81 | 82 | let value = { 83 | // (Original function body) 84 | }; 85 | 86 | mem::forget(guard); 87 | value 88 | } 89 | ``` 90 | 91 | If the original function panics, we don't make it to the `mem::forget` so the 92 | guard object is dropped as part of dropping the stack frame of `f` during the 93 | panic. If the original function body returns without panicking, we skip the 94 | guard's drop prior to returning from `f`. 95 | 96 | This is on the right track! It works with no\_std, and no longer relies on the 97 | thread\_local inside of `std::thread::panicking` so it optimizes away extremely 98 | reliably in functions that can never panic. 99 | 100 | There is a problem around functions that contain a `return` expression. If the 101 | original function body performs a `return`, that would now return from `f` 102 | without running `mem::forget` on the guard object, so the thing that we want to 103 | run only when panicking would incorrectly run. 104 | 105 |
106 | 107 | ### Third attempt 108 | 109 | Let's consolidate all the non-panicking exit paths into one place via a function 110 | call and make the guard not get dropped if the function call returns without 111 | panicking. 112 | 113 | ```rust 114 | fn f(a: Arg1, b: Arg2) -> Ret { 115 | struct Guard; 116 | impl Drop for Guard { 117 | fn drop(&mut self) { 118 | // Do the thing 119 | } 120 | } 121 | let guard = Guard; 122 | 123 | fn original_f(a: Arg1, b: Arg2) -> Ret { 124 | // (Original function body) 125 | } 126 | let value = original_f(a, b); 127 | 128 | mem::forget(guard); 129 | value 130 | } 131 | ``` 132 | 133 | This is like the second attempt except that it works when the original function 134 | body contains a `return` expression. 135 | 136 | This is pretty good. It has the desired behavior and is compatible with most 137 | function signatures. 138 | 139 |
140 | 141 | ### Fourth attempt 142 | 143 | What do we do in this case? 144 | 145 | ```rust 146 | fn f(&self, a: Arg1, b: Arg2) -> Ret { 147 | ... 148 | } 149 | ``` 150 | 151 | The scheme from the third attempt of duplicating the function signature into an 152 | internal `original_f` will not work because `&self` arguments can only occur in 153 | members of an impl block, not in any other position that a function can be 154 | defined. 155 | 156 | ```rust 157 | struct S; 158 | 159 | impl S { 160 | fn f(&self, a: Arg1, b: Arg2) -> Ret { 161 | ... 162 | let guard = Guard; 163 | 164 | fn original_f(&self, a: Arg1, b: Arg2) -> Ret { 165 | // (Original function body) 166 | } 167 | let value = original_f(self, a, b); 168 | 169 | mem::forget(guard); 170 | value 171 | } 172 | } 173 | ``` 174 | 175 | ```console 176 | error: unexpected `self` argument in function 177 | --> src/main.rs:8:24 178 | | 179 | 8 | fn original_f(&self, a: Arg1, b: Arg2) -> Ret { 180 | | ^^^^ `self` is only valid as the first argument of an associated function 181 | ``` 182 | 183 | It doesn't work to try to generate `fn original_f(_self: &S, ...) -> Ret` 184 | because the macro generating this will be an attribute macro placed on the 185 | function -- it would only receive the function `f` as input not including the 186 | impl block header, so the correct type for `self` can't be known. 187 | 188 | ```rust 189 | impl ??? { 190 | fn f(&self, a: Arg1, b: Arg2) -> Ret { 191 | ... 192 | let guard = Guard; 193 | 194 | fn original_f(_self: &???, a: Arg1, b: Arg2) -> Ret { 195 | // (Original function body) 196 | } 197 | let value = original_f(self, a, b); 198 | 199 | mem::forget(guard); 200 | value 201 | } 202 | } 203 | ``` 204 | 205 | The argument type `_self: &Self` can't be used because a function like 206 | `original_f` is its own self-contained item and does not have access to an outer 207 | `Self` or type parameters. 
208 | 209 | ```console 210 | error[E0401]: can't use generic parameters from outer function 211 | --> src/main.rs:8:31 212 | | 213 | 1 | impl S { 214 | | ---- `Self` type implicitly declared here, by this `impl` 215 | ... 216 | 8 | fn original_f(_self: &Self, a: Arg1, b: Arg2) -> Ret { 217 | | ^^^^ 218 | | | 219 | | use of generic parameter from outer function 220 | | use a type here instead 221 | ``` 222 | 223 | Maybe we could ask the user to write our attribute macro on the impl block 224 | rather than on functions but this would be confusing; a solution that does not 225 | require this would be better. 226 | 227 | It also doesn't work in general to place the `original_f` outside of `f`, as a 228 | `#[doc(hidden)]` method next to `f`. This would work inside of an impl block 229 | containing inherent methods, but not inside of a trait impl block containing 230 | trait methods since those are limited to the set of methods required by the 231 | trait. 232 | 233 | ```rust 234 | impl ??? { 235 | fn original_f(&self, a: Arg1, b: Arg2) -> Ret { 236 | // (Original function body) 237 | } 238 | 239 | fn f(&self, a: Arg1, b: Arg2) -> Ret { 240 | ... 241 | let guard = Guard; 242 | 243 | let value = Self::original_f(self, a, b); 244 | 245 | mem::forget(guard); 246 | value 247 | } 248 | } 249 | ``` 250 | 251 | To finally give a viable fourth attempt, let's write `original_f` as a closure 252 | instead because closures are not a self-contained item and *do* have access to 253 | an outer `Self`. 254 | 255 | ```rust 256 | fn f(&self, a: Arg1, b: Arg2) -> Ret { 257 | ... 258 | let guard = Guard; 259 | 260 | let original_f = |_self: &Self, a: Arg1, b: Arg2| -> Ret { 261 | // (Original function body, with self replaced by _self) 262 | }; 263 | let value = original_f(self, a, b); 264 | 265 | mem::forget(guard); 266 | value 267 | } 268 | ``` 269 | 270 | Here we pass the function arguments along to a closure that has the same 271 | signature as the outer function and captures nothing. 
Method receivers in the 272 | form of `&self`, `&mut self`, and `self` would be passed as closure arguments 273 | `_self: &Self`, `_self: &mut Self`, `_self: Self` respectively with the original 274 | function body adjusted to refer to `_self` anywhere that it originally referred 275 | to `self`. The leading underscore on `_self` is meaningful in that it suppresses 276 | unused variable lints; Rust does not warn when a method accepts `self` but does 277 | not refer to it, so we want to preserve that behavior in the generated closure. 278 | 279 | This really seems like it should work. But... 280 | 281 |
282 | 283 | ### Fifth attempt 284 | 285 | The borrow checker doesn't like it. In the case of a method signature that 286 | borrows from `self`: 287 | 288 | ```rust 289 | fn f(&self) -> &i32 { 290 | ... 291 | let guard = Guard; 292 | 293 | let original_f = |_self: &Self| -> &i32 { 294 | &_self.0 295 | }; 296 | let value = original_f(self); 297 | 298 | mem::forget(guard); 299 | value 300 | } 301 | ``` 302 | 303 | we get this interesting error: 304 | 305 | ```console 306 | error[E0495]: cannot infer an appropriate lifetime for borrow expression due to conflicting requirements 307 | --> src/main.rs:17:13 308 | | 309 | 17 | &_self.0 310 | | ^^^^^^^^ 311 | | 312 | note: first, the lifetime cannot outlive the anonymous lifetime #1 defined on the body at 16:26... 313 | --> src/main.rs:16:26 314 | | 315 | 16 | let original_f = |_self: &Self| -> &i32 { 316 | | __________________________^ 317 | 17 | | &_self.0 318 | 18 | | }; 319 | | |_________^ 320 | note: ...so that reference does not outlive borrowed content 321 | --> src/main.rs:17:13 322 | | 323 | 17 | &_self.0 324 | | ^^^^^^^^ 325 | note: but, the lifetime must be valid for the anonymous lifetime #1 defined on the method body at 7:5... 326 | --> src/main.rs:7:5 327 | | 328 | 7 | / fn f(&self) -> &i32 { 329 | 8 | | struct Guard; 330 | 9 | | impl Drop for Guard { 331 | 10 | | fn drop(&mut self) { 332 | ... | 333 | 22 | | value 334 | 23 | | } 335 | | |_____^ 336 | note: ...so that reference does not outlive borrowed content 337 | --> src/main.rs:22:9 338 | | 339 | 22 | value 340 | | ^^^^^ 341 | ``` 342 | 343 | I can't tell where this went wrong but casting the closure to a function pointer 344 | with the right signature seems to fix it. This requires rustc 1.23+. 345 | 346 | ```rust 347 | fn f(&self) -> &i32 { 348 | ... 
349 | let guard = Guard; 350 | 351 | let original_f = |_self: &Self| -> &i32 { 352 | // (Original function body, with self replaced by _self) 353 | } as fn(&Self) -> &i32; 354 | let value = original_f(self); 355 | 356 | mem::forget(guard); 357 | value 358 | } 359 | ``` 360 | 361 |
362 | 363 | ### Sixth attempt 364 | 365 | Let's take a closer look at what is meant by "self replaced by \_self". 366 | 367 | The simple way for a macro to accomplish this would be by traversing the entire 368 | token stream representing the function body and substituting a `_self` token 369 | anywhere that `self` occurs. This is correct as long as `self` always refers to 370 | the method receiver... but sometimes it may not. Let's say the user has written: 371 | 372 | ```rust 373 | fn f(&self) { 374 | struct UserGuard; 375 | impl Drop for UserGuard { 376 | fn drop(&mut self) { 377 | // Notice the `self` on the previous line 378 | ... 379 | } 380 | } 381 | 382 | ... 383 | } 384 | ``` 385 | 386 | The ability to place structs and impl blocks inside a function body was super 387 | helpful to us so far because that's how we have been doing *our* Guard object. 388 | But the user is free to do it too! In this snippet they have written a function 389 | body that uses the token `self` in a way that does *not* refer to the `f` 390 | method's receiver. If we naively replace every `self` in their function body 391 | with `_self` as indicated in the fifth attempt, the result is invalid Rust 392 | syntax: 393 | 394 | ```rust 395 | fn f(&self) -> &i32 { 396 | struct Guard; 397 | impl Drop for Guard { 398 | fn drop(&mut self) { 399 | // This is the guard generated by our macro 400 | } 401 | } 402 | let guard = Guard; 403 | 404 | let original_f = |_self: &Self| -> &i32 { 405 | struct UserGuard; 406 | impl Drop for UserGuard { 407 | fn drop(&mut _self) { 408 | // Invalid Rust syntax on the previous line 409 | ... 410 | } 411 | } 412 | 413 | ... 
414 | } as fn(&Self) -> &i32; 415 | let value = original_f(self); 416 | 417 | mem::forget(guard); 418 | value 419 | } 420 | ``` 421 | 422 | ```console 423 | error: expected one of `:` or `@`, found `)` 424 | --> src/main.rs:19:31 425 | | 426 | 19 | fn drop(&mut _self) { 427 | | ^ expected one of `:` or `@` here 428 | ``` 429 | 430 | So replacing *every* `self` is not right. The next simplest possibility would be 431 | to parse the user's function body using Syn and write a [`VisitMut`] to perform 432 | the replacement against the parsed syntax tree without traversing into nested 433 | impl blocks. 434 | 435 | [`VisitMut`]: https://docs.rs/syn/0.15/syn/visit_mut/index.html 436 | 437 | That is more correct than replacing *every* `self` but it still isn't correct 438 | because we can't know how to treat unexpanded macros. If the user's function 439 | body contains a call to `somemacro!(self)`, there would be no way to tell 440 | whether this expands to an expression like `vec![self]` in which we need to 441 | replace, vs an impl block like `impl Drop for UserGuard` in which we want to not 442 | replace. 443 | 444 | I think there is no solution to this today in Rust, so we will need to keep it 445 | as a limitation that sometimes our macro would generate invalid code, or else 446 | solve what we are doing in a way that does not involve doing *any* token 447 | replacement of `self`. 448 | 449 | So that we don't need replacement, let's try having our generated closure 450 | capture `self` from the outer method `f`'s receiver argument. 451 | 452 | There are a lot of different ways to slice and dice this, but ultimately they 453 | all fall apart for borrow checker reasons when &mut is involved. 454 | 455 | ```rust 456 | struct S(i32); 457 | 458 | impl S { 459 | // Before: compiles and works 460 | fn f(&mut self) -> &mut i32 { 461 | &mut self.0 462 | } 463 | 464 | // After: does not compile 465 | fn f(&mut self) -> &mut i32 { 466 | ... 
467 | let guard = Guard; 468 | 469 | let original_f = move || { 470 | // Original function body: 471 | &mut self.0 472 | }; 473 | let value = original_f(); 474 | 475 | mem::forget(guard); 476 | value 477 | } 478 | } 479 | ``` 480 | 481 | ```console 482 | error[E0495]: cannot infer an appropriate lifetime for borrow expression due to conflicting requirements 483 | --> src/main.rs:16:13 484 | | 485 | 16 | &mut self.0 486 | | ^^^^^^^^^^^ 487 | ``` 488 | 489 | Remember how we had to add a cast to function pointer type in the fifth attempt 490 | to solve this same borrow checker failure? Well once the closure is capturing 491 | things, it can no longer be cast to a function pointer. Using `impl FnOnce` or 492 | `&mut dyn FnMut` here doesn't work either; as far as I can tell the correct type 493 | for these closures cannot be accurately described in Rust's type system. 494 | 495 | ```rust 496 | fn f(&mut self) -> &mut i32 { 497 | ... 498 | let guard = Guard; 499 | 500 | let original_f: impl FnOnce() -> &mut i32 = move || { 501 | // Original function body: 502 | &mut self.0 503 | }; 504 | let value = original_f(); 505 | 506 | mem::forget(guard); 507 | value 508 | } 509 | ``` 510 | 511 | ```console 512 | error[E0106]: missing lifetime specifier 513 | --> src/main.rs:17:42 514 | | 515 | 17 | let original_f: impl FnOnce() -> &mut i32 = move || { 516 | | ^ help: consider giving it a 'static lifetime: `&'static` 517 | | 518 | = help: this function's return type contains a borrowed value, but there is no value for it to be borrowed from 519 | ``` 520 | 521 | There isn't a way for the lifetime in the signature of a closure to unify with 522 | the elided lifetime in `f`'s signature. 523 | 524 | I tried a lot of variations in this direction but found it to be a dead end. I 525 | would love to have someone bring to my attention a reliable solution that does 526 | not involve replacing `self` tokens on a heuristic basis. 527 | 528 |
529 | 530 | ### Lifetime elision 531 | 532 | As a recap, what we have so far is the closure cast to function pointer 533 | approach from the fifth attempt combined with the `VisitMut` replacement 534 | approach discussed under the sixth attempt. All together the expansion would 535 | behave like this: 536 | 537 | ```rust 538 | // Before 539 | fn f(&self, a: Arg1, b: Arg2) -> Ret { 540 | // (Original function body) 541 | } 542 | 543 | // After 544 | fn f(&self, a: Arg1, b: Arg2) -> Ret { 545 | struct Guard; 546 | impl Drop for Guard { 547 | fn drop(&mut self) { 548 | // Do the thing 549 | } 550 | } 551 | let guard = Guard; 552 | 553 | let original_f = |_self: &Self, a: Arg1, b: Arg2| -> Ret { 554 | // (Original function body, with self replaced by _self 555 | // except in nested impls) 556 | } as fn(&Self, Arg1, Arg2) -> Ret; 557 | 558 | let value = original_f(self, a, b); 559 | 560 | mem::forget(guard); 561 | value 562 | } 563 | ``` 564 | 565 | Unfortunately we are not done because lifetime elision wrecks this approach. To 566 | make it concrete let me give you some possible definitions for the receiver 567 | type, `Arg1`, `Arg2`, `Ret`, and the function body, with lifetime elision in the 568 | mix: 569 | 570 | ```rust 571 | struct S(i32); 572 | type Arg1<'a> = &'a (); 573 | type Arg2 = (); 574 | type Ret<'a> = &'a i32; 575 | 576 | impl S { 577 | fn f(&self, _a: Arg1, _b: Arg2) -> Ret { 578 | &self.0 579 | } 580 | } 581 | ``` 582 | 583 | This compiles, with `S::f` eliding three lifetimes: the ones on `&self`, `Arg1`, 584 | and `Ret`. 585 | 586 | Let's apply our expansion. 
587 | 588 | ```rust 589 | impl S { 590 | fn f(&self, _a: Arg1, _b: Arg2) -> Ret { 591 | struct Guard; 592 | impl Drop for Guard { 593 | fn drop(&mut self) { 594 | // Do the thing 595 | } 596 | } 597 | let guard = Guard; 598 | 599 | let original_f = |_self: &Self, _a: Arg1, _b: Arg2| -> Ret { 600 | &_self.0 601 | } as fn(&Self, Arg1, Arg2) -> Ret; 602 | 603 | let value = original_f(self, _a, _b); 604 | 605 | mem::forget(guard); 606 | value 607 | } 608 | } 609 | ``` 610 | 611 | ```console 612 | error[E0106]: missing lifetime specifier 613 | --> src/main.rs:13:39 614 | | 615 | 13 | } as fn(&Self, Arg1, Arg2) -> Ret; 616 | | ^^^ expected lifetime parameter 617 | | 618 | = help: this function's return type contains a borrowed value, but the signature does not say whether it is borrowed from argument 1 or argument 2 619 | ``` 620 | 621 | So what happened here? This is hitting a special behavior of lifetime elision in 622 | methods that accept `self` by reference. The signature of `S::f` is not 623 | `fn(&Self, Arg1, Arg2) -> Ret`, as much as it may look like it. Instead it is 624 | `for<'r, 'a> fn(&'r Self, Arg1<'a>, Arg2) -> Ret<'r>`. The compiler's error 625 | message is pointing out that `fn(&Self, Arg1, Arg2) -> Ret` isn't even a legal 626 | function type given the types involved here. 627 | 628 | The relevant elision behavior goes something like this: in methods that accept 629 | `self` by reference, elided lifetimes in the return type are assumed to refer to 630 | the receiver's lifetime regardless of the number of lifetimes among 631 | the other arguments. Meanwhile in functions without `self` or that accept `self` 632 | by value, elided lifetimes in the return type are permitted only if the function 633 | has exactly one input lifetime parameter across all the arguments; otherwise the 634 | signature is invalid. 
This rule reduces the occurrence of explicit lifetimes 635 | being necessary in method signatures, but makes life complicated for macros as 636 | we are experiencing here. 637 | 638 | The function pointer type in our generated code `fn(&Self, Arg1, Arg2) -> Ret` 639 | is invalid because it has elided the lifetime on `Ret` in the return type but 640 | there is more than one input lifetime: there is one as part of `&Self` and one 641 | as part of `Arg1`. And function pointers never get the 642 | method-with-self-by-reference special elision behavior. The thing that we have 643 | spelled `&Self` in the function pointer is just some ordinary argument type, not 644 | a method receiver. 645 | 646 | This lifetime elision complication effectively rules out the possibility of 647 | using a function pointer in our solution. This puts us in dire straits because: 648 | 649 | - as seen in the second attempt, we really need some kind of function or closure 650 | in order for early returns to work right; 651 | 652 | - as seen in the fourth attempt, it needs to be a *nested* function or closure 653 | so that this whole thing can be used inside trait impl blocks; 654 | 655 | - also from the fourth attempt, it can't be a nested function because the 656 | signature may need to involve `Self`; 657 | 658 | - from the sixth attempt, making `self` available in the closure body through 659 | closure capture is a dead end due to borrow checker trouble; 660 | 661 | - from the fifth attempt, passing `self` as a closure argument doesn't work 662 | unless we use a function pointer; 663 | 664 | - lifetime elision rules make it impossible to come up with the right function 665 | pointer type. 666 | 667 |
668 | 669 | ### Seventh attempt and solution 670 | 671 | For reasons that are beyond me, the following expansion seems to solve the 672 | entire set of constraints at once. Why is the rebinding of all the arguments 673 | necessary? I don't know, but without it we're in the same failing situation as 674 | back in the sixth attempt under the sentence that says "they all fall apart for 675 | borrow checker reasons when &mut is involved." 676 | 677 | ```rust 678 | // Before 679 | fn f(&mut self, a: Arg1, b: Arg2) -> Ret { 680 | // (Original function body) 681 | } 682 | 683 | // After 684 | fn f(&mut self, a: Arg1, b: Arg2) -> Ret { 685 | struct Guard; 686 | impl Drop for Guard { 687 | fn drop(&mut self) { 688 | // Do the thing 689 | } 690 | } 691 | let guard = Guard; 692 | 693 | let value = (move || { 694 | // Rebind all the arguments: 695 | let _self = self; 696 | let a = a; 697 | let b = b; 698 | 699 | // (Original function body, with self replaced by _self 700 | // except in nested impls) 701 | })(); 702 | 703 | mem::forget(guard); 704 | value 705 | } 706 | ``` 707 | 708 | I am pretty disappointed that the best known solution involves this obscure 709 | rebinding trick to work around what seems like a borrow checker limitation, and 710 | as a consequence suffers from its own limitation around use of `self` inside 711 | unexpanded macros within the function body (see sixth attempt). I guess this 712 | shows there is still much room remaining for borrow checker improvements! 713 | 714 | In any case, this expansion is part of the implementation used for the 715 | [`no-panic`][no-panic] crate. 
716 | -------------------------------------------------------------------------------- /function-epilogue/demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "case-study-function-epilogue" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [[bin]] 9 | name = "case-study" 10 | path = "main.rs" 11 | -------------------------------------------------------------------------------- /function-epilogue/demo/main.rs: -------------------------------------------------------------------------------- 1 | use std::mem; 2 | 3 | pub struct S(i32); 4 | pub type Arg1<'a> = &'a i32; 5 | pub type Arg2 = i32; 6 | pub type Ret<'a> = (&'a mut i32, i32); 7 | 8 | impl S { 9 | pub fn original_f(&mut self, a: Arg1, b: Arg2) -> Ret { 10 | (&mut self.0, a + b) 11 | } 12 | 13 | pub fn generated_f(&mut self, a: Arg1, b: Arg2) -> Ret { 14 | struct Guard; 15 | impl Drop for Guard { 16 | fn drop(&mut self) { 17 | // Do the thing 18 | } 19 | } 20 | let guard = Guard; 21 | 22 | let value = (move || { 23 | let _self = self; 24 | let a = a; 25 | let b = b; 26 | 27 | // Original function body, with self replaced by _self 28 | // except in nested impls: 29 | 30 | (&mut _self.0, a + b) 31 | })(); 32 | 33 | mem::forget(guard); 34 | value 35 | } 36 | } 37 | 38 | fn main() { 39 | let _ = S; 40 | } 41 | -------------------------------------------------------------------------------- /integer-match/README.md: -------------------------------------------------------------------------------- 1 | ## Consecutive integer match patterns 2 | 3 | This came up in a macro that wanted to take a comma-separated sequence of 4 | expressions like `themacro!('A', 'B', f())` and emit a `match` expression indexed by 5 | position in the sequence: 6 | 7 | ```rust 8 | match VALUE { 9 | 0 => 'A', 10 | 1 => 'B', 11 | 2 => f(), 12 | _ => unimplemented!(), 13 | } 14 | ``` 15 | 16 | As a macro\_rules macro, a core 
limitation was that we can't make identifiers 17 | dynamically, so the generated code would be limited to using some fixed number 18 | of identifiers regardless of how many expressions are in the macro input. 19 | 20 | In the actual use case, this `match` was just one part of a more complicated 21 | macro; we wouldn't necessarily want a macro for doing literally what is 22 | described here by itself. 23 | 24 |
25 | 26 | ### Rejected solutions 27 | 28 | **Procedural macro.** The whole thing could have been made a 29 | procedural macro instead. A procedural macro would be able to emit exactly a 30 | match expression as shown above. However the stable Rust compiler does not yet 31 | support calling procedural macros in expression position, so the procedural 32 | macro would have needed to be restricted to nightly only. Also it would mean 33 | pulling in some extra dependencies for parsing. 34 | 35 | **Change input syntax.** The input syntax for the macro could have 36 | been changed to require the caller to pass their own counter in the input: 37 | something like `themacro!((0, 'A'), (1, 'B'), (2, f()))`. This makes things easy 38 | for the macro implementation but at the expense of the caller, which was the 39 | wrong tradeoff. Here is what that would look like implemented: 40 | 41 | ```rust 42 | // Force caller to provide their own counter. 43 | macro_rules! themacro { 44 | ($(($i:pat, $e:expr)),*) => { 45 | match VALUE { 46 | $($i => $e,)* 47 | _ => unimplemented!(), 48 | } 49 | }; 50 | } 51 | ``` 52 | 53 |
54 | 55 | ### Good solutions 56 | 57 | **If-else chain.** We can make the macro expand to a chain of if-else 58 | comparisons structured like this, with a counter in a local variable: 59 | 60 | ```rust 61 | { 62 | let _value = VALUE; 63 | let mut _i = 0; 64 | if { 65 | let eq = _value == _i; 66 | _i += 1; 67 | eq 68 | } { 69 | $e 70 | } else if { 71 | let eq = _value == _i; 72 | _i += 1; 73 | eq 74 | } { 75 | $e 76 | } else if { 77 | let eq = _value == _i; 78 | _i += 1; 79 | eq 80 | } { 81 | $e 82 | } else { 83 | unimplemented!() 84 | } 85 | } 86 | ``` 87 | 88 | The conditions of the `if` are equivalent to `_value == _i++` except that unary 89 | increment does not exist in Rust. 90 | 91 | The leading underscore in the local variables `_value` and `_i` is meaningful in 92 | that it suppresses some of the compiler's lints on unused variables, unused 93 | assignment, and unused mut. If the caller's sequence of expressions is empty, 94 | then `_value` and `_i` are never read and `_i` is never mutated. If the caller's 95 | sequence of expressions is nonempty, the value written to `_i` by the last `_i 96 | += 1` is never read. We could alternatively use `#[allow(unused_variables, 97 | unused_mut, unused_assignments)]` but placing these attributes in a way that 98 | they apply correctly to the macro-generated local variables but not to the 99 | caller's $e expressions makes things more complicated. 100 | 101 | Notice that the way the if-else chain is structured there is a clear chunk of 102 | repeating tokens -- each `if` through the following `else`. That repeating 103 | structure makes it very easy for this to be generated from a macro\_rules macro 104 | in one step of expansion. 105 | 106 | ```rust 107 | macro_rules! 
themacro { 108 | ($($e:expr),*) => {{ 109 | let value = VALUE; 110 | let mut i = 0; 111 | $( 112 | if { 113 | let eq = value == i; 114 | i += 1; 115 | eq 116 | } { 117 | $e 118 | } else 119 | )* { 120 | unimplemented!() 121 | } 122 | }}; 123 | } 124 | ``` 125 | 126 |
127 | 128 | **Const counter.** In some situations we may really want to stick 129 | with a `match` expression rather than an if-else chain, for example if the value 130 | being matched is just part of a larger data structure and we need to bind other 131 | parts of the data structure by-move in the same match. 132 | 133 | We can't expand to a `match` in which the patterns are integer literals `0`, 134 | `1`, `2` etc as shown in the introduction, at least not while supporting an 135 | arbitrary number of input expressions, because macro\_rules can only copy and 136 | paste tokens around, never come up with new tokens. If the caller passes 9999 137 | input expressions, there wouldn't be any way for a macro\_rules macro to conjure 138 | up a `9998` integer literal token to place in the output. 139 | 140 | We also can't expand to arithmetic patterns because this is not legal Rust 141 | syntax. 142 | 143 | ```rust 144 | match VALUE { 145 | 0 => $e, 146 | 0 + 1 => $e, 147 | 0 + 1 + 1 => $e, 148 | ... 149 | } 150 | ``` 151 | 152 | Instead we will make generated code that looks like this: 153 | 154 | ```rust 155 | { 156 | mod m { 157 | pub const X: usize = 0; 158 | pub mod m { 159 | pub const X: usize = super::X + 1; 160 | pub mod m { 161 | pub const X: usize = super::X + 1; 162 | } 163 | } 164 | } 165 | match VALUE { 166 | m::X => $e, 167 | m::m::X => $e, 168 | m::m::m::X => $e, 169 | _ => unimplemented!(), 170 | } 171 | } 172 | ``` 173 | 174 | The nested modules here provide a way to avoid needing unique names for each 175 | const, which macro\_rules wouldn't be able to create. 176 | 177 | Figuring out the right generated code is the hard part. The macro implementation 178 | ends up being an unremarkable tt-muncher macro that produces one layer of the 179 | nesting at a time. 180 | 181 | ```rust 182 | macro_rules! themacro { 183 | ($($v:expr),*) => { 184 | $crate::themacro_helper! 
{ 185 | path: (m::X) 186 | def: () 187 | arms: () 188 | $($v),* 189 | } 190 | }; 191 | } 192 | 193 | macro_rules! themacro_helper { 194 | ( 195 | path: ($($path:tt)*) 196 | def: ($($def:tt)*) 197 | arms: ($(($i:pat, $v:expr))*) 198 | ) => {{ 199 | #[allow(dead_code)] 200 | mod m { 201 | pub const X: usize = 0; 202 | $($def)* 203 | } 204 | match VALUE { 205 | $( 206 | $i => $v, 207 | )* 208 | _ => unimplemented!(), 209 | } 210 | }}; 211 | ( 212 | path: ($($path:tt)*) 213 | def: ($($def:tt)*) 214 | arms: ($(($i:pat, $v:expr))*) 215 | $next:expr $(, $rest:expr)* 216 | ) => { 217 | $crate::themacro_helper! { 218 | path: (m::$($path)*) 219 | def: (pub mod m { pub const X: usize = super::X + 1; $($def)* }) 220 | arms: ($(($i, $v))* ($($path)*, $next)) 221 | $($rest),* 222 | } 223 | }; 224 | } 225 | ``` 226 | -------------------------------------------------------------------------------- /integer-match/demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "case-study-integer-match" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [[bin]] 9 | name = "case-study" 10 | path = "main.rs" 11 | -------------------------------------------------------------------------------- /integer-match/demo/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! themacro { 3 | ($($v:expr),*) => { 4 | $crate::themacro_helper! { 5 | path: (m::X) 6 | def: () 7 | arms: () 8 | $($v),* 9 | } 10 | }; 11 | } 12 | 13 | #[macro_export] 14 | macro_rules! 
themacro_helper { 15 | ( 16 | path: ($($path:tt)*) 17 | def: ($($def:tt)*) 18 | arms: ($(($i:pat, $v:expr))*) 19 | ) => {{ 20 | #[allow(dead_code)] 21 | mod m { 22 | pub const X: usize = 0; 23 | $($def)* 24 | } 25 | match VALUE { 26 | $( 27 | $i => $v, 28 | )* 29 | _ => unimplemented!(), 30 | } 31 | }}; 32 | ( 33 | path: ($($path:tt)*) 34 | def: ($($def:tt)*) 35 | arms: ($(($i:pat, $v:expr))*) 36 | $next:expr $(, $rest:expr)* 37 | ) => { 38 | $crate::themacro_helper! { 39 | path: (m::$($path)*) 40 | def: (pub mod m { pub const X: usize = super::X + 1; $($def)* }) 41 | arms: ($(($i, $v))* ($($path)*, $next)) 42 | $($rest),* 43 | } 44 | }; 45 | } 46 | 47 | fn main() { 48 | const VALUE: usize = 2; 49 | dbg!(VALUE); 50 | dbg!(themacro!('A', 'B', 'C')); 51 | } 52 | -------------------------------------------------------------------------------- /readonly-fields/README.md: -------------------------------------------------------------------------------- 1 | ## Read-only fields of mutable struct 2 | 3 | In [`oqueue`] I wanted to expose a field of one of the structs in the API, but 4 | not allow it to be mutated even if the caller has &mut access to the 5 | surrounding struct. 6 | 7 | [`oqueue`]: https://github.com/dtolnay/oqueue 8 | 9 |
10 | 11 | ### Rejected approaches 12 | 13 | **Public field.** The field cannot be `pub` because mutating it 14 | directly would enable the caller to violate invariants of the API. 15 | 16 | ```rust 17 | // Bad: caller can mutate, task.index += 1 18 | 19 | pub struct Task { 20 | pub index: usize, 21 | // other private fields 22 | } 23 | ``` 24 | 25 | **Private field, public getter.** This would be the textbook 26 | solution. 27 | 28 | ```rust 29 | // Bad: caller needs to write task.index() instead of task.index 30 | 31 | pub struct Task { 32 | index: usize, 33 | // other private fields 34 | } 35 | 36 | impl Task { 37 | pub fn index(&self) -> usize { 38 | self.index 39 | } 40 | } 41 | ``` 42 | 43 | For the ways that this API is commonly used as an argument to other function 44 | calls, I felt that the additional method call parentheses from the getter would 45 | be noisy and provide zero benefit. Rust users already understand how struct 46 | fields work and would be happy to access this value as a field if I can let 47 | them. From the role of this type in the crate's API it is very unlikely that 48 | someone would want to mutate the field, but still we need to protect against it 49 | for correctness. 50 | 51 |
52 | 53 | ### Background 54 | 55 | The way `.` field access syntax works, if there is no field found with the right 56 | name then the language will look at the type's `Deref` impl or a sequence of 57 | `Deref` impls to determine the field being named. This behavior is important for 58 | making smart pointers like `Box` convenient to use: 59 | 60 | ```rust 61 | // Somewhere in the standard library: 62 | // 63 | // pub struct Box<T> { 64 | // ptr: *mut T, 65 | // } 66 | 67 | struct S { 68 | x: String, 69 | } 70 | 71 | fn f(s: Box<S>) { 72 | // Box<S> has no field called x so it isn't obvious why 73 | // this line would be legal, but Box<S> dereferences to 74 | // S which does have that field. 75 | println!("{}", s.x); 76 | } 77 | ``` 78 | 79 | Importantly for encapsulation, the deref behavior takes place even if a field 80 | with the right name exists on the original type but is private. Suppose that 81 | `Box` were implemented by storing the heap pointer it owns in a private field 82 | called `ptr`. In that case we would still want the following code to refer to 83 | the user's `ptr` field, rather than erroring because `ptr` exists on `Box` and 84 | is private: 85 | 86 | ```rust 87 | struct S { 88 | ptr: *const u8, 89 | } 90 | 91 | fn f(s: Box<S>) { 92 | println!("{:p}", s.ptr); 93 | } 94 | ``` 95 | 96 | The final detail relevant to our original use case is that fields accessed 97 | through a `Deref` impl cannot be mutated unless the outer type also implements 98 | `DerefMut`. The `Deref` method signature looks like `fn deref(&self) -> 99 | &Self::Target` while the `DerefMut` signature looks like `fn deref_mut(&mut 100 | self) -> &mut Self::Target`. 101 | 102 |
103 | 104 | ### First attempt 105 | 106 | We can implement read-only fields by moving the state behind a `Deref` impl to a 107 | type with the appropriate fields public. Without a `DerefMut` impl, this makes 108 | all accessible fields read-only outside of the current module. 109 | 110 | ```rust 111 | pub struct Task { 112 | inner: ReadOnlyTask, 113 | } 114 | 115 | pub struct ReadOnlyTask { 116 | pub index: usize, 117 | // other private fields 118 | } 119 | 120 | impl Deref for Task { 121 | type Target = ReadOnlyTask; 122 | 123 | fn deref(&self) -> &Self::Target { 124 | &self.inner 125 | } 126 | } 127 | ``` 128 | 129 | This is pretty good from the point of view of downstream code. As intended, code 130 | from outside the module can access `task.index` through deref but cannot mutate 131 | `task.index`. 132 | 133 | The big problem with this approach is that it distresses the borrow checker. 134 | From inside the module, if code takes a reference to one of the private fields 135 | through deref, say `&task.other`, deref gets a reference to the whole `&Task` 136 | which precludes then mutating some different fields while retaining the 137 | reference. 138 | 139 | ```console 140 | error[E0506]: cannot assign to `task.inner.another` because it is borrowed 141 | --> src/main.rs:8:5 142 | | 143 | 7 | let other = &task.other; 144 | | ---- borrow of `task.inner.another` occurs here 145 | 8 | task.inner.another = 1; 146 | | ^^^^^^^^^^^^^^^^^^^^^^ assignment to borrowed `task.inner.another` occurs here 147 | ``` 148 | 149 | To work around this, practically all code within the module would need to be 150 | written in terms of `task.inner.*` explicitly rather than relying on derefs, 151 | which is unpleasant. 152 | 153 |
154 | 155 | ### Second attempt 156 | 157 | We can keep the original struct but dereference to a struct with the same memory 158 | layout and public fields, still not implementing `DerefMut`. 159 | 160 | For this to be sound, we need to guarantee that both copies of the struct have 161 | the same layout in memory. This is *not* guaranteed just by having the same 162 | fields with the same types in both. One way to do it is by using `#[repr(C)]` to 163 | tie both structs to C's struct layout rules, because those do guarantee the same 164 | layout for structs with identical fields. 165 | 166 | ```rust 167 | #[repr(C)] 168 | pub struct Task { 169 | index: usize, 170 | // other private fields 171 | } 172 | 173 | #[repr(C)] 174 | pub struct ReadOnlyTask { 175 | pub index: usize, 176 | // the same private fields 177 | } 178 | 179 | impl Deref for Task { 180 | type Target = ReadOnlyTask; 181 | 182 | fn deref(&self) -> &Self::Target { 183 | unsafe { &*(self as *const Self as *const Self::Target) } 184 | } 185 | } 186 | ``` 187 | 188 | This works as intended. Code from inside this module can access and mutate the 189 | private `task.index` directly, while code from outside the module can access 190 | `task.index` through `Deref` and cannot mutate it even if the `Task` they hold 191 | is mutable. 192 | 193 | ```console 194 | error[E0594]: cannot assign to data in a `&` reference 195 | --> main.rs:8:5 196 | | 197 | 8 | task.index += 1; 198 | | ^^^^^^^^^^^^^^^ cannot assign 199 | ``` 200 | 201 | But this is not a complete solution because we really want the field to appear 202 | as a public field in Rustdoc so that readers of the documentation immediately 203 | understand how to use it. The documentation experience should be as though this 204 | field were declared `pub`. 205 | 206 |
207 | 208 | ### Third attempt 209 | 210 | We can use [`#[cfg(doc)]`][cfgdoc] to distinguish when documentation is being 211 | rendered, which is available since Rust 1.41. 212 | 213 | [cfgdoc]: https://doc.rust-lang.org/1.67.0/rustdoc/advanced-features.html#cfgdoc-documenting-platform-specific-or-feature-specific-information 214 | 215 | ```rust 216 | #[repr(C)] 217 | pub struct Task { 218 | #[cfg(doc)] 219 | pub index: usize, 220 | 221 | #[cfg(not(doc))] 222 | index: usize, 223 | 224 | // other private fields 225 | } 226 | 227 | #[doc(hidden)] 228 | #[repr(C)] 229 | pub struct ReadOnlyTask { 230 | pub index: usize, 231 | // the same private fields 232 | } 233 | 234 | #[doc(hidden)] 235 | impl Deref for Task { 236 | type Target = ReadOnlyTask; 237 | 238 | fn deref(&self) -> &Self::Target { 239 | unsafe { &*(self as *const Self as *const Self::Target) } 240 | } 241 | } 242 | ``` 243 | 244 | This renders as intended in rustdoc as: 245 | 246 | ```console 247 | pub struct Task { 248 | pub index: usize, 249 | // some fields omitted 250 | } 251 | ``` 252 | 253 | so readers immediately know how to access the field. From the role of this type 254 | in the crate's API it is unlikely that anyone would want to mutate the field, 255 | but just in case, the field's documentation points out that it is read-only. 256 | 257 |
258 | 259 | ### Implementation 260 | 261 | Once the right strategy for generated code has been worked out, [productizing 262 | the behavior as an attribute macro][readonly] is the easy part: 263 | 264 | [readonly]: https://github.com/dtolnay/readonly 265 | 266 | ```rust 267 | /// ... 268 | #[readonly::make] 269 | pub struct Task { 270 | /// ... 271 | /// 272 | /// This field is read-only; writing to its value will not compile. 273 | pub index: usize, 274 | 275 | // other private fields 276 | } 277 | ``` 278 | -------------------------------------------------------------------------------- /readonly-fields/demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "case-study-readonly-fields" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [[bin]] 9 | name = "case-study" 10 | path = "main.rs" 11 | -------------------------------------------------------------------------------- /readonly-fields/demo/main.rs: -------------------------------------------------------------------------------- 1 | pub use oqueue::Task; 2 | 3 | mod oqueue { 4 | use core::ops::Deref; 5 | 6 | #[derive(Default)] 7 | #[repr(C)] 8 | pub struct Task { 9 | #[cfg(doc)] 10 | pub index: usize, 11 | 12 | #[cfg(not(doc))] 13 | index: usize, 14 | 15 | // Other private fields: 16 | q: usize, 17 | } 18 | 19 | #[doc(hidden)] 20 | #[repr(C)] 21 | pub struct ReadOnlyTask { 22 | pub index: usize, 23 | 24 | // The same private fields: 25 | q: usize, 26 | } 27 | 28 | #[doc(hidden)] 29 | impl Deref for Task { 30 | type Target = ReadOnlyTask; 31 | 32 | fn deref(&self) -> &Self::Target { 33 | unsafe { &*(self as *const Self as *const Self::Target) } 34 | } 35 | } 36 | 37 | #[allow(dead_code)] 38 | pub fn from_within_module(task: &mut Task) { 39 | task.index += 1; 40 | } 41 | } 42 | 43 | fn from_outside_module(task: &mut Task) { 44 | task.index += 1; // cannot assign 45 | } 46 | 47 | fn main() { 48 
| let mut task = Task::default(); 49 | oqueue::from_within_module(&mut task); 50 | from_outside_module(&mut task); 51 | } 52 | -------------------------------------------------------------------------------- /unit-type-parameters/README.md: -------------------------------------------------------------------------------- 1 | ## Unit struct with type parameters 2 | 3 | [`PhantomData`] is a lang item which means it is currently implemented using 4 | dedicated logic in the compiler, but it turns out all of its behavior can be 5 | implemented from ordinary Rust code. This gives a good opportunity to explore 6 | namespaces in Rust name resolution. 7 | 8 | [`PhantomData`]: https://doc.rust-lang.org/std/marker/struct.PhantomData.html 9 | 10 | The defining characteristic of `PhantomData` is that it is a unit struct with a 11 | type parameter, which is not otherwise allowed by Rust. 12 | 13 | ```rust 14 | struct MyPhantomData<T: ?Sized>; 15 | 16 | fn main() { 17 | let _: MyPhantomData<usize> = MyPhantomData; 18 | } 19 | ``` 20 | 21 | ```console 22 | error[E0392]: parameter `T` is never used 23 | --> src/main.rs:1:22 24 | | 25 | 1 | struct MyPhantomData<T: ?Sized>; 26 | | ^ unused parameter 27 | | 28 | = help: consider removing `T` or using a marker such as `std::marker::PhantomData` 29 | ``` 30 | 31 | This is a hard error, not a warning that can be suppressed like some other lints 32 | about unused code. Rust needs to insist on all type parameters appearing somehow 33 | in the data structure because it is critical for determining [variance]. 34 | 35 | [variance]: https://doc.rust-lang.org/nomicon/subtyping.html 36 | 37 | We will develop an attribute macro to make this work by assuming covariance for 38 | the type parameter the same as `PhantomData`. As always, the hard part is 39 | figuring out what code to generate, not writing the macro.
40 | 41 | ```rust 42 | #[phantom] 43 | struct MyPhantomData<T: ?Sized>; 44 | 45 | fn main() { 46 | let _: MyPhantomData<usize> = MyPhantomData; 47 | } 48 | ``` 49 | 50 | Solving this functionality opens some interesting design possibilities for 51 | libraries that want something that is usable like `PhantomData` but is a locally 52 | defined type, meaning the library can control the impl of traits like 53 | `IntoIterator` on it. The iteration API of [`inventory`] is an example of such a 54 | type in a public crate. 55 | 56 | [`inventory`]: https://github.com/dtolnay/inventory 57 | 58 |
59 | 60 | ### Background 61 | 62 | Names of things in Rust exist in one of three namespaces: 63 | 64 | - The type namespace: structs, enums, unions, traits, modules, enum variants. 65 | 66 | - The value namespace: functions, local variables, statics, consts, tuple struct 67 | constructors, unit struct instances, tuple variant constructors, unit 68 | variants instances. 69 | 70 | - The macro namespace: macro\_rules macros, function-like procedural macros, 71 | attribute macros, derive macros. 72 | 73 | The following is not a precise rule, but the intuition is that something exists 74 | in the type namespace if you can write: 75 | 76 | ```rust 77 | let _: TYPE; 78 | ``` 79 | 80 | while something exists in the value namespace if you can write: 81 | 82 | ```rust 83 | let _ = VALUE; 84 | ``` 85 | 86 | These two syntactic positions are always unambiguous in the Rust grammar, so 87 | permitting the same name to refer to different things in each namespace does not 88 | introduce ambiguity. 89 | 90 | It is possible to have the same name refer to different things in all three 91 | namespaces at once: 92 | 93 | ```rust 94 | // X in the macro namespace 95 | macro_rules! X { 96 | () => {}; 97 | } 98 | 99 | // X in the type namespace 100 | struct X {} 101 | 102 | // X in the value namespace 103 | const X: () = (); 104 | 105 | fn main() { 106 | // unambiguously the macro X 107 | X!(); 108 | 109 | // unambiguously the type X 110 | let _: X; 111 | 112 | // unambiguously the value X 113 | let _ = X; 114 | } 115 | ``` 116 | 117 | Some definitions place a name into more than one namespace. For example unit 118 | structs (`struct S;`) and tuple structs (`struct S(A, B);`) are both types and 119 | values. The value corresponding to a unit struct is like a constant whose value 120 | is that unit struct, and the value corresponding to a tuple struct is like a 121 | function that takes the tuple elements and returns the tuple struct. 
122 | 123 | Braced structs (`struct S { a: A }`) are types only. 124 | 125 |
126 | 127 | ### Strategy 128 | 129 | `PhantomData`, being a unit struct, consists of a type component and a value 130 | component. When you write `use std::marker::PhantomData` you are importing both. 131 | 132 | ```rust 133 | use std::marker::PhantomData; 134 | 135 | fn main() { 136 | let _: PhantomData<usize> = PhantomData::<usize>; 137 | } 138 | ``` 139 | 140 | In implementing our own `PhantomData` we will tackle the two namespaces one 141 | after the other. 142 | 143 | In the value namespace we will need something that makes the following valid: 144 | 145 | ```rust 146 | fn main() { 147 | let _ = MyPhantomData::<usize>; 148 | } 149 | ``` 150 | 151 | And in the type namespace we will need something for this: 152 | 153 | ```rust 154 | fn main() { 155 | let _: MyPhantomData<usize>; 156 | } 157 | ``` 158 | 159 | Independently these would be easy, but the hard part will be making it so that 160 | `MyPhantomData::<usize>` as a value has a type that matches 161 | `MyPhantomData<usize>`. 162 | 163 | ```rust 164 | fn main() { 165 | let _: MyPhantomData<usize> = MyPhantomData::<usize>; 166 | } 167 | ``` 168 | 169 |
170 | 171 | ### Value namespace 172 | 173 | In the value namespace basically our only tool relevant to this project is unit 174 | variants. The other obvious candidates in the value namespace (statics and 175 | consts) cannot carry a type parameter. 176 | 177 | You may be familiar with type parameters on unit variants already, maybe without 178 | thinking about it, from dealing with `Option`: 179 | 180 | ```rust 181 | fn main() { 182 | let mut x = None::<usize>; 183 | 184 | // equivalent to: 185 | let mut x: Option<usize> = None; 186 | } 187 | ``` 188 | 189 | Here is how we would make a unit variant with a type parameter that can be 190 | imported and used in value position: 191 | 192 | ```rust 193 | mod phantom { 194 | pub use self::ImplementationDetail::MyPhantomData; 195 | 196 | pub enum ImplementationDetail<T: ?Sized> { 197 | MyPhantomData, 198 | 199 | #[allow(dead_code)] 200 | #[doc(hidden)] 201 | Marker(*const T), 202 | } 203 | } 204 | 205 | use phantom::MyPhantomData; 206 | 207 | fn main() { 208 | let _ = MyPhantomData::<usize>; 209 | } 210 | ``` 211 | 212 | The marker variant is responsible for using the type parameter `T` in some way 213 | that gives it the right variance. There are many correct alternatives but I made 214 | it hold `*const T` as one example of a type that is covariant in `T` and works 215 | with dynamically sized `T: ?Sized`. We will come back to autotrait impls later. 216 | 217 |
218 | 219 | ### Type namespace 220 | 221 | Clearly in the previous section the type of the enum variant 222 | `MyPhantomData::<usize>` is the enum type `ImplementationDetail<usize>`. We just 223 | need to call it something else, namely `MyPhantomData<usize>`. 224 | 225 | Changing the name doesn't immediately work. 226 | 227 | ```rust 228 | mod phantom { 229 | pub use self::MyPhantomData::MyPhantomData; 230 | 231 | pub enum MyPhantomData<T: ?Sized> { 232 | MyPhantomData, 233 | 234 | #[allow(dead_code)] 235 | #[doc(hidden)] 236 | Marker(*const T), 237 | } 238 | } 239 | ``` 240 | 241 | ```console 242 | error[E0255]: the name `MyPhantomData` is defined multiple times 243 | --> src/main.rs:4:5 244 | | 245 | 2 | pub use self::MyPhantomData::MyPhantomData; 246 | | ---------------------------------- previous import of the type `MyPhantomData` here 247 | 3 | 248 | 4 | pub enum MyPhantomData<T: ?Sized> { 249 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `MyPhantomData` redefined here 250 | | 251 | = note: `MyPhantomData` must be defined only once in the type namespace of this module 252 | help: you can use `as` to change the binding name of the import 253 | | 254 | 2 | pub use self::MyPhantomData::MyPhantomData as OtherMyPhantomData; 255 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 256 | ``` 257 | 258 | The behavior seen here is that all enum variants of any style (struct variant, 259 | tuple variant, unit variant) occupy both the value namespace and the type 260 | namespace. Our code had defined `enum MyPhantomData` as a type, but then 261 | imported `self::MyPhantomData::MyPhantomData` which is both a value and type, 262 | resulting in a conflict in the type namespace. 263 | 264 | Naively we might expect that unit variants and tuple variants occupy only the 265 | value namespace while struct variants occupy only the type namespace.
Unit 266 | variants necessarily need something in the value namespace through which you 267 | refer to their value, and tuple variants necessarily need something in the value 268 | namespace that behaves like a function through which you construct them. And 269 | struct variants need something to make curly brace initialization work, which 270 | seems like it should be the type namespace because plain structs with named 271 | fields exist in the type namespace only. But apparently this is not how things 272 | work -- maybe to leave things open for language evolution in which enum variants 273 | become usable as refinement types. 274 | 275 | In any case, the way to work around conflicts is via wildcard imports. These are 276 | allowed to overlap with non-wildcard imports or explicit definitions, in which 277 | case the non-wildcard takes precedence. The precedence applies independently 278 | within each namespace. 279 | 280 | ```rust 281 | mod phantom { 282 | // Imports the enum variant in both type and value namespace, 283 | // but in the type namespace it gets shadowed by the definition 284 | // `enum MyPhantomData` below. 285 | pub use self::MyPhantomData::*; 286 | 287 | pub enum MyPhantomData<T: ?Sized> { 288 | MyPhantomData, 289 | 290 | #[allow(dead_code)] 291 | #[doc(hidden)] 292 | Marker(*const T), 293 | } 294 | } 295 | 296 | use phantom::MyPhantomData; 297 | 298 | fn main() { 299 | let _: MyPhantomData<usize> = MyPhantomData::<usize>; 300 | } 301 | ``` 302 | 303 | Pretty neat! There are some quirks to sort out still, but this is on the right 304 | track. 305 | 306 |
307 | 308 | ### Memory representation 309 | 310 | We want `std::mem::size_of::<MyPhantomData<T>>() == 0`. 311 | 312 | In the definition above, it would currently be a whopping 16 or 24 bytes 313 | depending on whether `T` is dynamically sized. The marker variant takes up space 314 | for a pointer or fat pointer, and there is an enum discriminant as well which 315 | needs 1 bit, and we get a further 63 bits of padding for alignment reasons. 316 | 317 | Two things need to change: we need the marker variant not to contain storage, 318 | and we need the discriminant not to exist. 319 | 320 | We can eliminate the discriminant by making the marker variant's data zero sized 321 | and statically impossible. The compiler is smart enough to elide the 322 | discriminant when this happens. 323 | 324 | For various complicated but reasonably good reasons, just making the data 325 | impossible without making it zero sized (such as `Marker(Void, *const T)`) is 326 | not sufficient. 327 | 328 | ```rust 329 | mod phantom { 330 | pub use self::MyPhantomData::*; 331 | 332 | pub enum MyPhantomData<T: ?Sized> { 333 | MyPhantomData, 334 | 335 | #[allow(dead_code)] 336 | #[doc(hidden)] 337 | Marker(Void, [*const T; 0]), 338 | } 339 | 340 | pub enum Void {} 341 | } 342 | 343 | use phantom::MyPhantomData; 344 | 345 | fn main() { 346 | assert_eq!(std::mem::size_of::<MyPhantomData<usize>>(), 0); 347 | } 348 | ``` 349 | 350 |
351 | 352 | ### Autotraits 353 | 354 | The standard library's `PhantomData` has `impl<T: ?Sized + Send> Send` and 355 | `impl<T: ?Sized + Sync> Sync`. Our type so far has neither of these because 356 | `*const T` does not. 357 | 358 | A simple fix would be `Marker(Void, [Box<T>; 0])` but then we depend on a memory 359 | allocator for no reason. This fix works because `Box<T>` has the same `Send` and 360 | `Sync` impls as `T`. 361 | 362 | Without `Box`, the same impls can be written unsafely. 363 | 364 | ```rust 365 | mod phantom { 366 | pub use self::MyPhantomData::*; 367 | 368 | pub enum MyPhantomData<T: ?Sized> { 369 | MyPhantomData, 370 | 371 | #[allow(dead_code)] 372 | #[doc(hidden)] 373 | Marker(Void, [*const T; 0]), 374 | } 375 | 376 | pub enum Void {} 377 | 378 | unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {} 379 | unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {} 380 | } 381 | ``` 382 | 383 |
384 | 385 | ### Documentation 386 | 387 | Rustdoc would render our type as: 388 | 389 | ```console 390 | pub enum MyPhantomData<T: ?Sized> { 391 | MyPhantomData, 392 | // some variants omitted 393 | } 394 | ``` 395 | 396 | which is technically accurate, but misleading relative to how we want users to 397 | conceptualize this construct. 398 | 399 | There isn't a great solution to this, but you may or may not find the following 400 | more appealing: 401 | 402 | ```rust 403 | mod phantom { 404 | pub use self::MyPhantomData::*; 405 | 406 | pub enum MyPhantomData<T: ?Sized> { 407 | MyPhantomData, 408 | 409 | #[allow(dead_code)] 410 | #[doc(hidden)] 411 | Marker(Void, [*const T; 0]), 412 | } 413 | 414 | pub enum Void {} 415 | 416 | unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {} 417 | unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {} 418 | } 419 | 420 | /// ... documentation illustrating how to use. 421 | #[allow(type_alias_bounds)] 422 | pub type MyPhantomData<T: ?Sized> = phantom::MyPhantomData<T>; 423 | 424 | #[doc(hidden)] 425 | pub use self::phantom::*; 426 | ``` 427 | 428 | Rustdoc renders: 429 | 430 | ```console 431 | type MyPhantomData<T: ?Sized> = MyPhantomData<T>; 432 | ``` 433 | 434 | which hides the implementation detail and drives focus to your handwritten 435 | documentation to show how the type is intended to be used. 436 | 437 | The `#[allow(type_alias_bounds)]` attribute suppresses a future compatibility 438 | lint that triggers on type aliases with trait bounds on the left hand side. The 439 | Rust compiler currently does not respect such bounds but this behavior is 440 | considered a compiler bug and is subject to change, potentially breaking code 441 | involving trait bounds in type aliases -- hence the lint. Our code above is in 442 | the clear because the bounds in the type alias exactly match the bounds implied 443 | by well-formedness of the right hand side, so the meaning is the same whether or 444 | not the compiler looks at the type alias bounds.
We want the bounds there 445 | because they do appear correctly in Rustdoc. 446 | 447 |
448 | 449 | ### Implementation 450 | 451 | Once the generated code is figured out, packaging this into [an attribute 452 | macro][ghost] is the easy part. 453 | 454 | [ghost]: https://github.com/dtolnay/ghost 455 | 456 | ```rust 457 | /// ... documentation illustrating how to use. 458 | #[phantom] 459 | struct MyPhantomData<T: ?Sized>; 460 | ``` 461 | 462 | In fact we might as well make it work for any number of type parameters and 463 | lifetimes, as well as trait bounds and where-clauses. 464 | 465 | ```rust 466 | #[phantom] 467 | struct Crazy<'a, V: 'a, T> where &'a V: IntoIterator<Item = T>; 468 | ``` 469 | -------------------------------------------------------------------------------- /unit-type-parameters/demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "case-study-unit-type-parameters" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [[bin]] 9 | name = "case-study" 10 | path = "main.rs" 11 | -------------------------------------------------------------------------------- /unit-type-parameters/demo/main.rs: -------------------------------------------------------------------------------- 1 | mod phantom { 2 | pub use self::MyPhantomData::*; 3 | 4 | pub enum MyPhantomData<T: ?Sized> { 5 | MyPhantomData, 6 | 7 | #[allow(dead_code)] 8 | #[doc(hidden)] 9 | Marker(Void, [*const T; 0]), 10 | } 11 | 12 | pub enum Void {} 13 | 14 | unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {} 15 | unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {} 16 | } 17 | 18 | /// ... documentation illustrating how to use. 19 | #[allow(type_alias_bounds)] 20 | pub type MyPhantomData<T: ?Sized> = phantom::MyPhantomData<T>; 21 | 22 | #[doc(hidden)] 23 | pub use self::phantom::*; 24 | 25 | fn main() { 26 | let _: MyPhantomData<usize> = MyPhantomData::<usize>; 27 | } 28 | --------------------------------------------------------------------------------