├── .github
├── FUNDING.yml
└── workflows
│ └── ci.yml
├── .gitignore
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── autoref-specialization
└── README.md
├── bitfield-assertion
├── README.md
└── demo
│ ├── Cargo.toml
│ ├── bitfield
│ ├── Cargo.toml
│ └── src
│ │ └── lib.rs
│ ├── impl
│ ├── Cargo.toml
│ └── src
│ │ └── lib.rs
│ └── main.rs
├── callable-types
├── README.md
└── demo
│ ├── Cargo.toml
│ └── main.rs
├── function-epilogue
├── README.md
└── demo
│ ├── Cargo.toml
│ └── main.rs
├── integer-match
├── README.md
└── demo
│ ├── Cargo.toml
│ └── main.rs
├── readonly-fields
├── README.md
└── demo
│ ├── Cargo.toml
│ └── main.rs
└── unit-type-parameters
├── README.md
└── demo
├── Cargo.toml
└── main.rs
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: dtolnay
2 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | pull_request:
6 | workflow_dispatch:
7 | schedule: [cron: "40 1 * * *"]
8 |
9 | permissions:
10 | contents: read
11 |
12 | env:
13 | RUSTFLAGS: -Dwarnings
14 |
15 | jobs:
16 | pre_ci:
17 | uses: dtolnay/.github/.github/workflows/pre_ci.yml@master
18 |
19 | check:
20 | name: Rust ${{matrix.rust}}
21 | needs: pre_ci
22 | if: needs.pre_ci.outputs.continue
23 | runs-on: ubuntu-latest
24 | strategy:
25 | fail-fast: false
26 | matrix:
27 | rust: [nightly, beta, stable]
28 | timeout-minutes: 45
29 | steps:
30 | - uses: actions/checkout@v4
31 | - uses: dtolnay/rust-toolchain@master
32 | with:
33 | toolchain: ${{matrix.rust}}
34 | - run: cargo check --workspace --exclude case-study-bitfield-assertion --exclude case-study-readonly-fields
35 | - uses: actions/upload-artifact@v4
36 | if: matrix.rust == 'nightly' && always()
37 | with:
38 | name: Cargo.lock
39 | path: Cargo.lock
40 | continue-on-error: true
41 |
42 | outdated:
43 | name: Outdated
44 | runs-on: ubuntu-latest
45 | if: github.event_name != 'pull_request'
46 | timeout-minutes: 45
47 | steps:
48 | - uses: actions/checkout@v4
49 | - uses: dtolnay/rust-toolchain@stable
50 | - uses: dtolnay/install@cargo-outdated
51 | - run: cargo outdated --workspace --exit-code 1
52 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Cargo.lock
2 | target/
3 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | members = [
3 | "bitfield-assertion/demo",
4 | "bitfield-assertion/demo/bitfield",
5 | "bitfield-assertion/demo/impl",
6 | "callable-types/demo",
7 | "function-epilogue/demo",
8 | "integer-match/demo",
9 | "readonly-fields/demo",
10 | "unit-type-parameters/demo",
11 | ]
12 | resolver = "2"
13 |
--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | Permission is hereby granted, free of charge, to any
2 | person obtaining a copy of this software and associated
3 | documentation files (the "Software"), to deal in the
4 | Software without restriction, including without
5 | limitation the rights to use, copy, modify, merge,
6 | publish, distribute, sublicense, and/or sell copies of
7 | the Software, and to permit persons to whom the Software
8 | is furnished to do so, subject to the following
9 | conditions:
10 |
11 | The above copyright notice and this permission notice
12 | shall be included in all copies or substantial portions
13 | of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Case studies
2 |
3 | This repository showcases some examples of tricky Rust code that I have
4 | encountered during my years working with a variety of advanced macro libraries
5 | in Rust (my own and others').
6 |
7 |
8 |
9 |
10 | # [:postal\_horn:](#the-point) The point
11 |
12 | This project is dedicated to the one profound insight about Rust macro
13 | development: the difference between someone who is competent with macros vs an
14 | expert at macros mostly has nothing to do with how good they are "at macros".
15 |
16 | 90% of what enables people to push the limits of possibility in pursuit of a
17 | powerful and user-friendly macro library API is in their mastery of everything
18 | else about Rust outside of macros, and their creativity to put together ordinary
19 | language features in interesting ways that may not occur in handwritten code.
20 |
21 | You may occasionally come across Rust macros that you feel are really advanced
22 | or magical. If you ever feel this way, I encourage you to take a closer look and
23 | you'll discover that as far as the macro implementation itself is concerned,
24 | none of those libraries are doing anything remotely interesting. If it is a
25 | procedural macro, they always just parse some input in a boring way, crawl some
26 | syntax trees in a boring way to find out about the input, and paste together
27 | some output code in a boring way exactly like what you would learn in a few
28 | hours by working through any part of my [procedural macro workshop][workshop].
29 | If it is a macro\_rules macro, everything is conceptually just as boring but
30 | when stretched to its limits it becomes a write-only syntax that poses a
31 | challenge for even the author to follow and understand later, let alone someone
32 | else not already fluent in the basics of macro\_rules.
33 |
34 | To the extent that there are any tricks to macro development, all of them
35 | revolve around *what* code the macros emit, not *how* the macros emit the code.
36 | This realization can be surprising to people who entered into macro development
37 | with a vague notion of procedural macros as a "compiler plugin" which they
38 | imagine must imply all sorts of complicated APIs for *how* to integrate with the
39 | rest of the compiler. That's not how it works. The only thing macros do is emit
40 | code that could have been written by hand. If you couldn't have come up with
41 | some piece of tricky code from one of those magical macros, learning more "about
42 | macros" won't change that; but learning more about every other part of Rust
43 | will. Inversely, once you come up with what code you want to generate, writing
44 | the macro to generate it is generally the easy part.
45 |
46 | [workshop]: https://github.com/dtolnay/proc-macro-workshop
47 |
48 |
49 |
50 |
51 | # [:boot:](#focus) Focus
52 |
53 | Yes, these case studies are drawn from use cases that arise from work on macros,
54 | but the macros are never the interesting part. The ingenuity and sophistication
55 | always lie in what Rust code ultimately gets emitted by the macro, and I think
56 | you will find that those are fully possible to appreciate even if you know
57 | nothing about macros.
58 |
59 | To that end, I make an effort to minimize the role of macros in these case
60 | studies. For each one I give only enough context about the relevant macro to
61 | explain a set of constraints that the generated code will need to comply with.
62 | The focus is on the generated code, which somehow solves the constraints using a
63 | clever combination of Rust language features unrelated to macros. Lastly and
64 | least importantly, I tie it back to the macro to point out that making a macro
65 | produce the generated code we came up with would be the easy part.
66 |
67 | Read and enjoy; I hope you find these an enlightening window into this corner of
68 | Rust that has so far not been put into words.
69 |
70 |
71 |
72 |
73 | # [:jack\_o\_lantern:](#case-studies) Case studies
74 |
75 |
80 |
81 |
86 |
87 |
92 |
93 |
98 |
99 |
104 |
105 |
110 |
111 |
116 |
117 |
118 |
119 | #### License
120 |
121 |
122 | Licensed under either of [Apache License, Version
123 | 2.0](LICENSE-APACHE) or [MIT license](LICENSE-MIT) at your option.
124 |
125 |
126 |
127 |
128 |
129 | Unless you explicitly state otherwise, any contribution intentionally submitted
130 | for inclusion in this project by you, as defined in the Apache-2.0 license,
131 | shall be dual licensed as above, without any additional terms or conditions.
132 |
133 |
--------------------------------------------------------------------------------
/autoref-specialization/README.md:
--------------------------------------------------------------------------------
1 | ## Autoref-based stable specialization
2 |
3 | "Specialization" refers to permitting overlapping impls in Rust's trait system
4 | so long as for every possible type, one of the applicable impls is "more
5 | specific" than the others for some intuitive but precisely defined notion of
6 | specific. Discussions about a specialization language feature have been ongoing
7 | for 4.5 years ([RFC 1210], [rust-lang/rust#31844]). Today the feature is
8 | partially implemented in rustc but is not yet sound when mixed with lifetimes
9 | ([rust-lang/rust#40582]) and requires more language design work and compiler
10 | work before it could be stabilized.
11 |
12 | [RFC 1210]: https://github.com/rust-lang/rfcs/pull/1210
13 | [rust-lang/rust#31844]: https://github.com/rust-lang/rust/issues/31844
14 | [rust-lang/rust#40582]: https://github.com/rust-lang/rust/issues/40582
15 |
16 | This page covers a stable, safe, generalizable technique for solving some of the
17 | use cases that would otherwise be blocked on specialization.
18 |
19 | The technique was originally developed for use by macros in the [Anyhow] crate.
20 |
21 | [Anyhow]: https://github.com/dtolnay/anyhow
22 |
23 |
24 |
25 | ### Context
26 |
27 | I'll explain the technique as applied to two use cases, one simpler to start
28 | with and then a more elaborate realistic one.
29 |
30 | The first use case is going to be a truly canonical application of
31 | specialization — a blanket impl with a separate fast path for some
32 | concrete type(s). The equivalent nightly-only specialized blanket impl would be
33 | like this:
34 |
35 | ```rust
36 | #![feature(specialization)]
37 |
38 | use std::fmt::{Display, Write};
39 |
40 | pub trait MyToString {
41 | fn my_to_string(&self) -> String;
42 | }
43 |
44 | // General impl that applies to any T with a Display impl.
45 | impl<T: Display> MyToString for T {
46 | default fn my_to_string(&self) -> String {
47 | let mut buf = String::new();
48 | buf.write_fmt(format_args!("{}", self)).unwrap();
49 | buf.shrink_to_fit();
50 | buf
51 | }
52 | }
53 |
54 | // Specialized impl to bypass the relatively expensive std::fmt machinery.
55 | impl MyToString for String {
56 | fn my_to_string(&self) -> String {
57 | self.clone()
58 | }
59 | }
60 | ```
61 |
62 | Then the second use case will be closer to the real-life usage of this technique
63 | in Anyhow. We have an error type, and we want it to be constructible from any
64 | underlying type that has a `Display` impl. But if the underlying type *also* has
65 | a `std::error::Error` impl, we'd like to know about that by invoking a different
66 | constructor which will propagate the original error's source() and backtrace()
67 | information correctly.
68 |
69 | Ultimately we want both of the following to compile:
70 |
71 | ```rust
72 | fn demo1() -> Result<(), anyhow::Error> {
73 | // Turn a &str into an error.
74 | // &str implements Display but not std::error::Error.
75 | return Err(anyhow!("oh no!"));
76 | }
77 |
78 | fn demo2() -> Result<(), anyhow::Error> {
79 | // Turn an existing std::error::Error value into our error without
80 | // losing its source() and backtrace() if there is one.
81 | let io_error = fs::read("/tmp/nonexist").unwrap_err();
82 | return Err(anyhow!(io_error));
83 | }
84 | ```
85 |
86 | Recall that `std::error::Error` has `Display` as a supertrait so the impl for
87 | `std::error::Error` is strictly more specific than the general impl that covers
88 | all `Display` types.
89 |
90 | ```rust
91 | #![feature(specialization)]
92 |
93 | use std::error::Error as StdError;
94 | use std::fmt::Display;
95 |
96 | pub struct Error(/* ... */);
97 |
98 | impl Error {
99 | pub(crate) fn from_fmt<T: Display>(error: T) -> Self {...}
100 | pub(crate) fn from_std_error<T: StdError>(error: T) -> Self {...}
101 | }
102 |
103 | pub(crate) trait AnyhowNew {
104 | fn new(self) -> Error;
105 | }
106 |
107 | impl<T: Display> AnyhowNew for T {
108 | default fn new(self) -> Error {
109 | // no std error impl
110 | Error::from_fmt(self)
111 | }
112 | }
113 |
114 | impl<T: StdError> AnyhowNew for T {
115 | fn new(self) -> Error {
116 | // able to use std error's source() and backtrace()
117 | Error::from_std_error(self)
118 | }
119 | }
120 | ```
121 |
122 |
123 |
124 | ### Background: autoref
125 |
126 | To do specialization using only 100% stable and 100% safe code, we'll need some
127 | other mechanism to accomplish compile-time fallback through a prioritized
128 | sequence of behaviors. That is, we need some way to define a general impl and a
129 | tree of more specific impls where any invocation will resolve to the most
130 | specific applicable impl at compile time.
131 |
132 | Outside of `feature(specialization)`, Rust has at least one other language
133 | feature capable of doing this, which is method resolution autoref.
134 |
135 | As an introduction to autoref let's consider this program:
136 |
137 | ```rust
138 | struct Value(i32);
139 |
140 | impl Value {
141 | fn print(&self) {
142 | println!("it worked! {}", self.0);
143 | }
144 | }
145 |
146 | fn main() {
147 | let v = Value(0);
148 | v.print();
149 | }
150 | ```
151 |
152 | We make a variable `v` of type `Value` and call a method on it. If you've
153 | written any Rust code it will be obvious to you *that* this code works, but I'd
154 | like to dig into *why* it works. In particular, we have a value of type `Value`
155 | but the method `print` takes an argument of type `&Value`. Where is the code
156 | that turns `Value` into `&Value`?
157 |
158 | This is autoref — the compiler is inserting the required reference for you
159 | as part of resolving the method call. In effect, the code that executes is
160 | equivalent to if we had written `(&v).print()` or more explicitly
161 | `Value::print(&v)`, but it is "auto" because we never had to write `&` in the
162 | call.
163 |
164 | Note: autoref is not the same as deref, which is a different thing that method
165 | resolution does. In a way they are opposites; autoref is about *adding* a layer
166 | of reference to resolve a call; deref is about *removing* a layer of reference.
167 | Both are ubiquitous but invisible.
168 |
169 |
170 |
171 | ### Background: method resolution
172 |
173 | How does autoref get us stable specialization? To answer that, let's look at
174 | what happens if the same method name could be dispatched either with or without
175 | autoref.
176 |
177 | ```rust
178 | struct Value;
179 |
180 | trait Print {
181 | fn print(self);
182 | }
183 |
184 | impl Print for Value {
185 | fn print(self) {
186 | println!("called on Value");
187 | }
188 | }
189 |
190 | impl Print for &Value {
191 | fn print(self) {
192 | println!("called on &Value");
193 | }
194 | }
195 |
196 | fn main() {
197 | let v = Value;
198 | v.print();
199 | }
200 | ```
201 |
202 | Here `print` could refer to either `<Value as Print>::print` which takes an
203 | argument of type `Value`, or to `<&Value as Print>::print` which takes an
204 | argument of type `&Value`. If you run this program you'll see it prints "called
205 | on Value". But if the first impl were removed, it would then print "called on
206 | &Value". In some sense the first impl is more specific from the point of
207 | view of the call we wrote; exactly what we'll need!
208 |
209 | To define the compiler's behavior more precisely, the rule is that if a method
210 | can be dispatched without autoref then it will be. Only if a method cannot be
211 | dispatched without autoref, the compiler will insert an autoref and attempt to
212 | resolve it again.
213 |
214 | This and some creativity should be all we need to solve the use cases that we
215 | saw up top.
216 |
217 |
218 |
219 | ### Simple application
220 |
221 | Recall that we have a String conversion that we wanted to implement in one way
222 | for any `T: Display` and in a more performant specialized way for specifically
223 | `String`.
224 |
225 | Here is the full implementation:
226 |
227 | ```rust
228 | use std::fmt::{Display, Write};
229 |
230 | pub trait DisplayToString {
231 | fn my_to_string(&self) -> String;
232 | }
233 |
234 | // General impl that applies to any T with a Display impl.
235 | //
236 | // Note that the Self type of this impl is &T and so the method argument
237 | // is actually &&T! That makes this impl lower priority during method
238 | // resolution if the impl that accepts &String would also apply.
239 | impl<T: Display> DisplayToString for &T {
240 | fn my_to_string(&self) -> String {
241 | println!("called blanket impl");
242 |
243 | let mut buf = String::new();
244 | buf.write_fmt(format_args!("{}", self)).unwrap();
245 | buf.shrink_to_fit();
246 | buf
247 | }
248 | }
249 |
250 | pub trait StringToString {
251 | fn my_to_string(&self) -> String;
252 | }
253 |
254 | // Specialized impl to bypass the relatively expensive std::fmt machinery.
255 | //
256 | // The method argument is typed &String.
257 | impl StringToString for String {
258 | fn my_to_string(&self) -> String {
259 | println!("called specialized impl");
260 |
261 | self.clone()
262 | }
263 | }
264 |
265 | macro_rules! convert_to_strings {
266 | ($($e:expr),*) => {
267 | [$(
268 | (&$e).my_to_string()
269 | ),*]
270 | };
271 | }
272 |
273 | fn main() {
274 | let owned_string = "hacks".to_owned();
275 | let strings = convert_to_strings![1, "&str", owned_string];
276 | println!("{:?}", strings);
277 | }
278 | ```
279 |
280 | If we run this program the output shows that our specialization works!
281 |
282 | ```console
283 | called blanket impl
284 | called blanket impl
285 | called specialized impl
286 | ["1", "&str", "hacks"]
287 | ```
288 |
289 |
290 |
291 | ### Realistic application
292 |
293 | Recall that we have an Error type that we'd like to construct from any `T` that
294 | implements `Display`, but using a different constructor if `T` also implements
295 | `std::error::Error`.
296 |
297 | The reason this is more complicated than the previous use case is that my Error
298 | constructors want to receive the argument *by value*! That's bad news if we are
299 | relying on autoref because autoref is all about inserting a layer of reference.
300 |
301 | Instead we'll use a tagged dispatch strategy with a pair of method calls, the
302 | first using autoref-based specialization with a reference argument to select a
303 | tag, and the second based on that tag which takes ownership of the original
304 | argument.
305 |
306 | ```rust
307 | use std::error::Error as StdError;
308 | use std::fmt::Display;
309 |
310 | pub struct Error(/* ... */);
311 |
312 | // Our two constructors. The first is more general.
313 | impl Error {
314 | pub(crate) fn from_fmt<T: Display>(error: T) -> Self {
315 | println!("called Error::from_fmt");
316 | Error {}
317 | }
318 | pub(crate) fn from_std_error<T: StdError>(error: T) -> Self {
319 | _ = error.source(); // it works!
320 | println!("called Error::from_std_error");
321 | Error {}
322 | }
323 | }
324 |
325 | macro_rules! anyhow {
326 | ($err:expr) => ({
327 | #[allow(unused_imports)]
328 | use $crate::{DisplayKind, StdErrorKind};
329 | match $err {
330 | error => (&error).anyhow_kind().new(error),
331 | }
332 | });
333 | }
334 |
335 | // If the arg implements Display but not StdError, anyhow_kind() will
336 | // return this tag.
337 | struct DisplayTag;
338 |
339 | trait DisplayKind {
340 | #[inline]
341 | fn anyhow_kind(&self) -> DisplayTag {
342 | DisplayTag
343 | }
344 | }
345 |
346 | // Requires one extra autoref to call! Lower priority than StdErrorKind.
347 | impl<T: Display> DisplayKind for &T {}
348 |
349 | impl DisplayTag {
350 | #[inline]
351 | fn new<M: Display>(self, message: M) -> Error {
352 | Error::from_fmt(message)
353 | }
354 | }
355 |
356 | // If the arg implements StdError (and thus also Display), anyhow_kind()
357 | // will return this tag.
358 | struct StdErrorTag;
359 |
360 | trait StdErrorKind {
361 | #[inline]
362 | fn anyhow_kind(&self) -> StdErrorTag {
363 | StdErrorTag
364 | }
365 | }
366 |
367 | // Does not require any autoref if called as (&error).anyhow_kind().
368 | impl<T: StdError> StdErrorKind for T {}
369 |
370 | impl StdErrorTag {
371 | #[inline]
372 | fn new<E: StdError>(self, error: E) -> Error {
373 | Error::from_std_error(error)
374 | }
375 | }
376 |
377 | fn main() {
378 | // Turn a &str into an error.
379 | // &str implements Display but not std::error::Error.
380 | let _err = anyhow!("oh no!");
381 |
382 | // Turn an existing std::error::Error value into our error without
383 | // losing its source() and backtrace() if there is one.
384 | let io_error = std::fs::read("/tmp/nonexist").unwrap_err();
385 | let _err = anyhow!(io_error);
386 | }
387 | ```
388 |
389 |
390 |
391 | ### Limitations
392 |
393 | The way that this technique applies method resolution cannot be described by a
394 | trait bound, so for practical purposes you should think of this technique as
395 | working in macros only.
396 |
397 | That is, we can't do:
398 |
399 | ```rust
400 | pub fn demo<T: Display>(value: T) -> String {
401 | (&value).my_to_string()
402 | }
403 | ```
404 |
405 | and get the specialized behavior. If we put `T: Display` in the trait bound,
406 | method resolution will use the impl for `T: Display` even if `T` happened to be
407 | instantiated as `String`.
408 |
409 | Depending on your use case, this is honestly fine! If you are a macro already
410 | then you're all set. If you can be made a macro, that's good too (like I did for
411 | `anyhow!` (though it was good for that to be a macro anyway so that it can
412 | accept format args the way println does)). If you can't possibly be a macro then
413 | this won't help you.
414 |
415 | I am excited to hear other people's experience applying this technique and I
416 | expect it to generalize quite well.
417 |
--------------------------------------------------------------------------------
/bitfield-assertion/README.md:
--------------------------------------------------------------------------------
1 | ## Multiple of 8 const assertion
2 |
3 | We need a macro that will fail to compile if some expression is not a multiple
4 | of 8, without knowing the value of the expression until after name resolution
5 | which happens after macro expansion.
6 |
7 | This came up in the context of bitfields where sizes of fields are specified in
8 | bits but the application would like to require that the total size is an exact
9 | number of bytes.
10 |
11 | ```rust
12 | trait Field {
13 | const BITS: usize;
14 | }
15 |
16 | enum B3 {}
17 | impl Field for B3 {
18 | const BITS: usize = 3;
19 | }
20 |
21 | enum B5 {}
22 | impl Field for B5 {
23 | const BITS: usize = 5;
24 | }
25 |
26 | fn main() {
27 | require_multiple_of_eight!(B3::BITS + B5::BITS);
28 | }
29 | ```
30 |
31 | As always, we would like the error message to be as precise and useful as
32 | possible even though in this case the macro does not control the exact message
33 | because this error can only be detected after name resolution.
34 |
35 |
36 |
37 | ### First attempt
38 |
39 | The two main ways a macro can trigger compile-time errors after macro expansion
40 | are in const evaluation and in type checking.
41 |
42 | Let's look at const evaluation first by writing a `const` that can be
43 | successfully computed if and only if the input expression is a multiple of 8.
44 | There are many ways to do this but one way is to use `$e % 8` as an index into
45 | an array where the only legal index would be 0.
46 |
47 | ```rust
48 | macro_rules! require_multiple_of_eight {
49 | ($e:expr) => {
50 | const REQUIRE_MULTIPLE_OF_EIGHT: () = [()][$e % 8];
51 | _ = REQUIRE_MULTIPLE_OF_EIGHT;
52 | };
53 | }
54 | ```
55 |
56 | This seems like it should get the job done but it doesn't quite. There are some
57 | weird optimizations around const evaluation. In particular a `cargo check` would
58 | not need to evaluate this constant. It does a simple type check only which
59 | determines that *if* the constant does evaluate successfully then its type would
60 | be `()` which matches the declared type so everything is okay. On the other hand
61 | `cargo build` does need to perform the evaluation. We end up in a situation
62 | where `cargo check` can succeed at the same time as `cargo build` fails, which
63 | is not good.
64 |
65 | Separately, this approach does not give us any opportunity to control the
66 | message part of the error. If the same macro needed to evaluate multiple
67 | assertions, the caller couldn't tell which one was failing.
68 |
69 | The message looks like:
70 |
71 | ```console
72 | error[E0080]: erroneous constant used
73 | --> src/main.rs:8:10
74 | |
75 | 8 | #[derive(Bitfield)]
76 | | ^^^^^^^^ referenced constant has errors
77 | ```
78 |
79 |
80 |
81 | ### Second attempt
82 |
83 | Let's use `$e` to produce something that only type checks if the given
84 | expression is a multiple of 8.
85 |
86 | Currently the only place that expressions can appear in the type grammar is in
87 | the length of a fixed sized array, so we will rely on that.
88 |
89 | ```rust
90 | macro_rules! require_multiple_of_eight {
91 | ($e:expr) => {
92 | _ = <[(); $e % 8] as $crate::MultipleOfEight>::check();
93 | };
94 | }
95 |
96 | trait MultipleOfEight {
97 | fn check() {}
98 | }
99 |
100 | impl MultipleOfEight for [(); 0] {}
101 | ```
102 |
103 | This is pretty good! The array type `[(); $e % 8]` only implements the required
104 | trait if `$e % 8` is zero. The trait solver's error message mentions
105 | "MultipleOfEight" which adequately indicates to the user what went wrong.
106 |
107 | ```console
108 | error[E0277]: the trait bound `[(); 6]: MultipleOfEight` is not satisfied
109 | --> src/main.rs:8:10
110 | |
111 | 8 | #[derive(Bitfield)]
112 | | ^^^^^^^^ the trait `MultipleOfEight` is not implemented for `[(); 6]`
113 | |
114 | = help: the following implementations were found:
115 | <[(); 0] as MultipleOfEight>
116 | = note: required by `MultipleOfEight::check`
117 | ```
118 |
119 | There are some things to improve upon though. The error message includes this
120 | distracting array type `[(); 6]` that is not obviously related to what the
121 | caller might have written. Also the note mentioning the method
122 | `MultipleOfEight::check` is just noise as far as the caller would be concerned.
123 |
124 |
125 |
126 | ### Solution
127 |
128 | Let's solve this without a method call and without the array type being the
129 | thing with a missing trait impl.
130 |
131 | ```rust
132 | macro_rules! require_multiple_of_eight {
133 | ($e:expr) => {
134 | let _: $crate::MultipleOfEight<[(); $e % 8]>;
135 | };
136 | }
137 |
138 | type MultipleOfEight<T> = <<T as Array>::Marker as TotalSizeIsMultipleOfEightBits>::Check;
139 |
140 | enum ZeroMod8 {}
141 | enum OneMod8 {}
142 | enum TwoMod8 {}
143 | enum ThreeMod8 {}
144 | enum FourMod8 {}
145 | enum FiveMod8 {}
146 | enum SixMod8 {}
147 | enum SevenMod8 {}
148 |
149 | trait Array {
150 | type Marker;
151 | }
152 |
153 | impl Array for [(); 0] {
154 | type Marker = ZeroMod8;
155 | }
156 |
157 | impl Array for [(); 1] {
158 | type Marker = OneMod8;
159 | }
160 |
161 | impl Array for [(); 2] {
162 | type Marker = TwoMod8;
163 | }
164 |
165 | impl Array for [(); 3] {
166 | type Marker = ThreeMod8;
167 | }
168 |
169 | impl Array for [(); 4] {
170 | type Marker = FourMod8;
171 | }
172 |
173 | impl Array for [(); 5] {
174 | type Marker = FiveMod8;
175 | }
176 |
177 | impl Array for [(); 6] {
178 | type Marker = SixMod8;
179 | }
180 |
181 | impl Array for [(); 7] {
182 | type Marker = SevenMod8;
183 | }
184 |
185 | trait TotalSizeIsMultipleOfEightBits {
186 | type Check;
187 | }
188 |
189 | impl TotalSizeIsMultipleOfEightBits for ZeroMod8 {
190 | type Check = ();
191 | }
192 | ```
193 |
194 | In this code the `<T as Array>::Marker` always resolves to one of `ZeroMod8`
195 | through `SevenMod8`. But then only `ZeroMod8` implements
196 | `TotalSizeIsMultipleOfEightBits`.
197 |
198 | Here is the error message, pretty helpful and free of the distractions from the
199 | second attempt.
200 |
201 | ```console
202 | error[E0277]: the trait bound `SixMod8: TotalSizeIsMultipleOfEightBits` is not satisfied
203 | --> src/main.rs:8:10
204 | |
205 | 8 | #[derive(Bitfield)]
206 | | ^^^^^^^^ the trait `TotalSizeIsMultipleOfEightBits` is not implemented for `SixMod8`
207 | ```
208 |
209 |
210 |
211 | ### Future
212 |
213 | Someone should write an RFC for const\_assert. Something like:
214 |
215 | ```rust
216 | const_assert!($e % 8 == 0, "total size is required to be a multiple of 8 bits");
217 | ```
218 |
219 | Having this provided by the compiler would let us give better error messages
220 | with less effort than the solution above.
221 |
--------------------------------------------------------------------------------
/bitfield-assertion/demo/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "case-study-bitfield-assertion"
3 | version = "0.0.0"
4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
5 | edition = "2021"
6 | publish = false
7 |
8 | [[bin]]
9 | name = "case-study"
10 | path = "main.rs"
11 |
12 | [dependencies]
13 | bitfield = { path = "bitfield" }
14 |
--------------------------------------------------------------------------------
/bitfield-assertion/demo/bitfield/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "bitfield"
3 | version = "0.0.0"
4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
5 | edition = "2021"
6 | publish = false
7 |
8 | [dependencies]
9 | bitfield-impl = { path = "../impl" }
10 |
--------------------------------------------------------------------------------
/bitfield-assertion/demo/bitfield/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub use bitfield_impl::bitfield;
2 |
3 | pub trait Field {
4 | const BITS: usize;
5 | }
6 |
7 | bitfield_impl::generate_specifiers!();
8 |
9 | pub type MultipleOfEight<T> = <<T as Array>::Marker as TotalSizeIsMultipleOfEightBits>::Check; // resolves (to `()`) only when `T` is `[(); 0]`
10 |
11 | pub enum ZeroMod8 {}
12 | pub enum OneMod8 {}
13 | pub enum TwoMod8 {}
14 | pub enum ThreeMod8 {}
15 | pub enum FourMod8 {}
16 | pub enum FiveMod8 {}
17 | pub enum SixMod8 {}
18 | pub enum SevenMod8 {}
19 |
20 | pub trait Array {
21 | type Marker;
22 | }
23 |
24 | impl Array for [(); 0] {
25 | type Marker = ZeroMod8;
26 | }
27 |
28 | impl Array for [(); 1] {
29 | type Marker = OneMod8;
30 | }
31 |
32 | impl Array for [(); 2] {
33 | type Marker = TwoMod8;
34 | }
35 |
36 | impl Array for [(); 3] {
37 | type Marker = ThreeMod8;
38 | }
39 |
40 | impl Array for [(); 4] {
41 | type Marker = FourMod8;
42 | }
43 |
44 | impl Array for [(); 5] {
45 | type Marker = FiveMod8;
46 | }
47 |
48 | impl Array for [(); 6] {
49 | type Marker = SixMod8;
50 | }
51 |
52 | impl Array for [(); 7] {
53 | type Marker = SevenMod8;
54 | }
55 |
56 | pub trait TotalSizeIsMultipleOfEightBits {
57 | type Check;
58 | }
59 |
60 | impl TotalSizeIsMultipleOfEightBits for ZeroMod8 {
61 | type Check = ();
62 | }
63 |
--------------------------------------------------------------------------------
/bitfield-assertion/demo/impl/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "bitfield-impl"
3 | version = "0.0.0"
4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
5 | edition = "2021"
6 | publish = false
7 |
8 | [lib]
9 | proc-macro = true
10 |
11 | [dependencies]
12 | quote = "1.0"
13 | syn = "2.0"
14 |
--------------------------------------------------------------------------------
/bitfield-assertion/demo/impl/src/lib.rs:
--------------------------------------------------------------------------------
1 | use proc_macro::TokenStream;
2 | use quote::{format_ident, quote};
3 | use syn::{parse_macro_input, Data, DeriveInput};
4 |
5 | #[proc_macro_attribute]
6 | pub fn bitfield(_args: TokenStream, input: TokenStream) -> TokenStream { // attribute arguments are ignored
7 | let input = parse_macro_input!(input as DeriveInput);
8 | // Collect the declared type of every field of the annotated struct.
9 | let fields = match &input.data {
10 | Data::Struct(data) => data.fields.iter().map(|field| &field.ty),
11 | _ => unimplemented!(), // enums and unions are not handled by this demo
12 | };
13 | // The output is only a checker function whose body names `MultipleOfEight<[(); (sum of BITS) % 8]>`; the annotated struct is not re-emitted.
14 | TokenStream::from(quote! {
15 | fn __bitfield() {
16 | let _: bitfield::MultipleOfEight<
17 | [(); (0 #(+ <#fields as bitfield::Field>::BITS)*) % 8]
18 | >;
19 | }
20 | })
21 | }
22 |
23 | #[proc_macro]
24 | pub fn generate_specifiers(_input: TokenStream) -> TokenStream { // input tokens are ignored
25 | (0usize..=64usize) // one specifier per bit width, 0 through 64 inclusive
26 | .map(|width| {
27 | let name = format_ident!("B{}", width); // identifier `B` followed by the width, e.g. `B3`
28 | TokenStream::from(quote! {
29 | pub enum #name {}
30 |
31 | impl Field for #name {
32 | const BITS: usize = #width;
33 | }
34 | })
35 | })
36 | .collect() // concatenate the per-width token streams into a single output
37 | }
38 |
--------------------------------------------------------------------------------
/bitfield-assertion/demo/main.rs:
--------------------------------------------------------------------------------
1 | use bitfield::*;
2 |
3 | #[bitfield] // (1+3+4+23)%8 != 0, so this struct is expected to be rejected at compile time
4 | struct NotQuiteFourBytes {
5 | a: B1,
6 | b: B3,
7 | c: B4,
8 | d: B23,
9 | }
10 |
11 | fn main() {}
12 |
--------------------------------------------------------------------------------
/callable-types/README.md:
--------------------------------------------------------------------------------
1 | ## User-defined callable types
2 |
3 | Various languages have ways of making user-defined objects callable with
4 | function call syntax: C++'s [`operator ()`][cpp], Python's [`__call__`][python],
5 | Swift's [`@dynamicCallable`][swift], Kotlin's [`invoke`][kotlin], PHP's
6 | [`__invoke`][php], Scala's [`apply`][scala], etc.
7 |
8 | [cpp]: https://en.cppreference.com/w/cpp/language/operators#Function_call_operator
9 | [python]: https://docs.python.org/3/reference/datamodel.html#object.__call__
10 | [swift]: https://docs.swift.org/swift-book/ReferenceManual/Attributes.html
11 | [kotlin]: https://kotlinlang.org/docs/reference/operator-overloading.html#invoke
12 | [php]: https://www.php.net/manual/en/language.oop5.magic.php#object.invoke
13 | [scala]: https://scala-lang.org/files/archive/spec/2.12/06-expressions.html#function-applications
14 |
15 | Something along these lines exists in Rust in the form of the [`std::ops::Fn`]
16 | trait. When you write a closure expression, under the hood it becomes a struct
17 | with some unique type that captures the necessary state from the closure's
18 | environment and provides an implementation of this `Fn` trait to make it
19 | callable. This isn't quite like the examples cited from other languages because
20 | the trait can only be implemented by the compiler, not by the user for their own
21 | data structures.
22 |
23 | [`std::ops::Fn`]: https://doc.rust-lang.org/nightly/std/ops/trait.Fn.html
24 |
25 | I was playing around with this functionality involving closures to stretch the
26 | possibilities a bit. Mainly I wondered whether there is anything that can be
27 | written in the gap in the code below to make our data structure work like a
28 | callable function object *on a stable compiler* despite this not being a feature
29 | of the language.
30 |
31 | ```rust
32 | /// Function object that adds some number to its input.
33 | struct Plus {
34 | n: u32,
35 | }
36 |
37 | impl Plus {
38 | fn call(&self, arg: u32) -> u32 {
39 | self.n + arg
40 | }
41 | }
42 |
43 | // [Something special here ...]
44 |
45 | fn main() {
46 | let one_plus = Plus { n: 1 };
47 | let sum = one_plus(2);
48 | assert_eq!(sum, 1 + 2);
49 | }
50 | ```
51 |
52 | It turns out that yes, it is possible to make this work (with caveats).
53 |
54 |
55 |
56 | ### Background
57 |
58 | We will use an interesting combination of `Deref`, closures, trait objects, and
59 | unsafe code.
60 |
61 | We will stick to functions with the signature `fn(&self, u32) -> u32` to get the
62 | simplest thing working, but everything generalizes to other signatures.
63 |
64 | To explain the relevance of `Deref`, observe that the function call operator
65 | performs deref coercions to find a `Fn` impl. In the following code we write
66 | `f(2)` to call an object `f` of type `&Callable`, which does not itself
67 | implement the `Fn` trait. But `&Callable` dereferences to `&fn(u32) -> u32`
68 | which does, so that is what gets called.
69 |
70 | ```rust
71 | use std::ops::Deref;
72 |
73 | struct Callable;
74 |
75 | impl Deref for Callable {
76 | type Target = fn(u32) -> u32;
77 |
78 | fn deref(&self) -> &'static Self::Target {
79 | &(one_plus as fn(u32) -> u32)
80 | }
81 | }
82 |
83 | fn one_plus(arg: u32) -> u32 {
84 | 1 + arg
85 | }
86 |
87 | fn main() {
88 | let f = &Callable;
89 | assert_eq!(f(2), 1 + 2);
90 | }
91 | ```
92 |
93 |
94 |
95 | ### First attempt
96 |
97 | The code under Background is syntactically on the right track because it enables
98 | writing parentheses for function call notation on a value of user-defined type.
99 | But since the thing being called in that code after deref coercion is just a
100 | function pointer, the value of `self` (the object being invoked as a function)
101 | is not accessible to the function body, which makes this severely limited in
102 | usefulness.
103 |
104 | What we want conceptually is this kind of thing:
105 |
106 | ```rust
107 | impl Callable {
108 | fn call(&self, arg: u32) -> u32 {
109 | // Function body
110 | }
111 | }
112 |
113 | impl Deref for Callable {
114 | type Target = ???;
115 |
116 | fn deref(&self) -> &Self::Target {
117 | &|arg| self.call(arg)
118 | }
119 | }
120 | ```
121 |
122 | That is, the thing being called after deref coercion would be a closure that has
123 | captured `self` and receives all the non-`self` args to set up a call to the
124 | intended function body.
125 |
126 | We can even spell out a type for `Target` that makes this look correctly typed.
127 |
128 | ```rust
129 | impl Deref for Callable {
130 | type Target = dyn Fn(u32) -> u32;
131 |
132 | fn deref(&self) -> &Self::Target {
133 | &|arg| self.call(arg)
134 | }
135 | }
136 | ```
137 |
138 | The borrow checker explains (not that clearly in this case) that this
139 | implementation would not be sound. The reference being returned by `deref` is
140 | dangling because it refers to a closure object on the stack frame of the `deref`
141 | call that is destroyed during the return.
142 |
143 | ```console
144 | error[E0495]: cannot infer an appropriate lifetime due to conflicting requirements
145 | --> src/main.rs:15:10
146 | |
147 | 15 | &|arg| self.call(arg)
148 | | ^^^^^^^^^^^^^^^^^^^^
149 | |
150 | note: first, the lifetime cannot outlive the anonymous lifetime #1 defined on the method body at 14:5...
151 | --> src/main.rs:14:5
152 | |
153 | 14 | / fn deref(&self) -> &Self::Target {
154 | 15 | | &|arg| self.call(arg)
155 | 16 | | }
156 | | |_____^
157 | = note: ...so that the types are compatible:
158 | expected &&Callable
159 | found &&Callable
160 | = note: but, the lifetime must be valid for the static lifetime...
161 | = note: ...so that the expression is assignable:
162 | expected &(dyn std::ops::Fn(u32) -> u32 + 'static)
163 | found &dyn std::ops::Fn(u32) -> u32
164 | ```
165 |
166 | To see it more clearly, this closure would have desugared to something like the
167 | following:
168 |
169 | ```rust
170 | impl Deref for Callable {
171 | type Target = dyn Fn(u32) -> u32;
172 |
173 | fn deref(&self) -> &Self::Target {
174 | // Generated by the compiler as the memory representation
175 | // of `|arg| self.call(arg)`.
176 | struct GeneratedClosure<'a> {
177 | self_: &'a Callable,
178 | }
179 |
180 | // Also generated by the compiler.
181 | impl<'a> Fn(u32) -> u32 for GeneratedClosure<'a> {
182 | fn call(&self, arg: u32) -> u32 {
183 | let self_ = self.self_;
184 |
185 | // Body of `|arg| self.call(arg)`.
186 | self_.call(arg)
187 | }
188 | }
189 |
190 | // Expanded view of `&|arg| self.call(arg)`.
191 | let generated_closure = GeneratedClosure { self_: self };
192 | let reference_to_closure: &GeneratedClosure = &generated_closure;
193 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
194 | reference_to_trait_object
195 | }
196 | }
197 | ```
198 |
199 |
200 |
201 | ### Second attempt
202 |
203 | If we temporarily conflate the types `GeneratedClosure` and `&Callable`, notice
204 | how in the desugared code from the first attempt we have `deref` returning
205 | `&&Callable` (as a reference to trait object) and `GeneratedClosure::call`
206 | accepting `&&Callable` as its first argument. The inner reference lives long
207 | enough to match deref's signature but the outer reference does not; the outer
208 | reference points to the inner reference which exists on `deref`'s stack frame
209 | and goes out of scope.
210 |
211 | What we would love to trick the compiler into doing is something more like:
212 |
213 | ```rust
214 | impl Deref for Callable {
215 | type Target = dyn Fn(u32) -> u32;
216 |
217 | fn deref(&self) -> &Self::Target {
218 | // Generated by the compiler (???)
219 | #[repr(transparent)]
220 | struct GeneratedClosure {
221 | self_: Callable,
222 | }
223 |
224 | // Also generated by the compiler (???)
225 | impl Fn(u32) -> u32 for GeneratedClosure {
226 | fn call(&self, arg: u32) -> u32 {
227 | let self_ = &self.self_;
228 |
229 | // Body of the closure we would write.
230 | self_.call(arg)
231 | }
232 | }
233 |
234 | let reference_to_closure = &GeneratedClosure { self_: *self };
235 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
236 | reference_to_trait_object
237 | }
238 | }
239 | ```
240 |
241 | Here instead we have `deref` returning `&Callable` (as a reference to trait
242 | object) and `GeneratedClosure::call` accepting `&Callable`. The conversion from
243 | `&Callable` to `&GeneratedClosure` is sound as long as `Callable` and
244 | `GeneratedClosure` have the same memory representation, which would be
245 | guaranteed by `#[repr(transparent)]`. That conversion results in a reference
246 | pointing to the caller's `Callable` rather than to anything on `deref`'s stack
247 | frame, so it lives long enough that this would be a safe and working
248 | implementation of the intended functionality.
249 |
250 | Let's think about what closure we would need to write in order for the compiler
251 | to come up with the above data structure and `Fn` trait impl.
252 |
253 | We know it would need to capture a value of type `Callable` by value. This
254 | begins to sound problematic because there would never exist an owned value of
255 | type `Callable` accessible to the `Deref` impl, only as a borrowed `&Callable`.
256 |
257 | But an imaginary uninitialized `Callable` gets the job done:
258 |
259 | ```rust
260 | let uninit_callable: Callable = unsafe { mem::uninitialized() };
261 | let uninit_closure = move |arg: u32| Callable::call(&uninit_callable, arg);
262 | mem::forget(uninit_closure);
263 | ```
264 |
265 | This code makes an uninitialized owned `Callable`, moves ownership of it into a
266 | closure that captures a `Callable` by value and nothing else, and then prevents
267 | a `Drop` call on the closure because we must not drop its uninitialized
268 | contents. At runtime this would all be noop but it gets the compiler to generate
269 | the right data structure and `Fn` trait impl shown above.
270 |
271 | The remaining part is to turn `self` into a trait object based on this `Fn`
272 | impl, the equivalent of `&GeneratedClosure { self_: *self } as &dyn Fn(u32) ->
273 | u32`.
274 |
275 | Ordinarily we would reach for a `mem::transmute::<&Callable,
276 | &GeneratedClosure>(self)` or `&*(self as *const Callable as *const
277 | GeneratedClosure)`, but in this case that won't work because the closure's real
278 | type is generated and does not have a name that we can refer to. A different
279 | technique is needed:
280 |
281 | ```rust
282 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
283 | b
284 | }
285 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
286 | ```
287 |
288 | This uses generic type inference to deduce the return type of the transmute as
289 | identical to a reference to the closure's type, whatever that might be.
290 |
291 | At this point we have a closure to make into a trait object.
292 |
293 | ```rust
294 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
295 | ```
296 |
297 | The impl all at once looks like:
298 |
299 | ```rust
300 | impl Deref for Callable {
301 | type Target = dyn Fn(u32) -> u32;
302 |
303 | fn deref(&self) -> &Self::Target {
304 | let uninit_callable: Self = unsafe { mem::uninitialized() };
305 | let uninit_closure = move |arg: u32| Self::call(&uninit_callable, arg);
306 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
307 | b
308 | }
309 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
310 | mem::forget(uninit_closure);
311 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
312 | reference_to_trait_object
313 | }
314 | }
315 | ```
316 |
317 |
318 |
319 | ### Third attempt
320 |
321 | I called out `#[repr(transparent)]` earlier on, but then didn't bring it up
322 | again in the context of the closure-based implementation. We have written a
323 | closure that captures a type `Callable` by value so it makes sense why it would
324 | be represented like `struct GeneratedClosure { captured: Callable }` but:
325 |
326 | - it is not a guarantee made by the language that a closure capturing `Callable`
327 | by value is represented in memory the same as `struct { Callable }`;
328 |
329 | - nor is it a guarantee that `struct { Callable }` would be represented the same
330 | as `Callable`.
331 |
332 | So this is the big caveat; don't count on this to work now or continue working
333 | in the future. Nothing on this page is a robust solution, only interesting. For
334 | now I think this is the closest we get, by adding an assertion as a basic smoke
335 | test that the closure matches the expected size:
336 |
337 | ```rust
338 | use std::mem;
339 | use std::ops::Deref;
340 |
341 | /// Function object that adds some number to its input.
342 | struct Plus {
343 | n: u32,
344 | }
345 |
346 | impl Plus {
347 | fn call(&self, arg: u32) -> u32 {
348 | self.n + arg
349 | }
350 | }
351 |
352 | impl Deref for Plus {
353 | type Target = dyn Fn(u32) -> u32;
354 |
355 | fn deref(&self) -> &Self::Target {
356 | let uninit_callable: Self = unsafe { mem::uninitialized() };
357 | let uninit_closure = move |arg: u32| Self::call(&uninit_callable, arg);
358 | let size_of_closure = mem::size_of_val(&uninit_closure);
359 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
360 | b
361 | }
362 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
363 | mem::forget(uninit_closure);
364 | assert_eq!(size_of_closure, mem::size_of::<Self>());
365 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
366 | reference_to_trait_object
367 | }
368 | }
369 |
370 | fn main() {
371 | let one_plus = Plus { n: 1 };
372 | let sum = one_plus(2);
373 | assert_eq!(sum, 1 + 2);
374 | }
375 | ```
376 |
377 |
378 |
379 | ### Fourth attempt
380 |
381 | There is one remaining problem to sort out. The following line from the third
382 | attempt may contain undefined behavior:
383 |
384 | ```rust
385 | let uninit_callable: Self = unsafe { mem::uninitialized() };
386 | ```
387 |
388 | Usually the most common way that creating an uninitialized value of an unknown
389 | type in generic code causes undefined behavior is if an expression like
390 | `mem::uninitialized::<T>()` might be instantiated with a choice of `T` that is
391 | uninhabited, such as the `!` type. When that happens, the compiler is free to
392 | turn the `mem::uninitialized` call into [`unreachable_unchecked`] and plummet
393 | off the end of your function, even though you intended for this line to be a
394 | noop.
395 |
396 | [`unreachable_unchecked`]: https://doc.rust-lang.org/std/hint/fn.unreachable_unchecked.html
397 |
398 | As used here, that's not a concern -- we know `Self` is inhabited at runtime
399 | because there exists a `&Self` in scope that was passed in by the caller. If
400 | `Self` were uninhabited, it would be impossible for the caller to have an
401 | instance of `Self` on which to borrow (`&self`) and call `deref`.
402 |
403 | Instead we need to worry about the second most common way that creating
404 | uninitialized values of an unknown type causes undefined behavior, and that's if
405 | the uninitialized type has nontrivial validity invariants. In our case if the
406 | memory representation of `Self` contains a bool, char, `&`, `&mut`, Box,
407 | NonZero, or any other type where not all possible values are valid, then
408 | `mem::uninitialized::<Self>()` is immediate UB.
409 |
410 | The correct way to manipulate uninitialized memory of generic type is through
411 | [`MaybeUninit`].
412 |
413 | [`MaybeUninit`]: https://doc.rust-lang.org/std/mem/union.MaybeUninit.html
414 |
415 | ```rust
416 | let uninit_callable = MaybeUninit::<Self>::uninit();
417 | let uninit_closure = move |arg: u32| Self::call(
418 | unsafe { &*uninit_callable.as_ptr() },
419 | arg,
420 | );
421 | ```
422 |
423 | The final expanded code all together is:
424 |
425 | ```rust
426 | use std::mem::{self, MaybeUninit};
427 | use std::ops::Deref;
428 |
429 | /// Function object that adds some number to its input.
430 | struct Plus {
431 | n: u32,
432 | }
433 |
434 | impl Plus {
435 | fn call(&self, arg: u32) -> u32 {
436 | self.n + arg
437 | }
438 | }
439 |
440 | impl Deref for Plus {
441 | type Target = dyn Fn(u32) -> u32;
442 |
443 | fn deref(&self) -> &Self::Target {
444 | let uninit_callable = MaybeUninit::<Self>::uninit();
445 | let uninit_closure = move |arg: u32| Self::call(
446 | unsafe { &*uninit_callable.as_ptr() },
447 | arg,
448 | );
449 | let size_of_closure = mem::size_of_val(&uninit_closure);
450 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
451 | b
452 | }
453 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
454 | mem::forget(uninit_closure);
455 | assert_eq!(size_of_closure, mem::size_of::<Self>());
456 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
457 | reference_to_trait_object
458 | }
459 | }
460 |
461 | fn main() {
462 | let one_plus = Plus { n: 1 };
463 | let sum = one_plus(2);
464 | assert_eq!(sum, 1 + 2);
465 | }
466 | ```
467 |
468 |
469 |
470 | ### Implementation
471 |
472 | Packaging this up into a macro is the easy part. We would most likely want an
473 | attribute macro on an impl block that turns the block's one method into the fake
474 | `Fn` impl.
475 |
476 | ```rust
477 | /// Function object that adds some number to its input.
478 | struct Plus {
479 | n: u32,
480 | }
481 |
482 | #[hackfn]
483 | impl Plus {
484 | fn call(&self, arg: u32) -> u32 {
485 | self.n + arg
486 | }
487 | }
488 |
489 | fn main() {
490 | let one_plus = Plus { n: 1 };
491 | let sum = one_plus(2);
492 | assert_eq!(sum, 1 + 2);
493 | }
494 | ```
495 |
496 |
497 |
498 | End note: I feel that the technique of returning trait objects from
499 | `&`-returning trait methods like `Deref`, `Index`, `Borrow` etc is underexplored
500 | and there are major impactful applications waiting to be discovered in that
501 | area. [This StackOverflow answer][hashmap] demonstrates one amazing example in
502 | the context of *How to implement HashMap with two keys?*. A more basic one is
503 | the [slice of a multidimensional array][refcast] example from RefCast; this
504 | involves a dynamically sized slice rather than a trait object but the underlying
505 | idea is similar. I think that these two and the case study are scratching the
506 | surface of something bigger with exciting applications. Note that those two
507 | links are all safe code; unsafe is not inherent to this technique.
508 |
509 | [hashmap]: https://stackoverflow.com/a/45795699/6086311
510 | [refcast]: https://github.com/dtolnay/ref-cast#realistic-example
511 |
--------------------------------------------------------------------------------
/callable-types/demo/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "case-study-callable-types"
3 | version = "0.0.0"
4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
5 | edition = "2021"
6 | publish = false
7 |
8 | [[bin]]
9 | name = "case-study"
10 | path = "main.rs"
11 |
--------------------------------------------------------------------------------
/callable-types/demo/main.rs:
--------------------------------------------------------------------------------
1 | use std::mem::{self, MaybeUninit};
2 | use std::ops::Deref;
3 |
4 | /// Function object that adds some number to its input.
5 | struct Plus {
6 | n: u32,
7 | }
8 |
9 | impl Plus {
10 | fn call(&self, arg: u32) -> u32 {
11 | self.n + arg
12 | }
13 | }
14 |
15 | impl Deref for Plus {
16 | type Target = dyn Fn(u32) -> u32;
17 |
18 | fn deref(&self) -> &Self::Target {
19 | let uninit_callable = MaybeUninit::<Self>::uninit();
20 | let uninit_closure = move |arg: u32| Self::call(unsafe { &*uninit_callable.as_ptr() }, arg);
21 | let size_of_closure = mem::size_of_val(&uninit_closure);
22 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
23 | b
24 | }
25 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
26 | mem::forget(uninit_closure);
27 | assert_eq!(size_of_closure, mem::size_of::<Self>());
28 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
29 | reference_to_trait_object
30 | }
31 | }
32 |
33 | fn main() {
34 | let one_plus = Plus { n: 1 };
35 | let sum = one_plus(2);
36 | assert_eq!(sum, 1 + 2);
37 | dbg!(one_plus(2));
38 | }
39 |
--------------------------------------------------------------------------------
/function-epilogue/README.md:
--------------------------------------------------------------------------------
1 | ## Function epilogue
2 |
3 | For the [`#[no_panic]`][no-panic] macro I needed the ability to have some piece
4 | of code invoked during all *panicking* exit paths out of a function.
5 |
6 | [no-panic]: https://github.com/dtolnay/no-panic
7 |
8 |
9 |
10 | ### First attempt
11 |
12 | Having something execute on *all* exit paths is reasonably simple -- place a
13 | guard object in a local variable and its `Drop` impl will run whether the
14 | function body succeeds or panics. This may be a good approach for something like
15 | instrumenting functions with tracing on entry and exit.
16 |
17 | ```rust
18 | // Before
19 | fn f(a: Arg1, b: Arg2) -> Ret {
20 | // (Original function body)
21 | }
22 |
23 | // After; insert guard object
24 | fn f(a: Arg1, b: Arg2) -> Ret {
25 | struct Guard;
26 | impl Drop for Guard {
27 | fn drop(&mut self) {
28 | // Do the thing
29 | }
30 | }
31 | let _guard = Guard;
32 |
33 | // (Original function body)
34 | }
35 | ```
36 |
37 | From here we can have the guard's `Drop` impl check
38 | [`std::thread::panicking`][panicking] to determine whether the call is taking
39 | place during a panicking exit path.
40 |
41 | [panicking]: https://doc.rust-lang.org/std/thread/fn.panicking.html
42 |
43 | ```rust
44 | impl Drop for Guard {
45 | fn drop(&mut self) {
46 | if std::thread::panicking() {
47 | // Do the thing
48 | }
49 | }
50 | }
51 | ```
52 |
53 | Two things made this not suitable for my case:
54 |
55 | - There is no equivalent in libcore, so this only works if my caller's crate is
56 | using the standard library.
57 |
58 | - The code inside of `if std::thread::panicking() { ... }` gets linked whether
59 | or not a panic is possible. The implementation of the panicking check is based
60 | on reading a panic counter out of a thread\_local and cannot be optimized out.
61 | In the case of `#[no_panic]`, the whole macro is based on using the
62 | information of whether something gets linked to tell whether a panic is
63 | possible so I needed the linking to behave well.
64 |
65 |
66 |
67 | ### Second attempt
68 |
69 | Let's evaluate the body of the function and then make the guard not get dropped
70 | if the function produces a value as opposed to panicking.
71 |
72 | ```rust
73 | fn f(a: Arg1, b: Arg2) -> Ret {
74 | struct Guard;
75 | impl Drop for Guard {
76 | fn drop(&mut self) {
77 | // Do the thing
78 | }
79 | }
80 | let guard = Guard;
81 |
82 | let value = {
83 | // (Original function body)
84 | };
85 |
86 | mem::forget(guard);
87 | value
88 | }
89 | ```
90 |
91 | If the original function panics, we don't make it to the `mem::forget` so the
92 | guard object is dropped as part of dropping the stack frame of `f` during the
93 | panic. If the original function body returns without panicking, we skip the
94 | guard's drop prior to returning from `f`.
95 |
96 | This is on the right track! It works with no\_std, and no longer relies on the
97 | thread\_local inside of `std::thread::panicking` so it optimizes away extremely
98 | reliably in functions that can never panic.
99 |
100 | There is a problem around functions that contain a `return` expression. If the
101 | original function body performs a `return`, that would now return from `f`
102 | without running `mem::forget` on the guard object, so the thing that we want to
103 | run only when panicking would incorrectly run.
104 |
105 |
106 |
107 | ### Third attempt
108 |
109 | Let's consolidate all the non-panicking exit paths into one place via a function
110 | call and make the guard not get dropped if the function call returns without
111 | panicking.
112 |
113 | ```rust
114 | fn f(a: Arg1, b: Arg2) -> Ret {
115 | struct Guard;
116 | impl Drop for Guard {
117 | fn drop(&mut self) {
118 | // Do the thing
119 | }
120 | }
121 | let guard = Guard;
122 |
123 | fn original_f(a: Arg1, b: Arg2) -> Ret {
124 | // (Original function body)
125 | }
126 | let value = original_f(a, b);
127 |
128 | mem::forget(guard);
129 | value
130 | }
131 | ```
132 |
133 | This is like the second attempt except that it works when the original function
134 | body contains a `return` expression.
135 |
136 | This is pretty good. It has the desired behavior and is compatible with most
137 | function signatures.
138 |
139 |
140 |
141 | ### Fourth attempt
142 |
143 | What do we do in this case?
144 |
145 | ```rust
146 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
147 | ...
148 | }
149 | ```
150 |
151 | The scheme from the third attempt of duplicating the function signature into an
152 | internal `original_f` will not work because `&self` arguments can only occur in
153 | members of an impl block, not in any other position that a function can be
154 | defined.
155 |
156 | ```rust
157 | struct S;
158 |
159 | impl S {
160 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
161 | ...
162 | let guard = Guard;
163 |
164 | fn original_f(&self, a: Arg1, b: Arg2) -> Ret {
165 | // (Original function body)
166 | }
167 | let value = original_f(self, a, b);
168 |
169 | mem::forget(guard);
170 | value
171 | }
172 | }
173 | ```
174 |
175 | ```console
176 | error: unexpected `self` argument in function
177 | --> src/main.rs:8:24
178 | |
179 | 8 | fn original_f(&self, a: Arg1, b: Arg2) -> Ret {
180 | | ^^^^ `self` is only valid as the first argument of an associated function
181 | ```
182 |
183 | It doesn't work to try to generate `fn original_f(_self: &S, ...) -> Ret`
184 | because the macro generating this will be an attribute macro placed on the
185 | function -- it would only receive the function `f` as input not including the
186 | impl block header, so the correct type for `self` can't be known.
187 |
188 | ```rust
189 | impl ??? {
190 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
191 | ...
192 | let guard = Guard;
193 |
194 | fn original_f(_self: &???, a: Arg1, b: Arg2) -> Ret {
195 | // (Original function body)
196 | }
197 | let value = original_f(self, a, b);
198 |
199 | mem::forget(guard);
200 | value
201 | }
202 | }
203 | ```
204 |
205 | The argument type `_self: &Self` can't be used because a function like
206 | `original_f` is its own self-contained item and does not have access to an outer
207 | `Self` or type parameters.
208 |
209 | ```console
210 | error[E0401]: can't use generic parameters from outer function
211 | --> src/main.rs:8:31
212 | |
213 | 1 | impl S {
214 | | ---- `Self` type implicitly declared here, by this `impl`
215 | ...
216 | 8 | fn original_f(_self: &Self, a: Arg1, b: Arg2) -> Ret {
217 | | ^^^^
218 | | |
219 | | use of generic parameter from outer function
220 | | use a type here instead
221 | ```
222 |
223 | Maybe we could ask the user to write our attribute macro on the impl block
224 | rather than on functions but this would be confusing; a solution that does not
225 | require this would be better.
226 |
227 | It also doesn't work in general to place the `original_f` outside of `f`, as a
228 | `#[doc(hidden)]` method next to `f`. This would work inside of an impl block
229 | containing inherent methods, but not inside of a trait impl block containing
230 | trait methods since those are limited to the set of methods required by the
231 | trait.
232 |
233 | ```rust
234 | impl ??? {
235 | fn original_f(&self, a: Arg1, b: Arg2) -> Ret {
236 | // (Original function body)
237 | }
238 |
239 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
240 | ...
241 | let guard = Guard;
242 |
243 | let value = Self::original_f(self, a, b);
244 |
245 | mem::forget(guard);
246 | value
247 | }
248 | }
249 | ```
250 |
251 | To finally give a viable fourth attempt, let's write `original_f` as a closure
252 | instead because closures are not a self-contained item and *do* have access to
253 | an outer `Self`.
254 |
255 | ```rust
256 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
257 | ...
258 | let guard = Guard;
259 |
260 | let original_f = |_self: &Self, a: Arg1, b: Arg2| -> Ret {
261 | // (Original function body, with self replaced by _self)
262 | };
263 | let value = original_f(self, a, b);
264 |
265 | mem::forget(guard);
266 | value
267 | }
268 | ```
269 |
270 | Here we pass the function arguments along to a closure that has the same
271 | signature as the outer function and captures nothing. Method receivers in the
272 | form of `&self`, `&mut self`, and `self` would be passed as closure arguments
273 | `_self: &Self`, `_self: &mut Self`, `_self: Self` respectively with the original
274 | function body adjusted to refer to `_self` anywhere that it originally referred
275 | to `self`. The leading underscore on `_self` is meaningful in that it suppresses
276 | unused variable lints; Rust does not warn when a method accepts `self` but does
277 | not refer to it, so we want to preserve that behavior in the generated closure.
278 |
279 | This really seems like it should work. But...
280 |
281 |
282 |
283 | ### Fifth attempt
284 |
285 | The borrow checker doesn't like it. In the case of a method signature that
286 | borrows from `self`:
287 |
288 | ```rust
289 | fn f(&self) -> &i32 {
290 | ...
291 | let guard = Guard;
292 |
293 | let original_f = |_self: &Self| -> &i32 {
294 | &_self.0
295 | };
296 | let value = original_f(self);
297 |
298 | mem::forget(guard);
299 | value
300 | }
301 | ```
302 |
303 | we get this interesting error:
304 |
305 | ```console
306 | error[E0495]: cannot infer an appropriate lifetime for borrow expression due to conflicting requirements
307 | --> src/main.rs:17:13
308 | |
309 | 17 | &_self.0
310 | | ^^^^^^^^
311 | |
312 | note: first, the lifetime cannot outlive the anonymous lifetime #1 defined on the body at 16:26...
313 | --> src/main.rs:16:26
314 | |
315 | 16 | let original_f = |_self: &Self| -> &i32 {
316 | | __________________________^
317 | 17 | | &_self.0
318 | 18 | | };
319 | | |_________^
320 | note: ...so that reference does not outlive borrowed content
321 | --> src/main.rs:17:13
322 | |
323 | 17 | &_self.0
324 | | ^^^^^^^^
325 | note: but, the lifetime must be valid for the anonymous lifetime #1 defined on the method body at 7:5...
326 | --> src/main.rs:7:5
327 | |
328 | 7 | / fn f(&self) -> &i32 {
329 | 8 | | struct Guard;
330 | 9 | | impl Drop for Guard {
331 | 10 | | fn drop(&mut self) {
332 | ... |
333 | 22 | | value
334 | 23 | | }
335 | | |_____^
336 | note: ...so that reference does not outlive borrowed content
337 | --> src/main.rs:22:9
338 | |
339 | 22 | value
340 | | ^^^^^
341 | ```
342 |
343 | I can't tell where this went wrong but casting the closure to a function pointer
344 | with the right signature seems to fix it. This requires rustc 1.23+.
345 |
346 | ```rust
347 | fn f(&self) -> &i32 {
348 | ...
349 | let guard = Guard;
350 |
351 | let original_f = |_self: &Self| -> &i32 {
352 | // (Original function body, with self replaced by _self)
353 | } as fn(&Self) -> &i32;
354 | let value = original_f(self);
355 |
356 | mem::forget(guard);
357 | value
358 | }
359 | ```
360 |
361 |
362 |
363 | ### Sixth attempt
364 |
365 | Let's take a closer look at what is meant by "self replaced by \_self".
366 |
367 | The simple way for a macro to accomplish this would be by traversing the entire
368 | token stream representing the function body and substituting a `_self` token
369 | anywhere that `self` occurs. This is correct as long as `self` always refers to
370 | the method receiver... but sometimes it may not. Let's say the user has written:
371 |
372 | ```rust
373 | fn f(&self) {
374 | struct UserGuard;
375 | impl Drop for UserGuard {
376 | fn drop(&mut self) {
377 | // Notice the `self` on the previous line
378 | ...
379 | }
380 | }
381 |
382 | ...
383 | }
384 | ```
385 |
386 | The ability to place structs and impl blocks inside a function body was super
387 | helpful to us so far because that's how we have been doing *our* Guard object.
388 | But the user is free to do it too! In this snippet they have written a function
389 | body that uses the token `self` in a way that does *not* refer to the `f`
390 | method's receiver. If we naively replace every `self` in their function body
391 | with `_self` as indicated in the fifth attempt, the result is invalid Rust
392 | syntax:
393 |
394 | ```rust
395 | fn f(&self) -> &i32 {
396 | struct Guard;
397 | impl Drop for Guard {
398 | fn drop(&mut self) {
399 | // This is the guard generated by our macro
400 | }
401 | }
402 | let guard = Guard;
403 |
404 | let original_f = |_self: &Self| -> &i32 {
405 | struct UserGuard;
406 | impl Drop for UserGuard {
407 | fn drop(&mut _self) {
408 | // Invalid Rust syntax on the previous line
409 | ...
410 | }
411 | }
412 |
413 | ...
414 | } as fn(&Self) -> &i32;
415 | let value = original_f(self);
416 |
417 | mem::forget(guard);
418 | value
419 | }
420 | ```
421 |
422 | ```console
423 | error: expected one of `:` or `@`, found `)`
424 | --> src/main.rs:19:31
425 | |
426 | 19 | fn drop(&mut _self) {
427 | | ^ expected one of `:` or `@` here
428 | ```
429 |
430 | So replacing *every* `self` is not right. The next simplest possibility would be
431 | to parse the user's function body using Syn and write a [`VisitMut`] to perform
432 | the replacement against the parsed syntax tree without traversing into nested
433 | impl blocks.
434 |
435 | [`VisitMut`]: https://docs.rs/syn/0.15/syn/visit_mut/index.html
436 |
437 | That is more correct than replacing *every* `self` but it still isn't correct
438 | because we can't know how to treat unexpanded macros. If the user's function
439 | body contains a call to `somemacro!(self)`, there would be no way to tell
440 | whether this expands to an expression like `vec![self]` in which we need to
441 | replace, vs an impl block like `impl Drop for UserGuard` in which we want to not
442 | replace.
443 |
444 | I think there is no solution to this today in Rust, so we will need to keep it
445 | as a limitation that sometimes our macro would generate invalid code, or else
446 | solve what we are doing in a way that does not involve doing *any* token
447 | replacement of `self`.
448 |
449 | So that we don't need replacement, let's try having our generated closure
450 | capture `self` from the outer method `f`'s receiver argument.
451 |
452 | There are a lot of different ways to slice and dice this, but ultimately they
453 | all fall apart for borrow checker reasons when &mut is involved.
454 |
455 | ```rust
456 | struct S(i32);
457 |
458 | impl S {
459 | // Before: compiles and works
460 | fn f(&mut self) -> &mut i32 {
461 | &mut self.0
462 | }
463 |
464 | // After: does not compile
465 | fn f(&mut self) -> &mut i32 {
466 | ...
467 | let guard = Guard;
468 |
469 | let original_f = move || {
470 | // Original function body:
471 | &mut self.0
472 | };
473 | let value = original_f();
474 |
475 | mem::forget(guard);
476 | value
477 | }
478 | }
479 | ```
480 |
481 | ```console
482 | error[E0495]: cannot infer an appropriate lifetime for borrow expression due to conflicting requirements
483 | --> src/main.rs:16:13
484 | |
485 | 16 | &mut self.0
486 | | ^^^^^^^^^^^
487 | ```
488 |
489 | Remember how we had to add a cast to function pointer type in the fifth attempt
490 | to solve this same borrow checker failure? Well once the closure is capturing
491 | things, it can no longer be cast to a function pointer. Using `impl FnOnce` or
492 | `&mut dyn FnMut` here doesn't work either; as far as I can tell the correct type
493 | for these closures cannot be accurately described in Rust's type system.
494 |
495 | ```rust
496 | fn f(&mut self) -> &mut i32 {
497 | ...
498 | let guard = Guard;
499 |
500 | let original_f: impl FnOnce() -> &mut i32 = move || {
501 | // Original function body:
502 | &mut self.0
503 | };
504 | let value = original_f();
505 |
506 | mem::forget(guard);
507 | value
508 | }
509 | ```
510 |
511 | ```console
512 | error[E0106]: missing lifetime specifier
513 | --> src/main.rs:17:42
514 | |
515 | 17 | let original_f: impl FnOnce() -> &mut i32 = move || {
516 | | ^ help: consider giving it a 'static lifetime: `&'static`
517 | |
518 | = help: this function's return type contains a borrowed value, but there is no value for it to be borrowed from
519 | ```
520 |
521 | There isn't a way for the lifetime in the signature of a closure to unify with
522 | the elided lifetime in `f`'s signature.
523 |
524 | I tried a lot of variations in this direction but found it to be a dead end. I
525 | would love to have someone bring to my attention a reliable solution that does
526 | not involve replacing `self` tokens on a heuristic basis.
527 |
528 |
529 |
530 | ### Lifetime elision
531 |
532 | As a recap, what we have so far is the closure cast to function pointer
533 | approach from the fifth attempt combined with the `VisitMut` replacement
534 | approach discussed under the sixth attempt. All together the expansion would
535 | behave like this:
536 |
537 | ```rust
538 | // Before
539 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
540 | // (Original function body)
541 | }
542 |
543 | // After
544 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
545 | struct Guard;
546 | impl Drop for Guard {
547 | fn drop(&mut self) {
548 | // Do the thing
549 | }
550 | }
551 | let guard = Guard;
552 |
553 | let original_f = |_self: &Self, a: Arg1, b: Arg2| -> Ret {
554 | // (Original function body, with self replaced by _self
555 | // except in nested impls)
556 | } as fn(&Self, Arg1, Arg2) -> Ret;
557 |
558 | let value = original_f(self, a, b);
559 |
560 | mem::forget(guard);
561 | value
562 | }
563 | ```
564 |
565 | Unfortunately we are not done because lifetime elision wrecks this approach. To
566 | make it concrete let me give you some possible definitions for the receiver
567 | type, `Arg1`, `Arg2`, `Ret`, and the function body, with lifetime elision in the
568 | mix:
569 |
570 | ```rust
571 | struct S(i32);
572 | type Arg1<'a> = &'a ();
573 | type Arg2 = ();
574 | type Ret<'a> = &'a i32;
575 |
576 | impl S {
577 | fn f(&self, _a: Arg1, _b: Arg2) -> Ret {
578 | &self.0
579 | }
580 | }
581 | ```
582 |
583 | This compiles, with `S::f` eliding three lifetimes: the ones on `&self`, `Arg1`,
584 | and `Ret`.
585 |
586 | Let's apply our expansion.
587 |
588 | ```rust
589 | impl S {
590 | fn f(&self, _a: Arg1, _b: Arg2) -> Ret {
591 | struct Guard;
592 | impl Drop for Guard {
593 | fn drop(&mut self) {
594 | // Do the thing
595 | }
596 | }
597 | let guard = Guard;
598 |
599 | let original_f = |_self: &Self, _a: Arg1, _b: Arg2| -> Ret {
600 | &_self.0
601 | } as fn(&Self, Arg1, Arg2) -> Ret;
602 |
603 | let value = original_f(self, _a, _b);
604 |
605 | mem::forget(guard);
606 | value
607 | }
608 | }
609 | ```
610 |
611 | ```console
612 | error[E0106]: missing lifetime specifier
613 | --> src/main.rs:13:39
614 | |
615 | 13 | } as fn(&Self, Arg1, Arg2) -> Ret;
616 | | ^^^ expected lifetime parameter
617 | |
618 | = help: this function's return type contains a borrowed value, but the signature does not say whether it is borrowed from argument 1 or argument 2
619 | ```
620 |
621 | So what happened here? This is hitting a special behavior of lifetime elision in
622 | methods that accept `self` by reference. The signature of `S::f` is not
623 | `fn(&Self, Arg1, Arg2) -> Ret`, as much as it may look like it. Instead it is
624 | `for<'r, 'a> fn(&'r Self, Arg1<'a>, Arg2) -> Ret<'r>`. The compiler's error
625 | message is pointing out that `fn(&Self, Arg1, Arg2) -> Ret` isn't even a legal
626 | function type given the types involved here.
627 |
628 | The relevant elision behavior goes something like this: in methods that accept
629 | `self` by reference, elided lifetimes in the return type are assumed to refer to
630 | the receiver's lifetime regardless of the number of other lifetimes among
631 | the other arguments. Meanwhile in functions without `self` or that accept `self`
632 | by value, elided lifetimes in the return type are permitted only if the function
633 | has exactly one input lifetime parameter across all the arguments; otherwise the
634 | signature is invalid. This rule reduces the occurrence of explicit lifetimes
635 | being necessary in method signatures, but makes life complicated for macros as
636 | we are experiencing here.
637 |
638 | The function pointer type in our generated code `fn(&Self, Arg1, Arg2) -> Ret`
639 | is invalid because it has elided the lifetime on `Ret` in the return type but
640 | there is more than one input lifetime: there is one as part of `&Self` and one
641 | as part of `Arg1`. And function pointers never get the
642 | method-with-self-by-reference special elision behavior. The thing that we have
643 | spelled `&Self` in the function pointer is just some ordinary argument type, not
644 | a method receiver.
645 |
646 | This lifetime elision complication effectively rules out the possibility of
647 | using a function pointer in our solution. This puts us in dire straits because:
648 |
649 | - as seen in the second attempt, we really need some kind of function or closure
650 | in order for early returns to work right;
651 |
652 | - as seen in the fourth attempt, it needs to be a *nested* function or closure
653 | so that this whole thing can be used inside trait impl blocks;
654 |
655 | - also from the fourth attempt, it can't be a nested function because the
656 | signature may need to involve `Self`;
657 |
658 | - from the sixth attempt, making `self` available in the closure body through
659 | closure capture is a dead end due to borrow checker trouble;
660 |
661 | - from the fifth attempt, passing `self` as a closure argument doesn't work
662 | unless we use a function pointer;
663 |
664 | - lifetime elision rules make it impossible to come up with the right function
665 | pointer type.
666 |
667 |
668 |
669 | ### Seventh attempt and solution
670 |
671 | For reasons that are beyond me, the following expansion seems to solve the
672 | entire set of constraints at once. Why is the rebinding of all the arguments
673 | necessary? I don't know, but without it we're in the same failing situation as
674 | back in the sixth attempt under the sentence that says "they all fall apart for
675 | borrow checker reasons when &mut is involved."
676 |
677 | ```rust
678 | // Before
679 | fn f(&mut self, a: Arg1, b: Arg2) -> Ret {
680 | // (Original function body)
681 | }
682 |
683 | // After
684 | fn f(&mut self, a: Arg1, b: Arg2) -> Ret {
685 | struct Guard;
686 | impl Drop for Guard {
687 | fn drop(&mut self) {
688 | // Do the thing
689 | }
690 | }
691 | let guard = Guard;
692 |
693 | let value = (move || {
694 | // Rebind all the arguments:
695 | let _self = self;
696 | let a = a;
697 | let b = b;
698 |
699 | // (Original function body, with self replaced by _self
700 | // except in nested impls)
701 | })();
702 |
703 | mem::forget(guard);
704 | value
705 | }
706 | ```
707 |
708 | I am pretty disappointed that the best known solution involves this obscure
709 | rebinding trick to work around what seems like a borrow checker limitation, and
710 | as a consequence suffers from its own limitation around use of `self` inside
711 | unexpanded macros within the function body (see sixth attempt). I guess this
712 | shows there is still much room remaining for borrow checker improvements!
713 |
714 | In any case, this expansion is part of the implementation used for the
715 | [`no-panic`][no-panic] crate.
716 |
--------------------------------------------------------------------------------
/function-epilogue/demo/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "case-study-function-epilogue"
3 | version = "0.0.0"
4 | authors = ["David Tolnay "]
5 | edition = "2021"
6 | publish = false
7 |
8 | [[bin]]
9 | name = "case-study"
10 | path = "main.rs"
11 |
--------------------------------------------------------------------------------
/function-epilogue/demo/main.rs:
--------------------------------------------------------------------------------
1 | use std::mem;
2 |
3 | pub struct S(i32);
4 | pub type Arg1<'a> = &'a i32;
5 | pub type Arg2 = i32;
6 | pub type Ret<'a> = (&'a mut i32, i32);
7 |
8 | impl S {
9 | pub fn original_f(&mut self, a: Arg1, b: Arg2) -> Ret {
10 | (&mut self.0, a + b)
11 | }
12 |
13 | pub fn generated_f(&mut self, a: Arg1, b: Arg2) -> Ret {
14 | struct Guard;
15 | impl Drop for Guard {
16 | fn drop(&mut self) {
17 | // Do the thing
18 | }
19 | }
20 | let guard = Guard;
21 |
22 | let value = (move || {
23 | let _self = self;
24 | let a = a;
25 | let b = b;
26 |
27 | // Original function body, with self replaced by _self
28 | // except in nested impls:
29 |
30 | (&mut _self.0, a + b)
31 | })();
32 |
33 | mem::forget(guard);
34 | value
35 | }
36 | }
37 |
38 | fn main() {
39 | let _ = S;
40 | }
41 |
--------------------------------------------------------------------------------
/integer-match/README.md:
--------------------------------------------------------------------------------
1 | ## Consecutive integer match patterns
2 |
3 | This came up in a macro that wanted to take a comma-separated sequence of
4 | expressions like `themacro!('A', 'B', f())` and emit a `match` expression indexed by
5 | position in the sequence:
6 |
7 | ```rust
8 | match VALUE {
9 | 0 => 'A',
10 | 1 => 'B',
11 | 2 => f(),
12 | _ => unimplemented!(),
13 | }
14 | ```
15 |
16 | As a macro\_rules macro, a core limitation was that we can't make identifiers
17 | dynamically, so the generated code would be limited to using some fixed number
18 | of identifiers regardless of how many expressions are in the macro input.
19 |
20 | In the actual use case, this `match` was just one part of a more complicated
21 | macro; we wouldn't necessarily want a macro for doing literally what is
22 | described here by itself.
23 |
24 |
25 |
26 | ### Rejected solutions
27 |
28 | **Procedural macro.** The whole thing could have been made a
29 | procedural macro instead. A procedural macro would be able to emit exactly a
30 | match expression as shown above. However the stable Rust compiler does not yet
31 | support calling procedural macros in expression position, so the procedural
32 | macro would have needed to be restricted to nightly only. Also it would mean
33 | pulling in some extra dependencies for parsing.
34 |
35 | **Change input syntax.** The input syntax for the macro could have
36 | been changed to require the caller to pass their own counter in the input:
37 | something like `themacro!((0, 'A'), (1, 'B'), (2, f()))`. This makes things easy
38 | for the macro implementation but at the expense of the caller, which was the
39 | wrong tradeoff. Here is what that would look like implemented:
40 |
41 | ```rust
42 | // Force caller to provide their own counter.
43 | macro_rules! themacro {
44 | ($(($i:pat, $e:expr)),*) => {
45 | match VALUE {
46 | $($i => $e,)*
47 | _ => unimplemented!(),
48 | }
49 | };
50 | }
51 | ```
52 |
53 |
54 |
55 | ### Good solutions
56 |
57 | **If-else chain.** We can make the macro expand to a chain of if-else
58 | comparisons structured like this, with a counter in a local variable:
59 |
60 | ```rust
61 | {
62 | let _value = VALUE;
63 | let mut _i = 0;
64 | if {
65 | let eq = _value == _i;
66 | _i += 1;
67 | eq
68 | } {
69 | $e
70 | } else if {
71 | let eq = _value == _i;
72 | _i += 1;
73 | eq
74 | } {
75 | $e
76 | } else if {
77 | let eq = _value == _i;
78 | _i += 1;
79 | eq
80 | } {
81 | $e
82 | } else {
83 | unimplemented!()
84 | }
85 | }
86 | ```
87 |
88 | The conditions of the `if` are equivalent to `_value == _i++` except that unary
89 | increment does not exist in Rust.
90 |
91 | The leading underscore in the local variables `_value` and `_i` is meaningful in
92 | that it suppresses some of the compiler's lints on unused variables, unused
93 | assignment, and unused mut. If the caller's sequence of expressions is empty,
94 | then `_value` and `_i` are never read and `_i` is never mutated. If the caller's
95 | sequence of expressions is nonempty, the value written to `_i` by the last `_i
96 | += 1` is never read. We could alternatively use `#[allow(unused_variables,
97 | unused_mut, unused_assignments)]` but placing these attributes in a way that
98 | they apply correctly to the macro-generated local variables but not to the
99 | caller's $e expressions makes things more complicated.
100 |
101 | Notice that the way the if-else chain is structured there is a clear chunk of
102 | repeating tokens -- each `if` through the following `else`. That repeating
103 | structure makes it very easy for this to be generated from a macro\_rules macro
104 | in one step of expansion.
105 |
106 | ```rust
107 | macro_rules! themacro {
108 | ($($e:expr),*) => {{
109 | let _value = VALUE;
110 | let mut _i = 0;
111 | $(
112 | if {
113 | let eq = _value == _i;
114 | _i += 1;
115 | eq
116 | } {
117 | $e
118 | } else
119 | )* {
120 | unimplemented!()
121 | }
122 | }};
123 | }
124 | ```
125 |
126 |
127 |
128 | **Const counter.** In some situations we may really want to stick
129 | with a `match` expression rather than an if-else chain, for example if the value
130 | being matched is just part of a larger data structure and we need to bind other
131 | parts of the data structure by-move in the same match.
132 |
133 | We can't expand to a `match` in which the patterns are integer literals `0`,
134 | `1`, `2` etc as shown in the introduction, at least not while supporting an
135 | arbitrary number of input expressions, because macro\_rules can only copy and
136 | paste tokens around, never come up with new tokens. If the caller passes 9999
137 | input expressions, there wouldn't be any way for a macro\_rules macro to conjure
138 | up a `9998` integer literal token to place in the output.
139 |
140 | We also can't expand to arithmetic patterns because this is not legal Rust
141 | syntax.
142 |
143 | ```rust
144 | match VALUE {
145 | 0 => $e,
146 | 0 + 1 => $e,
147 | 0 + 1 + 1 => $e,
148 | ...
149 | }
150 | ```
151 |
152 | Instead we will make generated code that looks like this:
153 |
154 | ```rust
155 | {
156 | mod m {
157 | pub const X: usize = 0;
158 | pub mod m {
159 | pub const X: usize = super::X + 1;
160 | pub mod m {
161 | pub const X: usize = super::X + 1;
162 | }
163 | }
164 | }
165 | match VALUE {
166 | m::X => $e,
167 | m::m::X => $e,
168 | m::m::m::X => $e,
169 | _ => unimplemented!(),
170 | }
171 | }
172 | ```
173 |
174 | The nested modules here provide a way to avoid needing unique names for each
175 | const, which macro\_rules wouldn't be able to create.
176 |
177 | Figuring out the right generated code is the hard part. The macro implementation
178 | ends up being an unremarkable tt-muncher macro that produces one layer of the
179 | nesting at a time.
180 |
181 | ```rust
182 | macro_rules! themacro {
183 | ($($v:expr),*) => {
184 | $crate::themacro_helper! {
185 | path: (m::X)
186 | def: ()
187 | arms: ()
188 | $($v),*
189 | }
190 | };
191 | }
192 |
193 | macro_rules! themacro_helper {
194 | (
195 | path: ($($path:tt)*)
196 | def: ($($def:tt)*)
197 | arms: ($(($i:pat, $v:expr))*)
198 | ) => {{
199 | #[allow(dead_code)]
200 | mod m {
201 | pub const X: usize = 0;
202 | $($def)*
203 | }
204 | match VALUE {
205 | $(
206 | $i => $v,
207 | )*
208 | _ => unimplemented!(),
209 | }
210 | }};
211 | (
212 | path: ($($path:tt)*)
213 | def: ($($def:tt)*)
214 | arms: ($(($i:pat, $v:expr))*)
215 | $next:expr $(, $rest:expr)*
216 | ) => {
217 | $crate::themacro_helper! {
218 | path: (m::$($path)*)
219 | def: (pub mod m { pub const X: usize = super::X + 1; $($def)* })
220 | arms: ($(($i, $v))* ($($path)*, $next))
221 | $($rest),*
222 | }
223 | };
224 | }
225 | ```
226 |
--------------------------------------------------------------------------------
/integer-match/demo/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "case-study-integer-match"
3 | version = "0.0.0"
4 | authors = ["David Tolnay "]
5 | edition = "2021"
6 | publish = false
7 |
8 | [[bin]]
9 | name = "case-study"
10 | path = "main.rs"
11 |
--------------------------------------------------------------------------------
/integer-match/demo/main.rs:
--------------------------------------------------------------------------------
1 | #[macro_export]
2 | macro_rules! themacro { // Entry point: accepts a comma-separated list of expressions.
3 | ($($v:expr),*) => {
4 | $crate::themacro_helper! { // Delegate to the helper, seeding its three accumulators.
5 | path: (m::X) // const path that will serve as the pattern for the first expression
6 | def: () // nested module definitions generated so far (none yet)
7 | arms: () // (pattern, expression) pairs generated so far (none yet)
8 | $($v),* // expressions still waiting to be turned into match arms
9 | }
10 | };
11 | }
12 |
13 | #[macro_export] // needed so the `$crate::themacro_helper!` paths used below resolve
14 | macro_rules! themacro_helper {
15 | ( // Base case: no input expressions remain after the accumulators.
16 | path: ($($path:tt)*)
17 | def: ($($def:tt)*)
18 | arms: ($(($i:pat, $v:expr))*)
19 | ) => {{ // Emit a block holding the nested consts followed by the match.
20 | #[allow(dead_code)] // the innermost X is never named by any arm
21 | mod m {
22 | pub const X: usize = 0; // pattern value for the first expression
23 | $($def)* // accumulated chain of nested `mod m`, each one adding 1 to its parent's X
24 | }
25 | match VALUE { // VALUE is not a macro input; it must be in scope where the macro is invoked
26 | $(
27 | $i => $v, // one arm per input expression, in input order
28 | )*
29 | _ => unimplemented!(), // reached for any value with no corresponding expression
30 | }
31 | }};
32 | ( // Recursive case: peel off the first remaining expression as $next.
33 | path: ($($path:tt)*)
34 | def: ($($def:tt)*)
35 | arms: ($(($i:pat, $v:expr))*)
36 | $next:expr $(, $rest:expr)*
37 | ) => {
38 | $crate::themacro_helper! {
39 | path: (m::$($path)*) // the following expression gets a path one module deeper
40 | def: (pub mod m { pub const X: usize = super::X + 1; $($def)* }) // wrap the existing definitions in one more module layer
41 | arms: ($(($i, $v))* ($($path)*, $next)) // append an arm pairing the current path with $next
42 | $($rest),* // continue with the remaining expressions
43 | }
44 | };
45 | }
46 |
47 | fn main() { // Demo driver for themacro!.
48 | const VALUE: usize = 2; // the name the macro expansion matches on
49 | dbg!(VALUE);
50 | dbg!(themacro!('A', 'B', 'C')); // arms are numbered from 0, so VALUE == 2 selects 'C'
51 | }
52 |
--------------------------------------------------------------------------------
/readonly-fields/README.md:
--------------------------------------------------------------------------------
1 | ## Read-only fields of mutable struct
2 |
3 | In [`oqueue`] I wanted to expose a field of one of the structs in the API, but
4 | not allow it to be mutated even if the caller has &mut access to the
5 | surrounding struct.
6 |
7 | [`oqueue`]: https://github.com/dtolnay/oqueue
8 |
9 |
10 |
11 | ### Rejected approaches
12 |
13 | **Public field.** The field cannot be `pub` because mutating it
14 | directly would enable the caller to violate invariants of the API.
15 |
16 | ```rust
17 | // Bad: caller can mutate, task.index += 1
18 |
19 | pub struct Task {
20 | pub index: usize,
21 | // other private fields
22 | }
23 | ```
24 |
25 | **Private field, public getter.** This would be the textbook
26 | solution.
27 |
28 | ```rust
29 | // Bad: caller needs to write task.index() instead of task.index
30 |
31 | pub struct Task {
32 | index: usize,
33 | // other private fields
34 | }
35 |
36 | impl Task {
37 | pub fn index(&self) -> usize {
38 | self.index
39 | }
40 | }
41 | ```
42 |
43 | For the ways that this API is commonly used as an argument to other function
44 | calls, I felt that the additional method call parentheses from the getter would
45 | be noisy and provide zero benefit. Rust users already understand how struct
46 | fields work and would be happy to access this value as a field if I can let
47 | them. From the role of this type in the crate's API it is very unlikely that
48 | someone would want to mutate the field, but still we need to protect against it
49 | for correctness.
50 |
51 |
52 |
53 | ### Background
54 |
55 | The way `.` field access syntax works, if there is no field found with the right
56 | name then the language will look at the type's `Deref` impl or a sequence of
57 | `Deref` impls to determine the field being named. This behavior is important for
58 | making smart pointers like `Box` convenient to use:
59 |
60 | ```rust
61 | // Somewhere in the standard library:
62 | //
63 | // pub struct Box<T> {
64 | // ptr: *mut T,
65 | // }
66 |
67 | struct S {
68 | x: String,
69 | }
70 |
71 | fn f(s: Box<S>) {
72 | // Box<S> has no field called x so it isn't obvious why
73 | // this line would be legal, but Box<S> dereferences to
74 | // S which does have that field.
75 | println!("{}", s.x);
76 | }
77 | ```
78 |
79 | Importantly for encapsulation, the deref behavior takes place even if a field
80 | with the right name exists on the original type but is private. Suppose that
81 | `Box` were implemented by storing the heap pointer it owns in a private field
82 | called `ptr`. In that case we would still want the following code to refer to
83 | the user's `ptr` field, rather than erroring because `ptr` exists on `Box` and
84 | is private:
85 |
86 | ```rust
87 | struct S {
88 | ptr: *const u8,
89 | }
90 |
91 | fn f(s: Box<S>) {
92 | println!("{:p}", s.ptr);
93 | }
94 | ```
95 |
96 | The final detail relevant to our original use case is that fields accessed
97 | through a `Deref` impl cannot be mutated unless the outer type also implements
98 | `DerefMut`. The `Deref` method signature looks like `fn deref(&self) ->
99 | &Self::Target` while the `DerefMut` signature looks like `fn deref_mut(&mut
100 | self) -> &mut Self::Target`.
101 |
102 |
103 |
104 | ### First attempt
105 |
106 | We can implement read-only fields by moving the state behind a `Deref` impl to a
107 | type with the appropriate fields public. Without a `DerefMut` impl, this makes
108 | all accessible fields read-only outside of the current module.
109 |
110 | ```rust
111 | pub struct Task {
112 | inner: ReadOnlyTask,
113 | }
114 |
115 | pub struct ReadOnlyTask {
116 | pub index: usize,
117 | // other private fields
118 | }
119 |
120 | impl Deref for Task {
121 | type Target = ReadOnlyTask;
122 |
123 | fn deref(&self) -> &Self::Target {
124 | &self.inner
125 | }
126 | }
127 | ```
128 |
129 | This is pretty good from the point of view of downstream code. As intended, code
130 | from outside the module can access `task.index` through deref but cannot mutate
131 | `task.index`.
132 |
133 | The big problem with this approach is that it distresses the borrow checker.
134 | From inside the module, if code takes a reference to one of the private fields
135 | through deref, say `&task.other`, deref gets a reference to the whole `&Task`
136 | which precludes then mutating some different fields while retaining the
137 | reference.
138 |
139 | ```console
140 | error[E0506]: cannot assign to `task.inner.another` because it is borrowed
141 | --> src/main.rs:8:5
142 | |
143 | 7 | let other = &task.other;
144 | | ---- borrow of `task.inner.another` occurs here
145 | 8 | task.inner.another = 1;
146 | | ^^^^^^^^^^^^^^^^^^^^^^ assignment to borrowed `task.inner.another` occurs here
147 | ```
148 |
149 | To work around this, practically all code within the module would need to be
150 | written in terms of `task.inner.*` explicitly rather than relying on derefs,
151 | which is unpleasant.
152 |
153 |
154 |
155 | ### Second attempt
156 |
157 | We can keep the original struct but dereference to a struct with the same memory
158 | layout and public fields, still not implementing `DerefMut`.
159 |
160 | For this to be sound, we need to guarantee that both copies of the struct have
161 | the same layout in memory. This is *not* guaranteed just by having the same
162 | fields with the same types in both. One way to do it is by using `#[repr(C)]` to
163 | tie both structs to C's struct layout rules, because those do guarantee the same
164 | layout for structs with identical fields.
165 |
166 | ```rust
167 | #[repr(C)]
168 | pub struct Task {
169 | index: usize,
170 | // other private fields
171 | }
172 |
173 | #[repr(C)]
174 | pub struct ReadOnlyTask {
175 | pub index: usize,
176 | // the same private fields
177 | }
178 |
179 | impl Deref for Task {
180 | type Target = ReadOnlyTask;
181 |
182 | fn deref(&self) -> &Self::Target {
183 | unsafe { &*(self as *const Self as *const Self::Target) }
184 | }
185 | }
186 | ```
187 |
188 | This works as intended. Code from inside this module can access and mutate the
189 | private `task.index` directly, while code from outside the module can access
190 | `task.index` through `Deref` and cannot mutate it even if the `Task` they hold
191 | is mutable.
192 |
193 | ```console
194 | error[E0594]: cannot assign to data in a `&` reference
195 | --> main.rs:8:5
196 | |
197 | 8 | task.index += 1;
198 | | ^^^^^^^^^^^^^^^ cannot assign
199 | ```
200 |
201 | But this is not a complete solution because we really want the field to appear
202 | as a public field in Rustdoc so that readers of the documentation immediately
203 | understand how to use it. The documentation experience should be as though this
204 | field were declared `pub`.
205 |
206 |
207 |
208 | ### Third attempt
209 |
210 | We can use [`#[cfg(doc)]`][cfgdoc] to distinguish when documentation is being
211 | rendered, which is available since Rust 1.41.
212 |
213 | [cfgdoc]: https://doc.rust-lang.org/1.67.0/rustdoc/advanced-features.html#cfgdoc-documenting-platform-specific-or-feature-specific-information
214 |
215 | ```rust
216 | #[repr(C)]
217 | pub struct Task {
218 | #[cfg(doc)]
219 | pub index: usize,
220 |
221 | #[cfg(not(doc))]
222 | index: usize,
223 |
224 | // other private fields
225 | }
226 |
227 | #[doc(hidden)]
228 | #[repr(C)]
229 | pub struct ReadOnlyTask {
230 | pub index: usize,
231 | // the same private fields
232 | }
233 |
234 | #[doc(hidden)]
235 | impl Deref for Task {
236 | type Target = ReadOnlyTask;
237 |
238 | fn deref(&self) -> &Self::Target {
239 | unsafe { &*(self as *const Self as *const Self::Target) }
240 | }
241 | }
242 | ```
243 |
244 | This renders as intended in rustdoc as:
245 |
246 | ```console
247 | pub struct Task {
248 | pub index: usize,
249 | // some fields omitted
250 | }
251 | ```
252 |
253 | so readers immediately know how to access the field. From the role of this type
254 | in the crate's API it is unlikely that anyone would want to mutate the field,
255 | but just in case, the field's documentation points out that it is read-only.
256 |
257 |
258 |
259 | ### Implementation
260 |
261 | Once the right strategy for generated code has been worked out, [productizing
262 | the behavior as an attribute macro][readonly] is the easy part:
263 |
264 | [readonly]: https://github.com/dtolnay/readonly
265 |
266 | ```rust
267 | /// ...
268 | #[readonly::make]
269 | pub struct Task {
270 | /// ...
271 | ///
272 | /// This field is read-only; writing to its value will not compile.
273 | pub index: usize,
274 |
275 | // other private fields
276 | }
277 | ```
278 |
--------------------------------------------------------------------------------
/readonly-fields/demo/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "case-study-readonly-fields"
3 | version = "0.0.0"
4 | authors = ["David Tolnay "]
5 | edition = "2021"
6 | publish = false
7 |
8 | [[bin]]
9 | name = "case-study"
10 | path = "main.rs"
11 |
--------------------------------------------------------------------------------
/readonly-fields/demo/main.rs:
--------------------------------------------------------------------------------
1 | pub use oqueue::Task;
2 |
3 | mod oqueue { // Demonstrates a field that is readable everywhere but writable only inside this module.
4 | use core::ops::Deref;
5 |
6 | #[derive(Default)]
7 | #[repr(C)] // C layout rules, so the layout matches ReadOnlyTask below
8 | pub struct Task {
9 | #[cfg(doc)] // when rustdoc runs, present the field as public
10 | pub index: usize,
11 |
12 | #[cfg(not(doc))] // in normal builds the field is private to this module
13 | index: usize,
14 |
15 | // Other private fields:
16 | q: usize,
17 | }
18 |
19 | #[doc(hidden)]
20 | #[repr(C)] // must mirror Task: same fields, same order, same layout rules
21 | pub struct ReadOnlyTask {
22 | pub index: usize, // public here, so code outside the module can read it through Deref
23 |
24 | // The same private fields:
25 | q: usize,
26 | }
27 |
28 | #[doc(hidden)]
29 | impl Deref for Task { // no DerefMut impl, so access through deref is read-only
30 | type Target = ReadOnlyTask;
31 |
32 | fn deref(&self) -> &Self::Target {
33 | unsafe { &*(self as *const Self as *const Self::Target) } // SAFETY: Task and ReadOnlyTask are both #[repr(C)] with identical fields in identical order
34 | }
35 | }
36 |
37 | #[allow(dead_code)]
38 | pub fn from_within_module(task: &mut Task) { // inside the module the private field is named directly
39 | task.index += 1; // allowed: this is Task's own field, no Deref involved
40 | }
41 | }
42 |
43 | fn from_outside_module(task: &mut Task) { // intentionally does not compile; shows the read-only guarantee
44 | task.index += 1; // cannot assign: outside the module `index` is only reachable through Deref, which hands out a shared reference
45 | }
46 |
47 | fn main() { // Exercises both the permitted and the rejected mutation paths.
48 | let mut task = Task::default();
49 | oqueue::from_within_module(&mut task); // mutation from inside the defining module
50 | from_outside_module(&mut task); // mutation attempt from outside the module
51 | }
52 |
--------------------------------------------------------------------------------
/unit-type-parameters/README.md:
--------------------------------------------------------------------------------
1 | ## Unit struct with type parameters
2 |
3 | [`PhantomData`] is a lang item which means it is currently implemented using
4 | dedicated logic in the compiler, but it turns out all of its behavior can be
5 | implemented from ordinary Rust code. This gives a good opportunity to explore
6 | namespaces in Rust name resolution.
7 |
8 | [`PhantomData`]: https://doc.rust-lang.org/std/marker/struct.PhantomData.html
9 |
10 | The defining characteristic of `PhantomData` is that it is a unit struct with a
11 | type parameter, which is not otherwise allowed by Rust.
12 |
13 | ```rust
14 | struct MyPhantomData<T: ?Sized>;
15 |
16 | fn main() {
17 | let _: MyPhantomData<usize> = MyPhantomData;
18 | }
19 | ```
20 |
21 | ```console
22 | error[E0392]: parameter `T` is never used
23 | --> src/main.rs:1:22
24 | |
25 | 1 | struct MyPhantomData<T: ?Sized>;
26 | | ^ unused parameter
27 | |
28 | = help: consider removing `T` or using a marker such as `std::marker::PhantomData`
29 | ```
30 |
31 | This is a hard error, not a warning that can be suppressed like some other lints
32 | about unused code. Rust needs to insist on all type parameters appearing somehow
33 | in the data structure because it is critical for determining [variance].
34 |
35 | [variance]: https://doc.rust-lang.org/nomicon/subtyping.html
36 |
37 | We will develop an attribute macro to make this work by assuming covariance for
38 | the type parameter the same as `PhantomData`. As always, the hard part is
39 | figuring out what code to generate, not writing the macro.
40 |
41 | ```rust
42 | #[phantom]
43 | struct MyPhantomData<T: ?Sized>;
44 |
45 | fn main() {
46 | let _: MyPhantomData<usize> = MyPhantomData;
47 | }
48 | ```
49 |
50 | Solving this functionality opens some interesting design possibilities for
51 | libraries that want something that is usable like `PhantomData` but is a locally
52 | defined type, meaning the library can control the impl of traits like
53 | `IntoIterator` on it. The iteration API of [`inventory`] is an example of such a
54 | type in a public crate.
55 |
56 | [`inventory`]: https://github.com/dtolnay/inventory
57 |
58 |
59 |
60 | ### Background
61 |
62 | Names of things in Rust exist in one of three namespaces:
63 |
64 | - The type namespace: structs, enums, unions, traits, modules, enum variants.
65 |
66 | - The value namespace: functions, local variables, statics, consts, tuple struct
67 | constructors, unit struct instances, tuple variant constructors, unit
68 | variants instances.
69 |
70 | - The macro namespace: macro\_rules macros, function-like procedural macros,
71 | attribute macros, derive macros.
72 |
73 | The following is not a precise rule, but the intuition is that something exists
74 | in the type namespace if you can write:
75 |
76 | ```rust
77 | let _: TYPE;
78 | ```
79 |
80 | while something exists in the value namespace if you can write:
81 |
82 | ```rust
83 | let _ = VALUE;
84 | ```
85 |
86 | These two syntactic positions are always unambiguous in the Rust grammar, so
87 | permitting the same name to refer to different things in each namespace does not
88 | introduce ambiguity.
89 |
90 | It is possible to have the same name refer to different things in all three
91 | namespaces at once:
92 |
93 | ```rust
94 | // X in the macro namespace
95 | macro_rules! X {
96 | () => {};
97 | }
98 |
99 | // X in the type namespace
100 | struct X {}
101 |
102 | // X in the value namespace
103 | const X: () = ();
104 |
105 | fn main() {
106 | // unambiguously the macro X
107 | X!();
108 |
109 | // unambiguously the type X
110 | let _: X;
111 |
112 | // unambiguously the value X
113 | let _ = X;
114 | }
115 | ```
116 |
117 | Some definitions place a name into more than one namespace. For example unit
118 | structs (`struct S;`) and tuple structs (`struct S(A, B);`) are both types and
119 | values. The value corresponding to a unit struct is like a constant whose value
120 | is that unit struct, and the value corresponding to a tuple struct is like a
121 | function that takes the tuple elements and returns the tuple struct.
122 |
123 | Braced structs (`struct S { a: A }`) are types only.
124 |
125 |
126 |
127 | ### Strategy
128 |
129 | `PhantomData`, being a unit struct, consists of a type component and a value
130 | component. When you write `use std::marker::PhantomData` you are importing both.
131 |
132 | ```rust
133 | use std::marker::PhantomData;
134 |
135 | fn main() {
136 | let _: PhantomData<usize> = PhantomData::<usize>;
137 | }
138 | ```
139 |
140 | In implementing our own `PhantomData` we will tackle the two namespaces one
141 | after the other.
142 |
143 | In the value namespace we will need something that makes the following valid:
144 |
145 | ```rust
146 | fn main() {
147 | let _ = MyPhantomData::<usize>;
148 | }
149 | ```
150 |
151 | And in the type namespace we will need something for this:
152 |
153 | ```rust
154 | fn main() {
155 | let _: MyPhantomData<usize>;
156 | }
157 | ```
158 |
159 | Independently these would be easy, but the hard part will be making it so that
160 | `MyPhantomData::<T>` as a value has a type that matches
161 | `MyPhantomData<T>`.
162 |
163 | ```rust
164 | fn main() {
165 | let _: MyPhantomData<usize> = MyPhantomData::<usize>;
166 | }
167 | ```
168 |
169 |
170 |
171 | ### Value namespace
172 |
173 | In the value namespace basically our only tool relevant to this project is unit
174 | variants. The other obvious candidates in the value namespace (statics and
175 | consts) cannot carry a type parameter.
176 |
177 | You may be familiar with type parameters on unit variants already, maybe without
178 | thinking about it, from dealing with `Option`:
179 |
180 | ```rust
181 | fn main() {
182 | let mut x = None::<usize>;
183 |
184 | // equivalent to:
185 | let mut x: Option<usize> = None;
186 | }
187 | ```
188 |
189 | Here is how we would make a unit variant with a type parameter that can be
190 | imported and used in value position:
191 |
192 | ```rust
193 | mod phantom {
194 | pub use self::ImplementationDetail::MyPhantomData;
195 |
196 | pub enum ImplementationDetail<T: ?Sized> {
197 | MyPhantomData,
198 |
199 | #[allow(dead_code)]
200 | #[doc(hidden)]
201 | Marker(*const T),
202 | }
203 | }
204 |
205 | use phantom::MyPhantomData;
206 |
207 | fn main() {
208 | let _ = MyPhantomData::<usize>;
209 | }
210 | ```
211 |
212 | The marker variant is responsible for using the type parameter `T` in some way
213 | that gives it the right variance. There are many correct alternatives but I made
214 | it hold `*const T` as one example of a type that is covariant in `T` and works
215 | with dynamically sized `T: ?Sized`. We will come back to autotrait impls later.
216 |
217 |
218 |
219 | ### Type namespace
220 |
221 | Clearly in the previous section the type of the enum variant
222 | `MyPhantomData::<T>` is the enum type `ImplementationDetail<T>`. We just
223 | need to call it something else, namely `MyPhantomData<T>`.
224 |
225 | Changing the name doesn't immediately work.
226 |
227 | ```rust
228 | mod phantom {
229 | pub use self::MyPhantomData::MyPhantomData;
230 |
231 | pub enum MyPhantomData<T: ?Sized> {
232 | MyPhantomData,
233 |
234 | #[allow(dead_code)]
235 | #[doc(hidden)]
236 | Marker(*const T),
237 | }
238 | }
239 | ```
240 |
241 | ```console
242 | error[E0255]: the name `MyPhantomData` is defined multiple times
243 | --> src/main.rs:4:5
244 | |
245 | 2 | pub use self::MyPhantomData::MyPhantomData;
246 | | ---------------------------------- previous import of the type `MyPhantomData` here
247 | 3 |
248 | 4 | pub enum MyPhantomData<T: ?Sized> {
249 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `MyPhantomData` redefined here
250 | |
251 | = note: `MyPhantomData` must be defined only once in the type namespace of this module
252 | help: you can use `as` to change the binding name of the import
253 | |
254 | 2 | pub use self::MyPhantomData::MyPhantomData as OtherMyPhantomData;
255 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
256 | ```
257 |
258 | The behavior seen here is that all enum variants of any style (struct variant,
259 | tuple variant, unit variant) occupy both the value namespace and the type
260 | namespace. Our code had defined `enum MyPhantomData` as a type, but then
261 | imported `self::MyPhantomData::MyPhantomData` which is both a value and type,
262 | resulting in a conflict in the type namespace.
263 |
264 | Naively we might expect that unit variants and tuple variants occupy only the
265 | value namespace while struct variants occupy only the type namespace. Unit
266 | variants necessarily need something in the value namespace through which you
267 | refer to their value, and tuple variants necessarily need something in the value
268 | namespace that behaves like a function through which you construct them. And
269 | struct variants need something to make curly brace initialization work, which
270 | seems like it should be the type namespace because plain structs with named
271 | fields exist in the type namespace only. But apparently this is not how things
272 | work -- maybe to leave things open for language evolution in which enum variants
273 | become usable as refinement types.
274 |
275 | In any case, the way to work around conflicts is via wildcard imports. These are
276 | allowed to overlap with non-wildcard imports or explicit definitions, in which
277 | case the non-wildcard takes precedence. The precedence applies independently
278 | within each namespace.
279 |
280 | ```rust
281 | mod phantom {
282 | // Imports the enum variant in both type and value namespace,
283 | // but in the type namespace it gets shadowed by the definition
284 | // `enum MyPhantomData` below.
285 | pub use self::MyPhantomData::*;
286 |
287 | pub enum MyPhantomData<T: ?Sized> {
288 | MyPhantomData,
289 |
290 | #[allow(dead_code)]
291 | #[doc(hidden)]
292 | Marker(*const T),
293 | }
294 | }
295 |
296 | use phantom::MyPhantomData;
297 |
298 | fn main() {
299 | let _: MyPhantomData<usize> = MyPhantomData::<usize>;
300 | }
301 | ```
302 |
303 | Pretty neat! There are some quirks to sort out still, but this is on the right
304 | track.
305 |
306 |
307 |
308 | ### Memory representation
309 |
310 | We want `std::mem::size_of::<MyPhantomData<T>>() == 0`.
311 |
312 | In the definition above, it would currently be a whopping 16 or 24 bytes
313 | depending on whether `T` is dynamically sized. The marker variant takes up space
314 | for a pointer or fat pointer, and there is an enum discriminant as well which
315 | needs 1 bit, and we get a further 63 bits of padding for alignment reasons.
316 |
317 | Two things need to change: we need the marker variant not to contain storage,
318 | and we need the discriminant not to exist.
319 |
320 | We can eliminate the discriminant by making the marker variant's data zero sized
321 | and statically impossible. The compiler is smart enough to elide the
322 | discriminant when this happens.
323 |
324 | For various complicated but reasonably good reasons, just making the data
325 | impossible without making it zero sized (such as `Marker(Void, *const T)`) is
326 | not sufficient.
327 |
328 | ```rust
329 | mod phantom {
330 | pub use self::MyPhantomData::*;
331 |
332 | pub enum MyPhantomData<T: ?Sized> {
333 | MyPhantomData,
334 |
335 | #[allow(dead_code)]
336 | #[doc(hidden)]
337 | Marker(Void, [*const T; 0]),
338 | }
339 |
340 | pub enum Void {}
341 | }
342 |
343 | use phantom::MyPhantomData;
344 |
345 | fn main() {
346 | assert_eq!(std::mem::size_of::<MyPhantomData<usize>>(), 0);
347 | }
348 | ```
349 |
350 |
351 |
352 | ### Autotraits
353 |
354 | The standard library's `PhantomData<T>` has `impl<T: ?Sized + Send> Send` and
355 | `impl<T: ?Sized + Sync> Sync`. Our type so far has neither of these because
356 | `*const T` does not.
357 |
358 | A simple fix would be `Marker(Void, [Box<T>; 0])` but then we depend on a memory
359 | allocator for no reason. This fix works because `Box<T>` has the same `Send` and
360 | `Sync` impls as `T`.
361 |
362 | Without `Box`, the same impls can be written unsafely.
363 |
364 | ```rust
365 | mod phantom {
366 | pub use self::MyPhantomData::*;
367 |
368 | pub enum MyPhantomData<T: ?Sized> {
369 | MyPhantomData,
370 |
371 | #[allow(dead_code)]
372 | #[doc(hidden)]
373 | Marker(Void, [*const T; 0]),
374 | }
375 |
376 | pub enum Void {}
377 |
378 | unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {}
379 | unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {}
380 | }
381 | ```
382 |
383 |
384 |
385 | ### Documentation
386 |
387 | Rustdoc would render our type as:
388 |
389 | ```console
390 | pub enum MyPhantomData<T: ?Sized> {
391 | MyPhantomData,
392 | // some variants omitted
393 | }
394 | ```
395 |
396 | which is technically accurate, but misleading relative to how we want users to
397 | conceptualize this construct.
398 |
399 | There isn't a great solution to this, but you may or may not find the following
400 | more appealing:
401 |
402 | ```rust
403 | mod phantom {
404 | pub use self::MyPhantomData::*;
405 |
406 | pub enum MyPhantomData<T: ?Sized> {
407 | MyPhantomData,
408 |
409 | #[allow(dead_code)]
410 | #[doc(hidden)]
411 | Marker(Void, [*const T; 0]),
412 | }
413 |
414 | pub enum Void {}
415 |
416 | unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {}
417 | unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {}
418 | }
419 |
420 | /// ... documentation illustrating how to use.
421 | #[allow(type_alias_bounds)]
422 | pub type MyPhantomData<T: ?Sized> = phantom::MyPhantomData<T>;
423 |
424 | #[doc(hidden)]
425 | pub use self::phantom::*;
426 | ```
427 |
428 | Rustdoc renders:
429 |
430 | ```console
431 | type MyPhantomData<T: ?Sized> = MyPhantomData<T>;
432 | ```
433 |
434 | which hides the implementation detail and drives focus to your handwritten
435 | documentation to show how the type is intended to be used.
436 |
437 | The `#[allow(type_alias_bounds)]` attribute suppresses a future compatibility
438 | lint that triggers on type aliases with trait bounds on the left hand side. The
439 | Rust compiler currently does not respect such bounds but this behavior is
440 | considered a compiler bug and is subject to change, potentially breaking code
441 | involving trait bounds in type aliases -- hence the lint. Our code above is in
442 | the clear because the bounds in the type alias exactly match the bounds implied
443 | by well-formedness of the right hand side, so the meaning is the same whether or
444 | not the compiler looks at the type alias bounds. We want the bounds there
445 | because they do appear correctly in Rustdoc.
446 |
447 |
448 |
449 | ### Implementation
450 |
451 | Once the generated code is figured out, packaging this into [an attribute
452 | macro][ghost] is the easy part.
453 |
454 | [ghost]: https://github.com/dtolnay/ghost
455 |
456 | ```rust
457 | /// ... documentation illustrating how to use.
458 | #[phantom]
459 | struct MyPhantomData<T: ?Sized>;
460 | ```
461 |
462 | In fact we might as well make it work for any number of type parameters and
463 | lifetimes, as well as trait bounds and where-clauses.
464 |
465 | ```rust
466 | #[phantom]
467 | struct Crazy<'a, V: 'a, T> where &'a V: IntoIterator<Item = T>;
468 | ```
469 |
--------------------------------------------------------------------------------
/unit-type-parameters/demo/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "case-study-unit-type-parameters"
3 | version = "0.0.0"
4 | authors = ["David Tolnay "]
5 | edition = "2021"
6 | publish = false
7 |
8 | [[bin]]
9 | name = "case-study"
10 | path = "main.rs"
11 |
--------------------------------------------------------------------------------
/unit-type-parameters/demo/main.rs:
--------------------------------------------------------------------------------
1 | mod phantom {
2 | pub use self::MyPhantomData::*;
3 |
4 | pub enum MyPhantomData<T: ?Sized> {
5 | MyPhantomData,
6 |
7 | #[allow(dead_code)]
8 | #[doc(hidden)]
9 | Marker(Void, [*const T; 0]),
10 | }
11 |
12 | pub enum Void {}
13 |
14 | unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {}
15 | unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {}
16 | }
17 |
18 | /// ... documentation illustrating how to use.
19 | #[allow(type_alias_bounds)]
20 | pub type MyPhantomData<T: ?Sized> = phantom::MyPhantomData<T>;
21 |
22 | #[doc(hidden)]
23 | pub use self::phantom::*;
24 |
25 | fn main() {
26 | let _: MyPhantomData<usize> = MyPhantomData::<usize>;
27 | }
28 |
--------------------------------------------------------------------------------