├── .github
    ├── FUNDING.yml
    └── workflows
    │   └── ci.yml
├── .gitignore
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── autoref-specialization
    └── README.md
├── bitfield-assertion
    ├── README.md
    └── demo
    │   ├── Cargo.toml
    │   ├── bitfield
    │       ├── Cargo.toml
    │       └── src
    │       │   └── lib.rs
    │   ├── impl
    │       ├── Cargo.toml
    │       └── src
    │       │   └── lib.rs
    │   └── main.rs
├── callable-types
    ├── README.md
    └── demo
    │   ├── Cargo.toml
    │   └── main.rs
├── function-epilogue
    ├── README.md
    └── demo
    │   ├── Cargo.toml
    │   └── main.rs
├── integer-match
    ├── README.md
    └── demo
    │   ├── Cargo.toml
    │   └── main.rs
├── readonly-fields
    ├── README.md
    └── demo
    │   ├── Cargo.toml
    │   └── main.rs
└── unit-type-parameters
    ├── README.md
    └── demo
        ├── Cargo.toml
        └── main.rs


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: dtolnay
2 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |   workflow_dispatch:
 7 |   schedule: [cron: "40 1 * * *"]
 8 | 
 9 | permissions:
10 |   contents: read
11 | 
12 | env:
13 |   RUSTFLAGS: -Dwarnings
14 | 
15 | jobs:
16 |   pre_ci:
17 |     uses: dtolnay/.github/.github/workflows/pre_ci.yml@master
18 | 
19 |   check:
20 |     name: Rust ${{matrix.rust}}
21 |     needs: pre_ci
22 |     if: needs.pre_ci.outputs.continue
23 |     runs-on: ubuntu-latest
24 |     strategy:
25 |       fail-fast: false
26 |       matrix:
27 |         rust: [nightly, beta, stable]
28 |     timeout-minutes: 45
29 |     steps:
30 |       - uses: actions/checkout@v4
31 |       - uses: dtolnay/rust-toolchain@master
32 |         with:
33 |           toolchain: ${{matrix.rust}}
34 |       - run: cargo check --workspace --exclude case-study-bitfield-assertion --exclude case-study-readonly-fields
35 |       - uses: actions/upload-artifact@v4
36 |         if: matrix.rust == 'nightly' && always()
37 |         with:
38 |           name: Cargo.lock
39 |           path: Cargo.lock
40 |         continue-on-error: true
41 | 
42 |   outdated:
43 |     name: Outdated
44 |     runs-on: ubuntu-latest
45 |     if: github.event_name != 'pull_request'
46 |     timeout-minutes: 45
47 |     steps:
48 |       - uses: actions/checkout@v4
49 |       - uses: dtolnay/rust-toolchain@stable
50 |       - uses: dtolnay/install@cargo-outdated
51 |       - run: cargo outdated --workspace --exit-code 1
52 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Cargo.lock
2 | target/
3 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [workspace]
 2 | members = [
 3 |     "bitfield-assertion/demo",
 4 |     "bitfield-assertion/demo/bitfield",
 5 |     "bitfield-assertion/demo/impl",
 6 |     "callable-types/demo",
 7 |     "function-epilogue/demo",
 8 |     "integer-match/demo",
 9 |     "readonly-fields/demo",
10 |     "unit-type-parameters/demo",
11 | ]
12 | resolver = "2"
13 | 


--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 |                               Apache License
  2 |                         Version 2.0, January 2004
  3 |                      http://www.apache.org/licenses/
  4 | 
  5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 | 1. Definitions.
  8 | 
  9 |    "License" shall mean the terms and conditions for use, reproduction,
 10 |    and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |    "Licensor" shall mean the copyright owner or entity authorized by
 13 |    the copyright owner that is granting the License.
 14 | 
 15 |    "Legal Entity" shall mean the union of the acting entity and all
 16 |    other entities that control, are controlled by, or are under common
 17 |    control with that entity. For the purposes of this definition,
 18 |    "control" means (i) the power, direct or indirect, to cause the
 19 |    direction or management of such entity, whether by contract or
 20 |    otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |    outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |    "You" (or "Your") shall mean an individual or Legal Entity
 24 |    exercising permissions granted by this License.
 25 | 
 26 |    "Source" form shall mean the preferred form for making modifications,
 27 |    including but not limited to software source code, documentation
 28 |    source, and configuration files.
 29 | 
 30 |    "Object" form shall mean any form resulting from mechanical
 31 |    transformation or translation of a Source form, including but
 32 |    not limited to compiled object code, generated documentation,
 33 |    and conversions to other media types.
 34 | 
 35 |    "Work" shall mean the work of authorship, whether in Source or
 36 |    Object form, made available under the License, as indicated by a
 37 |    copyright notice that is included in or attached to the work
 38 |    (an example is provided in the Appendix below).
 39 | 
 40 |    "Derivative Works" shall mean any work, whether in Source or Object
 41 |    form, that is based on (or derived from) the Work and for which the
 42 |    editorial revisions, annotations, elaborations, or other modifications
 43 |    represent, as a whole, an original work of authorship. For the purposes
 44 |    of this License, Derivative Works shall not include works that remain
 45 |    separable from, or merely link (or bind by name) to the interfaces of,
 46 |    the Work and Derivative Works thereof.
 47 | 
 48 |    "Contribution" shall mean any work of authorship, including
 49 |    the original version of the Work and any modifications or additions
 50 |    to that Work or Derivative Works thereof, that is intentionally
 51 |    submitted to Licensor for inclusion in the Work by the copyright owner
 52 |    or by an individual or Legal Entity authorized to submit on behalf of
 53 |    the copyright owner. For the purposes of this definition, "submitted"
 54 |    means any form of electronic, verbal, or written communication sent
 55 |    to the Licensor or its representatives, including but not limited to
 56 |    communication on electronic mailing lists, source code control systems,
 57 |    and issue tracking systems that are managed by, or on behalf of, the
 58 |    Licensor for the purpose of discussing and improving the Work, but
 59 |    excluding communication that is conspicuously marked or otherwise
 60 |    designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |    "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |    on behalf of whom a Contribution has been received by Licensor and
 64 |    subsequently incorporated within the Work.
 65 | 
 66 | 2. Grant of Copyright License. Subject to the terms and conditions of
 67 |    this License, each Contributor hereby grants to You a perpetual,
 68 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |    copyright license to reproduce, prepare Derivative Works of,
 70 |    publicly display, publicly perform, sublicense, and distribute the
 71 |    Work and such Derivative Works in Source or Object form.
 72 | 
 73 | 3. Grant of Patent License. Subject to the terms and conditions of
 74 |    this License, each Contributor hereby grants to You a perpetual,
 75 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |    (except as stated in this section) patent license to make, have made,
 77 |    use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |    where such license applies only to those patent claims licensable
 79 |    by such Contributor that are necessarily infringed by their
 80 |    Contribution(s) alone or by combination of their Contribution(s)
 81 |    with the Work to which such Contribution(s) was submitted. If You
 82 |    institute patent litigation against any entity (including a
 83 |    cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |    or a Contribution incorporated within the Work constitutes direct
 85 |    or contributory patent infringement, then any patent licenses
 86 |    granted to You under this License for that Work shall terminate
 87 |    as of the date such litigation is filed.
 88 | 
 89 | 4. Redistribution. You may reproduce and distribute copies of the
 90 |    Work or Derivative Works thereof in any medium, with or without
 91 |    modifications, and in Source or Object form, provided that You
 92 |    meet the following conditions:
 93 | 
 94 |    (a) You must give any other recipients of the Work or
 95 |        Derivative Works a copy of this License; and
 96 | 
 97 |    (b) You must cause any modified files to carry prominent notices
 98 |        stating that You changed the files; and
 99 | 
100 |    (c) You must retain, in the Source form of any Derivative Works
101 |        that You distribute, all copyright, patent, trademark, and
102 |        attribution notices from the Source form of the Work,
103 |        excluding those notices that do not pertain to any part of
104 |        the Derivative Works; and
105 | 
106 |    (d) If the Work includes a "NOTICE" text file as part of its
107 |        distribution, then any Derivative Works that You distribute must
108 |        include a readable copy of the attribution notices contained
109 |        within such NOTICE file, excluding those notices that do not
110 |        pertain to any part of the Derivative Works, in at least one
111 |        of the following places: within a NOTICE text file distributed
112 |        as part of the Derivative Works; within the Source form or
113 |        documentation, if provided along with the Derivative Works; or,
114 |        within a display generated by the Derivative Works, if and
115 |        wherever such third-party notices normally appear. The contents
116 |        of the NOTICE file are for informational purposes only and
117 |        do not modify the License. You may add Your own attribution
118 |        notices within Derivative Works that You distribute, alongside
119 |        or as an addendum to the NOTICE text from the Work, provided
120 |        that such additional attribution notices cannot be construed
121 |        as modifying the License.
122 | 
123 |    You may add Your own copyright statement to Your modifications and
124 |    may provide additional or different license terms and conditions
125 |    for use, reproduction, or distribution of Your modifications, or
126 |    for any such Derivative Works as a whole, provided Your use,
127 |    reproduction, and distribution of the Work otherwise complies with
128 |    the conditions stated in this License.
129 | 
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 |    any Contribution intentionally submitted for inclusion in the Work
132 |    by You to the Licensor shall be under the terms and conditions of
133 |    this License, without any additional terms or conditions.
134 |    Notwithstanding the above, nothing herein shall supersede or modify
135 |    the terms of any separate license agreement you may have executed
136 |    with Licensor regarding such Contributions.
137 | 
138 | 6. Trademarks. This License does not grant permission to use the trade
139 |    names, trademarks, service marks, or product names of the Licensor,
140 |    except as required for reasonable and customary use in describing the
141 |    origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 |    agreed to in writing, Licensor provides the Work (and each
145 |    Contributor provides its Contributions) on an "AS IS" BASIS,
146 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |    implied, including, without limitation, any warranties or conditions
148 |    of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |    PARTICULAR PURPOSE. You are solely responsible for determining the
150 |    appropriateness of using or redistributing the Work and assume any
151 |    risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 |    whether in tort (including negligence), contract, or otherwise,
155 |    unless required by applicable law (such as deliberate and grossly
156 |    negligent acts) or agreed to in writing, shall any Contributor be
157 |    liable to You for damages, including any direct, indirect, special,
158 |    incidental, or consequential damages of any character arising as a
159 |    result of this License or out of the use or inability to use the
160 |    Work (including but not limited to damages for loss of goodwill,
161 |    work stoppage, computer failure or malfunction, or any and all
162 |    other commercial damages or losses), even if such Contributor
163 |    has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 |    the Work or Derivative Works thereof, You may choose to offer,
167 |    and charge a fee for, acceptance of support, warranty, indemnity,
168 |    or other liability obligations and/or rights consistent with this
169 |    License. However, in accepting such obligations, You may act only
170 |    on Your own behalf and on Your sole responsibility, not on behalf
171 |    of any other Contributor, and only if You agree to indemnify,
172 |    defend, and hold each Contributor harmless for any liability
173 |    incurred by, or claims asserted against, such Contributor by reason
174 |    of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | Permission is hereby granted, free of charge, to any
 2 | person obtaining a copy of this software and associated
 3 | documentation files (the "Software"), to deal in the
 4 | Software without restriction, including without
 5 | limitation the rights to use, copy, modify, merge,
 6 | publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software
 8 | is furnished to do so, subject to the following
 9 | conditions:
10 | 
11 | The above copyright notice and this permission notice
12 | shall be included in all copies or substantial portions
13 | of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## Case&nbsp;studies
  2 | 
  3 | This repository showcases some examples of tricky Rust code that I have
  4 | encountered during my years working with a variety of advanced macro libraries
  5 | in Rust (my own and others').
  6 | 
  7 | <br>
  8 | 
  9 | <a name="the-point"></a>
 10 | # [:postal\_horn:](#the-point)&ensp;The point
 11 | 
 12 | This project is dedicated to the one profound insight about Rust macro
 13 | development: the difference between someone who is competent with macros vs an
 14 | expert at macros mostly has nothing to do with how good they are "at macros".
 15 | 
 16 | 90% of what enables people to push the limits of possibility in pursuit of a
 17 | powerful and user-friendly macro library API is in their mastery of everything
 18 | else about Rust outside of macros, and their creativity to put together ordinary
 19 | language features in interesting ways that may not occur in handwritten code.
 20 | 
 21 | You may occasionally come across Rust macros that you feel are really advanced
 22 | or magical. If you ever feel this way, I encourage you to take a closer look and
 23 | you'll discover that as far as the macro implementation itself is concerned,
 24 | none of those libraries are doing anything remotely interesting. If it is a
 25 | procedural macro, they always just parse some input in a boring way, crawl some
 26 | syntax trees in a boring way to find out about the input, and paste together
 27 | some output code in a boring way exactly like what you would learn in a few
 28 | hours by working through any part of my [procedural macro workshop][workshop].
 29 | If it is a macro\_rules macro, everything is conceptually just as boring but
 30 | when stretched to its limits it becomes a write-only syntax that poses a
 31 | challenge for even the author to follow and understand later, let alone someone
 32 | else not already fluent in the basics of macro\_rules.
 33 | 
 34 | To the extent that there are any tricks to macro development, all of them
 35 | revolve around *what* code the macros emit, not *how* the macros emit the code.
 36 | This realization can be surprising to people who entered into macro development
 37 | with a vague notion of procedural macros as a "compiler plugin" which they
 38 | imagine must imply all sorts of complicated APIs for *how* to integrate with the
 39 | rest of the compiler. That's not how it works. The only thing macros do is emit
 40 | code that could have been written by hand. If you couldn't have come up with
 41 | some piece of tricky code from one of those magical macros, learning more "about
 42 | macros" won't change that; but learning more about every other part of Rust
 43 | will. Conversely, once you come up with what code you want to generate, writing
 44 | the macro to generate it is generally the easy part.
 45 | 
 46 | [workshop]: https://github.com/dtolnay/proc-macro-workshop
 47 | 
 48 | <br>
 49 | 
 50 | <a name="focus"></a>
 51 | # [:boot:](#focus)&ensp;Focus
 52 | 
 53 | Yes, these case studies are drawn from use cases that arise from work on macros,
 54 | but the macros are never the interesting part. The ingenuity and sophistication
 55 | always lie in what Rust code ultimately gets emitted by the macro, and I think
 56 | you will find that those are fully possible to appreciate even if you know
 57 | nothing about macros.
 58 | 
 59 | To that end, I make an effort to minimize the role of macros in these case
 60 | studies. For each one I give only enough context about the relevant macro to
 61 | explain a set of constraints that the generated code will need to comply with.
 62 | The focus is on the generated code, which somehow solves the constraints using a
 63 | clever combination of Rust language features unrelated to macros. Lastly and
 64 | least importantly, I tie it back to the macro to point out that making a macro
 65 | produce the generated code we came up with would be the easy part.
 66 | 
 67 | Read and enjoy; I hope you find these an enlightening window into this corner of
 68 | Rust that has so far not been put into words.
 69 | 
 70 | <br>
 71 | 
 72 | <a name="case-studies"></a>
 73 | # [:jack\_o\_lantern:](#case-studies)&ensp;Case studies
 74 | 
 75 | <table><tr><td><ul><li></li></ul></td><td><b>
 76 | <a href="https://github.com/dtolnay/case-studies/blob/master/function-epilogue/README.md">
 77 | Function epilogue</a></b><br>
 78 | <sub><i>Topics: borrow checker, no_std, closures, lifetime elision</i></sub><br>
 79 | </td></tr></table>
 80 | 
 81 | <table><tr><td><ul><li></li></ul></td><td><b>
 82 | <a href="https://github.com/dtolnay/case-studies/blob/master/bitfield-assertion/README.md">
 83 | Multiple of 8 const assertion</a></b><br>
 84 | <sub><i>Topics: diagnostics, name resolution, const evaluation, traits</i></sub><br>
 85 | </td></tr></table>
 86 | 
 87 | <table><tr><td><ul><li></li></ul></td><td><b>
 88 | <a href="https://github.com/dtolnay/case-studies/blob/master/unit-type-parameters/README.md">
 89 | Unit struct with type parameters</a></b><br>
 90 | <sub><i>Topics: namespaces, glob imports, layout optimization, autotraits, documentation</i></sub><br>
 91 | </td></tr></table>
 92 | 
 93 | <table><tr><td><ul><li></li></ul></td><td><b>
 94 | <a href="https://github.com/dtolnay/case-studies/blob/master/readonly-fields/README.md">
 95 | Read-only fields of mutable struct</a></b><br>
 96 | <sub><i>Topics: deref coercion, borrow checker, repr, unsafe code, documentation</i></sub><br>
 97 | </td></tr></table>
 98 | 
 99 | <table><tr><td><ul><li></li></ul></td><td><b>
100 | <a href="https://github.com/dtolnay/case-studies/blob/master/integer-match/README.md">
101 | Consecutive integer match patterns</a></b><br>
102 | <sub><i>Topics: macro_rules, const</i></sub><br>
103 | </td></tr></table>
104 | 
105 | <table><tr><td><ul><li></li></ul></td><td><b>
106 | <a href="https://github.com/dtolnay/case-studies/blob/master/callable-types/README.md">
107 | User-defined callable types</a></b><br>
108 | <sub><i>Topics: deref coercion, closures, trait objects, repr, unsafe code</i></sub><br>
109 | </td></tr></table>
110 | 
111 | <table><tr><td><ul><li></li></ul></td><td><b>
112 | <a href="https://github.com/dtolnay/case-studies/blob/master/autoref-specialization/README.md">
113 | Autoref-based stable specialization</a></b><br>
114 | <sub><i>Topics: traits, method resolution</i></sub><br>
115 | </td></tr></table>
116 | 
117 | <br>
118 | 
119 | #### License
120 | 
121 | <sup>
122 | Licensed under either of <a href="LICENSE-APACHE">Apache License, Version
123 | 2.0</a> or <a href="LICENSE-MIT">MIT license</a> at your option.
124 | </sup>
125 | 
126 | <br>
127 | 
128 | <sub>
129 | Unless you explicitly state otherwise, any contribution intentionally submitted
130 | for inclusion in this project by you, as defined in the Apache-2.0 license,
131 | shall be dual licensed as above, without any additional terms or conditions.
132 | </sub>
133 | 


--------------------------------------------------------------------------------
/autoref-specialization/README.md:
--------------------------------------------------------------------------------
  1 | ## Autoref-based stable specialization
  2 | 
  3 | "Specialization" refers to permitting overlapping impls in Rust's trait system
  4 | so long as for every possible type, one of the applicable impls is "more
  5 | specific" than the others for some intuitive but precisely defined notion of
  6 | specific. Discussions about a specialization language feature have been ongoing
  7 | for 4.5 years ([RFC 1210], [rust-lang/rust#31844]). Today the feature is
  8 | partially implemented in rustc but is not yet sound when mixed with lifetimes
  9 | ([rust-lang/rust#40582]) and requires more language design work and compiler
 10 | work before it could be stabilized.
 11 | 
 12 | [RFC 1210]: https://github.com/rust-lang/rfcs/pull/1210
 13 | [rust-lang/rust#31844]: https://github.com/rust-lang/rust/issues/31844
 14 | [rust-lang/rust#40582]: https://github.com/rust-lang/rust/issues/40582
 15 | 
 16 | This page covers a stable, safe, generalizable technique for solving some of the
 17 | use cases that would otherwise be blocked on specialization.
 18 | 
 19 | The technique was originally developed for use by macros in the [Anyhow] crate.
 20 | 
 21 | [Anyhow]: https://github.com/dtolnay/anyhow
 22 | 
 23 | <br>
 24 | 
 25 | ### Context
 26 | 
 27 | I'll explain the technique as applied to two use cases, one simpler to start
 28 | with and then a more elaborate realistic one.
 29 | 
 30 | The first use case is going to be a truly canonical application of
 31 | specialization &mdash; a blanket impl with a separate fast path for some
 32 | concrete type(s). The equivalent nightly-only specialized blanket impl would be
 33 | like this:
 34 | 
 35 | ```rust
 36 | #![feature(specialization)]
 37 | 
 38 | use std::fmt::{Display, Write};
 39 | 
 40 | pub trait MyToString {
 41 |     fn my_to_string(&self) -> String;
 42 | }
 43 | 
 44 | // General impl that applies to any T with a Display impl.
 45 | impl<T: Display> MyToString for T {
 46 |     default fn my_to_string(&self) -> String {
 47 |         let mut buf = String::new();
 48 |         buf.write_fmt(format_args!("{}", self)).unwrap();
 49 |         buf.shrink_to_fit();
 50 |         buf
 51 |     }
 52 | }
 53 | 
 54 | // Specialized impl to bypass the relatively expensive std::fmt machinery.
 55 | impl MyToString for String {
 56 |     fn my_to_string(&self) -> String {
 57 |         self.clone()
 58 |     }
 59 | }
 60 | ```
 61 | 
 62 | Then the second use case will be closer to the real-life usage of this technique
 63 | in Anyhow. We have an error type, and we want it to be constructible from any
 64 | underlying type that has a `Display` impl. But if the underlying type *also* has
 65 | a `std::error::Error` impl, we'd like to know about that by invoking a different
 66 | constructor which will propagate the original error's source() and backtrace()
 67 | information correctly.
 68 | 
 69 | Ultimately we want both of the following to compile:
 70 | 
 71 | ```rust
 72 | fn demo1() -> Result<(), anyhow::Error> {
 73 |     // Turn a &str into an error.
 74 |     // &str implements Display but not std::error::Error.
 75 |     return Err(anyhow!("oh no!"));
 76 | }
 77 | 
 78 | fn demo2() -> Result<(), anyhow::Error> {
 79 |     // Turn an existing std::error::Error value into our error without
 80 |     // losing its source() and backtrace() if there is one.
 81 |     let io_error = fs::read("/tmp/nonexist").unwrap_err();
 82 |     return Err(anyhow!(io_error));
 83 | }
 84 | ```
 85 | 
 86 | Recall that `std::error::Error` has `Display` as a supertrait so the impl for
 87 | `std::error::Error` is strictly more specific than the general impl that covers
 88 | all `Display` types.
 89 | 
 90 | ```rust
 91 | #![feature(specialization)]
 92 | 
 93 | use std::error::Error as StdError;
 94 | use std::fmt::Display;
 95 | 
 96 | pub struct Error(/* ... */);
 97 | 
 98 | impl Error {
 99 |     pub(crate) fn from_fmt<T: Display>(error: T) -> Self {...}
100 |     pub(crate) fn from_std_error<T: StdError>(error: T) -> Self {...}
101 | }
102 | 
103 | pub(crate) trait AnyhowNew {
104 |     fn new(self) -> Error;
105 | }
106 | 
107 | impl<T: Display> AnyhowNew for T {
108 |     default fn new(self) -> Error {
109 |         // no std error impl
110 |         Error::from_fmt(self)
111 |     }
112 | }
113 | 
114 | impl<T: StdError> AnyhowNew for T {
115 |     fn new(self) -> Error {
116 |         // able to use std error's source() and backtrace()
117 |         Error::from_std_error(self)
118 |     }
119 | }
120 | ```
121 | 
122 | <br>
123 | 
124 | ### Background: autoref
125 | 
126 | To do specialization using only 100% stable and 100% safe code, we'll need some
127 | other mechanism to accomplish compile-time fallback through a prioritized
128 | sequence of behaviors. That is, we need some way to define a general impl and a
129 | tree of more specific impls where any invocation will resolve to the most
130 | specific applicable impl at compile time.
131 | 
132 | Outside of `feature(specialization)`, Rust has at least one other language
133 | feature capable of doing this, which is method resolution autoref.
134 | 
135 | As an introduction to autoref let's consider this program:
136 | 
137 | ```rust
138 | struct Value(i32);
139 | 
140 | impl Value {
141 |     fn print(&self) {
142 |         println!("it worked! {}", self.0);
143 |     }
144 | }
145 | 
146 | fn main() {
147 |     let v = Value(0);
148 |     v.print();
149 | }
150 | ```
151 | 
152 | We make a variable `v` of type `Value` and call a method on it. If you've
153 | written any Rust code it will be obvious to you *that* this code works, but I'd
154 | like to dig into *why* it works. In particular, we have a value of type `Value`
155 | but the method `print` takes an argument of type `&Value`. Where is the code
156 | that turns `Value` into `&Value`?
157 | 
158 | This is autoref &mdash; the compiler is inserting the required reference for you
159 | as part of resolving the method call. In effect, the code that executes is
160 | the same as if we had written `(&v).print()` or more explicitly
161 | `Value::print(&v)`, but it is "auto" because we never had to write `&` in the
162 | call.
163 | 
164 | Note: autoref is not the same as deref, which is a different thing that method
165 | resolution does. In a way they are opposites; autoref is about *adding* a layer
166 | of reference to resolve a call; deref is about *removing* a layer of reference.
167 | Both are ubiquitous but invisible.
168 | 
169 | <br>
170 | 
171 | ### Background: method resolution
172 | 
173 | How does autoref get us stable specialization? To answer that, let's look at
174 | what happens if the same method name could be dispatched either with or without
175 | autoref.
176 | 
177 | ```rust
178 | struct Value;
179 | 
180 | trait Print {
181 |     fn print(self);
182 | }
183 | 
184 | impl Print for Value {
185 |     fn print(self) {
186 |         println!("called on Value");
187 |     }
188 | }
189 | 
190 | impl Print for &Value {
191 |     fn print(self) {
192 |         println!("called on &Value");
193 |     }
194 | }
195 | 
196 | fn main() {
197 |     let v = Value;
198 |     v.print();
199 | }
200 | ```
201 | 
202 | Here `print` could refer to either `<Value as Print>::print` which takes an
203 | argument of type `Value`, or to `<&Value as Print>::print` which takes an
204 | argument of type `&Value`. If you run this program you'll see it prints "called
205 | on Value". But if the first impl were removed, it would then print "called on
206 | &amp;Value". In some sense the first impl is more specific from the point of
207 | view of the call we wrote; exactly what we'll need!
208 | 
209 | To define the compiler's behavior more precisely, the rule is that if a method
210 | can be dispatched without autoref then it will be. Only if a method cannot be
211 | dispatched without autoref will the compiler insert an autoref and attempt to
212 | resolve it again.
213 | 
214 | This and some creativity should be all we need to solve the use cases that we
215 | saw up top.
216 | 
217 | <br>
218 | 
219 | ### Simple application
220 | 
221 | Recall that we have a String conversion that we wanted to implement in one way
222 | for any `T: Display` and in a more performant specialized way for specifically
223 | `String`.
224 | 
225 | Here is the full implementation:
226 | 
227 | ```rust
228 | use std::fmt::{Display, Write};
229 | 
230 | pub trait DisplayToString {
231 |     fn my_to_string(&self) -> String;
232 | }
233 | 
234 | // General impl that applies to any T with a Display impl.
235 | //
236 | // Note that the Self type of this impl is &T and so the method argument
237 | // is actually &&T! That makes this impl lower priority during method
238 | // resolution if the impl that accepts &String would also apply.
239 | impl<T: Display> DisplayToString for &T {
240 |     fn my_to_string(&self) -> String {
241 |         println!("called blanket impl");
242 | 
243 |         let mut buf = String::new();
244 |         buf.write_fmt(format_args!("{}", self)).unwrap();
245 |         buf.shrink_to_fit();
246 |         buf
247 |     }
248 | }
249 | 
250 | pub trait StringToString {
251 |     fn my_to_string(&self) -> String;
252 | }
253 | 
254 | // Specialized impl to bypass the relatively expensive std::fmt machinery.
255 | //
256 | // The method argument is typed &String.
257 | impl StringToString for String {
258 |     fn my_to_string(&self) -> String {
259 |         println!("called specialized impl");
260 | 
261 |         self.clone()
262 |     }
263 | }
264 | 
265 | macro_rules! convert_to_strings {
266 |     ($($e:expr),*) => {
267 |         [$(
268 |             (&$e).my_to_string()
269 |         ),*]
270 |     };
271 | }
272 | 
273 | fn main() {
274 |     let owned_string = "hacks".to_owned();
275 |     let strings = convert_to_strings![1, "&str", owned_string];
276 |     println!("{:?}", strings);
277 | }
278 | ```
279 | 
280 | If we run this program the output shows that our specialization works!
281 | 
282 | ```console
283 | called blanket impl
284 | called blanket impl
285 | called specialized impl
286 | ["1", "&str", "hacks"]
287 | ```
288 | 
289 | <br>
290 | 
291 | ### Realistic application
292 | 
293 | Recall that we have an Error type that we'd like to construct from any `T` that
294 | implements `Display`, but using a different constructor if `T` also implements
295 | `std::error::Error`.
296 | 
297 | The reason this is more complicated than the previous use case is that my Error
298 | constructors want to receive the argument *by value*! That's bad news if we are
299 | relying on autoref because autoref is all about inserting a layer of reference.
300 | 
301 | Instead we'll use a tagged dispatch strategy with a pair of method calls, the
302 | first using autoref-based specialization with a reference argument to select a
303 | tag, and the second based on that tag which takes ownership of the original
304 | argument.
305 | 
306 | ```rust
307 | use std::error::Error as StdError;
308 | use std::fmt::Display;
309 | 
310 | pub struct Error(/* ... */);
311 | 
312 | // Our two constructors. The first is more general.
313 | impl Error {
314 |     pub(crate) fn from_fmt<T: Display>(error: T) -> Self {
315 |         println!("called Error::from_fmt");
316 |         Error {}
317 |     }
318 |     pub(crate) fn from_std_error<T: StdError>(error: T) -> Self {
319 |         _ = error.source(); // it works!
320 |         println!("called Error::from_std_error");
321 |         Error {}
322 |     }
323 | }
324 | 
325 | macro_rules! anyhow {
326 |     ($err:expr) => ({
327 |         #[allow(unused_imports)]
328 |         use $crate::{DisplayKind, StdErrorKind};
329 |         match $err {
330 |             error => (&error).anyhow_kind().new(error),
331 |         }
332 |     });
333 | }
334 | 
335 | // If the arg implements Display but not StdError, anyhow_kind() will
336 | // return this tag.
337 | struct DisplayTag;
338 | 
339 | trait DisplayKind {
340 |     #[inline]
341 |     fn anyhow_kind(&self) -> DisplayTag {
342 |         DisplayTag
343 |     }
344 | }
345 | 
346 | // Requires one extra autoref to call! Lower priority than StdErrorKind.
347 | impl<T: Display> DisplayKind for &T {}
348 | 
349 | impl DisplayTag {
350 |     #[inline]
351 |     fn new<M: Display>(self, message: M) -> Error {
352 |         Error::from_fmt(message)
353 |     }
354 | }
355 | 
356 | // If the arg implements StdError (and thus also Display), anyhow_kind()
357 | // will return this tag.
358 | struct StdErrorTag;
359 | 
360 | trait StdErrorKind {
361 |     #[inline]
362 |     fn anyhow_kind(&self) -> StdErrorTag {
363 |         StdErrorTag
364 |     }
365 | }
366 | 
367 | // Does not require any autoref if called as (&error).anyhow_kind().
368 | impl<T: StdError> StdErrorKind for T {}
369 | 
370 | impl StdErrorTag {
371 |     #[inline]
372 |     fn new<E: StdError>(self, error: E) -> Error {
373 |         Error::from_std_error(error)
374 |     }
375 | }
376 | 
377 | fn main() {
378 |     // Turn a &str into an error.
379 |     // &str implements Display but not std::error::Error.
380 |     let _err = anyhow!("oh no!");
381 | 
382 |     // Turn an existing std::error::Error value into our error without
383 |     // losing its source() and backtrace() if there is one.
384 |     let io_error = std::fs::read("/tmp/nonexist").unwrap_err();
385 |     let _err = anyhow!(io_error);
386 | }
387 | ```
388 | 
389 | <br>
390 | 
391 | ### Limitations
392 | 
393 | The way that this technique applies method resolution cannot be described by a
394 | trait bound, so for practical purposes you should think of this technique as
395 | working in macros only.
396 | 
397 | That is, we can't do:
398 | 
399 | ```rust
400 | pub fn demo<T: ???>(value: T) -> String {
401 |     (&value).my_to_string()
402 | }
403 | ```
404 | 
405 | and get the specialized behavior. If we put `T: Display` in the trait bound,
406 | method resolution will use the impl for `T: Display` even if `T` happened to be
407 | instantiated as `String`.
408 | 
409 | Depending on your use case, this is honestly fine! If you are a macro already
410 | then you're all set. If you can be made a macro, that's good too (like I did for
411 | `anyhow!`, though it was good for that to be a macro anyway so that it can
412 | accept format args the way `println!` does). If you can't possibly be a macro
413 | then this won't help you.
414 | 
415 | I am excited to hear other people's experience applying this technique and I
416 | expect it to generalize quite well.
417 | 


--------------------------------------------------------------------------------
/bitfield-assertion/README.md:
--------------------------------------------------------------------------------
  1 | ## Multiple of 8 const assertion
  2 | 
  3 | We need a macro that will fail to compile if some expression is not a multiple
  4 | of 8, without knowing the value of the expression until after name resolution
  5 | which happens after macro expansion.
  6 | 
  7 | This came up in the context of bitfields where sizes of fields are specified in
  8 | bits but the application would like to require that the total size is an exact
  9 | number of bytes.
 10 | 
 11 | ```rust
 12 | trait Field {
 13 |     const BITS: usize;
 14 | }
 15 | 
 16 | enum B3 {}
 17 | impl Field for B3 {
 18 |     const BITS: usize = 3;
 19 | }
 20 | 
 21 | enum B5 {}
 22 | impl Field for B5 {
 23 |     const BITS: usize = 5;
 24 | }
 25 | 
 26 | fn main() {
 27 |     require_multiple_of_eight!(B3::BITS + B5::BITS);
 28 | }
 29 | ```
 30 | 
 31 | As always, we would like the error message to be as precise and useful as
 32 | possible even though in this case the macro does not control the exact message
 33 | because this error can only be detected after name resolution.
 34 | 
 35 | <br>
 36 | 
 37 | ### First attempt
 38 | 
 39 | The two main ways a macro can trigger compile-time errors after macro expansion
 40 | are in const evaluation and in type checking.
 41 | 
 42 | Let's look at const evaluation first by writing a `const` that can be
 43 | successfully computed if and only if the input expression is a multiple of 8.
 44 | There are many ways to do this but one way is to use `$e % 8` as an index into
 45 | an array where the only legal index would be 0.
 46 | 
 47 | ```rust
 48 | macro_rules! require_multiple_of_eight {
 49 |     ($e:expr) => {
 50 |         const REQUIRE_MULTIPLE_OF_EIGHT: () = [()][$e % 8];
 51 |         _ = REQUIRE_MULTIPLE_OF_EIGHT;
 52 |     };
 53 | }
 54 | ```
 55 | 
 56 | This seems like it should get the job done but it doesn't quite. There are some
 57 | weird optimizations around const evaluation. In particular a `cargo check` would
 58 | not need to evaluate this constant. It does a simple type check only which
 59 | determines that *if* the constant does evaluate successfully then its type would
 60 | be `()` which matches the declared type so everything is okay. On the other hand
 61 | `cargo build` does need to perform the evaluation. We end up in a situation
 62 | where `cargo check` can succeed at the same time as `cargo build` fails, which
 63 | is not good.
 64 | 
 65 | Separately, this approach does not give us any opportunity to control the
 66 | message part of the error. If the same macro needed to evaluate multiple
 67 | assertions, the caller couldn't tell which one was failing.
 68 | 
 69 | The message looks like:
 70 | 
 71 | ```console
 72 | error[E0080]: erroneous constant used
 73 |  --> src/main.rs:8:10
 74 |   |
 75 | 8 | #[derive(Bitfield)]
 76 |   |          ^^^^^^^^ referenced constant has errors
 77 | ```
 78 | 
 79 | <br>
 80 | 
 81 | ### Second attempt
 82 | 
 83 | Let's use `$e` to produce something that only type checks if the given
 84 | expression is a multiple of 8.
 85 | 
 86 | Currently the only place that expressions can appear in the type grammar is in
 87 | the length of a fixed-size array, so we will rely on that.
 88 | 
 89 | ```rust
 90 | macro_rules! require_multiple_of_eight {
 91 |     ($e:expr) => {
 92 |         _ = <[(); $e % 8] as $crate::MultipleOfEight>::check();
 93 |     };
 94 | }
 95 | 
 96 | trait MultipleOfEight {
 97 |     fn check() {}
 98 | }
 99 | 
100 | impl MultipleOfEight for [(); 0] {}
101 | ```
102 | 
103 | This is pretty good! The array type `[(); $e % 8]` only implements the required
104 | trait if `$e % 8` is zero. The trait solver's error message mentions
105 | "MultipleOfEight" which adequately indicates to the user what went wrong.
106 | 
107 | ```console
108 | error[E0277]: the trait bound `[(); 6]: MultipleOfEight` is not satisfied
109 |  --> src/main.rs:8:10
110 |   |
111 | 8 | #[derive(Bitfield)]
112 |   |          ^^^^^^^^ the trait `MultipleOfEight` is not implemented for `[(); 6]`
113 |   |
114 |   = help: the following implementations were found:
115 |             <[(); 0] as MultipleOfEight>
116 |   = note: required by `MultipleOfEight::check`
117 | ```
118 | 
119 | There are some things to improve upon though. The error message includes this
120 | distracting array type `[(); 6]` that is not obviously related to what the
121 | caller might have written. Also the note mentioning the method
122 | `MultipleOfEight::check` is just noise as far as the caller would be concerned.
123 | 
124 | <br>
125 | 
126 | ### Solution
127 | 
128 | Let's solve this without a method call and without the array type being the
129 | thing with a missing trait impl.
130 | 
131 | ```rust
132 | macro_rules! require_multiple_of_eight {
133 |     ($e:expr) => {
134 |         let _: $crate::MultipleOfEight<[(); $e % 8]>;
135 |     };
136 | }
137 | 
138 | type MultipleOfEight<T> = <<T as Array>::Marker as TotalSizeIsMultipleOfEightBits>::Check;
139 | 
140 | enum ZeroMod8 {}
141 | enum OneMod8 {}
142 | enum TwoMod8 {}
143 | enum ThreeMod8 {}
144 | enum FourMod8 {}
145 | enum FiveMod8 {}
146 | enum SixMod8 {}
147 | enum SevenMod8 {}
148 | 
149 | trait Array {
150 |     type Marker;
151 | }
152 | 
153 | impl Array for [(); 0] {
154 |     type Marker = ZeroMod8;
155 | }
156 | 
157 | impl Array for [(); 1] {
158 |     type Marker = OneMod8;
159 | }
160 | 
161 | impl Array for [(); 2] {
162 |     type Marker = TwoMod8;
163 | }
164 | 
165 | impl Array for [(); 3] {
166 |     type Marker = ThreeMod8;
167 | }
168 | 
169 | impl Array for [(); 4] {
170 |     type Marker = FourMod8;
171 | }
172 | 
173 | impl Array for [(); 5] {
174 |     type Marker = FiveMod8;
175 | }
176 | 
177 | impl Array for [(); 6] {
178 |     type Marker = SixMod8;
179 | }
180 | 
181 | impl Array for [(); 7] {
182 |     type Marker = SevenMod8;
183 | }
184 | 
185 | trait TotalSizeIsMultipleOfEightBits {
186 |     type Check;
187 | }
188 | 
189 | impl TotalSizeIsMultipleOfEightBits for ZeroMod8 {
190 |     type Check = ();
191 | }
192 | ```
193 | 
194 | In this code the `<T as Array>::Marker` always resolves to one of `ZeroMod8`
195 | through `SevenMod8`. But then only `ZeroMod8` implements
196 | `TotalSizeIsMultipleOfEightBits`.
197 | 
198 | Here is the error message, pretty helpful and free of the distractions from the
199 | second attempt.
200 | 
201 | ```console
202 | error[E0277]: the trait bound `SixMod8: TotalSizeIsMultipleOfEightBits` is not satisfied
203 |  --> src/main.rs:8:10
204 |   |
205 | 8 | #[derive(Bitfield)]
206 |   |          ^^^^^^^^ the trait `TotalSizeIsMultipleOfEightBits` is not implemented for `SixMod8`
207 | ```
208 | 
209 | <br>
210 | 
211 | ### Future
212 | 
213 | Someone should write an RFC for const\_assert. Something like:
214 | 
215 | ```rust
216 | const_assert!($e % 8 == 0, "total size is required to be a multiple of 8 bits");
217 | ```
218 | 
219 | Having this provided by the compiler would let us give better error messages
220 | with less effort than the solution above.
221 | 


--------------------------------------------------------------------------------
/bitfield-assertion/demo/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "case-study-bitfield-assertion"
 3 | version = "0.0.0"
 4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
 5 | edition = "2021"
 6 | publish = false
 7 | 
 8 | [[bin]]
 9 | name = "case-study"
10 | path = "main.rs"
11 | 
12 | [dependencies]
13 | bitfield = { path = "bitfield" }
14 | 


--------------------------------------------------------------------------------
/bitfield-assertion/demo/bitfield/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "bitfield"
 3 | version = "0.0.0"
 4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
 5 | edition = "2021"
 6 | publish = false
 7 | 
 8 | [dependencies]
 9 | bitfield-impl = { path = "../impl" }
10 | 


--------------------------------------------------------------------------------
/bitfield-assertion/demo/bitfield/src/lib.rs:
--------------------------------------------------------------------------------
 1 | pub use bitfield_impl::bitfield;
 2 | 
 3 | pub trait Field {
 4 |     const BITS: usize;
 5 | }
 6 | 
 7 | bitfield_impl::generate_specifiers!();
 8 | 
 9 | pub type MultipleOfEight<T> = <<T as Array>::Marker as TotalSizeIsMultipleOfEightBits>::Check;
10 | 
11 | pub enum ZeroMod8 {}
12 | pub enum OneMod8 {}
13 | pub enum TwoMod8 {}
14 | pub enum ThreeMod8 {}
15 | pub enum FourMod8 {}
16 | pub enum FiveMod8 {}
17 | pub enum SixMod8 {}
18 | pub enum SevenMod8 {}
19 | 
20 | pub trait Array {
21 |     type Marker;
22 | }
23 | 
24 | impl Array for [(); 0] {
25 |     type Marker = ZeroMod8;
26 | }
27 | 
28 | impl Array for [(); 1] {
29 |     type Marker = OneMod8;
30 | }
31 | 
32 | impl Array for [(); 2] {
33 |     type Marker = TwoMod8;
34 | }
35 | 
36 | impl Array for [(); 3] {
37 |     type Marker = ThreeMod8;
38 | }
39 | 
40 | impl Array for [(); 4] {
41 |     type Marker = FourMod8;
42 | }
43 | 
44 | impl Array for [(); 5] {
45 |     type Marker = FiveMod8;
46 | }
47 | 
48 | impl Array for [(); 6] {
49 |     type Marker = SixMod8;
50 | }
51 | 
52 | impl Array for [(); 7] {
53 |     type Marker = SevenMod8;
54 | }
55 | 
56 | pub trait TotalSizeIsMultipleOfEightBits {
57 |     type Check;
58 | }
59 | 
60 | impl TotalSizeIsMultipleOfEightBits for ZeroMod8 {
61 |     type Check = ();
62 | }
63 | 


--------------------------------------------------------------------------------
/bitfield-assertion/demo/impl/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "bitfield-impl"
 3 | version = "0.0.0"
 4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
 5 | edition = "2021"
 6 | publish = false
 7 | 
 8 | [lib]
 9 | proc-macro = true
10 | 
11 | [dependencies]
12 | quote = "1.0"
13 | syn = "2.0"
14 | 


--------------------------------------------------------------------------------
/bitfield-assertion/demo/impl/src/lib.rs:
--------------------------------------------------------------------------------
 1 | use proc_macro::TokenStream;
 2 | use quote::{format_ident, quote};
 3 | use syn::{parse_macro_input, Data, DeriveInput};
 4 | 
 5 | #[proc_macro_attribute]
 6 | pub fn bitfield(_args: TokenStream, input: TokenStream) -> TokenStream {
 7 |     let input = parse_macro_input!(input as DeriveInput);
 8 | 
 9 |     let fields = match &input.data {
10 |         Data::Struct(data) => data.fields.iter().map(|field| &field.ty),
11 |         _ => unimplemented!(),
12 |     };
13 | 
14 |     TokenStream::from(quote! {
15 |         fn __bitfield() {
16 |             let _: bitfield::MultipleOfEight<
17 |                 [(); (0 #(+ <#fields as bitfield::Field>::BITS)*) % 8]
18 |             >;
19 |         }
20 |     })
21 | }
22 | 
23 | #[proc_macro]
24 | pub fn generate_specifiers(_input: TokenStream) -> TokenStream {
25 |     (0usize..=64usize)
26 |         .map(|width| {
27 |             let name = format_ident!("B{}", width);
28 |             TokenStream::from(quote! {
29 |                 pub enum #name {}
30 | 
31 |                 impl Field for #name {
32 |                     const BITS: usize = #width;
33 |                 }
34 |             })
35 |         })
36 |         .collect()
37 | }
38 | 


--------------------------------------------------------------------------------
/bitfield-assertion/demo/main.rs:
--------------------------------------------------------------------------------
 1 | use bitfield::*;
 2 | 
 3 | #[bitfield] // (1+3+4+23)%8 != 0
 4 | struct NotQuiteFourBytes {
 5 |     a: B1,
 6 |     b: B3,
 7 |     c: B4,
 8 |     d: B23,
 9 | }
10 | 
11 | fn main() {}
12 | 


--------------------------------------------------------------------------------
/callable-types/README.md:
--------------------------------------------------------------------------------
  1 | ## User-defined callable types
  2 | 
  3 | Various languages have ways of making user-defined objects callable with
  4 | function call syntax: C++'s [`operator ()`][cpp], Python's [`__call__`][python],
  5 | Swift's [`@dynamicCallable`][swift], Kotlin's [`invoke`][kotlin], PHP's
  6 | [`__invoke`][php], Scala's [`apply`][scala], etc.
  7 | 
  8 | [cpp]: https://en.cppreference.com/w/cpp/language/operators#Function_call_operator
  9 | [python]: https://docs.python.org/3/reference/datamodel.html#object.__call__
 10 | [swift]: https://docs.swift.org/swift-book/ReferenceManual/Attributes.html
 11 | [kotlin]: https://kotlinlang.org/docs/reference/operator-overloading.html#invoke
 12 | [php]: https://www.php.net/manual/en/language.oop5.magic.php#object.invoke
 13 | [scala]: https://scala-lang.org/files/archive/spec/2.12/06-expressions.html#function-applications
 14 | 
 15 | Something along these lines exists in Rust in the form of the [`std::ops::Fn`]
 16 | trait. When you write a closure expression, under the hood it becomes a struct
 17 | with some unique type that captures the necessary state from the closure's
 18 | environment and provides an implementation of this `Fn` trait to make it
 19 | callable. This isn't quite like the examples cited from other languages because
 20 | the trait can only be implemented by the compiler, not by the user for their own
 21 | data structures.
 22 | 
 23 | [`std::ops::Fn`]: https://doc.rust-lang.org/nightly/std/ops/trait.Fn.html
 24 | 
 25 | I was playing around with this functionality involving closures to stretch the
 26 | possibilities a bit. Mainly I wondered whether there is anything that can be
 27 | written in the gap in the code below to make our data structure work like a
 28 | callable function object *on a stable compiler* despite this not being a feature
 29 | of the language.
 30 | 
 31 | ```rust
 32 | /// Function object that adds some number to its input.
 33 | struct Plus {
 34 |     n: u32,
 35 | }
 36 | 
 37 | impl Plus {
 38 |     fn call(&self, arg: u32) -> u32 {
 39 |         self.n + arg
 40 |     }
 41 | }
 42 | 
 43 | // [Something special here ...]
 44 | 
 45 | fn main() {
 46 |     let one_plus = Plus { n: 1 };
 47 |     let sum = one_plus(2);
 48 |     assert_eq!(sum, 1 + 2);
 49 | }
 50 | ```
 51 | 
 52 | It turns out that yes, it is possible to make this work (with caveats).
 53 | 
 54 | <br>
 55 | 
 56 | ### Background
 57 | 
 58 | We will use an interesting combination of `Deref`, closures, trait objects, and
 59 | unsafe code.
 60 | 
 61 | We will stick to functions with the signature `fn(&self, u32) -> u32` to get the
 62 | simplest thing working, but everything generalizes to other signatures.
 63 | 
 64 | To explain the relevance of `Deref`, observe that the function call operator
 65 | performs deref coercions to find a `Fn` impl. In the following code we write
 66 | `f(2)` to call an object `f` of type `&Callable`, which does not itself
 67 | implement the `Fn` trait. But `&Callable` dereferences to `&fn(u32) -> u32`
 68 | which does, so that is what gets called.
 69 | 
 70 | ```rust
 71 | use std::ops::Deref;
 72 | 
 73 | struct Callable;
 74 | 
 75 | impl Deref for Callable {
 76 |     type Target = fn(u32) -> u32;
 77 | 
 78 |     fn deref(&self) -> &'static Self::Target {
 79 |         &(one_plus as fn(u32) -> u32)
 80 |     }
 81 | }
 82 | 
 83 | fn one_plus(arg: u32) -> u32 {
 84 |     1 + arg
 85 | }
 86 | 
 87 | fn main() {
 88 |     let f = &Callable;
 89 |     assert_eq!(f(2), 1 + 2);
 90 | }
 91 | ```
 92 | 
 93 | <br>
 94 | 
 95 | ### First attempt
 96 | 
 97 | The code under Background is syntactically on the right track because it enables
 98 | writing parentheses for function call notation on a value of user-defined type.
 99 | But since the thing being called in that code after deref coercion is just a
100 | function pointer, the value of `self` (the object being invoked as a function)
101 | is not accessible to the function body, which makes this severely limited in
102 | usefulness.
103 | 
104 | What we want conceptually is this kind of thing:
105 | 
106 | ```rust
107 | impl Callable {
108 |     fn call(&self, arg: u32) -> u32 {
109 |         // Function body
110 |     }
111 | }
112 | 
113 | impl Deref for Callable {
114 |     type Target = ???;
115 | 
116 |     fn deref(&self) -> &Self::Target {
117 |         &|arg| self.call(arg)
118 |     }
119 | }
120 | ```
121 | 
122 | That is, the thing being called after deref coercion would be a closure that has
123 | captured `self` and receives all the non-`self` args to set up a call to the
124 | intended function body.
125 | 
126 | We can even spell out a type for `Target` that makes this look correctly typed.
127 | 
128 | ```rust
129 | impl Deref for Callable {
130 |     type Target = dyn Fn(u32) -> u32;
131 | 
132 |     fn deref(&self) -> &Self::Target {
133 |         &|arg| self.call(arg)
134 |     }
135 | }
136 | ```
137 | 
138 | The borrow checker explains (not that clearly in this case) that this
139 | implementation would not be sound. The reference being returned by `deref` is
140 | dangling because it refers to a closure object on the stack frame of the `deref`
141 | call that is destroyed during the return.
142 | 
143 | ```console
144 | error[E0495]: cannot infer an appropriate lifetime due to conflicting requirements
145 |   --> src/main.rs:15:10
146 |    |
147 | 15 |         &|arg| self.call(arg)
148 |    |          ^^^^^^^^^^^^^^^^^^^^
149 |    |
150 | note: first, the lifetime cannot outlive the anonymous lifetime #1 defined on the method body at 14:5...
151 |   --> src/main.rs:14:5
152 |    |
153 | 14 | /     fn deref(&self) -> &Self::Target {
154 | 15 | |         &|arg| self.call(arg)
155 | 16 | |     }
156 |    | |_____^
157 |    = note: ...so that the types are compatible:
158 |            expected &&Callable
159 |               found &&Callable
160 |    = note: but, the lifetime must be valid for the static lifetime...
161 |    = note: ...so that the expression is assignable:
162 |            expected &(dyn std::ops::Fn(u32) -> u32 + 'static)
163 |               found &dyn std::ops::Fn(u32) -> u32
164 | ```
165 | 
166 | To see it more clearly, this closure would have desugared to something like the
167 | following:
168 | 
169 | ```rust
170 | impl Deref for Callable {
171 |     type Target = dyn Fn(u32) -> u32;
172 | 
173 |     fn deref(&self) -> &Self::Target {
174 |         // Generated by the compiler as the memory representation
175 |         // of `|arg| self.call(arg)`.
176 |         struct GeneratedClosure<'a> {
177 |             self_: &'a Callable,
178 |         }
179 | 
180 |         // Also generated by the compiler.
181 |         impl<'a> Fn(u32) -> u32 for GeneratedClosure<'a> {
182 |             fn call(&self, arg: u32) -> u32 {
183 |                 let self_ = self.self_;
184 | 
185 |                 // Body of `|arg| self.call(arg)`.
186 |                 self_.call(arg)
187 |             }
188 |         }
189 | 
190 |         // Expanded view of `&|arg| self.call(arg)`.
191 |         let generated_closure = GeneratedClosure { self_: self };
192 |         let reference_to_closure: &GeneratedClosure = &generated_closure;
193 |         let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
194 |         reference_to_trait_object
195 |     }
196 | }
197 | ```
198 | 
199 | <br>
200 | 
201 | ### Second attempt
202 | 
203 | If we temporarily conflate the types `GeneratedClosure` and `&Callable`, notice
204 | how in the desugared code from the first attempt we have `deref` returning
205 | `&&Callable` (as a reference to trait object) and `GeneratedClosure::call`
206 | accepting `&&Callable` as its first argument. The inner reference lives long
207 | enough to match deref's signature but the outer reference does not; the outer
208 | reference points to the inner reference which exists on `deref`'s stack frame
209 | and goes out of scope.
210 | 
211 | What we would love to trick the compiler into doing is something more like:
212 | 
213 | ```rust
214 | impl Deref for Callable {
215 |     type Target = dyn Fn(u32) -> u32;
216 | 
217 |     fn deref(&self) -> &Self::Target {
218 |         // Generated by the compiler (???)
219 |         #[repr(transparent)]
220 |         struct GeneratedClosure {
221 |             self_: Callable,
222 |         }
223 | 
224 |         // Also generated by the compiler (???)
225 |         impl Fn(u32) -> u32 for GeneratedClosure {
226 |             fn call(&self, arg: u32) -> u32 {
227 |                 let self_ = &self.self_;
228 | 
229 |                 // Body of the closure we would write.
230 |                 self_.call(arg)
231 |             }
232 |         }
233 | 
234 |         let reference_to_closure = &GeneratedClosure { self_: *self };
235 |         let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
236 |         reference_to_trait_object
237 |     }
238 | }
239 | ```
240 | 
241 | Here instead we have `deref` returning `&Callable` (as a reference to trait
242 | object) and `GeneratedClosure::call` accepting `&Callable`. The conversion from
243 | `&Callable` to `&GeneratedClosure` is sound as long as `Callable` and
244 | `GeneratedClosure` have the same memory representation, which would be
245 | guaranteed by `#[repr(transparent)]`. That conversion results in a reference
246 | pointing to the caller's `Callable` rather than to anything on `deref`'s stack
247 | frame, so it lives long enough that this would be a safe and working
248 | implementation of the intended functionality.
249 | 
250 | Let's think about what closure we would need to write in order for the compiler
251 | to come up with the above data structure and `Fn` trait impl.
252 | 
253 | We know it would need to capture a value of type `Callable` by value. This
254 | begins to sound problematic because there would never exist an owned value of
255 | type `Callable` accessible to the `Deref` impl, only as a borrowed `&Callable`.
256 | 
257 | But an imaginary uninitialized `Callable` gets the job done:
258 | 
259 | ```rust
260 | let uninit_callable: Callable = unsafe { mem::uninitialized() };
261 | let uninit_closure = move |arg: u32| Callable::call(&uninit_callable, arg);
262 | mem::forget(uninit_closure);
263 | ```
264 | 
265 | This code makes an uninitialized owned `Callable`, moves ownership of it into a
266 | closure that captures a `Callable` by value and nothing else, and then prevents
267 | a `Drop` call on the closure because we must not drop its uninitialized
268 | contents. At runtime this would all be a no-op, but it gets the compiler to
269 | generate the right data structure and `Fn` trait impl shown above.
270 | 
271 | The remaining part is to turn `self` into a trait object based on this `Fn`
272 | impl, the equivalent of `&GeneratedClosure { self_: *self } as &dyn Fn(u32) ->
273 | u32`.
274 | 
275 | Ordinarily we would reach for a `mem::transmute::<&Callable,
276 | &GeneratedClosure>(self)` or `&*(self as *const Callable as *const
277 | GeneratedClosure)`, but in this case that won't work because the closure's real
278 | type is generated and does not have a name that we can refer to. A different
279 | technique is needed:
280 | 
281 | ```rust
282 | fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
283 |     b
284 | }
285 | let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
286 | ```
287 | 
288 | This uses generic type inference to deduce the return type of the transmute as
289 | identical to a reference to the closure's type, whatever that might be.
290 | 
291 | At this point we have a closure to make into a trait object.
292 | 
293 | ```rust
294 | let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
295 | ```
296 | 
297 | The impl all at once looks like:
298 | 
299 | ```rust
300 | impl Deref for Callable {
301 |     type Target = dyn Fn(u32) -> u32;
302 | 
303 |     fn deref(&self) -> &Self::Target {
304 |         let uninit_callable: Self = unsafe { mem::uninitialized() };
305 |         let uninit_closure = move |arg: u32| Self::call(&uninit_callable, arg);
306 |         fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
307 |             b
308 |         }
309 |         let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
310 |         mem::forget(uninit_closure);
311 |         let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
312 |         reference_to_trait_object
313 |     }
314 | }
315 | ```
316 | 
317 | <br>
318 | 
319 | ### Third attempt
320 | 
321 | I called out `#[repr(transparent)]` earlier on, but then didn't bring it up
322 | again in the context of the closure-based implementation. We have written a
323 | closure that captures a type `Callable` by value so it makes sense why it would
324 | be represented like `struct GeneratedClosure { captured: Callable }` but:
325 | 
326 | - it is not a guarantee made by the language that a closure capturing `Callable`
327 |   by value is represented in memory the same as `struct { Callable }`;
328 | 
329 | - nor is it a guarantee that `struct { Callable }` would be represented the same
330 |   as `Callable`.
331 | 
332 | So this is the big caveat; don't count on this to work now or continue working
333 | in the future. Nothing on this page is a robust solution, only interesting. For
334 | now I think this is the closest we get, by adding an assertion as a basic smoke
335 | test that the closure matches the expected size:
336 | 
337 | ```rust
338 | use std::mem;
339 | use std::ops::Deref;
340 | 
341 | /// Function object that adds some number to its input.
342 | struct Plus {
343 |     n: u32,
344 | }
345 | 
346 | impl Plus {
347 |     fn call(&self, arg: u32) -> u32 {
348 |         self.n + arg
349 |     }
350 | }
351 | 
352 | impl Deref for Plus {
353 |     type Target = dyn Fn(u32) -> u32;
354 | 
355 |     fn deref(&self) -> &Self::Target {
356 |         let uninit_callable: Self = unsafe { mem::uninitialized() };
357 |         let uninit_closure = move |arg: u32| Self::call(&uninit_callable, arg);
358 |         let size_of_closure = mem::size_of_val(&uninit_closure);
359 |         fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
360 |             b
361 |         }
362 |         let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
363 |         mem::forget(uninit_closure);
364 |         assert_eq!(size_of_closure, mem::size_of::<Self>());
365 |         let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
366 |         reference_to_trait_object
367 |     }
368 | }
369 | 
370 | fn main() {
371 |     let one_plus = Plus { n: 1 };
372 |     let sum = one_plus(2);
373 |     assert_eq!(sum, 1 + 2);
374 | }
375 | ```
376 | 
377 | <br>
378 | 
379 | ### Fourth attempt
380 | 
381 | There is one remaining problem to sort out. The following line from the third
382 | attempt may contain undefined behavior:
383 | 
384 | ```rust
385 | let uninit_callable: Self = unsafe { mem::uninitialized() };
386 | ```
387 | 
388 | The most common way that creating an uninitialized value of an unknown
389 | type in generic code causes undefined behavior is if an expression like
390 | `mem::uninitialized::<T>()` might be instantiated with a choice of `T` that is
391 | uninhabited, such as the `!` type. When that happens, the compiler is free to
392 | turn the `mem::uninitialized` call into [`unreachable_unchecked`] and plummet
393 | off the end of your function, even though you intended for this line to be a
394 | noop.
395 | 
396 | [`unreachable_unchecked`]: https://doc.rust-lang.org/std/hint/fn.unreachable_unchecked.html
397 | 
398 | As used here, that's not a concern -- we know `Self` is inhabited at runtime
399 | because there exists a `&Self` in scope that was passed in by the caller. If
400 | `Self` were uninhabited, it would be impossible for the caller to have an
401 | instance of `Self` on which to borrow (`&self`) and call `deref`.
402 | 
403 | Instead we need to worry about the second most common way that creating
404 | uninitialized values of an unknown type causes undefined behavior, and that's if
405 | the uninitialized type has nontrivial validity invariants. In our case if the
406 | memory representation of `Self` contains a bool, char, `&`, `&mut`, Box,
407 | NonZero, or any other type where not all possible values are valid, then
408 | `mem::uninitialized::<Self>()` is immediate UB.
409 | 
410 | The correct way to manipulate uninitialized memory of generic type is through
411 | [`MaybeUninit`].
412 | 
413 | [`MaybeUninit`]: https://doc.rust-lang.org/std/mem/union.MaybeUninit.html
414 | 
415 | ```rust
416 | let uninit_callable = MaybeUninit::<Self>::uninit();
417 | let uninit_closure = move |arg: u32| Self::call(
418 |     unsafe { &*uninit_callable.as_ptr() },
419 |     arg,
420 | );
421 | ```
422 | 
423 | The final expanded code all together is:
424 | 
425 | ```rust
426 | use std::mem::{self, MaybeUninit};
427 | use std::ops::Deref;
428 | 
429 | /// Function object that adds some number to its input.
430 | struct Plus {
431 |     n: u32,
432 | }
433 | 
434 | impl Plus {
435 |     fn call(&self, arg: u32) -> u32 {
436 |         self.n + arg
437 |     }
438 | }
439 | 
440 | impl Deref for Plus {
441 |     type Target = dyn Fn(u32) -> u32;
442 | 
443 |     fn deref(&self) -> &Self::Target {
444 |         let uninit_callable = MaybeUninit::<Self>::uninit();
445 |         let uninit_closure = move |arg: u32| Self::call(
446 |             unsafe { &*uninit_callable.as_ptr() },
447 |             arg,
448 |         );
449 |         let size_of_closure = mem::size_of_val(&uninit_closure);
450 |         fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
451 |             b
452 |         }
453 |         let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
454 |         mem::forget(uninit_closure);
455 |         assert_eq!(size_of_closure, mem::size_of::<Self>());
456 |         let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
457 |         reference_to_trait_object
458 |     }
459 | }
460 | 
461 | fn main() {
462 |     let one_plus = Plus { n: 1 };
463 |     let sum = one_plus(2);
464 |     assert_eq!(sum, 1 + 2);
465 | }
466 | ```
467 | 
468 | <br>
469 | 
470 | ### Implementation
471 | 
472 | Packaging this up into a macro is the easy part. We would most likely want an
473 | attribute macro on an impl block that turns the block's one method into the fake
474 | `Fn` impl.
475 | 
476 | ```rust
477 | /// Function object that adds some number to its input.
478 | struct Plus {
479 |     n: u32,
480 | }
481 | 
482 | #[hackfn]
483 | impl Plus {
484 |     fn call(&self, arg: u32) -> u32 {
485 |         self.n + arg
486 |     }
487 | }
488 | 
489 | fn main() {
490 |     let one_plus = Plus { n: 1 };
491 |     let sum = one_plus(2);
492 |     assert_eq!(sum, 1 + 2);
493 | }
494 | ```
495 | 
496 | <br>
497 | 
498 | End note: I feel that the technique of returning trait objects from
499 | `&`-returning trait methods like `Deref`, `Index`, `Borrow` etc is underexplored
500 | and there are major impactful applications waiting to be discovered in that
501 | area. [This StackOverflow answer][hashmap] demonstrates one amazing example in
502 | the context of *How to implement HashMap with two keys?*. A more basic one is
503 | the [slice of a multidimensional array][refcast] example from RefCast; this
504 | involves a dynamically sized slice rather than a trait object but the underlying
505 | idea is similar. I think that these two and the case study are scratching the
506 | surface of something bigger with exciting applications. Note that those two
507 | links are both safe code; unsafe is not inherent to this technique.
508 | 
509 | [hashmap]: https://stackoverflow.com/a/45795699/6086311
510 | [refcast]: https://github.com/dtolnay/ref-cast#realistic-example
511 | 


--------------------------------------------------------------------------------
/callable-types/demo/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "case-study-callable-types"
 3 | version = "0.0.0"
 4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
 5 | edition = "2021"
 6 | publish = false
 7 | 
 8 | [[bin]]
 9 | name = "case-study"
10 | path = "main.rs"
11 | 


--------------------------------------------------------------------------------
/callable-types/demo/main.rs:
--------------------------------------------------------------------------------
 1 | use std::mem::{self, MaybeUninit};
 2 | use std::ops::Deref;
 3 | 
 4 | /// Function object that adds some number to its input.
 5 | struct Plus {
 6 |     n: u32,
 7 | }
 8 | 
 9 | impl Plus {
10 |     fn call(&self, arg: u32) -> u32 {
11 |         self.n + arg
12 |     }
13 | }
14 | 
15 | impl Deref for Plus {
16 |     type Target = dyn Fn(u32) -> u32;
17 | 
18 |     fn deref(&self) -> &Self::Target {
19 |         let uninit_callable = MaybeUninit::<Self>::uninit();
20 |         let uninit_closure = move |arg: u32| Self::call(unsafe { &*uninit_callable.as_ptr() }, arg);
21 |         let size_of_closure = mem::size_of_val(&uninit_closure);
22 |         fn second<'a, T>(_a: &T, b: &'a T) -> &'a T {
23 |             b
24 |         }
25 |         let reference_to_closure = second(&uninit_closure, unsafe { mem::transmute(self) });
26 |         mem::forget(uninit_closure);
27 |         assert_eq!(size_of_closure, mem::size_of::<Self>());
28 |         let reference_to_trait_object = reference_to_closure as &dyn Fn(u32) -> u32;
29 |         reference_to_trait_object
30 |     }
31 | }
32 | 
33 | fn main() {
34 |     let one_plus = Plus { n: 1 };
35 |     let sum = one_plus(2);
36 |     assert_eq!(sum, 1 + 2);
37 |     dbg!(one_plus(2));
38 | }
39 | 


--------------------------------------------------------------------------------
/function-epilogue/README.md:
--------------------------------------------------------------------------------
  1 | ## Function epilogue
  2 | 
  3 | For the [`#[no_panic]`][no-panic] macro I needed the ability to have some piece
  4 | of code invoked during all *panicking* exit paths out of a function.
  5 | 
  6 | [no-panic]: https://github.com/dtolnay/no-panic
  7 | 
  8 | <br>
  9 | 
 10 | ### First attempt
 11 | 
 12 | Having something execute on *all* exit paths is reasonably simple -- place a
 13 | guard object in a local variable and its `Drop` impl will run whether the
 14 | function body succeeds or panics. This may be a good approach for something like
 15 | instrumenting functions with tracing on entry and exit.
 16 | 
 17 | ```rust
 18 | // Before
 19 | fn f(a: Arg1, b: Arg2) -> Ret {
 20 |     // (Original function body)
 21 | }
 22 | 
 23 | // After; insert guard object
 24 | fn f(a: Arg1, b: Arg2) -> Ret {
 25 |     struct Guard;
 26 |     impl Drop for Guard {
 27 |         fn drop(&mut self) {
 28 |             // Do the thing
 29 |         }
 30 |     }
 31 |     let _guard = Guard;
 32 | 
 33 |     // (Original function body)
 34 | }
 35 | ```
 36 | 
 37 | From here we can have the guard's `Drop` impl check
 38 | [`std::thread::panicking`][panicking] to determine whether the call is taking
 39 | place during a panicking exit path.
 40 | 
 41 | [panicking]: https://doc.rust-lang.org/std/thread/fn.panicking.html
 42 | 
 43 | ```rust
 44 | impl Drop for Guard {
 45 |     fn drop(&mut self) {
 46 |         if std::thread::panicking() {
 47 |             // Do the thing
 48 |         }
 49 |     }
 50 | }
 51 | ```
 52 | 
 53 | Two things made this not suitable for my case:
 54 | 
 55 | - There is no equivalent in libcore, so this only works if my caller's crate is
 56 |   using the standard library.
 57 | 
 58 | - The code inside of `if std::thread::panicking() { ... }` gets linked whether
 59 |   or not a panic is possible. The implementation of the panicking check is based
 60 |   on reading a panic counter out of a thread\_local and cannot be optimized out.
 61 |   In the case of `#[no_panic]`, the whole macro is based on using the
 62 |   information of whether something gets linked to tell whether a panic is
 63 |   possible so I needed the linking to behave well.
 64 | 
 65 | <br>
 66 | 
 67 | ### Second attempt
 68 | 
 69 | Let's evaluate the body of the function and then make the guard not get dropped
 70 | if the function produces a value as opposed to panicking.
 71 | 
 72 | ```rust
 73 | fn f(a: Arg1, b: Arg2) -> Ret {
 74 |     struct Guard;
 75 |     impl Drop for Guard {
 76 |         fn drop(&mut self) {
 77 |             // Do the thing
 78 |         }
 79 |     }
 80 |     let guard = Guard;
 81 | 
 82 |     let value = {
 83 |         // (Original function body)
 84 |     };
 85 | 
 86 |     mem::forget(guard);
 87 |     value
 88 | }
 89 | ```
 90 | 
 91 | If the original function panics, we don't make it to the `mem::forget` so the
 92 | guard object is dropped as part of dropping the stack frame of `f` during the
 93 | panic. If the original function body returns without panicking, we skip the
 94 | guard's drop prior to returning from `f`.
 95 | 
 96 | This is on the right track! It works with no\_std, and no longer relies on the
 97 | thread\_local inside of `std::thread::panicking` so it optimizes away extremely
 98 | reliably in functions that can never panic.
 99 | 
100 | There is a problem around functions that contain a `return` expression. If the
101 | original function body performs a `return`, that would now return from `f`
102 | without running `mem::forget` on the guard object, so the thing that we want to
103 | run only when panicking would incorrectly run.
104 | 
105 | <br>
106 | 
107 | ### Third attempt
108 | 
109 | Let's consolidate all the non-panicking exit paths into one place via a function
110 | call and make the guard not get dropped if the function call returns without
111 | panicking.
112 | 
113 | ```rust
114 | fn f(a: Arg1, b: Arg2) -> Ret {
115 |     struct Guard;
116 |     impl Drop for Guard {
117 |         fn drop(&mut self) {
118 |             // Do the thing
119 |         }
120 |     }
121 |     let guard = Guard;
122 | 
123 |     fn original_f(a: Arg1, b: Arg2) -> Ret {
124 |         // (Original function body)
125 |     }
126 |     let value = original_f(a, b);
127 | 
128 |     mem::forget(guard);
129 |     value
130 | }
131 | ```
132 | 
133 | This is like the second attempt except that it works when the original function
134 | body contains a `return` expression.
135 | 
136 | This is pretty good. It has the desired behavior and is compatible with most
137 | function signatures.
138 | 
139 | <br>
140 | 
141 | ### Fourth attempt
142 | 
143 | What do we do in this case?
144 | 
145 | ```rust
146 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
147 |     ...
148 | }
149 | ```
150 | 
151 | The scheme from the third attempt of duplicating the function signature into an
152 | internal `original_f` will not work because `&self` arguments can only occur in
153 | members of an impl block, not in any other position that a function can be
154 | defined.
155 | 
156 | ```rust
157 | struct S;
158 | 
159 | impl S {
160 |     fn f(&self, a: Arg1, b: Arg2) -> Ret {
161 |         ...
162 |         let guard = Guard;
163 | 
164 |         fn original_f(&self, a: Arg1, b: Arg2) -> Ret {
165 |             // (Original function body)
166 |         }
167 |         let value = original_f(self, a, b);
168 | 
169 |         mem::forget(guard);
170 |         value
171 |     }
172 | }
173 | ```
174 | 
175 | ```console
176 | error: unexpected `self` argument in function
177 |  --> src/main.rs:8:24
178 |   |
179 | 8 |         fn original_f(&self, a: Arg1, b: Arg2) -> Ret {
180 |   |                        ^^^^ `self` is only valid as the first argument of an associated function
181 | ```
182 | 
183 | It doesn't work to try to generate `fn original_f(_self: &S, ...) -> Ret`
184 | because the macro generating this will be an attribute macro placed on the
185 | function -- it would only receive the function `f` as input, not including the
186 | impl block header, so the correct type for `self` can't be known.
187 | 
188 | ```rust
189 | impl ??? {
190 |     fn f(&self, a: Arg1, b: Arg2) -> Ret {
191 |         ...
192 |         let guard = Guard;
193 | 
194 |         fn original_f(_self: &???, a: Arg1, b: Arg2) -> Ret {
195 |             // (Original function body)
196 |         }
197 |         let value = original_f(self, a, b);
198 | 
199 |         mem::forget(guard);
200 |         value
201 |     }
202 | }
203 | ```
204 | 
205 | The argument type `_self: &Self` can't be used because a function like
206 | `original_f` is its own self-contained item and does not have access to an outer
207 | `Self` or type parameters.
208 | 
209 | ```console
210 | error[E0401]: can't use generic parameters from outer function
211 |  --> src/main.rs:8:31
212 |   |
213 | 1 | impl S {
214 |   | ---- `Self` type implicitly declared here, by this `impl`
215 | ...
216 | 8 |         fn original_f(_self: &Self, a: Arg1, b: Arg2) -> Ret {
217 |   |                               ^^^^
218 |   |                               |
219 |   |                               use of generic parameter from outer function
220 |   |                               use a type here instead
221 | ```
222 | 
223 | Maybe we could ask the user to write our attribute macro on the impl block
224 | rather than on functions but this would be confusing; a solution that does not
225 | require this would be better.
226 | 
227 | It also doesn't work in general to place the `original_f` outside of `f`, as a
228 | `#[doc(hidden)]` method next to `f`. This would work inside of an impl block
229 | containing inherent methods, but not inside of a trait impl block containing
230 | trait methods since those are limited to the set of methods required by the
231 | trait.
232 | 
233 | ```rust
234 | impl ??? {
235 |     fn original_f(&self, a: Arg1, b: Arg2) -> Ret {
236 |         // (Original function body)
237 |     }
238 | 
239 |     fn f(&self, a: Arg1, b: Arg2) -> Ret {
240 |         ...
241 |         let guard = Guard;
242 | 
243 |         let value = Self::original_f(self, a, b);
244 | 
245 |         mem::forget(guard);
246 |         value
247 |     }
248 | }
249 | ```
250 | 
251 | To finally give a viable fourth attempt, let's write `original_f` as a closure
252 | instead because closures are not a self-contained item and *do* have access to
253 | an outer `Self`.
254 | 
255 | ```rust
256 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
257 |     ...
258 |     let guard = Guard;
259 | 
260 |     let original_f = |_self: &Self, a: Arg1, b: Arg2| -> Ret {
261 |         // (Original function body, with self replaced by _self)
262 |     };
263 |     let value = original_f(self, a, b);
264 | 
265 |     mem::forget(guard);
266 |     value
267 | }
268 | ```
269 | 
270 | Here we pass the function arguments along to a closure that has the same
271 | signature as the outer function and captures nothing. Method receivers in the
272 | form of `&self`, `&mut self`, and `self` would be passed as closure arguments
273 | `_self: &Self`, `_self: &mut Self`, `_self: Self` respectively with the original
274 | function body adjusted to refer to `_self` anywhere that it originally referred
275 | to `self`. The leading underscore on `_self` is meaningful in that it suppresses
276 | unused variable lints; Rust does not warn when a method accepts `self` but does
277 | not refer to it, so we want to preserve that behavior in the generated closure.
278 | 
279 | This really seems like it should work. But...
280 | 
281 | <br>
282 | 
283 | ### Fifth attempt
284 | 
285 | The borrow checker doesn't like it. In the case of a method signature that
286 | borrows from `self`:
287 | 
288 | ```rust
289 | fn f(&self) -> &i32 {
290 |     ...
291 |     let guard = Guard;
292 | 
293 |     let original_f = |_self: &Self| -> &i32 {
294 |         &_self.0
295 |     };
296 |     let value = original_f(self);
297 | 
298 |     mem::forget(guard);
299 |     value
300 | }
301 | ```
302 | 
303 | we get this interesting error:
304 | 
305 | ```console
306 | error[E0495]: cannot infer an appropriate lifetime for borrow expression due to conflicting requirements
307 |   --> src/main.rs:17:13
308 |    |
309 | 17 |             &_self.0
310 |    |             ^^^^^^^^
311 |    |
312 | note: first, the lifetime cannot outlive the anonymous lifetime #1 defined on the body at 16:26...
313 |   --> src/main.rs:16:26
314 |    |
315 | 16 |           let original_f = |_self: &Self| -> &i32 {
316 |    |  __________________________^
317 | 17 | |             &_self.0
318 | 18 | |         };
319 |    | |_________^
320 | note: ...so that reference does not outlive borrowed content
321 |   --> src/main.rs:17:13
322 |    |
323 | 17 |             &_self.0
324 |    |             ^^^^^^^^
325 | note: but, the lifetime must be valid for the anonymous lifetime #1 defined on the method body at 7:5...
326 |   --> src/main.rs:7:5
327 |    |
328 | 7  | /     fn f(&self) -> &i32 {
329 | 8  | |         struct Guard;
330 | 9  | |         impl Drop for Guard {
331 | 10 | |             fn drop(&mut self) {
332 | ...  |
333 | 22 | |         value
334 | 23 | |     }
335 |    | |_____^
336 | note: ...so that reference does not outlive borrowed content
337 |   --> src/main.rs:22:9
338 |    |
339 | 22 |         value
340 |    |         ^^^^^
341 | ```
342 | 
343 | I can't tell where this went wrong but casting the closure to a function pointer
344 | with the right signature seems to fix it. This requires rustc 1.23+.
345 | 
346 | ```rust
347 | fn f(&self) -> &i32 {
348 |     ...
349 |     let guard = Guard;
350 | 
351 |     let original_f = |_self: &Self| -> &i32 {
352 |         // (Original function body, with self replaced by _self)
353 |     } as fn(&Self) -> &i32;
354 |     let value = original_f(self);
355 | 
356 |     mem::forget(guard);
357 |     value
358 | }
359 | ```
360 | 
361 | <br>
362 | 
363 | ### Sixth attempt
364 | 
365 | Let's take a closer look at what is meant by "self replaced by \_self".
366 | 
367 | The simple way for a macro to accomplish this would be by traversing the entire
368 | token stream representing the function body and substituting a `_self` token
369 | anywhere that `self` occurs. This is correct as long as `self` always refers to
370 | the method receiver... but sometimes it may not. Let's say the user has written:
371 | 
372 | ```rust
373 | fn f(&self) {
374 |     struct UserGuard;
375 |     impl Drop for UserGuard {
376 |         fn drop(&mut self) {
377 |             // Notice the `self` on the previous line
378 |             ...
379 |         }
380 |     }
381 | 
382 |     ...
383 | }
384 | ```
385 | 
386 | The ability to place structs and impl blocks inside a function body was super
387 | helpful to us so far because that's how we have been doing *our* Guard object.
388 | But the user is free to do it too! In this snippet they have written a function
389 | body that uses the token `self` in a way that does *not* refer to the `f`
390 | method's receiver. If we naively replace every `self` in their function body
391 | with `_self` as indicated in the fifth attempt, the result is invalid Rust
392 | syntax:
393 | 
394 | ```rust
395 | fn f(&self) -> &i32 {
396 |     struct Guard;
397 |     impl Drop for Guard {
398 |         fn drop(&mut self) {
399 |             // This is the guard generated by our macro
400 |         }
401 |     }
402 |     let guard = Guard;
403 | 
404 |     let original_f = |_self: &Self| -> &i32 {
405 |         struct UserGuard;
406 |         impl Drop for UserGuard {
407 |             fn drop(&mut _self) {
408 |                 // Invalid Rust syntax on the previous line
409 |                 ...
410 |             }
411 |         }
412 | 
413 |         ...
414 |     } as fn(&Self) -> &i32;
415 |     let value = original_f(self);
416 | 
417 |     mem::forget(guard);
418 |     value
419 | }
420 | ```
421 | 
422 | ```console
423 | error: expected one of `:` or `@`, found `)`
424 |   --> src/main.rs:19:31
425 |    |
426 | 19 |             fn drop(&mut _self) {
427 |    |                               ^ expected one of `:` or `@` here
428 | ```
429 | 
430 | So replacing *every* `self` is not right. The next simplest possibility would be
431 | to parse the user's function body using Syn and write a [`VisitMut`] to perform
432 | the replacement against the parsed syntax tree without traversing into nested
433 | impl blocks.
434 | 
435 | [`VisitMut`]: https://docs.rs/syn/0.15/syn/visit_mut/index.html
436 | 
437 | That is more correct than replacing *every* `self` but it still isn't correct
438 | because we can't know how to treat unexpanded macros. If the user's function
439 | body contains a call to `somemacro!(self)`, there would be no way to tell
440 | whether this expands to an expression like `vec![self]` in which we need to
441 | replace, vs an impl block like `impl Drop for UserGuard` in which we want to not
442 | replace.
443 | 
444 | I think there is no solution to this today in Rust, so we will need to keep it
445 | as a limitation that sometimes our macro would generate invalid code, or else
446 | solve what we are doing in a way that does not involve doing *any* token
447 | replacement of `self`.
448 | 
449 | So that we don't need replacement, let's try having our generated closure
450 | capture `self` from the outer method `f`'s receiver argument.
451 | 
452 | There are a lot of different ways to slice and dice this, but ultimately they
453 | all fall apart for borrow checker reasons when &mut is involved.
454 | 
455 | ```rust
456 | struct S(i32);
457 | 
458 | impl S {
459 |     // Before: compiles and works
460 |     fn f(&mut self) -> &mut i32 {
461 |         &mut self.0
462 |     }
463 | 
464 |     // After: does not compile
465 |     fn f(&mut self) -> &mut i32 {
466 |         ...
467 |         let guard = Guard;
468 | 
469 |         let original_f = move || {
470 |             // Original function body:
471 |             &mut self.0
472 |         };
473 |         let value = original_f();
474 | 
475 |         mem::forget(guard);
476 |         value
477 |     }
478 | }
479 | ```
480 | 
481 | ```console
482 | error[E0495]: cannot infer an appropriate lifetime for borrow expression due to conflicting requirements
483 |   --> src/main.rs:16:13
484 |    |
485 | 16 |             &mut self.0
486 |    |             ^^^^^^^^^^^
487 | ```
488 | 
489 | Remember how we had to add a cast to function pointer type in the fifth attempt
490 | to solve this same borrow checker failure? Well once the closure is capturing
491 | things, it can no longer be cast to a function pointer. Using `impl FnOnce` or
492 | `&mut dyn FnMut` here doesn't work either; as far as I can tell the correct type
493 | for these closures cannot be accurately described in Rust's type system.
494 | 
495 | ```rust
496 | fn f(&mut self) -> &mut i32 {
497 |     ...
498 |     let guard = Guard;
499 | 
500 |     let original_f: impl FnOnce() -> &mut i32 = move || {
501 |         // Original function body:
502 |         &mut self.0
503 |     };
504 |     let value = original_f();
505 | 
506 |     mem::forget(guard);
507 |     value
508 | }
509 | ```
510 | 
511 | ```console
512 | error[E0106]: missing lifetime specifier
513 |   --> src/main.rs:17:42
514 |    |
515 | 17 |         let original_f: impl FnOnce() -> &mut i32 = move || {
516 |    |                                          ^ help: consider giving it a 'static lifetime: `&'static`
517 |    |
518 |    = help: this function's return type contains a borrowed value, but there is no value for it to be borrowed from
519 | ```
520 | 
521 | There isn't a way for the lifetime in the signature of a closure to unify with
522 | the elided lifetime in `f`'s signature.
523 | 
524 | I tried a lot of variations in this direction but found it to be a dead end. I
525 | would love to have someone bring to my attention a reliable solution that does
526 | not involve replacing `self` tokens on a heuristic basis.
527 | 
528 | <br>
529 | 
530 | ### Lifetime elision
531 | 
532 | As a recap, what we have so far is the closure cast to function pointer
533 | approach from the fifth attempt combined with the `VisitMut` replacement
534 | approach discussed under the sixth attempt. All together the expansion would
535 | behave like this:
536 | 
537 | ```rust
538 | // Before
539 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
540 |     // (Original function body)
541 | }
542 | 
543 | // After
544 | fn f(&self, a: Arg1, b: Arg2) -> Ret {
545 |     struct Guard;
546 |     impl Drop for Guard {
547 |         fn drop(&mut self) {
548 |             // Do the thing
549 |         }
550 |     }
551 |     let guard = Guard;
552 | 
553 |     let original_f = |_self: &Self, a: Arg1, b: Arg2| -> Ret {
554 |         // (Original function body, with self replaced by _self
555 |         //  except in nested impls)
556 |     } as fn(&Self, Arg1, Arg2) -> Ret;
557 | 
558 |     let value = original_f(self, a, b);
559 | 
560 |     mem::forget(guard);
561 |     value
562 | }
563 | ```
564 | 
565 | Unfortunately we are not done because lifetime elision wrecks this approach. To
566 | make it concrete let me give you some possible definitions for the receiver
567 | type, `Arg1`, `Arg2`, `Ret`, and the function body, with lifetime elision in the
568 | mix:
569 | 
570 | ```rust
571 | struct S(i32);
572 | type Arg1<'a> = &'a ();
573 | type Arg2 = ();
574 | type Ret<'a> = &'a i32;
575 | 
576 | impl S {
577 |     fn f(&self, _a: Arg1, _b: Arg2) -> Ret {
578 |         &self.0
579 |     }
580 | }
581 | ```
582 | 
583 | This compiles, with `S::f` eliding three lifetimes: the ones on `&self`, `Arg1`,
584 | and `Ret`.
585 | 
586 | Let's apply our expansion.
587 | 
588 | ```rust
589 | impl S {
590 |     fn f(&self, _a: Arg1, _b: Arg2) -> Ret {
591 |         struct Guard;
592 |         impl Drop for Guard {
593 |             fn drop(&mut self) {
594 |                 // Do the thing
595 |             }
596 |         }
597 |         let guard = Guard;
598 | 
599 |         let original_f = |_self: &Self, _a: Arg1, _b: Arg2| -> Ret {
600 |             &_self.0
601 |         } as fn(&Self, Arg1, Arg2) -> Ret;
602 | 
603 |         let value = original_f(self, _a, _b);
604 | 
605 |         mem::forget(guard);
606 |         value
607 |     }
608 | }
609 | ```
610 | 
611 | ```console
612 | error[E0106]: missing lifetime specifier
613 |   --> src/main.rs:13:39
614 |    |
615 | 13 |         } as fn(&Self, Arg1, Arg2) -> Ret;
616 |    |                                       ^^^ expected lifetime parameter
617 |    |
618 |    = help: this function's return type contains a borrowed value, but the signature does not say whether it is borrowed from argument 1 or argument 2
619 | ```
620 | 
621 | So what happened here? This is hitting a special behavior of lifetime elision in
622 | methods that accept `self` by reference. The signature of `S::f` is not
623 | `fn(&Self, Arg1, Arg2) -> Ret`, as much as it may look like it. Instead it is
624 | `for<'r, 'a> fn(&'r Self, Arg1<'a>, Arg2) -> Ret<'r>`. The compiler's error
625 | message is pointing out that `fn(&Self, Arg1, Arg2) -> Ret` isn't even a legal
626 | function type given the types involved here.
627 | 
628 | The relevant elision behavior goes something like this: in methods that accept
629 | `self` by reference, elided lifetimes in the return type are assumed to refer to
630 | the receiver's lifetime regardless of the number of other lifetimes among
631 | the other arguments. Meanwhile in functions without `self` or that accept `self`
632 | by value, elided lifetimes in the return type are permitted only if the function
633 | has exactly one input lifetime parameter across all the arguments; otherwise the
634 | signature is invalid. This rule reduces the occurrence of explicit lifetimes
635 | being necessary in method signatures, but makes life complicated for macros as
636 | we are experiencing here.
637 | 
638 | The function pointer type in our generated code `fn(&Self, Arg1, Arg2) -> Ret`
639 | is invalid because it has elided the lifetime on `Ret` in the return type but
640 | there is more than one input lifetime: there is one as part of `&Self` and one
641 | as part of `Arg1`. And function pointers never get the
642 | method-with-self-by-reference special elision behavior. The thing that we have
643 | spelled `&Self` in the function pointer is just some ordinary argument type, not
644 | a method receiver.
645 | 
646 | This lifetime elision complication effectively rules out the possibility of
647 | using a function pointer in our solution. This puts us in dire straits because:
648 | 
649 | - as seen in the second attempt, we really need some kind of function or closure
650 |   in order for early returns to work right;
651 | 
652 | - as seen in the fourth attempt, it needs to be a *nested* function or closure
653 |   so that this whole thing can be used inside trait impl blocks;
654 | 
655 | - also from the fourth attempt, it can't be a nested function because the
656 |   signature may need to involve `Self`;
657 | 
658 | - from the sixth attempt, making `self` available in the closure body through
659 |   closure capture is a dead end due to borrow checker trouble;
660 | 
661 | - from the fifth attempt, passing `self` as a closure argument doesn't work
662 |   unless we use a function pointer;
663 | 
664 | - lifetime elision rules make it impossible to come up with the right function
665 |   pointer type.
666 | 
667 | <br>
668 | 
669 | ### Seventh attempt and solution
670 | 
671 | For reasons that are beyond me, the following expansion seems to solve the
672 | entire set of constraints at once. Why is the rebinding of all the arguments
673 | necessary? I don't know, but without it we're in the same failing situation as
674 | back in the sixth attempt under the sentence that says "they all fall apart for
675 | borrow checker reasons when &mut is involved."
676 | 
677 | ```rust
678 | // Before
679 | fn f(&mut self, a: Arg1, b: Arg2) -> Ret {
680 |     // (Original function body)
681 | }
682 | 
683 | // After
684 | fn f(&mut self, a: Arg1, b: Arg2) -> Ret {
685 |     struct Guard;
686 |     impl Drop for Guard {
687 |         fn drop(&mut self) {
688 |             // Do the thing
689 |         }
690 |     }
691 |     let guard = Guard;
692 | 
693 |     let value = (move || {
694 |         // Rebind all the arguments:
695 |         let _self = self;
696 |         let a = a;
697 |         let b = b;
698 | 
699 |         // (Original function body, with self replaced by _self
700 |         //  except in nested impls)
701 |     })();
702 | 
703 |     mem::forget(guard);
704 |     value
705 | }
706 | ```
707 | 
708 | I am pretty disappointed that the best known solution involves this obscure
709 | rebinding trick to work around what seems like a borrow checker limitation, and
710 | as a consequence suffers from its own limitation around use of `self` inside
711 | unexpanded macros within the function body (see sixth attempt). I guess this
712 | shows there is still much room remaining for borrow checker improvements!
713 | 
714 | In any case, this expansion is part of the implementation used for the
715 | [`no-panic`][no-panic] crate.
716 | 


--------------------------------------------------------------------------------
/function-epilogue/demo/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "case-study-function-epilogue"
 3 | version = "0.0.0"
 4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
 5 | edition = "2021"
 6 | publish = false
 7 | 
 8 | [[bin]]
 9 | name = "case-study"
10 | path = "main.rs"
11 | 


--------------------------------------------------------------------------------
/function-epilogue/demo/main.rs:
--------------------------------------------------------------------------------
 1 | use std::mem;
 2 | 
 3 | pub struct S(i32); // Stand-in receiver type for the demo.
 4 | pub type Arg1<'a> = &'a i32; // First argument type: a reference, so it carries its own lifetime.
 5 | pub type Arg2 = i32; // Second argument type: a plain value with no lifetime.
 6 | pub type Ret<'a> = (&'a mut i32, i32); // Return type; its lifetime is elided in the method signatures below.
 7 | 
 8 | impl S { // Hand-written "before" and "after" forms of the same method.
 9 |     pub fn original_f(&mut self, a: Arg1, b: Arg2) -> Ret { // Before: the method as the user wrote it.
10 |         (&mut self.0, a + b)
11 |     }
12 |     // After: same signature and same returned value, with the body moved into a closure and bracketed by a drop guard.
13 |     pub fn generated_f(&mut self, a: Arg1, b: Arg2) -> Ret {
14 |         struct Guard;
15 |         impl Drop for Guard {
16 |             fn drop(&mut self) {
17 |                 // Do the thing
18 |             }
19 |         }
20 |         let guard = Guard; // Stays alive for the whole closure call below.
21 | 
22 |         let value = (move || { // `move` closure: takes `self`, `a` and `b` by value and is called immediately.
23 |             let _self = self; // Rebind every argument; the README (seventh attempt) reports that borrow checking fails without this.
24 |             let a = a;
25 |             let b = b;
26 | 
27 |             // Original function body, with self replaced by _self
28 |             // except in nested impls:
29 | 
30 |             (&mut _self.0, a + b)
31 |         })();
32 | 
33 |         mem::forget(guard); // The closure returned normally, so Guard::drop is skipped on this path.
34 |         value
35 |     }
36 | }
37 | 
38 | fn main() { // The demo only needs to type-check; nothing is called at runtime.
39 |     let _ = S; // Names the tuple-struct constructor `S` and discards it.
40 | }
41 | 


--------------------------------------------------------------------------------
/integer-match/README.md:
--------------------------------------------------------------------------------
  1 | ## Consecutive integer match patterns
  2 | 
  3 | This came up in a macro that wanted to take a comma-separated sequence of
  4 | expressions like `themacro!('A', 'B', f())` and emit a `match` expression indexed by
  5 | position in the sequence:
  6 | 
  7 | ```rust
  8 | match VALUE {
  9 |     0 => 'A',
 10 |     1 => 'B',
 11 |     2 => f(),
 12 |     _ => unimplemented!(),
 13 | }
 14 | ```
 15 | 
 16 | As a macro\_rules macro, a core limitation was that we can't make identifiers
 17 | dynamically, so the generated code would be limited to using some fixed number
 18 | of identifiers regardless of how many expressions are in the macro input.
 19 | 
 20 | In the actual use case, this `match` was just one part of a more complicated
 21 | macro; we wouldn't necessarily want a macro for doing literally what is
 22 | described here by itself.
 23 | 
 24 | <br>
 25 | 
 26 | ### Rejected solutions
 27 | 
 28 | <kbd>**Procedural macro.**</kbd> The whole thing could have been made a
 29 | procedural macro instead. A procedural macro would be able to emit exactly a
 30 | match expression as shown above. However the stable Rust compiler does not yet
 31 | support calling procedural macros in expression position, so the procedural
 32 | macro would have needed to be restricted to nightly only. Also it would mean
 33 | pulling in some extra dependencies for parsing.
 34 | 
 35 | <kbd>**Change input syntax.**</kbd> The input syntax for the macro could have
 36 | been changed to require the caller to pass their own counter in the input:
 37 | something like `themacro!((0, 'A'), (1, 'B'), (2, f()))`. This makes things easy
 38 | for the macro implementation but at the expense of the caller, which was the
 39 | wrong tradeoff. Here is what that would look like implemented:
 40 | 
 41 | ```rust
 42 | // Force caller to provide their own counter.
 43 | macro_rules! themacro {
 44 |     ($(($i:pat, $e:expr)),*) => {
 45 |         match VALUE {
 46 |             $($i => $e,)*
 47 |             _ => unimplemented!(),
 48 |         }
 49 |     };
 50 | }
 51 | ```
 52 | 
 53 | <br>
 54 | 
 55 | ### Good solutions
 56 | 
 57 | <kbd>**If-else chain.**</kbd> We can make the macro expand to a chain of if-else
 58 | comparisons structured like this, with a counter in a local variable:
 59 | 
 60 | ```rust
 61 | {
 62 |     let _value = VALUE;
 63 |     let mut _i = 0;
 64 |     if {
 65 |         let eq = _value == _i;
 66 |         _i += 1;
 67 |         eq
 68 |     } {
 69 |         $e
 70 |     } else if {
 71 |         let eq = _value == _i;
 72 |         _i += 1;
 73 |         eq
 74 |     } {
 75 |         $e
 76 |     } else if {
 77 |         let eq = _value == _i;
 78 |         _i += 1;
 79 |         eq
 80 |     } {
 81 |         $e
 82 |     } else {
 83 |         unimplemented!()
 84 |     }
 85 | }
 86 | ```
 87 | 
 88 | The conditions of the `if` are equivalent to `_value == _i++` except that unary
 89 | increment does not exist in Rust.
 90 | 
 91 | The leading underscore in the local variables `_value` and `_i` is meaningful in
 92 | that it suppresses some of the compiler's lints on unused variables, unused
 93 | assignment, and unused mut. If the caller's sequence of expressions is empty,
 94 | then `_value` and `_i` are never read and `_i` is never mutated. If the caller's
 95 | sequence of expressions is nonempty, the value written to `_i` by the last `_i
 96 | += 1` is never read. We could alternatively use `#[allow(unused_variables,
 97 | unused_mut, unused_assignments)]` but placing these attributes in a way that
 98 | they apply correctly to the macro-generated local variables but not to the
 99 | caller's $e expressions makes things more complicated.
100 | 
101 | Notice that the way the if-else chain is structured there is a clear chunk of
102 | repeating tokens -- each `if` through the following `else`. That repeating
103 | structure makes it very easy for this to be generated from a macro\_rules macro
104 | in one step of expansion.
105 | 
106 | ```rust
107 | macro_rules! themacro {
108 |     ($($e:expr),*) => {{
109 |         let _value = VALUE;
110 |         let mut _i = 0;
111 |         $(
112 |             if {
113 |                 let eq = _value == _i;
114 |                 _i += 1;
115 |                 eq
116 |             } {
117 |                 $e
118 |             } else
119 |         )* {
120 |             unimplemented!()
121 |         }
122 |     }};
123 | }
124 | ```
125 | 
126 | <br>
127 | 
128 | <kbd>**Const counter.**</kbd> In some situations we may really want to stick
129 | with a `match` expression rather than an if-else chain, for example if the value
130 | being matched is just part of a larger data structure and we need to bind other
131 | parts of the data structure by-move in the same match.
132 | 
133 | We can't expand to a `match` in which the patterns are integer literals `0`,
134 | `1`, `2` etc as shown in the introduction, at least not while supporting an
135 | arbitrary number of input expressions, because macro\_rules can only copy and
136 | paste tokens around, never come up with new tokens. If the caller passes 9999
137 | input expressions, there wouldn't be any way for a macro\_rules macro to conjure
138 | up a `9998` integer literal token to place in the output.
139 | 
140 | We also can't expand to arithmetic patterns because this is not legal Rust
141 | syntax.
142 | 
143 | ```rust
144 | match VALUE {
145 |     0 => $e,
146 |     0 + 1 => $e,
147 |     0 + 1 + 1 => $e,
148 |     ...
149 | }
150 | ```
151 | 
152 | Instead we will make generated code that looks like this:
153 | 
154 | ```rust
155 | {
156 |     mod m {
157 |         pub const X: usize = 0;
158 |         pub mod m {
159 |             pub const X: usize = super::X + 1;
160 |             pub mod m {
161 |                 pub const X: usize = super::X + 1;
162 |             }
163 |         }
164 |     }
165 |     match VALUE {
166 |         m::X => $e,
167 |         m::m::X => $e,
168 |         m::m::m::X => $e,
169 |         _ => unimplemented!(),
170 |     }
171 | }
172 | ```
173 | 
174 | The nested modules here provide a way to avoid needing unique names for each
175 | const, which macro\_rules wouldn't be able to create.
176 | 
177 | Figuring out the right generated code is the hard part. The macro implementation
178 | ends up being an unremarkable tt-muncher macro that produces one layer of the
179 | nesting at a time.
180 | 
181 | ```rust
182 | macro_rules! themacro {
183 |     ($($v:expr),*) => {
184 |         $crate::themacro_helper! {
185 |             path: (m::X)
186 |             def: ()
187 |             arms: ()
188 |             $($v),*
189 |         }
190 |     };
191 | }
192 | 
193 | macro_rules! themacro_helper {
194 |     (
195 |         path: ($($path:tt)*)
196 |         def: ($($def:tt)*)
197 |         arms: ($(($i:pat, $v:expr))*)
198 |     ) => {{
199 |         #[allow(dead_code)]
200 |         mod m {
201 |             pub const X: usize = 0;
202 |             $($def)*
203 |         }
204 |         match VALUE {
205 |             $(
206 |                 $i => $v,
207 |             )*
208 |             _ => unimplemented!(),
209 |         }
210 |     }};
211 |     (
212 |         path: ($($path:tt)*)
213 |         def: ($($def:tt)*)
214 |         arms: ($(($i:pat, $v:expr))*)
215 |         $next:expr $(, $rest:expr)*
216 |     ) => {
217 |         $crate::themacro_helper! {
218 |             path: (m::$($path)*)
219 |             def: (pub mod m { pub const X: usize = super::X + 1; $($def)* })
220 |             arms: ($(($i, $v))* ($($path)*, $next))
221 |             $($rest),*
222 |         }
223 |     };
224 | }
225 | ```
226 | 


--------------------------------------------------------------------------------
/integer-match/demo/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "case-study-integer-match"
 3 | version = "0.0.0"
 4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
 5 | edition = "2021"
 6 | publish = false
 7 | 
 8 | [[bin]]
 9 | name = "case-study"
10 | path = "main.rs"
11 | 


--------------------------------------------------------------------------------
/integer-match/demo/main.rs:
--------------------------------------------------------------------------------
 1 | #[macro_export]
 2 | macro_rules! themacro { // Entry point: takes comma-separated expressions and hands them to themacro_helper with empty accumulators.
 3 |     ($($v:expr),*) => {
 4 |         $crate::themacro_helper! {
 5 |             path: (m::X) // pattern that the first expression's arm will use
 6 |             def: () // nested `mod m` definitions accumulated so far (none yet)
 7 |             arms: () // (pattern, expression) pairs accumulated so far (none yet)
 8 |             $($v),* // expressions still to be processed
 9 |         }
10 |     };
11 | }
12 | 
13 | #[macro_export]
14 | macro_rules! themacro_helper { // Recursive helper: consumes one expression per step, then emits the final block.
15 |     ( // Base case: no expressions remain after the three accumulators.
16 |         path: ($($path:tt)*)
17 |         def: ($($def:tt)*)
18 |         arms: ($(($i:pat, $v:expr))*)
19 |     ) => {{
20 |         #[allow(dead_code)] // one more X is defined than there are arms, so the innermost X is never used as a pattern
21 |         mod m {
22 |             pub const X: usize = 0;
23 |             $($def)*
24 |         }
25 |         match VALUE { // `VALUE` is not a macro argument; it must be nameable where the macro is invoked
26 |             $(
27 |                 $i => $v,
28 |             )*
29 |             _ => unimplemented!(),
30 |         }
31 |     }};
32 |     ( // Recursive case: at least one expression remains.
33 |         path: ($($path:tt)*)
34 |         def: ($($def:tt)*)
35 |         arms: ($(($i:pat, $v:expr))*)
36 |         $next:expr $(, $rest:expr)*
37 |     ) => {
38 |         $crate::themacro_helper! {
39 |             path: (m::$($path)*) // the following arm's pattern lives one module deeper
40 |             def: (pub mod m { pub const X: usize = super::X + 1; $($def)* }) // wrap the layers so far in a new `mod m` whose X is its parent's X + 1
41 |             arms: ($(($i, $v))* ($($path)*, $next)) // the current path becomes the pattern for `$next`
42 |             $($rest),*
43 |         }
44 |     };
45 | }
46 | 
47 | fn main() {
48 |     const VALUE: usize = 2; // the macro expansion matches on this name
49 |     dbg!(VALUE);
50 |     dbg!(themacro!('A', 'B', 'C')); // arms are 0 => 'A', 1 => 'B', 2 => 'C', so VALUE = 2 selects 'C'
51 | }
52 | 


--------------------------------------------------------------------------------
/readonly-fields/README.md:
--------------------------------------------------------------------------------
  1 | ## Read-only fields of mutable struct
  2 | 
  3 | In [`oqueue`] I wanted to expose a field of one of the structs in the API, but
  4 | not allow it to be mutated even if the caller has &amp;mut access to the
  5 | surrounding struct.
  6 | 
  7 | [`oqueue`]: https://github.com/dtolnay/oqueue
  8 | 
  9 | <br>
 10 | 
 11 | ### Rejected approaches
 12 | 
 13 | <kbd>**Public field.**</kbd> The field cannot be `pub` because mutating it
 14 | directly would enable the caller to violate invariants of the API.
 15 | 
 16 | ```rust
 17 | // Bad: caller can mutate, task.index += 1
 18 | 
 19 | pub struct Task {
 20 |     pub index: usize,
 21 |     // other private fields
 22 | }
 23 | ```
 24 | 
 25 | <kbd>**Private field, public getter.**</kbd> This would be the textbook
 26 | solution.
 27 | 
 28 | ```rust
 29 | // Bad: caller needs to write task.index() instead of task.index
 30 | 
 31 | pub struct Task {
 32 |     index: usize,
 33 |     // other private fields
 34 | }
 35 | 
 36 | impl Task {
 37 |     pub fn index(&self) -> usize {
 38 |         self.index
 39 |     }
 40 | }
 41 | ```
 42 | 
 43 | For the ways that this API is commonly used as an argument to other function
 44 | calls, I felt that the additional method call parentheses from the getter would
 45 | be noisy and provide zero benefit. Rust users already understand how struct
 46 | fields work and would be happy to access this value as a field if I can let
 47 | them. From the role of this type in the crate's API it is very unlikely that
 48 | someone would want to mutate the field, but still we need to protect against it
 49 | for correctness.
 50 | 
 51 | <br>
 52 | 
 53 | ### Background
 54 | 
 55 | The way `.` field access syntax works, if there is no field found with the right
 56 | name then the language will look at the type's `Deref` impl or a sequence of
 57 | `Deref` impls to determine the field being named. This behavior is important for
 58 | making smart pointers like `Box` convenient to use:
 59 | 
 60 | ```rust
 61 | // Somewhere in the standard library:
 62 | //
 63 | // pub struct Box<T: ?Sized> {
 64 | //     ptr: *mut T,
 65 | // }
 66 | 
 67 | struct S {
 68 |     x: String,
 69 | }
 70 | 
 71 | fn f(s: Box<S>) {
 72 |     // Box<S> has no field called x so it isn't obvious why
 73 |     // this line would be legal, but Box<S> dereferences to
 74 |     // S which does have that field.
 75 |     println!("{}", s.x);
 76 | }
 77 | ```
 78 | 
 79 | Importantly for encapsulation, the deref behavior takes place even if a field
 80 | with the right name exists on the original type but is private. Suppose that
 81 | `Box` were implemented by storing the heap pointer it owns in a private field
 82 | called `ptr`. In that case we would still want the following code to refer to
 83 | the user's `ptr` field, rather than erroring because `ptr` exists on `Box` and
 84 | is private:
 85 | 
 86 | ```rust
 87 | struct S {
 88 |     ptr: *const u8,
 89 | }
 90 | 
 91 | fn f(s: Box<S>) {
 92 |     println!("{:p}", s.ptr);
 93 | }
 94 | ```
 95 | 
 96 | The final detail relevant to our original use case is that fields accessed
 97 | through a `Deref` impl cannot be mutated unless the outer type also implements
 98 | `DerefMut`. The `Deref` method signature looks like `fn deref(&self) ->
 99 | &Self::Target` while the `DerefMut` signature looks like `fn deref_mut(&mut
100 | self) -> &mut Self::Target`.
101 | 
102 | <br>
103 | 
104 | ### First attempt
105 | 
106 | We can implement read-only fields by moving the state behind a `Deref` impl to a
107 | type with the appropriate fields public. Without a `DerefMut` impl, this makes
108 | all accessible fields read-only outside of the current module.
109 | 
110 | ```rust
111 | pub struct Task {
112 |     inner: ReadOnlyTask,
113 | }
114 | 
115 | pub struct ReadOnlyTask {
116 |     pub index: usize,
117 |     // other private fields
118 | }
119 | 
120 | impl Deref for Task {
121 |     type Target = ReadOnlyTask;
122 | 
123 |     fn deref(&self) -> &Self::Target {
124 |         &self.inner
125 |     }
126 | }
127 | ```
128 | 
129 | This is pretty good from the point of view of downstream code. As intended, code
130 | from outside the module can access `task.index` through deref but cannot mutate
131 | `task.index`.
132 | 
133 | The big problem with this approach is that it distresses the borrow checker.
134 | From inside the module, if code takes a reference to one of the private fields
135 | through deref, say `&task.other`, deref gets a reference to the whole `&Task`
136 | which precludes then mutating some different fields while retaining the
137 | reference.
138 | 
139 | ```console
140 | error[E0506]: cannot assign to `task.inner.another` because it is borrowed
141 |  --> src/main.rs:8:5
142 |   |
143 | 7 |     let other = &task.other;
144 |   |                  ---- borrow of `task.inner.another` occurs here
145 | 8 |     task.inner.another = 1;
146 |   |     ^^^^^^^^^^^^^^^^^^^^^^ assignment to borrowed `task.inner.another` occurs here
147 | ```
148 | 
149 | To work around this, practically all code within the module would need to be
150 | written in terms of `task.inner.*` explicitly rather than relying on derefs,
151 | which is unpleasant.
152 | 
153 | <br>
154 | 
155 | ### Second attempt
156 | 
157 | We can keep the original struct but dereference to a struct with the same memory
158 | layout and public fields, still not implementing `DerefMut`.
159 | 
160 | For this to be sound, we need to guarantee that both copies of the struct have
161 | the same layout in memory. This is *not* guaranteed just by having the same
162 | fields with the same types in both. One way to do it is by using `#[repr(C)]` to
163 | tie both structs to C's struct layout rules, because those do guarantee the same
164 | layout for structs with identical fields.
165 | 
166 | ```rust
167 | #[repr(C)]
168 | pub struct Task {
169 |     index: usize,
170 |     // other private fields
171 | }
172 | 
173 | #[repr(C)]
174 | pub struct ReadOnlyTask {
175 |     pub index: usize,
176 |     // the same private fields
177 | }
178 | 
179 | impl Deref for Task {
180 |     type Target = ReadOnlyTask;
181 | 
182 |     fn deref(&self) -> &Self::Target {
183 |         unsafe { &*(self as *const Self as *const Self::Target) }
184 |     }
185 | }
186 | ```
187 | 
188 | This works as intended. Code from inside this module can access and mutate the
189 | private `task.index` directly, while code from outside the module can access
190 | `task.index` through `Deref` and cannot mutate it even if the `Task` they hold
191 | is mutable.
192 | 
193 | ```console
194 | error[E0594]: cannot assign to data in a `&` reference
195 |  --> main.rs:8:5
196 |   |
197 | 8 |     task.index += 1;
198 |   |     ^^^^^^^^^^^^^^^ cannot assign
199 | ```
200 | 
201 | But this is not a complete solution because we really want the field to appear
202 | as a public field in Rustdoc so that readers of the documentation immediately
203 | understand how to use it. The documentation experience should be as though this
204 | field were declared `pub`.
205 | 
206 | <br>
207 | 
208 | ### Third attempt
209 | 
210 | We can use [`#[cfg(doc)]`][cfgdoc] to distinguish when documentation is being
211 | rendered, which is available since Rust 1.41.
212 | 
213 | [cfgdoc]: https://doc.rust-lang.org/1.67.0/rustdoc/advanced-features.html#cfgdoc-documenting-platform-specific-or-feature-specific-information
214 | 
215 | ```rust
216 | #[repr(C)]
217 | pub struct Task {
218 |     #[cfg(doc)]
219 |     pub index: usize,
220 | 
221 |     #[cfg(not(doc))]
222 |     index: usize,
223 | 
224 |     // other private fields
225 | }
226 | 
227 | #[doc(hidden)]
228 | #[repr(C)]
229 | pub struct ReadOnlyTask {
230 |     pub index: usize,
231 |     // the same private fields
232 | }
233 | 
234 | #[doc(hidden)]
235 | impl Deref for Task {
236 |     type Target = ReadOnlyTask;
237 | 
238 |     fn deref(&self) -> &Self::Target {
239 |         unsafe { &*(self as *const Self as *const Self::Target) }
240 |     }
241 | }
242 | ```
243 | 
244 | This renders as intended in rustdoc as:
245 | 
246 | ```console
247 | pub struct Task {
248 |     pub index: usize,
249 |     // some fields omitted
250 | }
251 | ```
252 | 
253 | so readers immediately know how to access the field. From the role of this type
254 | in the crate's API it is unlikely that anyone would want to mutate the field,
255 | but just in case, the field's documentation points out that it is read-only.
256 | 
257 | <br>
258 | 
259 | ### Implementation
260 | 
261 | Once the right strategy for generated code has been worked out, [productizing
262 | the behavior as an attribute macro][readonly] is the easy part:
263 | 
264 | [readonly]: https://github.com/dtolnay/readonly
265 | 
266 | ```rust
267 | /// ...
268 | #[readonly::make]
269 | pub struct Task {
270 |     /// ...
271 |     ///
272 |     /// This field is read-only; writing to its value will not compile.
273 |     pub index: usize,
274 | 
275 |     // other private fields
276 | }
277 | ```
278 | 


--------------------------------------------------------------------------------
/readonly-fields/demo/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "case-study-readonly-fields"
 3 | version = "0.0.0"
 4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
 5 | edition = "2021"
 6 | publish = false
 7 | 
 8 | [[bin]]
 9 | name = "case-study"
10 | path = "main.rs"
11 | 


--------------------------------------------------------------------------------
/readonly-fields/demo/main.rs:
--------------------------------------------------------------------------------
 1 | pub use oqueue::Task;
 2 | 
 3 | mod oqueue { // Module boundary: inside it `index` is an ordinary private field; outside it is reached only through `Deref`.
 4 |     use core::ops::Deref;
 5 | 
 6 |     #[derive(Default)]
 7 |     #[repr(C)]
 8 |     pub struct Task {
 9 |         #[cfg(doc)]
10 |         pub index: usize,
11 |         // ^ declared `pub` only under cfg(doc), i.e. for rendered documentation; other builds use the private declaration below.
12 |         #[cfg(not(doc))]
13 |         index: usize,
14 | 
15 |         // Other private fields:
16 |         q: usize,
17 |     }
18 | 
19 |     #[doc(hidden)]
20 |     #[repr(C)]
21 |     pub struct ReadOnlyTask {
22 |         pub index: usize,
23 | 
24 |         // The same private fields:
25 |         q: usize,
26 |     }
27 | 
28 |     #[doc(hidden)]
29 |     impl Deref for Task {
30 |         type Target = ReadOnlyTask;
31 |         // SAFETY (for the cast in `deref`): relies on `Task` and `ReadOnlyTask` both being #[repr(C)] with the same field types in the same order, so the two have the same layout.
32 |         fn deref(&self) -> &Self::Target {
33 |             unsafe { &*(self as *const Self as *const Self::Target) }
34 |         }
35 |     }
36 | 
37 |     #[allow(dead_code)]
38 |     pub fn from_within_module(task: &mut Task) {
39 |         task.index += 1; // inside the module the private field is named directly, so mutation is allowed
40 |     }
41 | }
42 | 
43 | fn from_outside_module(task: &mut Task) { // Shows the read-only view that code outside `oqueue` gets.
44 |     task.index += 1; // cannot assign: here `index` resolves through Deref to a shared `&ReadOnlyTask` (README shows E0594 for this line)
45 | }
46 | 
47 | fn main() {
48 |     let mut task = Task::default();
49 |     oqueue::from_within_module(&mut task); // mutation from inside the defining module
50 |     from_outside_module(&mut task); // attempted mutation from outside the module
51 | }
52 | 


--------------------------------------------------------------------------------
/unit-type-parameters/README.md:
--------------------------------------------------------------------------------
  1 | ## Unit struct with type parameters
  2 | 
  3 | [`PhantomData<T>`] is a lang item which means it is currently implemented using
  4 | dedicated logic in the compiler, but it turns out all of its behavior can be
  5 | implemented from ordinary Rust code. This gives a good opportunity to explore
  6 | namespaces in Rust name resolution.
  7 | 
  8 | [`PhantomData<T>`]: https://doc.rust-lang.org/std/marker/struct.PhantomData.html
  9 | 
 10 | The defining characteristic of `PhantomData` is that it is a unit struct with a
 11 | type parameter, which is not otherwise allowed by Rust.
 12 | 
 13 | ```rust
 14 | struct MyPhantomData<T: ?Sized>;
 15 | 
 16 | fn main() {
 17 |     let _: MyPhantomData<usize> = MyPhantomData;
 18 | }
 19 | ```
 20 | 
 21 | ```console
 22 | error[E0392]: parameter `T` is never used
 23 |  --> src/main.rs:1:22
 24 |   |
 25 | 1 | struct MyPhantomData<T: ?Sized>;
 26 |   |                      ^ unused parameter
 27 |   |
 28 |   = help: consider removing `T` or using a marker such as `std::marker::PhantomData`
 29 | ```
 30 | 
 31 | This is a hard error, not a warning that can be suppressed like some other lints
 32 | about unused code. Rust needs to insist on all type parameters appearing somehow
 33 | in the data structure because it is critical for determining [variance].
 34 | 
 35 | [variance]: https://doc.rust-lang.org/nomicon/subtyping.html
 36 | 
 37 | We will develop an attribute macro to make this work by assuming covariance for
 38 | the type parameter the same as `PhantomData`. As always, the hard part is
 39 | figuring out what code to generate, not writing the macro.
 40 | 
 41 | ```rust
 42 | #[phantom]
 43 | struct MyPhantomData<T: ?Sized>;
 44 | 
 45 | fn main() {
 46 |     let _: MyPhantomData<usize> = MyPhantomData;
 47 | }
 48 | ```
 49 | 
 50 | Solving this functionality opens some interesting design possibilities for
 51 | libraries that want something that is usable like `PhantomData` but is a locally
 52 | defined type, meaning the library can control the impl of traits like
 53 | `IntoIterator` on it. The iteration API of [`inventory`] is an example of such a
 54 | type in a public crate.
 55 | 
 56 | [`inventory`]: https://github.com/dtolnay/inventory
 57 | 
 58 | <br>
 59 | 
 60 | ### Background
 61 | 
 62 | Names of things in Rust exist in one of three namespaces:
 63 | 
 64 | - The type namespace: structs, enums, unions, traits, modules, enum variants.
 65 | 
 66 | - The value namespace: functions, local variables, statics, consts, tuple struct
 67 |   constructors, unit struct instances, tuple variant constructors, unit
 68 |   variant instances.
 69 | 
 70 | - The macro namespace: macro\_rules macros, function-like procedural macros,
 71 |   attribute macros, derive macros.
 72 | 
 73 | The following is not a precise rule, but the intuition is that something exists
 74 | in the type namespace if you can write:
 75 | 
 76 | ```rust
 77 | let _: TYPE;
 78 | ```
 79 | 
 80 | while something exists in the value namespace if you can write:
 81 | 
 82 | ```rust
 83 | let _ = VALUE;
 84 | ```
 85 | 
 86 | These two syntactic positions are always unambiguous in the Rust grammar, so
 87 | permitting the same name to refer to different things in each namespace does not
 88 | introduce ambiguity.
 89 | 
 90 | It is possible to have the same name refer to different things in all three
 91 | namespaces at once:
 92 | 
 93 | ```rust
 94 | // X in the macro namespace
 95 | macro_rules! X {
 96 |     () => {};
 97 | }
 98 | 
 99 | // X in the type namespace
100 | struct X {}
101 | 
102 | // X in the value namespace
103 | const X: () = ();
104 | 
105 | fn main() {
106 |     // unambiguously the macro X
107 |     X!();
108 | 
109 |     // unambiguously the type X
110 |     let _: X;
111 | 
112 |     // unambiguously the value X
113 |     let _ = X;
114 | }
115 | ```
116 | 
117 | Some definitions place a name into more than one namespace. For example unit
118 | structs (`struct S;`) and tuple structs (`struct S(A, B);`) are both types and
119 | values. The value corresponding to a unit struct is like a constant whose value
120 | is that unit struct, and the value corresponding to a tuple struct is like a
121 | function that takes the tuple elements and returns the tuple struct.
122 | 
123 | Braced structs (`struct S { a: A }`) are types only.
124 | 
125 | <br>
126 | 
127 | ### Strategy
128 | 
129 | `PhantomData`, being a unit struct, consists of a type component and a value
130 | component. When you write `use std::marker::PhantomData` you are importing both.
131 | 
132 | ```rust
133 | use std::marker::PhantomData;
134 | 
135 | fn main() {
136 |     let _: PhantomData<usize> = PhantomData::<usize>;
137 | }
138 | ```
139 | 
140 | In implementing our own `PhantomData` we will tackle the two namespaces one
141 | after the other.
142 | 
143 | In the value namespace we will need something that makes the following valid:
144 | 
145 | ```rust
146 | fn main() {
147 |     let _ = MyPhantomData::<usize>;
148 | }
149 | ```
150 | 
151 | And in the type namespace we will need something for this:
152 | 
153 | ```rust
154 | fn main() {
155 |     let _: MyPhantomData<usize>;
156 | }
157 | ```
158 | 
159 | Independently these would be easy, but the hard part will be making it so that
160 | `MyPhantomData::<usize>` as a value has a type that matches
161 | `MyPhantomData<usize>`.
162 | 
163 | ```rust
164 | fn main() {
165 |     let _: MyPhantomData<usize> = MyPhantomData::<usize>;
166 | }
167 | ```
168 | 
169 | <br>
170 | 
171 | ### Value namespace
172 | 
173 | In the value namespace basically our only tool relevant to this project is unit
174 | variants. The other obvious candidates in the value namespace (statics and
175 | consts) cannot carry a type parameter.
176 | 
177 | You may be familiar with type parameters on unit variants already, maybe without
178 | thinking about it, from dealing with `Option`:
179 | 
180 | ```rust
181 | fn main() {
182 |     let mut x = None::<usize>;
183 | 
184 |     // equivalent to:
185 |     let mut x: Option<usize> = None;
186 | }
187 | ```
188 | 
189 | Here is how we would make a unit variant with a type parameter that can be
190 | imported and used in value position:
191 | 
192 | ```rust
193 | mod phantom {
194 |     pub use self::ImplementationDetail::MyPhantomData;
195 | 
196 |     pub enum ImplementationDetail<T: ?Sized> {
197 |         MyPhantomData,
198 | 
199 |         #[allow(dead_code)]
200 |         #[doc(hidden)]
201 |         Marker(*const T),
202 |     }
203 | }
204 | 
205 | use phantom::MyPhantomData;
206 | 
207 | fn main() {
208 |     let _ = MyPhantomData::<usize>;
209 | }
210 | ```
211 | 
212 | The marker variant is responsible for using the type parameter `T` in some way
213 | that gives it the right variance. There are many correct alternatives but I made
214 | it hold `*const T` as one example of a type that is covariant in `T` and works
215 | with dynamically sized `T: ?Sized`. We will come back to autotrait impls later.
216 | 
217 | <br>
218 | 
219 | ### Type namespace
220 | 
221 | Clearly in the previous section the type of the enum variant
222 | `MyPhantomData::<usize>` is the enum type `ImplementationDetail<usize>`. We just
223 | need to call it something else, namely `MyPhantomData<usize>`.
224 | 
225 | Changing the name doesn't immediately work.
226 | 
227 | ```rust
228 | mod phantom {
229 |     pub use self::MyPhantomData::MyPhantomData;
230 | 
231 |     pub enum MyPhantomData<T: ?Sized> {
232 |         MyPhantomData,
233 | 
234 |         #[allow(dead_code)]
235 |         #[doc(hidden)]
236 |         Marker(*const T),
237 |     }
238 | }
239 | ```
240 | 
241 | ```console
242 | error[E0255]: the name `MyPhantomData` is defined multiple times
243 |  --> src/main.rs:4:5
244 |   |
245 | 2 |     pub use self::MyPhantomData::MyPhantomData;
246 |   |             ---------------------------------- previous import of the type `MyPhantomData` here
247 | 3 | 
248 | 4 |     pub enum MyPhantomData<T: ?Sized> {
249 |   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `MyPhantomData` redefined here
250 |   |
251 |   = note: `MyPhantomData` must be defined only once in the type namespace of this module
252 | help: you can use `as` to change the binding name of the import
253 |   |
254 | 2 |     pub use self::MyPhantomData::MyPhantomData as OtherMyPhantomData;
255 |   |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
256 | ```
257 | 
258 | The behavior seen here is that all enum variants of any style (struct variant,
259 | tuple variant, unit variant) occupy both the value namespace and the type
260 | namespace. Our code had defined `enum MyPhantomData` as a type, but then
261 | imported `self::MyPhantomData::MyPhantomData` which is both a value and a type,
262 | resulting in a conflict in the type namespace.
263 | 
264 | Naively we might expect that unit variants and tuple variants occupy only the
265 | value namespace while struct variants occupy only the type namespace. Unit
266 | variants necessarily need something in the value namespace through which you
267 | refer to their value, and tuple variants necessarily need something in the value
268 | namespace that behaves like a function through which you construct them. And
269 | struct variants need something to make curly brace initialization work, which
270 | seems like it should be the type namespace because plain structs with named
271 | fields exist in the type namespace only. But apparently this is not how things
272 | work -- maybe to leave things open for language evolution in which enum variants
273 | become usable as refinement types.
274 | 
275 | In any case, the way to work around conflicts is via wildcard imports. These are
276 | allowed to overlap with non-wildcard imports or explicit definitions, in which
277 | case the non-wildcard takes precedence. The precedence applies independently
278 | within each namespace.
279 | 
280 | ```rust
281 | mod phantom {
282 |     // Imports the enum variant in both type and value namespace,
283 |     // but in the type namespace it gets shadowed by the definition
284 |     // `enum MyPhantomData` below.
285 |     pub use self::MyPhantomData::*;
286 | 
287 |     pub enum MyPhantomData<T: ?Sized> {
288 |         MyPhantomData,
289 | 
290 |         #[allow(dead_code)]
291 |         #[doc(hidden)]
292 |         Marker(*const T),
293 |     }
294 | }
295 | 
296 | use phantom::MyPhantomData;
297 | 
298 | fn main() {
299 |     let _: MyPhantomData<usize> = MyPhantomData::<usize>;
300 | }
301 | ```
302 | 
303 | Pretty neat! There are some quirks to sort out still, but this is on the right
304 | track.
305 | 
306 | <br>
307 | 
308 | ### Memory representation
309 | 
310 | We want `std::mem::size_of::<MyPhantomData<T>>() == 0`.
311 | 
312 | In the definition above, on a 64-bit target it would currently be a whopping 16
313 | or 24 bytes depending on whether `T` is dynamically sized. The marker variant
314 | takes up space for a pointer or fat pointer, and there is an enum discriminant
315 | as well which needs 1 bit, plus a further 63 bits of padding for alignment.
316 | 
317 | Two things need to change: we need the marker variant not to contain storage,
318 | and we need the discriminant not to exist.
319 | 
320 | We can eliminate the discriminant by making the marker variant's data zero sized
321 | and statically impossible. The compiler is smart enough to elide the
322 | discriminant when this happens.
323 | 
324 | For various complicated but reasonably good reasons, just making the data
325 | impossible without making it zero sized (such as `Marker(Void, *const T)`) is
326 | not sufficient.
327 | 
328 | ```rust
329 | mod phantom {
330 |     pub use self::MyPhantomData::*;
331 | 
332 |     pub enum MyPhantomData<T: ?Sized> {
333 |         MyPhantomData,
334 | 
335 |         #[allow(dead_code)]
336 |         #[doc(hidden)]
337 |         Marker(Void, [*const T; 0]),
338 |     }
339 | 
340 |     pub enum Void {}
341 | }
342 | 
343 | use phantom::MyPhantomData;
344 | 
345 | fn main() {
346 |     assert_eq!(std::mem::size_of::<MyPhantomData<usize>>(), 0);
347 | }
348 | ```
349 | 
350 | <br>
351 | 
352 | ### Autotraits
353 | 
354 | The standard library's `PhantomData<T>` has `impl<T: ?Sized + Send> Send` and
355 | `impl<T: ?Sized + Sync> Sync`. Our type so far has neither of these because
356 | the raw pointer `*const T` is neither `Send` nor `Sync`.
357 | 
358 | A simple fix would be `Marker(Void, [Box<T>; 0])` but then we depend on a memory
359 | allocator for no reason. This fix works because `Box<T>` has the same `Send` and
360 | `Sync` impls as `T`.
361 | 
362 | Without `Box`, the same impls can be written unsafely.
363 | 
364 | ```rust
365 | mod phantom {
366 |     pub use self::MyPhantomData::*;
367 | 
368 |     pub enum MyPhantomData<T: ?Sized> {
369 |         MyPhantomData,
370 | 
371 |         #[allow(dead_code)]
372 |         #[doc(hidden)]
373 |         Marker(Void, [*const T; 0]),
374 |     }
375 | 
376 |     pub enum Void {}
377 | 
378 |     unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {}
379 |     unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {}
380 | }
381 | ```
382 | 
383 | <br>
384 | 
385 | ### Documentation
386 | 
387 | Rustdoc would render our type as:
388 | 
389 | ```console
390 | pub enum MyPhantomData<T: ?Sized> {
391 |     MyPhantomData,
392 |     // some variants omitted
393 | }
394 | ```
395 | 
396 | which is technically accurate, but misleading relative to how we want users to
397 | conceptualize this construct.
398 | 
399 | There isn't a great solution to this, but you may or may not find the following
400 | more appealing:
401 | 
402 | ```rust
403 | mod phantom {
404 |     pub use self::MyPhantomData::*;
405 | 
406 |     pub enum MyPhantomData<T: ?Sized> {
407 |         MyPhantomData,
408 | 
409 |         #[allow(dead_code)]
410 |         #[doc(hidden)]
411 |         Marker(Void, [*const T; 0]),
412 |     }
413 | 
414 |     pub enum Void {}
415 | 
416 |     unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {}
417 |     unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {}
418 | }
419 | 
420 | /// ... documentation illustrating how to use.
421 | #[allow(type_alias_bounds)]
422 | pub type MyPhantomData<T: ?Sized> = phantom::MyPhantomData<T>;
423 | 
424 | #[doc(hidden)]
425 | pub use self::phantom::*;
426 | ```
427 | 
428 | Rustdoc renders:
429 | 
430 | ```console
431 | type MyPhantomData<T: ?Sized> = MyPhantomData<T>;
432 | ```
433 | 
434 | which hides the implementation detail and drives focus to your handwritten
435 | documentation to show how the type is intended to be used.
436 | 
437 | The `#[allow(type_alias_bounds)]` attribute suppresses a future compatibility
438 | lint that triggers on type aliases with trait bounds on the left hand side. The
439 | Rust compiler currently does not respect such bounds but this behavior is
440 | considered a compiler bug and is subject to change, potentially breaking code
441 | involving trait bounds in type aliases -- hence the lint. Our code above is in
442 | the clear because the bounds in the type alias exactly match the bounds implied
443 | by well-formedness of the right hand side, so the meaning is the same whether or
444 | not the compiler looks at the type alias bounds. We want the bounds there
445 | because they do appear correctly in Rustdoc.
446 | 
447 | <br>
448 | 
449 | ### Implementation
450 | 
451 | Once the generated code is figured out, packaging this into [an attribute
452 | macro][ghost] is the easy part.
453 | 
454 | [ghost]: https://github.com/dtolnay/ghost
455 | 
456 | ```rust
457 | /// ... documentation illustrating how to use.
458 | #[phantom]
459 | struct MyPhantomData<T: ?Sized>;
460 | ```
461 | 
462 | In fact we might as well make it work for any number of type parameters and
463 | lifetimes, as well as trait bounds and where-clauses.
464 | 
465 | ```rust
466 | #[phantom]
467 | struct Crazy<'a, V: 'a, T> where &'a V: IntoIterator<Item = T>;
468 | ```
469 | 


--------------------------------------------------------------------------------
/unit-type-parameters/demo/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "case-study-unit-type-parameters"
 3 | version = "0.0.0"
 4 | authors = ["David Tolnay <dtolnay@gmail.com>"]
 5 | edition = "2021"
 6 | publish = false
 7 | 
 8 | [[bin]]
 9 | name = "case-study"
10 | path = "main.rs"
11 | 


--------------------------------------------------------------------------------
/unit-type-parameters/demo/main.rs:
--------------------------------------------------------------------------------
 1 | mod phantom { // hand-rolled `PhantomData`-like type: an enum whose unit variant doubles as its value
 2 |     pub use self::MyPhantomData::*; // glob import of the variants; in the type namespace the enum below takes precedence
 3 | 
 4 |     pub enum MyPhantomData<T: ?Sized> {
 5 |         MyPhantomData, // the unit variant users name in value position, e.g. `MyPhantomData::<usize>`
 6 | 
 7 |         #[allow(dead_code)]
 8 |         #[doc(hidden)]
 9 |         Marker(Void, [*const T; 0]), // never constructed: uses `T`, but is uninhabited (`Void`) and zero sized
10 |     }
11 | 
12 |     pub enum Void {} // no variants, so no value of this type can exist
13 | 
14 |     unsafe impl<T: ?Sized + Send> Send for MyPhantomData<T> {} // SAFETY: no `T` or pointer is ever stored; mirrors `PhantomData<T>`
15 |     unsafe impl<T: ?Sized + Sync> Sync for MyPhantomData<T> {} // SAFETY: same reasoning as the `Send` impl above
16 | }
17 | 
18 | /// ... documentation illustrating how to use.
19 | #[allow(type_alias_bounds)] // the bound is kept so that it shows up in rustdoc; it matches what the right-hand side requires
20 | pub type MyPhantomData<T: ?Sized> = phantom::MyPhantomData<T>;
21 | 
22 | #[doc(hidden)]
23 | pub use self::phantom::*; // brings the unit variant into the value namespace; the alias above wins in the type namespace
24 | 
25 | fn main() {
26 |     let _: MyPhantomData<usize> = MyPhantomData::<usize>; // left: the type alias; right: the unit variant used as a value
27 | }
28 | 


--------------------------------------------------------------------------------