├── .github
    └── workflows
    │   └── main.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Cargo.toml
├── LICENSE
├── ORG_CODE_OF_CONDUCT.md
├── README.md
├── fuzz
    ├── .gitignore
    ├── Cargo.toml
    └── fuzz_targets
    │   ├── differential.rs
    │   ├── irreducible.rs
    │   ├── opt_diff.rs
    │   ├── parse_ir.rs
    │   ├── roundtrip.rs
    │   └── roundtrip_roundtrip.rs
├── scripts
    ├── check.sh
    └── reduce-predicate.sh
├── src
    ├── backend
    │   ├── localify.rs
    │   ├── mod.rs
    │   ├── reducify.rs
    │   ├── stackify.rs
    │   └── treeify.rs
    ├── bin
    │   └── waffle-util.rs
    ├── cfg
    │   ├── domtree.rs
    │   ├── mod.rs
    │   └── postorder.rs
    ├── entity.rs
    ├── errors.rs
    ├── frontend.rs
    ├── fuzzing.rs
    ├── interp.rs
    ├── ir.rs
    ├── ir
    │   ├── debug.rs
    │   ├── display.rs
    │   ├── func.rs
    │   ├── module.rs
    │   └── value.rs
    ├── lib.rs
    ├── op_traits.rs
    ├── ops.rs
    ├── passes.rs
    ├── passes
    │   ├── basic_opt.rs
    │   ├── dom_pass.rs
    │   ├── empty_blocks.rs
    │   ├── maxssa.rs
    │   └── resolve_aliases.rs
    ├── pool.rs
    └── scoped_map.rs
└── tests
    ├── roundtrip.rs
    └── roundtrip
        ├── README.md
        ├── non-nullable-funcrefs.wat
        ├── ref-null.wat
        ├── test-simd.wat
        ├── test.wat
        ├── test2.wat
        └── typed-funcref.wat


/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on: pull_request
 4 | 
 5 | env:
 6 |   CARGO_TERM_COLOR: always
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ${{ matrix.os }}
11 |     strategy:
12 |       matrix:
13 |         include:
14 |         - build: x86_64-linux
15 |           os: ubuntu-latest
16 |     steps:
17 |     - uses: actions/checkout@v2
18 |     - name: Build
19 |       run: cargo build --verbose
20 |     - name: Run tests
21 |       run: cargo test --verbose
22 | 
23 |   check_fuzz:
24 |     runs-on: ubuntu-latest
25 |     steps:
26 |     - uses: actions/checkout@v2
27 |     - run: cargo install cargo-fuzz
28 |     - run: cargo fuzz check --dev -s none
29 | 
30 |   rustfmt:
31 |     runs-on: ubuntu-latest
32 |     steps:
33 |     - uses: actions/checkout@v2
34 |     - run: rustup update stable --no-self-update
35 |     - run: rustup default stable
36 |     - run: rustup component add rustfmt
37 |     - run: cargo fmt --all -- --check
38 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | *~
3 | .*.swp
4 | Cargo.lock
5 | /*.wasm
6 | wasm_tests/*.wasm
7 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | *Note*: this Code of Conduct pertains to individuals' behavior. Please also see the [Organizational Code of Conduct][OCoC].
 4 | 
 5 | ## Our Pledge
 6 | 
 7 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
 8 | 
 9 | ## Our Standards
10 | 
11 | Examples of behavior that contributes to creating a positive environment include:
12 | 
13 | * Using welcoming and inclusive language
14 | * Being respectful of differing viewpoints and experiences
15 | * Gracefully accepting constructive criticism
16 | * Focusing on what is best for the community
17 | * Showing empathy towards other community members
18 | 
19 | Examples of unacceptable behavior by participants include:
20 | 
21 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
22 | * Trolling, insulting/derogatory comments, and personal or political attacks
23 | * Public or private harassment
24 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
25 | * Other conduct which could reasonably be considered inappropriate in a professional setting
26 | 
27 | ## Our Responsibilities
28 | 
29 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
30 | 
31 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
32 | 
33 | ## Scope
34 | 
35 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
36 | 
37 | ## Enforcement
38 | 
39 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the Bytecode Alliance CoC team at [report@bytecodealliance.org](mailto:report@bytecodealliance.org). The CoC team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The CoC team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
40 | 
41 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the Bytecode Alliance's leadership.
42 | 
43 | ## Attribution
44 | 
45 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
46 | 
47 | [OCoC]: https://github.com/bytecodealliance/wasmtime/blob/main/ORG_CODE_OF_CONDUCT.md
48 | [homepage]: https://www.contributor-covenant.org
49 | [version]: https://www.contributor-covenant.org/version/1/4/
50 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to waffle
 2 | 
 3 | ## Code of Conduct
 4 | 
 5 | waffle is a [Bytecode Alliance] project. It follows the Bytecode Alliance's [Code
 6 | of Conduct] and [Organizational Code of Conduct].
 7 | 
 8 | [Bytecode Alliance]: https://bytecodealliance.org/
 9 | [Code of Conduct]: CODE_OF_CONDUCT.md
10 | [Organizational Code of Conduct]: ORG_CODE_OF_CONDUCT.md
11 | 
12 | ## Building
13 | 
14 | ```
15 | $ cargo build
16 | ```
17 | 
18 | ## Testing
19 | 
20 | ```
21 | $ cargo test
22 | ```
23 | 
24 | ## Fuzzing
25 | 
26 | ```
27 | $ cargo fuzz run roundtrip
28 | $ cargo fuzz run differential
29 | $ cargo fuzz run irreducible
30 | ```
31 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "waffle"
 3 | version = "0.1.1"
 4 | description = "Wasm Analysis Framework For Lightweight Experiments"
 5 | authors = ["Chris Fallin <chris@cfallin.org>"]
 6 | license = "Apache-2.0 WITH LLVM-exception"
 7 | edition = "2018"
 8 | repository = "https://github.com/bytecodealliance/waffle"
 9 | 
10 | [dependencies]
11 | wasmparser = "0.212"
12 | wasm-encoder = "0.212"
13 | anyhow = "1.0"
14 | structopt = "0.3"
15 | log = "0.4"
16 | env_logger = "0.11"
17 | fxhash = "0.2"
18 | smallvec = "1.13"
19 | rayon = "1.10"
20 | lazy_static = "1.4"
21 | libc = "0.2"
22 | addr2line = "0.21"
23 | 
24 | # For fuzzing only. Versions must match those in fuzz/Cargo.toml.
25 | libfuzzer-sys = { version = "0.4.7", optional = true }
26 | wasm-smith = { version = "0.202", optional = true }
27 | 
28 | [dev-dependencies]
29 | wat = "1.212.0"
30 | 
31 | [features]
32 | default = []
33 | fuzzing = ["libfuzzer-sys", "wasm-smith"]
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 
204 | 
205 | --- LLVM Exceptions to the Apache 2.0 License ----
206 | 
207 | As an exception, if, as a result of your compiling your source code, portions
208 | of this Software are embedded into an Object form of such source code, you
209 | may redistribute such embedded portions in such Object form without complying
210 | with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
211 | 
212 | In addition, if you combine or link compiled forms of this Software with
213 | software that is licensed under the GPLv2 ("Combined Software") and if a
214 | court of competent jurisdiction determines that the patent provision (Section
215 | 3), the indemnity provision (Section 9) or other Section of the License
216 | conflicts with the conditions of the GPLv2, you may retroactively and
217 | prospectively choose to deem waived or otherwise exclude such Section(s) of
218 | the License, but only in their entirety and only with respect to the Combined
219 | Software.
220 | 
221 | 


--------------------------------------------------------------------------------
/ORG_CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Bytecode Alliance Organizational Code of Conduct (OCoC)
  2 | 
  3 | *Note*: this Code of Conduct pertains to organizations' behavior. Please also see the [Individual Code of Conduct](CODE_OF_CONDUCT.md).
  4 | 
  5 | ## Preamble
  6 | 
  7 | The Bytecode Alliance (BA) welcomes involvement from organizations,
  8 | including commercial organizations.  This document is an
  9 | *organizational* code of conduct, intended particularly to provide
 10 | guidance to commercial organizations.  It is distinct from the
 11 | [Individual Code of Conduct (ICoC)](CODE_OF_CONDUCT.md), and does not 
 12 | replace the ICoC. This OCoC applies to any group of people acting in 
 13 | concert as a BA member or as a participant in BA activities, whether 
 14 | or not that group is formally incorporated in some jurisdiction.
 15 | 
 16 | The code of conduct described below is not a set of rigid rules, and
 17 | we did not write it to encompass every conceivable scenario that might
 18 | arise.  For example, it is theoretically possible there would be times
 19 | when asserting patents is in the best interest of the BA community as
 20 | a whole.  In such instances, consult with the BA, strive for
 21 | consensus, and interpret these rules with an intent that is generous
 22 | to the community the BA serves.
 23 | 
 24 | While we may revise these guidelines from time to time based on
 25 | real-world experience, overall they are based on a simple principle:
 26 | 
 27 | *Bytecode Alliance members should observe the distinction between
 28 |  public community functions and private functions — especially
 29 |  commercial ones — and should ensure that the latter support, or at
 30 |  least do not harm, the former.*
 31 | 
 32 | ## Guidelines
 33 | 
 34 |  * **Do not cause confusion about Wasm standards or interoperability.** 
 35 |  
 36 |    Having an interoperable WebAssembly core is a high priority for
 37 |    the BA, and members should strive to preserve that core.  It is fine
 38 |    to develop additional non-standard features or APIs, but they
 39 |    should always be clearly distinguished from the core interoperable
 40 |    Wasm.
 41 |  
 42 |    Treat the WebAssembly name and any BA-associated names with
 43 |    respect, and follow BA trademark and branding guidelines.  If you
 44 |    distribute a customized version of software originally produced by
 45 |    the BA, or if you build a product or service using BA-derived
 46 |    software, use names that clearly distinguish your work from the
 47 |    original.  (You should still provide proper attribution to the
 48 |    original, of course, wherever such attribution would normally be
 49 |    given.)
 50 |      
 51 |    Further, do not use the WebAssembly name or BA-associated names in
 52 |    other public namespaces in ways that could cause confusion, e.g.,
 53 |    in company names, names of commercial service offerings, domain
 54 |    names, publicly-visible social media accounts or online service
 55 |    accounts, etc.  It may sometimes be reasonable, however, to
 56 |    register such a name in a new namespace and then immediately donate
 57 |    control of that account to the BA, because that would help the project
 58 |    maintain its identity.
 59 | 
 60 |    For further guidance, see the BA Trademark and Branding Policy
 61 |    [TODO: create policy, then insert link].
 62 |      
 63 |  * **Do not restrict contributors.** If your company requires
 64 |    employees or contractors to sign non-compete agreements, those
 65 |    agreements must not prevent people from participating in the BA or
 66 |    contributing to related projects.
 67 | 
 68 |    This does not mean that all non-compete agreements are incompatible
 69 |    with this code of conduct.  For example, a company may restrict an
 70 |    employee's ability to solicit the company's customers.  However, an
 71 |    agreement must not block any form of technical or social
 72 |    participation in BA activities, including but not limited to the
 73 |    implementation of particular features.
 74 | 
 75 |    The accumulation of experience and expertise in individual persons,
 76 |    who are ultimately free to direct their energy and attention as
 77 |    they decide, is one of the most important drivers of progress in
 78 |    open source projects.  A company that limits this freedom may hinder
 79 |    the success of the BA's efforts.
 80 | 
 81 |  * **Do not use patents as offensive weapons.** If any BA participant
 82 |    prevents the adoption or development of BA technologies by
 83 |    asserting its patents, that undermines the purpose of the
 84 |    coalition.  The collaboration fostered by the BA cannot include
 85 |    members who act to undermine its work.
 86 |  
 87 |  * **Practice responsible disclosure** for security vulnerabilities.
 88 |    Use designated, non-public reporting channels to disclose technical
 89 |    vulnerabilities, and give the project a reasonable period to
 90 |    respond, remediate, and patch.  [TODO: optionally include the
 91 |    security vulnerability reporting URL here.]
 92 | 
 93 |    Vulnerability reporters may patch their company's own offerings, as
 94 |    long as that patching does not significantly delay the reporting of
 95 |    the vulnerability.  Vulnerability information should never be used
 96 |    for unilateral commercial advantage.  Vendors may legitimately
 97 |    compete on the speed and reliability with which they deploy
 98 |    security fixes, but withholding vulnerability information damages
 99 |    everyone in the long run by risking harm to the BA project's
100 |    reputation and to the security of all users.
101 | 
102 |  * **Respect the letter and spirit of open source practice.** While
103 |      there is not space to list here all possible aspects of standard
104 |      open source practice, some examples will help show what we mean:
105 | 
106 |    * Abide by all applicable open source license terms.  Do not engage
107 |      in copyright violation or misattribution of any kind.
108 | 
109 |    * Do not claim others' ideas or designs as your own.
110 | 
111 |    * When others engage in publicly visible work (e.g., an upcoming
112 |      demo that is coordinated in a public issue tracker), do not
113 |      unilaterally announce early releases or early demonstrations of
114 |      that work ahead of their schedule in order to secure private
115 |      advantage (such as marketplace advantage) for yourself.
116 | 
117 |    The BA reserves the right to determine what constitutes good open
118 |    source practices and to take action as it deems appropriate to
119 |    encourage, and if necessary enforce, such practices.
120 | 
121 | ## Enforcement
122 | 
123 | Instances of organizational behavior in violation of the OCoC may 
124 | be reported by contacting the Bytecode Alliance CoC team at 
125 | [report@bytecodealliance.org](mailto:report@bytecodealliance.org). The 
126 | CoC team will review and investigate all complaints, and will respond 
127 | in a way that it deems appropriate to the circumstances. The CoC team 
128 | is obligated to maintain confidentiality with regard to the reporter of 
129 | an incident. Further details of specific enforcement policies may be 
130 | posted separately.
131 | 
132 | When the BA deems an organization in violation of this OCoC, the BA
133 | will, at its sole discretion, determine what action to take.  The BA
134 | will decide what type, degree, and duration of corrective action is
135 | needed, if any, before a violating organization can be considered for
136 | membership (if it was not already a member) or can have its membership
137 | reinstated (if it was a member and the BA canceled its membership due
138 | to the violation).
139 | 
140 | In practice, the BA's first approach will be to start a conversation,
141 | with punitive enforcement used only as a last resort.  Violations
142 | often turn out to be unintentional and swiftly correctable with all
143 | parties acting in good faith.
144 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 |   <h1>waffle</h1>
  3 | 
  4 |   <p>
  5 |     <strong>waffle Wasm compiler library</strong>
  6 |   </p>
  7 | 
  8 |   <strong>A <a href="https://bytecodealliance.org/">Bytecode Alliance</a> project</strong>
  9 | 
 10 |   <p>
 11 |     <a href="https://github.com/bytecodealliance/waffle/actions?query=workflow%3ACI"><img src="https://github.com/bytecodealliance/waffle/workflows/CI/badge.svg" alt="build status" /></a>
 12 |     <a href="https://bytecodealliance.zulipchat.com/#narrow/stream/223391-wasm"><img src="https://img.shields.io/badge/zulip-join_chat-brightgreen.svg" alt="zulip chat" /></a>
 13 |     <a href="https://docs.rs/waffle"><img src="https://docs.rs/waffle/badge.svg" alt="Documentation Status" /></a>
 14 |   </p>
 15 | 
 16 |   <h3>
 17 |     <a href="https://docs.rs/waffle">API Docs</a>
 18 |     <span> | </span>
 19 |     <a href="https://github.com/bytecodealliance/waffle/blob/main/CONTRIBUTING.md">Contributing</a>
 20 |     <span> | </span>
 21 |     <a href="https://bytecodealliance.zulipchat.com/#narrow/stream/223391-wasm">Chat</a>
 22 |   </h3>
 23 | </div>
 24 | 
 25 | waffle (the Wasm Analysis Framework for Lightweight Experimentation)
 26 | is an SSA IR compiler framework for Wasm-to-Wasm transforms, in Rust.
 27 | 
 28 | ## Status
 29 | 
 30 | The transforms from Wasm to IR and from IR to Wasm work well, and have been
 31 | fuzzed in various ways. In particular, waffle is fuzzed by roundtripping Wasm
 32 | through SSA IR and back, and differentially executing the original and
 33 | roundtripped Wasm under Wasmtime (with limits on execution time). At this time,
 34 | no correctness bugs are known.
 35 | 
 36 | Waffle is able to roundtrip (convert to IR, then compile back to Wasm) complex
 37 | modules such as the SpiderMonkey JS engine compiled to Wasm.
 38 | 
 39 | Waffle has some basic mid-end optimizations working, such as GVN and constant
 40 | propagation. Much more could be done on this.
 41 | 
 42 | There are various ways in which the generated Wasm bytecode could be improved;
 43 | work is ongoing on this.
 44 | 
 45 | waffle is in use by [weval](https://github.com/bytecodealliance/weval), the
 46 | WebAssembly partial evaluator, and was developed for this purpose.
 47 | 
 48 | ## Architecture
 49 | 
 50 | The IR is a CFG of blocks, containing operators that correspond 1-to-1 to Wasm
 51 | operators. Dataflow is via SSA, and blocks have blockparams (rather than
 52 | phi-nodes). Wasm locals are not used in the IR (they are converted to SSA).
 53 | 
 54 | The frontend converts Wasm into this IR by building SSA as it goes, inserting
 55 | blockparams when it discovers multiple reaching definitions for a local.
 56 | Multivalue Wasm (parameters and results for every control-flow block) is fully
 57 | supported, and converted to SSA. This process more or less works like
 58 | Cranelift's does, except that memory, table, etc. operations remain at the Wasm
 59 | abstraction layer (are not lowered into implementation details), and arithmetic
 60 | operators mirror Wasm's exactly.
 61 | 
 62 | The backend operates in four stages:
 63 | 
 64 | * [Reducifier](src/backend/reducify.rs), which uses context-sensitive
 65 |   block duplication to turn loops with side-entrances (which are
 66 |   irreducible) into reducible control flow, suitable for lowering to
 67 |   Wasm control-flow primitives.
 68 | 
 69 | * [Structured control flow recovery](src/backend/stackify.rs), which uses
 70 |   [Ramsey's algorithm](https://dl.acm.org/doi/abs/10.1145/3547621) to convert
 71 |   the CFG back into an AST of Wasm control-flow primitives (blocks, loops, and
 72 |   if-then AST nodes).
 73 | 
 74 | * [Treeification](src/backend/treeify.rs), which computes whether some SSA
 75 |   values are used only once and can be moved to just before their single
 76 |   consumer, computing the value directly onto the Wasm stack without the need
 77 |   for an intermediate local. This is a very simple form of code scheduling.
 78 | 
 79 | * [Localification](src/backend/localify.rs), which performs a register
 80 |   allocation (using a simple linear-scan algorithm) to assign all SSA values to
 81 |   locals such that no live-ranges overlap in the same local.
 82 | 
 83 | ## Comparisons / Related Work
 84 | 
 85 | - Like [Binaryen](https://github.com/WebAssembly/binaryen) but with an SSA IR,
 86 |   rather than an AST-based IR. Dataflow analyses are much easier when one
 87 |   doesn't have to handle arbitrary reads and writes to locals. Binaryen is able
 88 |   to stackify/reloop arbitrary control flow (CFG to Wasm) but does not
 89 |   implement the other direction (Wasm to CFG), and it has only a C/C++ API, not
 90 |   Rust.
 91 | 
 92 | - Like [Walrus](https://github.com/rustwasm/walrus) but also with an SSA IR.
 93 |   Walrus is in Rust and designed for Wasm-to-Wasm transforms as well, but its
 94 |   IR mirrors the Wasm bytecode closely and thus presents the same difficulties
 95 |   as Binaryen for traditional CFG-of-SSA-style compiler analyses and
 96 |   transforms.
 97 | 
 98 | - Halfway like
 99 |   [Cranelift](https://github.com/bytecodealliance/wasmtime/tree/main/cranelift/),
100 |   in that the IR is similar to Cranelift's (a CFG of SSA IR with blockparams),
101 |   but with the Wasm backend as well (Cranelift only does Wasm-to-IR). WAFFLE's
102 |   IR also deliberately remains at the Wasm abstraction level, maintaining
103 |   1-to-1 correspondence with all operators and maintaining the concepts of
104 |   memories, tables, etc., while Cranelift lowers operations and storage
105 |   abstractions into runtime/embedding-specific implementation details in the
106 |   IR.
107 | 


--------------------------------------------------------------------------------
/fuzz/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | target
3 | corpus
4 | artifacts
5 | 


--------------------------------------------------------------------------------
/fuzz/Cargo.toml:
--------------------------------------------------------------------------------
 1 | 
 2 | [package]
 3 | name = "waffle-fuzz"
 4 | version = "0.0.0"
 5 | authors = ["Automatically generated"]
 6 | publish = false
 7 | edition = "2018"
 8 | 
 9 | [package.metadata]
10 | cargo-fuzz = true
11 | 
12 | [dependencies]
13 | libfuzzer-sys = { version = "0.4.7" }
14 | arbitrary = { version = "1.3.2", features = ["derive"] }
15 | wasm-smith = "0.202.0"
16 | env_logger = "0.9"
17 | log = "0.4"
18 | wasmparser = "0.202.0"
19 | wasmtime = "19.0"
20 | 
21 | [dependencies.waffle]
22 | path = ".."
23 | features = ["fuzzing"]
24 | 
25 | # Prevent this from interfering with workspaces
26 | [workspace]
27 | members = ["."]
28 | 
29 | [[bin]]
30 | name = "parse_ir"
31 | path = "fuzz_targets/parse_ir.rs"
32 | test = false
33 | doc = false
34 | 
35 | [[bin]]
36 | name = "roundtrip"
37 | path = "fuzz_targets/roundtrip.rs"
38 | test = false
39 | doc = false
40 | 
41 | [[bin]]
42 | name = "roundtrip_roundtrip"
43 | path = "fuzz_targets/roundtrip_roundtrip.rs"
44 | test = false
45 | doc = false
46 | 
47 | [[bin]]
48 | name = "differential"
49 | path = "fuzz_targets/differential.rs"
50 | test = false
51 | doc = false
52 | 
53 | [[bin]]
54 | name = "opt_diff"
55 | path = "fuzz_targets/opt_diff.rs"
56 | test = false
57 | doc = false
58 | 
59 | [[bin]]
60 | name = "irreducible"
61 | path = "fuzz_targets/irreducible.rs"
62 | test = false
63 | doc = false
64 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/differential.rs:
--------------------------------------------------------------------------------
  1 | #![no_main]
  2 | use libfuzzer_sys::fuzz_target;
  3 | use std::sync::atomic::{AtomicU64, Ordering};
  4 | 
  5 | use waffle::{FrontendOptions, Module, OptOptions};
  6 | 
  7 | fuzz_target!(|module: waffle::fuzzing::ArbitraryModule| {
  8 |     let module = module.0;
  9 |     let _ = env_logger::try_init();
 10 |     log::debug!("original module: {:?}", module);
 11 | 
 12 |     let orig_bytes = module.to_bytes();
 13 | 
 14 |     if waffle::fuzzing::reject(&orig_bytes[..]) {
 15 |         log::debug!("Discarding fuzz run. Body:\n{:?}", module);
 16 |         return;
 17 |     } else {
 18 |         log::info!("body: {:?}", module);
 19 |     }
 20 | 
 21 |     let mut config = wasmtime::Config::default();
 22 |     config.consume_fuel(true);
 23 |     let engine = wasmtime::Engine::new(&config).unwrap();
 24 |     let orig_module =
 25 |         wasmtime::Module::new(&engine, &orig_bytes[..]).expect("failed to parse original wasm");
 26 |     let mut orig_store = wasmtime::Store::new(&engine, ());
 27 |     orig_store.set_fuel(10000).unwrap();
 28 |     let orig_instance = wasmtime::Instance::new(&mut orig_store, &orig_module, &[]);
 29 |     let orig_instance = match orig_instance {
 30 |         Ok(orig_instance) => orig_instance,
 31 |         Err(e) => {
 32 |             log::info!("cannot run start on orig intsance ({:?}); discarding", e);
 33 |             return;
 34 |         }
 35 |     };
 36 | 
 37 |     let mut parsed_module =
 38 |         Module::from_wasm_bytes(&orig_bytes[..], &FrontendOptions::default()).unwrap();
 39 |     parsed_module.expand_all_funcs().unwrap();
 40 |     parsed_module.per_func_body(|body| body.optimize(&OptOptions::default()));
 41 |     let roundtrip_bytes = parsed_module.to_wasm_bytes().unwrap();
 42 | 
 43 |     if let Ok(filename) = std::env::var("FUZZ_DUMP_WASM") {
 44 |         std::fs::write(format!("{}_orig.wasm", filename), &orig_bytes[..]).unwrap();
 45 |         std::fs::write(format!("{}_roundtrip.wasm", filename), &roundtrip_bytes[..]).unwrap();
 46 |     }
 47 | 
 48 |     let total = TOTAL.fetch_add(1, Ordering::Relaxed);
 49 | 
 50 |     let roundtrip_module = wasmtime::Module::new(&engine, &roundtrip_bytes[..])
 51 |         .expect("failed to parse roundtripped wasm");
 52 |     let mut roundtrip_store = wasmtime::Store::new(&engine, ());
 53 |     // After roundtrip, fuel consumption rate may differ. That's fine;
 54 |     // what matters is that it terminated above without a trap (hence
 55 |     // halts in a reasonable time).
 56 |     roundtrip_store.set_fuel(u64::MAX).unwrap();
 57 |     let roundtrip_instance = wasmtime::Instance::new(&mut roundtrip_store, &roundtrip_module, &[])
 58 |         .expect("cannot instantiate roundtripped wasm");
 59 | 
 60 |     // Ensure exports are equal.
 61 | 
 62 |     let a_globals: Vec<_> = orig_instance
 63 |         .exports(&mut orig_store)
 64 |         .filter_map(|e| e.into_global())
 65 |         .collect();
 66 |     let a_globals: Vec<wasmtime::Val> = a_globals
 67 |         .into_iter()
 68 |         .map(|g| g.get(&mut orig_store))
 69 |         .collect();
 70 |     let a_mems: Vec<wasmtime::Memory> = orig_instance
 71 |         .exports(&mut orig_store)
 72 |         .filter_map(|e| e.into_memory())
 73 |         .collect();
 74 | 
 75 |     let b_globals: Vec<_> = roundtrip_instance
 76 |         .exports(&mut roundtrip_store)
 77 |         .filter_map(|e| e.into_global())
 78 |         .collect();
 79 |     let b_globals: Vec<wasmtime::Val> = b_globals
 80 |         .into_iter()
 81 |         .map(|g| g.get(&mut roundtrip_store))
 82 |         .collect();
 83 |     let b_mems: Vec<wasmtime::Memory> = roundtrip_instance
 84 |         .exports(&mut roundtrip_store)
 85 |         .filter_map(|e| e.into_memory())
 86 |         .collect();
 87 | 
 88 |     log::info!("a_globals = {:?}", a_globals);
 89 |     log::info!("b_globals = {:?}", b_globals);
 90 | 
 91 |     assert_eq!(a_globals.len(), b_globals.len());
 92 |     for (a, b) in a_globals.into_iter().zip(b_globals.into_iter()) {
 93 |         match (a, b) {
 94 |             (wasmtime::Val::I32(a), wasmtime::Val::I32(b)) => assert_eq!(a, b),
 95 |             (wasmtime::Val::I64(a), wasmtime::Val::I64(b)) => assert_eq!(a, b),
 96 |             (wasmtime::Val::F32(a), wasmtime::Val::F32(b)) => assert_eq!(a, b),
 97 |             (wasmtime::Val::F64(a), wasmtime::Val::F64(b)) => assert_eq!(a, b),
 98 |             _ => panic!("mismatching types"),
 99 |         }
100 |     }
101 | 
102 |     assert_eq!(a_mems.len(), b_mems.len());
103 |     for (a, b) in a_mems.into_iter().zip(b_mems.into_iter()) {
104 |         let a_data = a.data(&orig_store);
105 |         let b_data = b.data(&roundtrip_store);
106 |         assert_eq!(a_data, b_data);
107 |     }
108 | 
109 |     success(total);
110 | });
111 | 
112 | static TOTAL: AtomicU64 = AtomicU64::new(0);
113 | static SUCCESS: AtomicU64 = AtomicU64::new(0);
114 | 
115 | fn success(total: u64) {
116 |     let value = SUCCESS.fetch_add(1, Ordering::Relaxed);
117 |     if value % 100 == 0 {
118 |         eprintln!("SUCCESS: {} / TOTAL: {}", value, total);
119 |     }
120 | }
121 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/irreducible.rs:
--------------------------------------------------------------------------------
  1 | //! Fuzzing irreducible control flow handling.
  2 | //!
  3 | //! 1. Generate a testcase with an arbitrary CFG.
  4 | //! 2. Compile it.
  5 | //!
  6 | //! (That's it.) Showing equivalence of execution is a harder problem
  7 | //! and is left as a future exercise.
  8 | 
  9 | #![no_main]
 10 | use arbitrary::Arbitrary;
 11 | use libfuzzer_sys::{fuzz_target, Corpus};
 12 | use waffle::{
 13 |     entity::PerEntity, Block, BlockTarget, FuncDecl, FunctionBody, Module, SignatureData,
 14 |     Terminator, Type,
 15 | };
 16 | 
 17 | #[derive(Clone, Debug, Arbitrary)]
 18 | struct CFG {
 19 |     num_blocks: u8,
 20 |     edges: Vec<(u8, u8)>,
 21 | }
 22 | 
 23 | impl CFG {
 24 |     fn to_module(&self) -> Option<Module> {
 25 |         let mut module = Module::empty();
 26 |         let sig = module.signatures.push(SignatureData {
 27 |             params: vec![Type::I32],
 28 |             returns: vec![],
 29 |         });
 30 | 
 31 |         let num_blocks = u32::from(std::cmp::max(1, self.num_blocks));
 32 | 
 33 |         let mut body = FunctionBody::new(&module, sig);
 34 | 
 35 |         // Entry block0 already present; add the rest.
 36 |         for _ in 1..num_blocks {
 37 |             let block = body.add_block();
 38 |             body.add_blockparam(block, Type::I32);
 39 |         }
 40 | 
 41 |         let mut edges_by_origin: PerEntity<Block, Vec<Block>> = PerEntity::default();
 42 |         for &(from, to) in &self.edges {
 43 |             if from >= self.num_blocks || to >= self.num_blocks {
 44 |                 return None;
 45 |             }
 46 |             let from = Block::from(u32::from(from));
 47 |             let to = Block::from(u32::from(to));
 48 |             edges_by_origin[from].push(to);
 49 |         }
 50 | 
 51 |         for (block, def) in body.blocks.entries_mut() {
 52 |             let param = def.params[0].1;
 53 |             let dests = &edges_by_origin[block];
 54 |             let mut targets = dests
 55 |                 .iter()
 56 |                 .map(|&dest| BlockTarget {
 57 |                     block: dest,
 58 |                     args: vec![param],
 59 |                 })
 60 |                 .collect::<Vec<_>>();
 61 |             let terminator = match dests.len() {
 62 |                 0 => Terminator::Return {
 63 |                     values: vec![param],
 64 |                 },
 65 |                 1 => Terminator::Br {
 66 |                     target: targets[0].clone(),
 67 |                 },
 68 |                 2 => Terminator::CondBr {
 69 |                     cond: param,
 70 |                     if_true: targets[0].clone(),
 71 |                     if_false: targets[1].clone(),
 72 |                 },
 73 |                 _ => {
 74 |                     let default = targets.pop().unwrap();
 75 |                     Terminator::Select {
 76 |                         value: param,
 77 |                         targets,
 78 |                         default,
 79 |                     }
 80 |                 }
 81 |             };
 82 |             def.terminator = terminator;
 83 |         }
 84 | 
 85 |         body.recompute_edges();
 86 |         body.validate().unwrap();
 87 |         module
 88 |             .funcs
 89 |             .push(FuncDecl::Body(sig, "func0".to_string(), body));
 90 | 
 91 |         Some(module)
 92 |     }
 93 | }
 94 | 
 95 | fuzz_target!(|cfg: CFG| -> Corpus {
 96 |     let _ = env_logger::try_init();
 97 |     let module = match cfg.to_module() {
 98 |         Some(m) => m,
 99 |         None => return Corpus::Reject,
100 |     };
101 |     let _ = module.to_wasm_bytes().unwrap();
102 |     Corpus::Keep
103 | });
104 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/opt_diff.rs:
--------------------------------------------------------------------------------
 1 | #![no_main]
 2 | use libfuzzer_sys::fuzz_target;
 3 | 
 4 | use waffle::{FrontendOptions, InterpContext, InterpResult, Module, OptOptions};
 5 | 
 6 | // Differential fuzz target: runs the start function of the parsed module
 7 | // and of an optimized copy under the waffle interpreter, then requires that
 8 | // both runs leave identical memories and globals.
 9 | fuzz_target!(|module: waffle::fuzzing::ArbitraryModule| {
10 |     let module = module.0;
11 |     let _ = env_logger::try_init();
12 |     log::debug!("original module: {:?}", module);
13 | 
14 |     let orig_bytes = module.to_bytes();
15 | 
16 |     if waffle::fuzzing::reject(&orig_bytes[..]) {
17 |         log::debug!("Discarding fuzz run. Body:\n{:?}", module);
18 |         return;
19 |     } else {
20 |         log::info!("body: {:?}", module);
21 |     }
22 | 
23 |     let mut parsed_module =
24 |         Module::from_wasm_bytes(&orig_bytes[..], &FrontendOptions::default()).unwrap();
25 |     parsed_module.expand_all_funcs().unwrap();
26 | 
27 |     let start = parsed_module.start_func.unwrap();
28 | 
29 |     let mut orig_ctx = match InterpContext::new(&parsed_module) {
30 |         Ok(ctx) => ctx,
31 |         Err(e) => {
32 |             log::trace!("Rejecting due to instantiation error: {:?}", e);
33 |             return;
34 |         }
35 |     };
36 |     orig_ctx.fuel = 10000;
37 | 
38 |     match orig_ctx.call(&parsed_module, start, &[]) {
39 |         InterpResult::OutOfFuel => {
40 |             // Silently reject.
41 |             log::trace!("Rejecting due to timeout in orig");
42 |             return;
43 |         }
44 |         InterpResult::Trap(..) => {
45 |             // Silently reject.
46 |             log::trace!("Rejecting due to trap in orig");
47 |             return;
48 |         }
49 |         InterpResult::Ok(_) => {}
50 |     }
51 | 
52 |     // Optimize the copy, not the original: `opt_module` is the module that
53 |     // is executed and compared against the original run below.
54 |     let mut opt_module = parsed_module.clone();
55 |     opt_module.per_func_body(|body| body.optimize(&OptOptions::default()));
56 |     opt_module.per_func_body(|body| body.convert_to_max_ssa(None));
57 | 
58 |     let mut opt_ctx = InterpContext::new(&opt_module).unwrap();
59 |     // Allow a little leeway for opts to not actually optimize.
60 |     opt_ctx.fuel = 20000;
61 |     opt_ctx.call(&opt_module, start, &[]).ok().unwrap();
62 | 
63 |     log::trace!(
64 |         "Orig ran in {} fuel; opt ran in {} fuel",
65 |         10000 - orig_ctx.fuel,
66 |         20000 - opt_ctx.fuel
67 |     );
68 | 
69 |     assert_eq!(orig_ctx.memories, opt_ctx.memories);
70 |     assert_eq!(orig_ctx.globals, opt_ctx.globals);
71 | });
72 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/parse_ir.rs:
--------------------------------------------------------------------------------
 1 | #![no_main]
 2 | use libfuzzer_sys::fuzz_target;
 3 | 
 4 | use waffle::{FrontendOptions, Module};
 5 | 
 6 | fuzz_target!(|module: wasm_smith::Module| {
 7 |     let _ = env_logger::try_init();
 8 |     let _parsed_module =
 9 |         Module::from_wasm_bytes(&module.to_bytes()[..], &FrontendOptions::default()).unwrap();
10 | });
11 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/roundtrip.rs:
--------------------------------------------------------------------------------
 1 | #![no_main]
 2 | use libfuzzer_sys::fuzz_target;
 3 | 
 4 | use waffle::{FrontendError, FrontendOptions, Module, OptOptions};
 5 | 
 6 | fuzz_target!(|module: wasm_smith::Module| {
 7 |     let _ = env_logger::try_init();
 8 |     log::debug!("original module: {:?}", module);
 9 |     let orig_bytes = module.to_bytes();
10 |     let mut parsed_module =
11 |         match Module::from_wasm_bytes(&orig_bytes[..], &FrontendOptions::default()) {
12 |             Ok(m) => m,
13 |             Err(e) => {
14 |                 match e.downcast::<FrontendError>() {
15 |                     Ok(FrontendError::UnsupportedFeature(_)) | Ok(FrontendError::TooLarge(_)) => {
16 |                         // Just skip this case.
17 |                         return;
18 |                     }
19 |                     Ok(e) => {
20 |                         panic!("Frontend error: {:?}", e);
21 |                     }
22 |                     Err(e) => {
23 |                         panic!("Other error when parsing module: {:?}", e);
24 |                     }
25 |                 }
26 |             }
27 |         };
28 |     parsed_module.expand_all_funcs().unwrap();
29 |     parsed_module.per_func_body(|body| body.optimize(&OptOptions::default()));
30 |     let _ = parsed_module.to_wasm_bytes();
31 | });
32 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/roundtrip_roundtrip.rs:
--------------------------------------------------------------------------------
 1 | #![no_main]
 2 | use libfuzzer_sys::fuzz_target;
 3 | 
 4 | use waffle::{FrontendOptions, Module};
 5 | 
 6 | fuzz_target!(|module: wasm_smith::Module| {
 7 |     let _ = env_logger::try_init();
 8 |     log::debug!("original module: {:?}", module);
 9 |     let orig_bytes = module.to_bytes();
10 |     let parsed_module =
11 |         Module::from_wasm_bytes(&orig_bytes[..], &FrontendOptions::default()).unwrap();
12 |     let roundtrip_bytes = parsed_module.to_wasm_bytes().unwrap();
13 |     if let Ok(filename) = std::env::var("ROUNDTRIP_WASM_SAVE") {
14 |         std::fs::write(filename, &roundtrip_bytes[..]).unwrap();
15 |     }
16 |     let parsed_roundtrip_module =
17 |         Module::from_wasm_bytes(&roundtrip_bytes[..], &FrontendOptions::default()).unwrap();
18 |     let _ = parsed_roundtrip_module.to_wasm_bytes();
19 | });
20 | 


--------------------------------------------------------------------------------
/scripts/check.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -e
4 | 
5 | cargo fmt --check
6 | cargo check
7 | cargo +nightly fuzz check
8 | 


--------------------------------------------------------------------------------
/scripts/reduce-predicate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Predicate script: exits 0 only when the module given as $1 runs cleanly
 4 | # under wasmtime (within a 1-second timeout), roundtrips through waffle-util,
 5 | # and the roundtripped output (o.wasm) then fails to run. Any other outcome
 6 | # exits 1. "$1" is quoted so paths containing whitespace are passed intact.
 7 | 
 8 | timeout 1 wasmtime run --disable-cache "$1"
 9 | if [ $? -ne 0 ]; then
10 |     echo bad: initial run crashes too
11 |     exit 1
12 | fi
13 | target/release/waffle-util roundtrip -i "$1" -o o.wasm
14 | if [ $? -ne 0 ]; then
15 |     echo bad: roundtrip
16 |     exit 1
17 | fi
18 | wasmtime run --disable-cache o.wasm
19 | if [ $? -ne 0 ]; then
20 |     echo ok: still crashes
21 |     exit 0
22 | else
23 |     echo bad: no longer crashes
24 |     exit 1
25 | fi
26 | 


--------------------------------------------------------------------------------
/src/backend/localify.rs:
--------------------------------------------------------------------------------
  1 | //! Localification: a simple form of register allocation that picks
  2 | //! locations for SSA values in Wasm locals.
  3 | 
  4 | use crate::backend::treeify::Trees;
  5 | use crate::cfg::CFGInfo;
  6 | use crate::entity::{EntityVec, PerEntity};
  7 | use crate::ir::{Block, FunctionBody, Local, Type, Value, ValueDef};
  8 | use smallvec::{smallvec, SmallVec};
  9 | use std::collections::{HashMap, HashSet};
 10 | use std::ops::Range;
 11 | 
 12 | #[derive(Clone, Debug, Default)]
 13 | pub struct Localifier {
 14 |     pub values: PerEntity<Value, SmallVec<[Local; 2]>>,
 15 |     pub locals: EntityVec<Local, Type>,
 16 | }
 17 | 
 18 | impl Localifier {
 19 |     pub fn compute(body: &FunctionBody, cfg: &CFGInfo, trees: &Trees) -> Self {
 20 |         Context::new(body, cfg, trees).compute()
 21 |     }
 22 | }
 23 | 
 24 | struct Context<'a> {
 25 |     body: &'a FunctionBody,
 26 |     cfg: &'a CFGInfo,
 27 |     trees: &'a Trees,
 28 |     results: Localifier,
 29 | 
 30 |     /// Precise liveness for each block: live Values at the end.
 31 |     block_end_live: PerEntity<Block, HashSet<Value>>,
 32 | 
 33 |     /// Liveranges for each Value, in an arbitrary index space
 34 |     /// (concretely, the span of first to last instruction visit step
 35 |     /// index in an RPO walk over the function body).
 36 |     ranges: HashMap<Value, Range<usize>>,
 37 |     /// Number of points.
 38 |     points: usize,
 39 | }
 40 | 
 41 | trait Visitor {
 42 |     fn visit_use(&mut self, _: Value) {}
 43 |     fn visit_def(&mut self, _: Value) {}
 44 |     fn post_inst(&mut self, _: Value) {}
 45 |     fn pre_inst(&mut self, _: Value) {}
 46 |     fn post_term(&mut self) {}
 47 |     fn pre_term(&mut self) {}
 48 |     fn post_params(&mut self) {}
 49 |     fn pre_params(&mut self) {}
 50 | }
 51 | 
 52 | struct BlockVisitor<'a, V: Visitor> {
 53 |     body: &'a FunctionBody,
 54 |     trees: &'a Trees,
 55 |     visitor: V,
 56 | }
 57 | impl<'a, V: Visitor> BlockVisitor<'a, V> {
 58 |     fn new(body: &'a FunctionBody, trees: &'a Trees, visitor: V) -> Self {
 59 |         log::trace!(
 60 |             "localify: running on:\n{}",
 61 |             body.display_verbose("| ", None)
 62 |         );
 63 |         Self {
 64 |             body,
 65 |             trees,
 66 |             visitor,
 67 |         }
 68 |     }
 69 |     fn visit_block(&mut self, block: Block) {
 70 |         self.visitor.post_term();
 71 |         self.body.blocks[block].terminator.visit_uses(|u| {
 72 |             self.visit_use(u);
 73 |         });
 74 |         self.visitor.pre_term();
 75 | 
 76 |         for &inst in self.body.blocks[block].insts.iter().rev() {
 77 |             if self.trees.owner.contains_key(&inst) || self.trees.remat.contains(&inst) {
 78 |                 continue;
 79 |             }
 80 |             self.visitor.post_inst(inst);
 81 |             self.visit_inst(inst, /* root = */ true);
 82 |             self.visitor.pre_inst(inst);
 83 |         }
 84 | 
 85 |         self.visitor.post_params();
 86 |         for &(_, param) in &self.body.blocks[block].params {
 87 |             self.visitor.visit_def(param);
 88 |         }
 89 |         self.visitor.pre_params();
 90 |     }
 91 |     fn visit_inst(&mut self, value: Value, root: bool) {
 92 |         // If this is an instruction...
 93 |         if let ValueDef::Operator(_, args, _) = &self.body.values[value] {
 94 |             // If root, we need to process the def.
 95 |             if root {
 96 |                 self.visitor.visit_def(value);
 97 |             }
 98 |             // Handle uses.
 99 |             for &arg in &self.body.arg_pool[*args] {
100 |                 self.visit_use(arg);
101 |             }
102 |         }
103 |     }
104 |     fn visit_use(&mut self, value: Value) {
105 |         let value = self.body.resolve_alias(value);
106 |         if let ValueDef::PickOutput(value, _, _) = self.body.values[value] {
107 |             self.visit_use(value);
108 |             return;
109 |         }
110 |         if self.trees.owner.contains_key(&value) {
111 |             // If this is a treeified value, then don't process the use,
112 |             // but process the instruction directly here.
113 |             self.visit_inst(value, /* root = */ false);
114 |         } else {
115 |             // Otherwise, this is a proper use.
116 |             self.visitor.visit_use(value);
117 |         }
118 |     }
119 | }
120 | 
121 | impl<'a> Context<'a> {
122 |     fn new(body: &'a FunctionBody, cfg: &'a CFGInfo, trees: &'a Trees) -> Self {
123 |         let mut results = Localifier::default();
124 | 
125 |         // Create locals for function args.
126 |         for &(ty, value) in &body.blocks[body.entry].params {
127 |             let param_local = results.locals.push(ty);
128 |             results.values[value] = smallvec![param_local];
129 |         }
130 | 
131 |         Self {
132 |             body,
133 |             cfg,
134 |             trees,
135 |             results,
136 |             block_end_live: PerEntity::default(),
137 |             ranges: HashMap::default(),
138 |             points: 0,
139 |         }
140 |     }
141 | 
142 |     fn compute_liveness(&mut self) {
143 |         struct LivenessVisitor {
144 |             live: HashSet<Value>,
145 |         }
146 |         impl Visitor for LivenessVisitor {
147 |             fn visit_use(&mut self, value: Value) {
148 |                 self.live.insert(value);
149 |             }
150 |             fn visit_def(&mut self, value: Value) {
151 |                 self.live.remove(&value);
152 |             }
153 |         }
154 | 
155 |         let mut workqueue: Vec<Block> = self.cfg.rpo.values().cloned().collect();
156 |         let mut workqueue_set: HashSet<Block> = workqueue.iter().cloned().collect();
157 |         while let Some(block) = workqueue.pop() {
158 |             workqueue_set.remove(&block);
159 |             let live = self.block_end_live[block].clone();
160 |             let mut visitor = BlockVisitor::new(self.body, self.trees, LivenessVisitor { live });
161 |             visitor.visit_block(block);
162 |             let live = visitor.visitor.live;
163 | 
164 |             for &pred in &self.body.blocks[block].preds {
165 |                 let pred_live = &mut self.block_end_live[pred];
166 |                 let mut changed = false;
167 |                 for &value in &live {
168 |                     if pred_live.insert(value) {
169 |                         changed = true;
170 |                     }
171 |                 }
172 |                 if changed && workqueue_set.insert(pred) {
173 |                     workqueue.push(pred);
174 |                 }
175 |             }
176 |         }
177 |     }
178 | 
179 |     fn find_ranges(&mut self) {
180 |         let mut point = 0;
181 | 
182 |         struct LiveRangeVisitor<'b> {
183 |             point: &'b mut usize,
184 |             live: HashMap<Value, usize>,
185 |             ranges: &'b mut HashMap<Value, Range<usize>>,
186 |         }
187 |         impl<'b> Visitor for LiveRangeVisitor<'b> {
188 |             fn pre_params(&mut self) {
189 |                 *self.point += 1;
190 |             }
191 |             fn pre_inst(&mut self, _: Value) {
192 |                 *self.point += 1;
193 |             }
194 |             fn pre_term(&mut self) {
195 |                 *self.point += 1;
196 |             }
197 |             fn visit_use(&mut self, value: Value) {
198 |                 self.live.entry(value).or_insert(*self.point);
199 |             }
200 |             fn visit_def(&mut self, value: Value) {
201 |                 let range = if let Some(start) = self.live.remove(&value) {
202 |                     start..(*self.point + 1)
203 |                 } else {
204 |                     *self.point..(*self.point + 1)
205 |                 };
206 |                 let existing_range = self.ranges.entry(value).or_insert(range.clone());
207 |                 existing_range.start = std::cmp::min(existing_range.start, range.start);
208 |                 existing_range.end = std::cmp::max(existing_range.end, range.end);
209 |             }
210 |         }
211 | 
212 |         for &block in self.cfg.rpo.values().rev() {
213 |             let visitor = LiveRangeVisitor {
214 |                 live: HashMap::default(),
215 |                 point: &mut point,
216 |                 ranges: &mut self.ranges,
217 |             };
218 |             let mut visitor = BlockVisitor::new(&self.body, &self.trees, visitor);
219 |             // Live-outs to succ blocks: in this block-local
220 |             // handling, model them as uses as the end of the block.
221 |             for &livein in &self.block_end_live[block] {
222 |                 let livein = self.body.resolve_alias(livein);
223 |                 visitor.visitor.visit_use(livein);
224 |             }
225 |             // Visit all insts.
226 |             visitor.visit_block(block);
227 |             // Live-ins from pred blocks: anything still live has a
228 |             // virtual def at top of block.
229 |             let still_live = visitor.visitor.live.keys().cloned().collect::<Vec<_>>();
230 |             for live in still_live {
231 |                 visitor.visitor.visit_def(live);
232 |             }
233 |         }
234 | 
235 |         self.points = point + 1;
236 |     }
237 | 
238 |     fn allocate(&mut self) {
239 |         // Sort values by ranges' starting points, then value to break ties.
240 |         let mut ranges: Vec<(Value, std::ops::Range<usize>)> =
241 |             self.ranges.iter().map(|(k, v)| (*k, v.clone())).collect();
242 |         ranges.sort_unstable_by_key(|(val, range)| (range.start, *val));
243 | 
244 |         // Keep a list of expiring Locals by expiry point.
245 |         let mut expiring: HashMap<usize, SmallVec<[(Type, Local); 8]>> = HashMap::new();
246 | 
247 |         // Iterate over allocation space, processing range starts (at
248 |         // which point we allocate) and ends (at which point we add to
249 |         // the freelist).
250 |         let mut range_idx = 0;
251 |         let mut freelist: HashMap<Type, Vec<Local>> = HashMap::new();
252 | 
253 |         for i in 0..self.points {
254 |             // Process ends. (Ends are exclusive, so we do them
255 |             // first; another range can grab the local at the same
256 |             // point index in this same iteration.)
257 |             if let Some(expiring) = expiring.remove(&i) {
258 |                 for (ty, local) in expiring {
259 |                     log::trace!(" -> expiring {} of type {} back to freelist", local, ty);
260 |                     freelist.entry(ty).or_insert_with(|| vec![]).push(local);
261 |                 }
262 |             }
263 | 
264 |             // Process starts.
265 |             while range_idx < ranges.len() && ranges[range_idx].1.start == i {
266 |                 let (value, range) = ranges[range_idx].clone();
267 |                 range_idx += 1;
268 |                 log::trace!(
269 |                     "localify: processing range for {}: {}..{}",
270 |                     value,
271 |                     range.start,
272 |                     range.end
273 |                 );
274 | 
275 |                 // If the value is an arg on block0, ignore; these
276 |                 // already have fixed locations.
277 |                 if let &ValueDef::BlockParam(b, _, _) = &self.body.values[value] {
278 |                     if b == self.body.entry {
279 |                         continue;
280 |                     }
281 |                 }
282 | 
283 |                 // Try getting a local from the freelist; if not,
284 |                 // allocate a new one.
285 |                 let mut allocs = smallvec![];
286 |                 let expiring = expiring.entry(range.end).or_insert_with(|| smallvec![]);
287 |                 for &ty in self.body.values[value].tys(&self.body.type_pool) {
288 |                     let local = freelist
289 |                         .get_mut(&ty)
290 |                         .and_then(|v| v.pop())
291 |                         .unwrap_or_else(|| {
292 |                             log::trace!(" -> allocating new local of type {}", ty);
293 |                             self.results.locals.push(ty)
294 |                         });
295 |                     log::trace!(" -> got local {} of type {}", local, ty);
296 |                     allocs.push(local);
297 |                     expiring.push((ty, local));
298 |                 }
299 |                 self.results.values[value] = allocs;
300 |             }
301 |         }
302 |     }
303 | 
    /// Runs the full pipeline and returns its results.
    ///
    /// Consumes `self`, invoking `compute_liveness`, `find_ranges` and
    /// `allocate` in that order, and then yields `self.results`.
    fn compute(mut self) -> Localifier {
        self.compute_liveness();
        self.find_ranges();
        self.allocate();
        self.results
    }
310 | }
311 | 


--------------------------------------------------------------------------------
/src/backend/reducify.rs:
--------------------------------------------------------------------------------
  1 | //! Reducification: turning a potentially irreducible CFG into a
  2 | //! reducible CFG. We perform context-sensitive code duplication to
  3 | //! "peel off" the parts of loops that are reached by side-entrances,
  4 | //! branching back to the main loop as soon as control passes through
  5 | //! the loop header again.
  6 | //!
  7 | //! # Limitations
  8 | //!
  9 | //! ***WARNING*** EXPONENTIAL BLOWUP POTENTIAL ***WARNING***
 10 | //!
 11 | //! This pass is designed on the assumption that irreducible control
 12 | //! flow is rare, and needs to be handled somehow but it's OK to,
 13 | //! e.g., duplicate most of a loop body to do so. The tradeoff that
 14 | //! we're aiming for is that we want zero runtime overhead -- we do
 15 | //! not want a performance cliff if someone accidentally introduces an
 16 | //! irreducible edge -- and we're hoping that this remains rare. If
 17 | //! you feed this pass a state machine, or a fully-connected clique,
 18 | //! for example, or even a deep nest of loops, one can get much worse
 19 | //! than 2x code-size increase. You have been warned!
 20 | //!
 21 | //! In the future we may consider a hybrid approach where we start
 22 | //! with this algorithm, keep track of block-count increase, and abort
 23 | //! and move to a Relooper-style (dynamic label variable-based)
 24 | //! approach with no code duplication if a threshold is reached.
 25 | //!
 26 | //! ***WARNING*** EXPONENTIAL BLOWUP POTENTIAL ***WARNING***
 27 | //!
 28 | //! # Finding Loop Headers
 29 | //!
 30 | //! The basic idea is that we compute RPO and treat all backedges in
 31 | //! RPO (i.e., edges from rpo-index i to rpo-index j, where j <= i) as
 32 | //! loop backedges, with all blocks "under the edge" (with RPO indices
 33 | //! j..=i) in the loop. We then "properly nest" loops, so if we have,
 34 | //! e.g.:
 35 | //!
 36 | //! ```plain
 37 | //!     block0
 38 | //!     block1  |
 39 | //!     block2  | loop     |
 40 | //!     block3  |          |
 41 | //!     block4             | loop
 42 | //! ```
 43 | //!
 44 | //! we "fix the nesting" by pushing down the lower extent of the first
 45 | //! loop to block4. We do so in a single post-pass fixup scan that
 46 | //! keeps a stack, pushes when meeting a loop header, pops while the
 47 | //! innermost is no longer in the initial header-set, then ensures
 48 | //! that all header-blocks on the stack are inserted into every
 49 | //! header-set it passes over.
 50 | //!
 51 | //! The effect of this is to compute a loop nest *as if* irreducible
 52 | //! edges (side loop entrances) did not exist. We'll fix them up later
 53 | //! with the code duplication.
 54 | //!
 55 | //! # Finding Irreducible Loop Headers
 56 | //!
 57 | //! After computing header-sets, find edges from B1 to B2 such that
 58 | //! headers(B2) - headers(B1) - {B2} is non-empty -- that is, we add a
 59 | //! header block (enter a new loop) going from B1 to B2, and that new
 60 | //! header block is not B2 itself. This is a "side-entrance" into a
 61 | //! loop, and is irreducible.
 62 | //!
 63 | //! # Duplicating Code
 64 | //!
 65 | //! We create blocks under contexts defined by "skipped
 66 | //! headers", where the context is computed at an edge
 67 | //! (From, To) as (where `U` is set union, `-` is set
 68 | //! subtraction, `&` is set intersection, `!S` is the set
 69 | //! complement):
 70 | //!
 71 | //! ```plain
 72 | //!     Gen = (headers(To) - headers(From)) - {To}
 73 | //!         = headers(To) & !headers(From) & !{To}
 74 | //!     Kill = (headers(From) - headers(To)) U {To}
 75 | //!          = (headers(From) & !headers(To)) U {To}
 76 | //!
 77 | //! let ToContext = (FromContext - Kill) U Gen
 78 | //!               = (FromContext & !Kill) U Gen
 79 | //!               = (FromContext & !((headers(From) & !headers(To)) U {To})) U
 80 | //!                 (headers(To) & !headers(From) & !{To})
 81 | //!               = (FromContext & !((headers(From) U {To}) & (!headers(To) U {To}))) U
 82 | //!                 (headers(To) & !headers(From) & !{To})
 83 | //!               = (FromContext & (!(headers(From) U {To}) U !(!headers(To) U {To}))) U
 84 | //!                 (headers(To) & !headers(From) & !{To})
 85 | //!               = (FromContext & ((!headers(From) & !{To}) U (headers(To) & !{To}))) U
 86 | //!                 (headers(To) & !headers(From) & !{To})
 87 | //!               = (FromContext & !headers(From) & !{To}) U
 88 | //!                 (FromContext & headers(To) & !{To}) U
 89 | //!                 (headers(To) & !headers(From) & !{To})
 90 | //! ```
 91 | //!
 92 | //! Invariant: for every block B, every context C we create for B is a subset of headers(B);
 93 | //!
 94 | //! then the first term goes away (FromContext & !headers(From)
 95 | //! = 0) and we can simplify to:
 96 | //!
 97 | //! ```plain
 98 | //! let ToContext = headers(To) & !{To} & (FromContext U !headers(From))
 99 | //! ```
100 | //!
101 | //! in other words: when entering a loop except through its
102 | //! header, add to context; stay in that context inside the
103 | //! loop; leave the context when we leave the loop.
104 | //!
105 | //! Note that in the case with no irreducible edges, this
106 | //! becomes the special case where every context is {} and no
107 | //! blocks are actually duplicated (but we returned early above
108 | //! to avoid this no-op transform).
109 | //!
110 | //! Patching up use-def links is somewhat tricky. Consider the
111 | //! CFG:
112 | //!
113 | //! ```plain
114 | //!         1
115 | //!        / \
116 | //!       /   \
117 | //!      2 --> 3
118 | //!      2 <-- 3
119 | //!           /
120 | //!          4
121 | //! ```
122 | //!
123 | //! Which is irreducible (it contains the canonical irreducible
124 | //! graph 1->2, 2->3, 3->2) and has an exit-path with block 4
125 | //! that is dominated by block 3. Block 4 can thus use values
126 | //! defined in block 3, but if we perform elaboration as:
127 | //!
128 | //! ```plain
129 | //!     1
130 | //!   /  \__
131 | //!  2<.<--3'
132 | //!  v ^   |
133 | //!  3-/  _|
134 | //!   \ /
135 | //!    4
136 | //! ```
137 | //!
138 | //! that is, we have two copies of the block 3, and each has an
139 | //! exit to the one copy of 4.
140 | //!
141 | //! Any values defined in 3 and used in 4 in the original CFG
142 | //! will need to pass through blockparams to merge the two
143 | //! versions in the elaborated CFG.
144 | //!
145 | //! To fix this problem, we perform a max-SSA cut at all blocks
146 | //! that have an in-edge from a block with a larger header-set
147 | //! (i.e., a loop exit edge) if the exited loop has a
148 | //! side-entrance; this is the only way in which we can have a
149 | //! merge-point between different copies of the same subgraph.
150 | 
151 | use crate::entity::EntityRef;
152 | use crate::{cfg::CFGInfo, cfg::RPOIndex, entity::PerEntity, Block, FunctionBody, Value, ValueDef};
153 | use fxhash::{FxHashMap, FxHashSet};
154 | use smallvec::SmallVec;
155 | use std::borrow::Cow;
156 | use std::collections::{HashSet, VecDeque};
157 | 
/// Working state for turning one function body's CFG into a reducible CFG.
pub struct Reducifier<'a> {
    /// The function body under analysis; held by shared reference.
    body: &'a FunctionBody,
    /// CFG information built from `body` when the reducifier is constructed.
    cfg: CFGInfo,
    /// Per-block loop-header bookkeeping; starts out empty (default).
    blocks: PerEntity<Block, BlockState>,
}
163 | 
/// Loop-related facts recorded for a single block.
#[derive(Debug, Clone, Default)]
struct BlockState {
    /// The block's header-set: loop-header blocks recorded for this block
    /// while scanning backedges and while fixing up loop nesting.
    headers: FxHashSet<Block>,
    /// Flag consulted by the nesting fixup scan to decide whether this
    /// block is pushed onto the header stack.
    is_header: bool,
}
169 | 
170 | impl<'a> Reducifier<'a> {
171 |     pub fn new(body: &'a FunctionBody) -> Reducifier<'a> {
172 |         let cfg = CFGInfo::new(body);
173 |         Reducifier {
174 |             body,
175 |             cfg,
176 |             blocks: PerEntity::default(),
177 |         }
178 |     }
179 | 
180 |     pub fn run(&mut self) -> Cow<'a, FunctionBody> {
181 |         // First, compute all of the loop header-sets.
182 |         // - Start by computing RPO.
183 |         // - Find backedges (edges (a, b) where rpo(b) <= rpo(a)).
184 |         // - For each backedge, mark extent of rpo-indices "under"
185 |         //   edge as within header.
186 |         // - Do one forward pass to properly nest regions, keeping
187 |         //   stack of headers when we entered their regions and
188 |         //   enforcing LIFO by extending appropriately.
189 |         let cfg = CFGInfo::new(&self.body);
190 | 
191 |         let mut has_irreducible = false;
192 |         for (rpo, &block) in cfg.rpo.entries() {
193 |             for &succ in &self.body.blocks[block].succs {
194 |                 let succ_rpo = cfg.rpo_pos[succ].unwrap();
195 |                 if succ_rpo.index() <= rpo.index() && !cfg.dominates(succ, block) {
196 |                     has_irreducible = true;
197 |                 }
198 |             }
199 |         }
200 |         if !has_irreducible {
201 |             return Cow::Borrowed(self.body);
202 |         }
203 | 
204 |         for (rpo, &block) in cfg.rpo.entries() {
205 |             for &succ in &self.body.blocks[block].succs {
206 |                 let succ_rpo = cfg.rpo_pos[succ].unwrap();
207 |                 if succ_rpo.index() <= rpo.index() {
208 |                     for i in succ_rpo.index()..=rpo.index() {
209 |                         let b = cfg.rpo[RPOIndex::new(i)];
210 |                         self.blocks[b].headers.insert(succ);
211 |                         self.blocks[b].is_header = true;
212 |                     }
213 |                 }
214 |             }
215 |         }
216 | 
217 |         let mut header_stack = vec![];
218 |         for &block in cfg.rpo.values() {
219 |             while let Some(innermost) = header_stack.last() {
220 |                 if !self.blocks[block].headers.contains(innermost) {
221 |                     header_stack.pop();
222 |                 } else {
223 |                     break;
224 |                 }
225 |             }
226 |             if self.blocks[block].is_header {
227 |                 header_stack.push(block);
228 |             }
229 | 
230 |             for &header in &header_stack {
231 |                 self.blocks[block].headers.insert(header);
232 |             }
233 |         }
234 | 
235 |         // Now, check whether any irreducible edges exist: edges from
236 |         // B1 to B2 where headers(B2) - headers(B1) - {B2} is not
237 |         // empty (i.e., the edge jumps into a new loop -- adds a new
238 |         // header -- without going through that header block).
239 |         let mut irreducible_headers: FxHashSet<Block> = FxHashSet::default();
240 |         for (block, data) in self.body.blocks.entries() {
241 |             let headers = &self.blocks[block].headers;
242 |             for &succ in &data.succs {
243 |                 log::trace!("examining edge {} -> {}", block, succ);
244 |                 for &succ_header in &self.blocks[succ].headers {
245 |                     log::trace!("  successor {} has header {}", succ, succ_header);
246 |                     if succ_header != succ && !headers.contains(&succ_header) {
247 |                         log::trace!("    -> irreducible edge");
248 |                         irreducible_headers.insert(succ_header);
249 |                     }
250 |                 }
251 |             }
252 |         }
253 | 
254 |         if log::log_enabled!(log::Level::Trace) {
255 |             for block in self.body.blocks.iter() {
256 |                 let mut headers = self.blocks[block]
257 |                     .headers
258 |                     .iter()
259 |                     .cloned()
260 |                     .collect::<Vec<_>>();
261 |                 headers.sort();
262 |                 log::trace!("* {}: header set {:?}", block, headers);
263 |             }
264 |         }
265 | 
266 |         // Now, in the irreducible case, "elaborate" the CFG.
267 | 
268 |         // First do limited conversion to max-SSA to fix up references
269 |         // across contexts.
270 |         let mut cut_blocks = HashSet::default();
271 |         for (block, data) in self.body.blocks.entries() {
272 |             for &succ in &data.succs {
273 |                 // Loop exits
274 |                 for header in &self.blocks[block].headers {
275 |                     if !self.blocks[succ].headers.contains(header)
276 |                         && irreducible_headers.contains(header)
277 |                     {
278 |                         log::trace!("cut-block at loop exit: {}", succ);
279 |                         cut_blocks.insert(succ);
280 |                     }
281 |                 }
282 |                 // Loop side entries
283 |                 for header in &self.blocks[succ].headers {
284 |                     if !self.blocks[block].headers.contains(header) && *header != succ {
285 |                         log::trace!("cut-block at loop side entry: {}", succ);
286 |                         cut_blocks.insert(succ);
287 |                     }
288 |                 }
289 |             }
290 |         }
291 | 
292 |         let mut new_body = self.body.clone();
293 |         let cfg = CFGInfo::new(&new_body);
294 |         crate::passes::resolve_aliases::run(&mut new_body);
295 |         crate::passes::maxssa::run(&mut new_body, Some(cut_blocks), &cfg);
296 |         crate::passes::resolve_aliases::run(&mut new_body);
297 | 
298 |         log::trace!("after max-SSA run:\n{}\n", new_body.display("| ", None));
299 | 
300 |         // Implicitly, context {} has an identity-map from old block
301 |         // number to new block number. We use the map only for
302 |         // non-empty contexts.
303 |         let mut context_map: FxHashMap<Vec<Block>, usize> = FxHashMap::default();
304 |         let mut contexts: Vec<Vec<Block>> = vec![vec![]];
305 |         context_map.insert(vec![], 0);
306 |         let mut block_map: FxHashMap<(usize, Block), Block> = FxHashMap::default();
307 |         let mut value_map: FxHashMap<(usize, Value), Value> = FxHashMap::default();
308 | 
309 |         // List of (ctx, new block) tuples for duplicated code.
310 |         let mut cloned_blocks: Vec<(usize, Block)> = vec![];
311 |         // Map from block in new body to (ctx, orig block) target, to
312 |         // allow updating terminators.
313 |         let mut terminators: FxHashMap<Block, Vec<(usize, Block)>> = FxHashMap::default();
314 | 
315 |         let mut queue: VecDeque<(usize, Block)> = VecDeque::new();
316 |         let mut visited: FxHashSet<(usize, Block)> = FxHashSet::default();
317 |         queue.push_back((0, new_body.entry));
318 |         visited.insert((0, new_body.entry));
319 |         while let Some((ctx, block)) = queue.pop_front() {
320 |             log::trace!(
321 |                 "elaborate: block {} in context {} ({:?})",
322 |                 block,
323 |                 ctx,
324 |                 contexts[ctx]
325 |             );
326 | 
327 |             // If this is a non-default context, replicate the block.
328 |             let new_block = if ctx != 0 {
329 |                 log::trace!("cloning block {} in new context", block);
330 |                 let new_block = new_body.add_block();
331 |                 new_body.blocks[new_block].desc = format!("Cloned {}", block);
332 |                 let params = new_body.blocks[block].params.clone();
333 |                 for (ty, val) in params {
334 |                     let blockparam = new_body.add_blockparam(new_block, ty);
335 |                     value_map.insert((ctx, val), blockparam);
336 |                 }
337 | 
338 |                 block_map.insert((ctx, block), new_block);
339 |                 cloned_blocks.push((ctx, new_block));
340 | 
341 |                 // Copy over all value definitions, but don't rewrite
342 |                 // args yet -- we'll do a separate pass for that.
343 |                 let insts = new_body.blocks[block].insts.clone();
344 |                 for value in insts {
345 |                     let def = new_body.values[value].clone();
346 |                     let new_value = new_body.values.push(def);
347 |                     value_map.insert((ctx, value), new_value);
348 |                     new_body.blocks[new_block].insts.push(new_value);
349 |                 }
350 | 
351 |                 // Copy over the terminator but don't update yet --
352 |                 // we'll do that later too.
353 |                 new_body.blocks[new_block].terminator = new_body.blocks[block].terminator.clone();
354 | 
355 |                 new_block
356 |             } else {
357 |                 block
358 |             };
359 | 
360 |             // For every terminator, determine the target context:
361 |             //
362 |             // let ToContext = headers(To) & !{To} & (FromContext U !headers(From))
363 |             let term = terminators.entry(new_block).or_insert_with(|| vec![]);
364 |             let succs = new_body.blocks[block].succs.clone();
365 |             for succ in succs {
366 |                 let mut ctx_blocks = self.blocks[succ]
367 |                     .headers
368 |                     .iter()
369 |                     .cloned()
370 |                     .collect::<Vec<_>>();
371 |                 ctx_blocks.sort();
372 |                 ctx_blocks.retain(|&header_block| {
373 |                     header_block != succ
374 |                         && (contexts[ctx].contains(&header_block)
375 |                             || !self.blocks[block].headers.contains(&header_block))
376 |                 });
377 |                 let to_ctx = *context_map.entry(ctx_blocks.clone()).or_insert_with(|| {
378 |                     let id = contexts.len();
379 |                     contexts.push(ctx_blocks);
380 |                     id
381 |                 });
382 |                 log::trace!(
383 |                     "elaborate: edge {} -> {} from ctx {:?} goes to ctx {:?}",
384 |                     block,
385 |                     succ,
386 |                     contexts[ctx],
387 |                     contexts[to_ctx]
388 |                 );
389 | 
390 |                 term.push((to_ctx, succ));
391 | 
392 |                 if visited.insert((to_ctx, succ)) {
393 |                     log::trace!("enqueue block {} ctx {}", succ, to_ctx);
394 |                     queue.push_back((to_ctx, succ));
395 |                 }
396 |             }
397 |         }
398 | 
399 |         // Second pass: rewrite args, and set up terminators. Both
400 |         // happen in a second pass so that we have the block- and
401 |         // value-map available for all blocks and values, regardless
402 |         // of cycles or processing order.
403 |         for (ctx, new_block) in cloned_blocks {
404 |             for &inst in &new_body.blocks[new_block].insts {
405 |                 match &mut new_body.values[inst] {
406 |                     ValueDef::Operator(_, args, _) => {
407 |                         let new_args = new_body.arg_pool[*args]
408 |                             .iter()
409 |                             .map(|&val| value_map.get(&(ctx, val)).cloned().unwrap_or(val))
410 |                             .collect::<SmallVec<[Value; 4]>>();
411 |                         let new_args = new_body.arg_pool.from_iter(new_args.into_iter());
412 |                         *args = new_args;
413 |                     }
414 |                     ValueDef::PickOutput(val, _, _) | ValueDef::Alias(val) => {
415 |                         *val = value_map.get(&(ctx, *val)).cloned().unwrap_or(*val);
416 |                     }
417 |                     _ => unreachable!(),
418 |                 }
419 |             }
420 | 
421 |             new_body.blocks[new_block]
422 |                 .terminator
423 |                 .update_uses(|u| *u = value_map.get(&(ctx, *u)).cloned().unwrap_or(*u));
424 |         }
425 | 
426 |         for (block, block_def) in new_body.blocks.entries_mut() {
427 |             log::trace!("processing terminators for block {}", block);
428 |             let terms = match terminators.get(&block) {
429 |                 Some(t) => t,
430 |                 // If no entry in `terminators`, we didn't visit the
431 |                 // block; it must not be reachable.
432 |                 None => continue,
433 |             };
434 |             let mut terms = terms.iter();
435 |             block_def.terminator.update_targets(|target| {
436 |                 let &(to_ctx, to_orig_block) = terms.next().unwrap();
437 |                 target.block = block_map
438 |                     .get(&(to_ctx, to_orig_block))
439 |                     .cloned()
440 |                     .unwrap_or(to_orig_block);
441 |             });
442 |         }
443 | 
444 |         new_body.recompute_edges();
445 | 
446 |         log::trace!("After duplication:\n{}\n", new_body.display("| ", None));
447 | 
448 |         new_body.validate().unwrap();
449 |         new_body.verify_reducible().unwrap();
450 | 
451 |         Cow::Owned(new_body)
452 |     }
453 | }
454 | 
#[cfg(test)]
mod test {
    use super::*;
    use crate::{
        entity::EntityRef, BlockTarget, FuncDecl, Module, Operator, SignatureData, Terminator, Type,
    };

    /// Builds the canonical irreducible CFG (1->2, 1->3, 2->3, 3->2, 3->4),
    /// runs the reducifier on it, and checks that every RPO backedge in the
    /// result targets a dominator of its source.
    #[test]
    fn test_irreducible() {
        let _ = env_logger::try_init();

        let mut module = Module::empty();
        let sig = module.signatures.push(SignatureData {
            params: vec![Type::I32, Type::I64, Type::F64],
            returns: vec![Type::I64],
        });
        let mut body = FunctionBody::new(&module, sig);

        let block1 = body.entry;
        let block2 = body.add_block();
        let block3 = body.add_block();
        let block4 = body.add_block();

        let arg0 = body.blocks[block1].params[0].1;
        let arg1 = body.blocks[block1].params[1].1;
        let arg2 = body.blocks[block1].params[2].1;

        // block1 branches into both block2 and block3, which then form a
        // cycle: neither of the two dominates the other.
        body.set_terminator(
            block1,
            Terminator::CondBr {
                cond: arg0,
                if_true: BlockTarget {
                    block: block2,
                    args: vec![arg1],
                },
                if_false: BlockTarget {
                    block: block3,
                    args: vec![arg2],
                },
            },
        );

        let block2_param = body.add_blockparam(block2, Type::I64);
        let block3_param = body.add_blockparam(block3, Type::F64);

        let block2_param_cast = body.add_op(
            block2,
            Operator::F64ReinterpretI64,
            &[block2_param],
            &[Type::F64],
        );

        let block3_param_cast = body.add_op(
            block3,
            Operator::I64ReinterpretF64,
            &[block3_param],
            &[Type::I64],
        );

        body.set_terminator(
            block2,
            Terminator::Br {
                target: BlockTarget {
                    block: block3,
                    args: vec![block2_param_cast],
                },
            },
        );
        body.set_terminator(
            block3,
            Terminator::CondBr {
                cond: arg0,
                if_true: BlockTarget {
                    block: block2,
                    args: vec![block3_param_cast],
                },
                if_false: BlockTarget {
                    block: block4,
                    args: vec![],
                },
            },
        );

        // block4 uses a value defined in block3.
        body.set_terminator(
            block4,
            Terminator::Return {
                values: vec![block3_param_cast],
            },
        );

        log::debug!("Body:\n{}", body.display("| ", Some(&module)));

        body.validate().unwrap();

        let mut reducifier = Reducifier::new(&body);
        let new_body = reducifier.run();

        new_body.validate().unwrap();

        // Show the transformed body here, not the original input.
        log::debug!(
            "Reducified body:\n{}",
            new_body.display("| ", Some(&module))
        );

        let cfg = CFGInfo::new(&new_body);
        for (block, def) in new_body.blocks.entries() {
            for &succ in &def.succs {
                // For any edge to a block earlier in RPO, that block
                // must dominate us.
                if cfg.rpo_pos[succ].unwrap().index() <= cfg.rpo_pos[block].unwrap().index() {
                    assert!(cfg.dominates(succ, block));
                }
            }
        }

        // Now ensure we can generate a Wasm module (with reducible
        // control flow).
        module
            .funcs
            .push(FuncDecl::Body(sig, "func0".to_string(), body));
        let wasm = module.to_wasm_bytes().unwrap();
        log::debug!("wasm bytes: {:?}", wasm);
    }
}
576 | 


--------------------------------------------------------------------------------
/src/backend/stackify.rs:
--------------------------------------------------------------------------------
  1 | //! Stackify implementation to produce structured control flow from an
  2 | //! arbitrary CFG.
  3 | //!
  4 | //! See the paper
  5 | //!
  6 | //! - Norman Ramsey. Beyond Relooper: recursive translation of
  7 | //!   unstructured control flow to structured control flow. In ICFP
  8 | //!   2022 (Functional Pearl). https://dl.acm.org/doi/10.1145/3547621
  9 | //!
 10 | //! for more details on how this algorithm works.
 11 | 
 12 | use crate::cfg::CFGInfo;
 13 | use crate::entity::EntityRef;
 14 | use crate::ir::{Block, BlockTarget, FunctionBody, Terminator, Type, Value};
 15 | use std::collections::HashSet;
 16 | use std::convert::TryFrom;
 17 | 
/// One node of the structured control-flow tree produced by this module.
///
/// Container variants (`Block`, `Loop`, `If`) hold nested `WasmBlock`s;
/// the remaining variants are leaves. Slices borrowed with lifetime `'a`
/// are not copied into the tree.
#[derive(Clone, Debug)]
pub enum WasmBlock<'a> {
    /// A Wasm block that has the given contents and whose label jumps
    /// to the given CFG block exit.
    Block {
        body: Vec<WasmBlock<'a>>,
        out: Block,
    },
    /// A Wasm loop that has the given contents and whose label jumps
    /// to the given CFG block header.
    Loop {
        body: Vec<WasmBlock<'a>>,
        header: Block,
    },
    /// A leaf node: one CFG block.
    Leaf { block: Block },
    /// A translated unconditional branch.
    Br { target: WasmLabel },
    /// A translated conditional.
    If {
        cond: Value,
        if_true: Vec<WasmBlock<'a>>,
        if_false: Vec<WasmBlock<'a>>,
    },
    /// A translated select (switch).
    Select {
        selector: Value,
        targets: Vec<WasmLabel>,
        default: WasmLabel,
    },
    /// Blockparam transfer.
    BlockParams {
        from: &'a [Value],
        to: &'a [(Type, Value)],
    },
    /// A function return instruction.
    Return { values: &'a [Value] },
    /// An unreachable instruction.
    Unreachable,
}
 58 | 
 59 | /// A Wasm branch target label: number of scopes outward to branch to.
 60 | #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 61 | pub struct WasmLabel(u32);
 62 | impl WasmLabel {
 63 |     fn new(i: usize) -> WasmLabel {
 64 |         WasmLabel(u32::try_from(i).unwrap())
 65 |     }
 66 |     fn add(&self, extra: usize) -> WasmLabel {
 67 |         WasmLabel(self.0.checked_add(u32::try_from(extra).unwrap()).unwrap())
 68 |     }
 69 |     pub fn index(&self) -> u32 {
 70 |         self.0
 71 |     }
 72 | }
 73 | 
/// State for translating one function body's CFG into a `WasmBlock` tree.
pub struct Context<'a, 'b> {
    /// The function body being translated.
    body: &'a FunctionBody,
    /// CFG information supplied by the caller for `body`.
    cfg: &'b CFGInfo,
    /// Blocks classified as merge nodes when the context is constructed.
    merge_nodes: HashSet<Block>,
    /// Blocks classified as loop headers when the context is constructed.
    loop_headers: HashSet<Block>,
    /// Stack of currently open control constructs.
    ctrl_stack: Vec<CtrlEntry>,
    // Explicit recursion (the fields below replace the native call stack):
    // - Stack of actions/continuations.
    process_stack: Vec<StackEntry<'a>>,
    // - Stack of result/body vectors.
    result: Vec<Vec<WasmBlock<'a>>>,
    // - Stack of merge-node-children lists.
    merge_node_children: Vec<Vec<Block>>,
}
 88 | 
 89 | #[derive(Clone, Copy, Debug)]
 90 | enum CtrlEntry { // One enclosing Wasm control scope, as kept on `Context::ctrl_stack`.
 91 |     Block { out: Block }, // Block scope; its label stands for CFG block `out`.
 92 |     Loop { header: Block }, // Loop scope; its label stands for the loop `header`.
 93 |     IfThenElse, // If/else scope; `label()` reports the invalid block for it.
 94 | }
 95 | 
 96 | impl CtrlEntry {
 97 |     fn label(&self) -> Block {
 98 |         // An if/else scope carries no CFG block, so it reports the sentinel.
 99 |         match *self {
100 |             Self::Block { out: dest } | Self::Loop { header: dest } => dest,
101 |             Self::IfThenElse => Block::invalid(),
102 |         }
103 |     }
104 | }
105 | 
106 | #[derive(Clone, Copy, Debug)]
107 | enum StackEntry<'a> { // One pending step of the explicit-recursion worklist; dispatched by `process`.
108 |     DomSubtree(Block), // Emit the dominator subtree rooted at this block.
109 |     EndDomSubtree, // Pop the merge-node-children list pushed when the subtree was entered.
110 |     NodeWithin(Block, usize), // Emit the block inside its merge-node scopes, starting at this list index.
111 |     FinishLoop(Block), // Close the loop scope opened for this header.
112 |     FinishBlock(Block), // Close the block scope whose out-label is this block.
113 |     Else, // Start collecting the else-body of the innermost `if`.
114 |     FinishIf(Value), // Close the `if` on this condition, consuming both collected bodies.
115 |     DoBranch(Block, &'a BlockTarget), // Emit the edge from the source block to the target.
116 | }
117 | 
118 | impl<'a, 'b> Context<'a, 'b> {
119 |     pub fn new(body: &'a FunctionBody, cfg: &'b CFGInfo) -> anyhow::Result<Self> {
120 |         let (merges, headers) = Self::compute_merge_nodes_and_loop_headers(body, cfg)?; // Only fallible step.
121 |         Ok(Context {
122 |             body,
123 |             cfg,
124 |             merge_nodes: merges,
125 |             loop_headers: headers,
126 |             ctrl_stack: Vec::new(),
127 |             process_stack: Vec::new(),
128 |             result: Vec::new(),
129 |             merge_node_children: Vec::new(),
130 |         })
131 |     }
132 | 
133 |     fn compute_merge_nodes_and_loop_headers(
134 |         body: &FunctionBody,
135 |         cfg: &CFGInfo,
136 |     ) -> anyhow::Result<(HashSet<Block>, HashSet<Block>)> {
137 |         // Classifies every CFG edge, visiting source blocks in RPO.
138 |         // `seen_forward` remembers blocks already reached by a forward
139 |         // edge; a second forward edge makes the block a merge node.
140 |         let mut seen_forward = HashSet::new();
141 |         let mut merges = HashSet::new();
142 |         let mut headers = HashSet::new();
143 | 
144 |         for (from_rpo, &from) in cfg.rpo.entries() {
145 |             for &to in &body.blocks[from].succs {
146 |                 log::trace!(
147 |                     "block {} ({}) rpo {} has succ {} ({})",
148 |                     from,
149 |                     body.blocks[from].desc,
150 |                     from_rpo,
151 |                     to,
152 |                     body.blocks[to].desc,
153 |                 );
154 |                 let to_rpo = cfg.rpo_pos[to].unwrap();
155 |                 log::trace!(" -> succ rpo {}", to_rpo);
156 |                 if to_rpo > from_rpo {
157 |                     // Forward edge: only the second and later ones count.
158 |                     if !seen_forward.insert(to) {
159 |                         merges.insert(to);
160 |                     }
161 |                     continue;
162 |                 }
163 |                 // Backward edge (or self-edge): accepted only when the
164 |                 // target dominates the source; it then marks a loop header.
165 |                 if !cfg.dominates(to, from) {
166 |                     anyhow::bail!(
167 |                         "Irreducible control flow: edge from {} ({}) to {} ({})",
168 |                         from,
169 |                         body.blocks[from].desc,
170 |                         to,
171 |                         body.blocks[to].desc
172 |                     );
173 |                 }
174 |                 headers.insert(to);
175 |             }
176 |         }
177 | 
178 |         // Every `select` target is forced to be a merge node as well, so
179 |         // that each one gets its own enclosing block.
180 |         for &block in cfg.rpo.values() {
181 |             if let Terminator::Select {
182 |                 targets, default, ..
183 |             } = &body.blocks[block].terminator
184 |             {
185 |                 merges.extend(targets[..].iter().map(|t| t.block));
186 |                 merges.insert(default.block);
187 |             }
188 |         }
189 | 
190 |         Ok((merges, headers))
191 |     }
192 | 
193 |     pub fn compute(mut self) -> Vec<WasmBlock<'a>> {
194 |         // Seed the result stack and the worklist, then drain the worklist.
195 |         self.result.push(Vec::new());
196 |         self.process_stack.push(StackEntry::DomSubtree(self.cfg.entry));
197 |         while let Some(step) = self.process_stack.pop() {
198 |             self.process(step);
199 |         }
200 |         self.result.pop().unwrap()
201 |     }
202 | 
203 |     fn process(&mut self, entry: StackEntry<'a>) {
204 |         // Run a single worklist step. Each variant maps one-to-one onto
205 |         // a handler method, and the payload of the variant is passed
206 |         // through unchanged. Handlers may push further steps onto
207 |         // `process_stack`, which is how the traversal nests without
208 |         // recursing on the call stack.
209 |         match entry {
210 |             // Enter the dominator subtree rooted at `root`.
211 |             StackEntry::DomSubtree(root) => self.handle_dom_subtree(root),
212 |             // Leave the current dominator subtree.
213 |             StackEntry::EndDomSubtree => self.end_dom_subtree(),
214 |             // Emit `node` inside its merge-node scopes from index `first` on.
215 |             StackEntry::NodeWithin(node, first) => self.node_within(node, first),
216 | 
217 |             // Close the loop scope for `header`.
218 |             StackEntry::FinishLoop(header) => self.finish_loop(header),
219 |             // Close the block scope labelled `out`.
220 |             StackEntry::FinishBlock(out) => self.finish_block(out),
221 | 
222 |             // Switch from collecting the if-body to the else-body.
223 |             StackEntry::Else => self.else_(),
224 |             // Close the `if` scope on `cond`.
225 |             StackEntry::FinishIf(cond) => self.finish_if(cond),
226 | 
227 |             // Emit the edge `from` -> `to`.
228 |             StackEntry::DoBranch(from, to) => self.do_branch(from, to),
229 |         }
230 |     }
231 | 
232 |     fn handle_dom_subtree(&mut self, block: Block) {
233 |         // Collect the dominator-tree children of `block` that are merge
234 |         // nodes, ordered so that the child with the highest RPO number
235 |         // comes first.
236 |         let mut children: Vec<Block> = self
237 |             .cfg
238 |             .dom_children(block)
239 |             .filter(|child| self.merge_nodes.contains(child))
240 |             .collect();
241 |         children.sort_unstable_by(|&x, &y| self.cfg.rpo_pos[y].cmp(&self.cfg.rpo_pos[x]));
242 | 
243 |         let in_loop = self.loop_headers.contains(&block);
244 |         log::trace!(
245 |             "handle_dom_subtree: block {} merge_nodes {:?} loop_header {}",
246 |             block,
247 |             children,
248 |             in_loop
249 |         );
250 | 
251 |         // Paired with `EndDomSubtree`, which pops the list again. The end
252 |         // marker is pushed first so that it runs after everything below.
253 |         self.merge_node_children.push(children);
254 |         self.process_stack.push(StackEntry::EndDomSubtree);
255 | 
256 |         if in_loop {
257 |             // Open a loop scope with its own result vector; `FinishLoop`
258 |             // pops both again once the loop body has been emitted.
259 |             self.ctrl_stack.push(CtrlEntry::Loop { header: block });
260 |             self.result.push(Vec::new());
261 |             self.process_stack.push(StackEntry::FinishLoop(block));
262 |         }
263 |         // Loop header or not, the block itself is handled next by the
264 |         // `NodeWithin` step ("tail call"); without a loop scope it simply
265 |         // appends to the result vector that is already on top.
266 |         self.process_stack.push(StackEntry::NodeWithin(block, 0));
267 |     }
268 | 
269 |     fn end_dom_subtree(&mut self) {
270 |         let _ = self.merge_node_children.pop(); // Drops the list pushed by `handle_dom_subtree`.
271 |     }
272 | 
273 |     fn finish_loop(&mut self, header: Block) {
274 |         // Leave the loop scope and wrap the nodes collected for it.
275 |         self.ctrl_stack.pop();
276 |         let body = self.result.pop().unwrap();
277 |         let looped = WasmBlock::Loop { body, header };
278 |         let parent = self.result.last_mut().unwrap();
279 |         parent.push(looped);
280 |     }
281 | 
282 |     fn resolve_target(ctrl_stack: &[CtrlEntry], target: Block) -> WasmLabel {
283 |         log::trace!("resolve_target: {} in stack {:?}", target, ctrl_stack);
284 |         // Depth 0 is the innermost scope, so the stack is searched from
285 |         // its end; the first frame labelled `target` wins.
286 |         let depth = ctrl_stack
287 |             .iter()
288 |             .rev()
289 |             .map(CtrlEntry::label)
290 |             .position(|label| label == target)
291 |             .expect("Target must be in control stack");
292 |         let depth = u32::try_from(depth).expect("More than 2^32 frames");
293 |         WasmLabel(depth)
294 |     }
295 | 
296 |     fn do_branch(&mut self, source: Block, target: &'a BlockTarget) {
297 |         let out = self.result.last_mut().unwrap();
298 |         log::trace!("do_branch: {} -> {:?}", source, target);
299 |         let dest = target.block;
300 | 
301 |         // A destination that is a merge block, or that does not come
302 |         // later than `source` in RPO (a backward branch), corresponds to
303 |         // an entry on the control stack and is reached with a `Br`.
304 |         let via_label = self.merge_nodes.contains(&dest)
305 |             || self.cfg.rpo_pos[dest] <= self.cfg.rpo_pos[source];
306 |         if !via_label {
307 |             // Remaining case: `source` must dominate `dest`, so after
308 |             // the blockparam transfer the destination's subtree is
309 |             // scheduled to be emitted inline.
310 |             debug_assert!(self.cfg.dominates(source, dest));
311 |             let params = &self.body.blocks[dest].params[..];
312 |             Self::do_blockparam_transfer(&target.args[..], params, out);
313 |             self.process_stack.push(StackEntry::DomSubtree(dest));
314 |             return;
315 |         }
316 | 
317 |         // Labelled case: resolve the relative depth first, then emit the
318 |         // blockparam transfer followed by the branch itself.
319 |         let label = Self::resolve_target(&self.ctrl_stack[..], dest);
320 |         let params = &self.body.blocks[dest].params[..];
321 |         Self::do_blockparam_transfer(&target.args[..], params, out);
322 |         out.push(WasmBlock::Br { target: label });
323 |     }
324 | 
325 |     fn do_branch_select(
326 |         &mut self,
327 |         selector: Value,
328 |         targets: &'a [BlockTarget],
329 |         default: &'a BlockTarget,
330 |     ) {
331 |         let into = self.result.last_mut().unwrap();
332 |         log::trace!("do_branch_select: {:?}, default {:?}", targets, default);
333 |         // Innermost node: the table branch. Case `i` uses label `i`, the
334 |         // default uses label `targets.len()`.
335 |         let case_count = targets.len();
336 |         let mut nest = vec![WasmBlock::Select {
337 |             selector,
338 |             targets: (0..case_count).map(WasmLabel::new).collect(),
339 |             default: WasmLabel::new(case_count),
340 |         }];
341 | 
342 |         // Wrap once per destination, innermost first. Each real branch is
343 |         // deepened by the number of wrapper blocks that will enclose it.
344 |         for (done, dest) in targets.iter().chain(std::iter::once(default)).enumerate() {
345 |             let remaining = case_count - done;
346 |             nest = vec![
347 |                 WasmBlock::Block {
348 |                     body: nest,
349 |                     out: Block::invalid(),
350 |                 },
351 |                 WasmBlock::BlockParams {
352 |                     from: &dest.args[..],
353 |                     to: &self.body.blocks[dest.block].params[..],
354 |                 },
355 |                 WasmBlock::Br {
356 |                     target: Self::resolve_target(&self.ctrl_stack[..], dest.block).add(remaining),
357 |                 },
358 |             ];
359 |         }
360 |         into.extend(nest);
361 |     }
362 | 
363 |     fn do_blockparam_transfer(
364 |         from: &'a [Value], // Argument values supplied by the branch.
365 |         to: &'a [(Type, Value)], // Parameters of the destination block.
366 |         into: &mut Vec<WasmBlock<'a>>, // Node list that receives the transfer.
367 |     ) {
368 |         into.push(WasmBlock::BlockParams { from, to }); // Recorded as a node of its own; nothing is copied here.
369 |     }
370 | 
371 |     fn finish_block(&mut self, out: Block) {
372 |         // Leave the block scope and wrap the nodes collected for it.
373 |         self.ctrl_stack.pop();
374 |         let body = self.result.pop().unwrap();
375 |         let wrapped = WasmBlock::Block { body, out };
376 |         let parent = self.result.last_mut().unwrap();
377 |         parent.push(wrapped);
378 |     }
379 | 
380 |     fn else_(&mut self) {
381 |         self.result.push(Vec::new()); // Fresh vector that collects the else-body.
382 |     }
383 | 
384 |     fn finish_if(&mut self, cond: Value) {
385 |         // The else-body was pushed after the if-body, so it comes off the
386 |         // result stack first.
387 |         let if_false = self.result.pop().unwrap();
388 |         let if_true = self.result.pop().unwrap();
389 |         self.ctrl_stack.pop();
390 |         let node = WasmBlock::If { cond, if_true, if_false };
391 |         let parent = self.result.last_mut().unwrap();
392 |         parent.push(node);
393 |     }
394 | 
395 |     fn node_within(&mut self, block: Block, merge_node_start: usize) {
396 |         let pending = self.merge_node_children.last().unwrap();
397 |         log::trace!("node_within: block {} merge_nodes {:?}", block, pending);
398 |         let next_merge = pending[merge_node_start..].first().copied();
399 |         let out = self.result.last_mut().unwrap();
400 | 
401 |         if let Some(merge) = next_merge {
402 |             // Steps run in reverse push order: first the rest of `block`
403 |             // inside a new block scope labelled `merge`, then that scope
404 |             // is closed, and finally `merge`'s own subtree follows it.
405 |             self.process_stack.push(StackEntry::DomSubtree(merge));
406 |             self.ctrl_stack.push(CtrlEntry::Block { out: merge });
407 |             self.result.push(Vec::new());
408 |             self.process_stack.push(StackEntry::FinishBlock(merge));
409 |             let rest = StackEntry::NodeWithin(block, merge_node_start + 1);
410 |             self.process_stack.push(rest);
411 |             return;
412 |         }
413 | 
414 |         // No merge-node scopes left to open: emit the block's contents,
415 |         // then whatever its terminator calls for.
416 |         out.push(WasmBlock::Leaf { block });
417 |         match &self.body.blocks[block].terminator {
418 |             Terminator::Br { target } => {
419 |                 self.process_stack.push(StackEntry::DoBranch(block, target));
420 |             }
421 |             Terminator::CondBr {
422 |                 cond,
423 |                 if_true,
424 |                 if_false,
425 |             } => {
426 |                 // Pushed in reverse of execution order: true edge, switch
427 |                 // to the else-body, false edge, and then the closing step.
428 |                 // The vector pushed last collects the if-body.
429 |                 self.ctrl_stack.push(CtrlEntry::IfThenElse);
430 |                 self.process_stack.push(StackEntry::FinishIf(*cond));
431 |                 self.process_stack.push(StackEntry::DoBranch(block, if_false));
432 |                 self.process_stack.push(StackEntry::Else);
433 |                 self.process_stack.push(StackEntry::DoBranch(block, if_true));
434 |                 self.result.push(Vec::new());
435 |             }
436 |             Terminator::Select {
437 |                 value,
438 |                 targets,
439 |                 default,
440 |             } => {
441 |                 self.do_branch_select(*value, targets, default);
442 |             }
443 |             Terminator::Return { values } => out.push(WasmBlock::Return { values }),
444 |             Terminator::Unreachable | Terminator::None => out.push(WasmBlock::Unreachable),
445 |         }
446 |     }
447 | }
448 | 


--------------------------------------------------------------------------------
/src/backend/treeify.rs:
--------------------------------------------------------------------------------
  1 | //! Treeification: placing some values "under" others if only used
  2 | //! once, to generate more AST-like Wasm code.
  3 | 
  4 | use crate::entity::EntityRef;
  5 | use crate::ir::{FunctionBody, Value, ValueDef};
  6 | use crate::Operator;
  7 | use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet};
  8 | use std::convert::TryFrom;
  9 | 
 10 | /// One "argument slot": the operator-defined value plus the index of one of its args.
 11 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 12 | pub struct ValueArg(Value, u16);
 13 | 
 14 | #[derive(Clone, Debug)]
 15 | pub struct Trees {
 16 |     /// For a value placed "under" another value: the arg slot of the
 17 |     /// using value that it occupies. Kept in sync with `owned`.
 18 |     pub owner: HashMap<Value, ValueArg>,
 19 |     /// For an arg slot of an operator-defined value: the value, if
 20 |     /// any, that lives at that slot. Kept in sync with `owner`.
 21 |     pub owned: HashMap<ValueArg, Value>,
 22 |     /// Values that are regenerated every time they are used (see `is_remat`).
 23 |     pub remat: HashSet<Value>,
 24 | }
 25 | 
 26 | fn is_remat(op: &Operator) -> bool {
 27 |     // Only the argument-free constant operators are always rematerialized.
 28 |     matches!(
 29 |         op,
 30 |         Operator::I32Const { .. }
 31 |             | Operator::I64Const { .. }
 32 |             | Operator::F32Const { .. }
 33 |             | Operator::F64Const { .. }
 34 |     )
 35 | }
 36 | 
 37 | impl Trees {
 38 |     pub fn compute(body: &FunctionBody) -> Trees { // Decides, for every operator arg in `body`, whether its value is nested under the user.
 39 |         let mut owner = HashMap::default(); // value -> slot it is placed under
 40 |         let mut owned = HashMap::default(); // slot -> value placed there
 41 |         let mut remat = HashSet::default(); // values re-emitted at every use
 42 |         let mut multi_use = HashSet::default(); // values seen in a second slot; never owned again
 43 | 
 44 |         for block_def in body.blocks.values() {
 45 |             let mut last_non_pure = None; // Most recent non-pure operator in this block, reset per block.
 46 |             for &value in &block_def.insts {
 47 |                 match &body.values[value] {
 48 |                     &ValueDef::Operator(op, args, _) => {
 49 |                         // Ignore operators with invalid args: these must
 50 |                         // always be unreachable.
 51 |                         if body.arg_pool[args].iter().any(|arg| arg.is_invalid()) {
 52 |                             continue;
 53 |                         }
 54 |                         // If this is an always-rematerialized operator,
 55 |                         // mark it as such and continue.
 56 |                         if is_remat(&op) {
 57 |                             remat.insert(value);
 58 |                             continue;
 59 |                         }
 60 | 
 61 |                         // For each arg (after alias resolution): skip it
 62 |                         // if it is already in `multi_use`; if it is owned
 63 |                         // by some slot already, undo that and record it
 64 |                         // in `multi_use`; otherwise place it in this slot
 65 |                         // when it is movable (pure, single-output) or is
 66 |                         // the block's most recent non-pure operator.
 67 |                         for (i, &arg) in body.arg_pool[args].iter().enumerate() {
 68 |                             let arg = body.resolve_alias(arg);
 69 |                             if multi_use.contains(&arg) {
 70 |                                 continue;
 71 |                             } else if let Some(old_owner) = owner.remove(&arg) {
 72 |                                 owned.remove(&old_owner);
 73 |                                 multi_use.insert(arg);
 74 |                             } else if Self::is_movable(body, arg) || Some(arg) == last_non_pure {
 75 |                                 let pos = u16::try_from(i).unwrap(); // Panics if the arg index does not fit in u16.
 76 |                                 let value_arg = ValueArg(value, pos);
 77 |                                 owner.insert(arg, value_arg);
 78 |                                 owned.insert(value_arg, arg);
 79 |                             }
 80 |                         }
 81 | 
 82 |                         if !op.is_pure() {
 83 |                             last_non_pure = Some(value); // Updated only after this operator's own args were examined.
 84 |                         }
 85 |                     }
 86 |                     &ValueDef::PickOutput(..) => {
 87 |                         // Can ignore use: multi-arity values are never treeified.
 88 |                     }
 89 |                     &ValueDef::BlockParam(..)
 90 |                     | &ValueDef::Alias(..)
 91 |                     | &ValueDef::Placeholder(..)
 92 |                     | &ValueDef::None => {}
 93 |                 }
 94 |             }
 95 |         }
 96 |         for block in body.blocks.values() { // Second pass: any value used by a terminator loses its ownership entry.
 97 |             block.terminator.visit_uses(|u| {
 98 |                 let u = body.resolve_alias(u);
 99 |                 if let Some(old_owner) = owner.remove(&u) {
100 |                     owned.remove(&old_owner);
101 |                 }
102 |             });
103 |         }
104 | 
105 |         Trees {
106 |             owner,
107 |             owned,
108 |             remat,
109 |         }
110 |     }
111 | 
112 |     fn is_single_output_op(body: &FunctionBody, value: Value) -> Option<Operator> {
113 |         match body.values[value] {
114 |             ValueDef::Operator(op, _, ref tys) => Some(op).filter(|_| tys.len() == 1),
115 |             _ => None,
116 |         }
117 |     }
118 | 
119 |     fn is_movable(body: &FunctionBody, value: Value) -> bool {
120 |         // Movable: defined by a pure operator that has exactly one result.
121 |         let op = Self::is_single_output_op(body, value);
122 |         op.map_or(false, |op| op.is_pure())
123 |     }
124 | }
125 | 


--------------------------------------------------------------------------------
/src/bin/waffle-util.rs:
--------------------------------------------------------------------------------
  1 | //! WAFFLE command-line tool.
  2 | 
  3 | use anyhow::Result;
  4 | use log::debug;
  5 | use std::path::PathBuf;
  6 | use structopt::StructOpt;
  7 | use waffle::{entity::EntityRef, FrontendOptions, Func, Module, OptOptions};
  8 | 
  9 | #[derive(Debug, StructOpt)]
 10 | #[structopt(name = "waffle-util", about = "WAFFLE utility.")]
 11 | struct Options { // Global flags. Plain `//` comments only: structopt turns `///` doc comments into help text.
 12 |     #[structopt(short, long)]
 13 |     debug: bool, // Raises the log filter to `Debug` in `main`.
 14 | 
 15 |     #[structopt(
 16 |         help = "Do basic optimizations: GVN and const-prop",
 17 |         long = "basic-opts"
 18 |     )]
 19 |     basic_opts: bool, // Makes `apply_options` call `optimize` on every function body.
 20 | 
 21 |     #[structopt(
 22 |         help = "Enable parsing of debug-info from input",
 23 |         short = "g",
 24 |         long = "debug-info"
 25 |     )]
 26 |     debug_info: bool, // Copied into `FrontendOptions::debug` before parsing.
 27 | 
 28 |     #[structopt(help = "Transform to maximal SSA", long = "max-ssa")]
 29 |     max_ssa: bool, // Makes `apply_options` call `convert_to_max_ssa` on every function body.
 30 | 
 31 |     #[structopt(subcommand)]
 32 |     command: Command, // The subcommand to run.
 33 | }
 34 | 
 35 | #[derive(Debug, StructOpt)]
 36 | enum Command { // Subcommands. Plain `//` comments only: structopt turns `///` doc comments into help text.
 37 |     #[structopt(name = "print-ir", about = "Parse Wasm and print resulting IR")]
 38 |     PrintIR { // Prints `module.display()` for the whole module.
 39 |         #[structopt(help = "Wasm file to parse")]
 40 |         wasm: PathBuf,
 41 |     },
 42 |     #[structopt(name = "print-func", about = "Parse Wasm and print one function body")]
 43 |     PrintFunc { // Prints the verbose display of a single function body.
 44 |         #[structopt(help = "Wasm file to parse")]
 45 |         wasm: PathBuf,
 46 |         #[structopt(help = "Index of Wasm function to print")]
 47 |         func: usize, // Passed to `Func::new` to index `module.funcs`.
 48 |     },
 49 |     #[structopt(name = "roundtrip", about = "Round-trip Wasm through IR")]
 50 |     RoundTrip { // Parses `input` and writes the re-encoded bytes to `output`.
 51 |         #[structopt(help = "Wasm file to parse", short = "i")]
 52 |         input: PathBuf,
 53 |         #[structopt(help = "Wasm file to produce", short = "o")]
 54 |         output: PathBuf,
 55 |     },
 56 | }
 57 | 
 58 | fn apply_options(opts: &Options, module: &mut Module) -> Result<()> {
 59 |     module.expand_all_funcs()?; // Always runs, before either optional pass.
 60 |     if opts.basic_opts {
 61 |         module.per_func_body(|func_body| func_body.optimize(&OptOptions::default()));
 62 |     }
 63 |     if opts.max_ssa {
 64 |         module.per_func_body(|func_body| func_body.convert_to_max_ssa(None));
 65 |     }
 66 |     Ok(())
 67 | }
 68 | 
 69 | fn main() -> Result<()> {
 70 |     let opts = Options::from_args();
 71 | 
 72 |     // Logging: start from the environment configuration; `--debug` then
 73 |     // raises the level. The result of `try_init` is deliberately dropped.
 74 |     let mut logger = env_logger::Builder::from_default_env();
 75 |     if opts.debug {
 76 |         logger.filter_level(log::LevelFilter::Debug);
 77 |     }
 78 |     let _ = logger.try_init();
 79 | 
 80 |     let mut options = FrontendOptions::default();
 81 |     options.debug = opts.debug_info;
 82 | 
 83 |     // Every subcommand starts the same way: read one Wasm file, parse
 84 |     // it, and apply the global transformation flags. Only the name of
 85 |     // the path field differs between the variants.
 86 |     let path = match &opts.command {
 87 |         Command::PrintIR { wasm } | Command::PrintFunc { wasm, .. } => wasm,
 88 |         Command::RoundTrip { input, .. } => input,
 89 |     };
 90 |     let bytes = std::fs::read(path)?;
 91 |     debug!("Loaded {} bytes of Wasm data", bytes.len());
 92 |     let mut module = Module::from_wasm_bytes(&bytes[..], &options)?;
 93 |     apply_options(&opts, &mut module)?;
 94 | 
 95 |     // What is done with the prepared module is the part that differs
 96 |     // per subcommand.
 97 |     match &opts.command {
 98 |         Command::PrintIR { .. } => println!("{}", module.display()),
 99 |         Command::PrintFunc { func, .. } => {
100 |             // `unwrap` panics when `body()` has nothing to return.
101 |             let decl = &module.funcs[Func::new(*func)];
102 |             let body = decl.body().unwrap();
103 |             println!("{}", body.display_verbose("", Some(&module)));
104 |         }
105 |         Command::RoundTrip { output, .. } => {
106 |             // Re-encode the module and write the bytes to `output`.
107 |             let produced = module.to_wasm_bytes()?;
108 |             std::fs::write(output, &produced[..])?;
109 |         }
110 |     }
111 | 
112 |     Ok(())
113 | }
114 | 


--------------------------------------------------------------------------------
/src/cfg/domtree.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Derives from the dominator tree implementation in regalloc.rs, which is
  3 |  * licensed under the Apache Public License 2.0 with LLVM Exception. See:
  4 |  * https://github.com/bytecodealliance/regalloc.rs
  5 |  */
  6 | 
  7 | // This is an implementation of the algorithm described in
  8 | //
  9 | //   A Simple, Fast Dominance Algorithm
 10 | //   Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy
 11 | //   Department of Computer Science, Rice University, Houston, Texas, USA
 12 | //   TR-06-33870
 13 | //   https://www.cs.rice.edu/~keith/EMBED/dom.pdf
 14 | 
 15 | use crate::entity::{EntityRef, PerEntity};
 16 | use crate::ir::Block;
 17 | 
 18 | // Helper: walks `node1` and `node2` up the `idom` links until they meet.
 19 | fn merge_sets(
 20 |     idom: &PerEntity<Block, Block>, // map from Block to Block
 21 |     block_to_rpo: &PerEntity<Block, Option<u32>>,
 22 |     mut node1: Block,
 23 |     mut node2: Block,
 24 | ) -> Block {
 25 |     loop {
 26 |         if node1 == node2 {
 27 |             return node1;
 28 |         }
 29 |         if node1.is_invalid() || node2.is_invalid() {
 30 |             return Block::invalid();
 31 |         }
 32 |         // The node with the larger RPO number is replaced by its idom.
 33 |         match block_to_rpo[node1].unwrap().cmp(&block_to_rpo[node2].unwrap()) {
 34 |             std::cmp::Ordering::Greater => node1 = idom[node1],
 35 |             std::cmp::Ordering::Less => node2 = idom[node2],
 36 |             std::cmp::Ordering::Equal => {}
 37 |         }
 38 |     }
 39 | }
 40 | 
 41 | pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>(
 42 |     preds: PredFn, // Returns the predecessor list of a block.
 43 |     post_ord: &[Block], // Blocks in postorder; iterated in reverse below.
 44 |     start: Block, // Root block; its entry in the result ends up invalid.
 45 | ) -> PerEntity<Block, Block> {
 46 |     // We have post_ord, which is the postorder sequence.
 47 | 
 48 |     // Compute the map from each block to its RPO number (reverse-postorder position).
 49 |     let mut block_to_rpo: PerEntity<Block, Option<u32>> = PerEntity::default();
 50 |     for (i, rpo_block) in post_ord.iter().rev().enumerate() {
 51 |         block_to_rpo[*rpo_block] = Some(i as u32);
 52 |     }
 53 | 
 54 |     let mut idom: PerEntity<Block, Block> = PerEntity::default();
 55 | 
 56 |     // The start node must have itself as a parent.
 57 |     idom[start] = start;
 58 | 
 59 |     let mut changed = true;
 60 |     while changed { // Iterate to a fixed point: stop once a full pass changes nothing.
 61 |         changed = false;
 62 |         // Consider blocks in reverse postorder; blocks absent from `post_ord` are never visited.
 63 |         for &node in post_ord.iter().rev() {
 64 |             let rponum = block_to_rpo[node].unwrap();
 65 | 
 66 |             let mut parent = Block::invalid();
 67 |             for &pred in preds(node).iter() { // Pick the first pred that comes earlier in RPO as the initial candidate.
 68 |                 let pred_rpo = match block_to_rpo[pred] {
 69 |                     Some(r) => r,
 70 |                     None => {
 71 |                         // Skip unreachable preds.
 72 |                         continue;
 73 |                     }
 74 |                 };
 75 |                 if pred_rpo < rponum {
 76 |                     parent = pred;
 77 |                     break;
 78 |                 }
 79 |             }
 80 | 
 81 |             if parent != Block::invalid() {
 82 |                 for &pred in preds(node).iter() { // Fold every other pred that already has an idom into the candidate.
 83 |                     if pred == parent {
 84 |                         continue;
 85 |                     }
 86 |                     if idom[pred] == Block::invalid() {
 87 |                         continue;
 88 |                     }
 89 |                     parent = merge_sets(&idom, &block_to_rpo, parent, pred);
 90 |                 }
 91 |             }
 92 | 
 93 |             if parent != Block::invalid() && parent != idom[node] {
 94 |                 idom[node] = parent;
 95 |                 changed = true;
 96 |             }
 97 |         }
 98 |     }
 99 | 
100 |     // Now set the start node's dominator-tree parent to "invalid";
101 |     // this allows the loop in `dominates` to terminate.
102 |     idom[start] = Block::invalid();
103 | 
104 |     idom
105 | }
106 | 
107 | pub fn dominates(idom: &PerEntity<Block, Block>, a: Block, mut b: Block) -> bool {
108 |     // Climb from `b` along the `idom` links until `a` is met; reaching
109 |     // the invalid block first means `a` was not found on the way up.
110 |     while a != b {
111 |         if b.is_invalid() {
112 |             return false;
113 |         }
114 |         b = idom[b];
115 |     }
116 |     true
117 | }
118 | 


--------------------------------------------------------------------------------
/src/cfg/mod.rs:
--------------------------------------------------------------------------------
  1 | //! Lightweight CFG analyses.
  2 | 
  3 | // Borrowed from regalloc2's cfg.rs, which is also Apache-2.0 with
  4 | // LLVM exception.
  5 | 
  6 | use crate::declare_entity;
  7 | use crate::entity::{EntityRef, EntityVec, PerEntity};
  8 | use crate::ir::{Block, FunctionBody, Terminator, Value, ValueDef};
  9 | use smallvec::SmallVec;
 10 | 
 11 | pub mod domtree;
 12 | pub mod postorder;
 13 | 
 14 | declare_entity!(RPOIndex, "rpo"); // Index type for `CFGInfo::rpo` and the values of `CFGInfo::rpo_pos`.
 15 | 
 16 | /// Auxiliary analyses of the control-flow graph.
 17 | #[derive(Clone, Debug)]
 18 | pub struct CFGInfo {
 19 |     /// Entry block.
 20 |     pub entry: Block,
 21 |     /// Blocks whose terminator is a `Terminator::Return`.
 22 |     pub return_blocks: Vec<Block>,
 23 |     /// Reverse-postorder traversal of blocks.
 24 |     pub rpo: EntityVec<RPOIndex, Block>,
 25 |     /// Position of each block in RPO, if reachable.
 26 |     pub rpo_pos: PerEntity<Block, Option<RPOIndex>>,
 27 |     /// Domtree parents (immediate dominators), indexed by block.
 28 |     pub domtree: PerEntity<Block, Block>,
 29 |     /// Domtree children, stored as per-block `child`/`next` links.
 30 |     pub domtree_children: PerEntity<Block, DomtreeChildren>,
 31 |     /// Defining block for a given value (aliases and `PickOutput`s use the underlying value's block).
 32 |     pub def_block: PerEntity<Value, Block>,
 33 |     /// Preds for a given block.
 34 |     pub preds: PerEntity<Block, SmallVec<[Block; 4]>>,
 35 |     /// For each entry of `preds`: this block's index among that predecessor's branch targets.
 36 |     pub pred_pos: PerEntity<Block, SmallVec<[usize; 4]>>,
 37 | }
 38 | 
 39 | #[derive(Clone, Debug, Default)]
 40 | pub struct DomtreeChildren { // Per-block links that chain together the children of each domtree node.
 41 |     pub child: Block, // Head of this block's child chain (set in `CFGInfo::new`).
 42 |     pub next: Block, // Next sibling under the same domtree parent; followed by `DomtreeChildIter`.
 43 | }
 44 | 
 45 | pub struct DomtreeChildIter<'a> { // Iterator that follows a sibling chain starting at `block`.
 46 |     domtree_children: &'a PerEntity<Block, DomtreeChildren>, // Link table whose `next` fields are followed.
 47 |     block: Block, // Next block to yield; an invalid block ends the iteration.
 48 | }
 49 | 
 50 | impl<'a> Iterator for DomtreeChildIter<'a> {
 51 |     type Item = Block;
 52 |     fn next(&mut self) -> Option<Block> {
 53 |         // An invalid cursor marks the end of the sibling chain.
 54 |         if self.block.is_invalid() {
 55 |             return None;
 56 |         }
 57 |         let current = self.block;
 58 |         self.block = self.domtree_children[current].next;
 59 |         Some(current)
 60 |     }
 61 | }
 62 | 
 63 | impl CFGInfo {
 64 |     pub fn new(f: &FunctionBody) -> CFGInfo {
 65 |         let mut return_blocks = vec![];
 66 |         let mut preds: PerEntity<Block, SmallVec<[Block; 4]>> = PerEntity::default();
 67 |         let mut pred_pos: PerEntity<Block, SmallVec<[usize; 4]>> = PerEntity::default();
 68 |         for (block_id, block) in f.blocks.entries() {
 69 |             if let Terminator::Return { .. } = &block.terminator {
 70 |                 return_blocks.push(block_id);
 71 |             }
 72 |             let mut target_idx = 0;
 73 |             block.terminator.visit_targets(|target| {
 74 |                 preds[target.block].push(block_id);
 75 |                 pred_pos[target.block].push(target_idx);
 76 |                 target_idx += 1;
 77 |             });
 78 |         }
 79 | 
 80 |         let postorder = postorder::calculate(f.entry, |block| &f.blocks[block].succs[..]);
 81 | 
 82 |         let domtree =
 83 |             domtree::calculate(|block| &f.blocks[block].preds[..], &postorder[..], f.entry);
 84 | 
 85 |         let mut domtree_children: PerEntity<Block, DomtreeChildren> = PerEntity::default();
 86 |         for block in f.blocks.iter().rev() {
 87 |             let idom = domtree[block];
 88 |             if idom.is_valid() {
 89 |                 let next = domtree_children[idom].child;
 90 |                 domtree_children[block].next = next;
 91 |                 domtree_children[idom].child = block;
 92 |             }
 93 |         }
 94 | 
 95 |         let mut def_block: PerEntity<Value, Block> = PerEntity::default();
 96 |         for (block, block_def) in f.blocks.entries() {
 97 |             for &(_, param) in &block_def.params {
 98 |                 def_block[param] = block;
 99 |             }
100 |             for &value in &block_def.insts {
101 |                 def_block[value] = block;
102 |             }
103 |         }
104 |         for value in f.values.iter() {
105 |             let orig_value = f.resolve_alias(value);
106 |             let underlying_value = match &f.values[orig_value] {
107 |                 &ValueDef::PickOutput(value, ..) => value,
108 |                 _ => orig_value,
109 |             };
110 |             def_block[value] = def_block[underlying_value];
111 |         }
112 | 
113 |         let mut rpo = postorder;
114 |         rpo.reverse();
115 |         let rpo = EntityVec::from(rpo);
116 |         let mut rpo_pos = PerEntity::default();
117 |         for (rpo, &block) in rpo.entries() {
118 |             rpo_pos[block] = Some(rpo);
119 |         }
120 | 
121 |         CFGInfo {
122 |             entry: f.entry,
123 |             return_blocks,
124 |             rpo,
125 |             rpo_pos,
126 |             domtree,
127 |             domtree_children,
128 |             def_block,
129 |             preds,
130 |             pred_pos,
131 |         }
132 |     }
133 | 
134 |     pub fn dominates(&self, a: Block, b: Block) -> bool {
135 |         domtree::dominates(&self.domtree, a, b)
136 |     }
137 | 
138 |     pub fn dom_children<'a>(&'a self, block: Block) -> DomtreeChildIter<'a> {
139 |         DomtreeChildIter {
140 |             domtree_children: &self.domtree_children,
141 |             block: self.domtree_children[block].child,
142 |         }
143 |     }
144 | }
145 | 


--------------------------------------------------------------------------------
/src/cfg/postorder.rs:
--------------------------------------------------------------------------------
 1 | //! Fast postorder computation.
 2 | 
 3 | // Borrowed from regalloc2's postorder.rs, which is also Apache-2.0
 4 | // with LLVM-exception.
 5 | 
 6 | use crate::entity::PerEntity;
 7 | use crate::ir::Block;
 8 | use smallvec::{smallvec, SmallVec};
 9 | 
10 | pub fn calculate<'a, SuccFn: Fn(Block) -> &'a [Block]>(
11 |     entry: Block,
12 |     succ_blocks: SuccFn,
13 | ) -> Vec<Block> {
14 |     let mut ret = vec![];
15 | 
16 |     // State: visited-block map, and explicit DFS stack.
17 |     let mut visited: PerEntity<Block, bool> = PerEntity::default();
18 | 
19 |     #[derive(Debug)]
20 |     struct State<'a> {
21 |         block: Block,
22 |         succs: &'a [Block],
23 |         next_succ: usize,
24 |     }
25 |     let mut stack: SmallVec<[State; 64]> = smallvec![];
26 | 
27 |     visited[entry] = true;
28 |     stack.push(State {
29 |         block: entry,
30 |         succs: succ_blocks(entry),
31 |         next_succ: 0,
32 |     });
33 | 
34 |     while let Some(ref mut state) = stack.last_mut() {
35 |         log::trace!("postorder: TOS is {:?}", state);
36 |         // Perform one action: push to new succ, skip an already-visited succ, or pop.
37 |         if state.next_succ < state.succs.len() {
38 |             let succ = state.succs[state.next_succ];
39 |             log::trace!(" -> succ {}", succ);
40 |             state.next_succ += 1;
41 |             if !visited[succ] {
42 |                 log::trace!(" -> visiting");
43 |                 visited[succ] = true;
44 |                 stack.push(State {
45 |                     block: succ,
46 |                     succs: succ_blocks(succ),
47 |                     next_succ: 0,
48 |                 });
49 |             }
50 |         } else {
51 |             log::trace!("retreating from {}", state.block);
52 |             ret.push(state.block);
53 |             stack.pop();
54 |         }
55 |     }
56 | 
57 |     ret
58 | }
59 | 


--------------------------------------------------------------------------------
/src/entity.rs:
--------------------------------------------------------------------------------
  1 | //! Type-safe indices and indexed containers.
  2 | 
  3 | use std::default::Default;
  4 | use std::fmt::Debug;
  5 | use std::hash::Hash;
  6 | use std::marker::PhantomData;
  7 | use std::ops::{Index, IndexMut};
  8 | 
  9 | /// An index into an index-space of entities.
 10 | pub trait EntityRef: Clone + Copy + PartialEq + Eq + PartialOrd + Ord + Hash {
 11 |     /// Create a new type-safe index value from a known index.
 12 |     fn new(value: usize) -> Self;
 13 |     /// Get the index value of this type-safe index. Must be a valid
 14 |     /// index (will panic otherwise).
 15 |     fn index(self) -> usize;
 16 |     /// A sentinel invalid value.
 17 |     fn invalid() -> Self;
 18 |     /// Is this index a valid index (not equal to `Self::invalid()`)?
 19 |     fn is_valid(self) -> bool {
 20 |         self != Self::invalid()
 21 |     }
 22 |     /// Is this index an invalid index (equal to `Self::invalid()`)?
 23 |     fn is_invalid(self) -> bool {
 24 |         self == Self::invalid()
 25 |     }
 26 |     /// Turn a valid index into `Some(index)`, and an invalid index
 27 |     /// into `None`.
 28 |     fn maybe_index(self) -> Option<usize> {
 29 |         if self.is_valid() {
 30 |             Some(self.index())
 31 |         } else {
 32 |             None
 33 |         }
 34 |     }
 35 | }
 36 | 
 37 | #[macro_export]
 38 | macro_rules! declare_entity {
 39 |     ($name:tt, $prefix:tt) => {
 40 |         #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 41 |         pub struct $name(u32);
 42 | 
 43 |         impl $crate::entity::EntityRef for $name {
 44 |             fn new(value: usize) -> Self {
 45 |                 use std::convert::TryFrom;
 46 |                 let value = u32::try_from(value).unwrap();
 47 |                 debug_assert!(value != u32::MAX);
 48 |                 Self(value)
 49 |             }
 50 |             fn index(self) -> usize {
 51 |                 debug_assert!(self.is_valid());
 52 |                 self.0 as usize
 53 |             }
 54 |             fn invalid() -> Self {
 55 |                 Self(u32::MAX)
 56 |             }
 57 |         }
 58 | 
 59 |         impl std::convert::From<u32> for $name {
 60 |             fn from(val: u32) -> Self {
 61 |                 <Self as $crate::entity::EntityRef>::new(val as usize)
 62 |             }
 63 |         }
 64 | 
 65 |         impl std::default::Default for $name {
 66 |             fn default() -> Self {
 67 |                 <Self as $crate::entity::EntityRef>::invalid()
 68 |             }
 69 |         }
 70 | 
 71 |         impl std::fmt::Debug for $name {
 72 |             fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 73 |                 write!(f, "{}{}", $prefix, self.0)
 74 |             }
 75 |         }
 76 |         impl std::fmt::Display for $name {
 77 |             fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 78 |                 write!(f, "{}{}", $prefix, self.0)
 79 |             }
 80 |         }
 81 |     };
 82 | }
 83 | 
 84 | /// A vector that *defines* an entity index space, holding the data
 85 | /// for each entity.
 86 | #[derive(Clone, Debug)]
 87 | pub struct EntityVec<Idx: EntityRef, T: Clone + Debug>(Vec<T>, PhantomData<Idx>);
 88 | 
 89 | impl<Idx: EntityRef, T: Clone + Debug> std::default::Default for EntityVec<Idx, T> {
 90 |     fn default() -> Self {
 91 |         Self(vec![], PhantomData)
 92 |     }
 93 | }
 94 | 
 95 | impl<Idx: EntityRef, T: Clone + Debug> From<Vec<T>> for EntityVec<Idx, T> {
 96 |     fn from(vec: Vec<T>) -> Self {
 97 |         Self(vec, PhantomData)
 98 |     }
 99 | }
100 | 
101 | impl<Idx: EntityRef, T: Clone + Debug> EntityVec<Idx, T> {
102 |     /// Add a new entity, returning its assigned index.
103 |     pub fn push(&mut self, t: T) -> Idx {
104 |         let idx = Idx::new(self.0.len());
105 |         self.0.push(t);
106 |         idx
107 |     }
108 | 
109 |     /// Get the number of entities in this entity space.
110 |     pub fn len(&self) -> usize {
111 |         self.0.len()
112 |     }
113 | 
114 |     /// Get an iterator over the index-space.
115 |     pub fn iter(&self) -> impl DoubleEndedIterator<Item = Idx> {
116 |         (0..self.0.len()).map(|index| Idx::new(index))
117 |     }
118 | 
119 |     /// Get an iterator over (borrows of) entity values.
120 |     pub fn values(&self) -> impl DoubleEndedIterator<Item = &T> {
121 |         self.0.iter()
122 |     }
123 | 
124 |     /// Get an iterator over (mutable borrows of) entity values.
125 |     pub fn values_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut T> {
126 |         self.0.iter_mut()
127 |     }
128 | 
129 |     /// Get an iterator over index, borrow-of-entity tuples.
130 |     pub fn entries(&self) -> impl DoubleEndedIterator<Item = (Idx, &T)> {
131 |         self.0
132 |             .iter()
133 |             .enumerate()
134 |             .map(|(index, t)| (Idx::new(index), t))
135 |     }
136 | 
137 |     /// Get an iterator over index, mutable-borrow-of-entity tuples.
138 |     pub fn entries_mut(&mut self) -> impl Iterator<Item = (Idx, &mut T)> {
139 |         self.0
140 |             .iter_mut()
141 |             .enumerate()
142 |             .map(|(index, t)| (Idx::new(index), t))
143 |     }
144 | 
145 |     /// Typesafe element access, returning `None` if `idx` is the
146 |     /// invalid index.
147 |     pub fn get(&self, idx: Idx) -> Option<&T> {
148 |         self.0.get(idx.maybe_index()?)
149 |     }
150 | 
151 |     /// Typesafe mutable element access, returning `None` if `idx` is
152 |     /// the invalid index.
153 |     pub fn get_mut(&mut self, idx: Idx) -> Option<&mut T> {
154 |         self.0.get_mut(idx.maybe_index()?)
155 |     }
156 | 
157 |     /// Convert this `EntityVec` into the underlying `Vec` and return
158 |     /// it.
159 |     pub fn into_vec(self) -> Vec<T> {
160 |         self.0
161 |     }
162 | }
163 | 
164 | impl<Idx: EntityRef, T: Clone + Debug> Index<Idx> for EntityVec<Idx, T> {
165 |     type Output = T;
166 |     fn index(&self, idx: Idx) -> &T {
167 |         &self.0[idx.index()]
168 |     }
169 | }
170 | 
171 | impl<Idx: EntityRef, T: Clone + Debug> IndexMut<Idx> for EntityVec<Idx, T> {
172 |     fn index_mut(&mut self, idx: Idx) -> &mut T {
173 |         &mut self.0[idx.index()]
174 |     }
175 | }
176 | 
177 | /// Vector of state per entity in an index-space that does *not*
178 | /// define the index-space. In other words, this container will not
179 | /// pass out new indices, it will only allow associating state with
180 | /// existing indices; and it requires a default value for data at an
181 | /// index not yet assigned.
182 | #[derive(Clone, Debug, Default)]
183 | pub struct PerEntity<Idx: EntityRef, T: Clone + Debug + Default>(Vec<T>, PhantomData<Idx>, T);
184 | 
185 | impl<Idx: EntityRef, T: Clone + Debug + Default> Index<Idx> for PerEntity<Idx, T> {
186 |     type Output = T;
187 |     fn index(&self, idx: Idx) -> &T {
188 |         debug_assert!(idx.is_valid());
189 |         self.0.get(idx.index()).unwrap_or(&self.2)
190 |     }
191 | }
192 | 
193 | impl<Idx: EntityRef, T: Clone + Debug + Default> IndexMut<Idx> for PerEntity<Idx, T> {
194 |     fn index_mut(&mut self, idx: Idx) -> &mut T {
195 |         debug_assert!(idx.is_valid());
196 |         if idx.index() >= self.0.len() {
197 |             self.0.resize(idx.index() + 1, T::default());
198 |         }
199 |         &mut self.0[idx.index()]
200 |     }
201 | }
202 | 
203 | impl<Idx: EntityRef, T: Clone + Debug + Default + PartialEq> PartialEq for PerEntity<Idx, T> {
204 |     fn eq(&self, other: &Self) -> bool {
205 |         self.0 == other.0
206 |     }
207 | }
208 | impl<Idx: EntityRef, T: Clone + Debug + Default + PartialEq + Eq> Eq for PerEntity<Idx, T> {}
209 | 


--------------------------------------------------------------------------------
/src/errors.rs:
--------------------------------------------------------------------------------
 1 | //! Error types.
 2 | 
 3 | /// An error that occurs when translating Wasm to IR.
 4 | #[derive(Clone, Debug)]
 5 | pub enum FrontendError {
 6 |     /// The given WebAssembly feature is not supported.
 7 |     UnsupportedFeature(String),
 8 |     /// Some dimension of the WebAssembly module is too large to be
 9 |     /// supported by this library.
10 |     TooLarge(String),
11 |     /// An internal error occurred.
12 |     Internal(String),
13 | }
14 | 
15 | impl std::fmt::Display for FrontendError {
16 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
17 |         std::fmt::Debug::fmt(self, f)
18 |     }
19 | }
20 | 
21 | impl std::error::Error for FrontendError {}
22 | 


--------------------------------------------------------------------------------
/src/fuzzing.rs:
--------------------------------------------------------------------------------
 1 | //! Fuzzing-specific utilities.
 2 | 
 3 | use libfuzzer_sys::arbitrary;
 4 | 
 5 | /// Should this module be rejected early during fuzzing due to an
 6 | /// unsupported feature?
 7 | ///
 8 | /// Public/exported only for access by fuzzers.
 9 | pub fn reject(bytes: &[u8]) -> bool {
10 |     let parser = wasmparser::Parser::new(0);
11 |     let mut has_start = false;
12 |     let mut has_global_set = false;
13 |     let mut num_globals = 0;
14 |     for payload in parser.parse_all(bytes) {
15 |         match payload.unwrap() {
16 |             wasmparser::Payload::CodeSectionEntry(body) => {
17 |                 for op in body.get_operators_reader().unwrap() {
18 |                     let op = op.unwrap();
19 |                     match op {
20 |                         wasmparser::Operator::GlobalSet { .. } => {
21 |                             has_global_set = true;
22 |                         }
23 |                         _ => {}
24 |                     }
25 |                 }
26 |             }
27 |             wasmparser::Payload::StartSection { .. } => {
28 |                 has_start = true;
29 |             }
30 |             wasmparser::Payload::ExportSection(reader) => {
31 |                 for export in reader {
32 |                     let export = export.unwrap();
33 |                     match &export.kind {
34 |                         &wasmparser::ExternalKind::Global => {
35 |                             num_globals += 1;
36 |                         }
37 |                         _ => {}
38 |                     }
39 |                 }
40 |             }
41 |             wasmparser::Payload::MemorySection(reader) => {
42 |                 for mem in reader {
43 |                     let mem = mem.unwrap();
44 |                     if mem.maximum.is_none() || mem.maximum.unwrap() > 100 {
45 |                         return true;
46 |                     }
47 |                 }
48 |             }
49 |             _ => {}
50 |         }
51 |     }
52 | 
53 |     if !has_start || !has_global_set || num_globals < 1 {
54 |         return true;
55 |     }
56 | 
57 |     false
58 | }
59 | 
60 | /// Get the configuration that we expect fuzzing targets to use to
61 | /// generate modules with `wasm_smith`.
62 | fn fuzzing_config() -> wasm_smith::Config {
63 |     wasm_smith::Config {
64 |         min_funcs: 1,
65 |         max_funcs: 1,
66 |         min_memories: 1,
67 |         max_memories: 1,
68 |         min_globals: 10,
69 |         max_globals: 10,
70 |         min_tables: 0,
71 |         max_tables: 0,
72 |         min_imports: 0,
73 |         max_imports: 0,
74 |         min_exports: 12,
75 |         max_exports: 12,
76 |         allow_start_export: true,
77 |         canonicalize_nans: true,
78 |         max_memory32_pages: 1,
79 |         ..Default::default()
80 |     }
81 | }
82 | 
83 | /// A wrapper around `Module` that uses `arbitrary` to generate new
84 | /// modules.
85 | #[derive(Debug)]
86 | pub struct ArbitraryModule(pub wasm_smith::Module);
87 | 
88 | impl<'a> arbitrary::Arbitrary<'a> for ArbitraryModule {
89 |     fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
90 |         Ok(Self(wasm_smith::Module::new(fuzzing_config(), u)?))
91 |     }
92 | }
93 | 


--------------------------------------------------------------------------------
/src/ir.rs:
--------------------------------------------------------------------------------
  1 | //! Intermediate representation for Wasm.
  2 | 
  3 | use crate::declare_entity;
  4 | 
/// Types in waffle's IR.
///
/// These types correspond to (a subset of) the primitive Wasm value
/// types: integers, floats, SIMD vectors, and function references
/// (optionally typed).
///
/// Every SSA value in a function body has a `Type`, unless it is a
/// tuple (multi-value or zero-value result).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Type {
    /// A 32-bit integer. Signedness is unspecified: individual
    /// operators specify how they handle sign.
    I32,
    /// A 64-bit integer. Signedness is unspecified: individual
    /// operators specify how they handle sign.
    I64,
    /// A 32-bit IEEE 754 floating point value. Semantics around NaNs
    /// are defined by individual operators; from the point of view of
    /// IR scaffolding, floating-point values are bags of bits.
    F32,
    /// A 64-bit IEEE 754 floating point value. Semantics around NaNs
    /// are defined by individual operators; from the point of view of
    /// IR scaffolding, floating-point values are bags of bits.
    F64,
    /// A 128-bit SIMD vector value. Lanes and lane types are
    /// specified by individual operators; from the point of view of
    /// IR scaffolding, SIMD vector values are bags of bits.
    V128,
    /// A function reference.
    FuncRef,
    /// A typed function reference, optionally nullable, and with type
    /// specified by a signature index in the module's signature
    /// index-space.
    ///
    /// The first field is `true` when the reference is nullable; the
    /// second field is the signature index.
    TypedFuncRef(bool, u32),
}
 40 | impl From<wasmparser::ValType> for Type {
 41 |     fn from(ty: wasmparser::ValType) -> Self {
 42 |         match ty {
 43 |             wasmparser::ValType::I32 => Type::I32,
 44 |             wasmparser::ValType::I64 => Type::I64,
 45 |             wasmparser::ValType::F32 => Type::F32,
 46 |             wasmparser::ValType::F64 => Type::F64,
 47 |             wasmparser::ValType::V128 => Type::V128,
 48 |             wasmparser::ValType::Ref(r) => r.into(),
 49 |         }
 50 |     }
 51 | }
 52 | impl From<wasmparser::RefType> for Type {
 53 |     fn from(ty: wasmparser::RefType) -> Self {
 54 |         match ty.type_index() {
 55 |             Some(idx) => {
 56 |                 let nullable = ty.is_nullable();
 57 |                 Type::TypedFuncRef(nullable, idx.as_module_index().unwrap())
 58 |             }
 59 |             None => Type::FuncRef,
 60 |         }
 61 |     }
 62 | }
 63 | 
 64 | impl std::fmt::Display for Type {
 65 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 66 |         match self {
 67 |             Type::I32 => write!(f, "i32"),
 68 |             Type::I64 => write!(f, "i64"),
 69 |             Type::F32 => write!(f, "f32"),
 70 |             Type::F64 => write!(f, "f64"),
 71 |             Type::V128 => write!(f, "v128"),
 72 |             Type::FuncRef => write!(f, "funcref"),
 73 |             Type::TypedFuncRef(nullable, idx) => write!(
 74 |                 f,
 75 |                 "funcref({}, {})",
 76 |                 if *nullable { "null" } else { "not_null" },
 77 |                 idx
 78 |             ),
 79 |         }
 80 |     }
 81 | }
 82 | 
 83 | impl From<Type> for wasm_encoder::ValType {
 84 |     fn from(ty: Type) -> wasm_encoder::ValType {
 85 |         match ty {
 86 |             Type::I32 => wasm_encoder::ValType::I32,
 87 |             Type::I64 => wasm_encoder::ValType::I64,
 88 |             Type::F32 => wasm_encoder::ValType::F32,
 89 |             Type::F64 => wasm_encoder::ValType::F64,
 90 |             Type::V128 => wasm_encoder::ValType::V128,
 91 |             Type::FuncRef | Type::TypedFuncRef(..) => wasm_encoder::ValType::Ref(ty.into()),
 92 |         }
 93 |     }
 94 | }
 95 | 
 96 | impl From<Type> for wasm_encoder::RefType {
 97 |     fn from(ty: Type) -> wasm_encoder::RefType {
 98 |         match ty {
 99 |             Type::FuncRef => wasm_encoder::RefType::FUNCREF,
100 |             Type::TypedFuncRef(nullable, idx) => wasm_encoder::RefType {
101 |                 nullable,
102 |                 heap_type: wasm_encoder::HeapType::Concrete(idx),
103 |             },
104 |             _ => panic!("Cannot convert {:?} into reftype", ty),
105 |         }
106 |     }
107 | }
108 | 
// Entity index types, each declared via `declare_entity!` with the
// prefix used when the index is printed.

// Per-module index spaces:

// A signature (list of parameter types and list of return types) in
// the module.
declare_entity!(Signature, "sig");
// A function in the module.
declare_entity!(Func, "func");
// A global variable in the module.
declare_entity!(Global, "global");
// A table in the module.
declare_entity!(Table, "table");
// A memory in the module.
declare_entity!(Memory, "memory");

// Per-function index spaces:

// A basic block in one function body.
declare_entity!(Block, "block");
// A local variable (storage slot) in one function body.
declare_entity!(Local, "local");
// An SSA value in one function body.
declare_entity!(Value, "v");
131 | 
132 | mod module;
133 | pub use module::*;
134 | mod func;
135 | pub use func::*;
136 | mod value;
137 | pub use value::*;
138 | mod display;
139 | pub use display::*;
140 | mod debug;
141 | pub use debug::*;
142 | 


--------------------------------------------------------------------------------
/src/ir/debug.rs:
--------------------------------------------------------------------------------
  1 | //! Debug info (currently, source-location maps).
  2 | 
  3 | use crate::declare_entity;
  4 | use crate::entity::EntityVec;
  5 | use addr2line::gimli;
  6 | use std::collections::hash_map::Entry as HashEntry;
  7 | use std::collections::HashMap;
  8 | 
  9 | declare_entity!(SourceFile, "file");
 10 | declare_entity!(SourceLoc, "loc");
 11 | 
/// Interning tables for debug info: source-file names and source
/// locations, each stored once and referred to by an entity index.
#[derive(Clone, Debug, Default)]
pub struct Debug {
    /// Interned source-file names, indexed by a `SourceFile` entity
    /// index.
    pub source_files: EntityVec<SourceFile, String>,
    /// Reverse map from file name to its interned ID, used by
    /// `intern_file` to avoid duplicates.
    source_file_dedup: HashMap<String, SourceFile>,
    /// Interned source locations (file, line, and column), indexed
    /// by a `SourceLoc` entity index.
    pub source_locs: EntityVec<SourceLoc, SourceLocData>,
    /// Reverse map from location data to its interned ID, used by
    /// `intern_loc` to avoid duplicates.
    source_loc_dedup: HashMap<SourceLocData, SourceLoc>,
}
 23 | 
/// A "source location": a filename (interned to an ID), a line, and a
/// column.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SourceLocData {
    /// Interned ID of the source file.
    pub file: SourceFile,
    /// Line number within the file.
    pub line: u32,
    /// Column number within the line.
    pub col: u32,
}
 32 | 
 33 | impl Debug {
 34 |     /// Intern a filename to an ID.
 35 |     pub fn intern_file(&mut self, path: &str) -> SourceFile {
 36 |         if let Some(id) = self.source_file_dedup.get(path) {
 37 |             return *id;
 38 |         }
 39 |         let id = self.source_files.push(path.to_owned());
 40 |         self.source_file_dedup.insert(path.to_owned(), id);
 41 |         id
 42 |     }
 43 | 
 44 |     /// Intern a location (line and column) in an already-interned
 45 |     /// filename.
 46 |     pub fn intern_loc(&mut self, file: SourceFile, line: u32, col: u32) -> SourceLoc {
 47 |         let data = SourceLocData { file, line, col };
 48 |         match self.source_loc_dedup.entry(data) {
 49 |             HashEntry::Vacant(v) => {
 50 |                 let id = self.source_locs.push(data);
 51 |                 *v.insert(id)
 52 |             }
 53 |             HashEntry::Occupied(o) => *o.get(),
 54 |         }
 55 |     }
 56 | }
 57 | 
/// A map from ranges of offsets in the original Wasm file to source
/// locations.
#[derive(Clone, Debug, Default)]
pub struct DebugMap {
    /// Offset of code section relative to the Wasm file start.
    pub code_offset: u32,
    /// Each tuple is `(start, len, loc)`. The `start` offset is
    /// relative to the code section. When built by `from_dwarf`, the
    /// tuples are sorted and ranges that start inside a previously
    /// kept range have been dropped.
    pub tuples: Vec<(u32, u32, SourceLoc)>,
}
 68 | 
 69 | impl DebugMap {
 70 |     pub(crate) fn from_dwarf<R: gimli::Reader>(
 71 |         dwarf: gimli::Dwarf<R>,
 72 |         debug: &mut Debug,
 73 |         code_offset: u32,
 74 |     ) -> anyhow::Result<DebugMap> {
 75 |         let ctx = addr2line::Context::from_dwarf(dwarf)?;
 76 |         let mut tuples = vec![];
 77 | 
 78 |         let mut locs = ctx.find_location_range(0, u64::MAX).unwrap();
 79 |         while let Some((start, len, loc)) = locs.next() {
 80 |             let file = debug.intern_file(loc.file.unwrap_or(""));
 81 |             let loc = debug.intern_loc(file, loc.line.unwrap_or(0), loc.column.unwrap_or(0));
 82 |             log::trace!("tuple: loc {} start {:x} len {:x}", loc, start, len);
 83 |             tuples.push((start as u32, len as u32, loc));
 84 |         }
 85 |         tuples.sort();
 86 | 
 87 |         let mut last = 0;
 88 |         tuples.retain(|&(start, len, _)| {
 89 |             let retain = start >= last;
 90 |             if retain {
 91 |                 last = start + len;
 92 |             }
 93 |             retain
 94 |         });
 95 | 
 96 |         Ok(DebugMap {
 97 |             code_offset,
 98 |             tuples,
 99 |         })
100 |     }
101 | }
102 | 


--------------------------------------------------------------------------------
/src/ir/display.rs:
--------------------------------------------------------------------------------
  1 | //! Displaying IR.
  2 | 
  3 | use super::{Func, FuncDecl, FunctionBody, Module, SourceLoc, ValueDef};
  4 | use crate::entity::EntityRef;
  5 | use std::collections::HashMap;
  6 | use std::fmt::{self, Display, Formatter, Result as FmtResult};
  7 | 
/// Hooks to print information after instructions, before and after blocks,
/// and before and after function bodies.
///
/// Every hook has a default implementation that prints nothing.
pub trait PrintDecorator {
    /// Print arbitrary text after an instruction.
    ///
    /// Invoked after every instruction in a block. The instruction has already been printed on its own line;
    /// this method can print content after the operator, if desired.
    fn after_inst(&self, _value: super::Value, _f: &mut fmt::Formatter) -> fmt::Result {
        Ok(())
    }

    /// Print arbitrary text before the body of a block.
    ///
    /// Invoked before the block body. The block id and parameters have already been printed on their own line;
    /// this method can print content on the line below the block id, before the body of the block is printed, if desired.
    fn before_block(&self, _block: super::Block, _f: &mut fmt::Formatter) -> fmt::Result {
        Ok(())
    }

    /// Print arbitrary text after the body of a block.
    ///
    /// Invoked after the block body, before the terminator. The block body has already been printed on its own line(s);
    /// this method can print content on the line after the last instruction in the block body, before the terminator is printed.
    fn after_block(&self, _block: super::Block, _f: &mut fmt::Formatter) -> fmt::Result {
        Ok(())
    }

    /// Print arbitrary text before the body of a function.
    ///
    /// Invoked before the function body is printed. The function signature line has already been printed;
    /// this method can print content on the line after the function signature line, before the function body is printed.
    fn before_function_body(&self, _f: &mut fmt::Formatter) -> fmt::Result {
        Ok(())
    }

    /// Print arbitrary text after the body of a function.
    ///
    /// Invoked after the function body is printed. The function body has already been printed;
    /// this method can print content on the line after the return block of the function, before the last curly brace to end the function is printed.
    fn after_function_body(&self, _f: &mut fmt::Formatter) -> fmt::Result {
        Ok(())
    }
}
 51 | 
/// A `PrintDecorator` that overrides no hooks, so it prints nothing
/// extra.
#[derive(Default)]
pub(crate) struct NOPPrintDecorator();
impl PrintDecorator for NOPPrintDecorator {}
 55 | 
/// A wrapper around a `FunctionBody` together with some auxiliary
/// information to perform a pretty-print of that function.
pub struct FunctionBodyDisplay<'a, PD: PrintDecorator> {
    /// The function body to print.
    pub(crate) body: &'a FunctionBody,
    /// Prefix written at the start of every printed line.
    pub(crate) indent: &'a str,
    /// When set, the value listing also includes operator,
    /// block-parameter and alias definitions.
    pub(crate) verbose: bool,
    /// Optional enclosing module.
    // NOTE(review): presumably used to resolve module-level names while
    // printing; confirm against the rest of the `Display` impl.
    pub(crate) module: Option<&'a Module<'a>>,
    /// Optional hooks invoked at fixed points while printing.
    pub(crate) decorator: Option<&'a PD>,
}
 65 | 
 66 | impl<'a, PD: PrintDecorator> Display for FunctionBodyDisplay<'a, PD> {
 67 |     fn fmt(&self, f: &mut Formatter) -> FmtResult { // writes the header, the value table, every block, then the closing brace
 68 |         let arg_tys = self
 69 |             .body
 70 |             .locals
 71 |             .values()
 72 |             .take(self.body.n_params) // only the first `n_params` locals are listed as arguments
 73 |             .map(|&ty| format!("{}", ty))
 74 |             .collect::<Vec<_>>();
 75 |         let ret_tys = self
 76 |             .body
 77 |             .rets
 78 |             .iter()
 79 |             .map(|&ty| format!("{}", ty))
 80 |             .collect::<Vec<_>>();
 81 |         writeln!(
 82 |             f,
 83 |             "{}function({}) -> {} {{",
 84 |             self.indent,
 85 |             arg_tys.join(", "),
 86 |             ret_tys.join(", ")
 87 |         )?;
 88 | 
 89 |         if let Some(decorator) = self.decorator {
 90 |             decorator.before_function_body(f)?;
 91 |         }
 92 | 
 93 |         for (value, value_def) in self.body.values.entries() { // value table: at most one line per value definition
 94 |             match value_def {
 95 |                 ValueDef::Operator(op, args, tys) if self.verbose => {
 96 |                     writeln!(
 97 |                         f,
 98 |                         "{}    {} = {} {} # {} ",
 99 |                         self.indent,
100 |                         value,
101 |                         op,
102 |                         self.body.arg_pool[*args]
103 |                             .iter()
104 |                             .map(|arg| format!("{}", arg))
105 |                             .collect::<Vec<_>>()
106 |                             .join(", "),
107 |                         self.body.type_pool[*tys]
108 |                             .iter()
109 |                             .map(|arg| format!("{}", arg))
110 |                             .collect::<Vec<_>>()
111 |                             .join(", "),
112 |                     )?;
113 |                 }
114 |                 ValueDef::BlockParam(block, idx, ty) if self.verbose => writeln!(
115 |                     f,
116 |                     "{}    {} = blockparam {}, {} # {}",
117 |                     self.indent, value, block, idx, ty
118 |                 )?,
119 |                 ValueDef::Alias(alias_target) => {
120 |                     if self.verbose {
121 |                         writeln!(f, "{}    {} = {}", self.indent, value, alias_target)?
122 |                     }
123 |                 }
124 |                 ValueDef::PickOutput(val, idx, ty) => {
125 |                     writeln!(f, "{}    {} = {}.{} # {}", self.indent, value, val, idx, ty)?
126 |                 }
127 |                 ValueDef::Placeholder(ty) => {
128 |                     writeln!(f, "{}    {} = placeholder # {}", self.indent, value, ty)?
129 |                 }
130 |                 ValueDef::None => writeln!(f, "{}    {} = none", self.indent, value)?,
131 |                 _ => {} // Operator/BlockParam definitions when not verbose: nothing is listed here
132 |             }
133 |         }
134 | 
135 |         for (block_id, block) in self.body.blocks.entries() { // one section per block: header, preds/succs, locals, instructions, terminator
136 |             let block_params = block
137 |                 .params
138 |                 .iter()
139 |                 .map(|(ty, val)| format!("{}: {}", val, ty))
140 |                 .collect::<Vec<_>>();
141 |             writeln!(
142 |                 f,
143 |                 "{}  {}({}): # {}",
144 |                 self.indent,
145 |                 block_id,
146 |                 block_params.join(", "),
147 |                 block.desc
148 |             )?;
149 | 
150 |             if let Some(decorator) = self.decorator {
151 |                 decorator.before_block(block_id, f)?
152 |             };
153 | 
154 |             writeln!(
155 |                 f,
156 |                 "{}    # preds: {}",
157 |                 self.indent,
158 |                 block
159 |                     .preds
160 |                     .iter()
161 |                     .map(|pred| format!("{} ({})", pred, self.body.blocks[*pred].desc))
162 |                     .collect::<Vec<_>>()
163 |                     .join(", ")
164 |             )?;
165 |             writeln!(
166 |                 f,
167 |                 "{}    # succs: {}",
168 |                 self.indent,
169 |                 block
170 |                     .succs
171 |                     .iter()
172 |                     .map(|succ| format!("{} ({})", succ, self.body.blocks[*succ].desc))
173 |                     .collect::<Vec<_>>()
174 |                     .join(", ")
175 |             )?;
176 |             for (_, param) in &block.params {
177 |                 if let Some(local) = self.body.value_locals[*param] {
178 |                     writeln!(f, "{}    # {}: {}", self.indent, param, local)?;
179 |                 }
180 |             }
181 |             for &inst in &block.insts {
182 |                 if let Some(local) = self.body.value_locals[inst] {
183 |                     writeln!(f, "{}    # {}: {}", self.indent, inst, local)?;
184 |                 }
185 |                 match &self.body.values[inst] {
186 |                     ValueDef::Operator(op, args, tys) => {
187 |                         let args = self.body.arg_pool[*args]
188 |                             .iter()
189 |                             .map(|&v| format!("{}", v))
190 |                             .collect::<Vec<_>>();
191 |                         let tys = self.body.type_pool[*tys]
192 |                             .iter()
193 |                             .map(|&ty| format!("{}", ty))
194 |                             .collect::<Vec<_>>();
195 |                         let loc = if self.body.source_locs[inst] != SourceLoc::invalid()
196 |                             && self.module.is_some()
197 |                         { // a location is printed only when it is valid and a module is available to resolve it
198 |                             let module = self.module.as_ref().unwrap();
199 |                             let loc = self.body.source_locs[inst];
200 |                             let data = &module.debug.source_locs[loc];
201 |                             let filename = &module.debug.source_files[data.file];
202 |                             format!("@{} {}:{}:{}", loc, filename, data.line, data.col)
203 |                         } else {
204 |                             "".to_owned()
205 |                         };
206 |                         write!(
207 |                             f,
208 |                             "{}    {} = {} {} # {} {} ",
209 |                             self.indent,
210 |                             inst,
211 |                             op,
212 |                             args.join(", "),
213 |                             tys.join(", "),
214 |                             loc,
215 |                         )?;
216 |                         if let Some(decorator) = self.decorator {
217 |                             decorator.after_inst(inst, f)?;
218 |                         }
219 |                         writeln!(f, "")?; // ends the line started by `write!` above, after any decorator output
220 |                     }
221 |                     ValueDef::PickOutput(val, idx, ty) => {
222 |                         writeln!(f, "{}    {} = {}.{} # {}", self.indent, inst, val, idx, ty)?;
223 |                     }
224 |                     ValueDef::Alias(val) => {
225 |                         writeln!(f, "{}    {} = {}", self.indent, inst, val)?;
226 |                     }
227 |                     _ => unreachable!(), // NOTE(review): panics if a block's instruction list holds any other kind of definition
228 |                 }
229 |             }
230 |             if let Some(decorator) = self.decorator {
231 |                 decorator.after_block(block_id, f)?;
232 |             }
233 |             writeln!(f, "{}    {}", self.indent, block.terminator)?;
234 |         }
235 | 
236 |         if let Some(decorator) = self.decorator {
237 |             decorator.after_function_body(f)?;
238 |         }
239 |         writeln!(f, "{}}}", self.indent)?;
240 | 
241 |         Ok(())
242 |     }
243 | }
244 | 
245 | pub struct ModuleDisplay<'a, PD: PrintDecorator> { // `Display` wrapper that pretty-prints a whole `Module` as textual IR
246 |     pub(crate) module: &'a Module<'a>, // the module to print
247 |     pub(crate) decorators: Option<Box<dyn Fn(Func) -> PD>>, // optional factory building the decorator used for each function body
248 | }
249 | 
250 | impl<'a, PD: PrintDecorator> Display for ModuleDisplay<'a, PD> {
251 |     /// Write the module as textual IR: start function, signatures,
252 |     /// globals, tables, memories, imports, exports, functions, and
253 |     /// finally the debug source locations and source files.
254 |     ///
255 |     /// Function bodies are printed through the per-function decorator
256 |     /// when a decorator factory is present, and undecorated otherwise.
257 |     fn fmt(&self, f: &mut Formatter) -> FmtResult {
258 |         writeln!(f, "module {{")?;
259 |         if let Some(func) = self.module.start_func {
260 |             writeln!(f, "    start = {}", func)?;
261 |         }
262 |         // Signature text is remembered so each function line can repeat it.
263 |         let mut sig_strs = HashMap::new();
264 |         for (sig, sig_data) in self.module.signatures.entries() {
265 |             let arg_tys = sig_data
266 |                 .params
267 |                 .iter()
268 |                 .map(|&ty| format!("{}", ty))
269 |                 .collect::<Vec<_>>();
270 |             let ret_tys = sig_data
271 |                 .returns
272 |                 .iter()
273 |                 .map(|&ty| format!("{}", ty))
274 |                 .collect::<Vec<_>>();
275 |             let sig_str = format!("{} -> {}", arg_tys.join(", "), ret_tys.join(", "));
276 |             writeln!(f, "  {}: {}", sig, sig_str)?;
277 |             sig_strs.insert(sig, sig_str);
278 |         }
279 |         for (global, global_data) in self.module.globals.entries() {
280 |             writeln!(
281 |                 f,
282 |                 "  {}: {:?} # {}",
283 |                 global, global_data.value, global_data.ty
284 |             )?;
285 |         }
286 |         for (table, table_data) in self.module.tables.entries() {
287 |             writeln!(f, "  {}: {}", table, table_data.ty)?;
288 |             if let Some(funcs) = &table_data.func_elements {
289 |                 for (i, &func) in funcs.iter().enumerate() {
290 |                     writeln!(f, "    {}[{}]: {}", table, i, func)?;
291 |                 }
292 |             }
293 |         }
294 |         for (memory, memory_data) in self.module.memories.entries() {
295 |             writeln!(
296 |                 f,
297 |                 "  {}: initial {} max {:?}",
298 |                 memory, memory_data.initial_pages, memory_data.maximum_pages
299 |             )?;
300 |             for seg in &memory_data.segments {
301 |                 writeln!(
302 |                     f,
303 |                     "    {} offset {}: # {} bytes",
304 |                     memory,
305 |                     seg.offset,
306 |                     seg.data.len()
307 |                 )?;
308 |             }
309 |         }
310 |         for import in &self.module.imports {
311 |             writeln!(
312 |                 f,
313 |                 "  import \"{}\".\"{}\": {}",
314 |                 import.module, import.name, import.kind
315 |             )?;
316 |         }
317 |         for export in &self.module.exports {
318 |             writeln!(f, "  export \"{}\": {}", export.name, export.kind)?;
319 |         }
320 |         for (func, func_decl) in self.module.funcs.entries() {
321 |             match func_decl {
322 |                 FuncDecl::Body(sig, name, body) => {
323 |                     writeln!(
324 |                         f,
325 |                         "  {} \"{}\": {} = # {}",
326 |                         func,
327 |                         name,
328 |                         sig,
329 |                         sig_strs.get(&sig).unwrap()
330 |                     )?;
331 | 
332 |                     if let Some(decorator) = &self.decorators {
333 |                         let decorator = &(*decorator)(func);
334 |                         writeln!(
335 |                             f,
336 |                             "{}",
337 |                             body.display_with_decorator("    ", Some(self.module), decorator)
338 |                         )?;
339 |                     } else {
340 |                         // No decorator factory was supplied (this is what
341 |                         // `Module::display()` passes): the body must still be
342 |                         // printed, just without decoration.
343 |                         // NOTE(review): `verbose: false` assumes the decorated
344 |                         // path is non-verbose too; confirm against `FunctionBody`.
345 |                         let plain = FunctionBodyDisplay::<PD> {
346 |                             body,
347 |                             indent: "    ",
348 |                             verbose: false,
349 |                             module: Some(self.module),
350 |                             decorator: None,
351 |                         };
352 |                         writeln!(f, "{}", plain)?;
353 |                     }
354 |                 }
355 |                 FuncDecl::Lazy(sig, name, reader) => {
356 |                     writeln!(
357 |                         f,
358 |                         "  {} \"{}\": {} = # {}",
359 |                         func,
360 |                         name,
361 |                         sig,
362 |                         sig_strs.get(&sig).unwrap()
363 |                     )?;
364 |                     writeln!(f, "  # raw bytes (length {})", reader.range().len())?;
365 |                 }
366 |                 FuncDecl::Compiled(sig, name, _) => {
367 |                     writeln!(
368 |                         f,
369 |                         "  {} \"{}\": {} = # {}",
370 |                         func,
371 |                         name,
372 |                         sig,
373 |                         sig_strs.get(&sig).unwrap()
374 |                     )?;
375 |                     writeln!(f, "  # already compiled")?;
376 |                 }
377 |                 FuncDecl::Import(sig, name) => {
378 |                     writeln!(
379 |                         f,
380 |                         "  {} \"{}\": {} # {}",
381 |                         func,
382 |                         name,
383 |                         sig,
384 |                         sig_strs.get(&sig).unwrap()
385 |                     )?;
386 |                 }
387 |                 FuncDecl::None => {
388 |                     writeln!(f, "  {}: none", func)?;
389 |                 }
390 |             }
391 |         }
392 |         for (loc, loc_data) in self.module.debug.source_locs.entries() {
393 |             writeln!(
394 |                 f,
395 |                 "  {} = {} line {} column {}",
396 |                 loc, loc_data.file, loc_data.line, loc_data.col
397 |             )?;
398 |         }
399 |         for (file, file_name) in self.module.debug.source_files.entries() {
400 |             writeln!(f, "  {} = \"{}\"", file, file_name)?;
401 |         }
402 |         writeln!(f, "}}")?;
403 |         Ok(())
404 |     }
405 | }
385 | 


--------------------------------------------------------------------------------
/src/ir/module.rs:
--------------------------------------------------------------------------------
  1 | use super::{
  2 |     Func, FuncDecl, Global, Memory, ModuleDisplay, NOPPrintDecorator, PrintDecorator, Signature,
  3 |     Table, Type,
  4 | };
  5 | use crate::entity::{EntityRef, EntityVec};
  6 | use crate::ir::{Debug, DebugMap, FunctionBody};
  7 | use crate::{backend, frontend};
  8 | use anyhow::Result;
  9 | use std::collections::BTreeMap;
 10 | 
 11 | pub use crate::frontend::FrontendOptions;
 12 | 
 13 | /// A Wasm module, represented as a collection of IR entities.
 14 | ///
 15 | /// The module retains a reference to the original Wasm module's bytes
 16 | /// in memory, so that function bodies can optionally refer to ranges
 17 | /// of bytecode in the original module without parsing, lifting to IR,
 18 | /// and recompiling them. A new module may be built without original
 19 | /// bytes (i.e., a `Module<'static>` with `orig_bytes: None`) using
 20 | /// `Module::empty()`.
 21 | ///
 22 | /// The ordinary flow for a tool that processes a Wasm module is:
 23 | ///
 24 | /// - Parse an existing Wasm module using `Module::from_wasm_bytes()`.
 25 | /// - For any functions where IR is required, parse the original
 26 | ///   bytecode into IR using `Module::expand_func()`.
 27 | /// - Modify these function bodies (update the `FuncDecl`), and/or
 28 | ///   append new function bodies to `funcs`.
 29 | /// - Compile the IR to a new Wasm module with
 30 | ///   `Module::to_wasm_bytes()`.
 31 | #[derive(Clone, Debug)]
 32 | pub struct Module<'a> {
 33 |     /// The original Wasm module this module was parsed from, if
 34 |     /// any. Used only for "lazy function bodies", which retain a
 35 |     /// range that can refer into this slice.
 36 |     pub orig_bytes: Option<&'a [u8]>,
 37 |     /// The functions in this module: imports, un-expanded ("lazily
 38 |     /// parsed") functions, functions as IR, or IR compiled into new
 39 |     /// bytecode.
 40 |     pub funcs: EntityVec<Func, FuncDecl<'a>>,
 41 |     /// Type signatures, referred to by `funcs`, `imports` and
 42 |     /// `exports`.
 43 |     pub signatures: EntityVec<Signature, SignatureData>,
 44 |     /// Global variables in this module.
 45 |     pub globals: EntityVec<Global, GlobalData>,
 46 |     /// Tables in this module.
 47 |     pub tables: EntityVec<Table, TableData>,
 48 |     /// Imports into this module. Function imports must also have an
 49 |     /// entry at the appropriate function index in `funcs`.
 50 |     pub imports: Vec<Import>,
 51 |     /// Exports from this module.
 52 |     pub exports: Vec<Export>,
 53 |     /// Memories/heaps that this module contains.
 54 |     pub memories: EntityVec<Memory, MemoryData>,
 55 |     /// The "start function" invoked at instantiation, if any.
 56 |     pub start_func: Option<Func>,
 57 |     /// Debug-info associated with function bodies: interning pools
 58 |     /// for source file names and locations in those files.
 59 |     pub debug: Debug,
 60 |     /// Maps from original Wasm bytecode offsets to source locations.
 61 |     pub debug_map: DebugMap,
 62 |     /// Other custom sections retained for re-serialization.
 63 |     pub custom_sections: BTreeMap<String, &'a [u8]>,
 64 | }
 65 | 
 66 | /// A function signature definition: its parameter types and its return types.
 67 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 68 | pub struct SignatureData {
 69 |     /// Parameters: a Wasm function may have zero or more primitive
 70 |     /// types as parameters.
 71 |     pub params: Vec<Type>,
 72 |     /// Returns: a Wasm function (when using the multivalue extension,
 73 |     /// which we assume to be present) may have zero or more primitive
 74 |     /// types as return values.
 75 |     pub returns: Vec<Type>,
 76 | }
 77 | 
 78 | /// The size in bytes of a single Wasm page, used in memory definitions.
 79 | pub const WASM_PAGE: usize = 0x1_0000; // 64 KiB (65536 bytes)
 80 | 
 81 | /// A memory definition.
 82 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 83 | pub struct MemoryData {
 84 |     /// The initial memory size, counted in Wasm pages (64 KiB each).
 85 |     pub initial_pages: usize,
 86 |     /// The maximum memory size, counted in Wasm pages (64 KiB each), if any.
 87 |     pub maximum_pages: Option<usize>,
 88 |     /// Initialization data (initial image) for this memory.
 89 |     pub segments: Vec<MemorySegment>,
 90 | }
 91 | 
 92 | /// A segment of data in a memory's initial state.
 93 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 94 | pub struct MemorySegment {
 95 |     /// The offset within the memory at which this data is placed.
 96 |     pub offset: usize,
 97 |     /// The data, overlaid on previously-existing data at this offset.
 98 |     pub data: Vec<u8>,
 99 | }
100 | 
101 | /// A table definition.
102 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
103 | pub struct TableData {
104 |     /// The type of the elements in this table.
105 |     pub ty: Type,
106 |     /// The initial size (in elements) of this table.
107 |     pub initial: u64,
108 |     /// The maximum size (in elements), if any, of this table.
109 |     pub max: Option<u64>,
110 |     /// If this is a table of function references, the initial
111 |     /// contents of the table. `null` funcrefs are represented by
112 |     /// `Func::invalid()`.
113 |     pub func_elements: Option<Vec<Func>>,
114 | }
115 | 
116 | /// A global-variable definition.
117 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
118 | pub struct GlobalData {
119 |     /// The type of this global variable.
120 |     pub ty: Type,
121 |     /// The initial value of this global variable, if one is recorded,
122 |     /// as a bundle of 64 bits (all primitive types, `i32`/`i64`/`f32`/`f64`,
123 |     /// can be represented in this way). NOTE(review): when this is `None` is not shown here; presumably imported globals, confirm.
124 |     pub value: Option<u64>,
125 |     /// Whether this global variable is mutable.
126 |     pub mutable: bool,
127 | }
128 | 
129 | impl From<&wasmparser::FuncType> for SignatureData {
130 |     /// Convert a parsed Wasm function type into a signature by mapping
131 |     /// every parameter and result type into the IR's `Type`.
132 |     fn from(fty: &wasmparser::FuncType) -> Self {
133 |         let params: Vec<Type> = fty.params().iter().copied().map(Into::into).collect();
134 |         let returns: Vec<Type> = fty.results().iter().copied().map(Into::into).collect();
135 |         Self { params, returns }
136 |     }
137 | }
145 | impl From<wasmparser::FuncType> for SignatureData {
146 |     fn from(fty: wasmparser::FuncType) -> Self {
147 |         Self::from(&fty) // delegate to the by-reference conversion
148 |     }
149 | }
150 | 
151 | /// A module import definition: a two-level name plus the imported entity.
152 | #[derive(Clone, Debug)]
153 | pub struct Import {
154 |     /// The name of the module the import comes from.
155 |     pub module: String,
156 |     /// The name of the export within that module that this import
157 |     /// comes from.
158 |     pub name: String,
159 |     /// The kind of import and its specific entity index.
160 |     pub kind: ImportKind,
161 | }
162 | 
163 | /// The kind of a Wasm import, including the specific entity index
164 | /// that the import corresponds to.
165 | #[derive(Clone, Debug, PartialEq, Eq)]
166 | pub enum ImportKind {
167 |     /// An import of a table.
168 |     Table(Table),
169 |     /// An import of a function.
170 |     Func(Func),
171 |     /// An import of a global.
172 |     Global(Global),
173 |     /// An import of a memory.
174 |     Memory(Memory),
175 | }
176 | 
177 | impl std::fmt::Display for ImportKind {
178 |     /// Print the wrapped entity index using that entity's own `Display`.
179 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
180 |         match self {
181 |             ImportKind::Table(index) => write!(f, "{}", index),
182 |             ImportKind::Func(index) => write!(f, "{}", index),
183 |             ImportKind::Global(index) => write!(f, "{}", index),
184 |             ImportKind::Memory(index) => write!(f, "{}", index),
185 |         }
186 |     }
187 | }
188 | 
189 | /// A module export definition: an export name plus the exported entity.
190 | #[derive(Clone, Debug)]
191 | pub struct Export {
192 |     /// The name of this export.
193 |     pub name: String,
194 |     /// The kind of export and its specific entity index.
195 |     pub kind: ExportKind,
196 | }
197 | 
198 | /// The kind of a Wasm export, including the specific entity index
199 | /// that this export directive exports. Comparable for equality, like `ImportKind`.
200 | #[derive(Clone, Debug, PartialEq, Eq)]
201 | pub enum ExportKind {
202 |     /// An export of a table.
203 |     Table(Table),
204 |     /// An export of a function.
205 |     Func(Func),
206 |     /// An export of a global.
207 |     Global(Global),
208 |     /// An export of a memory.
209 |     Memory(Memory),
210 | }
211 | 
212 | impl std::fmt::Display for ExportKind {
213 |     /// Print the wrapped entity index using that entity's own `Display`.
214 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
215 |         match self {
216 |             ExportKind::Table(index) => write!(f, "{}", index),
217 |             ExportKind::Func(index) => write!(f, "{}", index),
218 |             ExportKind::Global(index) => write!(f, "{}", index),
219 |             ExportKind::Memory(index) => write!(f, "{}", index),
220 |         }
221 |     }
222 | }
223 | 
224 | impl<'a> Module<'a> {
225 |     /// Create a new empty Wasm module, ready for entities to be added.
226 |     pub fn empty() -> Module<'static> {
227 |         Module {
228 |             orig_bytes: None,
229 |             funcs: EntityVec::default(),
230 |             signatures: EntityVec::default(),
231 |             globals: EntityVec::default(),
232 |             tables: EntityVec::default(),
233 |             imports: vec![],
234 |             exports: vec![],
235 |             memories: EntityVec::default(),
236 |             start_func: None,
237 |             debug: Debug::default(),
238 |             debug_map: DebugMap::default(),
239 |             custom_sections: BTreeMap::default(),
240 |         }
241 |     }
242 | 
243 |     /// Parse a WebAssembly module, as a slice of bytes in memory,
244 |     /// into a waffle Module ready to be manipulated and recompiled.
245 |     pub fn from_wasm_bytes(bytes: &'a [u8], options: &FrontendOptions) -> Result<Self> {
246 |         frontend::wasm_to_ir(bytes, options)
247 |     }
248 | 
249 |     /// Take this module and strip its reference to the original
250 |     /// bytes, producing a module with the same logical contents.
251 |     ///
252 |     /// Note that this has a few side-effects:
253 |     /// - Any (non-debug) custom sections are lost; i.e., they will
254 |     ///   not be roundtripped from the original Wasm module.
255 |     /// - All function bodies are expanded to IR so they can be
256 |     ///   recompiled into new bytecode. The bytecode should be
257 |     ///   equivalent, but will not literally be the same bytecode as the
258 |     ///   original module.
259 |     pub fn without_orig_bytes(self) -> Module<'static> {
260 |         Module {
261 |             orig_bytes: None,
262 |             funcs: EntityVec::from(
263 |                 self.funcs
264 |                     .into_vec()
265 |                     .into_iter()
266 |                     .map(|decl| decl.without_orig_bytes())
267 |                     .collect::<Vec<_>>(),
268 |             ),
269 |             signatures: self.signatures,
270 |             globals: self.globals,
271 |             tables: self.tables,
272 |             imports: self.imports,
273 |             exports: self.exports,
274 |             memories: self.memories,
275 |             start_func: self.start_func,
276 |             debug: self.debug,
277 |             debug_map: self.debug_map,
278 |             custom_sections: BTreeMap::default(),
279 |         }
280 |     }
281 | 
282 |     /// Compile the module to Wasm bytecode.
283 |     pub fn to_wasm_bytes(&self) -> Result<Vec<u8>> {
284 |         backend::compile(self)
285 |     }
286 | 
287 |     /// Perform some work on each function body with IR; the callback may carry mutable state (`FnMut`).
288 |     pub fn per_func_body<F: FnMut(&mut FunctionBody)>(&mut self, mut f: F) {
289 |         for func_decl in self.funcs.values_mut() {
290 |             if let Some(body) = func_decl.body_mut() {
291 |                 f(body);
292 |             }
293 |         }
294 |     }
295 | 
296 |     /// Expand a function body, parsing its lazy reference to original
297 |     /// bytecode into IR if needed.
298 |     pub fn expand_func<'b>(&'b mut self, id: Func) -> Result<&'b mut FuncDecl<'a>> {
299 |         if let FuncDecl::Lazy(..) = self.funcs[id] {
300 |             // End the borrow. This is cheap (a slice copy).
301 |             let mut func = self.funcs[id].clone();
302 |             func.parse(self)?;
303 |             self.funcs[id] = func;
304 |         }
305 |         Ok(&mut self.funcs[id])
306 |     }
307 | 
308 |     /// Clone a function declaration, parse the clone into IR, and return
309 |     /// the resulting *new* function body; the declaration stored in the
310 |     /// module is not replaced. Useful when a tool appends processed copies of
311 |     /// an original function. Errors (rather than panics) if no body results.
312 |     pub fn clone_and_expand_body(&self, id: Func) -> Result<FunctionBody> {
313 |         let mut body = self.funcs[id].clone();
314 |         body.parse(self)?;
315 |         Ok(match body {
316 |             FuncDecl::Body(_, _, body) => body,
317 |             _ => anyhow::bail!("{} does not have a function body that can be expanded", id),
318 |         })
319 |     }
320 | 
321 |     /// For all functions that are lazy references to initial
322 |     /// bytecode, expand them into IR.
323 |     pub fn expand_all_funcs(&mut self) -> Result<()> {
324 |         for id in 0..self.funcs.len() {
325 |             let id = Func::new(id);
326 |             self.expand_func(id)?;
327 |         }
328 |         Ok(())
329 |     }
330 | 
331 |     /// Return a wrapper that implements Display on this module,
332 |     /// pretty-printing it as textual IR.
333 |     pub fn display<'b>(&'b self) -> ModuleDisplay<'b, impl PrintDecorator>
334 |     where
335 |         'b: 'a,
336 |     {
337 |         ModuleDisplay::<NOPPrintDecorator> {
338 |             module: self,
339 |             decorators: None,
340 |         }
341 |     }
342 | 
343 |     /// Return a wrapper that implements Display on this module,
344 |     /// pretty-printing it as textual IR with some additional text whose
345 |     /// printing is described in Decorator.
346 |     pub fn display_with_decorator<'b, PD: PrintDecorator>(
347 |         &'b self,
348 |         decorators: Box<dyn Fn(Func) -> PD>,
349 |     ) -> ModuleDisplay<'b, PD>
350 |     where
351 |         'b: 'a,
352 |     {
353 |         ModuleDisplay {
354 |             module: self,
355 |             decorators: Some(decorators),
356 |         }
357 |     }
358 | 
359 |     /// Internal (used during parsing): create an empty module, with
360 |     /// the given slice of original Wasm bytecode. Used during parsing
361 |     /// and meant to be filled in as the Wasm bytecode is processed.
362 |     pub(crate) fn with_orig_bytes(orig_bytes: &'a [u8]) -> Module<'a> {
363 |         Module {
364 |             orig_bytes: Some(orig_bytes),
365 |             funcs: EntityVec::default(),
366 |             signatures: EntityVec::default(),
367 |             globals: EntityVec::default(),
368 |             tables: EntityVec::default(),
369 |             imports: vec![],
370 |             exports: vec![],
371 |             memories: EntityVec::default(),
372 |             start_func: None,
373 |             debug: Debug::default(),
374 |             debug_map: DebugMap::default(),
375 |             custom_sections: BTreeMap::default(),
376 |         }
377 |     }
378 | }
379 | 
380 | #[cfg(test)]
381 | mod test {
382 |     use super::*;
383 | 
384 |     #[test]
385 |     fn empty_module_valid() { // an empty module must compile to Wasm bytes without error
386 |         let module = Module::empty();
387 |         let _ = module.to_wasm_bytes().unwrap(); // only success is checked; the bytes are discarded
388 |     }
389 | }
390 | 


--------------------------------------------------------------------------------
/src/ir/value.rs:
--------------------------------------------------------------------------------
 1 | use super::{Block, Type, Value};
 2 | use crate::pool::{ListPool, ListRef};
 3 | use crate::Operator;
 4 | 
 5 | /// A definition of an SSA value.
 6 | #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
 7 | pub enum ValueDef {
 8 |     /// This value is a block parameter of the given block, with the
 9 |     /// given parameter position/index, and the given type.
10 |     BlockParam(Block, u32, Type),
11 |     /// This value is an operator, taking the given arguments, and
12 |     /// producing the given result types.
13 |     ///
14 |     /// The result of an operator may be a single `Type` or a tuple of
15 |     /// types; in the latter case, valid IR must use `PickOutput` to
16 |     /// project out individual elements and use them.
17 |     Operator(Operator, ListRef<Value>, ListRef<Type>),
18 |     /// This value projects out one result of a multi-result
19 |     /// instruction: given the value, the index in the result tuple,
20 |     /// it produces a value of the given type.
21 |     PickOutput(Value, u32, Type),
22 |     /// This value is an alias of another value.
23 |     Alias(Value),
24 |     /// This value is a placeholder to be filled in later (e.g.,
25 |     /// during SSA construction, may become a blockparam or an
26 |     /// alias). Placeholders have fixed types that cannot change once
27 |     /// they are filled in.
28 |     Placeholder(Type),
29 |     /// No value: must be filled in before processing (`visit_uses` and `update_uses` panic on it). This is the default variant.
30 |     #[default]
31 |     None,
32 | }
33 | 
34 | impl ValueDef {
35 |     /// Return the single type of this value, consulting the type-pool
36 |     /// for operators. Operators with zero or multiple result types,
37 |     /// aliases, and unfilled definitions yield `None`.
38 |     pub fn ty(&self, types: &ListPool<Type>) -> Option<Type> {
39 |         match *self {
40 |             ValueDef::BlockParam(_, _, ty)
41 |             | ValueDef::PickOutput(_, _, ty)
42 |             | ValueDef::Placeholder(ty) => Some(ty),
43 |             ValueDef::Operator(_, _, tys) if tys.len() == 1 => Some(types[tys][0]),
44 |             _ => None,
45 |         }
46 |     }
47 | 
48 |     /// Return all result types of this value as a slice (possibly empty).
49 |     pub fn tys<'a>(&'a self, types: &'a ListPool<Type>) -> &'a [Type] {
50 |         match self {
51 |             ValueDef::Operator(_, _, tys) => &types[*tys],
52 |             ValueDef::BlockParam(_, _, ty)
53 |             | ValueDef::PickOutput(_, _, ty)
54 |             | ValueDef::Placeholder(ty) => std::slice::from_ref(ty),
55 |             _ => &[],
56 |         }
57 |     }
58 | 
59 |     /// Call `f` on every value that this definition reads. Panics when
60 |     /// invoked on `ValueDef::None`.
61 |     pub fn visit_uses<F: FnMut(Value)>(&self, arg_pool: &ListPool<Value>, mut f: F) {
62 |         match self {
63 |             ValueDef::BlockParam(..) | ValueDef::Placeholder(_) => {}
64 |             ValueDef::Operator(_, args, _) => arg_pool[*args].iter().copied().for_each(&mut f),
65 |             ValueDef::PickOutput(from, ..) => f(*from),
66 |             ValueDef::Alias(value) => f(*value),
67 |             ValueDef::None => panic!(),
68 |         }
69 |     }
70 | 
71 |     /// Like `visit_uses`, but hands `f` a mutable reference to every used
72 |     /// value so it can be rewritten in place. Panics on `ValueDef::None`.
73 |     pub fn update_uses<F: FnMut(&mut Value)>(&mut self, arg_pool: &mut ListPool<Value>, mut f: F) {
74 |         match self {
75 |             ValueDef::BlockParam(..) | ValueDef::Placeholder(_) => {}
76 |             ValueDef::Operator(_, args, _) => {
77 |                 for slot in &mut arg_pool[*args] {
78 |                     f(slot);
79 |                 }
80 |             }
81 |             ValueDef::PickOutput(from, ..) => f(from),
82 |             ValueDef::Alias(value) => f(value),
83 |             ValueDef::None => panic!(),
84 |         }
85 |     }
86 | }
94 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! WAFFLE Wasm analysis framework.
 2 | //!
 3 | //! waffle is a *decompiler and compiler* library for WebAssembly: it
 4 | //! defines an SSA-based IR (intermediate representation), with a
 5 | //! frontend that translates Wasm to this IR, and a backend that
 6 | //! compiles this IR back to Wasm. It can be used by programs that
 7 | //! need to transform/modify or add new code to Wasm modules.
 8 | //!
 9 | //! A good starting point is the `Module`: it can be constructed from
10 | //! Wasm bytecode in memory with `Module::from_wasm_bytes()` and
11 | //! recompiled to Wasm with `Module::to_wasm_bytes()`, after
12 | //! modifications are performed or new code is added. A new module can
13 | //! also be built from scratch with `Module::empty()`.
14 | 
15 | #![allow(dead_code)]
16 | 
17 | // Re-export wasmparser for easier use of the right version by our embedders.
18 | pub use wasmparser;
19 | // Likewise for wasm-encoder.
20 | pub use wasm_encoder;
21 | 
22 | mod backend;
23 | pub mod cfg;
24 | pub mod entity;
25 | mod errors;
26 | mod frontend;
27 | mod ir;
28 | mod op_traits;
29 | mod ops;
30 | pub mod passes;
31 | pub mod pool;
32 | mod scoped_map;
33 | 
34 | pub use errors::*;
35 | pub use ir::*;
36 | pub use op_traits::SideEffect;
37 | pub use ops::{Ieee32, Ieee64, MemoryArg, Operator};
38 | 
39 | mod interp;
40 | pub use interp::*;
41 | 
42 | pub use passes::basic_opt::OptOptions;
43 | 
44 | #[cfg(feature = "fuzzing")]
45 | pub mod fuzzing;
46 | 


--------------------------------------------------------------------------------
/src/passes.rs:
--------------------------------------------------------------------------------
1 | //! Passes.
2 | 
3 | pub mod basic_opt;
4 | pub mod dom_pass;
5 | pub mod empty_blocks;
6 | pub mod maxssa;
7 | pub mod resolve_aliases;
8 | 


--------------------------------------------------------------------------------
/src/passes/basic_opt.rs:
--------------------------------------------------------------------------------
  1 | //! Basic optimizations: GVN and constant-propagation/folding.
  2 | 
  3 | use crate::cfg::CFGInfo;
  4 | use crate::interp::{const_eval, ConstVal};
  5 | use crate::ir::*;
  6 | use crate::passes::dom_pass::{dom_pass, DomtreePass};
  7 | use crate::pool::ListRef;
  8 | use crate::scoped_map::ScopedMap;
  9 | use crate::Operator;
 10 | use smallvec::{smallvec, SmallVec};
 11 | 
/// Switches selecting which of the basic optimizations run.
#[derive(Clone, Debug)]
pub struct OptOptions {
    /// Enable GVN: a pure value whose definition was already seen in
    /// the current scope becomes an alias of the earlier value.
    pub gvn: bool,
    /// Enable constant propagation/folding of pure operators whose
    /// arguments evaluate to constants.
    pub cprop: bool,
    /// Enable removal of block parameters whose incoming arguments
    /// all resolve to one value or to one constant.
    pub redundant_blockparams: bool,
}
 18 | 
 19 | impl std::default::Default for OptOptions {
 20 |     fn default() -> Self {
 21 |         OptOptions {
 22 |             gvn: true,
 23 |             cprop: true,
 24 |             redundant_blockparams: true,
 25 |         }
 26 |     }
 27 | }
 28 | 
 29 | pub(crate) fn basic_opt(body: &mut FunctionBody, cfg: &CFGInfo, options: &OptOptions) {
 30 |     loop {
 31 |         let mut pass = BasicOptPass {
 32 |             map: ScopedMap::default(),
 33 |             cfg,
 34 |             options,
 35 |             changed: false,
 36 |         };
 37 |         dom_pass::<BasicOptPass>(body, cfg, &mut pass);
 38 |         if !pass.changed {
 39 |             break;
 40 |         }
 41 |     }
 42 | }
 43 | 
/// State for one dominator-tree walk of the basic optimization pass.
#[derive(Debug)]
struct BasicOptPass<'a> {
    /// Scoped table from a value definition to the value that already
    /// computes it; consulted and filled by GVN. A level is pushed on
    /// entering a block and popped on leaving it.
    map: ScopedMap<ValueDef, Value>,
    /// CFG information (predecessors and their target positions) used
    /// when inspecting block-parameter inputs.
    cfg: &'a CFGInfo,
    /// Which optimizations are enabled.
    options: &'a OptOptions,
    /// Set whenever this walk rewrote anything; the driver re-runs
    /// the pass while this is true.
    changed: bool,
}
 51 | 
impl<'a> DomtreePass for BasicOptPass<'a> {
    /// On entering a block: open a new scope in the value-numbering
    /// map, then optimize the block.
    fn enter(&mut self, block: Block, body: &mut FunctionBody) {
        self.map.push_level();
        self.optimize(block, body);
    }

    /// On leaving a block: close the scope opened in `enter`.
    fn leave(&mut self, _block: Block, _body: &mut FunctionBody) {
        self.map.pop_level();
    }
}
 62 | 
 63 | fn value_is_pure(value: Value, body: &FunctionBody) -> bool {
 64 |     match body.values[value] {
 65 |         ValueDef::Operator(op, ..) if op.is_pure() => true,
 66 |         _ => false,
 67 |     }
 68 | }
 69 | 
 70 | fn value_is_const(value: Value, body: &FunctionBody) -> ConstVal {
 71 |     match body.values[value] {
 72 |         ValueDef::Operator(Operator::I32Const { value }, _, _) => ConstVal::I32(value),
 73 |         ValueDef::Operator(Operator::I64Const { value }, _, _) => ConstVal::I64(value),
 74 |         ValueDef::Operator(Operator::F32Const { value }, _, _) => ConstVal::F32(value),
 75 |         ValueDef::Operator(Operator::F64Const { value }, _, _) => ConstVal::F64(value),
 76 |         _ => ConstVal::None,
 77 |     }
 78 | }
 79 | 
 80 | fn const_op(val: ConstVal) -> Operator {
 81 |     match val {
 82 |         ConstVal::I32(value) => Operator::I32Const { value },
 83 |         ConstVal::I64(value) => Operator::I64Const { value },
 84 |         ConstVal::F32(value) => Operator::F32Const { value },
 85 |         ConstVal::F64(value) => Operator::F64Const { value },
 86 |         _ => unreachable!(),
 87 |     }
 88 | }
 89 | 
 90 | fn remove_all_from_vec<T: Clone>(v: &mut Vec<T>, indices: &[usize]) {
 91 |     let mut out = 0;
 92 |     let mut indices_i = 0;
 93 |     for i in 0..v.len() {
 94 |         let keep = indices_i == indices.len() || indices[indices_i] != i;
 95 |         if keep {
 96 |             if out < i {
 97 |                 v[out] = v[i].clone();
 98 |             }
 99 |             out += 1;
100 |         } else {
101 |             indices_i += 1;
102 |         }
103 |     }
104 | 
105 |     v.truncate(out);
106 | }
107 | 
108 | impl<'a> BasicOptPass<'a> {
109 |     fn optimize(&mut self, block: Block, body: &mut FunctionBody) {
110 |         if self.options.redundant_blockparams && block != body.entry {
111 |             // Pass over blockparams, checking all inputs. If all inputs
112 |             // resolve to the same SSA value, remove the blockparam and
113 |             // make it an alias of that value. If all inputs resolve to
114 |             // the same constant value, remove the blockparam and insert a
115 |             // new copy of that constant.
116 |             let mut blockparams_to_remove: SmallVec<[usize; 4]> = smallvec![];
117 |             let mut const_insts_to_insert: SmallVec<[Value; 4]> = smallvec![];
118 |             for (i, &(ty, blockparam)) in body.blocks[block].params.iter().enumerate() {
119 |                 let mut inputs: SmallVec<[Value; 4]> = smallvec![];
120 |                 let mut const_val = None;
121 |                 for (&pred, &pos) in self.cfg.preds[block]
122 |                     .iter()
123 |                     .zip(self.cfg.pred_pos[block].iter())
124 |                 {
125 |                     let input = body.blocks[pred]
126 |                         .terminator
127 |                         .visit_target(pos, |target| target.args[i]);
128 |                     let input = body.resolve_alias(input);
129 |                     if input != blockparam {
130 |                         inputs.push(input);
131 |                     }
132 |                     const_val = ConstVal::meet(const_val, Some(value_is_const(input, body)));
133 |                 }
134 |                 let const_val = const_val.unwrap();
135 | 
136 |                 assert!(inputs.len() > 0);
137 |                 if inputs.iter().all(|x| *x == inputs[0]) {
138 |                     // All inputs are the same value; remove the
139 |                     // blockparam and rewrite it as an alias of the one
140 |                     // single value.
141 |                     body.values[blockparam] = ValueDef::Alias(inputs[0]);
142 |                     blockparams_to_remove.push(i);
143 |                 } else if const_val != ConstVal::None {
144 |                     // All inputs are the same constant; remove the
145 |                     // blockparam and rewrite it as a new constant
146 |                     // operator.
147 |                     let ty = body.type_pool.single(ty);
148 |                     body.values[blockparam] =
149 |                         ValueDef::Operator(const_op(const_val), ListRef::default(), ty);
150 |                     const_insts_to_insert.push(blockparam);
151 |                     blockparams_to_remove.push(i);
152 |                 }
153 |             }
154 | 
155 |             if !const_insts_to_insert.is_empty() || !blockparams_to_remove.is_empty() {
156 |                 self.changed = true;
157 |             }
158 | 
159 |             for inst in const_insts_to_insert {
160 |                 body.blocks[block].insts.insert(0, inst);
161 |             }
162 | 
163 |             remove_all_from_vec(&mut body.blocks[block].params, &blockparams_to_remove[..]);
164 |             for (&pred, &pos) in self.cfg.preds[block]
165 |                 .iter()
166 |                 .zip(self.cfg.pred_pos[block].iter())
167 |             {
168 |                 body.blocks[pred].terminator.update_target(pos, |target| {
169 |                     remove_all_from_vec(&mut target.args, &blockparams_to_remove[..])
170 |                 });
171 |             }
172 |         }
173 | 
174 |         // Pass over instructions, updating in place.
175 |         let mut i = 0;
176 |         while i < body.blocks[block].insts.len() {
177 |             let inst = body.blocks[block].insts[i];
178 |             i += 1;
179 |             if value_is_pure(inst, body) {
180 |                 let mut value = body.values[inst].clone();
181 | 
182 |                 // Resolve aliases in the arg lists.
183 |                 match &mut value {
184 |                     &mut ValueDef::Operator(_, args, _) => {
185 |                         for i in 0..args.len() {
186 |                             let val = body.arg_pool[args][i];
187 |                             let new_val = body.resolve_and_update_alias(val);
188 |                             body.arg_pool[args][i] = new_val;
189 |                             self.changed |= new_val != val;
190 |                         }
191 |                     }
192 |                     &mut ValueDef::PickOutput(ref mut val, ..) => {
193 |                         let updated = body.resolve_and_update_alias(*val);
194 |                         *val = updated;
195 |                         self.changed |= updated != *val;
196 |                     }
197 |                     _ => {}
198 |                 }
199 | 
200 |                 // Try to constant-propagate.
201 |                 if self.options.cprop {
202 |                     if let ValueDef::Operator(op, args, ..) = &value {
203 |                         let arg_values = body.arg_pool[*args]
204 |                             .iter()
205 |                             .map(|&arg| value_is_const(arg, body))
206 |                             .collect::<Vec<_>>();
207 |                         let const_val = match op {
208 |                             Operator::I32Const { .. }
209 |                             | Operator::I64Const { .. }
210 |                             | Operator::F32Const { .. }
211 |                             | Operator::F64Const { .. }
212 |                             | Operator::V128Const { .. } => None,
213 |                             _ => const_eval(op, &arg_values[..], None),
214 |                         };
215 |                         match const_val {
216 |                             Some(ConstVal::I32(val)) => {
217 |                                 value = ValueDef::Operator(
218 |                                     Operator::I32Const { value: val },
219 |                                     ListRef::default(),
220 |                                     body.single_type_list(Type::I32),
221 |                                 );
222 |                                 body.values[inst] = value.clone();
223 |                                 self.changed = true;
224 |                             }
225 |                             Some(ConstVal::I64(val)) => {
226 |                                 value = ValueDef::Operator(
227 |                                     Operator::I64Const { value: val },
228 |                                     ListRef::default(),
229 |                                     body.single_type_list(Type::I64),
230 |                                 );
231 |                                 body.values[inst] = value.clone();
232 |                                 self.changed = true;
233 |                             }
234 |                             Some(ConstVal::F32(val)) => {
235 |                                 value = ValueDef::Operator(
236 |                                     Operator::F32Const { value: val },
237 |                                     ListRef::default(),
238 |                                     body.single_type_list(Type::F32),
239 |                                 );
240 |                                 body.values[inst] = value.clone();
241 |                                 self.changed = true;
242 |                             }
243 |                             Some(ConstVal::F64(val)) => {
244 |                                 value = ValueDef::Operator(
245 |                                     Operator::F64Const { value: val },
246 |                                     ListRef::default(),
247 |                                     body.single_type_list(Type::F64),
248 |                                 );
249 |                                 body.values[inst] = value.clone();
250 |                                 self.changed = true;
251 |                             }
252 |                             _ => {}
253 |                         }
254 |                     }
255 |                 }
256 | 
257 |                 if self.options.gvn {
258 |                     // GVN: look for already-existing copies of this
259 |                     // value.
260 |                     if let Some(value) = self.map.get(&value) {
261 |                         body.set_alias(inst, *value);
262 |                         i -= 1;
263 |                         body.blocks[block].insts.remove(i);
264 |                         self.changed = true;
265 |                         continue;
266 |                     }
267 |                     self.map.insert(value, inst);
268 |                 }
269 |             }
270 |         }
271 |     }
272 | }
273 | 


--------------------------------------------------------------------------------
/src/passes/dom_pass.rs:
--------------------------------------------------------------------------------
 1 | //! Simple framework for a domtree-based pass.
 2 | 
 3 | use crate::cfg::CFGInfo;
 4 | use crate::ir::{Block, FunctionBody};
 5 | 
/// Callbacks for a walk over the dominator tree. Both methods default
/// to doing nothing, so an implementor overrides only what it needs.
pub trait DomtreePass {
    /// Called when the walk reaches `_block`, before any of its
    /// dominator-tree children are visited.
    fn enter(&mut self, _block: Block, _body: &mut FunctionBody) {}
    /// Called after all dominator-tree children of `_block` have been
    /// visited.
    fn leave(&mut self, _block: Block, _body: &mut FunctionBody) {}
}
10 | 
11 | pub fn dom_pass<P: DomtreePass>(body: &mut FunctionBody, cfg: &CFGInfo, pass: &mut P) {
12 |     visit::<P>(body, cfg, pass, body.entry);
13 | }
14 | 
15 | fn visit<P: DomtreePass>(body: &mut FunctionBody, cfg: &CFGInfo, pass: &mut P, block: Block) {
16 |     pass.enter(block, body);
17 |     for child in cfg.dom_children(block) {
18 |         visit(body, cfg, pass, child);
19 |     }
20 |     pass.leave(block, body);
21 | }
22 | 


--------------------------------------------------------------------------------
/src/passes/empty_blocks.rs:
--------------------------------------------------------------------------------
 1 | //! Pass to remove empty blocks.
 2 | 
 3 | use crate::entity::EntityRef;
 4 | use crate::ir::{Block, BlockTarget, FunctionBody, Terminator};
 5 | 
 6 | /// Determines whether a block (i) has no blockparams, and (ii) is
 7 | /// solely a jump to another block. We can remove these blocks.
 8 | ///
 9 | /// Why can't we remove blocks that are solely jumps but *do* have
10 | /// blockparams? Because They still serve a purpose in SSA: they
11 | /// define these blockparams as a join of multiple possible other
12 | /// definitions in preds.
13 | fn block_is_empty_jump(body: &FunctionBody, block: Block) -> Option<BlockTarget> {
14 |     // Must be empty except for terminator, and must have no
15 |     // blockparams, and must have an unconditional-branch terminator.
16 |     if body.blocks[block].insts.len() > 0 {
17 |         return None;
18 |     }
19 |     if body.blocks[block].params.len() > 0 {
20 |         return None;
21 |     }
22 |     let target = match &body.blocks[block].terminator {
23 |         &Terminator::Br { ref target } => target,
24 |         _ => return None,
25 |     };
26 | 
27 |     Some(target.clone())
28 | }
29 | 
30 | fn rewrite_target(
31 |     forwardings: &[Option<BlockTarget>],
32 |     target: &BlockTarget,
33 | ) -> Option<BlockTarget> {
34 |     if target.args.len() > 0 {
35 |         return None;
36 |     }
37 |     forwardings[target.block.index()].clone()
38 | }
39 | 
40 | pub(crate) fn run(body: &mut FunctionBody) {
41 |     log::trace!(
42 |         "empty_blocks: running on func:\n{}\n",
43 |         body.display_verbose("| ", None)
44 |     );
45 | 
46 |     // Identify empty blocks, and to where they should forward.
47 |     let forwardings = body
48 |         .blocks
49 |         .iter()
50 |         .map(|block| {
51 |             if block != body.entry {
52 |                 block_is_empty_jump(body, block)
53 |             } else {
54 |                 None
55 |             }
56 |         })
57 |         .collect::<Vec<_>>();
58 | 
59 |     // Rewrite every target according to a forwarding (or potentially
60 |     // a chain of composed forwardings).
61 |     for block_data in body.blocks.values_mut() {
62 |         block_data.terminator.update_targets(|target| {
63 |             if let Some(new_target) = rewrite_target(&forwardings[..], target) {
64 |                 log::trace!("empty_blocks: replacing {:?} with {:?}", target, new_target);
65 |                 *target = new_target;
66 |             }
67 |         });
68 |     }
69 | 
70 |     // Recompute preds/succs.
71 |     body.recompute_edges();
72 | 
73 |     log::trace!(
74 |         "empty_blocks: finished:\n{}\n",
75 |         body.display_verbose("| ", None)
76 |     );
77 | }
78 | 


--------------------------------------------------------------------------------
/src/passes/maxssa.rs:
--------------------------------------------------------------------------------
  1 | //! Conversion pass that creates "maximal SSA": only local uses (no
  2 | //! uses of defs in other blocks), with all values explicitly passed
  3 | //! through blockparams. This makes some other transforms easier
  4 | //! because it removes the need to worry about adding blockparams when
  5 | //! mutating the CFG (all possible blockparams are already there!).
  6 | 
  7 | use crate::cfg::CFGInfo;
  8 | use crate::entity::PerEntity;
  9 | use crate::ir::{Block, FunctionBody, Value, ValueDef};
 10 | use std::collections::{BTreeSet, HashMap, HashSet};
 11 | 
 12 | pub(crate) fn run(body: &mut FunctionBody, cut_blocks: Option<HashSet<Block>>, cfg: &CFGInfo) {
 13 |     MaxSSAPass::new(cut_blocks).run(body, cfg);
 14 | }
 15 | 
/// Working state of the max-SSA conversion.
struct MaxSSAPass {
    /// Blocks at which all live values must cross through blockparams
    /// (or if None, then all blocks).
    cut_blocks: Option<HashSet<Block>>,
    /// Additional block args that must be passed to each block, in
    /// order. Value numbers are *original* values.
    new_args: PerEntity<Block, Vec<Value>>,
    /// For each block, a value map: from original value to local copy
    /// of value. The local copy is either the blockparam created for
    /// the value in that block or, when that blockparam was turned
    /// into an alias, the value it aliases.
    value_map: HashMap<(Block, Value), Value>,
}
 27 | 
impl MaxSSAPass {
    /// Creates a pass with empty bookkeeping for the given cut-block
    /// set.
    fn new(cut_blocks: Option<HashSet<Block>>) -> Self {
        Self {
            cut_blocks,
            new_args: PerEntity::default(),
            value_map: HashMap::new(),
        }
    }

    /// Runs the conversion: first visits every block to create the
    /// needed blockparams and record the value mapping, then rewrites
    /// branch arguments and uses accordingly.
    fn run(mut self, body: &mut FunctionBody, cfg: &CFGInfo) {
        for block in body.blocks.iter() {
            self.visit(body, cfg, block);
        }
        self.update(body);
    }

    /// Gathers every (alias-resolved) value used by the instructions
    /// and terminator of `block` and processes each one with
    /// `visit_use`.
    fn visit(&mut self, body: &mut FunctionBody, cfg: &CFGInfo, block: Block) {
        // For each use in the block, process the use. Collect all
        // uses first to deduplicate and allow more efficient
        // processing (and to appease the borrow checker).
        let mut uses = BTreeSet::default();
        for &inst in &body.blocks[block].insts {
            match &body.values[inst] {
                &ValueDef::Operator(_, args, _) => {
                    for &arg in &body.arg_pool[args] {
                        let arg = body.resolve_alias(arg);
                        uses.insert(arg);
                    }
                }
                &ValueDef::PickOutput(value, ..) => {
                    let value = body.resolve_alias(value);
                    uses.insert(value);
                }
                _ => {}
            }
        }
        body.blocks[block].terminator.visit_uses(|u| {
            let u = body.resolve_alias(u);
            uses.insert(u);
        });

        for u in uses {
            self.visit_use(body, cfg, block, u);
        }
    }

    /// Ensures `value` is available in `block` through a blockparam
    /// unless it is defined there, and does the same transitively for
    /// the predecessors of every block that receives a new blockparam.
    ///
    /// The traversal uses an explicit stack instead of recursion. Each
    /// (block, value) pair goes through a `Pre` step, which creates
    /// the blockparam and schedules the predecessors, and a `Post`
    /// step, which runs once those predecessors are done and may
    /// replace the blockparam with an alias.
    fn visit_use(&mut self, body: &mut FunctionBody, cfg: &CFGInfo, block: Block, value: Value) {
        #[derive(Clone, Copy, PartialEq, Eq)]
        enum Phase {
            Pre,
            Post,
        }

        // Stack entries: (block, value, phase, blockparam created in
        // the Pre phase — only present on Post entries).
        let mut stack = vec![(block, value, Phase::Pre, None)];

        while let Some(&(block, value, phase, _)) = stack.last() {
            match phase {
                Phase::Pre => {
                    // Already handled for this block.
                    if self.value_map.contains_key(&(block, value)) {
                        stack.pop();
                        continue;
                    }
                    // The value is defined in this block, so it needs
                    // no blockparam here.
                    if cfg.def_block[value] == block {
                        stack.pop();
                        continue;
                    }

                    self.new_args[block].push(value);

                    // Create a placeholder value.
                    let ty = body.values[value].ty(&body.type_pool).unwrap();
                    let blockparam = body.add_blockparam(block, ty);
                    // Recording the mapping before visiting preds is
                    // what stops the walk when it comes back around a
                    // loop to this block.
                    self.value_map.insert((block, value), blockparam);

                    // Replace the Pre entry with a Post entry that
                    // will be reached after all preds pushed below
                    // have been processed.
                    stack.pop();
                    stack.push((block, value, Phase::Post, Some(blockparam)));

                    // Recursively visit preds and use the value there, to
                    // ensure they have the value available as well.
                    for i in 0..body.blocks[block].preds.len() {
                        // Don't borrow for whole loop while iterating (`body` is
                        // taken as mut by recursion, but we don't add preds).
                        let pred = body.blocks[block].preds[i];
                        stack.push((pred, value, Phase::Pre, None));
                    }
                }

                Phase::Post => {
                    let Some((_, _, _, Some(blockparam))) = stack.pop() else {
                        unreachable!()
                    };

                    // If all preds have the same value, and this is not a
                    // cut-block, rewrite the blockparam to an alias instead.
                    if !self.is_cut_block(block) {
                        if let Some(pred_value) = iter_all_same(
                            body.blocks[block]
                                .preds
                                .iter()
                                .map(|&pred| *self.value_map.get(&(pred, value)).unwrap_or(&value))
                                // A pred that passes the blockparam
                                // back to itself does not contribute a
                                // distinct value.
                                .filter(|&val| val != blockparam),
                        ) {
                            // Undo the blockparam and new-arg entry
                            // added in the Pre phase; both are removed
                            // from the end of their lists.
                            body.blocks[block].params.pop();
                            self.new_args[block].pop();
                            body.values[blockparam] = ValueDef::Alias(pred_value);
                            self.value_map.insert((block, value), pred_value);
                        }
                    }
                }
            }
        }
    }

    /// Returns whether `block` is a cut block. When no explicit set
    /// was given (`cut_blocks` is `None`), every block counts as one.
    fn is_cut_block(&self, block: Block) -> bool {
        self.cut_blocks
            .as_ref()
            .map(|cut_blocks| cut_blocks.contains(&block))
            .unwrap_or(true)
    }

    /// Appends to every branch target one argument per new blockparam
    /// of the target block, in `new_args` order. The argument is the
    /// branching block's local copy of the value when `value_map` has
    /// one, and the original value otherwise.
    fn update_branch_args(&mut self, body: &mut FunctionBody) {
        for (block, blockdata) in body.blocks.entries_mut() {
            blockdata.terminator.update_targets(|target| {
                for &new_arg in &self.new_args[target.block] {
                    let actual_value = self
                        .value_map
                        .get(&(block, new_arg))
                        .copied()
                        .unwrap_or(new_arg);
                    target.args.push(actual_value);
                }
            });
        }
    }

    /// Rewrites every use in the instructions and terminator of
    /// `block` to the block-local copy of the used value, when one is
    /// recorded in `value_map`.
    fn update_uses(&mut self, body: &mut FunctionBody, block: Block) {
        // Resolve aliases first, then map to the local copy if any.
        let resolve = |body: &FunctionBody, value: Value| {
            let value = body.resolve_alias(value);
            self.value_map
                .get(&(block, value))
                .copied()
                .unwrap_or(value)
        };

        for i in 0..body.blocks[block].insts.len() {
            let inst = body.blocks[block].insts[i];
            // Temporarily move the definition out so that `body` can
            // be borrowed by `resolve` while the definition is edited.
            let mut def = std::mem::take(&mut body.values[inst]);
            match &mut def {
                ValueDef::Operator(_, args, _) => {
                    for i in 0..args.len() {
                        let val = body.arg_pool[*args][i];
                        let val = resolve(body, val);
                        body.arg_pool[*args][i] = val;
                    }
                }
                ValueDef::PickOutput(value, ..) => {
                    *value = resolve(body, *value);
                }
                ValueDef::Alias(value) => {
                    *value = resolve(body, *value);
                }
                _ => {}
            }
            body.values[inst] = def;
        }
        // Same move-out/move-back approach for the terminator.
        let mut term = std::mem::take(&mut body.blocks[block].terminator);
        term.update_uses(|u| {
            *u = resolve(body, *u);
        });
        body.blocks[block].terminator = term;
    }

    /// Applies the recorded results to the function body: extends the
    /// branch arguments first, then rewrites the uses in every block.
    fn update(&mut self, body: &mut FunctionBody) {
        self.update_branch_args(body);
        for block in body.blocks.iter() {
            self.update_uses(body, block);
        }
    }
}
207 | 
/// Returns the common item when every item yielded by `iter` is equal
/// to the first one, and `None` when the iterator is empty or some
/// item differs. Iteration stops at the first differing item.
fn iter_all_same<Item: PartialEq + Eq + Copy, I: Iterator<Item = Item>>(iter: I) -> Option<Item> {
    let mut rest = iter;
    let first = rest.next()?;
    if rest.all(|other| other == first) {
        Some(first)
    } else {
        None
    }
}
217 | 


--------------------------------------------------------------------------------
/src/passes/resolve_aliases.rs:
--------------------------------------------------------------------------------
 1 | //! Resolve all aliases.
 2 | 
 3 | use crate::{FunctionBody, ValueDef};
 4 | 
 5 | pub fn run(body: &mut FunctionBody) {
 6 |     log::debug!(
 7 |         "Resolve aliases: running on:\n{}\n",
 8 |         body.display_verbose("| ", None),
 9 |     );
10 |     for value in body.values.iter() {
11 |         let mut value_def = std::mem::take(&mut body.values[value]);
12 |         match &mut value_def {
13 |             ValueDef::Operator(_, args, _) => {
14 |                 for i in 0..args.len() {
15 |                     let val = body.arg_pool[*args][i];
16 |                     let val = body.resolve_and_update_alias(val);
17 |                     body.arg_pool[*args][i] = val;
18 |                 }
19 |             }
20 |             ValueDef::PickOutput(val, _idx, _ty) => {
21 |                 *val = body.resolve_alias(*val);
22 |             }
23 |             ValueDef::Alias(val) => {
24 |                 *val = body.resolve_alias(*val);
25 |             }
26 |             _ => {}
27 |         }
28 |         body.values[value] = value_def;
29 |     }
30 |     let mut blocks = std::mem::take(&mut body.blocks);
31 |     for block in blocks.values_mut() {
32 |         block.terminator.update_targets(|target| {
33 |             for arg in &mut target.args {
34 |                 *arg = body.resolve_alias(*arg);
35 |             }
36 |         });
37 |     }
38 |     body.blocks = blocks;
39 | }
40 | 


--------------------------------------------------------------------------------
/src/pool.rs:
--------------------------------------------------------------------------------
  1 | //! Pooled list data structure.
  2 | //!
  3 | //! The IR for a function contains many small lists: the lists of
  4 | //! arguments and result values for each operator, and the list of
  5 | //! result types for each operator as well. It would be fairly
  6 | //! inefficient to manage these lists as many separate memory
  7 | //! allocations, each with the overhead of a `Vec` (24 bytes on a
  8 | //! 64-bit system) in addition to the storage block. So, instead, we
  9 | //! aggregate these lists by keeping them all in one large `Vec` (per
 10 | //! kind) and holding *index ranges* as virtual handles to lists in
 11 | //! the rest of the IR.
 12 | //!
 13 | //! We define a general abstraction here `ListPool<T>` for a list of
 14 | //! `T`, with a `ListRef<T>` that together with the pool can yield an
 15 | //! actual slice. This container is instantiated several times in the
 16 | //! `FunctionBody`, namely for the `arg_pool` and `type_pool`.
 17 | 
 18 | use std::convert::TryFrom;
 19 | use std::fmt::Debug;
 20 | use std::marker::PhantomData;
 21 | use std::ops::{Index, IndexMut};
 22 | 
/// A "storage pool" backing many `ListRef`s of the given type.
#[derive(Clone, Debug)]
pub struct ListPool<T: Clone + Debug> {
    /// Backing vector to which the items of every new list are
    /// appended; a `ListRef` names a range of indices in it.
    storage: Vec<T>,
}
 28 | 
 29 | impl<T: Clone + Debug> Default for ListPool<T> {
 30 |     fn default() -> Self {
 31 |         ListPool { storage: vec![] }
 32 |     }
 33 | }
 34 | 
/// A handle to a list stored in a `ListPool`.
///
/// The handle can be used to yield the actual slice, given the pool,
/// but has much smaller overhead than a separately-owned `Vec`: e.g.,
/// 8 bytes on 64-bit systems, rather than 24 bytes, and no separate
/// memory allocation overhead.
///
/// The two `u32` fields are the start and end positions of the list
/// in the pool's storage, recorded as the storage length before and
/// after the list's items were appended.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct ListRef<T>(u32, u32, PhantomData<T>);
 43 | 
 44 | impl<T> Default for ListRef<T> {
 45 |     fn default() -> Self {
 46 |         Self(0, 0, PhantomData) // The empty range `0..0`: a list with no items.
 47 |     }
 48 | }
 49 | 
 50 | impl<T: Clone + Debug> ListPool<T> {
 51 |     /// Create a new list in this pool from the items yielded by the
 52 |     /// given iterator. Panics if the pool's length overflows `u32`.
 53 |     pub fn from_iter<I: Iterator<Item = T>>(&mut self, iter: I) -> ListRef<T> {
 54 |         let start = u32::try_from(self.storage.len()).unwrap();
 55 |         self.storage.extend(iter);
 56 |         let end = u32::try_from(self.storage.len()).unwrap();
 57 |         ListRef(start, end, PhantomData)
 58 |     }
 59 |     /// Convenience method: create a list from a single item.
 60 |     pub fn single(&mut self, value: T) -> ListRef<T> {
 61 |         self.from_iter(std::iter::once(value))
 62 |     }
 63 |     /// Convenience method: create a list from exactly two items.
 64 |     pub fn double(&mut self, a: T, b: T) -> ListRef<T> {
 65 |         self.from_iter(std::iter::once(a).chain(std::iter::once(b)))
 66 |     }
 67 |     /// Convenience method: create a list from exactly three items.
 68 |     pub fn triple(&mut self, a: T, b: T, c: T) -> ListRef<T> {
 69 |         self.from_iter(
 70 |             std::iter::once(a)
 71 |                 .chain(std::iter::once(b))
 72 |                 .chain(std::iter::once(c)),
 73 |         )
 74 |     }
 75 |     /// Allocate a list of the given size with `size` copies of the
 76 |     /// value `initial`.
 77 |     pub fn allocate(&mut self, size: usize, initial: T) -> ListRef<T> {
 78 |         self.from_iter(std::iter::repeat(initial).take(size))
 79 |     }
 80 |     /// Perform a deep-clone of a list: clone its items into a new list
 81 |     /// at the end of the pool and return the handle of that new list.
 82 |     pub fn deep_clone(&mut self, list: ListRef<T>) -> ListRef<T> {
 83 |         let start = u32::try_from(self.storage.len()).unwrap();
 84 |         // Source and destination are the same vector, so clone the
 85 |         // range in place with `extend_from_within` rather than
 86 |         // indexing and pushing one element at a time.
 87 |         self.storage.extend_from_within(list.0 as usize..list.1 as usize);
 88 |         let end = u32::try_from(self.storage.len()).unwrap();
 89 |         ListRef(start, end, PhantomData)
 90 |     }
 91 | }
 92 | 
 93 | impl<T: Clone + Debug> Index<ListRef<T>> for ListPool<T> {
 94 |     type Output = [T];
 95 |     fn index(&self, index: ListRef<T>) -> &Self::Output {
 96 |         &self.storage[(index.0 as usize)..(index.1 as usize)] // Panics if the range is out of bounds.
 97 |     }
 98 | }
 99 | 
100 | impl<T: Clone + Debug> IndexMut<ListRef<T>> for ListPool<T> {
101 |     fn index_mut(&mut self, index: ListRef<T>) -> &mut Self::Output {
102 |         &mut self.storage[(index.0 as usize)..(index.1 as usize)] // Panics if the range is out of bounds.
103 |     }
104 | }
105 | 
106 | impl<T> ListRef<T> {
107 |     /// Number of items in this list; the pool is not needed to compute it.
108 |     pub fn len(&self) -> usize {
109 |         let &ListRef(start, end, _) = self;
110 |         (end - start) as usize
111 |     }
112 |     /// Whether this list holds no items; like `len`, this needs only
113 |     /// the handle, not the pool.
114 |     pub fn is_empty(&self) -> bool {
115 |         self.len() == 0
116 |     }
117 | }
118 | 


--------------------------------------------------------------------------------
/src/scoped_map.rs:
--------------------------------------------------------------------------------
  1 | //! Scoped hashmap.
  2 | //!
  3 | //! This container keeps a map from a type `K` to `V`, but with a
  4 | //! twist: it supports "push" and "pop" operations, which alter a
  5 | //! stack-level, and on "pop", any mappings created at the current
  6 | //! stack level or greater are removed. This is useful in conjunction
  7 | //! with tree-structured walks over code: for example, a GVN pass that
  8 | //! traverses the domtree, keeping "availability" of already-computed
  9 | //! expressions in the map only within the scope of a subtree.
 10 | //!
 11 | //! For efficiency, the scoped hashmap does not actually perform a
 12 | //! "remove" operation in the underlying hashmap for every mapping
 13 | //! removed by a pop. This would result in O(n) pop cost,
 14 | //! worst-case. Instead, we fast-invalidate all mappings by
 15 | //! incrementing a generation number, and checking that generation on
 16 | //! access, ignoring stale entries. We need a separate generation
 17 | //! number for each level of the scoped hashmap; "push" increments the
 18 | //! generation on the new level ("this is the 64th time we have been
 19 | //! at level 3") and when a value is inserted, it records the level it
 20 | //! is at and the current generation of that level.
 21 | //!
 22 | //! Note that this scheme does *not* support shadowing: if we set a
 23 | //! particular key `k1` to value `v1` at level 3, then set it again at
 24 | //! level 4, we will remove it when we pop level 4; the old value set
 25 | //! at level 3 does not re-appear. Doing so would require a more
 26 | //! complex data structure, and is unnecessary for our use-cases. In
 27 | //! particular, we use this for GVN, where if a key already exists, we
 28 | //! use it rather than setting it again in a more nested scope.
 29 | 
 30 | use fxhash::FxHashMap;
 31 | use std::fmt::Debug;
 32 | use std::hash::Hash;
 33 | 
 34 | /// A scoped hashmap: a key-value map with "push" and "pop" operations
 35 | /// and the ability to quickly remove mappings created at a given
 36 | /// level when popping.
 37 | #[derive(Clone, Debug)]
 38 | pub struct ScopedMap<K: Hash + Eq + Clone + Debug, V: Clone + Debug> {
 39 |     map: FxHashMap<K, ScopedMapEntry<V>>, // All entries, stale ones included; `get` filters them by generation.
 40 |     gen: u32, // Counter incremented by every `push_level`; its new value stamps the pushed level.
 41 |     gen_by_level: Vec<u32>, // Generation stamp of each open level, indexed by level; starts as `[0]`.
 42 | }
 43 | 
 44 | impl<K: Hash + Eq + Clone + Debug, V: Clone + Debug> std::default::Default for ScopedMap<K, V> {
 45 |     fn default() -> Self {
 46 |         Self::new() // Identical to `new()`: no mappings, only level 0 open.
 47 |     }
 48 | }
 49 | 
 50 | /// An entry in the scoped hashmap; `get` returns it only while `gen_by_level[level] == gen`.
 51 | #[derive(Clone, Debug)]
 52 | struct ScopedMapEntry<V: Clone + Debug> {
 53 |     /// The generation of the level at which this entry was created,
 54 |     /// when it was created.
 55 |     gen: u32,
 56 |     /// The level at which this entry was created.
 57 |     level: u32,
 58 |     /// The value associated with this key.
 59 |     value: V,
 60 | }
 61 | 
 62 | impl<K: Hash + Eq + Clone + Debug, V: Clone + Debug> ScopedMap<K, V> {
 63 |     /// Build a scoped hashmap with no mappings and only level 0 open.
 64 |     pub fn new() -> ScopedMap<K, V> {
 65 |         Self {
 66 |             map: FxHashMap::default(),
 67 |             gen: 0,
 68 |             gen_by_level: vec![0],
 69 |         }
 70 |     }
 71 | 
 72 |     /// Open a new innermost level, stamped with a fresh generation.
 73 |     pub fn push_level(&mut self) {
 74 |         self.gen += 1;
 75 |         self.gen_by_level.push(self.gen);
 76 |     }
 77 | 
 78 |     /// Close the innermost level. Mappings inserted at that level stop
 79 |     /// being visible to `get`; they are not physically removed.
 80 |     pub fn pop_level(&mut self) {
 81 |         self.gen_by_level.pop();
 82 |     }
 83 | 
 84 |     /// Map `k` to `v` at the innermost level, replacing any existing
 85 |     /// entry for `k` regardless of the level it was inserted at.
 86 |     pub fn insert(&mut self, k: K, v: V) {
 87 |         let stamp = *self.gen_by_level.last().unwrap();
 88 |         let depth = (self.gen_by_level.len() - 1) as u32;
 89 |         let entry = ScopedMapEntry {
 90 |             gen: stamp,
 91 |             level: depth,
 92 |             value: v,
 93 |         };
 94 |         self.map.insert(k, entry);
 95 |     }
 96 | 
 97 |     /// Look up `k`, returning its value only if the mapping is live.
 98 |     pub fn get(&self, k: &K) -> Option<&V> {
 99 |         let entry = self.map.get(k)?;
100 |         // Live means: the level the entry was inserted at is still
101 |         // open, and that level carries the same generation stamp now
102 |         // as it did at insertion time.
103 |         match self.gen_by_level.get(entry.level as usize) {
104 |             Some(&current) if current == entry.gen => Some(&entry.value),
105 |             _ => None,
106 |         }
107 |     }
108 | }
109 | 


--------------------------------------------------------------------------------
/tests/roundtrip.rs:
--------------------------------------------------------------------------------
 1 | //! Integration test to ensure that roundtripping works.
 2 | 
 3 | use std::path::PathBuf;
 4 | use waffle::{FrontendOptions, Module};
 5 | 
 6 | fn get_wats() -> Vec<PathBuf> {
 7 |     // Fixtures live in `tests/roundtrip` under the current directory.
 8 |     let fixture_dir = std::env::current_dir()
 9 |         .unwrap()
10 |         .join("tests")
11 |         .join("roundtrip");
12 |     let mut wats: Vec<PathBuf> = std::fs::read_dir(fixture_dir)
13 |         .unwrap()
14 |         .map(|item| item.unwrap().path())
15 |         .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some("wat"))
16 |         .collect();
17 |     // `read_dir` order is platform-dependent; sort for a stable test order.
18 |     wats.sort();
19 |     wats
20 | }
21 | 
22 | #[test]
23 | fn idempotent_roundtrips() {
24 |     for wat in get_wats() {
25 |         let bytes1 = wat::parse_file(&wat).unwrap();
26 |         let opts = FrontendOptions::default();
27 |         let module1 = Module::from_wasm_bytes(&bytes1, &opts).unwrap();
28 |         let bytes2 = module1.to_wasm_bytes().unwrap(); // First roundtrip; not compared against bytes1.
29 |         let module2 = Module::from_wasm_bytes(&bytes2, &opts).unwrap();
30 |         let bytes3 = module2.to_wasm_bytes().unwrap(); // Second roundtrip; must reproduce bytes2 exactly.
31 |         assert_eq!(bytes2, bytes3, "roundtrip is not idempotent for {}", wat.display());
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/roundtrip/README.md:
--------------------------------------------------------------------------------
1 | This directory contains Wasm modules (in text format as `.wat` files) that we
2 | roundtrip through waffle. We are testing that we can roundtrip (e.g., that
3 | we support all the included features), and that roundtripping twice gets us the
4 | same module (i.e., that it is idempotent after one "normalization"), but not
5 | that it is comprehensively correct for all of Wasm. Fuzzing (including the
6 | differential-execution targets `differential` and `opt_diff`) should be
7 | considered the more authoritative source of robust roundtripping correctness.
8 | 


--------------------------------------------------------------------------------
/tests/roundtrip/non-nullable-funcrefs.wat:
--------------------------------------------------------------------------------
1 | (module ;; Roundtrip fixture: typed function references in a table and an element segment.
2 |   (type $t (func (param i32) (result i32)))
3 |   (func $f1 (param i32) (result i32) local.get 0)
4 |   (func $f2 (param i32) (result i32) local.get 0)
5 |   (table $t 1 1 (ref null $t)) ;; NOTE(review): limits are min 1 / max 1, yet the segment below lists two items; confirm this is intended.
6 |   (elem $t (i32.const 0) (ref null $t) ;; NOTE(review): the file name says "non-nullable", but every reference type here is `(ref null $t)`.
7 |         (item (ref.func $f1))
8 |         (item (ref.func $f2))))
9 | 


--------------------------------------------------------------------------------
/tests/roundtrip/ref-null.wat:
--------------------------------------------------------------------------------
1 | (module ;; Roundtrip fixture: a function whose body is a single typed `ref.null`.
2 |   (type $t (func (param i32)))
3 |   (func (result (ref null $t)) ;; Returns a nullable reference to function type $t.
4 |         (ref.null $t)))
5 | 


--------------------------------------------------------------------------------
/tests/roundtrip/test-simd.wat:
--------------------------------------------------------------------------------
 1 | (module ;; Roundtrip fixture: a v128 constant, i64x2 lane operations, and v128 load/store.
 2 |   (memory 1 1)
 3 |   (func (export "pack") (param i64 i64) (result v128) ;; Builds a v128 whose two i64 lanes are the two params.
 4 |         v128.const i64x2 0 0
 5 |         local.get 0
 6 |         i64x2.replace_lane 0
 7 |         local.get 1
 8 |         i64x2.replace_lane 1
 9 |         return)
10 |   (func (export "unpack") (param v128) (result i64 i64) ;; Returns lane 0 and lane 1 of the param.
11 |         local.get 0
12 |         i64x2.extract_lane 0
13 |         local.get 0
14 |         i64x2.extract_lane 1
15 |         return)
16 |   (func (export "load") (param i32) (result v128) ;; Loads a v128 from the address in param 0.
17 |         local.get 0
18 |         v128.load)
19 |   (func (export "store") (param i32 v128) ;; Stores param 1 at the address in param 0.
20 |         local.get 0
21 |         local.get 1
22 |         v128.store))
23 | 


--------------------------------------------------------------------------------
/tests/roundtrip/test.wat:
--------------------------------------------------------------------------------
1 | (module ;; Roundtrip fixture: one function that adds two i32 constants.
2 |   (func (result i32)
3 |         i32.const 2
4 |         i32.const 2
5 |         i32.add
6 |         ))
7 | 


--------------------------------------------------------------------------------
/tests/roundtrip/test2.wat:
--------------------------------------------------------------------------------
 1 | (module ;; Roundtrip fixture: an if/else with an i32 result; each arm adds a constant to param 0.
 2 |   (func (param i32) (result i32)
 3 |         local.get 0
 4 |         if (result i32)
 5 |           i32.const 1
 6 |           local.get 0
 7 |           i32.add
 8 |         else
 9 |           i32.const 2
10 |           local.get 0
11 |           i32.add
12 |         end))
13 | 


--------------------------------------------------------------------------------
/tests/roundtrip/typed-funcref.wat:
--------------------------------------------------------------------------------
 1 | (module ;; Roundtrip fixture: tables of typed function references used via call_ref, table.get/set and ref.func.
 2 |   (type $t (func (param i32 i32) (result i32)))
 3 | 
 4 |   (table $tab 10 10 (ref null $t))
 5 |   (table $tab2 10 10 (ref null $t))
 6 | 
 7 |   (elem (table $tab2) (i32.const 0) (ref null $t) (ref.func $f)) ;; Active segment: $f at offset 0 of $tab2.
 8 | 
 9 |   (func $callit (param i32 i32 i32) (result i32) ;; Calls the $tab entry at index param 0 with params 1 and 2.
10 |         (call_ref $t (local.get 1)
11 |                      (local.get 2)
12 |                      (table.get $tab (local.get 0))))
13 | 
14 |   (func $setit (param i32 (ref null $t)) ;; Stores param 1 into $tab at index param 0.
15 |         (table.set $tab (local.get 0) (local.get 1)))
16 | 
17 |   (func $getf (result (ref null $t)) ;; Returns a reference to $f.
18 |         (ref.func $f))
19 | 
20 |   (func $f (param i32 i32) (result i32) ;; Returns its first param.
21 |         local.get 0))
22 | 


--------------------------------------------------------------------------------