├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ ├── diff.rs │ └── instrument.rs ├── src ├── bin │ └── winliner.rs ├── convert.rs ├── counters.rs ├── cow_section.rs ├── instrument.rs ├── lib.rs ├── optimize.rs └── profile.rs └── tests └── all ├── instrument.rs ├── main.rs ├── optimize.rs └── profile.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | test_all_features: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Run tests 18 | run: cargo test --verbose --all-features 19 | 20 | test_no_features: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Run tests 25 | run: cargo test --verbose --no-default-features 26 | 27 | doc: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@v3 31 | - name: Build docs 32 | run: cargo rustdoc -p winliner --all-features -- -D rustdoc::broken-intra-doc-links 33 | 34 | build_cli: 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: actions/checkout@v3 38 | - name: Build CLI 39 | run: cargo build --bin winliner --all-features 40 | 41 | # Check that you can compile the library to Wasm, so that you could extract 42 | # profiles on the Web, for example. 
43 | compile_to_wasm: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v3 47 | - name: Install the wasm32-unknown-unknown target 48 | run: rustup target add wasm32-unknown-unknown 49 | - name: Compile to Wasm 50 | run: cargo build --target wasm32-unknown-unknown 51 | 52 | fuzz: 53 | strategy: 54 | matrix: 55 | fuzz_target: ["diff", "instrument"] 56 | runs-on: ubuntu-latest 57 | steps: 58 | - uses: actions/checkout@v3 59 | - name: Install cargo-fuzz 60 | run: cargo install cargo-fuzz 61 | - name: Run fuzzer for five minutes 62 | run: cargo fuzz run --sanitizer none ${{ matrix.fuzz_target }} -- -max_total_time=300 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | .cargo/ 4 | *.wasm 5 | profile.json 6 | predicate.sh 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Winliner 2 | 3 | ## Building 4 | 5 | ```shell-session 6 | $ cargo build 7 | ``` 8 | 9 | ## Testing 10 | 11 | ```shell-session 12 | $ cargo test 13 | ``` 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Nick Fitzgerald "] 3 | categories = ["command-line-utilities", "development-tools", "wasm"] 4 | description = "The WebAssembly Indirect Call Inliner" 5 | documentation = "https://docs.rs/winliner" 6 | edition = "2021" 7 | exclude = ["**.wasm"] 8 | homepage = "https://github.com/fitzgen/winliner" 9 | license = "Apache-2.0 WITH LLVM-exception" 10 | name = "winliner" 11 | readme = "./README.md" 12 | repository = "https://github.com/fitzgen/winliner" 13 | version = "1.0.1" 14 | 15 | # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html 16 | 17 | [[bin]] 18 | name = "winliner" 19 | required-features = [ 20 | "clap", 21 | "env_logger", 22 | "serde", 23 | "serde_json", 24 | "wasi-cap-std-sync", 25 | "wasmtime", 26 | "wasmtime-wasi", 27 | ] 28 | 29 | [dependencies] 30 | anyhow = "1.0.75" 31 | clap = { version = "4.3.22", features = ["derive"], optional = true } 32 | env_logger = { version = "0.10.0", optional = true } 33 | log = "0.4.20" 34 | serde = { version = "1", optional = true } 35 | serde_json = { version = "1", optional = true } 36 | wasi-cap-std-sync = { version = "11.0.1", optional = true } 37 | wasm-encoder = { version = "0.34.1", features = ["wasmparser"] } 38 | wasmparser = "0.114.0" 39 | wasmtime = { version = "11.0.1", optional = true } 40 | wasmtime-wasi = { version = "11.0.1", optional = true } 41 | 42 | [dev-dependencies] 43 | diff = "0.1.13" 44 | env_logger = "0.10.0" 45 | serde = "1" 46 | serde_json = "1" 47 | wasmprinter = "0.2.62" 48 | wasmtime = "11.0.1" 49 | wat = "1.0.75" 50 | 51 | [workspace] 52 | members = [ 53 | "fuzz", 54 | ] 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | 205 | --- LLVM Exceptions to the Apache 2.0 License ---- 206 | 207 | As an exception, if, as a result of your compiling your source code, portions 208 | of this Software are embedded into an Object form of such source code, you 209 | may redistribute such embedded portions in such Object form without complying 210 | with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 211 | 212 | In addition, if you combine or link compiled forms of this Software with 213 | software that is licensed under the GPLv2 ("Combined Software") and if a 214 | court of competent jurisdiction determines that the patent provision (Section 215 | 3), the indemnity provision (Section 9) or other Section of the License 216 | conflicts with the conditions of the GPLv2, you may retroactively and 217 | prospectively choose to deem waived or otherwise exclude such Section(s) of 218 | the License, but only in their entirety and only with respect to the Combined 219 | Software. 
220 | 221 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

Winliner

3 | 4 |

5 | The WebAssembly indirect call inliner! 6 |

7 | 8 |

9 | build status 10 | Documentation Status 11 |

12 | 13 |

14 | API Docs 15 | | 16 | Contributing 17 |

18 |
19 | 20 | * [About](#about) 21 | * [Install](#install) 22 | * [Example Usage](#example-usage) 23 | * [Caveats](#caveats) 24 | * [Using Winliner as a Library](#using-winliner-as-a-library) 25 | * [Acknowledgements](#acknowledgements) 26 | 27 | ## About 28 | 29 | Winliner speculatively inlines indirect calls in WebAssembly, based on observed 30 | information from a previous profiling phase. This is a form of [profile-guided 31 | optimization] that we affectionately call *winlining*. 32 | 33 | [profile-guided optimization]: https://en.wikipedia.org/wiki/Profile-guided_optimization 34 | 35 | First, Winliner inserts instrumentation to observe the actual target callee of 36 | every indirect call site in your Wasm program. Next, you run the instrumented 37 | program for a while, building up a profile. Finally, you invoke Winliner again, 38 | this time providing it with the recorded profile, and it optimizes your Wasm 39 | program based on the behavior observed in that profile. 40 | 41 | For example, if profiling shows that an indirect call always (or nearly always) 42 | goes to the 42nd entry in the funcrefs table, then Winliner will perform the 43 | following semantically-transparent transformation: 44 | 45 | ```wat 46 | ;; Before: 47 | 48 | call_indirect 49 | 50 | ;; After: 51 | 52 | ;; If the callee index is 42, execute the inlined body of 53 | ;; the associated function. 54 | local.tee $temp 55 | i32.const 42 56 | i32.eq 57 | if 58 | ;; ...inlined body of the callee at table index 42... 59 | else 60 | local.get $temp 61 | call_indirect 62 | end 63 | ``` 64 | 65 | The speculative inlining by itself is generally not a huge performance win, 66 | since CPU indirect branch prediction is very powerful these days. (Although, 67 | depending on the Wasm engine, entering a new function may incur some cost and 68 | inlining does avoid that.) 
The primary benefit is that it allows the Wasm 69 | compiler to "see through" the indirect call and perform subsequent optimizations 70 | (like [GVN] and [LICM]) on the inlined callee's body, which can result in 71 | significant performance benefits. 72 | 73 | [GVN]: https://en.wikipedia.org/wiki/Value_numbering#Global_value_numbering 74 | [LICM]: https://en.wikipedia.org/wiki/Loop-invariant_code_motion 75 | 76 | This technique is similar to *devirtualization* but doesn't require that the 77 | compiler is able to statically determine the callee, nor that the callee is 78 | always a single, particular function 100% of the time. Unlike devirtualization, 79 | Winlining can still optimize indirect calls that go a certain way 99% of the 80 | time and a different way 1% of the time because it can always fall back to an 81 | unoptimized indirect call. 82 | 83 | ## Install 84 | 85 | You can install via `cargo`: 86 | 87 | ```shell-session 88 | $ cargo install winliner --all-features 89 | ``` 90 | 91 | ## Example Usage 92 | 93 | First, instrument your Wasm program: 94 | 95 | ```shell-session 96 | $ winliner instrument my-program.wasm > my-program.instrumented.wasm 97 | ``` 98 | 99 | Next, run the instrumented program to build a profile. This can either be done 100 | in your Wasm environment of choice (e.g. 
the Web) with a little glue code to 101 | extract and shepherd out the profile, or you can run within Winliner itself and 102 | the Wasmtime-based WASI environment that comes with it: 103 | 104 | ```shell-session 105 | $ winliner profile my-program.instrumented.wasm > profile.json 106 | ``` 107 | 108 | Finally, tell Winliner to optimize the original program based on the 109 | `call_indirect` behavior observed in the given profile: 110 | 111 | ```shell-session 112 | $ winliner optimize --profile profile.json my-program.wasm > my-program.winlined.wasm 113 | ``` 114 | 115 | ## Caveats 116 | 117 | * Winliner is not safe in the face of mutations to the `funcref` table, which are 118 | possible via the `table.set` instruction (and others) introduced as part of 119 | [the reference-types 120 | proposal](https://github.com/WebAssembly/reference-types). You must either 121 | disable this proposal or manually uphold the invariant that the `funcref` 122 | table is never mutated. Breaking this invariant will likely lead to diverging 123 | behavior from the original program and very wonky bugs! Any exported funcref 124 | tables must additionally not be mutated by the host. 125 | 126 | * Winliner only optimizes `call_indirect` instructions; it cannot optimize 127 | `call_ref` instructions because WebAssembly function references are not 128 | comparable, so we can't insert the `if actual_callee == speculative_callee` 129 | check. 130 | 131 | * Winliner assumes support for the (widely implemented) multi-value proposal in 132 | its generated code. 
133 | 134 | ## Using Winliner as a Library 135 | 136 | First, add a dependency on Winliner to your `Cargo.toml`: 137 | 138 | ```toml 139 | [dependencies] 140 | winliner = "1" 141 | ``` 142 | 143 | Then, use the library like so: 144 | 145 | ```rust,no_run 146 | use winliner::{InstrumentationStrategy, Instrumenter, Optimizer, Profile, Result}; 147 | 148 | fn main() -> Result<()> { 149 | let original_wasm = std::fs::read("path/to/my.wasm")?; 150 | 151 | // Configure instrumentation. 152 | let mut instrumenter = Instrumenter::new(); 153 | instrumenter.strategy(InstrumentationStrategy::ThreeGlobals); 154 | 155 | // Instrument our wasm. 156 | let instrumented_wasm = instrumenter.instrument(&original_wasm)?; 157 | 158 | // Get a profile for our Wasm program from somewhere. Read it from disk, 159 | // record it now in this process, etc... 160 | // 161 | // See the API docs for `Profile` for more details. 162 | let profile = Profile::default(); 163 | 164 | // Configure optimization and thresholds for inlining. 165 | let mut optimizer = Optimizer::new(); 166 | optimizer 167 | .min_total_calls(100) 168 | .min_ratio(0.8)? 169 | .max_inline_depth(3); 170 | 171 | // Run the optimizer with the given profile! 172 | let optimized_wasm = optimizer.optimize(&profile, &original_wasm)?; 173 | 174 | std::fs::write("path/to/optimized.wasm", optimized_wasm)?; 175 | Ok(()) 176 | } 177 | ``` 178 | 179 | ## Acknowledgements 180 | 181 | The inspiration for this tool -- along with the low-overhead but imprecise 182 | "three globals" instrumentation strategy -- sprang from conversations with 183 | [Chris Fallin] and [Luke Wagner]. 
184 | 185 | [Chris Fallin]: https://github.com/cfallin 186 | [Luke Wagner]: https://github.com/lukewagner 187 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "winliner-fuzz" 3 | version = "0.0.0" 4 | authors = ["Automatically generated"] 5 | publish = false 6 | edition = "2021" 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | libfuzzer-sys = { version = "0.4", features = ["arbitrary-derive"] } 13 | wasm-smith = "0.12.15" 14 | wasmtime = "12.0.2" 15 | 16 | [dependencies.winliner] 17 | path = ".." 18 | 19 | [[bin]] 20 | name = "diff" 21 | path = "fuzz_targets/diff.rs" 22 | test = false 23 | doc = false 24 | 25 | [[bin]] 26 | name = "instrument" 27 | path = "fuzz_targets/instrument.rs" 28 | test = false 29 | doc = false 30 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/diff.rs: -------------------------------------------------------------------------------- 1 | //! Differential fuzzing between original Wasm and optimized Wasm. 2 | //! 3 | //! Even given a bogus profile, speculative inlining should be semantically 4 | //! transparent. 
5 | 6 | #![no_main] 7 | 8 | use libfuzzer_sys::{ 9 | arbitrary::{self, Arbitrary, Unstructured}, 10 | fuzz_target, 11 | }; 12 | use winliner::{Optimizer, Profile, ProfileBuilder}; 13 | 14 | const NUM_INPUTS: usize = 10; 15 | 16 | #[derive(Debug)] 17 | struct OptimizerConfig { 18 | min_total_calls: u64, 19 | min_ratio: f64, 20 | max_inline_depth: usize, 21 | } 22 | 23 | impl<'a> Arbitrary<'a> for OptimizerConfig { 24 | fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { 25 | let min_total_calls = u.int_in_range(0..=1_000)?; 26 | let min_ratio = u.arbitrary::()? as f64 / u8::MAX as f64; 27 | let max_inline_depth = u.int_in_range(0..=1_000)?; 28 | Ok(OptimizerConfig { 29 | min_total_calls, 30 | min_ratio, 31 | max_inline_depth, 32 | }) 33 | } 34 | } 35 | 36 | #[derive(Debug)] 37 | struct FuzzInput { 38 | module: wasm_smith::Module, 39 | profile: Vec>, 40 | optimizer: OptimizerConfig, 41 | inputs: Vec, 42 | } 43 | 44 | impl<'a> Arbitrary<'a> for FuzzInput { 45 | fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { 46 | let profile = u.arbitrary()?; 47 | let optimizer = u.arbitrary()?; 48 | 49 | let mut inputs = Vec::with_capacity(NUM_INPUTS); 50 | for _ in 0..NUM_INPUTS { 51 | inputs.push(u.arbitrary()?); 52 | } 53 | 54 | let mut config: wasm_smith::SwarmConfig = u.arbitrary()?; 55 | 56 | // Don't generate imports, we have nothing for the Wasm to import. 57 | config.max_imports = 0; 58 | 59 | // Always generate at least one export. 60 | config.min_exports = 1; 61 | config.max_exports = config.max_exports.max(config.min_exports); 62 | 63 | // Always generate at least one type, since we will need it to define a 64 | // function and do `call_indirect`s. 65 | config.min_types = 1; 66 | config.max_types = config.max_types.max(config.min_types); 67 | 68 | // Always generate at least one table, since we will need it to do 69 | // `call_indirect`s. 
70 | config.min_tables = 1; 71 | config.max_tables = config.max_tables.max(config.min_tables as _); 72 | 73 | // Always generate at least one function, since we will need it to do 74 | // differential execution. 75 | config.min_funcs = 1; 76 | config.max_funcs = config.max_funcs.max(config.min_funcs as _); 77 | 78 | // No `table.set`, etc... instructions. 79 | config.reference_types_enabled = false; 80 | 81 | let module = wasm_smith::Module::new(config, u)?; 82 | 83 | Ok(FuzzInput { 84 | module, 85 | profile, 86 | optimizer, 87 | inputs, 88 | }) 89 | } 90 | } 91 | 92 | impl FuzzInput { 93 | fn profile(&self) -> Profile { 94 | let mut builder = ProfileBuilder::new(); 95 | for (call_site, calls) in self.profile.iter().enumerate() { 96 | let call_site = u32::try_from(call_site).unwrap(); 97 | for (callee, count) in calls { 98 | for _ in 0..*count { 99 | builder.add_indirect_call(*callee, call_site); 100 | } 101 | } 102 | } 103 | builder.build() 104 | } 105 | 106 | fn optimizer(&self) -> Optimizer { 107 | let mut o = Optimizer::new(); 108 | o.min_total_calls(self.optimizer.min_total_calls); 109 | o.min_ratio(self.optimizer.min_ratio).unwrap(); 110 | o.max_inline_depth(self.optimizer.max_inline_depth); 111 | o 112 | } 113 | } 114 | 115 | fuzz_target!(|input: FuzzInput| { 116 | let original_wasm = input.module.to_bytes(); 117 | let profile = input.profile(); 118 | let optimizer = input.optimizer(); 119 | 120 | let optimized_wasm = match optimizer.optimize(&profile, &original_wasm) { 121 | Ok(x) => x, 122 | Err(_) => return, 123 | }; 124 | 125 | let mut config = wasmtime::Config::new(); 126 | config.wasm_bulk_memory(true); 127 | config.wasm_reference_types(false); 128 | config.consume_fuel(true); 129 | 130 | let engine = wasmtime::Engine::new(&config).unwrap(); 131 | 132 | // Compile both versions of the Wasm. 
133 | let original_module = match wasmtime::Module::new(&engine, &original_wasm) { 134 | Ok(x) => x, 135 | Err(_) => return, 136 | }; 137 | let optimized_module = match wasmtime::Module::new(&engine, &optimized_wasm) { 138 | Ok(x) => x, 139 | Err(_) => return, 140 | }; 141 | 142 | // Find the first exported function. 143 | let (func_name, func_ty) = match original_module.exports().find_map(|exp| match exp.ty() { 144 | wasmtime::ExternType::Func(ty) => Some((exp.name(), ty)), 145 | _ => None, 146 | }) { 147 | Some(x) => x, 148 | None => return, 149 | }; 150 | 151 | for i in 0..input.inputs.len() { 152 | // Build an arguments array from our fuzzer-chosen inputs. 153 | 154 | let mut args = Vec::with_capacity(func_ty.params().len()); 155 | for (j, ty) in func_ty.params().enumerate() { 156 | let x = input.inputs[(i + j) % input.inputs.len()]; 157 | args.push(match ty { 158 | wasmtime::ValType::I32 => wasmtime::Val::I32(x as _), 159 | wasmtime::ValType::I64 => wasmtime::Val::I64(x as _), 160 | wasmtime::ValType::F32 => wasmtime::Val::F32(x as _), 161 | wasmtime::ValType::F64 => wasmtime::Val::F64(x as _), 162 | wasmtime::ValType::V128 => wasmtime::Val::V128(x), 163 | wasmtime::ValType::FuncRef | wasmtime::ValType::ExternRef => return, 164 | }); 165 | } 166 | 167 | // Call the original version of the function. 168 | 169 | let mut store = wasmtime::Store::new(&engine, ()); 170 | store.add_fuel(1_000).unwrap(); 171 | 172 | let instance = match wasmtime::Instance::new(&mut store, &original_module, &[]) { 173 | Ok(x) => x, 174 | Err(_) => return, 175 | }; 176 | 177 | let func = instance.get_func(&mut store, func_name).unwrap(); 178 | let mut original_results = vec![wasmtime::Val::I32(0); func_ty.results().len()]; 179 | let original_result = func.call(&mut store, &args, &mut original_results); 180 | 181 | // Call the optimized version of the function. 
182 | 183 | let mut store = wasmtime::Store::new(&engine, ()); 184 | store.add_fuel(1_000).unwrap(); 185 | 186 | let instance = match wasmtime::Instance::new(&mut store, &optimized_module, &[]) { 187 | Ok(x) => x, 188 | Err(_) => return, 189 | }; 190 | 191 | let func = instance.get_func(&mut store, func_name).unwrap(); 192 | let mut optimized_results = vec![wasmtime::Val::I32(0); func_ty.results().len()]; 193 | let optimized_result = func.call(&mut store, &args, &mut optimized_results); 194 | 195 | // Check that both versions computed the "same" results. 196 | 197 | match (original_result.is_err(), optimized_result.is_err()) { 198 | // If both trapped, then continue to the next input. 199 | (true, true) => continue, 200 | 201 | // If they didn't both trap or both not trap, then this could just 202 | // be a difference of fuel accounting due to inlining vs not. 203 | (true, false) => assert!(original_result.unwrap_err().to_string().contains("fuel")), 204 | (false, true) => assert!(optimized_result.unwrap_err().to_string().contains("fuel")), 205 | 206 | // If neither trapped, then check that they both have the same 207 | // results. 
208 | (false, false) => { 209 | for (original_result, optimized_result) in 210 | original_results.into_iter().zip(optimized_results) 211 | { 212 | match (original_result, optimized_result) { 213 | (wasmtime::Val::I32(a), wasmtime::Val::I32(b)) => assert_eq!(a, b), 214 | (wasmtime::Val::I64(a), wasmtime::Val::I64(b)) => assert_eq!(a, b), 215 | (wasmtime::Val::F32(a), wasmtime::Val::F32(b)) => { 216 | let a: f32 = unsafe { std::mem::transmute(a) }; 217 | let b: f32 = unsafe { std::mem::transmute(b) }; 218 | assert!(a == b || (a.is_nan() && b.is_nan())); 219 | } 220 | (wasmtime::Val::F64(a), wasmtime::Val::F64(b)) => { 221 | let a: f64 = unsafe { std::mem::transmute(a) }; 222 | let b: f64 = unsafe { std::mem::transmute(b) }; 223 | assert!(a == b || (a.is_nan() && b.is_nan())); 224 | } 225 | (wasmtime::Val::ExternRef(_), wasmtime::Val::ExternRef(_)) => continue, 226 | (wasmtime::Val::FuncRef(_), wasmtime::Val::FuncRef(_)) => continue, 227 | _ => unreachable!("mismatched types"), 228 | } 229 | } 230 | } 231 | } 232 | } 233 | }); 234 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/instrument.rs: -------------------------------------------------------------------------------- 1 | //! Fuzz the instrumentation pass. 
2 | 3 | #![no_main] 4 | 5 | use libfuzzer_sys::{ 6 | arbitrary::{self, Arbitrary}, 7 | fuzz_target, 8 | }; 9 | use winliner::{InstrumentationStrategy, Instrumenter}; 10 | 11 | #[derive(Arbitrary, Debug)] 12 | struct InstrumenterOptions { 13 | allow_table_mutation: bool, 14 | allow_arbitrary_element_offsets: bool, 15 | allow_table_imports: bool, 16 | three_globals: bool, 17 | } 18 | 19 | #[derive(Arbitrary, Debug)] 20 | struct FuzzInput { 21 | module: wasm_smith::ConfiguredModule, 22 | instrumenter: InstrumenterOptions, 23 | } 24 | 25 | impl FuzzInput { 26 | fn instrumenter(&self) -> Instrumenter { 27 | let mut i = Instrumenter::new(); 28 | i.allow_table_mutation(self.instrumenter.allow_table_mutation); 29 | i.allow_arbitrary_element_offsets(self.instrumenter.allow_arbitrary_element_offsets); 30 | i.allow_table_imports(self.instrumenter.allow_table_imports); 31 | i.strategy(if self.instrumenter.three_globals { 32 | InstrumentationStrategy::ThreeGlobals 33 | } else { 34 | InstrumentationStrategy::HostCalls 35 | }); 36 | i 37 | } 38 | } 39 | 40 | fuzz_target!(|input: FuzzInput| { 41 | let wasm = input.module.module.to_bytes(); 42 | let instrumenter = input.instrumenter(); 43 | let _ = instrumenter.instrument(&wasm); 44 | }); 45 | -------------------------------------------------------------------------------- /src/bin/winliner.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{ensure, Context, Result}; 2 | use clap::Parser; 3 | use std::io::Write; 4 | use std::path::PathBuf; 5 | use wasmtime_wasi::WasiCtx; 6 | use winliner::{InstrumentationStrategy, Instrumenter, Optimizer, Profile, ProfileBuilder}; 7 | 8 | /// Winliner: The WebAssembly indirect call inliner! 9 | /// 10 | /// Winliner speculatively inlines indirect calls in WebAssembly, based on 11 | /// observed information from a previous profiling phase. This is a form of 12 | /// profile-guided optimization (PGO) that we affectionately call *winlining*. 
13 | /// 14 | /// First, you ask Winliner to instrument your Wasm program to observe the 15 | /// actual target callee of every indirect call site: 16 | /// 17 | /// $ winliner instrument my-program.wasm > my-program.instrumented.wasm 18 | /// 19 | /// Next, you run the instrumented program, building up a profile. This can 20 | /// either be done in your Wasm environment of choice (e.g. the Web) with a 21 | /// little glue code to extract and shepherd out the profile, or you can run 22 | /// within Winliner itself and the Wasmtime-based WASI environment that comes 23 | /// with it: 24 | /// 25 | /// $ winliner profile my-program.instrumented.wasm > profile.json 26 | /// 27 | /// Finally, you invoke Winliner again, this time providing it with the recorded 28 | /// profile, and it optimizes your Wasm program based on the behavior observed 29 | /// in that profile: 30 | /// 31 | /// $ winliner optimize --profile profile.json my-program.wasm > my-program.winlined.wasm 32 | /// 33 | /// You can also use Winliner as a library, rather than a CLI tool, if you 34 | /// prefer. See https://docs.rs/winliner for details. 35 | #[derive(Parser)] 36 | #[clap(author, version, verbatim_doc_comment)] 37 | enum Command { 38 | Instrument(InstrumentCommand), 39 | Profile(ProfileCommand), 40 | Merge(MergeCommand), 41 | Optimize(OptimizeCommand), 42 | } 43 | 44 | fn main() -> Result<()> { 45 | env_logger::init(); 46 | match Command::parse() { 47 | Command::Instrument(i) => instrument(i), 48 | Command::Profile(p) => profile(p), 49 | Command::Merge(m) => merge(m), 50 | Command::Optimize(o) => optimize(o), 51 | } 52 | } 53 | 54 | #[derive(Parser)] 55 | struct InstrumentCommand { 56 | #[clap(flatten)] 57 | instrumenter: Instrumenter, 58 | 59 | /// Where to write the instrumented output Wasm file to. The output is 60 | /// written to stdout if no path is supplied. 61 | #[clap(short, long)] 62 | output: Option, 63 | 64 | /// The Wasm file to be instrumented. 
65 | input: PathBuf, 66 | } 67 | 68 | fn instrument(command: InstrumentCommand) -> Result<()> { 69 | let input = std::fs::read(&command.input) 70 | .with_context(|| format!("failed to read input from '{}'", command.input.display()))?; 71 | 72 | let output = command.instrumenter.instrument(&input)?; 73 | 74 | if let Some(path) = command.output.as_ref() { 75 | std::fs::write(path, &output) 76 | .with_context(|| format!("failed to write output to '{}'", path.display()))?; 77 | } else { 78 | let stdout = std::io::stdout(); 79 | let mut stdout = stdout.lock(); 80 | stdout 81 | .write_all(&output) 82 | .context("failed to write output to stdout")?; 83 | } 84 | 85 | Ok(()) 86 | } 87 | 88 | /// Execute an instrumented Wasm program and collect profiling data. 89 | /// 90 | /// This command will set up a WASI environment, invoke the Wasm's exported 91 | /// `_start` function, collect a profile of the execution, and finally write the 92 | /// profile out to the configured destination. 93 | #[derive(Parser)] 94 | struct ProfileCommand { 95 | /// The strategy with which the Wasm was instrumented. 96 | #[clap(short, long, default_value = "three-globals")] 97 | strategy: InstrumentationStrategy, 98 | 99 | /// Which file system directories should be made available in the Wasm 100 | /// guest? 101 | /// 102 | /// None are available by default. 103 | #[clap(long = "dir", value_name = "directory")] 104 | dirs: Vec, 105 | 106 | /// Which guest directories should be mapped to a host directory? 107 | /// 108 | /// The `--mapdir` option differs from `--dir` in that it allows giving a 109 | /// custom guest name to the directory that is different from its name in 110 | /// the host. 111 | /// 112 | /// None are mapped by default. 113 | #[clap( 114 | long = "mapdir", 115 | value_name = "GUEST_DIR::HOST_DIR", 116 | value_parser = parse_map_dirs, 117 | )] 118 | map_dirs: Vec<(PathBuf, PathBuf)>, 119 | 120 | /// Make the Wasm guest inherit `stdin`, `stderr`, and `stdout`. 
121 | #[clap(long)] 122 | inherit_stdio: bool, 123 | 124 | /// Make the Wasm guest inherit environment variables. 125 | #[clap(long)] 126 | inherit_env: bool, 127 | 128 | /// Where to write the resulting profile to. The profile is written to 129 | /// stdout by default if no path is supplied. 130 | #[clap(short, long)] 131 | output: Option, 132 | 133 | /// The path of the instrumented Wasm module. 134 | #[clap(value_name = "INSTRUMENTED_WASM")] 135 | wasm: PathBuf, 136 | 137 | /// Arguments passed through to Wasm. 138 | #[clap(last = true)] 139 | wasm_args: Vec, 140 | } 141 | 142 | fn profile(command: ProfileCommand) -> Result<()> { 143 | use wasmtime::*; 144 | 145 | let engine = Engine::default(); 146 | let module = Module::from_file(&engine, &command.wasm)?; 147 | 148 | let mut linker = Linker::new(&engine); 149 | wasmtime_wasi::add_to_linker( 150 | &mut linker, 151 | |(_, ctx): &mut (Option, WasiCtx)| ctx, 152 | )?; 153 | 154 | if command.strategy == InstrumentationStrategy::HostCalls { 155 | linker.func_wrap( 156 | "winliner", 157 | "add_indirect_call", 158 | |mut caller: Caller<(Option, WasiCtx)>, callee, call_site| { 159 | let builder = caller.data_mut().0.as_mut().unwrap(); 160 | builder.add_indirect_call(callee, call_site); 161 | }, 162 | )?; 163 | } 164 | 165 | let wasi_ctx = wasi_context(&command)?; 166 | let mut store = Store::new( 167 | &engine, 168 | ( 169 | match command.strategy { 170 | InstrumentationStrategy::ThreeGlobals => None, 171 | InstrumentationStrategy::HostCalls => Some(ProfileBuilder::new()), 172 | }, 173 | wasi_ctx, 174 | ), 175 | ); 176 | 177 | let instance = linker.instantiate(&mut store, &module)?; 178 | let start = instance 179 | .get_typed_func::<(), ()>(&mut store, "_start") 180 | .context("Wasm module must export a `_start: [] -> []` function")?; 181 | start 182 | .call(&mut store, ()) 183 | .context("Error when executing Wasm")?; 184 | 185 | let profile = 186 | match command.strategy { 187 | InstrumentationStrategy::ThreeGlobals 
=> Profile::from_three_globals(|name| { 188 | match instance.get_global(&mut store, name)?.get(&mut store) { 189 | Val::I32(x) => Some(x as u32 as u64), 190 | Val::I64(x) => Some(x as u64), 191 | _ => None, 192 | } 193 | })?, 194 | InstrumentationStrategy::HostCalls => store.data_mut().0.take().unwrap().build(), 195 | }; 196 | 197 | let stdout; 198 | let output_name; 199 | let output: Box = match command.output.as_ref() { 200 | Some(path) => { 201 | output_name = format!("'{}'", path.display()); 202 | Box::new(std::io::BufWriter::new(std::fs::File::create(path)?)) 203 | } 204 | None => { 205 | ensure!( 206 | !command.inherit_stdio, 207 | "Cannot both inherit stdio and print profile to stdout; either do not pass \ 208 | `--inherit-stdio`, or specify an output file for the profile with `-o my-file`." 209 | ); 210 | output_name = "stdout".to_string(); 211 | stdout = std::io::stdout(); 212 | let stdout = stdout.lock(); 213 | Box::new(stdout) 214 | } 215 | }; 216 | 217 | serde_json::to_writer(output, &profile) 218 | .with_context(|| format!("failed to write profile to {output_name}"))?; 219 | 220 | Ok(()) 221 | } 222 | 223 | fn parse_map_dirs(s: &str) -> Result<(PathBuf, PathBuf)> { 224 | let parts: Vec<&str> = s.split("::").collect(); 225 | if parts.len() != 2 { 226 | anyhow::bail!("`--map-dir` values must contain exactly one double colon ('::')"); 227 | } 228 | Ok((parts[0].into(), parts[1].into())) 229 | } 230 | 231 | fn wasi_context(command: &ProfileCommand) -> Result { 232 | let mut ctx = wasi_cap_std_sync::WasiCtxBuilder::new(); 233 | 234 | if command.inherit_stdio { 235 | ctx = ctx.inherit_stdio(); 236 | } 237 | 238 | if command.inherit_env { 239 | ctx = ctx.inherit_env()?; 240 | } 241 | 242 | for dir in &command.dirs { 243 | log::info!("Preopening directory: {}", dir.display()); 244 | let preopened = wasmtime_wasi::sync::Dir::open_ambient_dir( 245 | dir, 246 | wasmtime_wasi::sync::ambient_authority(), 247 | ) 248 | .with_context(|| format!("failed to open 
directory: {}", dir.display()))?; 249 | ctx = ctx.preopened_dir(preopened, dir)?; 250 | } 251 | 252 | for (guest_dir, host_dir) in &command.map_dirs { 253 | log::info!( 254 | "Preopening directory: {}::{}", 255 | guest_dir.display(), 256 | host_dir.display() 257 | ); 258 | let preopened = wasmtime_wasi::sync::Dir::open_ambient_dir( 259 | host_dir, 260 | wasmtime_wasi::sync::ambient_authority(), 261 | ) 262 | .with_context(|| format!("failed to open directory: {}", host_dir.display()))?; 263 | ctx = ctx.preopened_dir(preopened, guest_dir)?; 264 | } 265 | 266 | ctx = ctx.args(&command.wasm_args)?; 267 | 268 | Ok(ctx.build()) 269 | } 270 | 271 | /// Merge multiple profiles into a single profile. 272 | #[derive(Parser)] 273 | struct MergeCommand { 274 | /// Where to write the resulting merged profile to. The merged profile is 275 | /// written to stdout by default if no output path is supplied. 276 | #[clap(short, long)] 277 | output: Option, 278 | 279 | /// The profiles to merge together. 280 | profiles: Vec, 281 | } 282 | 283 | fn merge(command: MergeCommand) -> Result<()> { 284 | let mut merged = Profile::default(); 285 | 286 | for path in &command.profiles { 287 | let file = std::fs::File::open(path) 288 | .with_context(|| format!("failed to open '{}'", path.display()))?; 289 | let file = std::io::BufReader::new(file); 290 | let profile = serde_json::from_reader(file) 291 | .with_context(|| format!("failed to read profile from '{}'", path.display()))?; 292 | merged.merge(&profile); 293 | } 294 | 295 | let stdout; 296 | let output_name; 297 | let output: Box = match command.output.as_ref() { 298 | Some(path) => { 299 | output_name = format!("'{}'", path.display()); 300 | Box::new(std::io::BufWriter::new(std::fs::File::create(path)?)) 301 | } 302 | None => { 303 | output_name = "stdout".to_string(); 304 | stdout = std::io::stdout(); 305 | let stdout = stdout.lock(); 306 | Box::new(stdout) 307 | } 308 | }; 309 | 310 | serde_json::to_writer(output, &merged) 311 | 
.with_context(|| format!("failed to write merged profile to {output_name}"))?; 312 | 313 | Ok(()) 314 | } 315 | 316 | /// Optimize a Wasm program based on profiling data. 317 | /// 318 | /// You must ensure that: 319 | /// 320 | /// 1. The given Wasm must be the original, uninstrumented Wasm program. 321 | /// 322 | /// 2. The profile must have been created from an instrumented version of 323 | /// this Wasm program. 324 | /// 325 | /// Failure to satisfy these requirements may result in a mis-optimized Wasm 326 | /// binary that has divergent behavior from the original Wasm program. 327 | #[derive(Parser)] 328 | struct OptimizeCommand { 329 | #[clap(flatten)] 330 | optimizer: Optimizer, 331 | 332 | /// Where to write the resulting optimzed Wasm to. The optimized Wasm is 333 | /// written to stdout by default if no output path is supplied. 334 | #[clap(short, long)] 335 | output: Option, 336 | 337 | /// The profiling data. 338 | #[clap(short, long, required = true)] 339 | profile: PathBuf, 340 | 341 | /// The original, uninstrumented Wasm program. 
342 | wasm: PathBuf, 343 | } 344 | 345 | fn optimize(command: OptimizeCommand) -> Result<()> { 346 | let file = std::fs::File::open(&command.profile) 347 | .with_context(|| format!("failed to open '{}'", command.profile.display()))?; 348 | let profile: Profile = serde_json::from_reader(file).with_context(|| { 349 | format!( 350 | "failed to read profile from '{}'", 351 | command.profile.display() 352 | ) 353 | })?; 354 | 355 | let wasm = std::fs::read(&command.wasm) 356 | .with_context(|| format!("failed to read '{}'", command.wasm.display()))?; 357 | 358 | let output = command.optimizer.optimize(&profile, &wasm)?; 359 | 360 | if let Some(path) = command.output.as_ref() { 361 | std::fs::write(path, &output) 362 | .with_context(|| format!("failed to write output to '{}'", path.display()))?; 363 | } else { 364 | let stdout = std::io::stdout(); 365 | let mut stdout = stdout.lock(); 366 | stdout 367 | .write_all(&output) 368 | .context("failed to write output to stdout")?; 369 | } 370 | 371 | Ok(()) 372 | } 373 | -------------------------------------------------------------------------------- /src/convert.rs: -------------------------------------------------------------------------------- 1 | //! Conversions from `wasmparser` types to `wasm-encoder` types. 
2 | 3 | use anyhow::{bail, Result}; 4 | 5 | pub fn const_expr_with_func_delta( 6 | const_expr: wasmparser::ConstExpr, 7 | func_delta: u32, 8 | ) -> Result { 9 | let mut ops = const_expr.get_operators_reader().into_iter(); 10 | 11 | let result = match ops.next() { 12 | Some(Ok(wasmparser::Operator::I32Const { value })) => { 13 | wasm_encoder::ConstExpr::i32_const(value) 14 | } 15 | Some(Ok(wasmparser::Operator::I64Const { value })) => { 16 | wasm_encoder::ConstExpr::i64_const(value) 17 | } 18 | Some(Ok(wasmparser::Operator::F32Const { value })) => { 19 | wasm_encoder::ConstExpr::f32_const(value.bits() as _) 20 | } 21 | Some(Ok(wasmparser::Operator::F64Const { value })) => { 22 | wasm_encoder::ConstExpr::f64_const(value.bits() as _) 23 | } 24 | Some(Ok(wasmparser::Operator::V128Const { value })) => { 25 | wasm_encoder::ConstExpr::v128_const(i128::from_le_bytes(*value.bytes())) 26 | } 27 | Some(Ok(wasmparser::Operator::RefNull { hty })) => { 28 | wasm_encoder::ConstExpr::ref_null(hty.into()) 29 | } 30 | Some(Ok(wasmparser::Operator::RefFunc { function_index })) => { 31 | wasm_encoder::ConstExpr::ref_func(function_index + func_delta) 32 | } 33 | Some(Ok(wasmparser::Operator::GlobalGet { global_index })) => { 34 | wasm_encoder::ConstExpr::global_get(global_index) 35 | } 36 | 37 | // TODO: support the extended-const proposal. 38 | Some(Ok(op)) => bail!("invalid const expression: {op:?}"), 39 | 40 | Some(Err(e)) => return Err(e.into()), 41 | None => bail!("empty const expression is invalid"), 42 | }; 43 | 44 | match (ops.next(), ops.next()) { 45 | (Some(Ok(wasmparser::Operator::End)), None) => Ok(result), 46 | _ => bail!("invalid const expression"), 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/counters.rs: -------------------------------------------------------------------------------- 1 | //! Reading the correct-versus-incorrect counters from an optimized instance. 
2 | 3 | use anyhow::{anyhow, ensure, Result}; 4 | 5 | /// Feedback counters for how often speculative inlining guesses were correct or 6 | /// incorrect. 7 | /// 8 | /// After speculatively inlining a callee at a `call_indirect` site, you may 9 | /// want to know whether you speculated correctly in practice or not. Is the 10 | /// training set reflective of your real world workloads? Do your past recorded 11 | /// profiles match current behavior? 12 | /// 13 | /// This type counts how often each `call_indirect`'s target was correctly or 14 | /// incorrectly guessed. 15 | /// 16 | /// Construction of a `FeedbackCounters` relies on the 17 | /// [`emit_feedback_counters`][crate::Optimizer::emit_feedback_counters] option 18 | /// being enabled when you generated the optimized Wasm. If they were not 19 | /// enabled, then you'll get an empty set of counters. 20 | /// 21 | /// ## Serializing and Deserializing `FeedbackCounters` 22 | /// 23 | /// When the `serde` cargo feature is enabled, `FeedbackCounters` implements 24 | /// `serde::Serialize` and `serde::Deserialize`: 25 | /// 26 | /// ``` 27 | /// # fn foo() -> anyhow::Result<()> { 28 | /// #![cfg(feature = "serde")] 29 | /// 30 | /// use winliner::FeedbackCounters; 31 | /// 32 | /// // Read counters in from disk. 33 | /// let file = std::fs::File::open("path/to/my/counters.json")?; 34 | /// let my_counters: FeedbackCounters = serde_json::from_reader(file)?; 35 | /// 36 | /// // Write counters out to disk. 37 | /// let file = std::fs::File::create("path/to/new/counters.json")?; 38 | /// serde_json::to_writer(file, &my_counters)?; 39 | /// # Ok(()) } 40 | /// ``` 41 | #[derive(Default)] 42 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 43 | pub struct FeedbackCounters { 44 | counters: Vec, 45 | total_correct: u64, 46 | total_incorrect: u64, 47 | } 48 | 49 | /// How often a single speculative inlining call site was guessed correctly or 50 | /// incorrectly. 
51 | /// 52 | /// See [`FeedbackCounters`][crate::FeedbackCounters] for more details. 53 | #[derive(Clone, Copy)] 54 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 55 | pub struct FeedbackCounter { 56 | correct: u64, 57 | incorrect: u64, 58 | } 59 | 60 | impl FeedbackCounters { 61 | /// Extract counters from an optimized Wasm program. 62 | /// 63 | /// The program must have been optimized by Winliner with the 64 | /// [`emit_feedback_counters`][crate::Optimizer::emit_feedback_counters] 65 | /// option enabled. 66 | /// 67 | /// To avoid a public dependency on any particular version of Wasmtime (or 68 | /// any other Wasm runtime for that matter) this method takes a callback 69 | /// function to read a global (by name) from a Wasm instance instead of 70 | /// taking the Wasm instance as a parameter directly. It is up to callers to 71 | /// implement this callback function for their Wasm runtime. The callback 72 | /// function must be able to read `i64`-typed Wasm globals. 73 | /// 74 | /// # Example 75 | /// 76 | /// ``` 77 | /// # fn foo() -> wasmtime::Result<()> { 78 | /// use wasmtime::{Instance, Module, Store, Val}; 79 | /// use winliner::FeedbackCounters; 80 | /// 81 | /// // Instantiate your optimized Wasm module. 82 | /// let mut store = Store::<()>::default(); 83 | /// let module = Module::from_file(store.engine(), "path/to/optimized.wasm")?; 84 | /// let instance = Instance::new(&mut store, &module, &[])?; 85 | /// 86 | /// // Run the Wasm instance, call its exports, etc... 87 | /// # let run = |_, _| -> wasmtime::Result<()> { Ok(()) }; 88 | /// run(&mut store, instance)?; 89 | /// 90 | /// // Extract the counters from the instance. 
91 | /// let counters = FeedbackCounters::from_instance(|name| { 92 | /// match instance.get_global(&mut store, name)?.get(&mut store) { 93 | /// Val::I64(x) => Some(x as u64), 94 | /// _ => None, 95 | /// } 96 | /// })?; 97 | /// # Ok(()) 98 | /// # } 99 | /// ``` 100 | pub fn from_instance(mut read_global: impl FnMut(&str) -> Option) -> Result { 101 | let mut counters = vec![]; 102 | let mut total_correct = 0_u64; 103 | let mut total_incorrect = 0_u64; 104 | 105 | for i in 0.. { 106 | let correct = match read_global(&format!("__winliner_counter_{i}_correct")) { 107 | Some(x) => x, 108 | None => break, 109 | }; 110 | total_correct = total_correct.saturating_add(correct); 111 | 112 | let incorrect = 113 | read_global(&format!("__winliner_counter_{i}_incorrect")).ok_or_else(|| { 114 | anyhow!("Failed to read `__winliner_counter_{i}_incorrect` global") 115 | })?; 116 | total_incorrect = total_incorrect.saturating_add(incorrect); 117 | 118 | counters.push(FeedbackCounter { correct, incorrect }); 119 | } 120 | 121 | Ok(FeedbackCounters { 122 | counters, 123 | total_correct, 124 | total_incorrect, 125 | }) 126 | } 127 | 128 | /// Merge another set of counters into this one. 129 | /// 130 | /// The `other` counters are merged into `self`. 131 | /// 132 | /// # Example 133 | /// 134 | /// ``` 135 | /// # fn foo() -> anyhow::Result<()> { 136 | /// use wasmtime::{Engine, Module}; 137 | /// use winliner::FeedbackCounters; 138 | /// 139 | /// // Load the optimized Wasm module. 140 | /// let engine = Engine::default(); 141 | /// let module = Module::from_file(&engine, "path/to/optimized.wasm")?; 142 | /// 143 | /// // Run the Wasm a couple times. 144 | /// # let run_and_get_counters = |_| -> anyhow::Result { unimplemented!() }; 145 | /// let mut counters1: FeedbackCounters = run_and_get_counters(&module)?; 146 | /// let counters2: FeedbackCounters = run_and_get_counters(&module)?; 147 | /// 148 | /// // Finally, combine the two sets of counters into a single set. 
149 | /// counters1.merge(&counters2); 150 | /// # Ok(()) } 151 | /// ``` 152 | pub fn merge(&mut self, other: &Self) -> Result<()> { 153 | ensure!( 154 | self.counters.len() == other.counters.len(), 155 | "incompatible counters: generated from different Wasm modules" 156 | ); 157 | 158 | for (me, them) in self.counters.iter_mut().zip(&other.counters) { 159 | me.correct = me.correct.saturating_add(them.correct); 160 | me.incorrect += me.incorrect.saturating_add(them.incorrect); 161 | } 162 | 163 | self.total_correct = self.total_correct.saturating_add(other.total_correct); 164 | self.total_incorrect = self.total_incorrect.saturating_add(other.total_incorrect); 165 | 166 | Ok(()) 167 | } 168 | 169 | /// Get each counter in this set. 170 | /// 171 | /// You can use this to check for whether any speculative inlining has too 172 | /// high of an incorrect guess rate. 173 | pub fn counters(&self) -> &[FeedbackCounter] { 174 | &self.counters 175 | } 176 | 177 | /// Get the total number of calls represented by this set of counters. 178 | pub fn total(&self) -> u64 { 179 | self.total_correct.saturating_add(self.total_incorrect) 180 | } 181 | 182 | /// Get the total number of times we correctly guessed the callee in this 183 | /// set of counters. 184 | pub fn total_correct(&self) -> u64 { 185 | self.total_correct 186 | } 187 | 188 | /// Get the total number of times we incorrectly guessed the callee in this 189 | /// set of counters. 190 | pub fn total_incorrect(&self) -> u64 { 191 | self.total_incorrect 192 | } 193 | 194 | /// Get the total ratio of correct guesses in this set of counters. 195 | /// 196 | /// Returns `None` when `self.total() == 0`. 197 | pub fn total_correct_ratio(&self) -> Option { 198 | if self.total() > 0 { 199 | Some(self.total_correct as f64 / self.total() as f64) 200 | } else { 201 | None 202 | } 203 | } 204 | 205 | /// Get the total ratio of incorrect guesses in this set of counters. 206 | /// 207 | /// Returns `None` when `self.total() == 0`. 
208 | pub fn total_incorrect_ratio(&self) -> Option { 209 | if self.total() > 0 { 210 | Some(self.total_incorrect as f64 / self.total() as f64) 211 | } else { 212 | None 213 | } 214 | } 215 | } 216 | 217 | impl FeedbackCounter { 218 | /// The number of times we guessed correctly for this speculative inlining. 219 | pub fn correct(&self) -> u64 { 220 | self.correct 221 | } 222 | 223 | /// The number of times we guessed incorrectly for this speculative 224 | /// inlining. 225 | pub fn incorrect(&self) -> u64 { 226 | self.incorrect 227 | } 228 | 229 | /// The total number of calls, correct or incorrect, to this counter's call 230 | /// site. 231 | pub fn total(&self) -> u64 { 232 | self.correct.saturating_add(self.incorrect) 233 | } 234 | 235 | /// The ratio of correct guesses. 236 | /// 237 | /// Returns `None` when `self.total() == 0`. 238 | pub fn correct_ratio(&self) -> Option { 239 | if self.total() > 0 { 240 | Some(self.correct as f64 / self.total() as f64) 241 | } else { 242 | None 243 | } 244 | } 245 | 246 | /// The ratio of incorrect guesses. 247 | /// 248 | /// Returns `None` when `self.total() == 0`. 
249 | pub fn incorrect_ratio(&self) -> Option { 250 | if self.total() > 0 { 251 | Some(self.incorrect as f64 / self.total() as f64) 252 | } else { 253 | None 254 | } 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /src/cow_section.rs: -------------------------------------------------------------------------------- 1 | use wasm_encoder::SectionId; 2 | 3 | pub enum CowSection<'a> { 4 | Borrowed(wasm_encoder::RawSection<'a>), 5 | Owned(OwnedSection), 6 | } 7 | 8 | impl<'a> wasm_encoder::Encode for CowSection<'a> { 9 | fn encode(&self, sink: &mut Vec) { 10 | match self { 11 | CowSection::Borrowed(b) => b.encode(sink), 12 | CowSection::Owned(o) => o.encode(sink), 13 | } 14 | } 15 | } 16 | 17 | impl<'a> wasm_encoder::Section for CowSection<'a> { 18 | fn id(&self) -> u8 { 19 | match self { 20 | CowSection::Borrowed(b) => b.id(), 21 | CowSection::Owned(o) => o.id(), 22 | } 23 | } 24 | } 25 | 26 | pub struct OwnedSection { 27 | id: u8, 28 | data: Vec, 29 | } 30 | 31 | impl wasm_encoder::Encode for OwnedSection { 32 | fn encode(&self, sink: &mut Vec) { 33 | sink.extend(&self.data); 34 | } 35 | } 36 | 37 | impl wasm_encoder::Section for OwnedSection { 38 | fn id(&self) -> u8 { 39 | self.id 40 | } 41 | } 42 | 43 | pub fn borrowed<'a, T>( 44 | new_sections: &mut Vec>, 45 | full_wasm: &'a [u8], 46 | reader: wasmparser::SectionLimited, 47 | id: SectionId, 48 | ) { 49 | let id = id as u8; 50 | log::trace!("Borrowing section {id} and leaving it unmodified"); 51 | new_sections.push(CowSection::Borrowed(wasm_encoder::RawSection { 52 | id, 53 | data: &full_wasm[reader.range()], 54 | })); 55 | } 56 | 57 | pub fn owned<'a>(new_sections: &mut Vec>, section: impl wasm_encoder::Section) { 58 | let id = section.id(); 59 | log::trace!("Adding instrumented section {id}"); 60 | let mut data = vec![]; 61 | section.encode(&mut data); 62 | new_sections.push(CowSection::Owned(OwnedSection { id, data })); 63 | } 64 | 
-------------------------------------------------------------------------------- /src/instrument.rs: -------------------------------------------------------------------------------- 1 | //! Instrumenting a Wasm program to observe indirect call callees. 2 | 3 | use crate::cow_section::{borrowed, owned, CowSection}; 4 | use anyhow::{bail, ensure, Error, Result}; 5 | use std::str::FromStr; 6 | use wasm_encoder::SectionId; 7 | use wasmparser::{Chunk, Payload}; 8 | 9 | #[cfg(feature = "clap")] 10 | use clap::Parser; 11 | 12 | /// Instrument a Wasm binary to collect PGO data. 13 | #[cfg_attr(feature = "clap", derive(Parser))] 14 | pub struct Instrumenter { 15 | /// Allow `funcref` tables to be mutated. 16 | /// 17 | /// By default, Winliner will reject Wasm programs that mutate `funcref` 18 | /// tables, since that can lead to divergence between the original and 19 | /// winlined version of the program. This flag lets you pinky promise that 20 | /// actually it is okay in this particular case (e.g. the program never 21 | /// indirectly calls a modified table element). 22 | #[cfg_attr(feature = "clap", clap(long))] 23 | allow_table_mutation: bool, 24 | 25 | /// Allow arbitrary table element offsets. 26 | /// 27 | /// By default, Winliner only allows constant table element offsets, so that 28 | /// it can determine exactly which function `table[N]` corresponds to. If 29 | /// Winliner doesn't definitively know which function `table[N]` is, it 30 | /// can't be sure that it is inlining the right function. This flag lets you 31 | /// pinky promise that a non-const offset table element isn't going to lead 32 | /// to divergence and misoptimization (e.g. the program never indirectly 33 | /// calls a table element that is initialized by or overwritten by a 34 | /// dynamically initialized table element). 35 | #[cfg_attr(feature = "clap", clap(long))] 36 | allow_arbitrary_element_offsets: bool, 37 | 38 | /// Allow table imports. 
39 | /// 40 | /// By default, Winliner only allows locally-defined tables, and disallows 41 | /// imported tables. This is because Winliner doesn't have any insight into 42 | /// the contents of imported tables, only the contents added from local 43 | /// element segments. This flag lets you pinky promise that an imported 44 | /// table isn't going to lead to divergence and misoptimization. 45 | #[cfg_attr(feature = "clap", clap(long))] 46 | allow_table_imports: bool, 47 | 48 | /// The strategy of instrumentation to use. 49 | /// 50 | /// Choices: 51 | /// 52 | /// * three-globals: A low-overhead but imprecise instrumentation strategy 53 | /// that inserts three globals for every indirect call site. 54 | /// 55 | /// * host-calls: A precise but high-overhead strategy that inserts calls 56 | /// out to the host. 57 | /// 58 | /// See the API documentation for `winliner::InstrumentationStrategy` for 59 | /// more details. 60 | #[cfg_attr(feature = "clap", clap(short, long, default_value = "three-globals"))] 61 | strategy: InstrumentationStrategy, 62 | } 63 | 64 | impl Default for Instrumenter { 65 | fn default() -> Self { 66 | Instrumenter { 67 | allow_table_mutation: false, 68 | allow_arbitrary_element_offsets: false, 69 | allow_table_imports: false, 70 | strategy: InstrumentationStrategy::ThreeGlobals, 71 | } 72 | } 73 | } 74 | 75 | impl Instrumenter { 76 | /// Construct a new `Instrumenter`. 77 | pub fn new() -> Self { 78 | Instrumenter::default() 79 | } 80 | 81 | /// Allow `funcref` tables to be mutated. 82 | /// 83 | /// By default, Winliner will reject Wasm programs that mutate `funcref` 84 | /// tables, since that can lead to divergence between the original and 85 | /// winlined version of the program. This method lets you pinky promise that 86 | /// actually it is okay in this particular case (e.g. the program never 87 | /// indirectly calls a modified table element). 
88 | pub fn allow_table_mutation(&mut self, allow: bool) -> &mut Self { 89 | self.allow_table_mutation = allow; 90 | self 91 | } 92 | 93 | /// Allow arbitrary table element offsets. 94 | /// 95 | /// By default, Winliner only allows constant table element offsets, so that 96 | /// it can determine exactly which function `table[N]` corresponds to. If 97 | /// Winliner doesn't definitively know which function `table[N]` is, it 98 | /// can't be sure that it is inlining the right function. This method lets 99 | /// you pinky promise that a non-const offset table element isn't going to 100 | /// lead to divergence and misoptimization (e.g. the program never 101 | /// indirectly calls a table element that is initialized by or overwritten 102 | /// by a dynamically initialized table element). 103 | pub fn allow_arbitrary_element_offsets(&mut self, allow: bool) -> &mut Self { 104 | self.allow_arbitrary_element_offsets = allow; 105 | self 106 | } 107 | 108 | /// Allow table imports. 109 | /// 110 | /// By default, Winliner only allows locally-defined tables, and disallows 111 | /// imported tables. This is because Winliner doesn't have any insight into 112 | /// the contents of imported tables, only the contents added from local 113 | /// element segments. This method lets you pinky promise that an imported 114 | /// table isn't going to lead to divergence and misoptimization. 115 | pub fn allow_table_imports(&mut self, allow: bool) -> &mut Self { 116 | self.allow_table_imports = allow; 117 | self 118 | } 119 | 120 | /// Configure the instrumentation strategy. 121 | /// 122 | /// See the documentation for [`InstrumentationStrategy`] for details. 123 | pub fn strategy(&mut self, strategy: InstrumentationStrategy) -> &mut Self { 124 | self.strategy = strategy; 125 | self 126 | } 127 | 128 | /// Instrument the given Wasm binary. 129 | /// 130 | /// Returns a new version of the input Wasm binary that is instrumented to 131 | /// record PGO data. 
132 | pub fn instrument(&self, wasm: &[u8]) -> Result> { 133 | let full_wasm = wasm; 134 | let mut wasm = wasm; 135 | let mut parser = wasmparser::Parser::new(0); 136 | let mut validator = wasmparser::Validator::new_with_features(wasmparser::WasmFeatures { 137 | function_references: true, 138 | ..Default::default() 139 | }); 140 | 141 | // The list of `wasm_encoder` sections we will join together as the new, 142 | // instrumented Wasm binary. 143 | let mut new_sections: Vec = vec![]; 144 | 145 | // The index of the type for the host-call imported function. 146 | let mut host_call_type_index = None; 147 | // The index of the host-call imported function. 148 | let mut host_call_func_index = None; 149 | 150 | // The number of functions we have prepended to the function index 151 | // space, and need to shift all other function indices down by this 152 | // much. 153 | let num_prepended_funcs = match self.strategy { 154 | InstrumentationStrategy::ThreeGlobals => 0, 155 | InstrumentationStrategy::HostCalls => 1, 156 | }; 157 | 158 | // The new global and export sections for when we are doing the 159 | // three-global strategy. We add entries to these as we see 160 | // `call_indirect` instructions, so these have to be long-lived and we 161 | // can't just add them to `new_sections` as we see the old global and 162 | // export sections (if any even exist). 163 | let (mut new_global_section, mut new_export_section) = match self.strategy { 164 | InstrumentationStrategy::ThreeGlobals => ( 165 | Some(wasm_encoder::GlobalSection::new()), 166 | Some(wasm_encoder::ExportSection::new()), 167 | ), 168 | InstrumentationStrategy::HostCalls => (None, None), 169 | }; 170 | 171 | // The new code section, containing the instrumented code. 172 | let mut new_code_section = Some(wasm_encoder::CodeSection::new()); 173 | 174 | // The number of defined functions in this Wasm module. 175 | let mut defined_func_count = 0; 176 | // The defined function index we are currently processing. 
177 | let mut current_defined_func = 0; 178 | 179 | // The number of parameters each type defines. 180 | let mut num_params_by_type = vec![]; 181 | 182 | // The type index of each defined function. 183 | let mut defined_func_types = vec![]; 184 | 185 | // The number of indirect call sites we've found. 186 | let mut num_indirect_call_sites = 0; 187 | 188 | // Helper to ensure we've added our modified type section when using the 189 | // host-calls strategy. 190 | let mut added_precise_host_types = false; 191 | let mut ensure_precise_host_type_section = 192 | |new_sections: &mut Vec<_>, 193 | types: Option, 194 | host_call_type_index: &mut Option| 195 | -> Result<()> { 196 | if added_precise_host_types { 197 | return Ok(()); 198 | } 199 | added_precise_host_types = true; 200 | owned( 201 | new_sections, 202 | precise_host_calls_new_type_section(types, host_call_type_index)?, 203 | ); 204 | Ok(()) 205 | }; 206 | 207 | // Helper to ensure we've added our modified import section when using 208 | // the host-calls strategy. 209 | let mut added_precise_host_imports = false; 210 | let mut ensure_precise_host_imports_section = 211 | |new_sections: &mut Vec<_>, 212 | imports: Option, 213 | host_call_type_index: u32, 214 | host_call_func_index: &mut Option| 215 | -> Result<()> { 216 | if added_precise_host_imports { 217 | return Ok(()); 218 | } 219 | added_precise_host_imports = true; 220 | owned( 221 | new_sections, 222 | precise_host_calls_new_import_section( 223 | imports, 224 | host_call_type_index, 225 | host_call_func_index, 226 | )?, 227 | ); 228 | Ok(()) 229 | }; 230 | 231 | loop { 232 | let (consumed, payload) = match parser.parse(wasm, /* eof = */ true)? { 233 | Chunk::NeedMoreData(_) => unreachable!(), 234 | Chunk::Parsed { consumed, payload } => (consumed, payload), 235 | }; 236 | 237 | let mut sub_validator = validator.payload(&payload)?; 238 | match payload { 239 | Payload::Version { .. 
} => {} 240 | 241 | Payload::CustomSection(custom) => { 242 | new_sections.push(CowSection::Borrowed(wasm_encoder::RawSection { 243 | id: SectionId::Custom as _, 244 | data: &full_wasm[custom.range()], 245 | })) 246 | } 247 | 248 | Payload::TypeSection(types) => { 249 | match self.strategy { 250 | InstrumentationStrategy::ThreeGlobals => { 251 | borrowed(&mut new_sections, full_wasm, types.clone(), SectionId::Type) 252 | } 253 | InstrumentationStrategy::HostCalls => { 254 | ensure_precise_host_type_section( 255 | &mut new_sections, 256 | Some(types.clone()), 257 | &mut host_call_type_index, 258 | )?; 259 | } 260 | } 261 | for rec_group in types.into_iter() { 262 | let rec_group = rec_group?; 263 | for ty in rec_group.types() { 264 | let num_params = match &ty.structural_type { 265 | wasmparser::StructuralType::Func(f) => f.params().len() as u32, 266 | wasmparser::StructuralType::Array(_) 267 | | wasmparser::StructuralType::Struct(_) => 0, 268 | }; 269 | num_params_by_type.push(num_params); 270 | } 271 | } 272 | } 273 | 274 | Payload::ImportSection(imports) => { 275 | for imp in imports.clone().into_iter() { 276 | if let wasmparser::TypeRef::Table(_) = imp?.ty { 277 | ensure!( 278 | self.allow_table_imports, 279 | "imported tables are disallowed and can lead to divergence between \ 280 | the original and optimized Wasm programs", 281 | ); 282 | } 283 | } 284 | match self.strategy { 285 | InstrumentationStrategy::ThreeGlobals => { 286 | borrowed(&mut new_sections, full_wasm, imports, SectionId::Import); 287 | } 288 | InstrumentationStrategy::HostCalls => { 289 | ensure_precise_host_type_section( 290 | &mut new_sections, 291 | None, 292 | &mut host_call_type_index, 293 | )?; 294 | ensure_precise_host_imports_section( 295 | &mut new_sections, 296 | Some(imports), 297 | host_call_type_index.unwrap(), 298 | &mut host_call_func_index, 299 | )?; 300 | } 301 | } 302 | } 303 | 304 | Payload::FunctionSection(funcs) => { 305 | if self.strategy == 
InstrumentationStrategy::HostCalls { 306 | ensure_precise_host_type_section( 307 | &mut new_sections, 308 | None, 309 | &mut host_call_type_index, 310 | )?; 311 | ensure_precise_host_imports_section( 312 | &mut new_sections, 313 | None, 314 | host_call_type_index.unwrap(), 315 | &mut host_call_func_index, 316 | )?; 317 | } 318 | borrowed( 319 | &mut new_sections, 320 | full_wasm, 321 | funcs.clone(), 322 | SectionId::Function, 323 | ); 324 | for ty_idx in funcs.into_iter() { 325 | let ty_idx = ty_idx?; 326 | defined_func_types.push(ty_idx); 327 | } 328 | } 329 | 330 | Payload::TableSection(tables) => { 331 | borrowed(&mut new_sections, full_wasm, tables, SectionId::Table) 332 | } 333 | 334 | Payload::MemorySection(memories) => { 335 | borrowed(&mut new_sections, full_wasm, memories, SectionId::Memory) 336 | } 337 | 338 | Payload::TagSection(tags) => { 339 | borrowed(&mut new_sections, full_wasm, tags, SectionId::Tag) 340 | } 341 | 342 | Payload::GlobalSection(globals) => match self.strategy { 343 | InstrumentationStrategy::ThreeGlobals => { 344 | let new_global_section = new_global_section.as_mut().unwrap(); 345 | for global in globals.into_iter() { 346 | let global = global?; 347 | new_global_section 348 | .global(global.ty.into(), &global.init_expr.try_into()?); 349 | } 350 | } 351 | InstrumentationStrategy::HostCalls => { 352 | let mut new_global_section = wasm_encoder::GlobalSection::new(); 353 | for global in globals.into_iter() { 354 | let global = global?; 355 | 356 | new_global_section.global( 357 | global.ty.into(), 358 | &crate::convert::const_expr_with_func_delta( 359 | global.init_expr, 360 | num_prepended_funcs, 361 | )?, 362 | ); 363 | } 364 | owned(&mut new_sections, new_global_section); 365 | } 366 | }, 367 | 368 | Payload::ExportSection(exports) => match self.strategy { 369 | InstrumentationStrategy::ThreeGlobals => { 370 | let new_export_section = new_export_section.as_mut().unwrap(); 371 | for export in exports.into_iter() { 372 | let export = 
export?; 373 | new_export_section.export( 374 | export.name, 375 | export.kind.into(), 376 | export.index, 377 | ); 378 | } 379 | } 380 | InstrumentationStrategy::HostCalls => { 381 | borrowed(&mut new_sections, full_wasm, exports, SectionId::Export) 382 | } 383 | }, 384 | 385 | Payload::StartSection { func, range: _ } => owned( 386 | &mut new_sections, 387 | wasm_encoder::StartSection { 388 | function_index: func + num_prepended_funcs, 389 | }, 390 | ), 391 | 392 | Payload::ElementSection(elements) => { 393 | for elem in elements.clone().into_iter() { 394 | let elem = elem?; 395 | if let wasmparser::ElementKind::Active { 396 | table_index: _, 397 | offset_expr, 398 | } = elem.kind 399 | { 400 | let mut ops = offset_expr.get_operators_reader().into_iter(); 401 | match (ops.next(), ops.next(), ops.next()) { 402 | ( 403 | Some(Ok(wasmparser::Operator::I32Const { .. })), 404 | Some(Ok(wasmparser::Operator::End)), 405 | None, 406 | ) => {} 407 | _ => ensure!( 408 | self.allow_arbitrary_element_offsets, 409 | "unsupported table element offset: only `i32.const N` offsets \ 410 | are allowed" 411 | ), 412 | } 413 | } 414 | } 415 | 416 | if num_prepended_funcs == 0 { 417 | borrowed(&mut new_sections, full_wasm, elements, SectionId::Element); 418 | } else { 419 | owned( 420 | &mut new_sections, 421 | precise_host_calls_new_elements_section(num_prepended_funcs, elements)?, 422 | ); 423 | } 424 | } 425 | 426 | Payload::DataCountSection { count, range: _ } => { 427 | owned(&mut new_sections, wasm_encoder::DataCountSection { count }) 428 | } 429 | 430 | Payload::CodeSectionStart { count, .. 
} => { 431 | defined_func_count = count; 432 | } 433 | Payload::CodeSectionEntry(body) => { 434 | match sub_validator { 435 | wasmparser::ValidPayload::Func(validator, body) => { 436 | let allocs = wasmparser::FuncValidatorAllocations::default(); 437 | let mut validator = validator.into_validator(allocs); 438 | validator.validate(&body)?; 439 | sub_validator = wasmparser::ValidPayload::Ok; 440 | } 441 | _ => unreachable!(), 442 | } 443 | 444 | let mut locals = body.get_locals_reader()?; 445 | let mut new_locals = Vec::with_capacity(locals.get_count() as usize); 446 | let mut num_locals = 0; 447 | for _ in 0..locals.get_count() { 448 | let (count, ty) = locals.read()?; 449 | num_locals += count; 450 | new_locals.push((count, ty.into())); 451 | } 452 | 453 | // Add a new temporary local for storing the current callee. 454 | let num_params = num_params_by_type 455 | [defined_func_types[current_defined_func as usize] as usize]; 456 | let current_callee_local = num_params + num_locals; 457 | new_locals.push((1, wasm_encoder::ValType::I32)); 458 | 459 | let mut new_func = wasm_encoder::Function::new(new_locals); 460 | let mut ops = body 461 | .get_operators_reader()? 462 | .into_iter_with_offsets() 463 | .peekable(); 464 | while let Some(op_and_offset) = ops.next() { 465 | let (op, offset) = op_and_offset?; 466 | 467 | match op { 468 | wasmparser::Operator::TableCopy { .. } 469 | | wasmparser::Operator::TableSet { .. } 470 | | wasmparser::Operator::TableFill { .. } 471 | | wasmparser::Operator::TableInit { .. } => { 472 | // TODO: Only do this check for funcref tables. 473 | if !self.allow_table_mutation { 474 | bail!( 475 | "Found table mutation instruction at offset {offset}: {op:?}\n\ 476 | \n\ 477 | Table mutation is disallowed, since it makes winlining \n\ 478 | unsound, and can lead to divergence between the original and\n\ 479 | winlined version of the program." 
480 | ); 481 | } 482 | } 483 | 484 | wasmparser::Operator::Call { function_index } => { 485 | new_func.instruction(&wasm_encoder::Instruction::Call( 486 | function_index + num_prepended_funcs, 487 | )); 488 | continue; 489 | } 490 | wasmparser::Operator::RefFunc { function_index } => { 491 | new_func.instruction(&wasm_encoder::Instruction::RefFunc( 492 | function_index + num_prepended_funcs, 493 | )); 494 | continue; 495 | } 496 | 497 | wasmparser::Operator::CallIndirect { .. } => { 498 | let call_site_index = num_indirect_call_sites; 499 | num_indirect_call_sites += 1; 500 | 501 | match self.strategy { 502 | InstrumentationStrategy::ThreeGlobals => { 503 | let new_global_section = 504 | new_global_section.as_mut().unwrap(); 505 | let new_export_section = 506 | new_export_section.as_mut().unwrap(); 507 | 508 | let mut new_exported_global = |name: String, ty, init| { 509 | let global = new_global_section.len(); 510 | new_global_section.global(ty, &init); 511 | new_export_section.export( 512 | name.as_str(), 513 | wasm_encoder::ExportKind::Global, 514 | global, 515 | ); 516 | global 517 | }; 518 | 519 | let call_site_total_global = new_exported_global( 520 | format!("__winliner_call_site_{call_site_index}_total"), 521 | wasm_encoder::GlobalType { 522 | val_type: wasm_encoder::ValType::I64, 523 | mutable: true, 524 | }, 525 | wasm_encoder::ConstExpr::i64_const(0), 526 | ); 527 | 528 | let last_callee_global = new_exported_global( 529 | format!("__winliner_call_site_{call_site_index}_last_callee"), 530 | wasm_encoder::GlobalType { 531 | val_type: wasm_encoder::ValType::I32, 532 | mutable: true, 533 | }, 534 | wasm_encoder::ConstExpr::i32_const(-1), 535 | ); 536 | 537 | let last_callee_count_global = new_exported_global( 538 | format!("__winliner_call_site_{call_site_index}_last_callee_count"), 539 | wasm_encoder::GlobalType { 540 | val_type: wasm_encoder::ValType::I64, 541 | mutable: true, 542 | }, 543 | wasm_encoder::ConstExpr::i64_const(0), 544 | ); 545 | 546 | // 
Emit the following code: 547 | // 548 | // ```wat 549 | // ;; $call_site_total += 1 550 | // global.get $call_site_total 551 | // i64.const 1 552 | // i64.add 553 | // global.set $call_site_total 554 | // 555 | // ;; if $last_callee != $current_callee { 556 | // ;; $last_callee = $current_callee 557 | // ;; $last_callee_count = 0 558 | // ;; } 559 | // local.tee $current_callee 560 | // global.get $last_callee 561 | // i32.ne 562 | // if 563 | // local.get $current_callee 564 | // global.set $last_callee 565 | // i64.const 0 566 | // global.set $last_callee_count 567 | // end 568 | // 569 | // ;; $last_callee_count += 1 570 | // global.get $last_callee_count 571 | // i64.const 1 572 | // i64.add 573 | // global.set $last_callee_count 574 | // 575 | // ;; Finally, restore the operand stack for the indirect call. 576 | // local.get $current_callee 577 | // ``` 578 | new_func 579 | .instruction(&wasm_encoder::Instruction::GlobalGet( 580 | call_site_total_global, 581 | )) 582 | .instruction(&wasm_encoder::Instruction::I64Const(1)) 583 | .instruction(&wasm_encoder::Instruction::I64Add) 584 | .instruction(&wasm_encoder::Instruction::GlobalSet( 585 | call_site_total_global, 586 | )) 587 | .instruction(&wasm_encoder::Instruction::LocalTee( 588 | current_callee_local, 589 | )) 590 | .instruction(&wasm_encoder::Instruction::GlobalGet( 591 | last_callee_global, 592 | )) 593 | .instruction(&wasm_encoder::Instruction::I32Ne) 594 | .instruction(&wasm_encoder::Instruction::If( 595 | wasm_encoder::BlockType::Empty, 596 | )) 597 | .instruction(&wasm_encoder::Instruction::LocalGet( 598 | current_callee_local, 599 | )) 600 | .instruction(&wasm_encoder::Instruction::GlobalSet( 601 | last_callee_global, 602 | )) 603 | .instruction(&wasm_encoder::Instruction::I64Const(0)) 604 | .instruction(&wasm_encoder::Instruction::GlobalSet( 605 | last_callee_count_global, 606 | )) 607 | .instruction(&wasm_encoder::Instruction::End) 608 | .instruction(&wasm_encoder::Instruction::GlobalGet( 609 | 
last_callee_count_global, 610 | )) 611 | .instruction(&wasm_encoder::Instruction::I64Const(1)) 612 | .instruction(&wasm_encoder::Instruction::I64Add) 613 | .instruction(&wasm_encoder::Instruction::GlobalSet( 614 | last_callee_count_global, 615 | )) 616 | .instruction(&wasm_encoder::Instruction::LocalGet( 617 | current_callee_local, 618 | )); 619 | } 620 | InstrumentationStrategy::HostCalls => { 621 | // Emit the following code: 622 | // 623 | // ``` 624 | // local.tee $current_callee 625 | // i32.const 626 | // call $winliner_add_indirect_call 627 | // local.get $current_callee 628 | // ``` 629 | new_func 630 | .instruction(&wasm_encoder::Instruction::LocalTee( 631 | current_callee_local, 632 | )) 633 | .instruction(&wasm_encoder::Instruction::I32Const( 634 | call_site_index, 635 | )) 636 | .instruction(&wasm_encoder::Instruction::Call( 637 | host_call_func_index.unwrap(), 638 | )) 639 | .instruction(&wasm_encoder::Instruction::LocalGet( 640 | current_callee_local, 641 | )); 642 | } 643 | } 644 | } 645 | _ => {} 646 | } 647 | 648 | let start = offset; 649 | 650 | // Find the start of the next instruction, aka 651 | // the end of this instruction, and copy over 652 | // this instruction's raw bytes to the new 653 | // function. 654 | let end = ops 655 | .peek() 656 | .map_or(Ok(body.range().end), |res| res.clone().map(|(_, off)| off))?; 657 | 658 | new_func.raw(full_wasm[start..end].iter().copied()); 659 | } 660 | 661 | new_code_section.as_mut().unwrap().function(&new_func); 662 | current_defined_func += 1; 663 | if current_defined_func >= defined_func_count { 664 | owned(&mut new_sections, new_code_section.take().unwrap()); 665 | } 666 | } 667 | 668 | Payload::DataSection(data) => { 669 | borrowed(&mut new_sections, full_wasm, data, SectionId::Data) 670 | } 671 | 672 | Payload::ModuleSection { .. } 673 | | Payload::InstanceSection(_) 674 | | Payload::CoreTypeSection(_) 675 | | Payload::ComponentSection { .. 
} 676 | | Payload::ComponentInstanceSection(_) 677 | | Payload::ComponentAliasSection(_) 678 | | Payload::ComponentTypeSection(_) 679 | | Payload::ComponentCanonicalSection(_) 680 | | Payload::ComponentStartSection { .. } 681 | | Payload::ComponentImportSection(_) 682 | | Payload::ComponentExportSection(_) => { 683 | unreachable!("component model not supported yet; disabled in validator") 684 | } 685 | 686 | Payload::UnknownSection { 687 | id, 688 | contents: _, 689 | range, 690 | } => bail!("unknown section with id {id} at range {range:?}"), 691 | 692 | Payload::End(_) => break, 693 | } 694 | 695 | match sub_validator { 696 | wasmparser::ValidPayload::Ok | wasmparser::ValidPayload::End(_) => { 697 | wasm = &wasm[consumed..]; 698 | } 699 | _ => unreachable!(), 700 | } 701 | } 702 | 703 | log::trace!("Building final instrumented module"); 704 | let mut module = wasm_encoder::Module::new(); 705 | for section in &new_sections { 706 | use wasm_encoder::Section; 707 | 708 | if new_global_section 709 | .as_ref() 710 | .map_or(false, |s| s.id() < section.id()) 711 | { 712 | let s = new_global_section.take().unwrap(); 713 | log::trace!("Appending section id: {}", s.id()); 714 | module.section(&s); 715 | } 716 | 717 | if new_export_section 718 | .as_ref() 719 | .map_or(false, |s| s.id() < section.id()) 720 | { 721 | let s = new_export_section.take().unwrap(); 722 | log::trace!("Appending section id: {}", s.id()); 723 | module.section(&s); 724 | } 725 | 726 | log::trace!("Appending section id: {}", section.id()); 727 | module.section(&*section); 728 | } 729 | Ok(module.finish()) 730 | } 731 | } 732 | 733 | fn precise_host_calls_new_type_section( 734 | types: Option, 735 | host_call_type_index: &mut Option, 736 | ) -> Result { 737 | let mut new_types = wasm_encoder::TypeSection::new(); 738 | 739 | if let Some(types) = types { 740 | for rec_group in types.into_iter() { 741 | for ty in rec_group?.into_types() { 742 | let ty: wasmparser::SubType = ty; 743 | 
new_types.subtype(&ty.into()); 744 | } 745 | } 746 | } 747 | 748 | *host_call_type_index = Some(new_types.len()); 749 | new_types.function([wasm_encoder::ValType::I32, wasm_encoder::ValType::I32], []); 750 | 751 | Ok(new_types) 752 | } 753 | 754 | fn precise_host_calls_new_import_section( 755 | imports: Option, 756 | host_call_type_index: u32, 757 | host_call_func_index: &mut Option, 758 | ) -> Result { 759 | let mut new_imports = wasm_encoder::ImportSection::new(); 760 | 761 | *host_call_func_index = Some(0); 762 | new_imports.import( 763 | "winliner", 764 | "add_indirect_call", 765 | wasm_encoder::EntityType::Function(host_call_type_index), 766 | ); 767 | 768 | if let Some(imports) = imports { 769 | for import in imports.into_iter() { 770 | let import = import?; 771 | new_imports.import( 772 | import.module, 773 | import.name, 774 | wasm_encoder::EntityType::from(import.ty), 775 | ); 776 | } 777 | } 778 | 779 | Ok(new_imports) 780 | } 781 | 782 | fn precise_host_calls_new_elements_section( 783 | num_prepended_funcs: u32, 784 | elements: wasmparser::ElementSectionReader, 785 | ) -> Result { 786 | let mut new_elements = wasm_encoder::ElementSection::new(); 787 | for elem in elements.into_iter() { 788 | let elem = elem?; 789 | 790 | let funcs; 791 | let exprs; 792 | let elements = match elem.items { 793 | wasmparser::ElementItems::Functions(items) => { 794 | funcs = items.into_iter().collect::, _>>()?; 795 | wasm_encoder::Elements::Functions(&funcs) 796 | } 797 | wasmparser::ElementItems::Expressions(ref_ty, items) => { 798 | exprs = items 799 | .into_iter() 800 | .map(|expr| { 801 | let expr = expr?; 802 | crate::convert::const_expr_with_func_delta(expr, num_prepended_funcs) 803 | }) 804 | .collect::, _>>()?; 805 | wasm_encoder::Elements::Expressions(ref_ty.into(), &exprs) 806 | } 807 | }; 808 | 809 | match elem.kind { 810 | wasmparser::ElementKind::Passive => { 811 | new_elements.passive(elements); 812 | } 813 | wasmparser::ElementKind::Active { 814 | table_index, 
815 | offset_expr, 816 | } => { 817 | new_elements.active(table_index, &offset_expr.try_into()?, elements); 818 | } 819 | wasmparser::ElementKind::Declared => { 820 | new_elements.declared(elements); 821 | } 822 | } 823 | } 824 | Ok(new_elements) 825 | } 826 | 827 | /// The instrumentation strategy for recording profiling data. 828 | #[derive(Clone, Copy, PartialEq, Eq)] 829 | pub enum InstrumentationStrategy { 830 | /// A low-overhead but imprecise instrumentation strategy that records 831 | /// profiling information in globals. 832 | /// 833 | /// This strategy adds three globals per indirect call site: 834 | /// 835 | /// 1. The total number of calls for this call site. 836 | /// 2. The table index of the last indirect callee. 837 | /// 3. The number of times the last callee has been called. 838 | /// 839 | /// A `call_indirect` then becomes the following sequence: 840 | /// 841 | /// ```wat 842 | /// ;; $total_count += 1 843 | /// global.get $total_count 844 | /// i64.const 1 845 | /// i64.add 846 | /// global.set $total_count 847 | /// 848 | /// ;; if $last_callee != $current_callee { 849 | /// ;; $last_callee = $current_callee 850 | /// ;; $last_callee_count = 0 851 | /// ;; } 852 | /// local.tee $current_callee 853 | /// global.get $last_callee 854 | /// i32.eq 855 | /// i32.eqz 856 | /// if 857 | /// local.get $current_callee 858 | /// global.set $last_callee 859 | /// i64.const 0 860 | /// global.set $last_callee_count 861 | /// end 862 | /// 863 | /// ;; $last_callee_count += 1 864 | /// global.get $last_callee_count 865 | /// i64.const 1 866 | /// i64.add 867 | /// global.set $last_callee_count 868 | /// 869 | /// ;; Finally, do the actual indirect call. 870 | /// local.get $current_callee 871 | /// call_indirect 872 | /// ``` 873 | /// 874 | /// When `$last_callee_count / $total_count` is high enough, then winlining 875 | /// `table[$last_callee]` is beneficial. 
876 | /// 877 | /// Note that this strategy is imprecise and is easily defeated by the 878 | /// following sequence of calls: 879 | /// 880 | /// * Indirect call to `f` 881 | /// * Indirect call to `f` 882 | /// * Indirect call to `f` 883 | /// * ... many, many times ... 884 | /// * Indirect call to `f` 885 | /// * Indirect call to `g` 886 | /// 887 | /// In this case, winlining `f` would be beneficial, but we don't learn that 888 | /// because the last call to `g` clears that information away. 889 | /// 890 | /// However, this instrumentation's overhead is low enough that it is 891 | /// practical to run the instrumented Wasm programs in many production 892 | /// scenarios. 893 | ThreeGlobals, 894 | 895 | /// A precise but high-overhead strategy that inserts calls out to the host. 896 | /// 897 | /// This strategy inserts a call to an imported host function before every 898 | /// `call_indirect` instruction, letting the host record precise information 899 | /// about the number of indirect calls per call site and which function was 900 | /// the callee. 901 | /// 902 | /// The imported instrumentation function has the following module, name, 903 | /// and signature: 904 | /// 905 | /// ```wat 906 | /// (import "winliner" "add_indirect_call" (func (param i32 i32))) 907 | /// ``` 908 | /// 909 | /// Each `call_indirect` is then transformed into the following sequence: 910 | /// 911 | /// ```wat 912 | /// ;; Call out to the host to record the indirect call. 913 | /// local.tee $current_callee 914 | /// i32.const 1234 ;; This is the 1234th indirect call site. 915 | /// call $winliner_add_indirect_call 916 | /// 917 | /// ;; Finally, do the actual indirect call. 
918 | /// local.get $current_callee 919 | /// call_indirect 920 | /// ``` 921 | /// 922 | /// When using this strategy, it is your responsibility to provide the host 923 | /// function that this instrumentation inserts calls to, and to build up the 924 | /// profiles using the [`ProfileBuilder`][crate::ProfileBuilder] type. 925 | /// 926 | /// Note that, while this strategy yields precise profiling information, it 927 | /// incurs fairly high overheads, likely making it unacceptable to run the 928 | /// instrumented Wasm programs in production scenarios. 929 | HostCalls, 930 | } 931 | 932 | impl FromStr for InstrumentationStrategy { 933 | type Err = Error; 934 | fn from_str(s: &str) -> Result { 935 | match s { 936 | "three-globals" => Ok(InstrumentationStrategy::ThreeGlobals), 937 | "host-calls" => Ok(InstrumentationStrategy::HostCalls), 938 | _ => bail!( 939 | "Unknown instrumentation strategy '{s}'; valid strategies are: three-globals, \ 940 | host-calls" 941 | ), 942 | } 943 | } 944 | } 945 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![doc = include_str!("../README.md")] 2 | #![deny(missing_docs)] 3 | 4 | mod convert; 5 | mod counters; 6 | mod cow_section; 7 | mod instrument; 8 | mod optimize; 9 | mod profile; 10 | 11 | pub use anyhow::Result; 12 | pub use counters::{FeedbackCounter, FeedbackCounters}; 13 | pub use instrument::{InstrumentationStrategy, Instrumenter}; 14 | pub use optimize::Optimizer; 15 | pub use profile::{Profile, ProfileBuilder}; 16 | -------------------------------------------------------------------------------- /src/optimize.rs: -------------------------------------------------------------------------------- 1 | //! Optimizing a Wasm program, assuming the behavior observed in the given 2 | //! profile. 
3 | 4 | use std::collections::{HashMap, HashSet}; 5 | 6 | use crate::cow_section::{borrowed, owned, CowSection}; 7 | use crate::Profile; 8 | use anyhow::{bail, ensure, Result}; 9 | use wasm_encoder::SectionId; 10 | use wasmparser::{Chunk, Payload}; 11 | 12 | #[cfg(feature = "clap")] 13 | use clap::Parser; 14 | 15 | /// Optimize a Wasm program based on profiling data. 16 | /// 17 | /// # Example 18 | /// 19 | /// ``` 20 | /// # fn foo() -> anyhow::Result<()> { 21 | /// use winliner::{Optimizer, Profile}; 22 | /// 23 | /// // Create and configure an optimizer. 24 | /// let mut optimizer = Optimizer::new(); 25 | /// optimizer 26 | /// .min_total_calls(100) 27 | /// .min_ratio(0.99)? 28 | /// .max_inline_depth(5); 29 | /// 30 | /// // Get the original, uninstrumented Wasm program. 31 | /// let wasm = std::fs::read("path/to/my.wasm")?; 32 | /// 33 | /// // Get a profile for our Wasm program from somewhere. Read it from disk, 34 | /// // record it now in this process, etc... 35 | /// let profile = Profile::default(); 36 | /// 37 | /// // Run the optimizer with the given profile! 38 | /// let optimized_wasm = optimizer.optimize(&profile, &wasm)?; 39 | /// # Ok(()) } 40 | /// ``` 41 | #[cfg_attr(feature = "clap", derive(Parser))] 42 | pub struct Optimizer { 43 | /// The minimum number of total calls for a call site before it is 44 | /// considered for winlining. 45 | #[cfg_attr(feature = "clap", clap(long, default_value = "1000"))] 46 | min_total_calls: u64, 47 | 48 | /// The minimum ratio of all calls at a call site that go to a particular 49 | /// callee before the callee is considered for winlining. Must be between 50 | /// 0.0 and 1.0. 51 | #[cfg_attr(feature = "clap", clap(long, default_value = "0.9"))] 52 | min_ratio: f64, 53 | 54 | /// The maximum inlining depth. 55 | /// 56 | /// This can help limit code size blowup from duplicating many function 57 | /// bodies during inlining. 
58 | #[cfg_attr(feature = "clap", clap(long, default_value = "1"))] 59 | max_inline_depth: usize, 60 | 61 | /// Emit feedback counters for how often our speculative inlining guesses 62 | /// were correct or incorrect. 63 | /// 64 | /// When this option is enabled, we will add two globals for each call site 65 | /// where we winlined a speculative callee: 66 | /// 67 | /// 1. A counter for how many times we guessed correctly. 68 | /// 69 | /// 2. A counter for how many times we guessed incorrectly. 70 | /// 71 | /// You can extract this data when using Winliner as a library with the 72 | /// `FeedbackCounters` type. 73 | #[cfg_attr(feature = "clap", clap(long))] 74 | emit_feedback_counters: bool, 75 | 76 | /// Set the maximum optimization fuel. 77 | /// 78 | /// This option is useful for Winliner developers, and unlikely to be useful 79 | /// to anyone else. 80 | /// 81 | /// Allows bisecting optimization bugs to find which optimization site is 82 | /// buggy. 83 | #[cfg_attr(feature = "clap", clap(long))] 84 | fuel: Option, 85 | } 86 | 87 | impl Default for Optimizer { 88 | fn default() -> Self { 89 | Optimizer { 90 | min_total_calls: 1000, 91 | min_ratio: 0.9, 92 | max_inline_depth: 1, 93 | emit_feedback_counters: false, 94 | fuel: None, 95 | } 96 | } 97 | } 98 | 99 | impl Optimizer { 100 | /// Create a new, default optimizer. 101 | pub fn new() -> Self { 102 | Default::default() 103 | } 104 | 105 | /// The minimum number of total calls for a call site before it is 106 | /// considered for winlining. 107 | pub fn min_total_calls(&mut self, min: u64) -> &mut Self { 108 | self.min_total_calls = min; 109 | self 110 | } 111 | 112 | /// The minimum ratio of all calls at a call site that go to a particular 113 | /// callee before the callee is considered for winlining. 114 | /// 115 | /// Must be between 0.0 and 1.0. 
116 | pub fn min_ratio(&mut self, min: f64) -> Result<&mut Self> { 117 | ensure!( 118 | 0.0 <= min && min <= 1.0, 119 | "The `min_ratio` value must be between 0.0 and 1.0", 120 | ); 121 | self.min_ratio = min; 122 | Ok(self) 123 | } 124 | 125 | /// The maximum inlining depth. 126 | /// 127 | /// This can help limit code size blowup from duplicating many function 128 | /// bodies during inlining. 129 | pub fn max_inline_depth(&mut self, max: usize) -> &mut Self { 130 | self.max_inline_depth = max; 131 | self 132 | } 133 | 134 | /// Whether to emit feedback counters for how often our speculative inlining 135 | /// guesses were correct or incorrect. 136 | /// 137 | /// When this option is enabled, we will add two globals for each call site 138 | /// where we winlined a speculative callee: 139 | /// 140 | /// 1. A counter for how many times we guessed correctly. 141 | /// 142 | /// 2. A counter for how many times we guessed incorrectly. 143 | /// 144 | /// You can extract this data using the 145 | /// [`FeedbackCounters`][crate::FeedbackCounters] type. 146 | /// 147 | /// This option is `false` by default. 148 | pub fn emit_feedback_counters(&mut self, emit: bool) -> &mut Self { 149 | self.emit_feedback_counters = emit; 150 | self 151 | } 152 | 153 | /// Set the maximum optimization fuel. 154 | /// 155 | /// This option is useful for Winliner developers, and unlikely to be useful 156 | /// to anyone else. 157 | /// 158 | /// Allows bisecting optimization bugs to find which optimization site is 159 | /// buggy. 160 | pub fn fuel(&mut self, fuel: Option) -> &mut Self { 161 | self.fuel = fuel; 162 | self 163 | } 164 | 165 | /// Optimize the given Wasm binary. 166 | /// 167 | /// Callers must ensure that: 168 | /// 169 | /// 1. The given Wasm must be the original, uninstrumented Wasm program. 170 | /// 171 | /// 2. The profile must have been created from an instrumented version of 172 | /// this Wasm program. 
173 | /// 174 | /// Failure to satisfy these requirements may result in a mis-optimized Wasm 175 | /// binary that has divergent behavior from the original Wasm program. 176 | pub fn optimize(&self, profile: &Profile, wasm: &[u8]) -> Result> { 177 | // NB: Have to re-validate because the `clap`-parsed values aren't 178 | // validated upon construction. 179 | if self.min_ratio < 0.0 || 1.0 < self.min_ratio { 180 | bail!("The `--min-ratio` value must be between 0.0 and 1.0"); 181 | } 182 | 183 | let mut wasm = wasm; 184 | let mut parser = wasmparser::Parser::new(0); 185 | 186 | let mut context = OptimizeContext { 187 | fuel: self.fuel, 188 | id_counter: 0, 189 | full_wasm: wasm, 190 | profile, 191 | num_imported_funcs: 0, 192 | types: vec![], 193 | funcs: vec![], 194 | first_call_site_offset_for_func: vec![], 195 | tables: TablesInfo::default(), 196 | func_bodies: vec![], 197 | new_global_section: if self.emit_feedback_counters { 198 | Some(wasm_encoder::GlobalSection::new()) 199 | } else { 200 | None 201 | }, 202 | new_export_section: if self.emit_feedback_counters { 203 | Some(wasm_encoder::ExportSection::new()) 204 | } else { 205 | None 206 | }, 207 | }; 208 | 209 | // The list of `wasm_encoder` sections we will join together as the new, 210 | // instrumented Wasm binary. 211 | let mut new_sections: Vec = vec![]; 212 | 213 | loop { 214 | let eof = true; 215 | let (consumed, payload) = match parser.parse(wasm, eof)? { 216 | Chunk::NeedMoreData(_) => unreachable!(), 217 | Chunk::Parsed { consumed, payload } => (consumed, payload), 218 | }; 219 | 220 | match payload { 221 | Payload::Version { .. 
} => {} 222 | 223 | Payload::CustomSection(custom) => { 224 | new_sections.push(CowSection::Borrowed(wasm_encoder::RawSection { 225 | id: SectionId::Custom as _, 226 | data: &context.full_wasm[custom.range()], 227 | })) 228 | } 229 | 230 | Payload::TypeSection(tys) => { 231 | borrowed( 232 | &mut new_sections, 233 | context.full_wasm, 234 | tys.clone(), 235 | SectionId::Type, 236 | ); 237 | for rec_group in tys.into_iter() { 238 | for ty in rec_group?.types() { 239 | context.types.push(ty.clone()); 240 | } 241 | } 242 | } 243 | 244 | Payload::ImportSection(imports) => { 245 | borrowed( 246 | &mut new_sections, 247 | context.full_wasm, 248 | imports.clone(), 249 | SectionId::Import, 250 | ); 251 | for imp in imports.into_iter() { 252 | let imp = imp?; 253 | match imp.ty { 254 | wasmparser::TypeRef::Func(_) => context.num_imported_funcs += 1, 255 | wasmparser::TypeRef::Table(table_ty) => { 256 | context.tables.push_imported_table(&table_ty) 257 | } 258 | _ => {} 259 | } 260 | } 261 | } 262 | 263 | Payload::FunctionSection(funcs) => { 264 | borrowed( 265 | &mut new_sections, 266 | context.full_wasm, 267 | funcs.clone(), 268 | SectionId::Function, 269 | ); 270 | for func_ty in funcs.into_iter() { 271 | context.funcs.push(func_ty?); 272 | } 273 | } 274 | 275 | Payload::TableSection(ts) => { 276 | borrowed( 277 | &mut new_sections, 278 | context.full_wasm, 279 | ts.clone(), 280 | SectionId::Table, 281 | ); 282 | for table in ts.into_iter() { 283 | let table = table?; 284 | context.tables.push_defined_table(&table); 285 | } 286 | } 287 | 288 | Payload::MemorySection(memories) => borrowed( 289 | &mut new_sections, 290 | context.full_wasm, 291 | memories, 292 | SectionId::Memory, 293 | ), 294 | 295 | Payload::TagSection(tags) => { 296 | borrowed(&mut new_sections, context.full_wasm, tags, SectionId::Tag) 297 | } 298 | 299 | Payload::GlobalSection(globals) => { 300 | if self.emit_feedback_counters { 301 | let new_global_section = context.new_global_section.as_mut().unwrap(); 
302 | for global in globals.into_iter() { 303 | let global = global?; 304 | new_global_section 305 | .global(global.ty.into(), &global.init_expr.try_into()?); 306 | } 307 | } else { 308 | borrowed( 309 | &mut new_sections, 310 | context.full_wasm, 311 | globals, 312 | SectionId::Global, 313 | ) 314 | } 315 | } 316 | 317 | Payload::ExportSection(exports) => { 318 | if self.emit_feedback_counters { 319 | let new_export_section = context.new_export_section.as_mut().unwrap(); 320 | for export in exports.into_iter() { 321 | let export = export?; 322 | new_export_section.export( 323 | export.name, 324 | export.kind.into(), 325 | export.index, 326 | ); 327 | } 328 | } else { 329 | borrowed( 330 | &mut new_sections, 331 | context.full_wasm, 332 | exports, 333 | SectionId::Export, 334 | ) 335 | } 336 | } 337 | 338 | Payload::StartSection { func, range: _ } => owned( 339 | &mut new_sections, 340 | wasm_encoder::StartSection { 341 | function_index: func, 342 | }, 343 | ), 344 | 345 | Payload::ElementSection(elements) => { 346 | borrowed( 347 | &mut new_sections, 348 | context.full_wasm, 349 | elements.clone(), 350 | SectionId::Element, 351 | ); 352 | for elem in elements.into_iter() { 353 | let elem = elem?; 354 | match elem.kind { 355 | wasmparser::ElementKind::Active { 356 | table_index, 357 | offset_expr, 358 | } => { 359 | let mut ops = offset_expr.get_operators_reader().into_iter(); 360 | match (ops.next(), ops.next(), ops.next()) { 361 | ( 362 | Some(Ok(wasmparser::Operator::I32Const { value })), 363 | Some(Ok(wasmparser::Operator::End)), 364 | None, 365 | ) => { 366 | context.tables[table_index.unwrap_or(0)] 367 | .add_elements(value as u32, elem.items)?; 368 | } 369 | _ => log::warn!("Ignoring non-constant element segment"), 370 | } 371 | } 372 | wasmparser::ElementKind::Passive 373 | | wasmparser::ElementKind::Declared => {} 374 | } 375 | } 376 | } 377 | 378 | Payload::DataCountSection { count, range: _ } => { 379 | owned(&mut new_sections, 
wasm_encoder::DataCountSection { count }) 380 | } 381 | 382 | Payload::CodeSectionStart { .. } => {} 383 | Payload::CodeSectionEntry(body) => context.func_bodies.push(body), 384 | 385 | Payload::DataSection(data) => { 386 | borrowed(&mut new_sections, context.full_wasm, data, SectionId::Data) 387 | } 388 | 389 | Payload::ModuleSection { .. } 390 | | Payload::InstanceSection(_) 391 | | Payload::CoreTypeSection(_) 392 | | Payload::ComponentSection { .. } 393 | | Payload::ComponentInstanceSection(_) 394 | | Payload::ComponentAliasSection(_) 395 | | Payload::ComponentTypeSection(_) 396 | | Payload::ComponentCanonicalSection(_) 397 | | Payload::ComponentStartSection { .. } 398 | | Payload::ComponentImportSection(_) 399 | | Payload::ComponentExportSection(_) => { 400 | unreachable!("component model not supported yet; disabled in validator") 401 | } 402 | 403 | Payload::UnknownSection { 404 | id, 405 | contents: _, 406 | range, 407 | } => bail!("unknown section with id {id} at range {range:?}"), 408 | 409 | Payload::End(_) => break, 410 | } 411 | 412 | wasm = &wasm[consumed..]; 413 | } 414 | 415 | let mut new_code_section = Some(self.optimize_func_bodies(&mut context)?); 416 | 417 | log::trace!("Building final optimized module"); 418 | let mut module = wasm_encoder::Module::new(); 419 | for section in &new_sections { 420 | use wasm_encoder::Section; 421 | 422 | if context 423 | .new_global_section 424 | .as_ref() 425 | .map_or(false, |s| s.id() < section.id()) 426 | { 427 | let s = context.new_global_section.take().unwrap(); 428 | log::trace!("Appending section id: {}", s.id()); 429 | module.section(&s); 430 | } 431 | 432 | if context 433 | .new_export_section 434 | .as_ref() 435 | .map_or(false, |s| s.id() < section.id()) 436 | { 437 | let s = context.new_export_section.take().unwrap(); 438 | log::trace!("Appending section id: {}", s.id()); 439 | module.section(&s); 440 | } 441 | 442 | if new_code_section.as_ref().map_or(false, |s| { 443 | s.id() < section.id() && 
section.id() != SectionId::DataCount as u8 444 | }) { 445 | let s = new_code_section.take().unwrap(); 446 | log::trace!("Appending section id: {}", s.id()); 447 | module.section(&s); 448 | } 449 | 450 | log::trace!("Appending section id: {}", section.id()); 451 | module.section(&*section); 452 | } 453 | if let Some(s) = new_code_section { 454 | module.section(&s); 455 | } 456 | Ok(module.finish()) 457 | } 458 | 459 | fn optimize_func_bodies( 460 | &self, 461 | context: &mut OptimizeContext, 462 | ) -> Result { 463 | let mut call_site_index = 0; 464 | for body in context.func_bodies.iter() { 465 | context 466 | .first_call_site_offset_for_func 467 | .push(call_site_index); 468 | for op in body.get_operators_reader()?.into_iter() { 469 | match op? { 470 | wasmparser::Operator::CallIndirect { .. } => call_site_index += 1, 471 | _ => {} 472 | } 473 | } 474 | } 475 | 476 | let mut new_code_section = wasm_encoder::CodeSection::new(); 477 | for (defined_func_index, body) in context.func_bodies.clone().into_iter().enumerate() { 478 | let func_type = context.funcs[defined_func_index]; 479 | let defined_func_index = u32::try_from(defined_func_index).unwrap(); 480 | let func = self.optimize_one_func_body(context, defined_func_index, func_type, body)?; 481 | new_code_section.function(&func); 482 | } 483 | Ok(new_code_section) 484 | } 485 | 486 | fn optimize_one_func_body<'a>( 487 | &self, 488 | context: &mut OptimizeContext<'a, '_>, 489 | defined_func_index: u32, 490 | func_type: u32, 491 | func_body: wasmparser::FunctionBody<'a>, 492 | ) -> Result { 493 | // Copy the locals to the new function. 
494 | let mut locals = vec![]; 495 | let func_type = &context.types[usize::try_from(func_type).unwrap()]; 496 | let mut num_locals = match &func_type.structural_type { 497 | wasmparser::StructuralType::Func(ty) => u32::try_from(ty.params().len()).unwrap(), 498 | _ => bail!("invalid type for defined function {defined_func_index}"), 499 | }; 500 | for local in func_body.get_locals_reader()?.into_iter() { 501 | let (count, ty) = local?; 502 | num_locals += count; 503 | locals.push((count, ty.into())); 504 | } 505 | 506 | // Add our temporary local where we save a copy of indirect callee 507 | // indices. 508 | let temp_callee_local = num_locals; 509 | locals.push((1, wasm_encoder::ValType::I32)); 510 | num_locals += 1; 511 | 512 | // The instructions making up the new body of the optimized function. 513 | let mut new_insts: Vec = vec![]; 514 | 515 | // Stack of functions to copy over to the new, optimized function. The 516 | // root is the original function itself and any subsequent entries are 517 | // being inlined into it. As we find a `call_indirect` that we'd like to 518 | // winline, we push new entries, as we finish winlining we pop 519 | // enties. Once we've popped the initial entry, we are done rewriting 520 | // this function. 521 | let mut stack = vec![{ 522 | let ops = func_body 523 | .get_operators_reader()? 524 | .into_iter_with_offsets() 525 | .peekable(); 526 | StackEntry { 527 | call_site_index: context.first_call_site_offset_for_func 528 | [usize::try_from(defined_func_index).unwrap()], 529 | defined_func_index, 530 | locals_delta: 0, 531 | func_body, 532 | ops, 533 | call_indirect_info: None, 534 | counters: None, 535 | label_depth_to_return_to: None, 536 | } 537 | }]; 538 | 539 | // The set of defined function indices that are in the process of being 540 | // copyied/inlined and are on our stack. We use this to break inlining 541 | // cycles. 
542 | let mut on_stack: HashSet = HashSet::from_iter(Some(defined_func_index)); 543 | 544 | while let Some(entry) = stack.last_mut() { 545 | use wasmparser::Operator; 546 | 547 | let (op, offset) = match entry.ops.next() { 548 | Some(x) => x?, 549 | None => { 550 | // If we did not just finish the outermost function, then we 551 | // need to add the fallback path for when we incorrectly 552 | // guess the callee. 553 | if let Some(info) = entry.call_indirect_info { 554 | // This is what we want `new_insts` to look like: 555 | // 556 | // ... 557 | // if 558 | // 559 | // end <---- This is last instruction 560 | // currently in `new_insts` and 561 | // everything following needs to 562 | // be inserted. 563 | // else 564 | // local.get $temp_callee_local 565 | // call_indirect 566 | // end 567 | // ... 568 | // 569 | // Except that we don't want the `end` that immediately 570 | // follows the inlined body. Note that it is there 571 | // because Wasm functions always terminate with an `end` 572 | // instruction. We don't want it in our inlined version 573 | // of the function because it would terminate the `if` 574 | // block without a chance for us to add our `else`. So 575 | // pop it now; we can reuse it for closing the `else`. 
576 | let end = new_insts 577 | .pop() 578 | .expect("inlined function should have ended with `end` instruction"); 579 | 580 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::Else)); 581 | 582 | if self.emit_feedback_counters { 583 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::GlobalGet( 584 | entry.counters.unwrap().1, 585 | ))); 586 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::I64Const(1))); 587 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::I64Add)); 588 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::GlobalSet( 589 | entry.counters.unwrap().1, 590 | ))); 591 | } 592 | 593 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::LocalGet( 594 | temp_callee_local, 595 | ))); 596 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::CallIndirect { 597 | ty: info.type_index, 598 | table: info.table_index, 599 | })); 600 | new_insts.push(end); 601 | } 602 | 603 | on_stack.remove(&entry.defined_func_index); 604 | stack.pop(); 605 | continue; 606 | } 607 | }; 608 | 609 | match op { 610 | Operator::CallIndirect { 611 | type_index, 612 | table_index, 613 | table_byte: _, 614 | } => { 615 | entry.call_site_index += 1; 616 | if let Some(new_entry) = self.try_enqueue_for_winlining( 617 | context, 618 | &on_stack, 619 | &mut locals, 620 | &mut num_locals, 621 | &mut new_insts, 622 | temp_callee_local, 623 | entry.call_site_index - 1, 624 | table_index, 625 | type_index, 626 | )? { 627 | on_stack.insert(new_entry.defined_func_index); 628 | stack.push(new_entry); 629 | } else { 630 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::CallIndirect { 631 | ty: type_index, 632 | table: table_index, 633 | })); 634 | } 635 | } 636 | 637 | // `local.{get,set,tee}` instruction's need their local index adjusted. 
638 | Operator::LocalGet { local_index } if entry.locals_delta != 0 => { 639 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::LocalGet( 640 | local_index + entry.locals_delta, 641 | ))) 642 | } 643 | Operator::LocalSet { local_index } if entry.locals_delta != 0 => { 644 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::LocalSet( 645 | local_index + entry.locals_delta, 646 | ))) 647 | } 648 | Operator::LocalTee { local_index } if entry.locals_delta != 0 => { 649 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::LocalTee( 650 | local_index + entry.locals_delta, 651 | ))) 652 | } 653 | 654 | // Returns inside of inlined callees need to become branches out 655 | // of the inline body -- not returns from the caller function. 656 | Operator::Return => { 657 | new_insts.push(CowInst::Owned( 658 | if let Some(n) = entry.label_depth_to_return_to { 659 | wasm_encoder::Instruction::Br(n) 660 | } else { 661 | wasm_encoder::Instruction::Return 662 | }, 663 | )); 664 | } 665 | 666 | // TODO(#9): Support `return_call` and `return_call_indirect` 667 | Operator::ReturnCall { .. } => bail!("`return_call` is not currently supported"), 668 | Operator::ReturnCallIndirect { .. } => { 669 | bail!("`return_call_indirect` is not currently supported") 670 | } 671 | 672 | // All other instructions can just be copied over! 673 | op => { 674 | // Bookkeeping to update `label_depth_to_return_to` as 675 | // necessary when we enter and exit control blocks. 676 | if let Some(n) = entry.label_depth_to_return_to.as_mut() { 677 | match op { 678 | Operator::Block { .. } 679 | | Operator::Loop { .. } 680 | | Operator::If { .. } => { 681 | *n += 1; 682 | } 683 | Operator::End => { 684 | // NB: has to be a saturating decrement because 685 | // all function bodies are terminated with an 686 | // `end` that didn't have a corresponding 687 | // `block` or whatever increment. 
688 | *n = n.saturating_sub(1); 689 | } 690 | _ => {} 691 | } 692 | } 693 | 694 | let start = offset; 695 | 696 | // Find the start of the next instruction, aka 697 | // the end of this instruction, and copy over 698 | // this instruction's raw bytes to the new 699 | // function. 700 | let end = entry 701 | .ops 702 | .peek() 703 | .map_or(Ok(entry.func_body.range().end), |res| { 704 | res.clone().map(|(_, off)| off) 705 | })?; 706 | 707 | new_insts.push(CowInst::Borrowed(&context.full_wasm[start..end])); 708 | } 709 | } 710 | } 711 | 712 | let mut func = wasm_encoder::Function::new(locals); 713 | for inst in new_insts { 714 | match inst { 715 | CowInst::Borrowed(bytes) => { 716 | func.raw(bytes.iter().copied()); 717 | } 718 | CowInst::Owned(inst) => { 719 | func.instruction(&inst); 720 | } 721 | } 722 | } 723 | Ok(func) 724 | } 725 | 726 | fn try_enqueue_for_winlining<'a, 'b>( 727 | &self, 728 | context: &mut OptimizeContext<'a, 'b>, 729 | on_stack: &HashSet, 730 | locals: &mut Vec<(u32, wasm_encoder::ValType)>, 731 | num_locals: &mut u32, 732 | new_insts: &mut Vec, 733 | temp_callee_local: u32, 734 | call_site_index: u32, 735 | table_index: u32, 736 | type_index: u32, 737 | ) -> Result>> { 738 | // Check whether we have enough fuel to perform this winlining, and 739 | // consume one unit of fuel if we do. 740 | if let Some(fuel) = context.fuel.as_mut() { 741 | if *fuel == 0 { 742 | return Ok(None); 743 | } 744 | *fuel -= 1; 745 | } 746 | 747 | // If we haven't already reached our maximum inlining depth... 748 | if (on_stack.len() - 1) >= self.max_inline_depth { 749 | return Ok(None); 750 | } 751 | 752 | // ... and we have profiling information for this call site... 753 | let call_site = match context.profile.call_sites.get(&call_site_index) { 754 | Some(x) => x, 755 | None => return Ok(None), 756 | }; 757 | 758 | // ... and this call site is hot enough... 
759 | if call_site.total_call_count < self.min_total_calls { 760 | return Ok(None); 761 | } 762 | 763 | // ... then get the hottest callee table index... 764 | let callee = call_site 765 | .callee_to_count 766 | .iter() 767 | .map(|(callee, count)| (*callee, *count)) 768 | .max_by(|a, b| a.1.cmp(&b.1).then(b.0.cmp(&a.0))); 769 | let (callee_index_in_table, callee_count) = match callee { 770 | Some(x) => x, 771 | None => return Ok(None), 772 | }; 773 | 774 | // ... and if that hottest callee is called often enough... 775 | let callee_ratio = callee_count as f64 / call_site.total_call_count as f64; 776 | if callee_ratio < self.min_ratio { 777 | return Ok(None); 778 | } 779 | 780 | // ... and if we statically know what function index `table[callee]` 781 | // is... 782 | let callee_func_index = match context.tables[table_index].get(callee_index_in_table) { 783 | Some(x) => x, 784 | None => return Ok(None), 785 | }; 786 | 787 | // ... and if that function index is not already on our winlining stack 788 | // (i.e. we aren't in a recursive inlining chain)... 789 | let defined_func_index = callee_func_index 790 | .checked_sub(context.num_imported_funcs) 791 | .unwrap(); 792 | if on_stack.contains(&defined_func_index) { 793 | return Ok(None); 794 | } 795 | 796 | // ... and if that function has the correct type (if this is not true 797 | // then either the profile is bogus/mismatched or every time the call 798 | // site was executed it trapped)... 799 | if context.funcs[usize::try_from(defined_func_index).unwrap()] != type_index { 800 | return Ok(None); 801 | } 802 | 803 | // ... then we can winline this callee and push it onto our stack! 804 | 805 | let locals_delta = *num_locals; 806 | let func_body = context.func_bodies[usize::try_from(defined_func_index).unwrap()].clone(); 807 | let ops = func_body 808 | .get_operators_reader()? 
809 | .into_iter_with_offsets() 810 | .peekable(); 811 | 812 | // Add the first half of our speculative inlining sequence, before the 813 | // inlined body: 814 | // 815 | // local.tee $temp_callee_local 816 | // i32.const 817 | // i32.eq 818 | // if (param ...) (result ...) 819 | // local.set $callee_param_0 820 | // local.set $callee_param_1 821 | // ... 822 | // local.set $callee_param_N <------- everything here and above 823 | // 824 | // else 825 | // local.get $temp_callee_local 826 | // call_indirect 827 | // end 828 | // 829 | // Note that it might seem like nested inlining might clobber 830 | // `temp_callee_local`, but it is actually not live in the `if` body, 831 | // only in the `else` body. Therefore, if nested inlining clobbers the 832 | // local, it is actually okay, since it won't be read again be this call 833 | // site. And if it is going to be read again by this call site, then we 834 | // aren't executing the inline body, and so it can't be clobbered. 835 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::LocalTee( 836 | temp_callee_local, 837 | ))); 838 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::I32Const( 839 | callee_index_in_table as i32, 840 | ))); 841 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::I32Eq)); 842 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::If( 843 | wasm_encoder::BlockType::FunctionType(type_index), 844 | ))); 845 | 846 | // If we are emitting counters, then create the counters for this 847 | // winlining and add the increment for the correct-guess counter. 
848 | let counters = if self.emit_feedback_counters { 849 | let id = context.next_id(); 850 | let new_global_section = context.new_global_section.as_mut().unwrap(); 851 | let new_export_section = context.new_export_section.as_mut().unwrap(); 852 | 853 | let correct = new_global_section.len(); 854 | new_global_section.global( 855 | wasm_encoder::GlobalType { 856 | val_type: wasm_encoder::ValType::I64, 857 | mutable: true, 858 | }, 859 | &wasm_encoder::ConstExpr::i64_const(0), 860 | ); 861 | new_export_section.export( 862 | &format!("__winliner_counter_{id}_correct"), 863 | wasm_encoder::ExportKind::Global, 864 | correct, 865 | ); 866 | 867 | let incorrect = new_global_section.len(); 868 | new_global_section.global( 869 | wasm_encoder::GlobalType { 870 | val_type: wasm_encoder::ValType::I64, 871 | mutable: true, 872 | }, 873 | &wasm_encoder::ConstExpr::i64_const(0), 874 | ); 875 | new_export_section.export( 876 | &format!("__winliner_counter_{id}_incorrect"), 877 | wasm_encoder::ExportKind::Global, 878 | incorrect, 879 | ); 880 | 881 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::GlobalGet( 882 | correct, 883 | ))); 884 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::I64Const(1))); 885 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::I64Add)); 886 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::GlobalSet( 887 | correct, 888 | ))); 889 | 890 | Some((correct, incorrect)) 891 | } else { 892 | None 893 | }; 894 | 895 | // The callee function assumes that its parameters are in 896 | // locals, but they are currently on the operand 897 | // stack. Therefore, we need to "spill" them from the 898 | // operand stack to a new set of locals in this function and 899 | // update the locals delta appropriately. 
900 | let ty = match &context.types[usize::try_from(type_index).unwrap()].structural_type { 901 | wasmparser::StructuralType::Func(ty) => ty, 902 | _ => bail!("function's type must be a function type"), 903 | }; 904 | 905 | // First, create the locals for the parameters in order. 906 | for param_ty in ty.params() { 907 | locals.push((1, (*param_ty).into())); 908 | *num_locals += 1; 909 | } 910 | 911 | // Then "spill" them from the operand stack in reverse order. 912 | for local in (locals_delta..*num_locals).rev() { 913 | new_insts.push(CowInst::Owned(wasm_encoder::Instruction::LocalSet(local))); 914 | } 915 | 916 | // Finally, create any additional locals that the callee function needs. 917 | for l in func_body.get_locals_reader()?.into_iter() { 918 | let (count, ty) = l?; 919 | *num_locals += count; 920 | locals.push((count, ty.into())); 921 | } 922 | 923 | Ok(Some(StackEntry { 924 | defined_func_index, 925 | call_site_index: context.first_call_site_offset_for_func 926 | [usize::try_from(defined_func_index).unwrap()], 927 | locals_delta, 928 | func_body, 929 | ops, 930 | call_indirect_info: Some(StackEntryCallIndirectInfo { 931 | type_index, 932 | table_index, 933 | }), 934 | counters, 935 | label_depth_to_return_to: Some(0), 936 | })) 937 | } 938 | } 939 | 940 | /// Common context needed when optimizing function bodies. 941 | struct OptimizeContext<'a, 'b> { 942 | /// Optimization fuel. 943 | fuel: Option, 944 | 945 | /// A counter for generating unique identifiers for each function inlining / 946 | /// copying. 947 | /// 948 | /// Note that this is different from `StackEntry::call_site_index` since the 949 | /// same call site can be inlined multiple times due to inline chaining. 950 | id_counter: u32, 951 | 952 | /// The Wasm module's full bytes. 953 | full_wasm: &'a [u8], 954 | 955 | /// The profile for which we are optimizing with the assumption that future 956 | /// behavior will match the behavior described in this profile. 
957 | profile: &'b Profile, 958 | 959 | /// The number of imported functions. 960 | num_imported_funcs: u32, 961 | 962 | /// The types index space. 963 | types: Vec, 964 | 965 | /// A map from defined function index to type. 966 | funcs: Vec, 967 | 968 | /// A map from defined function index to the call site index offset for that 969 | /// function (i.e. the count of how many `call_indirect` instructions 970 | /// appeared in the code section before this function body). 971 | first_call_site_offset_for_func: Vec, 972 | 973 | /// The static information we have about the tables present in the module. 974 | tables: TablesInfo, 975 | 976 | /// The function bodies for this Wasm module. 977 | func_bodies: Vec>, 978 | 979 | /// The new global section we are building, when we are emitting counters. 980 | new_global_section: Option, 981 | 982 | /// The new export section we are building, when we are emitting counters. 983 | new_export_section: Option, 984 | } 985 | 986 | impl OptimizeContext<'_, '_> { 987 | fn next_id(&mut self) -> u32 { 988 | let id = self.id_counter; 989 | self.id_counter += 1; 990 | id 991 | } 992 | } 993 | 994 | /// Information about a function we are currently copying/inlining. 995 | /// 996 | /// An entry can either be: 997 | /// 998 | /// * The root function which we are copying from the original module to the 999 | /// optimized module. This is always the first entry on the stack. 1000 | /// 1001 | /// * Or a function we are inlining into the next older stack entry's function 1002 | /// at a `call_indirect` call site. 1003 | struct StackEntry<'a> { 1004 | /// The defined function index of the function we are currently inlining. 1005 | defined_func_index: u32, 1006 | 1007 | /// The current `call_indirect` index we are processing. 1008 | call_site_index: u32, 1009 | 1010 | /// The delta to apply to all `local.{get,set,tee}` instructions when 1011 | /// inlining this function body. 
1012 | locals_delta: u32, 1013 | 1014 | /// Information about the `call_indirect` that led to this function getting 1015 | /// inlined. This field is `Some` if and only if this stack entry is a 1016 | /// non-root stack entry. 1017 | call_indirect_info: Option, 1018 | 1019 | /// The body of the function we are currently copying/inlining. 1020 | func_body: wasmparser::FunctionBody<'a>, 1021 | 1022 | /// The iterator of operators within the function we are currently 1023 | /// copying/inlining. 1024 | ops: std::iter::Peekable>, 1025 | 1026 | /// The global indices for the counters for when we guess correctly and 1027 | /// incorrectly, respectively. 1028 | counters: Option<(u32, u32)>, 1029 | 1030 | /// When an inlined function wants to return, we don't want to actually emit 1031 | /// a `return` instruction since that will exit the root caller. Instead, we 1032 | /// need to translate that to `br N` where `N` is the number of labels in 1033 | /// this callee's inlined body. This `label_depth_to_return_to` field is 1034 | /// that `N`. 1035 | /// 1036 | /// `None` for the root caller. 1037 | label_depth_to_return_to: Option, 1038 | } 1039 | 1040 | /// Information about a `call_indirect` call site for a stack entry. 1041 | #[derive(Clone, Copy)] 1042 | struct StackEntryCallIndirectInfo { 1043 | type_index: u32, 1044 | table_index: u32, 1045 | } 1046 | 1047 | enum CowInst<'a> { 1048 | /// A raw, already-encoded instruction. 1049 | Borrowed(&'a [u8]), 1050 | 1051 | /// An owned instruction that needs to be encoded. 1052 | Owned(wasm_encoder::Instruction<'a>), 1053 | } 1054 | 1055 | /// Static information about the contents of the tables in the Wasm program. 1056 | #[derive(Default)] 1057 | struct TablesInfo { 1058 | /// Maps from table index to the associated static info about that table's 1059 | /// entries. 
1060 | tables: Vec, 1061 | } 1062 | 1063 | impl std::ops::Index for TablesInfo { 1064 | type Output = TableInfo; 1065 | 1066 | fn index(&self, index: u32) -> &Self::Output { 1067 | let index = usize::try_from(index).unwrap(); 1068 | &self.tables[index] 1069 | } 1070 | } 1071 | 1072 | impl std::ops::IndexMut for TablesInfo { 1073 | fn index_mut(&mut self, index: u32) -> &mut Self::Output { 1074 | let index = usize::try_from(index).unwrap(); 1075 | &mut self.tables[index] 1076 | } 1077 | } 1078 | 1079 | impl TablesInfo { 1080 | fn push_imported_table(&mut self, _table_type: &wasmparser::TableType) { 1081 | self.tables.push(TableInfo::default()); 1082 | } 1083 | 1084 | fn push_defined_table(&mut self, _table: &wasmparser::Table) { 1085 | self.tables.push(TableInfo::default()); 1086 | } 1087 | } 1088 | 1089 | /// Static information about the contents of a table in the Wasm program. 1090 | #[derive(Default)] 1091 | struct TableInfo { 1092 | /// Maps from an index within the table to the function index of the funcref 1093 | /// in that table slot, if any. 1094 | /// 1095 | /// Empty for non-funcref tables. 1096 | /// 1097 | /// Incomplete for imported tables, if they've been configured to be 1098 | /// allowed. 1099 | /// 1100 | /// Incomplete if arbitrary, non-constant element segment offsets are 1101 | /// allowed and present in the Wasm binary. 1102 | entries: HashMap, 1103 | } 1104 | 1105 | impl TableInfo { 1106 | /// Get the function index of the element at `table[tabled_index]`, if that 1107 | /// is statically known. 1108 | fn get(&self, table_index: u32) -> Option { 1109 | self.entries.get(&table_index).copied() 1110 | } 1111 | 1112 | /// Record that the given elements live at the given static offset. 
1113 | fn add_elements(&mut self, offset: u32, elements: wasmparser::ElementItems) -> Result<()> { 1114 | match elements { 1115 | wasmparser::ElementItems::Functions(funcs) => { 1116 | for (offset, func) in (offset..).zip(funcs.into_iter()) { 1117 | let func = func?; 1118 | self.entries.insert(offset, func); 1119 | } 1120 | } 1121 | wasmparser::ElementItems::Expressions(_ref_ty, exprs) => { 1122 | for (offset, expr) in (offset..).zip(exprs.into_iter()) { 1123 | let expr = expr?; 1124 | let mut ops = expr.get_operators_reader().into_iter(); 1125 | match (ops.next(), ops.next(), ops.next()) { 1126 | ( 1127 | Some(Ok(wasmparser::Operator::RefFunc { function_index })), 1128 | Some(Ok(wasmparser::Operator::End)), 1129 | None, 1130 | ) => { 1131 | self.entries.insert(offset, function_index); 1132 | } 1133 | _ => { 1134 | // Either a null funcref, or this element segment is 1135 | // for a non-funcref table. Either way, skip it. 1136 | } 1137 | } 1138 | } 1139 | } 1140 | } 1141 | Ok(()) 1142 | } 1143 | } 1144 | -------------------------------------------------------------------------------- /src/profile.rs: -------------------------------------------------------------------------------- 1 | //! Extracting profiles from instrumented Wasm programs and merging profiles 2 | //! together. 3 | 4 | use std::collections::BTreeMap; 5 | 6 | use anyhow::{anyhow, ensure, Context, Result}; 7 | 8 | /// Observed behavior about one or more Wasm executions. 9 | /// 10 | /// A `Profile` records observed `call_indirect` behavior about one or more Wasm 11 | /// executions: 12 | /// 13 | /// * How many times was each `call_indirect` executed? 14 | /// * How many times was `table[x]` called from each call site? 15 | /// * Etc... 16 | /// 17 | /// ## Constructing a `Profile` 18 | /// 19 | /// There are two primary ways to get a `Profile`, one for each instrumentation 20 | /// strategy: 21 | /// 22 | /// 1. 
If you instrumented your Wasm using the 23 | /// [`InstrumentationStrategy::ThreeGlobals`][crate::InstrumentationStrategy::ThreeGlobals] 24 | /// strategy, you can use the [`Profile::from_three_globals`] constructor. 25 | /// 26 | /// 2. If you instrumented your Wasm using the 27 | /// [`InstrumentationStrategy::HostCalls`][crate::InstrumentationStrategy::HostCalls] 28 | /// strategy, you can implement the `winliner.add_indirect_call` host import 29 | /// using a [`ProfileBuilder`][crate::ProfileBuilder] and then call 30 | /// [`ProfileBuilder::build`][crate::ProfileBuilder::build] to extract the 31 | /// finished profile. 32 | /// 33 | /// ## Merging `Profile`s 34 | /// 35 | /// It can be difficult to get representative profiling data from a single Wasm 36 | /// execution. Luckily, a single `Profile` can represent many different 37 | /// executions! For each profiling run, record a new `Profile` and then call 38 | /// [`Profile::merge`] to combine them into a single, aggregate `Profile`. 39 | /// 40 | /// ## Serializing and Deserializing `Profile`s 41 | /// 42 | /// When the `serde` cargo feature is enabled, `Profile` implements 43 | /// `serde::Serialize` and `serde::Deserialize`: 44 | /// 45 | /// ``` 46 | /// # fn foo() -> anyhow::Result<()> { 47 | /// #![cfg(feature = "serde")] 48 | /// 49 | /// use winliner::Profile; 50 | /// 51 | /// // Read a profile in from disk. 52 | /// let file = std::fs::File::open("path/to/my/profile.json")?; 53 | /// let my_profile: Profile = serde_json::from_reader(file)?; 54 | /// 55 | /// // Write a profile out to disk. 56 | /// let file = std::fs::File::create("path/to/new/profile.json")?; 57 | /// serde_json::to_writer(file, &my_profile)?; 58 | /// # Ok(()) } 59 | /// ``` 60 | #[derive(Clone, Debug, Default)] 61 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 62 | pub struct Profile { 63 | // Per-call site profiling information. 
64 | // 65 | // Note that a lack of profile data for a particular call site implies that 66 | // the associated `call_indirect` was never executed (or at least never 67 | // observed to have been executed: our profiling is sometimes imprecise). 68 | pub(crate) call_sites: BTreeMap, 69 | } 70 | 71 | #[derive(Clone, Debug, Default)] 72 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 73 | pub(crate) struct CallSiteProfile { 74 | // The total count of indirect calls for this call site. 75 | pub(crate) total_call_count: u64, 76 | // The observed callees and their associated counts. Note that these counts 77 | // don't necessarily add up to `total_call_count` since we can be missing 78 | // information due to imprecise instrumentation strategies. 79 | pub(crate) callee_to_count: BTreeMap, 80 | } 81 | 82 | impl Profile { 83 | /// Extract a profile from a Wasm program that was instrumented with the 84 | /// "three-globals" strategy. 85 | /// 86 | /// To avoid a public dependency on any particular version of Wasmtime (or 87 | /// any other Wasm runtime for that matter) this method takes a callback 88 | /// function to read a global (by name) from a Wasm instance instead of 89 | /// taking the Wasm instance as a parameter directly. It is up to callers to 90 | /// implement this callback function for their Wasm runtime. The callback 91 | /// function must be able to read `i32`- and `i64`-typed Wasm globals, 92 | /// zero-extending `i32` values as necessary. 93 | /// 94 | /// # Example 95 | /// 96 | /// ``` 97 | /// # fn foo() -> wasmtime::Result<()> { 98 | /// use wasmtime::{Instance, Module, Store, Val}; 99 | /// use winliner::Profile; 100 | /// 101 | /// // Instantiate your instrumented Wasm module. 
102 | /// let mut store = Store::<()>::default(); 103 | /// let module = Module::from_file(store.engine(), "path/to/instrumented.wasm")?; 104 | /// let instance = Instance::new(&mut store, &module, &[])?; 105 | /// 106 | /// // Run the Wasm instance, call its exports, etc... to gather PGO data. 107 | /// # let run = |_| -> wasmtime::Result<()> { Ok(()) }; 108 | /// run(instance)?; 109 | /// 110 | /// // Extract the profile from the instance. 111 | /// let profile = Profile::from_three_globals(|name| { 112 | /// match instance.get_global(&mut store, name)?.get(&mut store) { 113 | /// Val::I32(x) => Some(x as u32 as u64), 114 | /// Val::I64(x) => Some(x as u64), 115 | /// _ => None, 116 | /// } 117 | /// })?; 118 | /// # Ok(()) 119 | /// # } 120 | /// ``` 121 | pub fn from_three_globals(mut read_global: impl FnMut(&str) -> Option) -> Result { 122 | let mut profile = Profile::default(); 123 | 124 | for call_site_index in 0.. { 125 | let total_call_count = 126 | match read_global(&format!("__winliner_call_site_{call_site_index}_total")) { 127 | None => break, 128 | Some(x) => x, 129 | }; 130 | 131 | let last_callee = read_global(&format!( 132 | "__winliner_call_site_{call_site_index}_last_callee" 133 | )) 134 | .ok_or_else(|| { 135 | anyhow!( 136 | "Failed to read `__winliner_call_site_{call_site_index}_last_callee` global" 137 | ) 138 | })?; 139 | let last_callee = u32::try_from(last_callee).context("callee is out of bounds")?; 140 | 141 | let last_callee_count = read_global(&format!( 142 | "__winliner_call_site_{call_site_index}_last_callee_count" 143 | )) 144 | .ok_or_else(|| { 145 | anyhow!( 146 | "Failed to read `__winliner_call_site_{call_site_index}_last_callee` global" 147 | ) 148 | })?; 149 | 150 | ensure!( 151 | total_call_count >= last_callee_count, 152 | "Bogus profiling data: call site's total count is less than the last callee's call \ 153 | count", 154 | ); 155 | 156 | let mut callee_to_count = BTreeMap::new(); 157 | callee_to_count.insert(last_callee, 
last_callee_count); 158 | 159 | profile.call_sites.insert( 160 | call_site_index, 161 | CallSiteProfile { 162 | total_call_count, 163 | callee_to_count, 164 | }, 165 | ); 166 | } 167 | 168 | Ok(profile) 169 | } 170 | 171 | /// Merge two profiles together. 172 | /// 173 | /// The `other` profile is merged into `self`. 174 | /// 175 | /// # Example 176 | /// 177 | /// ``` 178 | /// # fn foo() -> anyhow::Result<()> { 179 | /// use wasmtime::{Engine, Module}; 180 | /// use winliner::Profile; 181 | /// 182 | /// // Load the instrumented Wasm module. 183 | /// let engine = Engine::default(); 184 | /// let module = Module::from_file(&engine, "path/to/instrumented.wasm")?; 185 | /// 186 | /// // Record a couple of PGO profiles. 187 | /// # let record_one_profile = |_| -> anyhow::Result { unimplemented!() }; 188 | /// let mut profile1 = record_one_profile(&module)?; 189 | /// let profile2 = record_one_profile(&module)?; 190 | /// 191 | /// // Finally, combine the two profiles into a single profile. 192 | /// profile1.merge(&profile2); 193 | /// # Ok(()) } 194 | /// ``` 195 | pub fn merge(&mut self, other: &Profile) { 196 | for (call_site_index, other) in other.call_sites.iter() { 197 | let call_site = self.call_sites.entry(*call_site_index).or_default(); 198 | call_site.total_call_count += other.total_call_count; 199 | for (callee, count) in other.callee_to_count.iter() { 200 | *call_site.callee_to_count.entry(*callee).or_default() += count; 201 | } 202 | } 203 | } 204 | } 205 | 206 | /// A builder for constructing [`Profile`][crate::Profile]s. 207 | /// 208 | /// Primarily for use in conjunction with 209 | /// [`InstrumentationStrategy::HostCalls`][crate::InstrumentationStrategy::HostCalls] 210 | /// and implementing the `winliner.add_indirect_call` import function for the 211 | /// instrumented Wasm. 212 | /// 213 | /// # Example 214 | /// 215 | /// ``` 216 | /// use winliner::ProfileBuilder; 217 | /// 218 | /// // Create a new builder. 
219 | /// let mut builder = ProfileBuilder::new(); 220 | /// 221 | /// // Record some observed calls. 222 | /// let callee = 42; 223 | /// let call_site = 36; 224 | /// builder.add_indirect_call(callee, call_site); 225 | /// 226 | /// // Construct the finished profile from the builder. 227 | /// let profile = builder.build(); 228 | /// ``` 229 | #[derive(Clone, Default)] 230 | pub struct ProfileBuilder { 231 | profile: Profile, 232 | } 233 | 234 | impl ProfileBuilder { 235 | /// Create a new, empty builder. 236 | pub fn new() -> Self { 237 | Default::default() 238 | } 239 | 240 | /// Record the observed target of an indirect call at the given call site. 241 | pub fn add_indirect_call(&mut self, callee: u32, call_site: u32) { 242 | let call_site = self.profile.call_sites.entry(call_site).or_default(); 243 | call_site.total_call_count += 1; 244 | *call_site.callee_to_count.entry(callee).or_default() += 1; 245 | } 246 | 247 | /// Construct the finished profile from this builder. 248 | pub fn build(self) -> Profile { 249 | self.profile 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /tests/all/instrument.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use std::sync::atomic::{AtomicU32, Ordering}; 3 | use winliner::{InstrumentationStrategy, Instrumenter}; 4 | 5 | fn assert_instrumentation( 6 | instrumenter: Instrumenter, 7 | input: &str, 8 | expected_wat: &str, 9 | ) -> Result<()> { 10 | let _ = env_logger::try_init(); 11 | 12 | let input = wat::parse_str(input).context("failed to parse test input as WAT")?; 13 | 14 | let actual_wasm = instrumenter 15 | .instrument(&input) 16 | .context("failed to instrument input Wasm")?; 17 | 18 | if log::log_enabled!(log::Level::Debug) { 19 | static TEST_CASE: AtomicU32 = AtomicU32::new(0); 20 | let n = TEST_CASE.fetch_add(1, Ordering::AcqRel); 21 | log::debug!("Writing instrumented Wasm to 
`instrumented{n}.wasm`"); 22 | std::fs::write(format!("instrumented{n}.wasm"), &actual_wasm).unwrap(); 23 | } 24 | 25 | let actual_wat = 26 | wasmprinter::print_bytes(&actual_wasm).context("failed to print output Wasm as WAT")?; 27 | 28 | super::assert_no_diff(expected_wat.trim(), actual_wat.trim()); 29 | 30 | wasmparser::Validator::new_with_features(wasmparser::WasmFeatures { 31 | function_references: true, 32 | ..Default::default() 33 | }) 34 | .validate_all(&actual_wasm) 35 | .context("the instrumented wasm failed to validate")?; 36 | 37 | Ok(()) 38 | } 39 | 40 | mod three_globals { 41 | use super::*; 42 | 43 | fn instrumenter() -> Instrumenter { 44 | let mut i = Instrumenter::new(); 45 | i.strategy(InstrumentationStrategy::ThreeGlobals); 46 | i 47 | } 48 | 49 | #[test] 50 | fn empty_module() -> Result<()> { 51 | assert_instrumentation( 52 | instrumenter(), 53 | r#" 54 | (module) 55 | "#, 56 | r#" 57 | (module) 58 | "#, 59 | ) 60 | } 61 | 62 | #[test] 63 | fn basic() -> Result<()> { 64 | assert_instrumentation( 65 | instrumenter(), 66 | r#" 67 | (module 68 | (type (func)) 69 | (table 100 100 funcref) 70 | (func (param i32) 71 | local.get 0 72 | call_indirect (type 0) 73 | ) 74 | ) 75 | "#, 76 | r#" 77 | (module 78 | (type (;0;) (func)) 79 | (type (;1;) (func (param i32))) 80 | (func (;0;) (type 1) (param i32) 81 | (local i32) 82 | local.get 0 83 | global.get 0 84 | i64.const 1 85 | i64.add 86 | global.set 0 87 | local.tee 1 88 | global.get 1 89 | i32.ne 90 | if ;; label = @1 91 | local.get 1 92 | global.set 1 93 | i64.const 0 94 | global.set 2 95 | end 96 | global.get 2 97 | i64.const 1 98 | i64.add 99 | global.set 2 100 | local.get 1 101 | call_indirect (type 0) 102 | ) 103 | (table (;0;) 100 100 funcref) 104 | (global (;0;) (mut i64) i64.const 0) 105 | (global (;1;) (mut i32) i32.const -1) 106 | (global (;2;) (mut i64) i64.const 0) 107 | (export "__winliner_call_site_0_total" (global 0)) 108 | (export "__winliner_call_site_0_last_callee" (global 1)) 109 | 
(export "__winliner_call_site_0_last_callee_count" (global 2)) 110 | ) 111 | "#, 112 | ) 113 | } 114 | 115 | #[test] 116 | fn multiple_call_sites() -> Result<()> { 117 | assert_instrumentation( 118 | instrumenter(), 119 | r#" 120 | (module 121 | (type (func)) 122 | (table 100 100 funcref) 123 | (func (param i32) 124 | local.get 0 125 | call_indirect (type 0) 126 | local.get 0 127 | call_indirect (type 0) 128 | local.get 0 129 | call_indirect (type 0) 130 | ) 131 | ) 132 | "#, 133 | r#" 134 | (module 135 | (type (;0;) (func)) 136 | (type (;1;) (func (param i32))) 137 | (func (;0;) (type 1) (param i32) 138 | (local i32) 139 | local.get 0 140 | global.get 0 141 | i64.const 1 142 | i64.add 143 | global.set 0 144 | local.tee 1 145 | global.get 1 146 | i32.ne 147 | if ;; label = @1 148 | local.get 1 149 | global.set 1 150 | i64.const 0 151 | global.set 2 152 | end 153 | global.get 2 154 | i64.const 1 155 | i64.add 156 | global.set 2 157 | local.get 1 158 | call_indirect (type 0) 159 | local.get 0 160 | global.get 3 161 | i64.const 1 162 | i64.add 163 | global.set 3 164 | local.tee 1 165 | global.get 4 166 | i32.ne 167 | if ;; label = @1 168 | local.get 1 169 | global.set 4 170 | i64.const 0 171 | global.set 5 172 | end 173 | global.get 5 174 | i64.const 1 175 | i64.add 176 | global.set 5 177 | local.get 1 178 | call_indirect (type 0) 179 | local.get 0 180 | global.get 6 181 | i64.const 1 182 | i64.add 183 | global.set 6 184 | local.tee 1 185 | global.get 7 186 | i32.ne 187 | if ;; label = @1 188 | local.get 1 189 | global.set 7 190 | i64.const 0 191 | global.set 8 192 | end 193 | global.get 8 194 | i64.const 1 195 | i64.add 196 | global.set 8 197 | local.get 1 198 | call_indirect (type 0) 199 | ) 200 | (table (;0;) 100 100 funcref) 201 | (global (;0;) (mut i64) i64.const 0) 202 | (global (;1;) (mut i32) i32.const -1) 203 | (global (;2;) (mut i64) i64.const 0) 204 | (global (;3;) (mut i64) i64.const 0) 205 | (global (;4;) (mut i32) i32.const -1) 206 | (global (;5;) (mut 
i64) i64.const 0) 207 | (global (;6;) (mut i64) i64.const 0) 208 | (global (;7;) (mut i32) i32.const -1) 209 | (global (;8;) (mut i64) i64.const 0) 210 | (export "__winliner_call_site_0_total" (global 0)) 211 | (export "__winliner_call_site_0_last_callee" (global 1)) 212 | (export "__winliner_call_site_0_last_callee_count" (global 2)) 213 | (export "__winliner_call_site_1_total" (global 3)) 214 | (export "__winliner_call_site_1_last_callee" (global 4)) 215 | (export "__winliner_call_site_1_last_callee_count" (global 5)) 216 | (export "__winliner_call_site_2_total" (global 6)) 217 | (export "__winliner_call_site_2_last_callee" (global 7)) 218 | (export "__winliner_call_site_2_last_callee_count" (global 8)) 219 | ) 220 | "#, 221 | ) 222 | } 223 | 224 | #[test] 225 | fn multiple_call_sites_across_functions() -> Result<()> { 226 | assert_instrumentation( 227 | instrumenter(), 228 | r#" 229 | (module 230 | (type (func)) 231 | (table 100 100 funcref) 232 | (func (param i32) 233 | local.get 0 234 | call_indirect (type 0) 235 | ) 236 | (func (param i32) 237 | local.get 0 238 | call_indirect (type 0) 239 | ) 240 | ) 241 | "#, 242 | r#" 243 | (module 244 | (type (;0;) (func)) 245 | (type (;1;) (func (param i32))) 246 | (func (;0;) (type 1) (param i32) 247 | (local i32) 248 | local.get 0 249 | global.get 0 250 | i64.const 1 251 | i64.add 252 | global.set 0 253 | local.tee 1 254 | global.get 1 255 | i32.ne 256 | if ;; label = @1 257 | local.get 1 258 | global.set 1 259 | i64.const 0 260 | global.set 2 261 | end 262 | global.get 2 263 | i64.const 1 264 | i64.add 265 | global.set 2 266 | local.get 1 267 | call_indirect (type 0) 268 | ) 269 | (func (;1;) (type 1) (param i32) 270 | (local i32) 271 | local.get 0 272 | global.get 3 273 | i64.const 1 274 | i64.add 275 | global.set 3 276 | local.tee 1 277 | global.get 4 278 | i32.ne 279 | if ;; label = @1 280 | local.get 1 281 | global.set 4 282 | i64.const 0 283 | global.set 5 284 | end 285 | global.get 5 286 | i64.const 1 287 | 
i64.add 288 | global.set 5 289 | local.get 1 290 | call_indirect (type 0) 291 | ) 292 | (table (;0;) 100 100 funcref) 293 | (global (;0;) (mut i64) i64.const 0) 294 | (global (;1;) (mut i32) i32.const -1) 295 | (global (;2;) (mut i64) i64.const 0) 296 | (global (;3;) (mut i64) i64.const 0) 297 | (global (;4;) (mut i32) i32.const -1) 298 | (global (;5;) (mut i64) i64.const 0) 299 | (export "__winliner_call_site_0_total" (global 0)) 300 | (export "__winliner_call_site_0_last_callee" (global 1)) 301 | (export "__winliner_call_site_0_last_callee_count" (global 2)) 302 | (export "__winliner_call_site_1_total" (global 3)) 303 | (export "__winliner_call_site_1_last_callee" (global 4)) 304 | (export "__winliner_call_site_1_last_callee_count" (global 5)) 305 | ) 306 | "#, 307 | ) 308 | } 309 | 310 | #[test] 311 | fn no_call_indirect() -> Result<()> { 312 | assert_instrumentation( 313 | instrumenter(), 314 | r#" 315 | (module 316 | (elem funcref (ref.func 0) (ref.func 1)) 317 | (func) 318 | (func) 319 | ) 320 | "#, 321 | r#" 322 | (module 323 | (type (;0;) (func)) 324 | (func (;0;) (type 0) 325 | (local i32) 326 | ) 327 | (func (;1;) (type 0) 328 | (local i32) 329 | ) 330 | (elem (;0;) funcref (ref.func 0) (ref.func 1)) 331 | ) 332 | "#, 333 | ) 334 | } 335 | 336 | #[test] 337 | fn disallow_table_set() -> Result<()> { 338 | let result = assert_instrumentation( 339 | instrumenter(), 340 | r#" 341 | (module 342 | (table 10 10 funcref) 343 | (elem declare funcref (ref.func 0)) 344 | (func 345 | i32.const 0 346 | ref.func 0 347 | table.set 348 | ) 349 | ) 350 | "#, 351 | r#" 352 | 353 | "#, 354 | ); 355 | let err = result.expect_err("should have failed to instrument"); 356 | println!("Actual error: {err:?}"); 357 | assert!(format!("{err:?}").contains("Found table mutation instruction")); 358 | Ok(()) 359 | } 360 | 361 | #[test] 362 | fn allow_table_set() -> Result<()> { 363 | let mut instrumenter = instrumenter(); 364 | instrumenter.allow_table_mutation(true); 365 | 
assert_instrumentation( 366 | instrumenter, 367 | r#" 368 | (module 369 | (table 10 10 funcref) 370 | (elem declare funcref (ref.func 0)) 371 | (func 372 | i32.const 0 373 | ref.func 0 374 | table.set 375 | ) 376 | ) 377 | "#, 378 | r#" 379 | (module 380 | (type (;0;) (func)) 381 | (func (;0;) (type 0) 382 | (local i32) 383 | i32.const 0 384 | ref.func 0 385 | table.set 0 386 | ) 387 | (table (;0;) 10 10 funcref) 388 | (elem (;0;) declare funcref (ref.func 0)) 389 | ) 390 | "#, 391 | ) 392 | } 393 | 394 | #[test] 395 | fn existing_globals() -> Result<()> { 396 | assert_instrumentation( 397 | instrumenter(), 398 | r#" 399 | (module 400 | (global i32 (i32.const 42)) 401 | (table 10 10 funcref) 402 | (type (func)) 403 | (func (param i32) 404 | local.get 0 405 | call_indirect (type 0) 406 | ) 407 | ) 408 | "#, 409 | r#" 410 | (module 411 | (type (;0;) (func)) 412 | (type (;1;) (func (param i32))) 413 | (func (;0;) (type 1) (param i32) 414 | (local i32) 415 | local.get 0 416 | global.get 1 417 | i64.const 1 418 | i64.add 419 | global.set 1 420 | local.tee 1 421 | global.get 2 422 | i32.ne 423 | if ;; label = @1 424 | local.get 1 425 | global.set 2 426 | i64.const 0 427 | global.set 3 428 | end 429 | global.get 3 430 | i64.const 1 431 | i64.add 432 | global.set 3 433 | local.get 1 434 | call_indirect (type 0) 435 | ) 436 | (table (;0;) 10 10 funcref) 437 | (global (;0;) i32 i32.const 42) 438 | (global (;1;) (mut i64) i64.const 0) 439 | (global (;2;) (mut i32) i32.const -1) 440 | (global (;3;) (mut i64) i64.const 0) 441 | (export "__winliner_call_site_0_total" (global 1)) 442 | (export "__winliner_call_site_0_last_callee" (global 2)) 443 | (export "__winliner_call_site_0_last_callee_count" (global 3)) 444 | ) 445 | "#, 446 | ) 447 | } 448 | 449 | #[test] 450 | fn existing_exports() -> Result<()> { 451 | assert_instrumentation( 452 | instrumenter(), 453 | r#" 454 | (module 455 | (table 10 10 funcref) 456 | (type (func)) 457 | (func (param i32) 458 | local.get 0 459 | 
call_indirect (type 0) 460 | ) 461 | (export "foo" (func 0)) 462 | ) 463 | "#, 464 | r#" 465 | (module 466 | (type (;0;) (func)) 467 | (type (;1;) (func (param i32))) 468 | (func (;0;) (type 1) (param i32) 469 | (local i32) 470 | local.get 0 471 | global.get 0 472 | i64.const 1 473 | i64.add 474 | global.set 0 475 | local.tee 1 476 | global.get 1 477 | i32.ne 478 | if ;; label = @1 479 | local.get 1 480 | global.set 1 481 | i64.const 0 482 | global.set 2 483 | end 484 | global.get 2 485 | i64.const 1 486 | i64.add 487 | global.set 2 488 | local.get 1 489 | call_indirect (type 0) 490 | ) 491 | (table (;0;) 10 10 funcref) 492 | (global (;0;) (mut i64) i64.const 0) 493 | (global (;1;) (mut i32) i32.const -1) 494 | (global (;2;) (mut i64) i64.const 0) 495 | (export "foo" (func 0)) 496 | (export "__winliner_call_site_0_total" (global 0)) 497 | (export "__winliner_call_site_0_last_callee" (global 1)) 498 | (export "__winliner_call_site_0_last_callee_count" (global 2)) 499 | ) 500 | "#, 501 | ) 502 | } 503 | 504 | #[test] 505 | fn func_with_locals() -> Result<()> { 506 | assert_instrumentation( 507 | instrumenter(), 508 | r#" 509 | (module 510 | (table 10 10 funcref) 511 | (type (func (param i32))) 512 | (func (param i32) (local i32) 513 | local.get 0 514 | local.get 1 515 | call_indirect (type 0) 516 | ) 517 | ) 518 | "#, 519 | r#" 520 | (module 521 | (type (;0;) (func (param i32))) 522 | (func (;0;) (type 0) (param i32) 523 | (local i32 i32) 524 | local.get 0 525 | local.get 1 526 | global.get 0 527 | i64.const 1 528 | i64.add 529 | global.set 0 530 | local.tee 2 531 | global.get 1 532 | i32.ne 533 | if ;; label = @1 534 | local.get 2 535 | global.set 1 536 | i64.const 0 537 | global.set 2 538 | end 539 | global.get 2 540 | i64.const 1 541 | i64.add 542 | global.set 2 543 | local.get 2 544 | call_indirect (type 0) 545 | ) 546 | (table (;0;) 10 10 funcref) 547 | (global (;0;) (mut i64) i64.const 0) 548 | (global (;1;) (mut i32) i32.const -1) 549 | (global (;2;) (mut i64) 
i64.const 0) 550 | (export "__winliner_call_site_0_total" (global 0)) 551 | (export "__winliner_call_site_0_last_callee" (global 1)) 552 | (export "__winliner_call_site_0_last_callee_count" (global 2)) 553 | ) 554 | "#, 555 | ) 556 | } 557 | } 558 | 559 | mod host_calls { 560 | use super::*; 561 | 562 | fn instrumenter() -> Instrumenter { 563 | let mut i = Instrumenter::new(); 564 | i.strategy(InstrumentationStrategy::HostCalls); 565 | i 566 | } 567 | 568 | #[test] 569 | fn empty_module() -> Result<()> { 570 | assert_instrumentation( 571 | instrumenter(), 572 | r#" 573 | (module) 574 | "#, 575 | r#" 576 | (module) 577 | "#, 578 | ) 579 | } 580 | 581 | #[test] 582 | fn basic() -> Result<()> { 583 | assert_instrumentation( 584 | instrumenter(), 585 | r#" 586 | (module 587 | (type (func)) 588 | (table 100 100 funcref) 589 | (func (param i32) 590 | local.get 0 591 | call_indirect (type 0) 592 | ) 593 | ) 594 | "#, 595 | r#" 596 | (module 597 | (type (;0;) (func)) 598 | (type (;1;) (func (param i32))) 599 | (type (;2;) (func (param i32 i32))) 600 | (import "winliner" "add_indirect_call" (func (;0;) (type 2))) 601 | (func (;1;) (type 1) (param i32) 602 | (local i32) 603 | local.get 0 604 | local.tee 1 605 | i32.const 0 606 | call 0 607 | local.get 1 608 | call_indirect (type 0) 609 | ) 610 | (table (;0;) 100 100 funcref) 611 | ) 612 | "#, 613 | ) 614 | } 615 | 616 | #[test] 617 | fn multiple_call_sites() -> Result<()> { 618 | assert_instrumentation( 619 | instrumenter(), 620 | r#" 621 | (module 622 | (type (func)) 623 | (table 100 100 funcref) 624 | (func (param i32) 625 | local.get 0 626 | call_indirect (type 0) 627 | local.get 0 628 | call_indirect (type 0) 629 | local.get 0 630 | call_indirect (type 0) 631 | ) 632 | ) 633 | "#, 634 | r#" 635 | (module 636 | (type (;0;) (func)) 637 | (type (;1;) (func (param i32))) 638 | (type (;2;) (func (param i32 i32))) 639 | (import "winliner" "add_indirect_call" (func (;0;) (type 2))) 640 | (func (;1;) (type 1) (param i32) 641 | 
(local i32) 642 | local.get 0 643 | local.tee 1 644 | i32.const 0 645 | call 0 646 | local.get 1 647 | call_indirect (type 0) 648 | local.get 0 649 | local.tee 1 650 | i32.const 1 651 | call 0 652 | local.get 1 653 | call_indirect (type 0) 654 | local.get 0 655 | local.tee 1 656 | i32.const 2 657 | call 0 658 | local.get 1 659 | call_indirect (type 0) 660 | ) 661 | (table (;0;) 100 100 funcref) 662 | ) 663 | "#, 664 | ) 665 | } 666 | 667 | #[test] 668 | fn multiple_call_sites_across_functions() -> Result<()> { 669 | assert_instrumentation( 670 | instrumenter(), 671 | r#" 672 | (module 673 | (type (func)) 674 | (table 100 100 funcref) 675 | (func (param i32) 676 | local.get 0 677 | call_indirect (type 0) 678 | ) 679 | (func (param i32) 680 | local.get 0 681 | call_indirect (type 0) 682 | ) 683 | ) 684 | "#, 685 | r#" 686 | (module 687 | (type (;0;) (func)) 688 | (type (;1;) (func (param i32))) 689 | (type (;2;) (func (param i32 i32))) 690 | (import "winliner" "add_indirect_call" (func (;0;) (type 2))) 691 | (func (;1;) (type 1) (param i32) 692 | (local i32) 693 | local.get 0 694 | local.tee 1 695 | i32.const 0 696 | call 0 697 | local.get 1 698 | call_indirect (type 0) 699 | ) 700 | (func (;2;) (type 1) (param i32) 701 | (local i32) 702 | local.get 0 703 | local.tee 1 704 | i32.const 1 705 | call 0 706 | local.get 1 707 | call_indirect (type 0) 708 | ) 709 | (table (;0;) 100 100 funcref) 710 | ) 711 | "#, 712 | ) 713 | } 714 | 715 | #[test] 716 | fn no_call_indirect() -> Result<()> { 717 | assert_instrumentation( 718 | instrumenter(), 719 | r#" 720 | (module 721 | (elem funcref (ref.func 0) (ref.func 1)) 722 | (func) 723 | (func) 724 | ) 725 | "#, 726 | r#" 727 | (module 728 | (type (;0;) (func)) 729 | (type (;1;) (func (param i32 i32))) 730 | (import "winliner" "add_indirect_call" (func (;0;) (type 1))) 731 | (func (;1;) (type 0) 732 | (local i32) 733 | ) 734 | (func (;2;) (type 0) 735 | (local i32) 736 | ) 737 | (elem (;0;) funcref (ref.func 1) (ref.func 2)) 738 | 
) 739 | "#, 740 | ) 741 | } 742 | 743 | #[test] 744 | fn elem_segments() -> Result<()> { 745 | assert_instrumentation( 746 | instrumenter(), 747 | r#" 748 | (module 749 | (table 100 100 funcref) 750 | 751 | ;; Active. 752 | (elem (table 0) (i32.const 0) funcref (ref.func 0) (ref.null func)) 753 | ;; Passive. 754 | (elem funcref (ref.func 0) (ref.null func)) 755 | ;; Declared. 756 | (elem declare funcref (ref.func 0) (ref.null func)) 757 | 758 | (func) 759 | ) 760 | "#, 761 | r#" 762 | (module 763 | (type (;0;) (func)) 764 | (type (;1;) (func (param i32 i32))) 765 | (import "winliner" "add_indirect_call" (func (;0;) (type 1))) 766 | (func (;1;) (type 0) 767 | (local i32) 768 | ) 769 | (table (;0;) 100 100 funcref) 770 | (elem (;0;) (i32.const 0) funcref (ref.func 1) (ref.null func)) 771 | (elem (;1;) funcref (ref.func 1) (ref.null func)) 772 | (elem (;2;) declare funcref (ref.func 1) (ref.null func)) 773 | ) 774 | "#, 775 | ) 776 | } 777 | 778 | #[test] 779 | fn global_ref_func() -> Result<()> { 780 | assert_instrumentation( 781 | instrumenter(), 782 | r#" 783 | (module 784 | (func) 785 | (global funcref (ref.func 0)) 786 | ) 787 | "#, 788 | r#" 789 | (module 790 | (type (;0;) (func)) 791 | (type (;1;) (func (param i32 i32))) 792 | (import "winliner" "add_indirect_call" (func (;0;) (type 1))) 793 | (func (;1;) (type 0) 794 | (local i32) 795 | ) 796 | (global (;0;) funcref ref.func 1) 797 | ) 798 | "#, 799 | ) 800 | } 801 | 802 | #[test] 803 | fn have_imported_func() -> Result<()> { 804 | assert_instrumentation( 805 | instrumenter(), 806 | r#" 807 | (module 808 | (import "foo" "bar" (func)) 809 | (func 810 | call 0 811 | ) 812 | ) 813 | "#, 814 | r#" 815 | (module 816 | (type (;0;) (func)) 817 | (type (;1;) (func (param i32 i32))) 818 | (import "winliner" "add_indirect_call" (func (;0;) (type 1))) 819 | (import "foo" "bar" (func (;1;) (type 0))) 820 | (func (;2;) (type 0) 821 | (local i32) 822 | call 1 823 | ) 824 | ) 825 | "#, 826 | ) 827 | } 828 | 829 | #[test] 830 
| fn ref_func() -> Result<()> { 831 | assert_instrumentation( 832 | instrumenter(), 833 | r#" 834 | (module 835 | (elem declare funcref (ref.func 0)) 836 | (func (result funcref) 837 | ref.func 0 838 | ) 839 | ) 840 | "#, 841 | r#" 842 | (module 843 | (type (;0;) (func (result funcref))) 844 | (type (;1;) (func (param i32 i32))) 845 | (import "winliner" "add_indirect_call" (func (;0;) (type 1))) 846 | (func (;1;) (type 0) (result funcref) 847 | (local i32) 848 | ref.func 1 849 | ) 850 | (elem (;0;) declare funcref (ref.func 1)) 851 | ) 852 | "#, 853 | ) 854 | } 855 | 856 | #[test] 857 | fn start_section() -> Result<()> { 858 | assert_instrumentation( 859 | instrumenter(), 860 | r#" 861 | (module 862 | (start 0) 863 | (func) 864 | ) 865 | "#, 866 | r#" 867 | (module 868 | (type (;0;) (func)) 869 | (type (;1;) (func (param i32 i32))) 870 | (import "winliner" "add_indirect_call" (func (;0;) (type 1))) 871 | (func (;1;) (type 0) 872 | (local i32) 873 | ) 874 | (start 1) 875 | ) 876 | "#, 877 | ) 878 | } 879 | 880 | #[test] 881 | fn func_with_locals() -> Result<()> { 882 | assert_instrumentation( 883 | instrumenter(), 884 | r#" 885 | (module 886 | (table 10 10 funcref) 887 | (type (func (param i32))) 888 | (func (param i32) (local i32) 889 | local.get 0 890 | local.get 1 891 | call_indirect (type 0) 892 | ) 893 | (export "foo" (func 0)) 894 | ) 895 | "#, 896 | r#" 897 | (module 898 | (type (;0;) (func (param i32))) 899 | (type (;1;) (func (param i32 i32))) 900 | (import "winliner" "add_indirect_call" (func (;0;) (type 1))) 901 | (func (;1;) (type 0) (param i32) 902 | (local i32 i32) 903 | local.get 0 904 | local.get 1 905 | local.tee 2 906 | i32.const 0 907 | call 0 908 | local.get 2 909 | call_indirect (type 0) 910 | ) 911 | (table (;0;) 10 10 funcref) 912 | (export "foo" (func 0)) 913 | ) 914 | "#, 915 | ) 916 | } 917 | } 918 | -------------------------------------------------------------------------------- /tests/all/main.rs: 
-------------------------------------------------------------------------------- 1 | mod instrument; 2 | mod optimize; 3 | mod profile; 4 | 5 | fn main() {} 6 | 7 | pub fn assert_no_diff(expected: &str, actual: &str) { 8 | if expected == actual { 9 | return; 10 | } 11 | 12 | let mut difference = String::new(); 13 | for diff in diff::lines(expected, actual) { 14 | let (c, line) = match diff { 15 | diff::Result::Left(left) => ('-', left), 16 | diff::Result::Both(left, _right) => (' ', left), 17 | diff::Result::Right(right) => ('+', right), 18 | }; 19 | difference.push(c); 20 | difference.push_str(line); 21 | difference.push('\n'); 22 | } 23 | assert!(false, "expected != actual:\n\n{difference}"); 24 | } 25 | -------------------------------------------------------------------------------- /tests/all/optimize.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use std::sync::atomic::{AtomicU32, Ordering}; 3 | use winliner::{Optimizer, ProfileBuilder}; 4 | 5 | fn assert_optimize( 6 | optimizer: &Optimizer, 7 | // A map from `i`th call site to a list of (callee, count) pairs. 
8 | profile: &[&[(u32, u32)]], 9 | input: &str, 10 | expected_wat: &str, 11 | ) -> Result<()> { 12 | let _ = env_logger::try_init(); 13 | 14 | let mut builder = ProfileBuilder::new(); 15 | for (call_site, calls) in profile.iter().enumerate() { 16 | let call_site = u32::try_from(call_site).unwrap(); 17 | for (callee, count) in *calls { 18 | for _ in 0..*count { 19 | builder.add_indirect_call(*callee, call_site); 20 | } 21 | } 22 | } 23 | let profile = builder.build(); 24 | 25 | let input = wat::parse_str(input).context("failed to parse test input as WAT")?; 26 | 27 | let actual_wasm = optimizer 28 | .optimize(&profile, &input) 29 | .context("failed to optimize input with given profile")?; 30 | 31 | if log::log_enabled!(log::Level::Debug) { // dump the optimized module to disk when debug logging is on 32 | static TEST_CASE: AtomicU32 = AtomicU32::new(0); 33 | let n = TEST_CASE.fetch_add(1, Ordering::AcqRel); 34 | log::debug!("Writing optimized Wasm to `optimized{n}.wasm`"); 35 | std::fs::write(format!("optimized{n}.wasm"), &actual_wasm).context("failed to write optimized Wasm to disk")?; 36 | } 37 | 38 | let actual_wat = 39 | wasmprinter::print_bytes(&actual_wasm).context("failed to print optimized Wasm as WAT")?; 40 | 41 | super::assert_no_diff(expected_wat.trim(), actual_wat.trim()); 42 | 43 | wasmparser::Validator::new_with_features(wasmparser::WasmFeatures { 44 | function_references: true, 45 | ..Default::default() 46 | }) 47 | .validate_all(&actual_wasm) 48 | .context("the optimized wasm failed to validate")?; 49 | 50 | Ok(()) 51 | } 52 | 53 | #[test] 54 | fn basic() -> Result<()> { 55 | assert_optimize( 56 | Optimizer::new().min_total_calls(1), 57 | &[&[(42, 1)]], 58 | r#" 59 | (module 60 | (type (;0;) (func (result i32))) 61 | (type (;1;) (func (param i32) (result i32))) 62 | (func (;0;) (type 0) (result i32) 63 | i32.const 36 64 | ) 65 | (func (;1;) (type 1) (param i32) (result i32) 66 | local.get 0 67 | call_indirect (type 0) 68 | ) 69 | (table (;0;) 100 100 funcref) 70 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 71 | ) 72 | "#, 73 | r#" 74 | (module
75 | (type (;0;) (func (result i32))) 76 | (type (;1;) (func (param i32) (result i32))) 77 | (func (;0;) (type 0) (result i32) 78 | (local i32) 79 | i32.const 36 80 | ) 81 | (func (;1;) (type 1) (param i32) (result i32) 82 | (local i32) 83 | local.get 0 84 | local.tee 1 85 | i32.const 42 86 | i32.eq 87 | if (type 0) (result i32) ;; label = @1 88 | i32.const 36 89 | else 90 | local.get 1 91 | call_indirect (type 0) 92 | end 93 | ) 94 | (table (;0;) 100 100 funcref) 95 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 96 | ) 97 | "#, 98 | ) 99 | } 100 | 101 | #[test] 102 | fn not_enough_total_calls() -> Result<()> { 103 | assert_optimize( 104 | Optimizer::new().min_total_calls(2), 105 | &[&[(42, 1)]], 106 | r#" 107 | (module 108 | (type (;0;) (func (result i32))) 109 | (type (;1;) (func (param i32) (result i32))) 110 | (func (;0;) (type 0) (result i32) 111 | i32.const 36 112 | ) 113 | (func (;1;) (type 1) (param i32) (result i32) 114 | local.get 0 115 | call_indirect (type 0) 116 | ) 117 | (table (;0;) 100 100 funcref) 118 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 119 | ) 120 | "#, 121 | r#" 122 | (module 123 | (type (;0;) (func (result i32))) 124 | (type (;1;) (func (param i32) (result i32))) 125 | (func (;0;) (type 0) (result i32) 126 | (local i32) 127 | i32.const 36 128 | ) 129 | (func (;1;) (type 1) (param i32) (result i32) 130 | (local i32) 131 | local.get 0 132 | call_indirect (type 0) 133 | ) 134 | (table (;0;) 100 100 funcref) 135 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 136 | ) 137 | "#, 138 | ) 139 | } 140 | 141 | #[test] 142 | fn multiple_call_sites() -> Result<()> { 143 | assert_optimize( 144 | Optimizer::new().min_total_calls(2), 145 | &[&[(0, 2)], &[(1, 1)], &[(2, 2)], &[(3, 1)]], 146 | r#" 147 | (module 148 | (type (func (result i32))) 149 | 150 | (func (type 0) 151 | i32.const 11 152 | ) 153 | (func (type 0) 154 | i32.const 22 155 | ) 156 | (func (type 0) 157 | i32.const 33 158 | ) 159 | (func (type 0) 160 | i32.const 44 161 | ) 
162 | 163 | (func (param i32 i32 i32 i32) 164 | local.get 0 165 | call_indirect (type 0) 166 | drop 167 | local.get 1 168 | call_indirect (type 0) 169 | drop 170 | local.get 2 171 | call_indirect (type 0) 172 | drop 173 | local.get 3 174 | call_indirect (type 0) 175 | drop 176 | ) 177 | 178 | (table 100 100 funcref) 179 | (elem (i32.const 0) funcref (ref.func 0) (ref.func 1) (ref.func 2) (ref.func 3)) 180 | ) 181 | "#, 182 | r#" 183 | (module 184 | (type (;0;) (func (result i32))) 185 | (type (;1;) (func (param i32 i32 i32 i32))) 186 | (func (;0;) (type 0) (result i32) 187 | (local i32) 188 | i32.const 11 189 | ) 190 | (func (;1;) (type 0) (result i32) 191 | (local i32) 192 | i32.const 22 193 | ) 194 | (func (;2;) (type 0) (result i32) 195 | (local i32) 196 | i32.const 33 197 | ) 198 | (func (;3;) (type 0) (result i32) 199 | (local i32) 200 | i32.const 44 201 | ) 202 | (func (;4;) (type 1) (param i32 i32 i32 i32) 203 | (local i32) 204 | local.get 0 205 | local.tee 4 206 | i32.const 0 207 | i32.eq 208 | if (type 0) (result i32) ;; label = @1 209 | i32.const 11 210 | else 211 | local.get 4 212 | call_indirect (type 0) 213 | end 214 | drop 215 | local.get 1 216 | call_indirect (type 0) 217 | drop 218 | local.get 2 219 | local.tee 4 220 | i32.const 2 221 | i32.eq 222 | if (type 0) (result i32) ;; label = @1 223 | i32.const 33 224 | else 225 | local.get 4 226 | call_indirect (type 0) 227 | end 228 | drop 229 | local.get 3 230 | call_indirect (type 0) 231 | drop 232 | ) 233 | (table (;0;) 100 100 funcref) 234 | (elem (;0;) (i32.const 0) funcref (ref.func 0) (ref.func 1) (ref.func 2) (ref.func 3)) 235 | ) 236 | "#, 237 | ) 238 | } 239 | 240 | #[test] 241 | fn no_indirect_calls() -> Result<()> { 242 | assert_optimize( 243 | &Optimizer::new(), 244 | &[], 245 | r#" 246 | (module 247 | (type (func (result i32))) 248 | 249 | (func (type 0) 250 | i32.const 11 251 | ) 252 | (func (type 0) 253 | i32.const 22 254 | ) 255 | (func (type 0) 256 | i32.const 33 257 | ) 258 | (func 
(type 0) 259 | i32.const 44 260 | ) 261 | 262 | (func 263 | call 0 264 | drop 265 | call 1 266 | drop 267 | call 2 268 | drop 269 | call 3 270 | drop 271 | ) 272 | 273 | (table 100 100 funcref) 274 | (elem (i32.const 0) funcref (ref.func 0) (ref.func 1) (ref.func 2) (ref.func 3)) 275 | ) 276 | "#, 277 | r#" 278 | (module 279 | (type (;0;) (func (result i32))) 280 | (type (;1;) (func)) 281 | (func (;0;) (type 0) (result i32) 282 | (local i32) 283 | i32.const 11 284 | ) 285 | (func (;1;) (type 0) (result i32) 286 | (local i32) 287 | i32.const 22 288 | ) 289 | (func (;2;) (type 0) (result i32) 290 | (local i32) 291 | i32.const 33 292 | ) 293 | (func (;3;) (type 0) (result i32) 294 | (local i32) 295 | i32.const 44 296 | ) 297 | (func (;4;) (type 1) 298 | (local i32) 299 | call 0 300 | drop 301 | call 1 302 | drop 303 | call 2 304 | drop 305 | call 3 306 | drop 307 | ) 308 | (table (;0;) 100 100 funcref) 309 | (elem (;0;) (i32.const 0) funcref (ref.func 0) (ref.func 1) (ref.func 2) (ref.func 3)) 310 | ) 311 | "#, 312 | ) 313 | } 314 | 315 | #[test] 316 | fn multiple_params_and_results() -> Result<()> { 317 | assert_optimize( 318 | Optimizer::new().min_total_calls(1), 319 | &[&[(42, 1)]], 320 | r#" 321 | (module 322 | (type (func (param i32 i64) (result i32 i64))) 323 | 324 | (func (type 0) 325 | local.get 0 326 | i32.const 1 327 | i32.add 328 | local.get 1 329 | i64.const 1 330 | i64.add 331 | ) 332 | 333 | (func (param i32) (result i32 i64) 334 | i32.const 11 335 | i64.const 22 336 | local.get 0 337 | call_indirect (type 0) 338 | ) 339 | 340 | (table 100 100 funcref) 341 | (elem (i32.const 42) funcref (ref.func 0)) 342 | ) 343 | "#, 344 | r#" 345 | (module 346 | (type (;0;) (func (param i32 i64) (result i32 i64))) 347 | (type (;1;) (func (param i32) (result i32 i64))) 348 | (func (;0;) (type 0) (param i32 i64) (result i32 i64) 349 | (local i32) 350 | local.get 0 351 | i32.const 1 352 | i32.add 353 | local.get 1 354 | i64.const 1 355 | i64.add 356 | ) 357 | (func (;1;) 
(type 1) (param i32) (result i32 i64) 358 | (local i32 i32 i64) 359 | i32.const 11 360 | i64.const 22 361 | local.get 0 362 | local.tee 1 363 | i32.const 42 364 | i32.eq 365 | if (type 0) (param i32 i64) (result i32 i64) ;; label = @1 366 | local.set 3 367 | local.set 2 368 | local.get 2 369 | i32.const 1 370 | i32.add 371 | local.get 3 372 | i64.const 1 373 | i64.add 374 | else 375 | local.get 1 376 | call_indirect (type 0) 377 | end 378 | ) 379 | (table (;0;) 100 100 funcref) 380 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 381 | ) 382 | "#, 383 | ) 384 | } 385 | 386 | #[test] 387 | fn multiple_callees_that_satisfy_inlining_conditions() -> Result<()> { 388 | // Eventually we should support speculatively inlining multiple functions 389 | // (up to a configurable limit, of course) but until then test that we 390 | // choose the callee that was called the most. 391 | assert_optimize( 392 | Optimizer::new().min_ratio(0.1)?.min_total_calls(1), 393 | &[&[(10, 10), (20, 5)]], 394 | r#" 395 | (module 396 | (type (func (result i32))) 397 | 398 | (func (type 0) 399 | i32.const 11 400 | ) 401 | (func (type 0) 402 | i32.const 22 403 | ) 404 | 405 | (func (param i32) (result i32) 406 | local.get 0 407 | call_indirect (type 0) 408 | ) 409 | 410 | (table 100 100 funcref) 411 | (elem (i32.const 10) funcref (ref.func 0)) 412 | (elem (i32.const 20) funcref (ref.func 1)) 413 | ) 414 | "#, 415 | r#" 416 | (module 417 | (type (;0;) (func (result i32))) 418 | (type (;1;) (func (param i32) (result i32))) 419 | (func (;0;) (type 0) (result i32) 420 | (local i32) 421 | i32.const 11 422 | ) 423 | (func (;1;) (type 0) (result i32) 424 | (local i32) 425 | i32.const 22 426 | ) 427 | (func (;2;) (type 1) (param i32) (result i32) 428 | (local i32) 429 | local.get 0 430 | local.tee 1 431 | i32.const 10 432 | i32.eq 433 | if (type 0) (result i32) ;; label = @1 434 | i32.const 11 435 | else 436 | local.get 1 437 | call_indirect (type 0) 438 | end 439 | ) 440 | (table (;0;) 100 100 funcref) 
441 | (elem (;0;) (i32.const 10) funcref (ref.func 0)) 442 | (elem (;1;) (i32.const 20) funcref (ref.func 1)) 443 | ) 444 | "#, 445 | ) 446 | } 447 | 448 | #[test] 449 | fn multiple_callees_that_satisfy_inlining_conditions_with_tie_breaking() -> Result<()> { 450 | // Same as previous test but with callees that have been called the exact 451 | // same amount of times. Need to break ties deterministically. 452 | assert_optimize( 453 | Optimizer::new().min_ratio(0.1)?.min_total_calls(1), 454 | &[&[(10, 10), (20, 10)]], 455 | r#" 456 | (module 457 | (type (func (result i32))) 458 | 459 | (func (type 0) 460 | i32.const 11 461 | ) 462 | (func (type 0) 463 | i32.const 22 464 | ) 465 | 466 | (func (param i32) (result i32) 467 | local.get 0 468 | call_indirect (type 0) 469 | ) 470 | 471 | (table 100 100 funcref) 472 | (elem (i32.const 10) funcref (ref.func 0)) 473 | (elem (i32.const 20) funcref (ref.func 1)) 474 | ) 475 | "#, 476 | r#" 477 | (module 478 | (type (;0;) (func (result i32))) 479 | (type (;1;) (func (param i32) (result i32))) 480 | (func (;0;) (type 0) (result i32) 481 | (local i32) 482 | i32.const 11 483 | ) 484 | (func (;1;) (type 0) (result i32) 485 | (local i32) 486 | i32.const 22 487 | ) 488 | (func (;2;) (type 1) (param i32) (result i32) 489 | (local i32) 490 | local.get 0 491 | local.tee 1 492 | i32.const 10 493 | i32.eq 494 | if (type 0) (result i32) ;; label = @1 495 | i32.const 11 496 | else 497 | local.get 1 498 | call_indirect (type 0) 499 | end 500 | ) 501 | (table (;0;) 100 100 funcref) 502 | (elem (;0;) (i32.const 10) funcref (ref.func 0)) 503 | (elem (;1;) (i32.const 20) funcref (ref.func 1)) 504 | ) 505 | "#, 506 | ) 507 | } 508 | 509 | #[test] 510 | fn not_high_enough_ratio() -> Result<()> { 511 | assert_optimize( 512 | Optimizer::new().min_total_calls(1).min_ratio(1.0)?, 513 | &[&[(0, 1), (1, 9_999_999)]], 514 | r#" 515 | (module 516 | (type (func (result i32))) 517 | 518 | (func (type 0) 519 | i32.const 11 520 | ) 521 | (func (type 0) 522 | 
i32.const 22 523 | ) 524 | 525 | (func (param i32) (result i32) 526 | local.get 0 527 | call_indirect (type 0) 528 | ) 529 | 530 | (table 100 100 funcref) 531 | (elem (i32.const 0) funcref (ref.func 0) (ref.func 1)) 532 | ) 533 | "#, 534 | r#" 535 | (module 536 | (type (;0;) (func (result i32))) 537 | (type (;1;) (func (param i32) (result i32))) 538 | (func (;0;) (type 0) (result i32) 539 | (local i32) 540 | i32.const 11 541 | ) 542 | (func (;1;) (type 0) (result i32) 543 | (local i32) 544 | i32.const 22 545 | ) 546 | (func (;2;) (type 1) (param i32) (result i32) 547 | (local i32) 548 | local.get 0 549 | call_indirect (type 0) 550 | ) 551 | (table (;0;) 100 100 funcref) 552 | (elem (;0;) (i32.const 0) funcref (ref.func 0) (ref.func 1)) 553 | ) 554 | "#, 555 | ) 556 | } 557 | 558 | #[test] 559 | fn dont_inline_direct_recursion() -> Result<()> { 560 | assert_optimize( 561 | Optimizer::new().min_total_calls(1), 562 | &[&[(0, 1)]], 563 | r#" 564 | (module 565 | (type (func (param i32) (result i32))) 566 | 567 | (func (type 0) 568 | local.get 0 569 | local.get 0 570 | call_indirect (type 0) 571 | ) 572 | 573 | (table 100 100 funcref) 574 | (elem (i32.const 0) funcref (ref.func 0)) 575 | ) 576 | "#, 577 | r#" 578 | (module 579 | (type (;0;) (func (param i32) (result i32))) 580 | (func (;0;) (type 0) (param i32) (result i32) 581 | (local i32) 582 | local.get 0 583 | local.get 0 584 | call_indirect (type 0) 585 | ) 586 | (table (;0;) 100 100 funcref) 587 | (elem (;0;) (i32.const 0) funcref (ref.func 0)) 588 | ) 589 | "#, 590 | ) 591 | } 592 | 593 | #[test] 594 | fn dont_inline_direct_recursion_with_imports() -> Result<()> { 595 | assert_optimize( 596 | Optimizer::new().min_total_calls(100), 597 | &[&[(1, 100)]], 598 | r#" 599 | (module 600 | (type (func (param i32) (result i32))) 601 | 602 | (import "foo" "bar" (func (type 0))) 603 | 604 | (func (type 0) 605 | local.get 0 606 | local.get 0 607 | call_indirect (type 0) 608 | ) 609 | 610 | (table 100 100 funcref) 611 | (elem 
(i32.const 0) funcref (ref.func 1)) 612 | ) 613 | "#, 614 | r#" 615 | (module 616 | (type (;0;) (func (param i32) (result i32))) 617 | (import "foo" "bar" (func (;0;) (type 0))) 618 | (func (;1;) (type 0) (param i32) (result i32) 619 | (local i32) 620 | local.get 0 621 | local.get 0 622 | call_indirect (type 0) 623 | ) 624 | (table (;0;) 100 100 funcref) 625 | (elem (;0;) (i32.const 0) funcref (ref.func 1)) 626 | ) 627 | "#, 628 | ) 629 | } 630 | 631 | #[test] 632 | fn multiple_funcref_tables() -> Result<()> { 633 | assert_optimize( 634 | Optimizer::new().min_total_calls(1), 635 | &[&[(0, 1)], &[(0, 1)]], 636 | r#" 637 | (module 638 | (type (func (result i32))) 639 | 640 | (func (type 0) 641 | i32.const 11 642 | ) 643 | 644 | (func (type 0) 645 | i32.const 22 646 | ) 647 | 648 | (func (param i32 i32) (result i32 i32) 649 | local.get 0 650 | call_indirect 0 (type 0) 651 | local.get 1 652 | call_indirect 1 (type 0) 653 | ) 654 | 655 | (table 100 100 funcref) 656 | (elem (table 0) (i32.const 0) funcref (ref.func 0)) 657 | 658 | (table 100 100 funcref) 659 | (elem (table 1) (i32.const 0) funcref (ref.func 1)) 660 | ) 661 | "#, 662 | r#" 663 | (module 664 | (type (;0;) (func (result i32))) 665 | (type (;1;) (func (param i32 i32) (result i32 i32))) 666 | (func (;0;) (type 0) (result i32) 667 | (local i32) 668 | i32.const 11 669 | ) 670 | (func (;1;) (type 0) (result i32) 671 | (local i32) 672 | i32.const 22 673 | ) 674 | (func (;2;) (type 1) (param i32 i32) (result i32 i32) 675 | (local i32) 676 | local.get 0 677 | local.tee 2 678 | i32.const 0 679 | i32.eq 680 | if (type 0) (result i32) ;; label = @1 681 | i32.const 11 682 | else 683 | local.get 2 684 | call_indirect (type 0) 685 | end 686 | local.get 1 687 | local.tee 2 688 | i32.const 0 689 | i32.eq 690 | if (type 0) (result i32) ;; label = @1 691 | i32.const 22 692 | else 693 | local.get 2 694 | call_indirect 1 (type 0) 695 | end 696 | ) 697 | (table (;0;) 100 100 funcref) 698 | (table (;1;) 100 100 funcref) 699 | 
(elem (;0;) (i32.const 0) funcref (ref.func 0)) 700 | (elem (;1;) (table 1) (i32.const 0) funcref (ref.func 1)) 701 | ) 702 | "#, 703 | ) 704 | } 705 | 706 | #[test] 707 | fn inlining_into_inlined_function() -> Result<()> { 708 | assert_optimize( 709 | Optimizer::new().min_total_calls(1_000).max_inline_depth(2), 710 | &[&[(0, 1_000)], &[(1, 1_000)]], 711 | r#" 712 | (module 713 | (type (func (result i32))) 714 | (type (func (param i32) (result i32))) 715 | 716 | (func (type 0) 717 | i32.const 11 718 | ) 719 | 720 | (func (type 1) 721 | local.get 0 722 | call_indirect (type 0) 723 | ) 724 | 725 | (func (param i32 i32) (result i32) 726 | local.get 0 727 | local.get 1 728 | call_indirect (type 1) 729 | ) 730 | 731 | (table 100 100 funcref) 732 | (elem (i32.const 0) funcref (ref.func 0) (ref.func 1)) 733 | ) 734 | "#, 735 | r#" 736 | (module 737 | (type (;0;) (func (result i32))) 738 | (type (;1;) (func (param i32) (result i32))) 739 | (type (;2;) (func (param i32 i32) (result i32))) 740 | (func (;0;) (type 0) (result i32) 741 | (local i32) 742 | i32.const 11 743 | ) 744 | (func (;1;) (type 1) (param i32) (result i32) 745 | (local i32) 746 | local.get 0 747 | local.tee 1 748 | i32.const 0 749 | i32.eq 750 | if (type 0) (result i32) ;; label = @1 751 | i32.const 11 752 | else 753 | local.get 1 754 | call_indirect (type 0) 755 | end 756 | ) 757 | (func (;2;) (type 2) (param i32 i32) (result i32) 758 | (local i32 i32) 759 | local.get 0 760 | local.get 1 761 | local.tee 2 762 | i32.const 1 763 | i32.eq 764 | if (type 1) (param i32) (result i32) ;; label = @1 765 | local.set 3 766 | local.get 3 767 | local.tee 2 768 | i32.const 0 769 | i32.eq 770 | if (type 0) (result i32) ;; label = @2 771 | i32.const 11 772 | else 773 | local.get 2 774 | call_indirect (type 0) 775 | end 776 | else 777 | local.get 2 778 | call_indirect (type 1) 779 | end 780 | ) 781 | (table (;0;) 100 100 funcref) 782 | (elem (;0;) (i32.const 0) funcref (ref.func 0) (ref.func 1)) 783 | ) 784 | "#, 785 | ) 
786 | } 787 | 788 | #[test] 789 | fn reach_inline_depth_limit() -> Result<()> { 790 | assert_optimize( 791 | Optimizer::new().min_total_calls(1_000).max_inline_depth(1), 792 | &[&[(0, 1_000)], &[(1, 1_000)]], 793 | r#" 794 | (module 795 | (type (func (result i32))) 796 | (type (func (param i32) (result i32))) 797 | 798 | (func (type 0) 799 | i32.const 11 800 | ) 801 | 802 | (func (type 1) 803 | local.get 0 804 | call_indirect (type 0) 805 | ) 806 | 807 | (func (param i32 i32) (result i32) 808 | local.get 0 809 | local.get 1 810 | call_indirect (type 1) 811 | ) 812 | 813 | (table 100 100 funcref) 814 | (elem (i32.const 0) funcref (ref.func 0) (ref.func 1)) 815 | ) 816 | "#, 817 | r#" 818 | (module 819 | (type (;0;) (func (result i32))) 820 | (type (;1;) (func (param i32) (result i32))) 821 | (type (;2;) (func (param i32 i32) (result i32))) 822 | (func (;0;) (type 0) (result i32) 823 | (local i32) 824 | i32.const 11 825 | ) 826 | (func (;1;) (type 1) (param i32) (result i32) 827 | (local i32) 828 | local.get 0 829 | local.tee 1 830 | i32.const 0 831 | i32.eq 832 | if (type 0) (result i32) ;; label = @1 833 | i32.const 11 834 | else 835 | local.get 1 836 | call_indirect (type 0) 837 | end 838 | ) 839 | (func (;2;) (type 2) (param i32 i32) (result i32) 840 | (local i32 i32) 841 | local.get 0 842 | local.get 1 843 | local.tee 2 844 | i32.const 1 845 | i32.eq 846 | if (type 1) (param i32) (result i32) ;; label = @1 847 | local.set 3 848 | local.get 3 849 | call_indirect (type 0) 850 | else 851 | local.get 2 852 | call_indirect (type 1) 853 | end 854 | ) 855 | (table (;0;) 100 100 funcref) 856 | (elem (;0;) (i32.const 0) funcref (ref.func 0) (ref.func 1)) 857 | ) 858 | "#, 859 | ) 860 | } 861 | 862 | #[test] 863 | fn mutual_recursion() -> Result<()> { 864 | assert_optimize( 865 | Optimizer::new().min_total_calls(100).max_inline_depth(100), 866 | &[&[(1, 100)], &[(0, 100)]], 867 | r#" 868 | (module 869 | (type (func (param i32 i32) (result i32))) 870 | 871 | (func (type 0) 
872 | local.get 0 873 | local.get 1 874 | local.get 0 875 | call_indirect (type 0) 876 | ) 877 | 878 | (func (type 0) 879 | local.get 0 880 | local.get 1 881 | local.get 1 882 | call_indirect (type 0) 883 | ) 884 | 885 | (table 100 100 funcref) 886 | (elem (i32.const 0) funcref (ref.func 0) (ref.func 1)) 887 | ) 888 | "#, 889 | r#" 890 | (module 891 | (type (;0;) (func (param i32 i32) (result i32))) 892 | (func (;0;) (type 0) (param i32 i32) (result i32) 893 | (local i32 i32 i32) 894 | local.get 0 895 | local.get 1 896 | local.get 0 897 | local.tee 2 898 | i32.const 1 899 | i32.eq 900 | if (type 0) (param i32 i32) (result i32) ;; label = @1 901 | local.set 4 902 | local.set 3 903 | local.get 3 904 | local.get 4 905 | local.get 4 906 | call_indirect (type 0) 907 | else 908 | local.get 2 909 | call_indirect (type 0) 910 | end 911 | ) 912 | (func (;1;) (type 0) (param i32 i32) (result i32) 913 | (local i32 i32 i32) 914 | local.get 0 915 | local.get 1 916 | local.get 1 917 | local.tee 2 918 | i32.const 0 919 | i32.eq 920 | if (type 0) (param i32 i32) (result i32) ;; label = @1 921 | local.set 4 922 | local.set 3 923 | local.get 3 924 | local.get 4 925 | local.get 3 926 | call_indirect (type 0) 927 | else 928 | local.get 2 929 | call_indirect (type 0) 930 | end 931 | ) 932 | (table (;0;) 100 100 funcref) 933 | (elem (;0;) (i32.const 0) funcref (ref.func 0) (ref.func 1)) 934 | ) 935 | "#, 936 | ) 937 | } 938 | 939 | #[test] 940 | fn inline_a_function_with_many_locals() -> Result<()> { 941 | assert_optimize( 942 | Optimizer::new().min_total_calls(100), 943 | &[&[(0, 100)]], 944 | r#" 945 | (module 946 | (type (func (result i32))) 947 | 948 | (func (type 0) (local i32 i64 f32 f64 v128 externref funcref) 949 | local.get 0 950 | ) 951 | 952 | (func (param i32) (result i32) 953 | (local funcref externref v128 f64 f32 i64 i32) 954 | local.get 0 955 | call_indirect (type 0) 956 | ) 957 | 958 | (table 100 100 funcref) 959 | (elem (i32.const 0) funcref (ref.func 0) (ref.func 1)) 
960 | ) 961 | "#, 962 | r#" 963 | (module 964 | (type (;0;) (func (result i32))) 965 | (type (;1;) (func (param i32) (result i32))) 966 | (func (;0;) (type 0) (result i32) 967 | (local i32 i64 f32 f64 v128 externref funcref i32) 968 | local.get 0 969 | ) 970 | (func (;1;) (type 1) (param i32) (result i32) 971 | (local funcref externref v128 f64 f32 i64 i32 i32 i32 i64 f32 f64 v128 externref funcref) 972 | local.get 0 973 | local.tee 8 974 | i32.const 0 975 | i32.eq 976 | if (type 0) (result i32) ;; label = @1 977 | local.get 9 978 | else 979 | local.get 8 980 | call_indirect (type 0) 981 | end 982 | ) 983 | (table (;0;) 100 100 funcref) 984 | (elem (;0;) (i32.const 0) funcref (ref.func 0) (ref.func 1)) 985 | ) 986 | "#, 987 | ) 988 | } 989 | 990 | #[test] 991 | fn counters() -> Result<()> { 992 | assert_optimize( 993 | Optimizer::new() 994 | .min_total_calls(1) 995 | .emit_feedback_counters(true), 996 | &[&[(42, 1)]], 997 | r#" 998 | (module 999 | (type (;0;) (func (result i32))) 1000 | (type (;1;) (func (param i32) (result i32))) 1001 | (func (;0;) (type 0) (result i32) 1002 | i32.const 36 1003 | ) 1004 | (func (;1;) (type 1) (param i32) (result i32) 1005 | local.get 0 1006 | call_indirect (type 0) 1007 | ) 1008 | (table (;0;) 100 100 funcref) 1009 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 1010 | ) 1011 | "#, 1012 | r#" 1013 | (module 1014 | (type (;0;) (func (result i32))) 1015 | (type (;1;) (func (param i32) (result i32))) 1016 | (func (;0;) (type 0) (result i32) 1017 | (local i32) 1018 | i32.const 36 1019 | ) 1020 | (func (;1;) (type 1) (param i32) (result i32) 1021 | (local i32) 1022 | local.get 0 1023 | local.tee 1 1024 | i32.const 42 1025 | i32.eq 1026 | if (type 0) (result i32) ;; label = @1 1027 | global.get 0 1028 | i64.const 1 1029 | i64.add 1030 | global.set 0 1031 | i32.const 36 1032 | else 1033 | global.get 1 1034 | i64.const 1 1035 | i64.add 1036 | global.set 1 1037 | local.get 1 1038 | call_indirect (type 0) 1039 | end 1040 | ) 1041 | 
(table (;0;) 100 100 funcref) 1042 | (global (;0;) (mut i64) i64.const 0) 1043 | (global (;1;) (mut i64) i64.const 0) 1044 | (export "__winliner_counter_0_correct" (global 0)) 1045 | (export "__winliner_counter_0_incorrect" (global 1)) 1046 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 1047 | ) 1048 | "#, 1049 | ) 1050 | } 1051 | 1052 | #[test] 1053 | fn return_from_inlined_function_no_blocks() -> Result<()> { 1054 | assert_optimize( 1055 | Optimizer::new().min_total_calls(1), 1056 | &[&[(42, 1)]], 1057 | r#" 1058 | (module 1059 | (type (;0;) (func (result i32))) 1060 | (type (;1;) (func (param i32) (result i32))) 1061 | (func (;0;) (type 0) (result i32) 1062 | i32.const 36 1063 | return 1064 | ) 1065 | (func (;1;) (type 1) (param i32) (result i32) 1066 | local.get 0 1067 | call_indirect (type 0) 1068 | ) 1069 | (table (;0;) 100 100 funcref) 1070 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 1071 | ) 1072 | "#, 1073 | r#" 1074 | (module 1075 | (type (;0;) (func (result i32))) 1076 | (type (;1;) (func (param i32) (result i32))) 1077 | (func (;0;) (type 0) (result i32) 1078 | (local i32) 1079 | i32.const 36 1080 | return 1081 | ) 1082 | (func (;1;) (type 1) (param i32) (result i32) 1083 | (local i32) 1084 | local.get 0 1085 | local.tee 1 1086 | i32.const 42 1087 | i32.eq 1088 | if (type 0) (result i32) ;; label = @1 1089 | i32.const 36 1090 | br 0 (;@1;) 1091 | else 1092 | local.get 1 1093 | call_indirect (type 0) 1094 | end 1095 | ) 1096 | (table (;0;) 100 100 funcref) 1097 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 1098 | ) 1099 | "#, 1100 | ) 1101 | } 1102 | 1103 | #[test] 1104 | fn return_from_inlined_function_nested_blocks() -> Result<()> { 1105 | assert_optimize( 1106 | Optimizer::new().min_total_calls(1), 1107 | &[&[(42, 1)]], 1108 | r#" 1109 | (module 1110 | (type (;0;) (func (result i32))) 1111 | (type (;1;) (func (param i32) (result i32))) 1112 | (func (;0;) (type 0) (result i32) 1113 | block 1114 | loop 1115 | i32.const 1 1116 | if 1117 | 
i32.const 36 1118 | return 1119 | end 1120 | end 1121 | end 1122 | unreachable 1123 | ) 1124 | (func (;1;) (type 1) (param i32) (result i32) 1125 | block 1126 | loop 1127 | i32.const 1 1128 | if 1129 | local.get 0 1130 | call_indirect (type 0) 1131 | return 1132 | end 1133 | end 1134 | end 1135 | unreachable 1136 | ) 1137 | (table (;0;) 100 100 funcref) 1138 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 1139 | ) 1140 | "#, 1141 | r#" 1142 | (module 1143 | (type (;0;) (func (result i32))) 1144 | (type (;1;) (func (param i32) (result i32))) 1145 | (func (;0;) (type 0) (result i32) 1146 | (local i32) 1147 | block ;; label = @1 1148 | loop ;; label = @2 1149 | i32.const 1 1150 | if ;; label = @3 1151 | i32.const 36 1152 | return 1153 | end 1154 | end 1155 | end 1156 | unreachable 1157 | ) 1158 | (func (;1;) (type 1) (param i32) (result i32) 1159 | (local i32) 1160 | block ;; label = @1 1161 | loop ;; label = @2 1162 | i32.const 1 1163 | if ;; label = @3 1164 | local.get 0 1165 | local.tee 1 1166 | i32.const 42 1167 | i32.eq 1168 | if (type 0) (result i32) ;; label = @4 1169 | block ;; label = @5 1170 | loop ;; label = @6 1171 | i32.const 1 1172 | if ;; label = @7 1173 | i32.const 36 1174 | br 3 (;@4;) 1175 | end 1176 | end 1177 | end 1178 | unreachable 1179 | else 1180 | local.get 1 1181 | call_indirect (type 0) 1182 | end 1183 | return 1184 | end 1185 | end 1186 | end 1187 | unreachable 1188 | ) 1189 | (table (;0;) 100 100 funcref) 1190 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 1191 | ) 1192 | "#, 1193 | ) 1194 | } 1195 | 1196 | #[test] 1197 | fn fuel() -> Result<()> { 1198 | assert_optimize( 1199 | Optimizer::new().min_total_calls(1).fuel(Some(1)), 1200 | &[&[(42, 1)], &[(42, 1)]], 1201 | r#" 1202 | (module 1203 | (type (;0;) (func (result i32))) 1204 | (type (;1;) (func (param i32) (result i32))) 1205 | (func (;0;) (type 0) (result i32) 1206 | i32.const 42 1207 | ) 1208 | (func (;1;) (type 1) (param i32) (result i32) 1209 | local.get 0 1210 | 
call_indirect (type 0) 1211 | drop 1212 | local.get 0 1213 | call_indirect (type 0) 1214 | ) 1215 | (table (;0;) 100 100 funcref) 1216 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 1217 | ) 1218 | "#, 1219 | r#" 1220 | (module 1221 | (type (;0;) (func (result i32))) 1222 | (type (;1;) (func (param i32) (result i32))) 1223 | (func (;0;) (type 0) (result i32) 1224 | (local i32) 1225 | i32.const 42 1226 | ) 1227 | (func (;1;) (type 1) (param i32) (result i32) 1228 | (local i32) 1229 | local.get 0 1230 | local.tee 1 1231 | i32.const 42 1232 | i32.eq 1233 | if (type 0) (result i32) ;; label = @1 1234 | i32.const 42 1235 | else 1236 | local.get 1 1237 | call_indirect (type 0) 1238 | end 1239 | drop 1240 | local.get 0 1241 | call_indirect (type 0) 1242 | ) 1243 | (table (;0;) 100 100 funcref) 1244 | (elem (;0;) (i32.const 42) funcref (ref.func 0)) 1245 | ) 1246 | "#, 1247 | ) 1248 | } 1249 | 1250 | /// Tests for when we run optimization with bogus profiles. 1251 | /// 1252 | /// The exact behavior doesn't matter too much here (we do document the 1253 | /// requirement that the profile is valid for the input Wasm) as long as we 1254 | /// don't panic and are deterministic. 1255 | mod bogus_profile { 1256 | use super::*; 1257 | 1258 | #[test] 1259 | fn call_to_func_of_wrong_type() -> Result<()> { 1260 | // Note that this one could actually happen where we called the function 1261 | // and then it trapped, and if we merged multiple profiles we could even 1262 | // see many calls to a function of the wrong type. We could technically 1263 | // even speculatively trap here, but it doesn't gain us anything. 
1264 | assert_optimize( 1265 | Optimizer::new().min_total_calls(1), 1266 | &[&[(0, 1)]], 1267 | r#" 1268 | (module 1269 | (type (func (result i32))) 1270 | 1271 | (func (param i32) (result i32) 1272 | local.get 0 1273 | ) 1274 | 1275 | (func (param i32) (result i32) 1276 | local.get 0 1277 | call_indirect (type 0) 1278 | ) 1279 | 1280 | (table 100 100 funcref) 1281 | (elem (i32.const 0) funcref (ref.func 0)) 1282 | ) 1283 | "#, 1284 | r#" 1285 | (module 1286 | (type (;0;) (func (result i32))) 1287 | (type (;1;) (func (param i32) (result i32))) 1288 | (func (;0;) (type 1) (param i32) (result i32) 1289 | (local i32) 1290 | local.get 0 1291 | ) 1292 | (func (;1;) (type 1) (param i32) (result i32) 1293 | (local i32) 1294 | local.get 0 1295 | call_indirect (type 0) 1296 | ) 1297 | (table (;0;) 100 100 funcref) 1298 | (elem (;0;) (i32.const 0) funcref (ref.func 0)) 1299 | ) 1300 | "#, 1301 | ) 1302 | } 1303 | 1304 | #[test] 1305 | fn call_to_out_of_bounds_function_index() -> Result<()> { 1306 | assert_optimize( 1307 | Optimizer::new().min_total_calls(1), 1308 | &[&[(999, 1)]], 1309 | r#" 1310 | (module 1311 | (type (func (result i32))) 1312 | (func (param i32) (result i32) 1313 | local.get 0 1314 | call_indirect (type 0) 1315 | ) 1316 | (table 100 100 funcref) 1317 | ) 1318 | "#, 1319 | r#" 1320 | (module 1321 | (type (;0;) (func (result i32))) 1322 | (type (;1;) (func (param i32) (result i32))) 1323 | (func (;0;) (type 1) (param i32) (result i32) 1324 | (local i32) 1325 | local.get 0 1326 | call_indirect (type 0) 1327 | ) 1328 | (table (;0;) 100 100 funcref) 1329 | ) 1330 | "#, 1331 | ) 1332 | } 1333 | 1334 | #[test] 1335 | fn more_call_site_data_than_call_sites() -> Result<()> { 1336 | assert_optimize( 1337 | Optimizer::new().min_total_calls(1), 1338 | &[&[(0, 999)]], 1339 | r#" 1340 | (module 1341 | (func (param i32) (result i32) 1342 | local.get 0 1343 | ) 1344 | (table 100 100 funcref) 1345 | (elem (i32.const 0) funcref (ref.func 0)) 1346 | ) 1347 | "#, 1348 | r#" 
1349 | (module 1350 | (type (;0;) (func (param i32) (result i32))) 1351 | (func (;0;) (type 0) (param i32) (result i32) 1352 | (local i32) 1353 | local.get 0 1354 | ) 1355 | (table (;0;) 100 100 funcref) 1356 | (elem (;0;) (i32.const 0) funcref (ref.func 0)) 1357 | ) 1358 | "#, 1359 | ) 1360 | } 1361 | 1362 | #[test] 1363 | fn call_to_unknown_table_element() -> Result<()> { 1364 | assert_optimize( 1365 | Optimizer::new().min_total_calls(1), 1366 | &[&[(0, 1)]], 1367 | r#" 1368 | (module 1369 | (type (func (result i32))) 1370 | (func (param i32) (result i32) 1371 | local.get 0 1372 | call_indirect (type 0) 1373 | ) 1374 | (table 100 100 funcref) 1375 | (elem (i32.const 1) funcref (ref.func 0)) 1376 | ) 1377 | "#, 1378 | r#" 1379 | (module 1380 | (type (;0;) (func (result i32))) 1381 | (type (;1;) (func (param i32) (result i32))) 1382 | (func (;0;) (type 1) (param i32) (result i32) 1383 | (local i32) 1384 | local.get 0 1385 | call_indirect (type 0) 1386 | ) 1387 | (table (;0;) 100 100 funcref) 1388 | (elem (;0;) (i32.const 1) funcref (ref.func 0)) 1389 | ) 1390 | "#, 1391 | ) 1392 | } 1393 | } 1394 | -------------------------------------------------------------------------------- /tests/all/profile.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "serde")] 2 | 3 | use anyhow::Result; 4 | use winliner::{Profile, ProfileBuilder}; 5 | 6 | fn assert_profile(profile: Profile, expected: &str) -> Result<()> { 7 | let expected = expected.trim(); 8 | println!("Expected profile:\n{expected}"); 9 | 10 | let actual = serde_json::to_string_pretty(&profile)?; 11 | let actual = actual.trim(); 12 | println!("Actual profile:\n{actual}"); 13 | 14 | assert_eq!(expected, actual); 15 | Ok(()) 16 | } 17 | 18 | #[test] 19 | fn basic() -> Result<()> { 20 | let mut builder = ProfileBuilder::new(); 21 | 22 | for _ in 0..10 { 23 | builder.add_indirect_call(42, 36); 24 | } 25 | 26 | for _ in 0..3 { 27 | builder.add_indirect_call(666, 36); 
28 | } 29 | 30 | builder.add_indirect_call(123, 456); 31 | 32 | let profile = builder.build(); 33 | 34 | assert_profile( 35 | profile, 36 | r#" 37 | { 38 | "call_sites": { 39 | "36": { 40 | "total_call_count": 13, 41 | "callee_to_count": { 42 | "42": 10, 43 | "666": 3 44 | } 45 | }, 46 | "456": { 47 | "total_call_count": 1, 48 | "callee_to_count": { 49 | "123": 1 50 | } 51 | } 52 | } 53 | } 54 | "#, 55 | ) 56 | } 57 | 58 | #[test] 59 | fn merge() -> Result<()> { 60 | let mut profile1 = { 61 | let mut builder = ProfileBuilder::new(); 62 | builder.add_indirect_call(123, 456); 63 | builder.add_indirect_call(456, 789); 64 | builder.build() 65 | }; 66 | let profile2 = { 67 | let mut builder = ProfileBuilder::new(); 68 | builder.add_indirect_call(123, 456); 69 | builder.add_indirect_call(321, 987); 70 | builder.build() 71 | }; 72 | 73 | profile1.merge(&profile2); 74 | 75 | assert_profile( 76 | profile1, 77 | r#" 78 | { 79 | "call_sites": { 80 | "456": { 81 | "total_call_count": 2, 82 | "callee_to_count": { 83 | "123": 2 84 | } 85 | }, 86 | "789": { 87 | "total_call_count": 1, 88 | "callee_to_count": { 89 | "456": 1 90 | } 91 | }, 92 | "987": { 93 | "total_call_count": 1, 94 | "callee_to_count": { 95 | "321": 1 96 | } 97 | } 98 | } 99 | } 100 | "#, 101 | ) 102 | } 103 | --------------------------------------------------------------------------------