├── .git-hooks │   └── pre-commit ├── .gitignore ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benchmark.dat ├── init-git-hooks ├── robust-binary-search │   ├── Cargo.toml │   ├── README.md │   ├── flakiness_tuner │   │   ├── flakiness_benchmark.dat │   │   ├── inversion_ratio_versus_flakiness.plt │   │   └── recovered_flakiness.plt │   └── src │       ├── benchmark.rs │       ├── compressed_dag_flakiness_tracker.rs │       ├── dag.rs │       ├── flakiness_tracker.rs │       ├── flakiness_tuner.rs │       ├── lib.rs │       ├── range_map.rs │       └── tuner.rs ├── robust-git-bisect │   ├── Cargo.toml │   ├── README.md │   └── src │       └── main.rs └── test-commit /.git-hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if git rev-parse --verify HEAD >/dev/null 2>&1 6 | then 7 | against=HEAD 8 | else 9 | # Initial commit: diff against an empty tree object 10 | against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 11 | fi 12 | 13 | function grep-check { 14 | test="$1" 15 | ignore="$2" 16 | msg="$3" 17 | if (git diff --cached | egrep -i "$test" | grep -v IGNORE:"$ignore"); then 18 | echo "Error: $msg (This message can be suppressed by adding the string IGNORE:$ignore to the same line.)" 19 | exit 1 20 | fi 21 | } 22 | 23 | function grep-check-case-sensitive { 24 | test="$1" 25 | ignore="$2" 26 | msg="$3" 27 | if (git diff --cached | egrep "$test" | grep -v IGNORE:"$ignore"); then 28 | echo "Error: $msg (This message can be suppressed by adding the string IGNORE:$ignore to the same line.)" 29 | exit 1 30 | fi 31 | } 32 | 33 | grep-check-case-sensitive \ 34 | NOCOMMIT `#IGNORE:NOCOMMIT` \ 35 | NOCOMMIT `#IGNORE:NOCOMMIT` \ 36 | "Found a line tagged with NOCOMMIT." # IGNORE:NOCOMMIT 37 | 38 | if ! cargo fmt --all -- --check; then 39 | echo 'Please run `cargo fmt --all`.' 40 | exit 1 41 | fi 42 | 43 | # Check for trailing whitespace 44 | git diff-index --check --cached $against -- 45 | 46 | (cd robust-binary-search && cargo build --bin benchmark --features=benchmark) 47 | (cd robust-binary-search && cargo build --bin tuner --features=tuner) 48 | (cd robust-binary-search && cargo build --bin flakiness_tuner --features=flakiness_tuner) 49 | (cd robust-binary-search && cargo test --all-features) 50 | (cd robust-binary-search && cargo doc --all-features) 51 | (cd robust-git-bisect && cargo build) 52 | (cd robust-git-bisect && cargo test) 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /target 3 | **/*.rs.bk 4 | Cargo.lock 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement (CLA). You (or your employer) retain the copyright to your 10 | contribution; this simply gives us permission to use and redistribute your 11 | contributions as part of the project. Head over to 12 | <https://cla.developers.google.com/> to see your current agreements on file or 13 | to sign a new one. 14 | 15 | You generally only need to submit a CLA once, so if you've already submitted one 16 | (even if it was for a different project), you probably don't need to do it 17 | again.
18 | 19 | ## Code reviews 20 | 21 | All submissions, including submissions by project members, require review. We 22 | use GitHub pull requests for this purpose. Consult 23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 24 | information on using pull requests. 25 | 26 | ## Community Guidelines 27 | 28 | This project follows 29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 30 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [workspace] 16 | members = [ 17 | "robust-binary-search", 18 | "robust-git-bisect", 19 | ] 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Robust Binary Search 2 | 3 | Robust Binary Search provides a binary search implementation which is robust against errors during 4 | the search. In other words, if the comparison function sometimes returns an incorrect result, the 5 | search in this project will still converge on the correct solution. 
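
The core idea tolerates noise by accumulating evidence instead of trusting any single comparison. A minimal sketch of that idea (a toy model for illustration only, not this crate's API or its actual algorithm):

```rust
/// Toy noisy binary search: keep a weight per candidate position, always test
/// the weighted median, and boost the half of the range consistent with each
/// (possibly wrong) answer, so occasional bad answers are outvoted over time.
fn noisy_search(n: usize, rounds: usize, mut is_bad: impl FnMut(usize) -> bool) -> usize {
    let mut weights = vec![1.0f64; n];
    for _ in 0..rounds {
        // Find the weighted median.
        let total: f64 = weights.iter().sum();
        let (mut acc, mut median) = (0.0, 0);
        for (i, w) in weights.iter().enumerate() {
            acc += w;
            if acc >= total / 2.0 {
                median = i;
                break;
            }
        }
        // Boost the side of the range that this test result favors.
        let bad = is_bad(median);
        for (i, w) in weights.iter_mut().enumerate() {
            if (i <= median) == bad {
                *w *= 1.5;
            }
        }
    }
    // Most likely location of the first bad position.
    weights
        .iter()
        .enumerate()
        .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
        .map(|(i, _)| i)
        .unwrap()
}
```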
6 | 7 | This is adapted from the multiplicative weights algorithm in ["Noisy binary search and its 8 | applications" by Karp and Kleinberg](https://www.cs.cornell.edu/~rdk/papers/karpr2.pdf), with 9 | adjustments to make it deterministic and then extended to support directed acyclic graphs. 10 | 11 | ## Usage 12 | 13 | To use the git bisect replacement, install with `cargo install robust-git-bisect`, and then 14 | `~/.cargo/bin/robust-git-bisect $start_commit $end_commit $command_to_test_commit` 15 | 16 | For library usage, see `AutoSearcher` for binary search over a linear range and 17 | `AutoCompressedDAGSearcher` for binary search over a graph. 18 | 19 | ## Performance 20 | 21 | This code is optimized to minimize the number of tests executed (i.e. number of iterations) and not 22 | necessarily the CPU time of the search algorithm itself, so this will be slower than a plain binary 23 | search if the test is deterministic. 24 | 25 | The linear algorithm (`Searcher` and `AutoSearcher`) takes approximately `O(log N)` time per 26 | iteration. The graph algorithm (`CompressedDAGSearcher` and `AutoCompressedDAGSearcher`) takes 27 | approximately `O(segments)` time per iteration. 28 | 29 | robust-git-bisect shows improved performance compared with git bisect (higher accuracy with fewer 30 | iterations): 31 | 32 | Method | Iterations | Accuracy 33 | ---------------------------------- | ---------- | -------- 34 | robust-git-bisect with 0.99 target | 29.6558 | 99.5392% 35 | robust-git-bisect with 0.9 target | 26.1828 | 98.8950% 36 | git bisect | 16.1907 | 31.7972% 37 | git bisect with tests repeated | 35.0465 | 86.6359% 38 | git bisect repeated | 72.3674 | 86.1751% 39 | 40 | This test is run over the `git` git repo from e83c516331 to 54e85e7af1, simulating 9c3592cf3c as the 41 | bad commit, with a test that returns an incorrect result 5% of the time. See benchmark.rs for 42 | details. 43 | -------------------------------------------------------------------------------- /benchmark.dat: -------------------------------------------------------------------------------- 1 | Method Iterations Accuracy 2 | robust1 29.6558 0.995392 3 | robust2 26.1828 0.98895 4 | git1 16.1907 0.317972 5 | git2 35.0465 0.866359 6 | git3 72.3674 0.861751 7 | -------------------------------------------------------------------------------- /init-git-hooks: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2020 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | ln -s ../../.git-hooks/pre-commit .git/hooks/pre-commit 18 | -------------------------------------------------------------------------------- /robust-binary-search/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [package] 16 | name = "robust-binary-search" 17 | version = "0.1.1" 18 | authors = ["Adam Crume "] 19 | edition = "2018" 20 | license = "Apache-2.0" 21 | description = "Robust Binary Search provides a binary search implementation which is robust against errors during the search." 22 | repository = "https://github.com/adamcrume/robust-binary-search" 23 | categories = ["algorithms"] 24 | readme = "README.md" 25 | 26 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 27 | 28 | [dependencies] 29 | clap = "2.33.3" 30 | im-rc = "15.0.0" 31 | lazy_static = {version = "1.4.0", optional = true} 32 | log = "0.4" 33 | rand = {version = "0.7.3", optional = true} 34 | regex = {version = "1.3.9", optional = true} 35 | simplelog = "0.8.0" 36 | 37 | [features] 38 | flakiness_tuner = ["rand"] 39 | tuner = ["rand"] 40 | benchmark = ["regex", "lazy_static", "rand"] 41 | 42 | [[bin]] 43 | name = "flakiness_tuner" 44 | path = "src/flakiness_tuner.rs" 45 | required-features = ["flakiness_tuner"] 46 | 47 | [[bin]] 48 | name = "tuner" 49 | path = "src/tuner.rs" 50 | required-features = ["tuner"] 51 | 52 | [[bin]] 53 | name = "benchmark" 54 | path = "src/benchmark.rs" 55 | required-features = ["benchmark"] 56 | -------------------------------------------------------------------------------- /robust-binary-search/README.md: -------------------------------------------------------------------------------- 1 | # Robust Binary Search 2 | 3 | Robust Binary Search provides a binary search implementation which is robust against errors during 4 | the search. In other words, if the comparison function sometimes returns an incorrect result, the 5 | search in this project will still converge on the correct solution. 6 | 7 | This is adapted from the multiplicative weights algorithm in ["Noisy binary search and its 8 | applications" by Karp and Kleinberg](https://www.cs.cornell.edu/~rdk/papers/karpr2.pdf), with 9 | adjustments to make it deterministic and then extended to support directed acyclic graphs. 10 | 11 | ## Usage 12 | 13 | See `AutoSearcher` for binary search over a linear range and `AutoCompressedDAGSearcher` for binary 14 | search over a graph. 15 | 16 | If you're looking for a git bisect replacement, see the `robust-git-bisect` crate which uses this 17 | library. 18 | 19 | ## Performance 20 | 21 | This code is optimized to minimize the number of tests executed (i.e. number of iterations) and not 22 | necessarily the CPU time of the search algorithm itself, so this will be slower than a plain binary 23 | search if the test is deterministic. 24 | 25 | The linear algorithm (`Searcher` and `AutoSearcher`) takes approximately `O(log N)` time per 26 | iteration. The graph algorithm (`CompressedDAGSearcher` and `AutoCompressedDAGSearcher`) takes 27 | approximately `O(segments)` time per iteration.
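
As a concrete sketch of the driver loop described under Usage: the method names (`new`, `next_index`, `report`, `best_index`) and the fixed iteration budget below are assumptions for illustration; consult the `AutoSearcher` documentation for the actual signatures.

```rust
use robust_binary_search::AutoSearcher;

// Hypothetical driver loop: repeatedly test the index the searcher asks
// about, feed back the (possibly flaky) result, and read off the most
// likely location of the change.
fn find_first_bad(len: usize, mut test: impl FnMut(usize) -> bool) -> usize {
    let mut searcher = AutoSearcher::new(len);
    for _ in 0..100 {
        let index = searcher.next_index();
        searcher.report(index, test(index));
    }
    searcher.best_index()
}
```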
28 | -------------------------------------------------------------------------------- /robust-binary-search/flakiness_tuner/flakiness_benchmark.dat: -------------------------------------------------------------------------------- 1 | 0 0 2593439 2 | 0.01 7687 2720030 3 | 0.02 15050 2846042 4 | 0.03 24357 2986433 5 | 0.04 34078 3131896 6 | 0.05 45697 3298190 7 | 0.06 55322 3428758 8 | 0.07 69191 3619448 9 | 0.08 82564 3801783 10 | 0.09 97724 3979018 11 | 0.1 112328 4173530 12 | 0.11 134665 4435814 13 | 0.12 147606 4605638 14 | 0.13 171462 4892434 15 | 0.14 195545 5151107 16 | 0.15 220347 5419433 17 | 0.16 249543 5744891 18 | 0.17 272772 6000939 19 | 0.18 308700 6352805 20 | 0.19 333235 6617986 21 | 0.2 382428 7082551 22 | 0.21 411519 7374794 23 | 0.22 461558 7847944 24 | 0.23 495200 8160835 25 | 0.24 554680 8727600 26 | 0.25 598779 9127465 27 | 0.26 666527 9715314 28 | 0.27 736442 10346670 29 | 0.28 813125 10979006 30 | 0.29 885590 11582958 31 | 0.3 960822 12225425 32 | 0.31 1073110 13110428 33 | 0.32 1160100 13824452 34 | 0.33 1257959 14596961 35 | 0.34 1406794 15783963 36 | 0.35 1532675 16685239 37 | 0.36 1697488 17935685 38 | 0.37 1856446 19145724 39 | 0.38 2018349 20300887 40 | 0.39 2247915 21982339 41 | 0.4 2424495 23207208 42 | 0.41 2669295 24932589 43 | 0.42 2991438 27138683 44 | 0.43 3296230 29187209 45 | 0.44 3628420 31469382 46 | 0.45 4030711 34195380 47 | 0.46 4383899 36522519 48 | 0.47 4930148 40091975 49 | 0.48 5443270 43331428 50 | 0.49 6031369 47021198 51 | 0.5 6628093 50749235 52 | 0.51 7564366 56688537 53 | 0.52 8245459 60740592 54 | 0.53 9348164 67335929 55 | 0.54 10525423 74240936 56 | 0.55 12141745 83686051 57 | 0.56 13275697 90435195 58 | 0.57 15091455 100839773 59 | 0.58 16841746 110785483 60 | 0.59 19142239 123724056 61 | 0.6 21904061 139159586 62 | 0.61 25109922 156773363 63 | 0.62 28657470 176049542 64 | 0.63 32397304 196384558 65 | 0.64 38273616 227673932 66 | 0.65 43025784 252630874 67 | 0.66 49827290 287747615 68 | 0.67 58564546 333297714 69 | 0.68 68079350 381312202 70 | 0.69 80696814 445160005 71 | 0.7 94252525 512653597 72 | 0.71 109376810 587599390 73 | 0.72 132482809 698838332 74 | 0.73 155231574 812296804 75 | 0.74 192758716 991271166 76 | 0.75 225292984 1144696478 77 | 0.76 275625277 1382092574 78 | 0.77 326625985 1622162374 79 | 0.78 411268545 2015285483 80 | 0.79 509601079 2463722424 81 | -------------------------------------------------------------------------------- /robust-binary-search/flakiness_tuner/inversion_ratio_versus_flakiness.plt: -------------------------------------------------------------------------------- 1 | set terminal png size 800,600 2 | set out 'inversion_ratio_versus_flakiness.png' 3 | unset key 4 | set xlabel 'Flakiness' 5 | set ylabel 'Inversion ratio' 6 | plot 'flakiness_benchmark.dat' u 1:($2/$3) 7 | -------------------------------------------------------------------------------- /robust-binary-search/flakiness_tuner/recovered_flakiness.plt: -------------------------------------------------------------------------------- 1 | # To generate the plot, run the tuner: 2 | # cargo run --bin flakiness_tuner --features flakiness_tuner -- $PWD/flakiness_benchmark.dat 3 | # and then: 4 | # gnuplot recovered_flakiness.plt 5 | 6 | set terminal png size 800,600 7 | set out 'recovered_flakiness.png' 8 | set key right bottom 9 | set xlabel 'True flakiness' 10 | set ylabel 'Recovered flakiness' 11 | f(x) = a*x**2 + b*x 12 | fit f(x) 'flakiness_benchmark.dat' u ($2/$3):1 via a, b 13 | plot 'flakiness_benchmark.dat' u (f($2/$3)):1 t 
sprintf("Recovered flakiness: %.4f ρ² + %.4f ρ", a, b), \ 14 | x t 'True flakiness' 15 | -------------------------------------------------------------------------------- /robust-binary-search/src/benchmark.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use lazy_static::lazy_static; 16 | use log::info; 17 | use rand; 18 | use rand::rngs::ThreadRng; 19 | use rand::Rng; 20 | use regex::Regex; 21 | use simplelog::Config; 22 | use simplelog::LevelFilter; 23 | use simplelog::TermLogger; 24 | use simplelog::TerminalMode; 25 | use std::collections::HashMap; 26 | use std::env; 27 | use std::error::Error; 28 | use std::fs::File; 29 | use std::io::Write; 30 | use std::path::Path; 31 | use std::process; 32 | use std::process::Command; 33 | 34 | fn run(name: &str, mut configure: F) -> Result 35 | where 36 | F: FnMut(&mut Command) -> &mut Command, 37 | { 38 | let mut command = Command::new(name); 39 | let configured = configure(&mut command); 40 | info!("Executing {:?}", configured); 41 | let out = configured.output().unwrap(); 42 | if !out.status.success() { 43 | let msg = format!("failed to execute {:?}", configured); 44 | info!( 45 | "{}: {}{}", 46 | msg, 47 | String::from_utf8(out.stdout).unwrap(), 48 | String::from_utf8(out.stderr).unwrap() 49 | ); 50 | return Err(msg); 51 | } 52 | info!("Command {:?} finished successfully", configured); 53 | Ok(String::from_utf8(out.stdout).unwrap()) 54 | } 55 | 56 | lazy_static! 
{ 57 | // `git bisect` output looks like: 58 | // Bisecting: 30215 revisions left to test after this (roughly 15 steps) 59 | // [53284de77712b2234c739afa3aa5f024fc89fc83] Second half of the fifth batch for 1.8.0 60 | static ref BISECT_COMMIT_RE: Regex = Regex::new("^\\[([a-z0-9]+)\\].*").unwrap(); 61 | static ref FIRST_BAD_COMMIT_RE: Regex = Regex::new("^([a-z0-9]+) is the first bad commit.*").unwrap(); 62 | static ref ROBUST_BISECT_RE: Regex = Regex::new("Most likely commit is ([a-z0-9]+) .* after ([0-9]+) iterations").unwrap(); 63 | } 64 | 65 | enum BisectResult { 66 | Nothing, 67 | Next(String), 68 | Final(String), 69 | } 70 | 71 | fn git_bisect<P: AsRef<Path>>(dir: P, heads: bool, commit: &str) -> Result<BisectResult, String> { 72 | let dir = &dir; 73 | let output = run("git", |cmd| { 74 | cmd.current_dir(dir) 75 | .arg("bisect") 76 | .arg(if heads { "bad" } else { "good" }) 77 | .arg(commit) 78 | })?; 79 | for line in output.lines() { 80 | if let Some(captures) = BISECT_COMMIT_RE.captures(line) { 81 | return Ok(BisectResult::Next( 82 | captures.get(1).unwrap().as_str().to_string(), 83 | )); 84 | } 85 | if let Some(captures) = FIRST_BAD_COMMIT_RE.captures(line) { 86 | return Ok(BisectResult::Final( 87 | captures.get(1).unwrap().as_str().to_string(), 88 | )); 89 | } 90 | } 91 | Ok(BisectResult::Nothing) 92 | } 93 | 94 | fn git_is_ancestor<P: AsRef<Path>>(dir: P, r1: &str, r2: &str) -> bool { 95 | let dir = &dir; 96 | run("git", |cmd| { 97 | cmd.current_dir(dir) 98 | .arg("merge-base") 99 | .arg("--is-ancestor") 100 | .arg(r1) 101 | .arg(r2) 102 | }) 103 | .is_ok() 104 | } 105 | 106 | struct CommitTester { 107 | target_commit: String, 108 | rng: ThreadRng, 109 | flakiness: f64, 110 | } 111 | 112 | impl CommitTester { 113 | fn is_bad<P: AsRef<Path>>(&mut self, dir: P, commit: &str) -> Result<bool, String> { 114 | if self.rng.gen::<f64>() < self.flakiness { 115 | Ok(self.rng.gen()) 116 | } else { 117 | Ok(git_is_ancestor(&dir, &self.target_commit, commit)) 118 | } 119 | } 120 | } 121 | 122 | fn run_git_bisect<P: AsRef<Path>>( 123 | dir: P, 124 | good_commit: &str, 125 | bad_commit: &str, 126 | target_commit: &str, 127 | commit_tester: &mut CommitTester, 128 | inner_min_best: usize, 129 | outer_min_best: usize, 130 | ) -> Result<(usize, bool), String> { 131 | let mut iterations = 0; 132 | let mut final_commits = HashMap::new(); 133 | loop { 134 | run("git", |cmd| { 135 | cmd.current_dir(&dir).arg("bisect").arg("reset") 136 | }) 137 | .unwrap(); 138 | run("git", |cmd| { 139 | cmd.current_dir(&dir).arg("bisect").arg("start") 140 | }) 141 | .unwrap(); 142 | git_bisect(&dir, false, good_commit)?; 143 | let mut final_commit: Option<String> = None; 144 | let mut next = match git_bisect(&dir, true, bad_commit)? { 145 | BisectResult::Nothing => None, 146 | BisectResult::Next(commit) => Some(commit), 147 | BisectResult::Final(commit) => { 148 | final_commit = Some(commit); 149 | None 150 | } 151 | }; 152 | while let Some(next_commit) = next { 153 | let mut heads = 0; 154 | let mut tails = 0; 155 | while heads < inner_min_best && tails < inner_min_best { 156 | iterations += 1; 157 | if commit_tester.is_bad(&dir, &next_commit)? { 158 | heads += 1; 159 | } else { 160 | tails += 1; 161 | } 162 | } 163 | match git_bisect(&dir, heads > tails, &next_commit)?
{ 164 | BisectResult::Nothing => next = None, 165 | BisectResult::Next(commit) => next = Some(commit), 166 | BisectResult::Final(commit) => { 167 | next = None; 168 | final_commit = Some(commit); 169 | } 170 | } 171 | } 172 | let count = { 173 | let count = final_commits 174 | .entry(final_commit.clone().unwrap()) 175 | .or_insert(0); 176 | *count += 1; 177 | *count 178 | }; 179 | if count >= outer_min_best { 180 | return Ok((iterations, final_commit == Some(target_commit.to_string()))); 181 | } 182 | } 183 | } 184 | 185 | fn run_robust_bisect<P: AsRef<Path>>( 186 | bisect: &str, 187 | dir: P, 188 | good_commit: &str, 189 | bad_commit: &str, 190 | test_commit: &str, 191 | flakiness: f64, 192 | target_commit: &str, 193 | min_likelihood: f64, 194 | ) -> Result<(usize, bool), Box<dyn Error>> { 195 | let output = run(bisect, |cmd| { 196 | cmd.current_dir(&dir) 197 | .arg(good_commit) 198 | .arg(bad_commit) 199 | .arg(format!( 200 | "'{}' {} {}", 201 | test_commit, 202 | (flakiness * 100.0) as usize, 203 | target_commit 204 | )) 205 | .arg("-vv") 206 | .arg(format!("--min-likelihood={}", min_likelihood)) 207 | }) 208 | .unwrap(); 209 | let mut best_commit = None; 210 | let mut iterations = 0; 211 | for line in output.lines() { 212 | if let Some(captures) = ROBUST_BISECT_RE.captures(line) { 213 | best_commit = Some(captures.get(1).unwrap().as_str().to_string()); 214 | iterations = captures.get(2).unwrap().as_str().parse()?; 215 | } 216 | } 217 | Ok((iterations, best_commit == Some(target_commit.to_string()))) 218 | } 219 | 220 | fn main() -> Result<(), Box<dyn Error>> { 221 | let args: Vec<String> = env::args().collect(); 222 | if args.len() != 5 { 223 | println!("Usage: main <dir> <output> <bisect> <test_commit>"); 224 | process::exit(1); 225 | } 226 | TermLogger::init(LevelFilter::Info, Config::default(), TerminalMode::Mixed).unwrap(); 227 | let dir = &args[1]; 228 | let good_commit = "e83c516331"; 229 | let bad_commit = "54e85e7af1"; 230 | let target_commit = "9c3592cf3cf9a9d49ad9a69b76d2be130a21d499"; 231 | let mut f = File::create(&args[2])?; 232 | let bisect = &args[3]; 233 | let test_commit = &args[4]; 234 | println!("test_commit = {}", test_commit); 235 | let flakiness = 0.1; 236 | let mut commit_tester = CommitTester { 237 | target_commit: target_commit.to_string(), 238 | rng: rand::thread_rng(), 239 | flakiness, 240 | }; 241 | loop { 242 | let (iterations0, correct0) = run_robust_bisect( 243 | bisect, 244 | dir, 245 | good_commit, 246 | bad_commit, 247 | test_commit, 248 | flakiness, 249 | target_commit, 250 | 0.99, 251 | )?; 252 | let (iterations1, correct1) = run_robust_bisect( 253 | bisect, 254 | dir, 255 | good_commit, 256 | bad_commit, 257 | test_commit, 258 | flakiness, 259 | target_commit, 260 | 0.9, 261 | )?; 262 | let (iterations2, correct2) = run_git_bisect( 263 | dir, 264 | good_commit, 265 | bad_commit, 266 | target_commit, 267 | &mut commit_tester, 268 | 1, 269 | 1, 270 | )?; 271 | let (iterations3, correct3) = run_git_bisect( 272 | dir, 273 | good_commit, 274 | bad_commit, 275 | target_commit, 276 | &mut commit_tester, 277 | 2, 278 | 1, 279 | )?; 280 | let (iterations4, correct4) = run_git_bisect( 281 | dir, 282 | good_commit, 283 | bad_commit, 284 | target_commit, 285 | &mut commit_tester, 286 | 1, 287 | 2, 288 | )?; 289 | let correct = |b| if b { "correct" } else { "incorrect" }; 290 | writeln!( 291 | f, 292 | "{} {} {} {} {} {} {} {} {} {} {}", 293 | flakiness, 294 | iterations0, 295 | correct(correct0), 296 | iterations1, 297 | correct(correct1), 298 | iterations2, 299 | correct(correct2), 300 | iterations3, 301 | correct(correct3), 302 |
iterations4, 303 | correct(correct4) 304 | )?; 305 | f.sync_data()?; 306 | } 307 | } 308 | -------------------------------------------------------------------------------- /robust-binary-search/src/compressed_dag_flakiness_tracker.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use crate::CompressedDag; 16 | use crate::CompressedDagNodeRef; 17 | use crate::FlakinessTracker; 18 | use std::borrow::Borrow; 19 | use std::collections::BTreeMap; 20 | use std::collections::HashMap; 21 | use std::rc::Rc; 22 | 23 | /// Calculates vote inversions over a graph, which can be used to estimate flakiness. 24 | #[derive(Clone, Debug)] 25 | pub(crate) struct CompressedDagFlakinessTracker { 26 | graph: Rc<CompressedDag>, 27 | votes: BTreeMap<usize, FlakinessTracker>, 28 | } 29 | 30 | impl CompressedDagFlakinessTracker { 31 | /// Creates a CompressedDagFlakinessTracker for the given graph. 32 | pub fn new(graph: Rc<CompressedDag>) -> Self { 33 | Self { 34 | graph, 35 | votes: BTreeMap::new(), 36 | } 37 | } 38 | 39 | /// Adds a vote to the internal statistics. With low flakiness, true votes are expected not to 40 | /// appear in the ancestors of false votes. 41 | pub fn report(&mut self, node: CompressedDagNodeRef, heads: bool) { 42 | self.votes 43 | .entry(node.segment) 44 | .or_insert_with(FlakinessTracker::default) 45 | .report(node.index, heads); 46 | } 47 | 48 | /// Returns the number of inversions and four times the number of "random" inversions. 49 | /// The "random" inversion count is the number of inversions that would be expected if the votes were 50 | /// cast at the same nodes but were randomly half heads and half tails. It is scaled by four 51 | /// to avoid loss of precision.
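    ///
    /// For example, in a three-segment join (see `half_inverted_join` in the tests
    /// below), a heads vote in segment 0 together with tails votes in segments 1
    /// and 2 yields one inversion, because the heads vote is an ancestor of the
    /// tails vote in segment 2, so this returns `(1, 5)`.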
52 | fn inversions(&self) -> (usize, usize) { 53 | let mut votes_at_segment = HashMap::new(); 54 | let graph: &CompressedDag = self.graph.borrow(); 55 | for segment in self.votes.keys() { 56 | let inputs = graph.node(*segment).inputs(); 57 | if !inputs.is_empty() { 58 | let (input_heads, input_votes) = self 59 | .votes 60 | .get(&inputs[0]) 61 | .map(|v| (v.total_heads(), v.total_votes())) 62 | .unwrap_or((0, 0)); 63 | let (mut heads, mut votes) = *votes_at_segment.get(&inputs[0]).unwrap_or(&(0, 0)); 64 | heads += input_heads; 65 | votes += input_votes; 66 | for ancestor in graph.node(*segment).remainder_ancestors() { 67 | let (ancestor_heads, ancestor_votes) = self 68 | .votes 69 | .get(ancestor) 70 | .map(|v| (v.total_heads(), v.total_votes())) 71 | .unwrap_or((0, 0)); 72 | heads += ancestor_heads; 73 | votes += ancestor_votes; 74 | } 75 | votes_at_segment.insert(segment, (heads, votes)); 76 | } 77 | } 78 | let mut inversions = 0; 79 | let mut random_inversions = 0; 80 | for (segment, votes) in &self.votes { 81 | let (segment_heads, segment_votes) = *votes_at_segment.get(&segment).unwrap_or(&(0, 0)); 82 | let (inv, rand_inv) = votes.inversions(); 83 | inversions += votes.total_tails() * segment_heads + inv; 84 | random_inversions += votes.total_votes() * segment_votes + rand_inv; 85 | } 86 | (inversions, random_inversions) 87 | } 88 | 89 | /// Returns the estimated flakiness based on the votes, where 0.0 is deterministic and 1.0 is 90 | /// complete randomness. 91 | pub fn flakiness(&self) -> f64 { 92 | // See note in FlakinessTracker::flakiness. 93 | let (inv, rand_inv) = self.inversions(); 94 | let tmp = 1.0 - (inv + 1) as f64 / (rand_inv as f64 / 4.0 + 4.0 / 3.0); 95 | 1.0 - tmp.max(0.0).sqrt() 96 | } 97 | } 98 | 99 | #[cfg(test)] 100 | mod tests { 101 | use super::*; 102 | use crate::CompressedDagSegment; 103 | 104 | macro_rules! 
assert_flakiness { 105 | ($tracker:expr, $flakiness:expr) => { 106 | let flakiness = $tracker.flakiness(); 107 | assert!( 108 | (flakiness - $flakiness).abs() < 1e-4, 109 | "flakiness = {}", 110 | flakiness 111 | ); 112 | }; 113 | } 114 | 115 | #[test] 116 | fn empty() { 117 | let mut graph = CompressedDag::default(); 118 | graph.add_node(CompressedDagSegment::new(10), vec![]); 119 | let tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 120 | assert_eq!(tracker.inversions(), (0, 0)); 121 | assert_flakiness!(tracker, 0.5); 122 | } 123 | 124 | #[test] 125 | fn one_head() { 126 | let mut graph = CompressedDag::default(); 127 | graph.add_node(CompressedDagSegment::new(10), vec![]); 128 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 129 | tracker.report( 130 | CompressedDagNodeRef { 131 | segment: 0, 132 | index: 0, 133 | }, 134 | true, 135 | ); 136 | assert_eq!(tracker.inversions(), (0, 1)); 137 | assert_flakiness!(tracker, 0.3930); 138 | } 139 | 140 | #[test] 141 | fn one_tail() { 142 | let mut graph = CompressedDag::default(); 143 | graph.add_node(CompressedDagSegment::new(10), vec![]); 144 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 145 | tracker.report( 146 | CompressedDagNodeRef { 147 | segment: 0, 148 | index: 0, 149 | }, 150 | false, 151 | ); 152 | assert_eq!(tracker.inversions(), (0, 1)); 153 | assert_flakiness!(tracker, 0.3930); 154 | } 155 | 156 | #[test] 157 | fn two_heads_same_bucket() { 158 | let mut graph = CompressedDag::default(); 159 | graph.add_node(CompressedDagSegment::new(10), vec![]); 160 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 161 | tracker.report( 162 | CompressedDagNodeRef { 163 | segment: 0, 164 | index: 0, 165 | }, 166 | true, 167 | ); 168 | tracker.report( 169 | CompressedDagNodeRef { 170 | segment: 0, 171 | index: 0, 172 | }, 173 | true, 174 | ); 175 | assert_eq!(tracker.inversions(), (0, 4)); 176 | assert_flakiness!(tracker, 0.2441); 177 | } 178 | 179 | #[test] 180 | fn two_heads_different_buckets() { 181 | let mut graph = CompressedDag::default(); 182 | graph.add_node(CompressedDagSegment::new(10), vec![]); 183 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 184 | tracker.report( 185 | CompressedDagNodeRef { 186 | segment: 0, 187 | index: 0, 188 | }, 189 | true, 190 | ); 191 | tracker.report( 192 | CompressedDagNodeRef { 193 | segment: 0, 194 | index: 1, 195 | }, 196 | true, 197 | ); 198 | assert_eq!(tracker.inversions(), (0, 3)); 199 | assert_flakiness!(tracker, 0.2789); 200 | } 201 | 202 | #[test] 203 | fn two_tails_same_bucket() { 204 | let mut graph = CompressedDag::default(); 205 | graph.add_node(CompressedDagSegment::new(10), vec![]); 206 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 207 | tracker.report( 208 | CompressedDagNodeRef { 209 | segment: 0, 210 | index: 0, 211 | }, 212 | false, 213 | ); 214 | tracker.report( 215 | CompressedDagNodeRef { 216 | segment: 0, 217 | index: 0, 218 | }, 219 | false, 220 | ); 221 | assert_eq!(tracker.inversions(), (0, 4)); 222 | assert_flakiness!(tracker, 0.2441); 223 | } 224 | 225 | #[test] 226 | fn two_tails_different_buckets() { 227 | let mut graph = CompressedDag::default(); 228 | graph.add_node(CompressedDagSegment::new(10), vec![]); 229 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 230 | tracker.report( 231 | CompressedDagNodeRef { 232 | segment: 0, 233 | index: 0, 234 | }, 235 | false, 236 | ); 237 | tracker.report( 238 | CompressedDagNodeRef { 
239 | segment: 0, 240 | index: 1, 241 | }, 242 | false, 243 | ); 244 | assert_eq!(tracker.inversions(), (0, 3)); 245 | assert_flakiness!(tracker, 0.2789); 246 | } 247 | 248 | #[test] 249 | fn one_head_one_tail_same_bucket() { 250 | let mut graph = CompressedDag::default(); 251 | graph.add_node(CompressedDagSegment::new(10), vec![]); 252 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 253 | tracker.report( 254 | CompressedDagNodeRef { 255 | segment: 0, 256 | index: 0, 257 | }, 258 | false, 259 | ); 260 | tracker.report( 261 | CompressedDagNodeRef { 262 | segment: 0, 263 | index: 0, 264 | }, 265 | true, 266 | ); 267 | assert_eq!(tracker.inversions(), (1, 4)); 268 | assert_flakiness!(tracker, 0.622); 269 | } 270 | 271 | #[test] 272 | fn one_head_one_tail_inverted() { 273 | let mut graph = CompressedDag::default(); 274 | graph.add_node(CompressedDagSegment::new(10), vec![]); 275 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 276 | tracker.report( 277 | CompressedDagNodeRef { 278 | segment: 0, 279 | index: 0, 280 | }, 281 | true, 282 | ); 283 | tracker.report( 284 | CompressedDagNodeRef { 285 | segment: 0, 286 | index: 1, 287 | }, 288 | false, 289 | ); 290 | assert_eq!(tracker.inversions(), (1, 3)); 291 | assert_flakiness!(tracker, 0.8); 292 | } 293 | 294 | #[test] 295 | fn one_head_one_tail_not_inverted() { 296 | let mut graph = CompressedDag::default(); 297 | graph.add_node(CompressedDagSegment::new(10), vec![]); 298 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 299 | tracker.report( 300 | CompressedDagNodeRef { 301 | segment: 0, 302 | index: 0, 303 | }, 304 | false, 305 | ); 306 | tracker.report( 307 | CompressedDagNodeRef { 308 | segment: 0, 309 | index: 1, 310 | }, 311 | true, 312 | ); 313 | assert_eq!(tracker.inversions(), (0, 3)); 314 | assert_flakiness!(tracker, 0.2789); 315 | } 316 | 317 | #[test] 318 | fn flakiness_scan_one_index() { 319 | let mut graph = CompressedDag::default(); 320 | graph.add_node(CompressedDagSegment::new(10), vec![]); 321 | let graph = Rc::new(graph); 322 | for i in 0..100 { 323 | let mut tracker = CompressedDagFlakinessTracker::new(graph.clone()); 324 | for _ in 0..i { 325 | tracker.report( 326 | CompressedDagNodeRef { 327 | segment: 0, 328 | index: 0, 329 | }, 330 | false, 331 | ); 332 | } 333 | for _ in i..100 { 334 | tracker.report( 335 | CompressedDagNodeRef { 336 | segment: 0, 337 | index: 0, 338 | }, 339 | true, 340 | ); 341 | } 342 | let expected_flakiness = if i < 50 { i } else { 100 - i } as f64 / 50.0; 343 | assert!( 344 | (tracker.flakiness() - expected_flakiness).abs() < 0.02, 345 | "i = {}, flakiness = {}, expected_flakiness = {}", 346 | i, 347 | tracker.flakiness(), 348 | expected_flakiness 349 | ); 350 | } 351 | } 352 | 353 | #[test] 354 | fn flakiness_scan_two_indexes() { 355 | let mut graph = CompressedDag::default(); 356 | graph.add_node(CompressedDagSegment::new(10), vec![]); 357 | let graph = Rc::new(graph); 358 | for i in 0..100 { 359 | let mut tracker = CompressedDagFlakinessTracker::new(graph.clone()); 360 | for _ in 0..i { 361 | tracker.report( 362 | CompressedDagNodeRef { 363 | segment: 0, 364 | index: 0, 365 | }, 366 | true, 367 | ); 368 | tracker.report( 369 | CompressedDagNodeRef { 370 | segment: 0, 371 | index: 1, 372 | }, 373 | true, 374 | ); 375 | } 376 | for _ in i..100 { 377 | tracker.report( 378 | CompressedDagNodeRef { 379 | segment: 0, 380 | index: 0, 381 | }, 382 | false, 383 | ); 384 | tracker.report( 385 | CompressedDagNodeRef { 386 | segment: 0, 
387 | index: 1, 388 | }, 389 | false, 390 | ); 391 | } 392 | let expected_flakiness = if i < 50 { i } else { 100 - i } as f64 / 50.0; 393 | assert!( 394 | (tracker.flakiness() - expected_flakiness).abs() < 0.02, 395 | "i = {}, flakiness = {}, expected_flakiness = {}", 396 | i, 397 | tracker.flakiness(), 398 | expected_flakiness 399 | ); 400 | } 401 | } 402 | 403 | #[test] 404 | fn hundred_heads_same_bucket() { 405 | let mut graph = CompressedDag::default(); 406 | graph.add_node(CompressedDagSegment::new(10), vec![]); 407 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 408 | for _ in 0..100 { 409 | tracker.report( 410 | CompressedDagNodeRef { 411 | segment: 0, 412 | index: 0, 413 | }, 414 | true, 415 | ); 416 | } 417 | assert_eq!(tracker.inversions(), (0, 10000)); 418 | assert_flakiness!(tracker, 0.0002); 419 | } 420 | 421 | #[test] 422 | fn hundred_heads_one_tail_same_bucket() { 423 | let mut graph = CompressedDag::default(); 424 | graph.add_node(CompressedDagSegment::new(10), vec![]); 425 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 426 | for _ in 0..100 { 427 | tracker.report( 428 | CompressedDagNodeRef { 429 | segment: 0, 430 | index: 0, 431 | }, 432 | true, 433 | ); 434 | } 435 | tracker.report( 436 | CompressedDagNodeRef { 437 | segment: 0, 438 | index: 0, 439 | }, 440 | false, 441 | ); 442 | assert_eq!(tracker.inversions(), (100, 10201)); 443 | assert_flakiness!(tracker, 0.02); 444 | } 445 | 446 | #[test] 447 | fn hundred_heads_hundred_tails_same_bucket() { 448 | let mut graph = CompressedDag::default(); 449 | graph.add_node(CompressedDagSegment::new(10), vec![]); 450 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 451 | for _ in 0..100 { 452 | tracker.report( 453 | CompressedDagNodeRef { 454 | segment: 0, 455 | index: 0, 456 | }, 457 | true, 458 | ); 459 | tracker.report( 460 | CompressedDagNodeRef { 461 | segment: 0, 462 | index: 0, 463 | }, 464 | false, 465 | ); 466 | } 467 | assert_eq!(tracker.inversions(), (10000, 40000)); 468 | assert_flakiness!(tracker, 0.9942); 469 | } 470 | 471 | #[test] 472 | fn hundred_heads_hundred_tails_different_buckets() { 473 | let mut graph = CompressedDag::default(); 474 | graph.add_node(CompressedDagSegment::new(10), vec![]); 475 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 476 | for _ in 0..100 { 477 | tracker.report( 478 | CompressedDagNodeRef { 479 | segment: 0, 480 | index: 0, 481 | }, 482 | true, 483 | ); 484 | tracker.report( 485 | CompressedDagNodeRef { 486 | segment: 0, 487 | index: 0, 488 | }, 489 | false, 490 | ); 491 | tracker.report( 492 | CompressedDagNodeRef { 493 | segment: 0, 494 | index: 1, 495 | }, 496 | true, 497 | ); 498 | tracker.report( 499 | CompressedDagNodeRef { 500 | segment: 0, 501 | index: 1, 502 | }, 503 | false, 504 | ); 505 | } 506 | assert_eq!(tracker.inversions(), (30000, 120000)); 507 | assert_flakiness!(tracker, 0.9967); 508 | } 509 | 510 | #[test] 511 | fn hundred_heads_hundred_tails_inverted() { 512 | let mut graph = CompressedDag::default(); 513 | graph.add_node(CompressedDagSegment::new(10), vec![]); 514 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 515 | for _ in 0..100 { 516 | tracker.report( 517 | CompressedDagNodeRef { 518 | segment: 0, 519 | index: 0, 520 | }, 521 | true, 522 | ); 523 | tracker.report( 524 | CompressedDagNodeRef { 525 | segment: 0, 526 | index: 1, 527 | }, 528 | false, 529 | ); 530 | } 531 | assert_eq!(tracker.inversions(), (10000, 30000)); 532 | 
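        // Every one of the 100 * 100 head/tail pairs is inverted, so the estimate
        // is clamped at complete randomness.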
assert_flakiness!(tracker, 1.0); 533 | } 534 | 535 | #[test] 536 | fn hundred_heads_hundred_tails_not_inverted() { 537 | let mut graph = CompressedDag::default(); 538 | graph.add_node(CompressedDagSegment::new(10), vec![]); 539 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 540 | for _ in 0..100 { 541 | tracker.report( 542 | CompressedDagNodeRef { 543 | segment: 0, 544 | index: 0, 545 | }, 546 | false, 547 | ); 548 | tracker.report( 549 | CompressedDagNodeRef { 550 | segment: 0, 551 | index: 1, 552 | }, 553 | true, 554 | ); 555 | } 556 | assert_eq!(tracker.inversions(), (0, 30000)); 557 | assert_flakiness!(tracker, 0.0); 558 | } 559 | 560 | #[test] 561 | fn two_heads_sequential_segments() { 562 | let mut graph = CompressedDag::default(); 563 | graph.add_node(CompressedDagSegment::new(10), vec![]); 564 | graph.add_node(CompressedDagSegment::new(10), vec![0]); 565 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 566 | tracker.report( 567 | CompressedDagNodeRef { 568 | segment: 0, 569 | index: 0, 570 | }, 571 | true, 572 | ); 573 | tracker.report( 574 | CompressedDagNodeRef { 575 | segment: 1, 576 | index: 0, 577 | }, 578 | true, 579 | ); 580 | assert_eq!(tracker.inversions(), (0, 3)); 581 | assert_flakiness!(tracker, 0.2789); 582 | } 583 | 584 | #[test] 585 | fn one_head_one_tail_sequential_segments_inverted() { 586 | let mut graph = CompressedDag::default(); 587 | graph.add_node(CompressedDagSegment::new(10), vec![]); 588 | graph.add_node(CompressedDagSegment::new(10), vec![0]); 589 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 590 | tracker.report( 591 | CompressedDagNodeRef { 592 | segment: 0, 593 | index: 0, 594 | }, 595 | true, 596 | ); 597 | tracker.report( 598 | CompressedDagNodeRef { 599 | segment: 1, 600 | index: 0, 601 | }, 602 | false, 603 | ); 604 | assert_eq!(tracker.inversions(), (1, 3)); 605 | assert_flakiness!(tracker, 0.8); 606 | } 607 | 608 | #[test] 609 | fn one_head_one_tail_sequential_segments_not_inverted() { 610 | let mut graph = CompressedDag::default(); 611 | graph.add_node(CompressedDagSegment::new(10), vec![]); 612 | graph.add_node(CompressedDagSegment::new(10), vec![0]); 613 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 614 | tracker.report( 615 | CompressedDagNodeRef { 616 | segment: 0, 617 | index: 0, 618 | }, 619 | false, 620 | ); 621 | tracker.report( 622 | CompressedDagNodeRef { 623 | segment: 1, 624 | index: 0, 625 | }, 626 | true, 627 | ); 628 | assert_eq!(tracker.inversions(), (0, 3)); 629 | assert_flakiness!(tracker, 0.2789); 630 | } 631 | 632 | #[test] 633 | fn two_heads_parallel_segments() { 634 | let mut graph = CompressedDag::default(); 635 | graph.add_node(CompressedDagSegment::new(10), vec![]); 636 | graph.add_node(CompressedDagSegment::new(10), vec![]); 637 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 638 | tracker.report( 639 | CompressedDagNodeRef { 640 | segment: 0, 641 | index: 0, 642 | }, 643 | true, 644 | ); 645 | tracker.report( 646 | CompressedDagNodeRef { 647 | segment: 1, 648 | index: 0, 649 | }, 650 | true, 651 | ); 652 | assert_eq!(tracker.inversions(), (0, 2)); 653 | assert_flakiness!(tracker, 0.3258); 654 | } 655 | 656 | #[test] 657 | fn three_heads_join() { 658 | let mut graph = CompressedDag::default(); 659 | graph.add_node(CompressedDagSegment::new(10), vec![]); 660 | graph.add_node(CompressedDagSegment::new(10), vec![]); 661 | graph.add_node(CompressedDagSegment::new(10), vec![0, 1]); 662 | let mut tracker 
= CompressedDagFlakinessTracker::new(Rc::new(graph)); 663 | tracker.report( 664 | CompressedDagNodeRef { 665 | segment: 0, 666 | index: 0, 667 | }, 668 | true, 669 | ); 670 | tracker.report( 671 | CompressedDagNodeRef { 672 | segment: 1, 673 | index: 0, 674 | }, 675 | true, 676 | ); 677 | tracker.report( 678 | CompressedDagNodeRef { 679 | segment: 2, 680 | index: 0, 681 | }, 682 | true, 683 | ); 684 | assert_eq!(tracker.inversions(), (0, 5)); 685 | assert_flakiness!(tracker, 0.2171); 686 | } 687 | 688 | #[test] 689 | fn half_inverted_join() { 690 | let mut graph = CompressedDag::default(); 691 | graph.add_node(CompressedDagSegment::new(10), vec![]); 692 | graph.add_node(CompressedDagSegment::new(10), vec![]); 693 | graph.add_node(CompressedDagSegment::new(10), vec![0, 1]); 694 | let mut tracker = CompressedDagFlakinessTracker::new(Rc::new(graph)); 695 | tracker.report( 696 | CompressedDagNodeRef { 697 | segment: 0, 698 | index: 0, 699 | }, 700 | true, 701 | ); 702 | tracker.report( 703 | CompressedDagNodeRef { 704 | segment: 1, 705 | index: 0, 706 | }, 707 | false, 708 | ); 709 | tracker.report( 710 | CompressedDagNodeRef { 711 | segment: 2, 712 | index: 0, 713 | }, 714 | false, 715 | ); 716 | assert_eq!(tracker.inversions(), (1, 5)); 717 | assert_flakiness!(tracker, 0.5248); 718 | } 719 | } 720 | -------------------------------------------------------------------------------- /robust-binary-search/src/dag.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use im_rc::OrdSet; 16 | use std::collections::HashSet; 17 | 18 | /// A node in a Dag. 19 | #[derive(Clone, Debug)] 20 | pub struct DagNode<T> { 21 | value: T, 22 | inputs: Vec<usize>, 23 | ancestors: OrdSet<usize>, 24 | remainder_ancestors: Vec<usize>, 25 | } 26 | 27 | impl<T> DagNode<T> { 28 | /// Returns the value in the node. 29 | pub fn value(&self) -> &T { 30 | &self.value 31 | } 32 | 33 | /// Returns indices within the Dag of the node's input nodes. 34 | pub fn inputs(&self) -> &[usize] { 35 | &self.inputs 36 | } 37 | 38 | /// Returns indices within the Dag of the transitive closure of the node's inputs. Includes the 39 | /// inputs but excludes the node itself. 40 | pub fn ancestors(&self) -> &OrdSet<usize> { 41 | &self.ancestors 42 | } 43 | 44 | /// Returns indices within the Dag of ancestors which are not the first input or its ancestors. 45 | /// In other words, the sets `remainder_ancestors()`, `{inputs()[0]}` (assuming there is at 46 | /// least one input), and `inputs()[0].ancestors()` (assuming there is at least one input) are 47 | /// disjoint, and their union equals `ancestors()`. 48 | /// 49 | /// This can be used to compute certain properties over a graph more efficiently.
For example, 50 | /// computing the sum of ancestors' values incrementally for every node in the graph can be done 51 | /// by starting with the sum for `inputs()[0]` then adding the values of nodes in 52 | /// `remainder_ancestors()`. This can reduce the complexity from `O(n^2)` to roughly `O(n)` for 53 | /// deep and narrow graphs. 54 | pub fn remainder_ancestors(&self) -> &[usize] { 55 | &self.remainder_ancestors 56 | } 57 | } 58 | 59 | /// A Directed Acyclic Graph with the nodes sorted topologically. 60 | #[derive(Clone, Debug)] 61 | pub struct Dag<T> { 62 | nodes: Vec<DagNode<T>>, 63 | } 64 | 65 | impl<T> Default for Dag<T> { 66 | fn default() -> Self { 67 | Self { nodes: vec![] } 68 | } 69 | } 70 | 71 | impl<T> Dag<T> { 72 | /// Creates an empty Dag. 73 | pub fn new() -> Self { 74 | Dag { nodes: vec![] } 75 | } 76 | 77 | /// Returns the nodes in the Dag. 78 | pub fn nodes(&self) -> &[DagNode<T>] { 79 | &self.nodes 80 | } 81 | 82 | /// Convenience method for nodes()[index]. 83 | /// 84 | /// # Panics 85 | /// 86 | /// Panics if index is greater than or equal to nodes().len(). 87 | pub fn node(&self, index: usize) -> &DagNode<T> { 88 | &self.nodes[index] 89 | } 90 | 91 | /// Adds a node to the Dag. Each input must be less than the index of the new node itself, 92 | /// i.e. less than the number of nodes currently in the Dag. The first input is treated 93 | /// specially by DagNode::remainder_ancestors. 94 | /// 95 | /// # Panics 96 | /// 97 | /// Panics if any value in inputs is greater than or equal to nodes().len(). 98 | pub fn add_node(&mut self, value: T, inputs: Vec<usize>) { 99 | for input in &inputs { 100 | assert!(*input < self.nodes.len()); 101 | } 102 | 103 | let (ancestors, remainder_ancestors) = if inputs.is_empty() { 104 | (OrdSet::new(), vec![]) 105 | } else { 106 | let mut ancestors = self.nodes[inputs[0]].ancestors.clone(); 107 | let mut remainder_ancestors = HashSet::new(); 108 | ancestors.insert(inputs[0]); 109 | let mut queue = Vec::new(); 110 | for input in &inputs[1..] { 111 | queue.push(*input); 112 | } 113 | while let Some(ancestor) = queue.pop() { 114 | if ancestors.insert(ancestor).is_none() { 115 | remainder_ancestors.insert(ancestor); 116 | for ancestor_input in &self.nodes[ancestor].inputs { 117 | queue.push(*ancestor_input); 118 | } 119 | } 120 | } 121 | let mut sorted_remainder_ancestors = 122 | remainder_ancestors.into_iter().collect::<Vec<_>>(); 123 | sorted_remainder_ancestors.sort(); 124 | (ancestors, sorted_remainder_ancestors) 125 | }; 126 | self.nodes.push(DagNode { 127 | value, 128 | ancestors, 129 | remainder_ancestors, 130 | inputs, 131 | }); 132 | } 133 | } 134 | 135 | #[cfg(test)] 136 | mod tests { 137 | use super::*; 138 | 139 | macro_rules!
hash_set { 140 | ($($arg:expr),*) => { 141 | vec![$($arg),*].into_iter().map(|x: i32| x as usize).collect::>() 142 | } 143 | } 144 | 145 | #[test] 146 | fn ancestor_segments() { 147 | let mut graph = Dag::default(); 148 | graph.add_node((), vec![]); 149 | graph.add_node((), vec![0]); 150 | graph.add_node((), vec![1]); 151 | graph.add_node((), vec![2]); 152 | assert_eq!(graph.node(0).ancestors(), &hash_set![]); 153 | assert_eq!(graph.node(1).ancestors(), &hash_set![0]); 154 | assert_eq!(graph.node(2).ancestors(), &hash_set![0, 1]); 155 | assert_eq!(graph.node(3).ancestors(), &hash_set![0, 1, 2]); 156 | } 157 | 158 | #[test] 159 | fn remainder_ancestors() { 160 | // 0---1---2 161 | // \ \ 162 | // 3---4---x 163 | let mut graph = Dag::default(); 164 | graph.add_node((), vec![]); 165 | graph.add_node((), vec![0]); 166 | graph.add_node((), vec![1]); 167 | graph.add_node((), vec![0]); 168 | graph.add_node((), vec![3]); 169 | graph.add_node((), vec![2, 4]); 170 | assert_eq!(graph.node(5).remainder_ancestors(), &[3, 4]); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /robust-binary-search/src/flakiness_tracker.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::collections::BTreeMap; 16 | 17 | /// INTERNAL ONLY. 18 | /// 19 | /// Calculates vote inversions in a linear range, which can be used to estimate flakiness. 20 | #[doc(hidden)] 21 | #[derive(Clone, Debug, Default)] 22 | pub struct FlakinessTracker { 23 | /// Maps index to number of number of tails votes and number of heads votes. 24 | votes: BTreeMap, 25 | total_heads: usize, 26 | total_tails: usize, 27 | } 28 | 29 | impl FlakinessTracker { 30 | /// Adds a vote to the internal statistics. With low flakiness, false votes are expected to have 31 | /// smaller indices than true votes. 32 | pub fn report(&mut self, index: usize, heads: bool) { 33 | let value = self.votes.entry(index).or_insert((0, 0)); 34 | value.0 += if heads { 0 } else { 1 }; 35 | value.1 += if heads { 1 } else { 0 }; 36 | if heads { 37 | self.total_heads += 1; 38 | } else { 39 | self.total_tails += 1; 40 | } 41 | } 42 | 43 | /// Returns the number of inversions and four times the number of "random" inverions. 44 | /// The "random" inversions is the number of inversions that would be expected if the votes were 45 | /// cast at the same indices but were randomly half heads and half tails. It is scaled by four 46 | /// to avoid loss of precision. 
47 | pub fn inversions(&self) -> (usize, usize) { 48 | let mut headstotal = 0; 49 | let mut inverted = 0; 50 | let mut random_inversions = 0; 51 | let mut total_votes = 0; 52 | for (tails, heads) in self.votes.values() { 53 | let votes = heads + tails; 54 | random_inversions += votes * votes + votes * total_votes; 55 | inverted += tails * headstotal + tails * heads; 56 | headstotal += heads; 57 | total_votes += votes; 58 | } 59 | (inverted, random_inversions) 60 | } 61 | 62 | /// Returns the number of true votes. 63 | pub fn total_heads(&self) -> usize { 64 | self.total_heads 65 | } 66 | 67 | /// Returns the number of false votes. 68 | pub fn total_tails(&self) -> usize { 69 | self.total_tails 70 | } 71 | 72 | /// Returns the total number of votes. 73 | pub fn total_votes(&self) -> usize { 74 | self.total_heads + self.total_tails 75 | } 76 | 77 | /// Returns the estimated flakiness based on the votes, where 0.0 is deterministic and 1.0 is 78 | /// complete randomness. 79 | pub fn flakiness(&self) -> f64 { 80 | // The formula used here is provided by flakiness_tuner.rs (and fit by 81 | // recovered_flakiness.plt), plus some numerical niceties and a Bayesian prior. 82 | // ar^2 + br - f = 0 83 | // (-b + sqrt(b^2 + 4af))/(2a) 84 | let (inv, rand_inv) = self.inversions(); 85 | let r = (inv + 1) as f64 / (rand_inv as f64 + 7.6143); 86 | (0.1698 * r * r + 3.7844 * r).min(1.0).max(0.0) 87 | } 88 | } 89 | 90 | #[cfg(test)] 91 | mod tests { 92 | use super::*; 93 | 94 | #[test] 95 | fn empty() { 96 | let tracker = FlakinessTracker::default(); 97 | assert_eq!(tracker.inversions(), (0, 0)); 98 | assert!( 99 | (tracker.flakiness() - 0.5).abs() < 1e-4, 100 | "flakiness = {}", 101 | tracker.flakiness() 102 | ); 103 | } 104 | 105 | #[test] 106 | fn one_head() { 107 | let mut tracker = FlakinessTracker::default(); 108 | tracker.report(0, true); 109 | assert_eq!(tracker.inversions(), (0, 1)); 110 | assert!( 111 | (tracker.flakiness() - 0.4416).abs() < 1e-4, 112 | "flakiness = {}", 113 | tracker.flakiness() 114 | ); 115 | } 116 | 117 | #[test] 118 | fn one_tail() { 119 | let mut tracker = FlakinessTracker::default(); 120 | tracker.report(0, false); 121 | assert_eq!(tracker.inversions(), (0, 1)); 122 | assert!( 123 | (tracker.flakiness() - 0.4416).abs() < 1e-4, 124 | "flakiness = {}", 125 | tracker.flakiness() 126 | ); 127 | } 128 | 129 | #[test] 130 | fn two_heads_same_bucket() { 131 | let mut tracker = FlakinessTracker::default(); 132 | tracker.report(0, true); 133 | tracker.report(0, true); 134 | assert_eq!(tracker.inversions(), (0, 4)); 135 | assert!( 136 | (tracker.flakiness() - 0.3271).abs() < 1e-4, 137 | "flakiness = {}", 138 | tracker.flakiness() 139 | ); 140 | } 141 | 142 | #[test] 143 | fn two_heads_different_buckets() { 144 | let mut tracker = FlakinessTracker::default(); 145 | tracker.report(0, true); 146 | tracker.report(1, true); 147 | assert_eq!(tracker.inversions(), (0, 3)); 148 | assert!( 149 | (tracker.flakiness() - 0.3581).abs() < 1e-4, 150 | "flakiness = {}", 151 | tracker.flakiness() 152 | ); 153 | } 154 | 155 | #[test] 156 | fn two_tails_same_bucket() { 157 | let mut tracker = FlakinessTracker::default(); 158 | tracker.report(0, false); 159 | tracker.report(0, false); 160 | assert_eq!(tracker.inversions(), (0, 4)); 161 | assert!( 162 | (tracker.flakiness() - 0.3271).abs() < 1e-4, 163 | "flakiness = {}", 164 | tracker.flakiness() 165 | ); 166 | } 167 | 168 | #[test] 169 | fn two_tails_different_buckets() { 170 | let mut tracker = FlakinessTracker::default(); 171 | tracker.report(0,
false); 172 | tracker.report(1, false); 173 | assert_eq!(tracker.inversions(), (0, 3)); 174 | assert!( 175 | (tracker.flakiness() - 0.3581).abs() < 1e-4, 176 | "flakiness = {}", 177 | tracker.flakiness() 178 | ); 179 | } 180 | 181 | #[test] 182 | fn one_head_one_tail_same_bucket() { 183 | let mut tracker = FlakinessTracker::default(); 184 | tracker.report(0, false); 185 | tracker.report(0, true); 186 | assert_eq!(tracker.inversions(), (1, 4)); 187 | assert!( 188 | (tracker.flakiness() - 0.6567).abs() < 1e-4, 189 | "flakiness = {}", 190 | tracker.flakiness() 191 | ); 192 | } 193 | 194 | #[test] 195 | fn one_head_one_tail_inverted() { 196 | let mut tracker = FlakinessTracker::default(); 197 | tracker.report(0, true); 198 | tracker.report(1, false); 199 | assert_eq!(tracker.inversions(), (1, 3)); 200 | assert!( 201 | (tracker.flakiness() - 0.7191).abs() < 1e-4, 202 | "flakiness = {}", 203 | tracker.flakiness() 204 | ); 205 | } 206 | 207 | #[test] 208 | fn one_head_one_tail_not_inverted() { 209 | let mut tracker = FlakinessTracker::default(); 210 | tracker.report(0, false); 211 | tracker.report(1, true); 212 | assert_eq!(tracker.inversions(), (0, 3)); 213 | assert!( 214 | (tracker.flakiness() - 0.3580).abs() < 1e-4, 215 | "flakiness = {}", 216 | tracker.flakiness() 217 | ); 218 | } 219 | 220 | #[test] 221 | fn hundred_heads_same_bucket() { 222 | let mut tracker = FlakinessTracker::default(); 223 | for _ in 0..100 { 224 | tracker.report(0, true); 225 | } 226 | assert_eq!(tracker.inversions(), (0, 10000)); 227 | assert!( 228 | (tracker.flakiness() - 0.0004).abs() < 1e-4, 229 | "flakiness = {}", 230 | tracker.flakiness() 231 | ); 232 | } 233 | 234 | #[test] 235 | fn hundred_heads_one_tail_same_bucket() { 236 | let mut tracker = FlakinessTracker::default(); 237 | for _ in 0..100 { 238 | tracker.report(0, true); 239 | } 240 | tracker.report(0, false); 241 | assert_eq!(tracker.inversions(), (100, 10201)); 242 | assert!( 243 | (tracker.flakiness() - 0.0375).abs() < 1e-4, 244 | "flakiness = {}", 245 | tracker.flakiness() 246 | ); 247 | } 248 | 249 | #[test] 250 | fn hundred_heads_hundred_tails_same_bucket() { 251 | let mut tracker = FlakinessTracker::default(); 252 | for _ in 0..100 { 253 | tracker.report(0, false); 254 | tracker.report(0, true); 255 | } 256 | assert_eq!(tracker.inversions(), (10000, 40000)); 257 | assert!( 258 | (tracker.flakiness() - 0.9566).abs() < 1e-4, 259 | "flakiness = {}", 260 | tracker.flakiness() 261 | ); 262 | } 263 | 264 | #[test] 265 | fn hundred_heads_hundred_tails_inverted() { 266 | let mut tracker = FlakinessTracker::default(); 267 | for _ in 0..100 { 268 | tracker.report(0, true); 269 | tracker.report(1, false); 270 | } 271 | assert_eq!(tracker.inversions(), (10000, 30000)); 272 | assert!( 273 | (tracker.flakiness() - 0.9999).abs() < 1e-4, 274 | "flakiness = {}", 275 | tracker.flakiness() 276 | ); 277 | } 278 | 279 | #[test] 280 | fn hundred_heads_hundred_tails_not_inverted() { 281 | let mut tracker = FlakinessTracker::default(); 282 | for _ in 0..100 { 283 | tracker.report(0, false); 284 | tracker.report(1, true); 285 | } 286 | assert_eq!(tracker.inversions(), (0, 30000)); 287 | assert!( 288 | (tracker.flakiness() - 0.0001).abs() < 1e-4, 289 | "flakiness = {}", 290 | tracker.flakiness() 291 | ); 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /robust-binary-search/src/flakiness_tuner.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // 
Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use rand::Rng; 16 | use robust_binary_search::flakiness_tracker::*; 17 | use robust_binary_search::*; 18 | use std::cell::RefCell; 19 | use std::env; 20 | use std::error::Error; 21 | use std::fs::File; 22 | use std::io::Write; 23 | use std::ops::DerefMut; 24 | use std::process; 25 | 26 | fn sample_inversions<R: Rng>(rng: &mut R, p: f64) -> (usize, usize) { 27 | let size = 1 << 20; 28 | let mut tracker = FlakinessTracker::default(); 29 | let mut searcher = Searcher::new(size); 30 | let mut i = 0; 31 | let index = (rng.gen::<f64>() * size as f64) as usize; 32 | let max_steps = 10000; 33 | loop { 34 | i += 1; 35 | let test_index = searcher.next_index().unwrap(); 36 | if test_index == index || i == max_steps { 37 | break; 38 | } 39 | let heads = if rng.gen::<f64>() < p { 40 | rng.gen::<f64>() < 0.5 41 | } else { 42 | test_index >= index 43 | }; 44 | tracker.report(test_index, heads); 45 | let estimated_flakiness = tracker.flakiness(); 46 | searcher.report_with_stiffness(test_index, heads, optimal_stiffness(estimated_flakiness)); 47 | } 48 | tracker.inversions() 49 | } 50 | 51 | fn main() -> Result<(), Box<dyn Error>> { 52 | let args: Vec<String> = env::args().collect(); 53 | if args.len() != 2 { 54 | println!("Usage: main <output file>"); 55 | process::exit(1); 56 | } 57 | let mut f = File::create(&args[1])?; 58 | let rng = RefCell::new(rand::thread_rng()); 59 | for i in 0..80 { 60 | let p = i as f64 / 100.0; 61 | let mut inv_total = 0; 62 | let mut rand_inv_total = 0; 63 | for _ in 0..10000 { 64 | let (inv, rand_inv) = sample_inversions(rng.borrow_mut().deref_mut(), p); 65 | inv_total += inv; 66 | rand_inv_total += rand_inv; 67 | } 68 | writeln!(f, "{} {} {}", p, inv_total, rand_inv_total)?; 69 | f.sync_data()?; 70 | } 71 | Ok(()) 72 | } 73 | -------------------------------------------------------------------------------- /robust-binary-search/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use log::trace; 16 | use std::borrow::Borrow; 17 | use std::cmp; 18 | use std::collections::HashSet; 19 | use std::rc::Rc; 20 | 21 | #[doc(hidden)] 22 | pub mod flakiness_tracker; 23 | use flakiness_tracker::*; 24 | mod range_map; 25 | use range_map::*; 26 | 27 | mod dag; 28 | 29 | /// Reference to a node in a CompressedDag.
30 | #[derive(Default, Copy, Clone, Debug, PartialEq, Eq)] 31 | pub struct CompressedDagNodeRef { 32 | /// Index of the segment in the CompressedDag. 33 | pub segment: usize, 34 | /// Index of the expanded node within the segment. 35 | pub index: usize, 36 | } 37 | 38 | #[deprecated(note = "Use CompressedDagNodeRef instead.")] 39 | pub type CompressedDAGNodeRef = CompressedDagNodeRef; 40 | 41 | /// A segment in a CompressedDag. This is a node in a Dag but corresponds to a linear sequence of 42 | /// nodes in a conceptual expanded graph. The size is the number of nodes in the expanded graph 43 | /// represented by this segment. 44 | #[derive(Clone, Debug)] 45 | pub struct CompressedDagSegment { 46 | len: usize, 47 | } 48 | 49 | #[deprecated(note = "Use CompressedDagSegment instead.")] 50 | pub type CompressedDAGSegment = CompressedDagSegment; 51 | 52 | impl CompressedDagSegment { 53 | /// Creates a CompressedDagSegment of a given size. 54 | pub fn new(len: usize) -> Self { 55 | CompressedDagSegment { len } 56 | } 57 | 58 | /// Returns the size of the segment. 59 | pub fn len(&self) -> usize { 60 | self.len 61 | } 62 | 63 | /// Returns true if the segment is empty. 64 | pub fn is_empty(&self) -> bool { 65 | self.len == 0 66 | } 67 | } 68 | 69 | /// A Dag whose nodes are CompressedDagSegments, which represent sequences of nodes in a conceptual 70 | /// expanded graph. For example, given the graph: 71 | /// 72 | /// ```text 73 | /// B-C-D 74 | /// / \ 75 | /// A G 76 | /// \ / 77 | /// E---F 78 | /// ``` 79 | /// 80 | /// this can be expressed in a CompressedDag as: 81 | /// 82 | /// ```text 83 | /// B' 84 | /// / \ 85 | /// A' G' 86 | /// \ / 87 | /// E' 88 | /// ``` 89 | /// 90 | /// where `A'` and `G'` are segments of size 1 corresponding to `A` and `G`, `E'` is a segment of 91 | /// size 2 corresponding to `E` and `F`, and `B'` is a segment of size 3 corresponding to `B`, `C`, 92 | /// and `D`. 93 | /// 94 | /// More formally, the nodes represented by a segment must be in a linear formation (i.e. directed, 95 | /// acyclic, connected, with each node having at most one incoming edge from another node in the 96 | /// segment and at most one outgoing edge to another node in the segment), with only the first node 97 | /// allowing edges from outside the segment, and only the last node allowing edges to outside the 98 | /// segment. 99 | /// 100 | /// This representation allows many common graphs to be represented in a more compact form than 101 | /// directly as a Dag. 102 | pub type CompressedDag = dag::Dag<CompressedDagSegment>; 103 | 104 | mod compressed_dag_flakiness_tracker; 105 | use compressed_dag_flakiness_tracker::*; 106 | 107 | /// Finds the index such that the sum of values at indices [0, i] (inclusive) is as close as 108 | /// possible to the argument. Returns the index and the sum.
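To tie the doc comment's example to the API, here is a minimal sketch that builds the compressed form of that graph with `CompressedDag::add_node` and `CompressedDagSegment::new` from this crate; the segment sizes follow the `A'`/`B'`/`E'`/`G'` mapping described above.

```rust
use robust_binary_search::{CompressedDag, CompressedDagSegment};

fn main() {
    let mut graph = CompressedDag::default();
    graph.add_node(CompressedDagSegment::new(1), vec![]); // A'
    graph.add_node(CompressedDagSegment::new(3), vec![0]); // B' = B, C, D
    graph.add_node(CompressedDagSegment::new(2), vec![0]); // E' = E, F
    graph.add_node(CompressedDagSegment::new(1), vec![1, 2]); // G'
    // The seven-node expanded graph is represented by four segments.
    assert_eq!(graph.nodes().len(), 4);
}
```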
109 | fn confidence_percentile_nearest(range_map: &RangeMap<f64>, percentile: f64) -> (usize, f64) { 110 | let mut sum = 0.0; 111 | let mut index = 0; 112 | let mut best_index = 0; 113 | let mut best_percentile = f64::NEG_INFINITY; 114 | for w in range_map.ranges() { 115 | let delta = w.len() as f64 * w.value(); 116 | trace!( 117 | "percentile = {}, sum = {}, w.value = {}", 118 | percentile, 119 | sum, 120 | w.value() 121 | ); 122 | trace!( 123 | "(percentile - sum) / w.value() - 0.5 = {}", 124 | (percentile - sum) / w.value() - 0.5 125 | ); 126 | let ix = index 127 | + cmp::min( 128 | w.len() - 1, 129 | ((percentile - sum) / w.value() - 0.5).max(0.0) as usize, 130 | ); 131 | let ix_percentile = sum + (ix - index + 1) as f64 * w.value(); 132 | trace!("ix = {} ix_percentile = {}", ix, ix_percentile); 133 | if (ix_percentile - percentile).abs() < (best_percentile - percentile).abs() { 134 | best_index = ix; 135 | best_percentile = ix_percentile; 136 | } 137 | sum += delta; 138 | index += w.len(); 139 | } 140 | assert!(best_percentile > f64::NEG_INFINITY); 141 | trace!( 142 | "confidence_percentile_nearest returning {:?}", 143 | (best_index, best_percentile) 144 | ); 145 | (best_index, best_percentile) 146 | } 147 | 148 | /// Finds the smallest index such that the sum of values at indices [0, i] (inclusive) is greater 149 | /// than or equal to the argument. Returns the index and the sum. If no sum is greater than or equal 150 | /// to the argument, returns the last index and the sum over all values. 151 | fn confidence_percentile_ceil(range_map: &RangeMap<f64>, percentile: f64) -> (usize, f64) { 152 | let mut sum = 0.0; 153 | let mut index = 0; 154 | for w in range_map.ranges() { 155 | let delta = w.len() as f64 * w.value(); 156 | if sum + delta >= percentile { 157 | let ix = index + ((percentile - sum) / w.value() - 1e-9) as usize; 158 | let ret = (ix, sum + (ix - index + 1) as f64 * w.value()); 159 | trace!("confidence_percentile_ceil returning {:?}", ret); 160 | return ret; 161 | } 162 | sum += delta; 163 | index += w.len(); 164 | } 165 | (range_map.len() - 1, sum) 166 | } 167 | 168 | // Does not normalize. 169 | fn report_range(weights: &mut RangeMap<f64>, index: usize, heads: bool, stiffness: f64) { 170 | if heads { 171 | for w in weights.split(index).0 { 172 | *w.value_mut() *= 1.0 + stiffness; 173 | } 174 | let (mut left, _right) = weights.split(index + 1); 175 | *left.next_back().unwrap().value_mut() *= 1.0 + stiffness; 176 | } else { 177 | let _ = weights.split(index); 178 | let (_left, right) = weights.split(index + 1); 179 | for w in right { 180 | *w.value_mut() *= 1.0 + stiffness; 181 | } 182 | } 183 | } 184 | 185 | /// Performs a robust binary search over a linear range. 186 | #[derive(Clone, Debug)] 187 | pub struct Searcher { 188 | weights: RangeMap<f64>, 189 | skips: HashSet<usize>, 190 | len: usize, 191 | } 192 | 193 | impl Searcher { 194 | /// Creates a new Searcher over a range with the given number of testable indices. 195 | pub fn new(len: usize) -> Self { 196 | Searcher { 197 | weights: RangeMap::new(len + 1, 1.0 / (len as f64 + 1.0)), 198 | len, 199 | skips: HashSet::default(), 200 | } 201 | } 202 | 203 | /// Adds an index which cannot be tested. `next_index` will never return this index. 204 | pub fn add_skip(&mut self, skip: usize) { 205 | self.skips.insert(skip); 206 | } 207 | 208 | /// Same as `report` but with a specified stiffness. Only public for use by the tuner; not 209 | /// intended for general use. 210 | /// 211 | /// # Panics 212 | /// 213 | /// Panics if `index >= len`.
214 | #[doc(hidden)] 215 | pub fn report_with_stiffness(&mut self, index: usize, heads: bool, stiffness: f64) { 216 | assert!(index < self.len); 217 | report_range(&mut self.weights, index, heads, stiffness); 218 | let weight_sum: f64 = self 219 | .weights 220 | .ranges() 221 | .map(|w| w.value() * w.len() as f64) 222 | .sum(); 223 | for w in self.weights.ranges_mut() { 224 | *w.value_mut() /= weight_sum; 225 | } 226 | } 227 | 228 | /// Adds a vote to the internal statistics. With low flakiness, false votes are expected to have 229 | /// smaller indices than true votes. In other words, false means the index is probably too low, 230 | /// and true means the index is probably correct or too high. 231 | /// 232 | /// # Panics 233 | /// 234 | /// Panics if `index >= len`. 235 | pub fn report(&mut self, index: usize, heads: bool, flakiness: f64) { 236 | self.report_with_stiffness(index, heads, optimal_stiffness(flakiness)); 237 | } 238 | 239 | /// Returns the next index that should be tested. Can return values in the range 0 to len, 240 | /// exclusive. 241 | pub fn next_index(&self) -> Option<usize> { 242 | let original_ix = cmp::min( 243 | confidence_percentile_nearest(&self.weights, 0.5).0, 244 | self.len - 1, 245 | ); 246 | let mut ix = original_ix; 247 | let mut attempt = 0; 248 | let mut can_inc = true; 249 | let mut can_dec = true; 250 | // Try indexes near the desired index, alternating above and below, while staying within 251 | // bounds. I'm sure this can be made more efficient (e.g. storing skips as ranges). 252 | while self.skips.contains(&ix) { 253 | if attempt % 2 == 0 { 254 | if ix + attempt + 1 >= self.len { 255 | can_inc = false; 256 | } 257 | if can_inc { 258 | ix += attempt + 1; 259 | } else if ix > 0 { 260 | ix -= 1; 261 | } else { 262 | return None; 263 | } 264 | } else { 265 | if ix < attempt + 1 { 266 | can_dec = false; 267 | } 268 | if can_dec { 269 | ix -= attempt + 1; 270 | } else if ix + 1 < self.len { 271 | ix += 1; 272 | } else { 273 | return None; 274 | } 275 | } 276 | attempt += 1; 277 | } 278 | Some(ix) 279 | } 280 | 281 | /// Returns the current estimate of the best index. Can return values in the range 0 to len, 282 | /// inclusive. 283 | pub fn best_index(&self) -> usize { 284 | confidence_percentile_ceil(&self.weights, 0.5).0 285 | } 286 | 287 | /// Only public for use by the tuner; not intended for general use. 288 | #[doc(hidden)] 289 | pub fn confidence_percentile_ceil(&self, percentile: f64) -> usize { 290 | confidence_percentile_ceil(&self.weights, percentile).0 291 | } 292 | 293 | /// Returns the likelihood of the given index. 294 | /// 295 | /// # Panics 296 | /// 297 | /// Panics if `index > len`. 298 | pub fn likelihood(&self, index: usize) -> f64 { 299 | *self.weights.range_for_index(index).value() 300 | } 301 | } 302 | 303 | /// INTERNAL ONLY. 304 | /// 305 | /// Returns the stiffness which should be optimal for the given flakiness. 306 | #[doc(hidden)] 307 | pub fn optimal_stiffness(flakiness: f64) -> f64 { 308 | // Values calculated by tuner.rs 309 | (2.6 / flakiness.powf(0.37)) 310 | .min(0.58 / flakiness.powf(0.97)) 311 | .min(0.19 / flakiness.powf(2.4)) 312 | } 313 | 314 | /// Performs a robust binary search over a linear range and automatically infers the flakiness based 315 | /// on the votes. 316 | #[derive(Clone, Debug)] 317 | pub struct AutoSearcher { 318 | searcher: Searcher, 319 | flakiness_tracker: FlakinessTracker, 320 | } 321 | 322 | impl AutoSearcher { 323 | /// Creates a new AutoSearcher over a range with the given number of testable indices.
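Putting the weight updates and flakiness inference together, the sketch below drives an `AutoSearcher` with a deterministic oracle; the target index and iteration count are invented for illustration, and a real test oracle would typically be flaky.

```rust
use robust_binary_search::AutoSearcher;

fn main() {
    let target = 700; // hypothetical first "bad" index
    let mut searcher = AutoSearcher::new(1024);
    for _ in 0..100 {
        let ix = searcher.next_index().expect("some index is testable");
        // false: the index is probably too low; true: correct or too high.
        searcher.report(ix, ix >= target);
    }
    println!("converged on index {}", searcher.best_index());
}
```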
324 | pub fn new(len: usize) -> Self { 325 | AutoSearcher { 326 | searcher: Searcher::new(len), 327 | flakiness_tracker: FlakinessTracker::default(), 328 | } 329 | } 330 | 331 | /// Adds a vote to the internal statistics. With low flakiness, false votes are expected to have 332 | /// smaller indices than true votes. 333 | /// 334 | /// # Panics 335 | /// 336 | /// Panics if `index >= len`. 337 | pub fn report(&mut self, index: usize, heads: bool) { 338 | self.flakiness_tracker.report(index, heads); 339 | self.searcher 340 | .report(index, heads, self.flakiness_tracker.flakiness()); 341 | } 342 | 343 | /// Returns the next index that should be tested. Can return values in the range 0 to len, 344 | /// exclusive. 345 | pub fn next_index(&self) -> Option<usize> { 346 | self.searcher.next_index() 347 | } 348 | 349 | /// Returns the current estimate of the best index. Can return values in the range 0 to len, 350 | /// inclusive. 351 | pub fn best_index(&self) -> usize { 352 | self.searcher.best_index() 353 | } 354 | 355 | /// Returns the likelihood of the given index. 356 | /// 357 | /// # Panics 358 | /// 359 | /// Panics if `index > len`. 360 | pub fn likelihood(&self, index: usize) -> f64 { 361 | self.searcher.likelihood(index) 362 | } 363 | } 364 | 365 | /// Performs a robust binary search over a CompressedDag. 366 | #[derive(Clone, Debug)] 367 | pub struct CompressedDagSearcher { 368 | graph: Rc<CompressedDag>, 369 | segment_range_maps: Vec<RangeMap<f64>>, 370 | } 371 | 372 | #[deprecated(note = "Use CompressedDagSearcher instead.")] 373 | pub type CompressedDAGSearcher = CompressedDagSearcher; 374 | 375 | impl CompressedDagSearcher { 376 | /// Creates a new CompressedDagSearcher. 377 | pub fn new(graph: Rc<CompressedDag>) -> Self { 378 | let n = graph 379 | .nodes() 380 | .iter() 381 | .map(|node| node.value().len()) 382 | .sum::<usize>(); 383 | let segment_range_maps = graph 384 | .nodes() 385 | .iter() 386 | .map(|node| RangeMap::new(node.value().len(), 1.0 / n as f64)) 387 | .collect(); 388 | CompressedDagSearcher { 389 | graph, 390 | segment_range_maps, 391 | } 392 | } 393 | 394 | /// Returns the sums at the beginning and end of every segment. Each vector entry corresponds to 395 | /// a single segment. The first entry in the tuple is the sum of all weights in the segment's 396 | /// ancestors (i.e. source segments will have a start of 0.0), and the second entry is the sum 397 | /// of all weights in the segment and its ancestors.
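As a worked example of these per-segment percentile ranges (a sketch that mirrors the `graph_confidence_percentile_nearest_sequential_segments` test later in this file): with three sequential segments of 10 nodes each and uniform weight 1/30 per node, the ranges are (0, 1/3), (1/3, 2/3), and (2/3, 1), so the median lands on the 15th node overall.

```rust
use robust_binary_search::{
    CompressedDag, CompressedDagNodeRef, CompressedDagSearcher, CompressedDagSegment,
};
use std::rc::Rc;

fn main() {
    // A chain of three segments, 10 expanded nodes apiece.
    let mut graph = CompressedDag::default();
    graph.add_node(CompressedDagSegment::new(10), vec![]);
    graph.add_node(CompressedDagSegment::new(10), vec![0]);
    graph.add_node(CompressedDagSegment::new(10), vec![1]);
    let searcher = CompressedDagSearcher::new(Rc::new(graph));
    // The 50th percentile is the 15th of 30 nodes: segment 1, index 4.
    assert_eq!(
        searcher.best_node(),
        CompressedDagNodeRef { segment: 1, index: 4 }
    );
}
```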
398 | fn segment_percentile_ranges(&self) -> Vec<(f64, f64)> { 399 | let mut segment_ranges = Vec::<(f64, f64)>::new(); 400 | let mut segment_sums = Vec::<f64>::new(); 401 | let graph: &CompressedDag = self.graph.borrow(); 402 | for (i, range_map) in self.segment_range_maps.iter().enumerate() { 403 | let inputs = graph.node(i).inputs(); 404 | let start = if inputs.is_empty() { 405 | 0.0 406 | } else { 407 | let mut start = segment_ranges[inputs[0]].1; 408 | for ancestor in graph.node(i).remainder_ancestors() { 409 | start += segment_sums[*ancestor]; 410 | } 411 | start 412 | }; 413 | let mut segment_sum = 0.0; 414 | for range in range_map.ranges() { 415 | segment_sum += range.value() * range.len() as f64; 416 | } 417 | segment_sums.push(segment_sum); 418 | let end = start + segment_sum; 419 | assert!( 420 | (0.0..=1.0 + 1e-11).contains(&start) && (0.0..=1.0 + 1e-11).contains(&end), 421 | "i = {} of {}, start = {}, end = {}", 422 | i, 423 | self.segment_range_maps.len(), 424 | start, 425 | end 426 | ); 427 | segment_ranges.push((start, end)); 428 | } 429 | segment_ranges 430 | } 431 | 432 | /// Returns the node whose percentile (i.e. the sum of weights over the node and its ancestors) 433 | /// is nearest the argument. 434 | fn confidence_percentile_nearest(&self, percentile: f64) -> CompressedDagNodeRef { 435 | let segment_ranges = self.segment_percentile_ranges(); 436 | trace!("segment_ranges = {:?}", segment_ranges); 437 | let mut best_node = CompressedDagNodeRef { 438 | segment: 0, 439 | index: 0, 440 | }; 441 | let mut best_value = f64::NEG_INFINITY; 442 | for (i, range) in segment_ranges.iter().enumerate() { 443 | let (ix, mut value) = 444 | confidence_percentile_nearest(&self.segment_range_maps[i], percentile - range.0); 445 | value += range.0; 446 | if (percentile - value).abs() < (percentile - best_value).abs() { 447 | best_node = CompressedDagNodeRef { 448 | segment: i, 449 | index: ix, 450 | }; 451 | best_value = value; 452 | } 453 | } 454 | assert!(best_value > f64::NEG_INFINITY); 455 | best_node 456 | } 457 | 458 | /// Returns the node whose percentile (i.e. the sum of weights over the node and its ancestors) 459 | /// is smallest but greater than or equal to the argument. 460 | pub fn confidence_percentile_ceil(&self, percentile: f64) -> CompressedDagNodeRef { 461 | let segment_ranges = self.segment_percentile_ranges(); 462 | let mut min_end = 0; 463 | let mut min_end_segment = 0; 464 | let mut min_end_value = f64::INFINITY; 465 | for (i, range) in segment_ranges.iter().enumerate() { 466 | let (ix, mut value) = 467 | confidence_percentile_ceil(&self.segment_range_maps[i], percentile - range.0); 468 | value += range.0; 469 | trace!( 470 | "i = {}, ix = {}, value = {}, min_end_value = {}", 471 | i, 472 | ix, 473 | value, 474 | min_end_value 475 | ); 476 | if value < min_end_value && value >= percentile { 477 | min_end = ix; 478 | min_end_segment = i; 479 | min_end_value = value; 480 | } 481 | } 482 | let ret = CompressedDagNodeRef { 483 | segment: min_end_segment, 484 | index: min_end, 485 | }; 486 | trace!( 487 | "CompressedDagSearcher::confidence_percentile_ceil returning {:?}", 488 | ret 489 | ); 490 | ret 491 | } 492 | 493 | /// Returns the current estimate of the best node. 494 | pub fn best_node(&self) -> CompressedDagNodeRef { 495 | self.confidence_percentile_ceil(0.5) 496 | } 497 | 498 | /// Returns the next node that should be tested.
499 | pub fn next_node(&self) -> CompressedDagNodeRef { 500 | self.confidence_percentile_nearest(0.5) 501 | } 502 | 503 | /// Adds a vote to the internal statistics. With low flakiness, nodes with false votes are 504 | /// expected not to have nodes with true votes as ancestors. 505 | /// 506 | /// # Panics 507 | /// 508 | /// Panics if the node is out of range. 509 | pub fn report(&mut self, node: CompressedDagNodeRef, heads: bool, flakiness: f64) { 510 | let stiffness = optimal_stiffness(flakiness); 511 | let graph: &CompressedDag = self.graph.borrow(); 512 | if heads { 513 | for segment in graph.node(node.segment).ancestors() { 514 | for w in self.segment_range_maps[*segment].ranges_mut() { 515 | *w.value_mut() *= 1.0 + stiffness; 516 | } 517 | } 518 | } else { 519 | let ancestor_segments = graph.node(node.segment).ancestors(); 520 | for segment in 0..graph.nodes().len() { 521 | if ancestor_segments.contains(&segment) || segment == node.segment { 522 | continue; 523 | } 524 | for w in self.segment_range_maps[segment].ranges_mut() { 525 | *w.value_mut() *= 1.0 + stiffness; 526 | } 527 | } 528 | } 529 | report_range( 530 | &mut self.segment_range_maps[node.segment], 531 | node.index, 532 | heads, 533 | stiffness, 534 | ); 535 | let weight_sum: f64 = self 536 | .segment_range_maps 537 | .iter() 538 | .map(|range_map| { 539 | range_map 540 | .ranges() 541 | .map(|w| w.value() * w.len() as f64) 542 | .sum::<f64>() 543 | }) 544 | .sum(); 545 | for range_map in &mut self.segment_range_maps { 546 | for w in range_map.ranges_mut() { 547 | *w.value_mut() /= weight_sum; 548 | } 549 | } 550 | } 551 | 552 | /// Returns the likelihood of the given index. 553 | /// 554 | /// # Panics 555 | /// 556 | /// Panics if the node is out of range. 557 | pub fn likelihood(&self, node: CompressedDagNodeRef) -> f64 { 558 | *self.segment_range_maps[node.segment] 559 | .range_for_index(node.index) 560 | .value() 561 | } 562 | } 563 | 564 | /// Performs a robust binary search over a CompressedDag and automatically infers the flakiness 565 | /// based on the votes. 566 | #[derive(Clone, Debug)] 567 | pub struct AutoCompressedDagSearcher { 568 | searcher: CompressedDagSearcher, 569 | flakiness_tracker: CompressedDagFlakinessTracker, 570 | } 571 | 572 | #[deprecated(note = "Use AutoCompressedDagSearcher instead.")] 573 | pub type AutoCompressedDAGSearcher = AutoCompressedDagSearcher; 574 | 575 | impl AutoCompressedDagSearcher { 576 | /// Creates a new AutoCompressedDagSearcher. 577 | pub fn new(graph: Rc<CompressedDag>) -> Self { 578 | Self { 579 | searcher: CompressedDagSearcher::new(graph.clone()), 580 | flakiness_tracker: CompressedDagFlakinessTracker::new(graph), 581 | } 582 | } 583 | 584 | /// Adds a vote to the internal statistics. With low flakiness, nodes with false votes are 585 | /// expected not to have nodes with true votes as ancestors. 586 | /// 587 | /// # Panics 588 | /// 589 | /// Panics if the node is out of range. 590 | pub fn report(&mut self, node: CompressedDagNodeRef, heads: bool) { 591 | self.flakiness_tracker.report(node, heads); 592 | self.searcher 593 | .report(node, heads, self.flakiness_tracker.flakiness()); 594 | } 595 | 596 | /// Returns the next node that should be tested. 597 | pub fn next_node(&self) -> CompressedDagNodeRef { 598 | self.searcher.next_node() 599 | } 600 | 601 | /// Returns the current estimate of the best node. 602 | pub fn best_node(&self) -> CompressedDagNodeRef { 603 | self.searcher.best_node() 604 | } 605 | 606 | /// Returns the likelihood of the given index.
607 | /// 608 | /// # Panics 609 | /// 610 | /// Panics if the node is out of range. 611 | pub fn likelihood(&self, index: CompressedDagNodeRef) -> f64 { 612 | self.searcher.likelihood(index) 613 | } 614 | 615 | /// Returns the estimated flakiness. 616 | pub fn flakiness(&self) -> f64 { 617 | self.flakiness_tracker.flakiness() 618 | } 619 | } 620 | 621 | #[cfg(test)] 622 | mod tests { 623 | use super::*; 624 | 625 | const DEFAULT_FLAKINESS: f64 = 0.01; 626 | 627 | macro_rules! assert_index { 628 | ($searcher:expr, $next:expr, $best:expr, $heads:expr, $flakiness:expr) => { 629 | assert_eq!($searcher.next_index().unwrap(), $next, "next_index"); 630 | assert_eq!($searcher.best_index(), $best, "best_index"); 631 | $searcher.report($next, $heads, $flakiness); 632 | }; 633 | } 634 | 635 | // Each test should run until a cycle repeats itself three times, and the 636 | // best_index is stable. The cycle may consist of a single element. 637 | 638 | #[test] 639 | fn one_element_zero() { 640 | let mut s = Searcher::new(1); 641 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 642 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 643 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 644 | } 645 | 646 | #[test] 647 | fn one_element_one() { 648 | let mut s = Searcher::new(1); 649 | assert_index!(s, 0, 0, false, DEFAULT_FLAKINESS); 650 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 651 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 652 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 653 | } 654 | 655 | #[test] 656 | fn two_elements_zero() { 657 | let mut s = Searcher::new(2); 658 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 659 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 660 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 661 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 662 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 663 | } 664 | 665 | #[test] 666 | fn two_elements_one() { 667 | let mut s = Searcher::new(2); 668 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 669 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 670 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 671 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 672 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 673 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 674 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 675 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 676 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 677 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 678 | } 679 | 680 | #[test] 681 | fn two_elements_two() { 682 | let mut s = Searcher::new(2); 683 | assert_index!(s, 1, 1, false, DEFAULT_FLAKINESS); 684 | assert_index!(s, 1, 2, false, DEFAULT_FLAKINESS); 685 | assert_index!(s, 1, 2, false, DEFAULT_FLAKINESS); 686 | assert_index!(s, 1, 2, false, DEFAULT_FLAKINESS); 687 | } 688 | 689 | #[test] 690 | fn three_elements_zero() { 691 | let mut s = Searcher::new(3); 692 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 693 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 694 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 695 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 696 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 697 | } 698 | 699 | #[test] 700 | fn three_elements_one() { 701 | let mut s = Searcher::new(3); 702 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 703 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 704 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 705 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 706 | 
assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 707 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 708 | } 709 | 710 | #[test] 711 | fn three_elements_two() { 712 | let mut s = Searcher::new(3); 713 | assert_index!(s, 1, 1, false, DEFAULT_FLAKINESS); 714 | assert_index!(s, 2, 2, true, DEFAULT_FLAKINESS); 715 | assert_index!(s, 1, 2, false, DEFAULT_FLAKINESS); 716 | assert_index!(s, 2, 2, true, DEFAULT_FLAKINESS); 717 | assert_index!(s, 1, 2, false, DEFAULT_FLAKINESS); 718 | assert_index!(s, 2, 2, true, DEFAULT_FLAKINESS); 719 | assert_index!(s, 1, 2, false, DEFAULT_FLAKINESS); 720 | } 721 | 722 | #[test] 723 | fn three_elements_three() { 724 | let mut s = Searcher::new(3); 725 | assert_index!(s, 1, 1, false, DEFAULT_FLAKINESS); 726 | assert_index!(s, 2, 2, false, DEFAULT_FLAKINESS); 727 | assert_index!(s, 2, 3, false, DEFAULT_FLAKINESS); 728 | assert_index!(s, 2, 3, false, DEFAULT_FLAKINESS); 729 | assert_index!(s, 2, 3, false, DEFAULT_FLAKINESS); 730 | } 731 | 732 | #[test] 733 | fn many_elements_first() { 734 | let mut s = Searcher::new(1024); 735 | assert_index!(s, 512, 512, true, DEFAULT_FLAKINESS); 736 | assert_index!(s, 272, 273, true, DEFAULT_FLAKINESS); 737 | assert_index!(s, 144, 145, true, DEFAULT_FLAKINESS); 738 | assert_index!(s, 76, 77, true, DEFAULT_FLAKINESS); 739 | assert_index!(s, 40, 41, true, DEFAULT_FLAKINESS); 740 | assert_index!(s, 21, 21, true, DEFAULT_FLAKINESS); 741 | assert_index!(s, 11, 11, true, DEFAULT_FLAKINESS); 742 | assert_index!(s, 5, 6, true, DEFAULT_FLAKINESS); 743 | assert_index!(s, 2, 3, true, DEFAULT_FLAKINESS); 744 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 745 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 746 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 747 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 748 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 749 | } 750 | 751 | #[test] 752 | fn many_elements_last() { 753 | let mut s = Searcher::new(1024); 754 | assert_index!(s, 512, 512, false, DEFAULT_FLAKINESS); 755 | assert_index!(s, 751, 752, false, DEFAULT_FLAKINESS); 756 | assert_index!(s, 879, 879, false, DEFAULT_FLAKINESS); 757 | assert_index!(s, 947, 947, false, DEFAULT_FLAKINESS); 758 | assert_index!(s, 983, 983, false, DEFAULT_FLAKINESS); 759 | assert_index!(s, 1002, 1003, false, DEFAULT_FLAKINESS); 760 | assert_index!(s, 1012, 1013, false, DEFAULT_FLAKINESS); 761 | assert_index!(s, 1018, 1018, false, DEFAULT_FLAKINESS); 762 | assert_index!(s, 1021, 1021, false, DEFAULT_FLAKINESS); 763 | assert_index!(s, 1022, 1023, false, DEFAULT_FLAKINESS); 764 | assert_index!(s, 1023, 1023, false, DEFAULT_FLAKINESS); 765 | assert_index!(s, 1023, 1024, false, DEFAULT_FLAKINESS); 766 | assert_index!(s, 1023, 1024, false, DEFAULT_FLAKINESS); 767 | assert_index!(s, 1023, 1024, false, DEFAULT_FLAKINESS); 768 | } 769 | 770 | #[test] 771 | fn one_element_skip_zero() { 772 | let mut s = Searcher::new(1); 773 | s.add_skip(0); 774 | assert_eq!(s.next_index(), None); 775 | } 776 | 777 | #[test] 778 | fn two_elements_zero_skip_zero() { 779 | let mut s = Searcher::new(2); 780 | s.add_skip(0); 781 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 782 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 783 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 784 | } 785 | 786 | #[test] 787 | fn two_elements_zero_skip_one() { 788 | let mut s = Searcher::new(2); 789 | s.add_skip(1); 790 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 791 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 792 | assert_index!(s, 0, 0, true, 
DEFAULT_FLAKINESS); 793 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 794 | } 795 | 796 | #[test] 797 | fn two_elements_one_skip_one() { 798 | let mut s = Searcher::new(2); 799 | s.add_skip(1); 800 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 801 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 802 | assert_index!(s, 0, 1, false, DEFAULT_FLAKINESS); 803 | } 804 | 805 | #[test] 806 | fn many_elements_first_skip_mid() { 807 | let mut s = Searcher::new(1024); 808 | s.add_skip(512); 809 | assert_index!(s, 513, 512, true, DEFAULT_FLAKINESS); 810 | assert_index!(s, 273, 273, true, DEFAULT_FLAKINESS); 811 | assert_index!(s, 145, 145, true, DEFAULT_FLAKINESS); 812 | assert_index!(s, 77, 77, true, DEFAULT_FLAKINESS); 813 | assert_index!(s, 41, 41, true, DEFAULT_FLAKINESS); 814 | assert_index!(s, 21, 22, true, DEFAULT_FLAKINESS); 815 | assert_index!(s, 11, 11, true, DEFAULT_FLAKINESS); 816 | assert_index!(s, 5, 6, true, DEFAULT_FLAKINESS); 817 | assert_index!(s, 2, 3, true, DEFAULT_FLAKINESS); 818 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 819 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 820 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 821 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 822 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 823 | } 824 | 825 | #[test] 826 | fn many_elements_first_skip_mid2() { 827 | let mut s = Searcher::new(1024); 828 | s.add_skip(512); 829 | s.add_skip(513); 830 | assert_index!(s, 511, 512, true, DEFAULT_FLAKINESS); 831 | assert_index!(s, 272, 272, true, DEFAULT_FLAKINESS); 832 | assert_index!(s, 144, 145, true, DEFAULT_FLAKINESS); 833 | assert_index!(s, 76, 77, true, DEFAULT_FLAKINESS); 834 | assert_index!(s, 40, 41, true, DEFAULT_FLAKINESS); 835 | assert_index!(s, 21, 21, true, DEFAULT_FLAKINESS); 836 | assert_index!(s, 11, 11, true, DEFAULT_FLAKINESS); 837 | assert_index!(s, 5, 6, true, DEFAULT_FLAKINESS); 838 | assert_index!(s, 2, 3, true, DEFAULT_FLAKINESS); 839 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 840 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 841 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 842 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 843 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 844 | } 845 | 846 | #[test] 847 | fn many_elements_first_skip_mid3() { 848 | let mut s = Searcher::new(1024); 849 | s.add_skip(512); 850 | s.add_skip(513); 851 | s.add_skip(511); 852 | assert_index!(s, 514, 512, true, DEFAULT_FLAKINESS); 853 | assert_index!(s, 273, 274, true, DEFAULT_FLAKINESS); 854 | assert_index!(s, 145, 145, true, DEFAULT_FLAKINESS); 855 | assert_index!(s, 77, 77, true, DEFAULT_FLAKINESS); 856 | assert_index!(s, 41, 41, true, DEFAULT_FLAKINESS); 857 | assert_index!(s, 21, 22, true, DEFAULT_FLAKINESS); 858 | assert_index!(s, 11, 11, true, DEFAULT_FLAKINESS); 859 | assert_index!(s, 5, 6, true, DEFAULT_FLAKINESS); 860 | assert_index!(s, 2, 3, true, DEFAULT_FLAKINESS); 861 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 862 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 863 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 864 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 865 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 866 | } 867 | 868 | #[test] 869 | fn many_elements_first_skip_mid4() { 870 | let mut s = Searcher::new(1024); 871 | s.add_skip(512); 872 | s.add_skip(513); 873 | s.add_skip(511); 874 | s.add_skip(514); 875 | assert_index!(s, 510, 512, true, DEFAULT_FLAKINESS); 876 | assert_index!(s, 271, 272, true, DEFAULT_FLAKINESS); 877 | assert_index!(s, 144, 
144, true, DEFAULT_FLAKINESS); 878 | assert_index!(s, 76, 77, true, DEFAULT_FLAKINESS); 879 | assert_index!(s, 40, 41, true, DEFAULT_FLAKINESS); 880 | assert_index!(s, 21, 21, true, DEFAULT_FLAKINESS); 881 | assert_index!(s, 11, 11, true, DEFAULT_FLAKINESS); 882 | assert_index!(s, 5, 6, true, DEFAULT_FLAKINESS); 883 | assert_index!(s, 2, 3, true, DEFAULT_FLAKINESS); 884 | assert_index!(s, 1, 1, true, DEFAULT_FLAKINESS); 885 | assert_index!(s, 0, 1, true, DEFAULT_FLAKINESS); 886 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 887 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 888 | assert_index!(s, 0, 0, true, DEFAULT_FLAKINESS); 889 | } 890 | 891 | #[test] 892 | fn many_elements_mid_skip_mid() { 893 | let mut s = Searcher::new(1024); 894 | s.add_skip(512); 895 | assert_index!(s, 513, 512, true, DEFAULT_FLAKINESS); 896 | assert_index!(s, 273, 273, false, DEFAULT_FLAKINESS); 897 | assert_index!(s, 401, 401, false, DEFAULT_FLAKINESS); 898 | assert_index!(s, 469, 469, false, DEFAULT_FLAKINESS); 899 | assert_index!(s, 505, 506, false, DEFAULT_FLAKINESS); 900 | assert_index!(s, 687, 687, true, DEFAULT_FLAKINESS); 901 | assert_index!(s, 529, 530, true, DEFAULT_FLAKINESS); 902 | assert_index!(s, 509, 509, false, DEFAULT_FLAKINESS); 903 | assert_index!(s, 513, 512, true, DEFAULT_FLAKINESS); 904 | assert_index!(s, 511, 511, false, DEFAULT_FLAKINESS); 905 | assert_index!(s, 513, 512, true, DEFAULT_FLAKINESS); 906 | assert_index!(s, 513, 512, true, DEFAULT_FLAKINESS); 907 | assert_index!(s, 513, 512, true, DEFAULT_FLAKINESS); 908 | } 909 | 910 | #[test] 911 | fn graph_confidence_percentile_nearest_singleton() { 912 | let mut graph = CompressedDag::default(); 913 | graph.add_node(CompressedDagSegment::new(1), vec![]); 914 | let searcher = CompressedDagSearcher::new(Rc::new(graph)); 915 | assert_eq!( 916 | searcher.confidence_percentile_nearest(0.5), 917 | CompressedDagNodeRef { 918 | segment: 0, 919 | index: 0 920 | } 921 | ); 922 | } 923 | 924 | #[test] 925 | fn graph_confidence_percentile_nearest_single_segment() { 926 | let mut graph = CompressedDag::default(); 927 | graph.add_node(CompressedDagSegment::new(10), vec![]); 928 | let searcher = CompressedDagSearcher::new(Rc::new(graph)); 929 | assert_eq!( 930 | searcher.confidence_percentile_nearest(0.5), 931 | CompressedDagNodeRef { 932 | segment: 0, 933 | index: 4 934 | } 935 | ); 936 | } 937 | 938 | #[test] 939 | fn graph_confidence_percentile_nearest_parallel_segments() { 940 | let mut graph = CompressedDag::default(); 941 | graph.add_node(CompressedDagSegment::new(10), vec![]); 942 | graph.add_node(CompressedDagSegment::new(10), vec![]); 943 | let searcher = CompressedDagSearcher::new(Rc::new(graph)); 944 | assert_eq!( 945 | searcher.confidence_percentile_nearest(0.5), 946 | CompressedDagNodeRef { 947 | segment: 0, 948 | index: 9 949 | } 950 | ); 951 | } 952 | 953 | #[test] 954 | fn graph_confidence_percentile_nearest_parallel_unequal_segments() { 955 | let mut graph = CompressedDag::default(); 956 | graph.add_node(CompressedDagSegment::new(100), vec![]); 957 | graph.add_node(CompressedDagSegment::new(10), vec![]); 958 | let searcher = CompressedDagSearcher::new(Rc::new(graph)); 959 | assert_eq!( 960 | searcher.confidence_percentile_nearest(0.5), 961 | CompressedDagNodeRef { 962 | segment: 0, 963 | index: 54 964 | } 965 | ); 966 | } 967 | 968 | #[test] 969 | fn graph_confidence_percentile_nearest_parallel_unequal_segments2() { 970 | let mut graph = CompressedDag::default(); 971 | graph.add_node(CompressedDagSegment::new(10), vec![]); 972 | 
graph.add_node(CompressedDagSegment::new(100), vec![]); 973 | let searcher = CompressedDagSearcher::new(Rc::new(graph)); 974 | assert_eq!( 975 | searcher.confidence_percentile_nearest(0.5), 976 | CompressedDagNodeRef { 977 | segment: 1, 978 | index: 54 979 | } 980 | ); 981 | } 982 | 983 | #[test] 984 | fn graph_confidence_percentile_nearest_sequential_segments() { 985 | let mut graph = CompressedDag::default(); 986 | graph.add_node(CompressedDagSegment::new(10), vec![]); 987 | graph.add_node(CompressedDagSegment::new(10), vec![0]); 988 | graph.add_node(CompressedDagSegment::new(10), vec![1]); 989 | let searcher = CompressedDagSearcher::new(Rc::new(graph)); 990 | assert_eq!( 991 | searcher.confidence_percentile_nearest(0.5), 992 | CompressedDagNodeRef { 993 | segment: 1, 994 | index: 4 995 | } 996 | ); 997 | } 998 | 999 | #[test] 1000 | fn graph_confidence_percentile_nearest_fork() { 1001 | let mut graph = CompressedDag::default(); 1002 | graph.add_node(CompressedDagSegment::new(10), vec![]); 1003 | graph.add_node(CompressedDagSegment::new(10), vec![0]); 1004 | graph.add_node(CompressedDagSegment::new(10), vec![0]); 1005 | let searcher = CompressedDagSearcher::new(Rc::new(graph)); 1006 | assert_eq!( 1007 | searcher.confidence_percentile_nearest(0.5), 1008 | CompressedDagNodeRef { 1009 | segment: 1, 1010 | index: 4 1011 | } 1012 | ); 1013 | } 1014 | 1015 | #[test] 1016 | fn graph_confidence_percentile_nearest_merge() { 1017 | let mut graph = CompressedDag::default(); 1018 | graph.add_node(CompressedDagSegment::new(10), vec![]); 1019 | graph.add_node(CompressedDagSegment::new(10), vec![]); 1020 | graph.add_node(CompressedDagSegment::new(10), vec![0, 1]); 1021 | let searcher = CompressedDagSearcher::new(Rc::new(graph)); 1022 | assert_eq!( 1023 | searcher.confidence_percentile_nearest(0.5), 1024 | CompressedDagNodeRef { 1025 | segment: 0, 1026 | index: 9 1027 | } 1028 | ); 1029 | } 1030 | 1031 | macro_rules! 
assert_graph_index { 1032 | ($searcher:expr, $next:expr, $best:expr, $heads:expr, $flakiness:expr) => { 1033 | assert_eq!( 1034 | $searcher.next_node(), 1035 | CompressedDagNodeRef { 1036 | segment: $next.0, 1037 | index: $next.1 1038 | }, 1039 | "next_index" 1040 | ); 1041 | assert_eq!( 1042 | $searcher.best_node(), 1043 | CompressedDagNodeRef { 1044 | segment: $best.0, 1045 | index: $best.1 1046 | }, 1047 | "best_index" 1048 | ); 1049 | $searcher.report( 1050 | CompressedDagNodeRef { 1051 | segment: $next.0, 1052 | index: $next.1, 1053 | }, 1054 | $heads, 1055 | $flakiness, 1056 | ); 1057 | }; 1058 | } 1059 | 1060 | #[test] 1061 | fn graph_two_elements_zero() { 1062 | let mut graph = CompressedDag::default(); 1063 | graph.add_node(CompressedDagSegment::new(2), vec![]); 1064 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1065 | assert_graph_index!(s, (0, 0), (0, 0), true, DEFAULT_FLAKINESS); 1066 | assert_graph_index!(s, (0, 0), (0, 0), true, DEFAULT_FLAKINESS); 1067 | } 1068 | 1069 | #[test] 1070 | fn graph_two_elements_one() { 1071 | let mut graph = CompressedDag::default(); 1072 | graph.add_node(CompressedDagSegment::new(2), vec![]); 1073 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1074 | assert_graph_index!(s, (0, 0), (0, 0), false, DEFAULT_FLAKINESS); 1075 | assert_graph_index!(s, (0, 0), (0, 1), false, DEFAULT_FLAKINESS); 1076 | assert_graph_index!(s, (0, 0), (0, 1), false, DEFAULT_FLAKINESS); 1077 | } 1078 | 1079 | #[test] 1080 | fn graph_many_elements_last() { 1081 | let mut graph = CompressedDag::default(); 1082 | graph.add_node(CompressedDagSegment::new(1024), vec![]); 1083 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1084 | assert_graph_index!(s, (0, 511), (0, 511), false, DEFAULT_FLAKINESS); 1085 | assert_graph_index!(s, (0, 750), (0, 751), false, DEFAULT_FLAKINESS); 1086 | assert_graph_index!(s, (0, 878), (0, 878), false, DEFAULT_FLAKINESS); 1087 | assert_graph_index!(s, (0, 946), (0, 946), false, DEFAULT_FLAKINESS); 1088 | assert_graph_index!(s, (0, 982), (0, 982), false, DEFAULT_FLAKINESS); 1089 | } 1090 | 1091 | #[test] 1092 | fn graph_parallel_first_first() { 1093 | let mut graph = CompressedDag::default(); 1094 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1095 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1096 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1097 | assert_graph_index!(s, (0, 99), (0, 99), true, DEFAULT_FLAKINESS); 1098 | assert_graph_index!(s, (0, 52), (0, 53), true, DEFAULT_FLAKINESS); 1099 | assert_graph_index!(s, (0, 27), (0, 28), true, DEFAULT_FLAKINESS); 1100 | assert_graph_index!(s, (0, 14), (0, 14), true, DEFAULT_FLAKINESS); 1101 | assert_graph_index!(s, (0, 7), (0, 7), true, DEFAULT_FLAKINESS); 1102 | assert_graph_index!(s, (0, 3), (0, 4), true, DEFAULT_FLAKINESS); 1103 | assert_graph_index!(s, (0, 1), (0, 2), true, DEFAULT_FLAKINESS); 1104 | assert_graph_index!(s, (0, 0), (0, 1), true, DEFAULT_FLAKINESS); 1105 | assert_graph_index!(s, (0, 0), (0, 0), true, DEFAULT_FLAKINESS); 1106 | assert_graph_index!(s, (0, 0), (0, 0), true, DEFAULT_FLAKINESS); 1107 | } 1108 | 1109 | #[test] 1110 | fn graph_parallel_first_last() { 1111 | let mut graph = CompressedDag::default(); 1112 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1113 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1114 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1115 | assert_graph_index!(s, (0, 99), (0, 99), true, DEFAULT_FLAKINESS); 1116 | assert_graph_index!(s, (0, 52), (0, 53), 
false, DEFAULT_FLAKINESS); 1117 | assert_graph_index!(s, (0, 77), (0, 78), false, DEFAULT_FLAKINESS); 1118 | assert_graph_index!(s, (0, 90), (0, 91), false, DEFAULT_FLAKINESS); 1119 | assert_graph_index!(s, (0, 97), (0, 98), false, DEFAULT_FLAKINESS); 1120 | assert_graph_index!(s, (1, 68), (1, 69), false, DEFAULT_FLAKINESS); 1121 | assert_graph_index!(s, (1, 99), (0, 99), false, DEFAULT_FLAKINESS); 1122 | assert_graph_index!(s, (0, 98), (0, 98), false, DEFAULT_FLAKINESS); 1123 | assert_graph_index!(s, (0, 99), (0, 99), true, DEFAULT_FLAKINESS); 1124 | assert_graph_index!(s, (0, 98), (0, 99), false, DEFAULT_FLAKINESS); 1125 | assert_graph_index!(s, (1, 99), (0, 99), false, DEFAULT_FLAKINESS); 1126 | } 1127 | 1128 | #[test] 1129 | fn graph_parallel_last_first() { 1130 | let mut graph = CompressedDag::default(); 1131 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1132 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1133 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1134 | assert_graph_index!(s, (0, 99), (0, 99), false, DEFAULT_FLAKINESS); 1135 | assert_graph_index!(s, (1, 52), (1, 53), true, DEFAULT_FLAKINESS); 1136 | assert_graph_index!(s, (1, 27), (1, 28), true, DEFAULT_FLAKINESS); 1137 | assert_graph_index!(s, (1, 14), (1, 14), true, DEFAULT_FLAKINESS); 1138 | assert_graph_index!(s, (1, 7), (1, 7), true, DEFAULT_FLAKINESS); 1139 | assert_graph_index!(s, (1, 3), (1, 4), true, DEFAULT_FLAKINESS); 1140 | assert_graph_index!(s, (1, 1), (1, 2), true, DEFAULT_FLAKINESS); 1141 | assert_graph_index!(s, (1, 0), (1, 1), true, DEFAULT_FLAKINESS); 1142 | assert_graph_index!(s, (1, 0), (1, 0), true, DEFAULT_FLAKINESS); 1143 | assert_graph_index!(s, (1, 0), (1, 0), true, DEFAULT_FLAKINESS); 1144 | } 1145 | 1146 | #[test] 1147 | fn graph_parallel_last_last() { 1148 | let mut graph = CompressedDag::default(); 1149 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1150 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1151 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1152 | assert_graph_index!(s, (0, 99), (0, 99), false, DEFAULT_FLAKINESS); 1153 | assert_graph_index!(s, (1, 52), (1, 53), false, DEFAULT_FLAKINESS); 1154 | assert_graph_index!(s, (1, 77), (1, 78), false, DEFAULT_FLAKINESS); 1155 | assert_graph_index!(s, (1, 90), (1, 91), false, DEFAULT_FLAKINESS); 1156 | assert_graph_index!(s, (1, 97), (1, 98), false, DEFAULT_FLAKINESS); 1157 | assert_graph_index!(s, (0, 68), (0, 69), false, DEFAULT_FLAKINESS); 1158 | assert_graph_index!(s, (0, 99), (1, 99), false, DEFAULT_FLAKINESS); 1159 | assert_graph_index!(s, (1, 98), (1, 98), false, DEFAULT_FLAKINESS); 1160 | assert_graph_index!(s, (0, 99), (1, 99), false, DEFAULT_FLAKINESS); 1161 | assert_graph_index!(s, (1, 98), (1, 99), false, DEFAULT_FLAKINESS); 1162 | } 1163 | 1164 | #[test] 1165 | fn graph_parallel_first_half() { 1166 | let mut graph = CompressedDag::default(); 1167 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1168 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1169 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1170 | assert_graph_index!(s, (0, 99), (0, 99), true, DEFAULT_FLAKINESS); 1171 | assert_graph_index!(s, (0, 52), (0, 53), true, DEFAULT_FLAKINESS); 1172 | assert_graph_index!(s, (0, 27), (0, 28), false, DEFAULT_FLAKINESS); 1173 | assert_graph_index!(s, (0, 40), (0, 41), false, DEFAULT_FLAKINESS); 1174 | assert_graph_index!(s, (0, 47), (0, 48), false, DEFAULT_FLAKINESS); 1175 | assert_graph_index!(s, (0, 51), (0, 51), true, DEFAULT_FLAKINESS); 
1176 | assert_graph_index!(s, (0, 49), (0, 49), false, DEFAULT_FLAKINESS); 1177 | assert_graph_index!(s, (0, 50), (0, 51), true, DEFAULT_FLAKINESS); 1178 | assert_graph_index!(s, (0, 49), (0, 50), false, DEFAULT_FLAKINESS); 1179 | assert_graph_index!(s, (0, 50), (0, 50), true, DEFAULT_FLAKINESS); 1180 | } 1181 | 1182 | #[test] 1183 | fn graph_parallel_second_half() { 1184 | let mut graph = CompressedDag::default(); 1185 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1186 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1187 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1188 | assert_graph_index!(s, (0, 99), (0, 99), false, DEFAULT_FLAKINESS); 1189 | assert_graph_index!(s, (1, 52), (1, 53), true, DEFAULT_FLAKINESS); 1190 | assert_graph_index!(s, (1, 27), (1, 28), false, DEFAULT_FLAKINESS); 1191 | assert_graph_index!(s, (1, 40), (1, 41), false, DEFAULT_FLAKINESS); 1192 | assert_graph_index!(s, (1, 47), (1, 48), false, DEFAULT_FLAKINESS); 1193 | assert_graph_index!(s, (1, 51), (1, 51), true, DEFAULT_FLAKINESS); 1194 | assert_graph_index!(s, (1, 49), (1, 49), false, DEFAULT_FLAKINESS); 1195 | assert_graph_index!(s, (1, 50), (1, 51), true, DEFAULT_FLAKINESS); 1196 | assert_graph_index!(s, (1, 49), (1, 50), false, DEFAULT_FLAKINESS); 1197 | assert_graph_index!(s, (1, 50), (1, 50), true, DEFAULT_FLAKINESS); 1198 | } 1199 | 1200 | #[test] 1201 | fn graph_fork_join() { 1202 | // /-1-\ 1203 | // *-0-* *-3-* 1204 | // \-2-/ 1205 | let mut graph = CompressedDag::default(); 1206 | graph.add_node(CompressedDagSegment::new(100), vec![]); 1207 | graph.add_node(CompressedDagSegment::new(100), vec![0]); 1208 | graph.add_node(CompressedDagSegment::new(100), vec![0]); 1209 | graph.add_node(CompressedDagSegment::new(100), vec![1, 2]); 1210 | let mut s = CompressedDagSearcher::new(Rc::new(graph)); 1211 | assert_graph_index!(s, (1, 99), (1, 99), false, DEFAULT_FLAKINESS); 1212 | assert_graph_index!(s, (2, 99), (2, 99), true, DEFAULT_FLAKINESS); 1213 | assert_graph_index!(s, (2, 49), (2, 50), false, DEFAULT_FLAKINESS); 1214 | assert_graph_index!(s, (2, 76), (2, 76), true, DEFAULT_FLAKINESS); 1215 | assert_graph_index!(s, (2, 62), (2, 62), true, DEFAULT_FLAKINESS); 1216 | assert_graph_index!(s, (2, 54), (2, 55), true, DEFAULT_FLAKINESS); 1217 | assert_graph_index!(s, (2, 50), (2, 50), true, DEFAULT_FLAKINESS); 1218 | assert_graph_index!(s, (2, 31), (2, 31), false, DEFAULT_FLAKINESS); 1219 | assert_graph_index!(s, (2, 49), (2, 49), false, DEFAULT_FLAKINESS); 1220 | assert_graph_index!(s, (2, 50), (2, 50), true, DEFAULT_FLAKINESS); 1221 | assert_graph_index!(s, (2, 49), (2, 50), false, DEFAULT_FLAKINESS); 1222 | } 1223 | } 1224 | -------------------------------------------------------------------------------- /robust-binary-search/src/range_map.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 
15 | /// A single entry in a RangeMap, which corresponds to a range of individual values.
16 | #[derive(Copy, Clone, Debug, PartialEq, Eq)]
17 | pub struct RangeMapEntry<T> {
18 |     /// Beginning index of the range within the conceptual vector of individual values.
19 |     offset: usize,
20 |     /// Number of indices captured by the range.
21 |     len: usize,
22 |     /// Value of all individual values within the range.
23 |     value: T,
24 | }
25 | 
26 | impl<T> RangeMapEntry<T> {
27 |     #[allow(dead_code)]
28 |     /// Returns the index of the first individual value in the range.
29 |     pub fn offset(&self) -> usize {
30 |         self.offset
31 |     }
32 | 
33 |     /// Returns the length of the range.
34 |     pub fn len(&self) -> usize {
35 |         self.len
36 |     }
37 | 
38 |     /// Returns offset() + len().
39 |     pub fn end(&self) -> usize {
40 |         self.offset + self.len
41 |     }
42 | 
43 |     /// Returns the value of the range.
44 |     pub fn value(&self) -> &T {
45 |         &self.value
46 |     }
47 | 
48 |     /// Returns a mutable reference to the value of the range.
49 |     pub fn value_mut(&mut self) -> &mut T {
50 |         &mut self.value
51 |     }
52 | }
53 | 
54 | /// A RangeMap is essentially a fixed-length vector optimized for long stretches of equal values.
55 | /// The RangeMap is partitioned into contiguous RangeMapEntries. For example, a map with the
56 | /// entries:
57 | ///
58 | /// ```text
59 | /// RangeMapEntry {
60 | ///     offset: 0,
61 | ///     len: 1,
62 | ///     value: 'a'
63 | /// }
64 | /// RangeMapEntry {
65 | ///     offset: 1,
66 | ///     len: 2,
67 | ///     value: 'b'
68 | /// }
69 | /// RangeMapEntry {
70 | ///     offset: 3,
71 | ///     len: 3,
72 | ///     value: 'c'
73 | /// }
74 | /// RangeMapEntry {
75 | ///     offset: 6,
76 | ///     len: 4,
77 | ///     value: 'd'
78 | /// }
79 | /// ```
80 | ///
81 | /// represents the data:
82 | ///
83 | /// ```text
84 | /// ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']
85 | /// ```
86 | ///
87 | /// Note that neighboring entries may contain the same value.
88 | #[derive(Clone, Debug)]
89 | pub struct RangeMap<T> {
90 |     /// Entries within the map. Invariants:
91 |     ///
92 |     /// 1. Must be non-empty.
93 |     /// 2. values[0].offset() == 0
94 |     /// 3. values[i - 1].end() == values[i].offset()
95 |     /// 4. The length of each entry must be non-zero.
96 |     values: Vec<RangeMapEntry<T>>,
97 | }
98 | 
99 | impl<T: Clone> RangeMap<T> {
100 |     /// Creates a new RangeMap with the given size and initial value. It contains a single entry
101 |     /// spanning the entire range.
102 |     pub fn new(size: usize, value: T) -> Self {
103 |         RangeMap {
104 |             values: vec![RangeMapEntry {
105 |                 offset: 0,
106 |                 len: size,
107 |                 value,
108 |             }],
109 |         }
110 |     }
111 | 
112 |     /// Returns the length of the entire range.
113 |     pub fn len(&self) -> usize {
114 |         self.values[self.values.len() - 1].end()
115 |     }
116 | 
117 |     /// Takes an individual element index and returns the RangeMapEntry index.
118 |     fn range_index(&self, index: usize) -> usize {
119 |         for (i, w) in self.values.iter().enumerate() {
120 |             if index >= w.offset && index < w.end() {
121 |                 return i;
122 |             }
123 |         }
124 |         self.values.len()
125 |     }
126 | 
127 |     /// Returns an iterator over entries.
128 |     pub fn ranges(&self) -> impl DoubleEndedIterator<Item = &RangeMapEntry<T>> {
129 |         self.values.iter()
130 |     }
131 | 
132 |     /// Returns an iterator over mutable entries.
133 |     pub fn ranges_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut RangeMapEntry<T>> {
134 |         self.values.iter_mut()
135 |     }
136 | 
137 |     /// Returns the entry containing the given index.
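    /// For example, given the map shown in the RangeMap docs above,
    /// `range_for_index(4)` returns `RangeMapEntry { offset: 3, len: 3, value: 'c' }`,
    /// since index 4 falls within the range 3..6.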
138 |     pub fn range_for_index(&self, index: usize) -> &RangeMapEntry<T> {
139 |         let range_index = self.range_index(index);
140 |         &self.values[range_index]
141 |     }
142 | 
143 |     /// Ensures that `index-1` and `index` are in different RangeMapEntries.
144 |     /// Returns the index of the RangeMapEntry containing `index`.
145 |     fn _split(&mut self, index: usize) -> usize {
146 |         match self.values.binary_search_by_key(&index, |e| e.offset) {
147 |             Ok(i) => return i,
148 |             Err(j) => {
149 |                 let i = j - 1;
150 |                 if index < self.values[i].end() {
151 |                     let w = self.values[i].clone();
152 |                     self.values.insert(
153 |                         i + 1,
154 |                         RangeMapEntry {
155 |                             offset: index,
156 |                             len: w.end() - index,
157 |                             value: w.value,
158 |                         },
159 |                     );
160 |                     self.values[i].len = index - w.offset;
161 |                     return i + 1;
162 |                 }
163 |             }
164 |         }
165 |         self.values.len()
166 |     }
167 | 
168 |     /// Ensures that `index-1` and `index` are in different RangeMapEntries.
169 |     /// Returns iterators for the left and right side of the split.
170 |     pub fn split(
171 |         &mut self,
172 |         index: usize,
173 |     ) -> (
174 |         impl DoubleEndedIterator<Item = &mut RangeMapEntry<T>>,
175 |         impl DoubleEndedIterator<Item = &mut RangeMapEntry<T>>,
176 |     ) {
177 |         let range_index = self._split(index);
178 |         let (left, right) = self.values.split_at_mut(range_index);
179 |         (left.iter_mut(), right.iter_mut())
180 |     }
181 | }
182 | 
183 | #[cfg(test)]
184 | mod tests {
185 |     use super::*;
186 | 
187 |     #[test]
188 |     fn range_for_index_empty() {
189 |         let m = RangeMap::new(10, 0.0);
190 |         assert_eq!(
191 |             m.range_for_index(0),
192 |             &RangeMapEntry {
193 |                 offset: 0,
194 |                 len: 10,
195 |                 value: 0.0
196 |             }
197 |         );
198 |         assert_eq!(
199 |             m.range_for_index(9),
200 |             &RangeMapEntry {
201 |                 offset: 0,
202 |                 len: 10,
203 |                 value: 0.0
204 |             }
205 |         );
206 |     }
207 | 
208 |     #[test]
209 |     fn split() {
210 |         let mut m = RangeMap::new(10, 0.0);
211 |         assert_eq!(
212 |             m.ranges().collect::<Vec<_>>(),
213 |             vec![&RangeMapEntry {
214 |                 offset: 0,
215 |                 len: 10,
216 |                 value: 0.0
217 |             }]
218 |         );
219 |         let (left, right) = m.split(5);
220 |         assert_eq!(
221 |             left.collect::<Vec<_>>(),
222 |             vec![&RangeMapEntry {
223 |                 offset: 0,
224 |                 len: 5,
225 |                 value: 0.0
226 |             }]
227 |         );
228 |         assert_eq!(
229 |             right.collect::<Vec<_>>(),
230 |             vec![&RangeMapEntry {
231 |                 offset: 5,
232 |                 len: 5,
233 |                 value: 0.0
234 |             }]
235 |         );
236 |         assert_eq!(
237 |             m.range_for_index(0),
238 |             &RangeMapEntry {
239 |                 offset: 0,
240 |                 len: 5,
241 |                 value: 0.0
242 |             }
243 |         );
244 |         assert_eq!(
245 |             m.range_for_index(4),
246 |             &RangeMapEntry {
247 |                 offset: 0,
248 |                 len: 5,
249 |                 value: 0.0
250 |             }
251 |         );
252 |         assert_eq!(
253 |             m.range_for_index(5),
254 |             &RangeMapEntry {
255 |                 offset: 5,
256 |                 len: 5,
257 |                 value: 0.0
258 |             }
259 |         );
260 |         assert_eq!(
261 |             m.range_for_index(9),
262 |             &RangeMapEntry {
263 |                 offset: 5,
264 |                 len: 5,
265 |                 value: 0.0
266 |             }
267 |         );
268 |     }
269 | }
270 | 
--------------------------------------------------------------------------------
/robust-binary-search/src/tuner.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | use rand::Rng;
16 | use robust_binary_search::*;
17 | use std::cmp;
18 | use std::env;
19 | use std::error::Error;
20 | use std::fs::File;
21 | use std::io::Write;
22 | use std::process;
23 | 
24 | fn steps_required<R: Rng>(rng: &mut R, flakiness: f64, stiffness: f64) -> f64 {
25 |     let size = 1 << 20;
26 |     let mut max = 0;
27 |     let count = 100;
28 |     for _ in 0..count {
29 |         let mut searcher = Searcher::new(size);
30 |         let mut i = 0;
31 |         let index = (rng.gen::<f64>() * size as f64) as usize;
32 |         let max_steps = 1000;
33 |         max = cmp::max(
34 |             max,
35 |             loop {
36 |                 i += 1;
37 |                 let test_index = searcher.next_index().unwrap();
38 |                 if test_index == index || i == max_steps {
39 |                     break i;
40 |                 }
41 |                 let heads = if rng.gen::<f64>() < flakiness {
42 |                     rng.gen::<f64>() < 0.5
43 |                 } else {
44 |                     test_index >= index
45 |                 };
46 |                 searcher.report_with_stiffness(test_index, heads, stiffness);
47 |             },
48 |         );
49 |     }
50 |     max as f64
51 | }
52 | 
53 | fn log_interpolate(index: usize, buckets: usize, min: f64, max: f64) -> f64 {
54 |     (min.ln() + index as f64 / buckets as f64 * (max / min).ln()).exp()
55 | }
56 | 
57 | fn main() -> Result<(), Box<dyn Error>> {
58 |     // optimal stiffness is approximately
59 |     // min(2.6/x**0.37, 0.58/x**0.97, 0.19/x**2.4)
60 |     // where x is the flakiness (0 is deterministic, 1 is fully random)
61 |     let args: Vec<String> = env::args().collect();
62 |     if args.len() != 2 {
63 |         println!("Usage: main <output file>");
64 |         process::exit(1);
65 |     }
66 |     let mut f = File::create(&args[1])?;
67 |     let min_flakiness = 0.001;
68 |     let max_flakiness = 1.0;
69 |     let flakiness_buckets = 50;
70 |     let stiffness_buckets = 1000;
71 |     let min_stiffness = 0.1;
72 |     let max_stiffness = 128.0;
73 |     for flakiness_index in 0..flakiness_buckets {
74 |         let flakiness = log_interpolate(
75 |             flakiness_index,
76 |             flakiness_buckets,
77 |             min_flakiness,
78 |             max_flakiness,
79 |         );
80 |         let mut rng = rand::thread_rng();
81 |         let mut searcher = Searcher::new(stiffness_buckets);
82 |         let to_stiffness = |i| log_interpolate(i, stiffness_buckets, min_stiffness, max_stiffness);
83 |         let window = 1.5;
84 |         for i in 0..1000 {
85 |             let test_index = searcher.next_index().unwrap();
86 |             let steps1 = steps_required(&mut rng, flakiness, to_stiffness(test_index) / window);
87 |             let steps2 = steps_required(&mut rng, flakiness, to_stiffness(test_index) * window);
88 |             let heads = if steps1 < steps2 {
89 |                 true
90 |             } else {
91 |                 if steps1 > steps2 {
92 |                     false
93 |                 } else {
94 |                     rng.gen::<f64>() < 0.5
95 |                 }
96 |             };
97 |             searcher.report(test_index, heads, 0.5);
98 |             let lower_bound = searcher.confidence_percentile_ceil(0.1);
99 |             let upper_bound = searcher.confidence_percentile_ceil(0.9);
100 |             println!(
101 |                 "{} {} {} {} {} {}",
102 |                 flakiness,
103 |                 i,
104 |                 to_stiffness(test_index),
105 |                 to_stiffness(lower_bound),
106 |                 to_stiffness(upper_bound),
107 |                 searcher.likelihood(searcher.best_index())
108 |             );
109 |             if lower_bound == upper_bound {
110 |                 break;
111 |             }
112 |         }
113 | 
114 |         writeln!(f, "{} {}", flakiness, to_stiffness(searcher.best_index()))?;
115 |         f.sync_data()?;
116 |     }
117 |     Ok(())
118 | }
119 | 
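The fitted curve quoted in the comment at the top of main() above can be read directly as a stiffness picker. A minimal sketch (the function name is illustrative; it is not part of this file or the crate API):

fn heuristic_stiffness(flakiness: f64) -> f64 {
    // min(2.6/x^0.37, 0.58/x^0.97, 0.19/x^2.4), with x = flakiness in (0, 1]
    (2.6 / flakiness.powf(0.37))
        .min(0.58 / flakiness.powf(0.97))
        .min(0.19 / flakiness.powf(2.4))
}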
-------------------------------------------------------------------------------- /robust-git-bisect/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [package] 16 | name = "robust-git-bisect" 17 | version = "0.1.1" 18 | authors = ["Adam Crume "] 19 | edition = "2018" 20 | license = "Apache-2.0" 21 | description = "Robust Git Bisect provides an alternative to git bisect which is robust against errors during the search." 22 | repository = "https://github.com/adamcrume/robust-binary-search" 23 | categories = ["command-line-utilities", "development-tools"] 24 | readme = "README.md" 25 | 26 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 27 | 28 | [dependencies] 29 | robust-binary-search = "0.1.0" 30 | clap = "2.33.3" 31 | log = "0.4" 32 | simplelog = "0.8.0" 33 | union-find = "0.3.2" 34 | -------------------------------------------------------------------------------- /robust-git-bisect/README.md: -------------------------------------------------------------------------------- 1 | # Robust Git Bisect 2 | 3 | Robust Git Bisect provides an alternative to git bisect which is robust against errors during 4 | the search. In other words, if the comparison function sometimes returns an incorrect result, the 5 | search in this project will still converge on the correct solution. 6 | 7 | This is adapted from the multiplicative weights algorithm in ["Noisy binary search and its 8 | applications" by Karp and Kleinberg](https://www.cs.cornell.edu/~rdk/papers/karpr2.pdf), with 9 | adjustments to make it deterministic and then extended to support directed acyclic graphs. 10 | 11 | ## Usage 12 | 13 | To use the git bisect replacement, install with `cargo install robust-git-bisect`, and then 14 | `~/.cargo/bin/robust-git-bisect $start_commit $end_commit $command_to_test_commit` 15 | 16 | If you're looking for a library version of this, see the `robust-binary-search` crate which this is 17 | based on. 18 | 19 | ## Performance 20 | 21 | robust-git-bisect shows improved performance compared with git bisect (higher accuracy with fewer 22 | iterations): 23 | 24 | Method | Iterations | Accuracy 25 | ---------------------------------- | ---------- | -------- 26 | robust-git-bisect with 0.99 target | 29.6558 | 99.5392% 27 | robust-git-bisect with 0.9 target | 26.1828 | 98.8950% 28 | git bisect | 16.1907 | 31.7972% 29 | git bisect with tests repeated | 35.0465 | 86.6359% 30 | git bisect repeated | 72.3674 | 86.1751% 31 | 32 | This test is run over the `git` git repo from e83c516331 to 54e85e7af1, simulating 9c3592cf3c as the 33 | bad commit, with a test that returns an incorrect result 5% of the time. See benchmark.rs for 34 | details. 
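For reference, the underlying `robust-binary-search` crate can also be driven directly. Below is a minimal sketch (method usage mirrors how this repository's tuner.rs exercises the crate; the failing-test closure, flakiness value, and stopping threshold are illustrative assumptions, not a prescribed API contract):

```rust
use robust_binary_search::Searcher;

/// Returns the most likely culprit index once confidence exceeds 99%.
fn find_culprit(size: usize, test_fails: impl Fn(usize) -> bool) -> usize {
    let mut searcher = Searcher::new(size);
    loop {
        let index = searcher.next_index().unwrap();
        // The third argument is the flakiness assumed for this observation.
        searcher.report(index, test_fails(index), 0.05);
        let best = searcher.best_index();
        if searcher.likelihood(best) > 0.99 {
            return best;
        }
    }
}
```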
35 | 
--------------------------------------------------------------------------------
/robust-git-bisect/src/main.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Google LLC
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // https://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | use clap::App;
16 | use clap::Arg;
17 | use log::info;
18 | use log::trace;
19 | use robust_binary_search::AutoCompressedDAGSearcher;
20 | use robust_binary_search::CompressedDAG;
21 | use robust_binary_search::CompressedDAGSegment;
22 | use simplelog::Config;
23 | use simplelog::LevelFilter;
24 | use simplelog::TermLogger;
25 | use simplelog::TerminalMode;
26 | use std::collections::HashMap;
27 | use std::collections::HashSet;
28 | use std::error::Error;
29 | use std::path::Path;
30 | use std::process::Command;
31 | use std::rc::Rc;
32 | use std::time::Duration;
33 | use std::time::Instant;
34 | use union_find::QuickFindUf;
35 | use union_find::Union;
36 | use union_find::UnionFind;
37 | use union_find::UnionResult;
38 | 
39 | #[derive(Clone, Debug)]
40 | struct StringUnion(String);
41 | 
42 | impl Union for StringUnion {
43 |     fn union(lval: Self, _rval: Self) -> UnionResult<Self> {
44 |         UnionResult::Left(lval)
45 |     }
46 | }
47 | 
48 | #[derive(Debug, Default)]
49 | struct GitSegmentUf {
50 |     parents: Vec<usize>,
51 |     commits: Vec<String>,
52 | }
53 | 
54 | #[derive(Debug, Default)]
55 | struct GitSegment {
56 |     parents: Vec<usize>,
57 |     commits: Vec<String>,
58 | }
59 | 
60 | fn run<F>(name: &str, mut configure: F) -> Result<String, String>
61 | where
62 |     F: FnMut(&mut Command) -> &mut Command,
63 | {
64 |     let mut command = Command::new(name);
65 |     let configured = configure(&mut command);
66 |     info!("Executing {:?}", configured);
67 |     let out = configured.output().unwrap();
68 |     if !out.status.success() {
69 |         let msg = format!("failed to execute {:?}", configured);
70 |         info!("{}", msg);
71 |         return Err(msg);
72 |     }
73 |     info!("Command {:?} finished successfully", configured);
74 |     Ok(String::from_utf8(out.stdout).unwrap())
75 | }
76 | 
77 | fn sort_segments(segments: &HashMap<usize, GitSegmentUf>) -> Vec<usize> {
78 |     let mut parents = HashMap::<usize, HashSet<usize>>::new();
79 |     let mut children = HashMap::<usize, HashSet<usize>>::new();
80 |     let mut initial_segments = Vec::new();
81 |     for (id, segment) in segments {
82 |         parents.insert(*id, segment.parents.iter().copied().collect());
83 |         for parent in &segment.parents {
84 |             children
85 |                 .entry(*parent)
86 |                 .or_insert_with(HashSet::new)
87 |                 .insert(*id);
88 |         }
89 |         if segment.parents.is_empty() {
90 |             initial_segments.push(*id);
91 |         }
92 |     }
93 |     let mut sorted = Vec::new();
94 |     while let Some(id) = initial_segments.pop() {
95 |         sorted.push(id);
96 |         if let Some(children_to_update) = children.get(&id) {
97 |             for child in children_to_update {
98 |                 let p = parents.get_mut(child).unwrap();
99 |                 p.remove(&id);
100 |                 if p.is_empty() {
101 |                     parents.remove(child);
102 |                     initial_segments.push(*child);
103 |                 }
104 |             }
105 |         }
106 |         children.remove(&id);
107 |     }
108 |     sorted
109 | }
110 | 
111 | fn run_bisect<P: AsRef<Path>>(
112 |     dir: P,
113 |     segments: &[GitSegment],
114 |     test_cmd: &str,
115 |     min_likelihood: f64,
116 | ) -> HashMap<String, Duration> {
117 |     let start = Instant::now();
118 |     let mut graph = CompressedDAG::new();
119 |     for (i, segment) in segments.iter().enumerate() {
120 |         if i % 100 == 0 {
121 |             trace!("Processing segment {} of {}", i, segments.len());
122 |         }
123 |         graph.add_node(
124 |             CompressedDAGSegment::new(segment.commits.len()),
125 |             segment.parents.clone(),
126 |         );
127 |     }
128 |     let mut metrics = HashMap::new();
129 |     metrics.insert("graph-built".to_string(), start.elapsed());
130 |     trace!(
131 |         "CompressedDAG built in {} seconds",
132 |         start.elapsed().as_secs_f64()
133 |     );
134 |     let mut searcher = AutoCompressedDAGSearcher::new(Rc::new(graph));
135 |     let mut iterations = 0;
136 |     loop {
137 |         iterations += 1;
138 |         let node = searcher.next_node();
139 |         let commit = &segments[node.segment].commits[node.index];
140 |         run("git", |cmd| {
141 |             cmd.current_dir(&dir).arg("checkout").arg(commit)
142 |         })
143 |         .unwrap();
144 |         let heads = run("sh", |cmd| cmd.current_dir(&dir).arg("-c").arg(test_cmd)).is_err();
145 |         println!(
146 |             "Reporting {} as {}",
147 |             commit,
148 |             if heads { "bad" } else { "good" }
149 |         );
150 |         searcher.report(node, heads);
151 |         let best = searcher.best_node();
152 |         let best_commit = segments[best.segment].commits[best.index].clone();
153 |         println!("Most likely commit is {} with likelihood {} after {} iterations. Estimated flakiness is {}.",
154 |             best_commit, searcher.likelihood(best), iterations, searcher.flakiness());
155 |         if searcher.likelihood(best) > min_likelihood {
156 |             break;
157 |         }
158 |     }
159 |     metrics
160 | }
161 | 
162 | fn main() -> Result<(), Box<dyn Error>> {
163 |     let start = Instant::now();
164 |     let matches = App::new("git-bisect")
165 |         .version("1.0")
166 |         .author("Adam Crume ")
167 |         .about("Robust git bisect which works in the face of noise.")
168 |         .arg(
169 |             Arg::with_name("dir")
170 |                 .long("dir")
171 |                 .help("Git repo directory")
172 |                 .default_value("."),
173 |         )
174 |         .arg(
175 |             Arg::with_name("min-likelihood")
176 |                 .long("min-likelihood")
177 |                 .help("Minimum likelihood required to stop iterating.")
178 |                 .default_value("0.99"),
179 |         )
180 |         .arg(
181 |             Arg::with_name("verbose")
182 |                 .short("v")
183 |                 .long("verbose")
184 |                 .help("More verbose output")
185 |                 .multiple(true),
186 |         )
187 |         .arg(
188 |             Arg::with_name("start-commit")
189 |                 .help("Good/start commit")
190 |                 .required(true),
191 |         )
192 |         .arg(
193 |             Arg::with_name("end-commit")
194 |                 .help("Bad/end commit")
195 |                 .required(true),
196 |         )
197 |         .arg(
198 |             Arg::with_name("test-cmd")
199 |                 .help("Command to run which succeeds for good commits and fails for bad commits")
200 |                 .required(true),
201 |         )
202 |         .get_matches();
203 |     let level_filter = match matches.occurrences_of("verbose") {
204 |         0 => LevelFilter::Warn,
205 |         1 => LevelFilter::Info,
206 |         2 => LevelFilter::Debug,
207 |         _ => LevelFilter::Trace,
208 |     };
209 |     TermLogger::init(level_filter, Config::default(), TerminalMode::Mixed).unwrap();
210 |     let dir = matches.value_of("dir").unwrap();
211 |     let min_likelihood = matches
212 |         .value_of("min-likelihood")
213 |         .unwrap()
214 |         .parse::<f64>()
215 |         .unwrap();
216 |     let start_commit = matches.value_of("start-commit").unwrap();
217 |     let end_commit = matches.value_of("end-commit").unwrap();
218 |     let test_cmd = matches.value_of("test-cmd").unwrap();
219 |     let commit_log = run("git", |command| {
220 |         // TODO: Do we need --ancestry-path?
221 |         command
222 |             .current_dir(dir)
223 |             .arg("log")
224 |             .arg(format!("{}..{}", start_commit, end_commit))
225 |             .arg("--format=format:%H %P")
226 |     })
227 |     .unwrap();
228 |     let mut parents = HashMap::<String, Vec<String>>::new();
229 |     let mut children = HashMap::<String, Vec<String>>::new();
230 |     for line in commit_log.lines() {
231 |         let mut hashes = line.split(' ').map(|s| s.to_string()).collect::<Vec<_>>();
232 |         let commit = hashes.swap_remove(0);
233 |         for parent in hashes.into_iter() {
234 |             children
235 |                 .entry(parent.clone())
236 |                 .or_insert_with(Vec::new)
237 |                 .push(commit.clone());
238 |             parents
239 |                 .entry(commit.clone())
240 |                 .or_insert_with(Vec::new)
241 |                 .push(parent);
242 |         }
243 |     }
244 | 
245 |     let mut unify = [].iter().cloned().collect::<QuickFindUf<StringUnion>>();
246 |     let mut uf_keys = HashMap::<String, usize>::new();
247 |     for (key, value) in &parents {
248 |         let uf_key1: usize = *uf_keys
249 |             .entry(key.clone())
250 |             .or_insert_with(|| unify.insert(StringUnion(key.clone())));
251 |         if value.len() == 1 {
252 |             if let Some(child_hashes) = children.get(&value[0]) {
253 |                 if child_hashes.len() == 1 {
254 |                     let uf_key2: usize = *uf_keys
255 |                         .entry(value[0].clone())
256 |                         .or_insert_with(|| unify.insert(StringUnion(value[0].clone())));
257 |                     unify.union(uf_key1, uf_key2);
258 |                 }
259 |             }
260 |         }
261 |     }
262 | 
263 |     let mut segments = HashMap::<usize, GitSegmentUf>::new();
264 |     for (key, value) in &parents {
265 |         let uf_key: usize = *uf_keys.get(key).unwrap();
266 |         let segment: usize = unify.find(uf_key);
267 |         let git_segment = segments
268 |             .entry(segment)
269 |             .or_insert_with(GitSegmentUf::default);
270 |         git_segment.commits.push(key.clone());
271 |         for parent in value {
272 |             if let Some(parent_uf_key) = uf_keys.get(parent) {
273 |                 let parent_segment: usize = unify.find(*parent_uf_key);
274 |                 if parent_segment != segment {
275 |                     git_segment.parents.push(parent_segment);
276 |                 }
277 |             }
278 |         }
279 |     }
280 | 
281 |     for value in segments.values_mut() {
282 |         let commit_set = value.commits.iter().cloned().collect::<HashSet<_>>();
283 |         let first_commits = value
284 |             .commits
285 |             .iter()
286 |             .filter(|commit: &&String| {
287 |                 let commit_parents = parents.get(*commit).unwrap();
288 |                 commit_parents.len() != 1 || !commit_set.contains(&commit_parents[0])
289 |             })
290 |             .cloned()
291 |             .collect::<Vec<_>>();
292 |         assert_eq!(first_commits.len(), 1);
293 |         let mut commit = first_commits[0].clone();
294 |         let mut sorted_commits = vec![commit.clone()];
295 |         while let Some(child_commits) = children.get(&commit) {
296 |             if child_commits.len() != 1 {
297 |                 break;
298 |             }
299 |             let child_commit = child_commits[0].clone();
300 |             if !commit_set.contains(&child_commit) {
301 |                 break;
302 |             }
303 |             sorted_commits.push(child_commit);
304 |             commit = child_commits[0].clone();
305 |         }
306 |         assert_eq!(
307 |             sorted_commits.iter().cloned().collect::<HashSet<_>>(),
308 |             commit_set
309 |         );
310 |         value.commits = sorted_commits;
311 |     }
312 | 
313 |     let sorted_segments = sort_segments(&segments);
314 |     let segment_index_by_id = sorted_segments
315 |         .iter()
316 |         .enumerate()
317 |         .map(|(k, v)| (*v, k))
318 |         .collect::<HashMap<_, _>>();
319 |     let git_segments = sorted_segments
320 |         .iter()
321 |         .map(|segment_id| {
322 |             let segment = segments.get(segment_id).unwrap();
323 |             let parents = segment
324 |                 .parents
325 |                 .iter()
326 |                 .map(|id| segment_index_by_id.get(id).unwrap())
327 |                 .copied()
328 |                 .collect::<Vec<_>>();
329 |             GitSegment {
330 |                 parents,
331 |                 commits: segment.commits.clone(),
332 |             }
333 |         })
334 |         .collect::<Vec<_>>();
335 | 
336 |     info!("Running bisection");
337 |     let metrics = run_bisect(dir, &git_segments,
test_cmd, min_likelihood); 338 | for (k, v) in metrics { 339 | info!("{}: {}", k, v.as_secs_f64()); 340 | } 341 | info!("Elapsed time: {} seconds", start.elapsed().as_secs_f64()); 342 | Ok(()) 343 | } 344 | -------------------------------------------------------------------------------- /test-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2020 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | if [ $(($RANDOM % 100)) -lt "$1" ]; then 18 | if [ $(($RANDOM % 2)) = 0 ]; then 19 | exit 0 20 | else 21 | exit 1 22 | fi 23 | fi 24 | 25 | if git merge-base --is-ancestor "$2" HEAD; then 26 | exit 1 27 | else 28 | exit 0 29 | fi 30 | --------------------------------------------------------------------------------
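For reference, the test-commit script above takes the flakiness percentage as $1 and the simulated bad commit as $2: it fails whenever $2 is an ancestor of HEAD, except that $1 percent of the time it returns a random result. A usage sketch matching the benchmark described in robust-git-bisect/README.md (the path to test-commit depends on where this repository is checked out):

robust-git-bisect e83c516331 54e85e7af1 '/path/to/test-commit 5 9c3592cf3c'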