├── .bazelci
│   ├── postsubmit.yml
│   └── presubmit.yml
├── .bazelrc
├── .bazelversion
├── .gitignore
├── BUILD
├── CONTRIBUTING.md
├── ISSUE_TEMPLATE.md
├── LICENSE
├── MODULE.bazel
├── PULL_REQUEST_TEMPLATE.md
├── README.md
├── WORKSPACE
├── WORKSPACE.bzlmod
├── bb-icon.png
├── benchmark.py
├── benchmark_test.py
├── report
│   ├── BUILD
│   ├── generate_master_report.py
│   └── generate_report.py
├── testutils
│   ├── BUILD
│   ├── __init__.py
│   └── fakes.py
├── third_party
│   ├── BUILD
│   ├── requirements.in
│   └── requirements.txt
└── utils
    ├── BUILD
    ├── __init__.py
    ├── bazel.py
    ├── bazel_test.py
    ├── benchmark_config.py
    ├── benchmark_config_test.py
    ├── bigquery_upload.py
    ├── json_profiles_merger.py
    ├── json_profiles_merger_lib.py
    ├── json_profiles_merger_lib_test.py
    ├── logger.py
    ├── output_handling.py
    ├── storage_upload.py
    ├── values.py
    └── values_test.py

/.bazelci/postsubmit.yml:
--------------------------------------------------------------------------------
1 | ---
2 | tasks:
3 |   ubuntu2204:
4 |     include_json_profile:
5 |       - build
6 |       - test
7 |     build_targets:
8 |       - "//..."
9 |     test_targets:
10 |       - "//..."
11 |   macos_arm64:
12 |     include_json_profile:
13 |       - build
14 |       - test
15 |     build_targets:
16 |       - "//..."
17 |     test_targets:
18 |       - "//..."
19 | 
--------------------------------------------------------------------------------
/.bazelci/presubmit.yml:
--------------------------------------------------------------------------------
1 | ---
2 | tasks:
3 |   ubuntu2204:
4 |     build_targets:
5 |       - "//..."
6 |     test_targets:
7 |       - "//..."
8 |   macos_arm64:
9 |     build_targets:
10 |       - "//..."
11 |     test_targets:
12 |       - "//..."
13 | 
--------------------------------------------------------------------------------
/.bazelrc:
--------------------------------------------------------------------------------
1 | build --incompatible_disallow_empty_glob
2 | 
--------------------------------------------------------------------------------
/.bazelversion:
--------------------------------------------------------------------------------
1 | 7.3.1
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore backup files.
2 | *~
3 | # Ignore Vim swap files.
4 | .*.swp
5 | # Ignore files generated by IDEs.
6 | /.classpath
7 | /.factorypath
8 | /.idea/
9 | /.ijwb/
10 | /.project
11 | /.settings
12 | /.vscode/
13 | 
14 | # Byte-compiled / optimized / DLL files
15 | __pycache__/
16 | *.py[cod]
17 | *$py.class
18 | 
19 | # Config file
20 | utils/config.py
21 | 
22 | # Bazel
23 | bazel-*
24 | MODULE.bazel.lock
25 | 
26 | # Python venv
27 | .venv/
--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_python//python:defs.bzl", "py_binary", "py_test")
2 | load("@third_party//:requirements.bzl", "requirement")
3 | 
4 | # TODO(https://github.com/bazelbuild/bazel-bench/issues/36): Make these work for python3.
5 | py_binary(
6 |     name = "benchmark",
7 |     srcs = ["benchmark.py"],
8 |     deps = [
9 |         "//utils",
10 |         requirement("absl-py"),
11 |         requirement("GitPython"),
12 |         requirement("gitdb2"),
13 |     ],
14 | )
15 | 
16 | py_test(
17 |     name = "benchmark_test",
18 |     srcs = ["benchmark_test.py"],
19 |     deps = [
20 |         ":benchmark",
21 |         "//testutils",
22 |         requirement("mock"),
23 |     ],
24 | )
25 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 | 
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 | 
6 | ## Contributor License Agreement
7 | 
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 | 
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 | 
18 | ## Code reviews
19 | 
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 | 
25 | ## Community Guidelines
26 | 
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 | 
--------------------------------------------------------------------------------
/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | **Description of the problem / feature request:**
2 | 
3 | > Replace this line with your answer.
4 | 
5 | **Feature requests: what underlying problem are you trying to solve with this feature?**
6 | 
7 | > Replace this line with your answer.
8 | 
9 | **Bugs: what's the simplest, easiest way to reproduce this bug? Please provide a minimal example if possible.**
10 | 
11 | > Replace this line with your answer.
12 | 
13 | **Any other information, logs, or outputs that you want to share?**
14 | 
15 | > Replace these lines with your answer.
16 | > 
17 | > If the files are large, upload them as an attachment or provide a link.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | 
2 |                                  Apache License
3 |                            Version 2.0, January 2004
4 |                         http://www.apache.org/licenses/
5 | 
6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 | 
8 |    1. Definitions.
9 | 
10 |       "License" shall mean the terms and conditions for use, reproduction,
11 |       and distribution as defined by Sections 1 through 9 of this document.
12 | 
13 |       "Licensor" shall mean the copyright owner or entity authorized by
14 |       the copyright owner that is granting the License.
15 | 
16 |       "Legal Entity" shall mean the union of the acting entity and all
17 |       other entities that control, are controlled by, or are under common
18 |       control with that entity.
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 
--------------------------------------------------------------------------------
/MODULE.bazel:
--------------------------------------------------------------------------------
1 | module(
2 |     name = "bazel-bench",
3 |     version = "0.0.0",
4 | )
5 | 
6 | bazel_dep(
7 |     name = "rules_python",
8 |     version = "0.35.0",
9 | )
10 | 
11 | # -- extension definitions -- #
12 | python = use_extension(
13 |     "@rules_python//python/extensions:python.bzl",
14 |     "python",
15 |     dev_dependency = True,
16 | )
17 | python.toolchain(
18 |     python_version = "3.10",
19 | )
20 | 
21 | pip = use_extension(
22 |     "@rules_python//python/extensions:pip.bzl",
23 |     "pip",
24 |     dev_dependency = True,
25 | )
26 | pip.parse(
27 |     hub_name = "third_party",
28 |     python_version = "3.10",
29 |     requirements_lock = "//third_party:requirements.txt",
30 | )
31 | use_repo(pip, "third_party")
32 | 
--------------------------------------------------------------------------------
/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | **What this PR does and why we need it:**
2 | 
3 | > Replace this line with your answer.
4 | 
5 | **New changes / Issues that this PR fixes:**
6 | 
7 | > Replace this line with your answer.
8 | 
9 | **Special notes for the reviewer:**
10 | 
11 | > Replace this line with your answer.
12 | 
13 | **Does this require a change in the script's interface or the BigQuery table structure?**
14 | 
15 | > Replace this line with your answer.
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bazel Performance Benchmarking
2 | 
3 | [![Build Status](https://badge.buildkite.com/1499c911d1faf665b9f6ba28d0a61e64c26a8586321b9d63a8.svg)](https://buildkite.com/bazel/bazel-bench)
4 | 
5 | **Status**: WIP
6 | 
7 | ![logo](bb-icon.png)
8 | 
9 | # Setup
10 | 
11 | Prerequisites: `git` and `bazel`.
12 | 
13 | ```
14 | # Clone bazel-bench.
15 | $ git clone https://github.com/bazelbuild/bazel-bench.git
16 | $ cd bazel-bench
17 | ```
18 | 
19 | To do a test run, run the following command (if you're on Windows, populate
20 | `--data_directory` with an appropriate Windows-style path):
21 | 
22 | ```shell
23 | $ bazel run :benchmark \
24 |   -- \
25 |   --bazel_commits=b8468a6b68a405e1a5767894426d3ea9a1a2f22f,ad503849e78b98d762f03168de5a336904280150 \
26 |   --project_source=https://github.com/bazelbuild/rules_cc.git \
27 |   --data_directory=/tmp/bazel-bench-data \
28 |   --verbose \
29 |   -- build //:all
30 | ```
31 | 
32 | These Bazel commits might be too old and no longer buildable by your local Bazel; replace them with more recent commits from [bazelbuild/bazel](https://github.com/bazelbuild/bazel). The above command prints a result table on the terminal and writes a csv
33 | file to the specified `--data_directory`.
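
If those example commits have aged out of buildability, one quick way to pick two recent Bazel commits (a sketch, not part of bazel-bench itself; the `/tmp/bazel-head` path is arbitrary) is a shallow clone:

```shell
# Fetch only the two newest commits of bazelbuild/bazel and print their full
# hashes; substitute these into --bazel_commits in the command above.
$ git clone --depth=2 https://github.com/bazelbuild/bazel.git /tmp/bazel-head
$ git -C /tmp/bazel-head log --format=%H
```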
34 | 
35 | ## Syntax
36 | 
37 | Bazel-bench has the following syntax:
38 | 
39 | ```shell
40 | $ bazel run :benchmark -- <bazel-bench flags> -- <args to pass to the Bazel binary>
41 | 
42 | ```
43 | 
44 | For example, to benchmark the performance of two Bazel commits, A and B, on the same
45 | command `bazel build --nobuild //:all` of the `rules_cc` project, you'd do:
46 | 
47 | ```shell
48 | $ bazel run :benchmark \
49 |   -- \
50 |   --bazel_commits=A,B \
51 |   --project_source=https://github.com/bazelbuild/rules_cc.git \
52 |   -- build --nobuild //:all
53 | ```
54 | 
55 | Note the double-dash `--` before the command arguments. You can pass any
56 | arguments that you would normally pass to Bazel to the script. The performance of
57 | commands other than `build` (e.g. `query`) can also be benchmarked.
58 | 
59 | ### Config-file Interface
60 | 
61 | The flag-based approach does not support cases where the benchmarked Bazel
62 | commands differ. The most common use case: as a rule developer, I want
63 | to verify the effect of my flag on Bazel performance. For that, we need the
64 | config-file interface. An example config file looks like this:
65 | 
66 | ```yaml
67 | # config.yaml
68 | global_options:
69 |   project_commit: 595a730
70 |   runs: 5
71 |   collect_profile: false
72 |   project_source: /path/to/project/repo
73 | units:
74 |   - bazel_binary: /usr/bin/bazel
75 |     command: --startup_option1 build --nomy_flag //:all
76 |   - bazel_binary: /usr/bin/bazel
77 |     command: --startup_option2 build --my_flag //:all
78 | ```
79 | 
80 | To launch the benchmark:
81 | 
82 | ```shell
83 | $ bazel run :benchmark -- --benchmark_config=/absolute/path/to/config.yaml
84 | ```
85 | 
86 | The above config file would benchmark 2 "units". A unit is defined as a set of
87 | conditions that describes a scenario to be benchmarked. This setup allows
88 | maximum flexibility, as the conditions are independent between units. It's even
89 | possible to benchmark a `bazel_commit` against a pre-built `bazel_binary`.
90 | 
91 | `global_options` is the list of options applied to every unit. These global options are overridden by a unit's local options.
92 | 
93 | For the list of currently supported flags/attributes and their default values,
94 | refer to [utils/benchmark_config.py](utils/benchmark_config.py).
95 | 
96 | #### Known Limitations:
97 | 
98 | - `project_source` should be a global option, as we don't support benchmarking
99 | multiple projects in one benchmark run. `project_commit`, however, can differ between units.
100 | - Incremental benchmarks aren't available.
101 | - Commands have to be in canonical form (next section).
102 | 
103 | 
104 | ### Bazel Arguments Interpretation
105 | 
106 | Bazel arguments are parsed manually. It
107 | is _important_ that the supplied arguments in the command line strictly follow
108 | the canonical form:
109 | 
110 | ```
111 | <startup options> <command> <options> <targets>
112 | ```
113 | 
114 | Examples of non-canonical command-line arguments that could result in a wrong
115 | interpretation:
116 | 
117 | ```
118 | GOOD: (correct order, options in canonical form)
119 | build --nobuild --compilation_mode=opt //:all
120 | 
121 | BAD: (non-canonical options)
122 | build --nobuild -c opt //:all
123 | 
124 | BAD: (wrong order)
125 | build --nobuild //:all --compilation_mode=opt
126 | ```
127 | 
128 | ## Available flags
129 | 
130 | To show all the available flags:
131 | 
132 | ```
133 | $ bazel run :benchmark -- --helpshort
134 | ```
135 | 
136 | Some useful flags are:
137 | 
138 | ```
139 | --bazel_binaries: The pre-built bazel binaries to benchmark.
140 |   (a comma separated list)
141 | --bazel_commits: The commits at which bazel is built.
142 |   (default: 'latest')
143 |   (a comma separated list)
144 | --bazel_source: Either a path to the local Bazel repo or a https url to a GitHub repository.
145 |   (default: 'https://github.com/bazelbuild/bazel.git')
146 | --bazelrc: The path to a .bazelrc file.
147 | --csv_file_name: The name of the output csv, without the .csv extension.
148 | --data_directory: The directory in which the csv files should be stored.
149 | --[no]prefetch_ext_deps: Whether to do an initial run to pre-fetch external dependencies.
150 |   (default: 'true')
151 | --project_commits: The commits from the git project to be benchmarked.
152 |   (default: 'latest')
153 |   (a comma separated list)
154 | --project_source: Either a path to the local git project to be built or a https url to a GitHub repository.
155 | --runs: The number of benchmark runs.
156 |   (default: '5')
157 |   (an integer)
158 | --[no]verbose: Whether to include git/Bazel stdout logs.
159 |   (default: 'false')
160 | --[no]collect_profile: Whether to collect JSON profile for each run.
161 |   Requires --data_directory to be set.
162 |   (default: 'false')
163 | ```
164 | 
165 | ## Collecting JSON Profile
166 | 
167 | [Bazel's JSON Profile](https://docs.bazel.build/versions/master/skylark/performance.html#json-profile)
168 | is a useful tool to investigate the performance of Bazel. You can configure
169 | `bazel-bench` to export these JSON profiles on runs using the
170 | `--collect_profile` flag.
171 | 
172 | ### JSON Profile Aggregation
173 | 
174 | For each pair of `project_commit` and `bazel_commit`, we produce one JSON
175 | profile per run. To get a better overview of the
176 | performance of each phase and event, we can aggregate these profiles and
177 | produce the median duration of each event across them.
178 | 
179 | To run the tool:
180 | 
181 | ```
182 | bazel run utils:json_profiles_merger \
183 |   -- \
184 |   --bazel_source=<bazel_source> \
185 |   --project_source=<project_source> \
186 |   --project_commit=<project_commit> \
187 |   --output_path=/tmp/outfile.csv \
188 |   -- /tmp/my_json_profiles_*.profile
189 | ```
190 | 
191 | You can pass the pattern that selects the input profiles into the positional
192 | argument of the script, like in the above example
193 | (`/tmp/my_json_profiles_*.profile`).
194 | 
195 | ## Output Directory Layout
196 | 
197 | By default, bazel-bench will store the measurement results and other required
198 | files (project clones, built binaries, ...) under the `~/.bazel-bench`
199 | directory.
200 | 
201 | The layout is:
202 | 
203 | ```
204 | ~/.bazel-bench/                        <= The root of bazel-bench's output dir.
205 |   bazel/                               <= Where bazel's repository is cloned.
206 |   bazel-bin/                           <= Where the built bazel binaries are stored.
207 |     fba9a2c87ee9589d72889caf082f1029/  <= The bazel commit hash.
208 |       bazel                            <= The actual bazel binary.
209 |   project-clones/                      <= Where the projects' repositories are cloned.
210 |     7ffd56a6e4cb724ea575aba15733d113/  <= Each project is stored under a project hash,
211 |                                           computed from its source.
212 |   out/                                 <= This is the default output root. But
213 |                                           the output root can also be set via --data_directory.
214 | ```
215 | 
216 | To clear the caches, simply `rm -rf` where necessary.
217 | 
218 | ## Uploading to BigQuery & Storage
219 | 
220 | As an important part of our bazel-bench daily pipeline, we upload the csv output
221 | files to BigQuery and Storage, using separate targets.
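
The files to upload are the per-run CSVs described above; assuming the default output root (yours may differ if you set `--data_directory`), a quick way to find them:

```shell
# List csv outputs from previous runs; these become the positional file
# arguments of the upload commands below.
$ ls ~/.bazel-bench/out/*.csv
```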
222 | 
223 | To upload the output to BigQuery & Storage, you'll need the GCP credentials and
224 | the table details. Please contact leba@google.com.
225 | 
226 | BigQuery:
227 | 
228 | ```
229 | bazel run utils:bigquery_upload \
230 |   -- \
231 |   --upload_to_bigquery=<project_id>:<dataset_id>:<table_id>:<location> \
232 |   -- \
233 |   <file1> <file2> ...
234 | ```
235 | 
236 | Storage:
237 | 
238 | ```
239 | bazel run utils:storage_upload \
240 |   -- \
241 |   --upload_to_storage=<project_id>:<bucket_id>:<subdirectory> \
242 |   -- \
243 |   <file1> <file2> ...
244 | ```
245 | 
246 | ## Performance Report
247 | 
248 | We generate a performance report with BazelCI. The generator script can be found
249 | under the `/report` directory.
250 | 
251 | Example Usage: `$ python3 report/generate_report.py --date=2019-01-01
252 | --project=dummy --storage_bucket=dummy_bucket`
253 | 
254 | For more detailed usage information, run: `$ python3 report/generate_report.py
255 | --help`
256 | 
257 | ## Tests
258 | 
259 | The tests for each module are found in the same directory. To run the tests,
260 | simply:
261 | 
262 | ```
263 | $ bazel test ...
264 | ```
265 | 
--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
1 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
2 | 
3 | http_archive(
4 |     name = "rules_python",
5 |     sha256 = "e85ae30de33625a63eca7fc40a94fea845e641888e52f32b6beea91e8b1b2793",
6 |     strip_prefix = "rules_python-0.27.1",
7 |     url = "https://github.com/bazelbuild/rules_python/releases/download/0.27.1/rules_python-0.27.1.tar.gz",
8 | )
9 | 
10 | load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains")
11 | 
12 | py_repositories()
13 | 
14 | load("@rules_python//python:pip.bzl", "pip_parse")
15 | 
16 | # Use a hermetic Python interpreter so that builds are reproducible
17 | # irrespective of the Python version available on the host machine.
18 | python_register_toolchains(
19 |     name = "python3_10",
20 |     python_version = "3.10",
21 | )
22 | 
23 | load("@python3_10//:defs.bzl", "interpreter")
24 | 
25 | # Translate requirements.txt into a @third_party external repository.
26 | pip_parse(
27 |     name = "third_party",
28 |     python_interpreter_target = interpreter,
29 |     requirements_lock = "//third_party:requirements.txt",
30 | )
31 | 
32 | load("@third_party//:requirements.bzl", "install_deps")
33 | 
34 | # Initialize the repositories for the packages in requirements.txt.
35 | install_deps()
36 | 
--------------------------------------------------------------------------------
/WORKSPACE.bzlmod:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bazelbuild/bazel-bench/f0c8f585ad4733f184222be59c4401f9371991a6/WORKSPACE.bzlmod
--------------------------------------------------------------------------------
/bb-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bazelbuild/bazel-bench/f0c8f585ad4733f184222be59c4401f9371991a6/bb-icon.png
--------------------------------------------------------------------------------
/benchmark.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #    http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import csv
15 | import collections
16 | import datetime
17 | import os
18 | import subprocess
19 | import sys
20 | import hashlib
21 | import re
22 | import shutil
23 | import collections
24 | import tempfile
25 | import git
26 | import utils.logger as logger
27 | import utils.json_profiles_merger_lib as json_profiles_merger_lib
28 | import utils.output_handling as output_handling
29 | 
30 | from absl import app
31 | from absl import flags
32 | 
33 | from utils.values import Values
34 | from utils.bazel import Bazel
35 | from utils.benchmark_config import BenchmarkConfig
36 | 
37 | # BB_ROOT has different values, depending on the platform.
38 | BB_ROOT = os.path.join(os.path.expanduser('~'), '.bazel-bench')
39 | 
40 | # The path to the directory that stores Bazel clones.
41 | BAZEL_CLONE_BASE_PATH = os.path.join(BB_ROOT, 'bazel-clones')
42 | # The path to the directory that stores project clones.
43 | PROJECT_CLONE_BASE_PATH = os.path.join(BB_ROOT, 'project-clones')
44 | BAZEL_GITHUB_URL = 'https://github.com/bazelbuild/bazel.git'
45 | # The path to the directory that stores the bazel binaries.
46 | BAZEL_BINARY_BASE_PATH = os.path.join(BB_ROOT, 'bazel-bin')
47 | # The path to the directory that stores the output csv (if required).
48 | DEFAULT_OUT_BASE_PATH = os.path.join(BB_ROOT, 'out')
49 | # The default name of the aggr json profile.
50 | DEFAULT_AGGR_JSON_PROFILE_FILENAME = 'aggr_json_profiles.csv'
51 | 
52 | 
53 | def _get_clone_subdir(project_source):
54 |   """Calculates a hexdigest of project_source to serve as a unique subdir name."""
55 |   return hashlib.md5(project_source.encode('utf-8')).hexdigest()
56 | 
57 | 
58 | def _exec_command(args, shell=False, cwd=None):
59 |   logger.log('Executing: %s' % (args if shell else ' '.join(args)))
60 | 
61 |   return subprocess.run(
62 |       args,
63 |       shell=shell,
64 |       cwd=cwd,
65 |       check=True,
66 |       stdout=sys.stdout if FLAGS.verbose else subprocess.DEVNULL,
67 |       stderr=sys.stderr if FLAGS.verbose else subprocess.DEVNULL)
68 | 
69 | 
70 | def _get_commits_topological(commits_sha_list,
71 |                              repo,
72 |                              flag_name,
73 |                              fill_default=True):
74 |   """Returns a list of commits, sorted by topological order.
75 | 
76 |   e.g. for a commit history A -> B -> C -> D, commits_sha_list = [C, B]
77 |   Output: [B, C]
78 | 
79 |   If the input commits_sha_list is empty, fetch the latest commit on branch
80 |   'master'
81 |   of the repo.
82 | 
83 |   Args:
84 |     commits_sha_list: a list of commit SHA digest strings. Each can be a long
85 |       or short digest.
86 |     repo: the git.Repo instance of the repository.
87 |     flag_name: the flag that is supposed to specify commits_sha_list.
88 |     fill_default: whether to fill in a default latest commit if none is
89 |       specified.
90 | 
91 |   Returns:
92 |     A list of full SHA digest strings, sorted by topological commit order.
93 | """ 94 | if commits_sha_list: 95 | long_commits_sha_set = set( 96 | map(lambda x: _to_long_sha_digest(x, repo), commits_sha_list)) 97 | sorted_commit_list = [] 98 | for c in reversed(list(repo.iter_commits())): 99 | if c.hexsha in long_commits_sha_set: 100 | sorted_commit_list.append(c.hexsha) 101 | 102 | if len(sorted_commit_list) != len(long_commits_sha_set): 103 | raise ValueError( 104 | "The following commits weren't found in the repo in branch master: %s." 105 | % (long_commits_sha_set - set(sorted_commit_list))) 106 | return sorted_commit_list 107 | 108 | elif not fill_default: 109 | # If we have some binary paths specified, we don't need to fill in a default 110 | # commit. 111 | return [] 112 | 113 | # If no commit specified: take the repo's latest commit. 114 | latest_commit_sha = repo.commit().hexsha 115 | logger.log('No %s specified, using the latest one: %s' % 116 | (flag_name, latest_commit_sha)) 117 | return [latest_commit_sha] 118 | 119 | 120 | def _to_long_sha_digest(digest, repo): 121 | """Returns the full 40-char SHA digest of a commit.""" 122 | return repo.git.rev_parse(digest) if len(digest) < 40 else digest 123 | 124 | 125 | def _setup_project_repo(repo_path, project_source): 126 | """Returns a path to the cloned repository. 127 | 128 | If the repo_path exists, perform a `git fetch` to update the content. 129 | Else, clone the project to repo_path. 130 | 131 | Args: 132 | repo_path: the path to clone the repository to. 133 | project_source: the source to clone the repository from. Could be a local 134 | path or an URL. 135 | 136 | Returns: 137 | A git.Repo object of the cloned repository. 138 | """ 139 | if os.path.exists(repo_path): 140 | logger.log('Path %s exists. Updating...' % repo_path) 141 | repo = git.Repo(repo_path) 142 | repo.git.fetch('origin') 143 | else: 144 | logger.log('Cloning %s to %s...' % (project_source, repo_path)) 145 | repo = git.Repo.clone_from(project_source, repo_path) 146 | 147 | return repo 148 | 149 | 150 | def _build_bazel_binary(commit, repo, outroot, platform=None): 151 | """Builds bazel at the specified commit and copy the output binary to outroot. 152 | 153 | If the binary for this commit already exists at the destination path, simply 154 | return the path without re-building. 155 | 156 | Args: 157 | commit: the Bazel commit SHA. 158 | repo: the git.Repo instance of the Bazel clone. 159 | outroot: the directory inwhich the resulting binary is copied to. 160 | platform: the platform on which to build this binary. 161 | 162 | Returns: 163 | The path to the resulting binary (copied to outroot). 164 | """ 165 | outroot_for_commit = '%s/%s/%s' % ( 166 | outroot, platform, commit) if platform else '%s/%s' % (outroot, commit) 167 | destination = '%s/bazel' % outroot_for_commit 168 | if os.path.exists(destination): 169 | logger.log('Binary exists at %s, reusing...' 
170 |     return destination
171 | 
172 |   logger.log('Building Bazel binary at commit %s' % commit)
173 |   repo.git.checkout('-f', commit)
174 | 
175 |   _exec_command(['bazel', 'build', '//src:bazel'], cwd=repo.working_dir)
176 | 
177 |   # Copy to another location
178 |   binary_out = '%s/bazel-bin/src/bazel' % repo.working_dir
179 | 
180 |   if not os.path.exists(outroot_for_commit):
181 |     os.makedirs(outroot_for_commit)
182 |   logger.log('Copying bazel binary to %s' % destination)
183 |   shutil.copyfile(binary_out, destination)
184 |   _exec_command(['chmod', '+x', destination])
185 | 
186 |   return destination
187 | 
188 | 
189 | def _construct_json_profile_flags(out_file_path):
190 |   """Constructs the flags used to collect JSON profiles.
191 | 
192 |   Args:
193 |     out_file_path: The path to output the profile to.
194 | 
195 |   Returns:
196 |     A list of strings representing the flags.
197 |   """
198 |   return [
199 |       '--experimental_generate_json_trace_profile',
200 |       '--profile={}'.format(out_file_path)
201 |   ]
202 | 
203 | 
204 | def json_profile_filename(data_directory, bazel_bench_uid, bazel_commit,
205 |                           unit_num, project_commit, run_number, total_runs):
206 |   return (f'{data_directory}/{bazel_bench_uid}_{bazel_commit}_{unit_num}'
207 |           + f'_{project_commit}_{run_number}_of_{total_runs}.profile.gz')
208 | 
209 | 
210 | def _single_run(bazel_bin_path,
211 |                 command,
212 |                 options,
213 |                 targets,
214 |                 startup_options):
215 |   """Runs a single benchmarking iteration of the given Bazel command.
216 | 
217 |   Args:
218 |     bazel_bin_path: the path to the bazel binary to be run.
219 |     command: the command to be run with Bazel.
220 |     options: the list of options.
221 |     targets: the list of targets.
222 |     startup_options: the list of startup options.
223 | 
224 |   Returns:
225 |     A result object:
226 |     {
227 |       'wall': 1.000,
228 |       'cpu': 1.000,
229 |       'system': 1.000,
230 |       'memory': 1.000,
231 |       'exit_status': 0,
232 |       'started_at': datetime.datetime(2019, 1, 1, 0, 0, 0, 000000),
233 |     }
234 |   """
235 |   bazel = Bazel(bazel_bin_path, startup_options)
236 | 
237 |   default_arguments = collections.defaultdict(list)  # Note: currently unused.
238 | 
239 |   # Prepend some default options if the command is 'build'.
240 |   # The order in which the options appear matters.
241 |   if command == 'build':
242 |     options = options + ['--nostamp', '--noshow_progress', '--color=no']
243 |   measurements = bazel.command(command, args=options + targets)
244 | 
245 |   if measurements is not None:
246 |     logger.log('Results of this run: wall: ' +
247 |                '%.3fs, cpu %.3fs, system %.3fs, memory %.3fMB, exit_status: %d' % (
248 |                    measurements['wall'],
249 |                    measurements['cpu'],
250 |                    measurements['system'],
251 |                    measurements['memory'],
252 |                    measurements['exit_status']))
253 | 
254 |   if FLAGS.clean:
255 |     bazel.command('clean', ['--color=no'])
256 | 
257 |   if FLAGS.shutdown:
258 |     bazel.command('shutdown')
259 | 
260 |   return measurements
261 | 
262 | 
263 | def _run_benchmark(bazel_bin_path,
264 |                    project_path,
265 |                    runs,
266 |                    command,
267 |                    options,
268 |                    targets,
269 |                    startup_options,
270 |                    prefetch_ext_deps,
271 |                    bazel_bench_uid,
272 |                    unit_num,
273 |                    data_directory=None,
274 |                    collect_profile=False,
275 |                    bazel_identifier=None,
276 |                    project_commit=None):
277 |   """Runs the benchmarking for a combination of (bazel version, project version).
278 | 
279 |   Args:
280 |     bazel_bin_path: the path to the bazel binary to be run.
281 |     project_path: the path to the project clone to be built.
282 |     runs: the number of runs.
283 |     command, options, targets, startup_options: the parsed pieces of the Bazel invocation to benchmark.
284 |     prefetch_ext_deps: whether to do a first non-benchmarked run to fetch the
285 |       external dependencies.
286 |     bazel_bench_uid: a unique string identifier of this entire bazel-bench run.
287 |     unit_num: the numerical order of the current unit being benchmarked.
288 |     collect_profile: whether to collect JSON profile for each run.
289 |     data_directory: the path to the directory to store run data. Required if
290 |       collect_profile.
291 |     bazel_identifier: the commit hash of the bazel commit. Required if
292 |       collect_profile.
293 |     project_commit: the commit hash of the project commit. Required if
294 |       collect_profile.
295 | 
296 |   Returns:
297 |     A tuple of (the list of result objects from each _single_run, the parsed (command, targets, options)).
298 |   """
299 |   collected = []
300 |   os.chdir(project_path)
301 | 
302 |   logger.log('=== BENCHMARKING BAZEL [Unit #%d]: %s, PROJECT: %s ===' %
303 |              (unit_num, bazel_identifier, project_commit))
304 |   # Runs the command once to make sure external dependencies are fetched.
305 |   if prefetch_ext_deps:
306 |     logger.log('Pre-fetching external dependencies...')
307 |     _single_run(bazel_bin_path, command, options, targets, startup_options)
308 | 
309 |   if collect_profile:
310 |     if not os.path.exists(data_directory):
311 |       os.makedirs(data_directory)
312 | 
313 |   for i in range(1, runs + 1):
314 |     logger.log('Starting benchmark run %s/%s:' % (i, runs))
315 | 
316 |     maybe_include_json_profile_flags = options[:]
317 |     if collect_profile:
318 |       assert bazel_identifier, ('bazel_identifier is required when '
319 |                                 'collect_profile')
320 |       assert project_commit, ('project_commit is required when '
321 |                               'collect_profile')
322 |       maybe_include_json_profile_flags += _construct_json_profile_flags(
323 |           json_profile_filename(
324 |               data_directory=data_directory,
325 |               bazel_bench_uid=bazel_bench_uid,
326 |               bazel_commit=bazel_identifier.replace('/', '_'),
327 |               unit_num=unit_num,
328 |               project_commit=project_commit,
329 |               run_number=i,
330 |               total_runs=runs,
331 |           ))
332 |     collected.append(
333 |         _single_run(bazel_bin_path, command, maybe_include_json_profile_flags,
334 |                     targets, startup_options))
335 | 
336 |   return collected, (command, targets, options)
337 | 
338 | 
339 | def handle_json_profiles_aggr(bazel_bench_uid, unit_num, bazel_commits,
340 |                               project_source, project_commits, runs,
341 |                               output_path, data_directory):
342 |   """Aggregates the collected JSON profiles and writes the result to a CSV.
343 | 
344 |   Args:
345 |     bazel_bench_uid: a unique string identifier of this entire bazel-bench run.
346 |     unit_num: the numerical order of the current unit being benchmarked.
347 |     bazel_commits: the Bazel commits that bazel-bench ran on.
348 |     project_source: a path/url to a local/remote repository of the project on
349 |       which benchmarking was performed.
350 |     project_commits: the commits of the project when benchmarking was done.
351 |     runs: the total number of runs.
352 |     output_path: the path to the output csv file.
353 |     data_directory: the directory that stores output files.
354 | """ 355 | output_dir = os.path.dirname(output_path) 356 | if not os.path.exists(output_dir): 357 | os.makedirs(output_dir) 358 | 359 | with open(output_path, 'w') as f: 360 | csv_writer = csv.writer(f) 361 | csv_writer.writerow([ 362 | 'bazel_source', 'project_source', 'project_commit', 'cat', 'name', 'dur' 363 | ]) 364 | 365 | for bazel_commit in bazel_commits: 366 | for project_commit in project_commits: 367 | profiles_filenames = [ 368 | json_profile_filename( 369 | data_directory=data_directory, 370 | bazel_bench_uid=bazel_bench_uid, 371 | bazel_commit=bazel_commit, 372 | unit_num=unit_num, 373 | project_commit=project_commit, 374 | run_number=i, 375 | total_runs=runs, 376 | ) 377 | for i in range(1, runs + 1) 378 | ] 379 | event_list = json_profiles_merger_lib.aggregate_data( 380 | profiles_filenames, only_phases=True) 381 | for event in event_list: 382 | csv_writer.writerow([ 383 | bazel_commit, project_source, project_commit, event['cat'], 384 | event['name'], event['median'] 385 | ]) 386 | logger.log('Finished writing aggregate_json_profiles to %s' % output_path) 387 | 388 | 389 | def create_summary(data, project_source): 390 | """Creates the runs summary onto stdout. 391 | 392 | Excludes runs with non-zero exit codes from the final summary table. 393 | """ 394 | unit = { 395 | 'wall': 's ', 396 | 'cpu': 's ', 397 | 'system': 's ', 398 | 'memory': 'MB' 399 | } 400 | summary_builder = [] 401 | summary_builder.append('\nRESULTS:') 402 | last_collected = None 403 | for (i, bazel_commit, project_commit), collected in data.items(): 404 | header = ('[Unit #%d] Bazel version: %s, Project commit: %s, Project source: %s' % 405 | (i, bazel_commit, project_commit, project_source)) 406 | summary_builder.append(header) 407 | 408 | summary_builder.append( 409 | '%s %s %s %s %s' % 410 | ('metric'.rjust(8), 'mean'.center(20), 'median'.center(20), 411 | 'stddev'.center(10), 'pval'.center(10))) 412 | 413 | num_runs = len(collected['wall'].items()) 414 | # A map from run number to exit code, for runs with non-zero exit codes. 415 | non_zero_runs = {} 416 | for i, exit_code in enumerate(collected['exit_status'].items()): 417 | if exit_code != 0: 418 | non_zero_runs[i] = exit_code 419 | for metric, values in collected.items(): 420 | if metric in ['exit_status', 'started_at']: 421 | continue 422 | 423 | values_exclude_failures = values.exclude_from_indexes( 424 | non_zero_runs.keys()) 425 | # Skip if there's no value available after excluding failed runs. 426 | if not values_exclude_failures.items(): 427 | continue 428 | 429 | if last_collected: 430 | base = last_collected[metric] 431 | pval = '% 7.5f' % values_exclude_failures.pval(base.values()) 432 | mean_diff = '(% +6.2f%%)' % ( 433 | 100. * (values_exclude_failures.mean() - base.mean()) / base.mean()) 434 | median_diff = '(% +6.2f%%)' % ( 435 | 100. 
* 436 | (values_exclude_failures.median() - base.median()) / base.median()) 437 | else: 438 | pval = '' 439 | mean_diff = median_diff = ' ' 440 | summary_builder.append( 441 | '%s: %s %s %s %s' % 442 | (metric.rjust(8), 443 | ('% 8.3f%s %s' % 444 | (values_exclude_failures.mean(), unit[metric], mean_diff)).center(20), 445 | ('% 8.3f%s %s' % 446 | (values_exclude_failures.median(), unit[metric], median_diff)).center(20), 447 | ('% 7.3f%s' % (values_exclude_failures.stddev(), unit[metric])).center(10), 448 | pval.center(10))) 449 | last_collected = collected 450 | if non_zero_runs: 451 | summary_builder.append( 452 | ('The following runs contain non-zero exit code(s):\n %s\n' 453 | 'Please check the full log for more details. These runs are ' 454 | 'excluded from the above result table.' % 455 | '\n '.join('- run: %s/%s, exit_code: %s' % (k + 1, num_runs, v) 456 | for k, v in non_zero_runs.items()))) 457 | summary_builder.append('') 458 | 459 | return '\n'.join(summary_builder) 460 | 461 | 462 | FLAGS = flags.FLAGS 463 | # Flags for the bazel binaries. 464 | flags.DEFINE_list('bazel_commits', None, 'The commits at which bazel is built.') 465 | flags.DEFINE_list('bazel_binaries', None, 466 | 'The pre-built bazel binaries to benchmark.') 467 | flags.DEFINE_string('bazel_source', 468 | 'https://github.com/bazelbuild/bazel.git', 469 | 'Either a path to the local Bazel repo or a https url to ' \ 470 | 'a GitHub repository.') 471 | flags.DEFINE_string( 472 | 'bazel_bin_dir', None, 473 | 'The directory to store the bazel binaries from each commit.') 474 | 475 | # Flags for the project to be built. 476 | flags.DEFINE_string( 477 | 'project_label', None, 478 | 'The label of the project. Only relevant in the daily performance report.') 479 | flags.DEFINE_string('project_source', None, 480 | 'Either a path to the local git project to be built or ' \ 481 | 'a https url to a GitHub repository.') 482 | flags.DEFINE_list('project_commits', None, 483 | 'The commits from the git project to be benchmarked.') 484 | flags.DEFINE_string( 485 | 'env_configure', None, 486 | "The shell commands to configure the project's environment.") 487 | 488 | # Execution options. 489 | flags.DEFINE_integer('runs', 5, 'The number of benchmark runs.') 490 | flags.DEFINE_string('bazelrc', None, 'The path to a .bazelrc file.') 491 | flags.DEFINE_string('platform', None, 492 | ('The platform on which bazel-bench is run. This is just ' 493 | 'to categorize data and has no impact on the actual ' 494 | 'script execution.')) 495 | flags.DEFINE_boolean('clean', True, 'Whether to invoke clean between runs/builds.') 496 | flags.DEFINE_boolean('shutdown', True, 'Whether to invoke shutdown between runs/builds.') 497 | 498 | # Miscellaneous flags. 499 | flags.DEFINE_boolean('verbose', False, 500 | 'Whether to include git/Bazel stdout logs.') 501 | flags.DEFINE_boolean('prefetch_ext_deps', True, 502 | 'Whether to do an initial run to pre-fetch external ' \ 503 | 'dependencies.') 504 | flags.DEFINE_boolean('collect_profile', False, 505 | 'Whether to collect JSON profile for each run. Requires ' \ 506 | '--data_directory to be set.') 507 | flags.DEFINE_boolean('aggregate_json_profiles', False, 508 | 'Whether to aggregate the collected JSON profiles. Requires '\ 509 | '--collect_profile to be set.') 510 | flags.DEFINE_string( 511 | 'benchmark_config', None, 512 | 'Whether to use the config-file interface to define benchmark units.') 513 | 514 | # Output storage flags. 
515 | flags.DEFINE_string('data_directory', None, 516 | 'The directory in which the csv files should be stored.') 517 | # The daily report generation process on BazelCI requires the csv file name to 518 | # be determined before bazel-bench is launched, so that METADATA files are 519 | # properly filled. 520 | flags.DEFINE_string('csv_file_name', None, 521 | 'The name of the output csv, without the .csv extension.') 522 | 523 | 524 | def _flag_checks(): 525 | """Verify flags requirements.""" 526 | if (not FLAGS.benchmark_config and FLAGS.bazel_commits and 527 | FLAGS.project_commits and len(FLAGS.bazel_commits) > 1 and 528 | len(FLAGS.project_commits) > 1): 529 | raise ValueError( 530 | 'Either --bazel_commits or --project_commits should be a single element.' 531 | ) 532 | 533 | if FLAGS.aggregate_json_profiles and not FLAGS.collect_profile: 534 | raise ValueError('--aggregate_json_profiles requires ' 535 | '--collect_profile to be set.') 536 | 537 | 538 | def _get_benchmark_config_and_clone_repos(argv): 539 | """From the flags/config file, get the benchmark units. 540 | 541 | Args: 542 | argv: the command line arguments. 543 | 544 | Returns: 545 | An instance of BenchmarkConfig that contains the benchmark units. 546 | """ 547 | if FLAGS.benchmark_config: 548 | config = BenchmarkConfig.from_file(FLAGS.benchmark_config) 549 | project_source = config.get_project_source() 550 | project_clone_repo = _setup_project_repo( 551 | PROJECT_CLONE_BASE_PATH + '/' + _get_clone_subdir(project_source), 552 | project_source) 553 | bazel_source = config.get_bazel_source() 554 | bazel_clone_repo = _setup_project_repo( 555 | BAZEL_CLONE_BASE_PATH + '/' + _get_clone_subdir(bazel_source), 556 | bazel_source) 557 | 558 | return config, bazel_clone_repo, project_clone_repo 559 | 560 | # Strip off 'benchmark.py' from argv 561 | # argv would be something like: 562 | # ['benchmark.py', 'build', '--nobuild', '//:all'] 563 | bazel_args = argv[1:] 564 | 565 | # Building Bazel binaries 566 | bazel_binaries = FLAGS.bazel_binaries or [] 567 | logger.log('Preparing bazelbuild/bazel repository.') 568 | bazel_source = FLAGS.bazel_source if FLAGS.bazel_source else BAZEL_GITHUB_URL 569 | bazel_clone_repo = _setup_project_repo( 570 | PROJECT_CLONE_BASE_PATH + '/' + _get_clone_subdir(bazel_source), 571 | bazel_source) 572 | bazel_commits = _get_commits_topological( 573 | FLAGS.bazel_commits, 574 | bazel_clone_repo, 575 | 'bazel_commits', 576 | fill_default=not FLAGS.bazel_commits and not bazel_binaries) 577 | 578 | # Set up project repo 579 | logger.log('Preparing %s clone.' 
% FLAGS.project_source) 580 | project_clone_repo = _setup_project_repo( 581 | PROJECT_CLONE_BASE_PATH + '/' + _get_clone_subdir(FLAGS.project_source), 582 | FLAGS.project_source) 583 | 584 | project_commits = _get_commits_topological(FLAGS.project_commits, 585 | project_clone_repo, 586 | 'project_commits') 587 | 588 | config = BenchmarkConfig.from_flags( 589 | bazel_commits=bazel_commits, 590 | bazel_binaries=bazel_binaries, 591 | project_commits=project_commits, 592 | bazel_source=bazel_source, 593 | project_source=FLAGS.project_source, 594 | env_configure=FLAGS.env_configure, 595 | runs=FLAGS.runs, 596 | collect_profile=FLAGS.collect_profile, 597 | command=' '.join(bazel_args), 598 | clean=FLAGS.clean, 599 | shutdown=FLAGS.shutdown) 600 | 601 | return config, bazel_clone_repo, project_clone_repo 602 | 603 | 604 | def main(argv): 605 | _flag_checks() 606 | 607 | config, bazel_clone_repo, project_clone_repo = _get_benchmark_config_and_clone_repos( 608 | argv) 609 | 610 | # A dictionary that maps a (bazel_commit, project_commit) tuple 611 | # to its benchmarking result. 612 | data = collections.OrderedDict() 613 | csv_data = collections.OrderedDict() 614 | data_directory = FLAGS.data_directory or DEFAULT_OUT_BASE_PATH 615 | 616 | # We use the start time as a unique identifier of this bazel-bench run. 617 | bazel_bench_uid = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S') 618 | 619 | bazel_bin_base_path = FLAGS.bazel_bin_dir or BAZEL_BINARY_BASE_PATH 620 | 621 | # Build the bazel binaries, if necessary. 622 | for unit in config.get_units(): 623 | if 'bazel_binary' in unit: 624 | unit['bazel_bin_path'] = unit['bazel_binary'] 625 | elif 'bazel_commit' in unit: 626 | bazel_bin_path = _build_bazel_binary(unit['bazel_commit'], 627 | bazel_clone_repo, 628 | bazel_bin_base_path, FLAGS.platform) 629 | unit['bazel_bin_path'] = bazel_bin_path 630 | 631 | for i, unit in enumerate(config.get_units()): 632 | bazel_identifier = unit['bazel_commit'] if 'bazel_commit' in unit else unit['bazel_binary'] 633 | project_commit = unit['project_commit'] 634 | 635 | project_clone_repo.git.checkout('-f', project_commit) 636 | if unit['env_configure'] is not None: 637 | _exec_command( 638 | unit['env_configure'], shell=True, cwd=project_clone_repo.working_dir) 639 | 640 | results, args = _run_benchmark( 641 | bazel_bin_path=unit['bazel_bin_path'], 642 | project_path=project_clone_repo.working_dir, 643 | runs=unit['runs'], 644 | command=unit['command'], 645 | options=unit['options'], 646 | targets=unit['targets'], 647 | startup_options=unit['startup_options'], 648 | prefetch_ext_deps=FLAGS.prefetch_ext_deps, 649 | bazel_bench_uid=bazel_bench_uid, 650 | unit_num=i, 651 | collect_profile=unit['collect_profile'], 652 | data_directory=data_directory, 653 | bazel_identifier=bazel_identifier, 654 | project_commit=project_commit) 655 | collected = {} 656 | for benchmarking_result in results: 657 | for metric, value in benchmarking_result.items(): 658 | if metric not in collected: 659 | collected[metric] = Values() 660 | collected[metric].add(value) 661 | 662 | data[(i, bazel_identifier, project_commit)] = collected 663 | non_measurables = { 664 | 'project_source': unit['project_source'], 665 | 'platform': FLAGS.platform, 666 | 'project_label': FLAGS.project_label 667 | } 668 | csv_data[(bazel_identifier, project_commit)] = { 669 | 'results': results, 670 | 'args': args, 671 | 'non_measurables': non_measurables 672 | } 673 | 674 | summary_text = create_summary(data, config.get_project_source()) 675 | print(summary_text) 
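  # Note: the result table above is always printed to stdout; the artifacts
  # below (raw csv, text summary, optional aggregated profiles) are written
  # only when --data_directory is explicitly set.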
676 | 677 | if FLAGS.data_directory: 678 | csv_file_name = FLAGS.csv_file_name or '{}.csv'.format(bazel_bench_uid) 679 | txt_file_name = csv_file_name.replace('.csv', '.txt') 680 | 681 | output_handling.export_csv(data_directory, csv_file_name, csv_data) 682 | output_handling.export_file(data_directory, txt_file_name, summary_text) 683 | 684 | # This is mostly for the nightly benchmark. 685 | if FLAGS.aggregate_json_profiles: 686 | aggr_json_profiles_csv_path = ( 687 | '%s/%s' % (FLAGS.data_directory, DEFAULT_AGGR_JSON_PROFILE_FILENAME)) 688 | handle_json_profiles_aggr( 689 | bazel_bench_uid=bazel_bench_uid, 690 | unit_num=i, 691 | bazel_commits=config.get_bazel_commits(), 692 | project_source=config.get_project_source(), 693 | project_commits=config.get_project_commits(), 694 | runs=FLAGS.runs, 695 | output_path=aggr_json_profiles_csv_path, 696 | data_directory=FLAGS.data_directory, 697 | ) 698 | 699 | logger.log('Done.') 700 | 701 | 702 | if __name__ == '__main__': 703 | app.run(main) 704 | -------------------------------------------------------------------------------- /benchmark_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for the main benchmarking script.""" 15 | import mock 16 | import sys 17 | import benchmark 18 | import six 19 | 20 | from absl.testing import absltest 21 | from absl.testing import flagsaver 22 | from absl import flags 23 | from testutils.fakes import fake_log, fake_exec_command, FakeBazel 24 | 25 | # Setup custom fakes/mocks. 26 | benchmark.logger.log = fake_log 27 | benchmark._exec_command = fake_exec_command 28 | benchmark.Bazel = FakeBazel 29 | mock_stdio_type = six.StringIO 30 | 31 | 32 | class BenchmarkFunctionTests(absltest.TestCase): 33 | 34 | @mock.patch.object(benchmark.os.path, 'exists', return_value=True) 35 | @mock.patch.object(benchmark.os, 'chdir') 36 | def test_setup_project_repo_exists(self, unused_chdir_mock, 37 | unused_exists_mock): 38 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr, \ 39 | mock.patch('benchmark.git.Repo') as mock_repo_class: 40 | mock_repo = mock_repo_class.return_value 41 | benchmark._setup_project_repo('repo_path', 'project_source') 42 | 43 | mock_repo.git.fetch.assert_called_once_with('origin') 44 | self.assertEqual('Path repo_path exists. 
Updating...', 45 | mock_stderr.getvalue()) 46 | 47 | @mock.patch.object(benchmark.os.path, 'exists', return_value=False) 48 | @mock.patch.object(benchmark.os, 'chdir') 49 | def test_setup_project_repo_not_exists(self, unused_chdir_mock, 50 | unused_exists_mock): 51 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr, \ 52 | mock.patch('benchmark.git.Repo') as mock_repo_class: 53 | benchmark._setup_project_repo('repo_path', 'project_source') 54 | 55 | mock_repo_class.clone_from.assert_called_once_with('project_source', 56 | 'repo_path') 57 | self.assertEqual('Cloning project_source to repo_path...', 58 | mock_stderr.getvalue()) 59 | 60 | def test_get_commits_topological(self): 61 | with mock.patch('benchmark.git.Repo') as mock_repo_class: 62 | mock_repo = mock_repo_class.return_value 63 | mock_A = mock.MagicMock() 64 | mock_A.hexsha = 'A' 65 | mock_B = mock.MagicMock() 66 | mock_B.hexsha = 'B' 67 | mock_C = mock.MagicMock() 68 | mock_C.hexsha = 'C' 69 | mock_repo.iter_commits.return_value = [mock_C, mock_B, mock_A] 70 | mock_repo.git.rev_parse.side_effect = lambda x: x 71 | result = benchmark._get_commits_topological(['B', 'A'], mock_repo, 72 | 'flag_name') 73 | 74 | self.assertEqual(['A', 'B'], result) 75 | 76 | def test_get_commits_topological_latest(self): 77 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr, \ 78 | mock.patch('benchmark.git.Repo') as mock_repo_class: 79 | mock_repo = mock_repo_class.return_value 80 | mock_commit = mock.MagicMock() 81 | mock_repo.commit.return_value = mock_commit 82 | mock_commit.hexsha = 'A' 83 | result = benchmark._get_commits_topological(None, mock_repo, 84 | 'bazel_commits') 85 | 86 | self.assertEqual(['A'], result) 87 | self.assertEqual('No bazel_commits specified, using the latest one: A', 88 | mock_stderr.getvalue()) 89 | 90 | @mock.patch.object(benchmark.os.path, 'exists', return_value=True) 91 | @mock.patch.object(benchmark.os, 'makedirs') 92 | def test_build_bazel_binary_exists(self, unused_chdir_mock, 93 | unused_exists_mock): 94 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 95 | benchmark._build_bazel_binary('commit', 'repo_path', 'outroot') 96 | self.assertEqual('Binary exists at outroot/commit/bazel, reusing...', 97 | mock_stderr.getvalue()) 98 | 99 | @mock.patch.object(benchmark.os.path, 'exists', return_value=False) 100 | @mock.patch.object(benchmark.os, 'makedirs') 101 | @mock.patch.object(benchmark.os, 'chdir') 102 | @mock.patch.object(benchmark.shutil, 'copyfile') 103 | def test_build_bazel_binary_not_exists(self, unused_shutil_mock, 104 | unused_chdir_mock, 105 | unused_makedirs_mock, 106 | unused_exists_mock): 107 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr, \ 108 | mock.patch('benchmark.git.Repo') as mock_repo_class: 109 | mock_repo = mock_repo_class.return_value 110 | benchmark._build_bazel_binary('commit', mock_repo, 'outroot') 111 | 112 | mock_repo.git.checkout.assert_called_once_with('-f', 'commit') 113 | self.assertEqual( 114 | ''.join([ 115 | 'Building Bazel binary at commit commit', 'bazel build //src:bazel', 116 | 'Copying bazel binary to outroot/commit/bazel', 117 | 'chmod +x outroot/commit/bazel' 118 | ]), mock_stderr.getvalue()) 119 | 120 | def test_single_run(self): 121 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 122 | benchmark._single_run( 123 | 'bazel_binary_path', 124 | 'build', 125 | options=[], 126 | targets=['//:all'], 127 | startup_options=[]) 128 | 129 | 
self.assertEqual( 130 | ''.join([ 131 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 132 | 'Executing Bazel command: bazel clean --color=no', 133 | 'Executing Bazel command: bazel shutdown ' 134 | ]), mock_stderr.getvalue()) 135 | 136 | @mock.patch.object(benchmark.os, 'chdir') 137 | def test_run_benchmark_no_prefetch(self, _): 138 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 139 | benchmark._run_benchmark( 140 | 'bazel_binary_path', 141 | 'project_path', 142 | runs=2, 143 | bazel_bench_uid='fake_uid', 144 | command='build', 145 | options=[], 146 | targets=['//:all'], 147 | startup_options=[], 148 | prefetch_ext_deps=False, 149 | unit_num=0) 150 | 151 | self.assertEqual( 152 | ''.join([ 153 | '=== BENCHMARKING BAZEL [Unit #0]: None, PROJECT: None ===', 154 | 'Starting benchmark run 1/2:', 155 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 156 | 'Executing Bazel command: bazel clean --color=no', 157 | 'Executing Bazel command: bazel shutdown ', 158 | 'Starting benchmark run 2/2:', 159 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 160 | 'Executing Bazel command: bazel clean --color=no', 161 | 'Executing Bazel command: bazel shutdown ' 162 | ]), mock_stderr.getvalue()) 163 | 164 | @mock.patch.object(benchmark.os, 'chdir') 165 | def test_run_benchmark_prefetch(self, _): 166 | benchmark.DEFAULT_OUT_BASE_PATH = 'some_out_path' 167 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 168 | benchmark._run_benchmark( 169 | 'bazel_binary_path', 170 | 'project_path', 171 | runs=2, 172 | bazel_bench_uid='fake_uid', 173 | command='build', 174 | options=[], 175 | targets=['//:all'], 176 | startup_options=[], 177 | prefetch_ext_deps=True, 178 | unit_num=0) 179 | 180 | self.assertEqual( 181 | ''.join([ 182 | '=== BENCHMARKING BAZEL [Unit #0]: None, PROJECT: None ===', 183 | 'Pre-fetching external dependencies...', 184 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 185 | 'Executing Bazel command: bazel clean --color=no', 186 | 'Executing Bazel command: bazel shutdown ', 187 | 'Starting benchmark run 1/2:', 188 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 189 | 'Executing Bazel command: bazel clean --color=no', 190 | 'Executing Bazel command: bazel shutdown ', 191 | 'Starting benchmark run 2/2:', 192 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 193 | 'Executing Bazel command: bazel clean --color=no', 194 | 'Executing Bazel command: bazel shutdown ' 195 | ]), mock_stderr.getvalue()) 196 | 197 | @mock.patch.object(benchmark.os, 'chdir') 198 | def test_run_benchmark_collect_profile(self, _): 199 | benchmark.DEFAULT_OUT_BASE_PATH = 'some_out_path' 200 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 201 | benchmark._run_benchmark( 202 | 'bazel_binary_path', 203 | 'project_path', 204 | runs=2, 205 | bazel_bench_uid='fake_uid', 206 | command='build', 207 | options=[], 208 | targets=['//:all'], 209 | startup_options=[], 210 | prefetch_ext_deps=True, 211 | collect_profile=True, 212 | data_directory='fake_dir', 213 | bazel_identifier='fake_bazel_commit', 214 | project_commit='fake_project_commit', 215 | unit_num=0) 216 | 217 | self.assertEqual( 218 | ''.join([ 219 | '=== BENCHMARKING BAZEL [Unit #0]: fake_bazel_commit, PROJECT: fake_project_commit ===', 220 | 'Pre-fetching external 
dependencies...', 221 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 222 | 'Executing Bazel command: bazel clean --color=no', 223 | 'Executing Bazel command: bazel shutdown ', 224 | 'Starting benchmark run 1/2:', 225 | 'Executing Bazel command: bazel build --experimental_generate_json_trace_profile --profile=fake_dir/fake_uid_fake_bazel_commit_0_fake_project_commit_1_of_2.profile.gz --nostamp --noshow_progress --color=no //:all', 226 | 'Executing Bazel command: bazel clean --color=no', 227 | 'Executing Bazel command: bazel shutdown ', 228 | 'Starting benchmark run 2/2:', 229 | 'Executing Bazel command: bazel build --experimental_generate_json_trace_profile --profile=fake_dir/fake_uid_fake_bazel_commit_0_fake_project_commit_2_of_2.profile.gz --nostamp --noshow_progress --color=no //:all', 230 | 'Executing Bazel command: bazel clean --color=no', 231 | 'Executing Bazel command: bazel shutdown ' 232 | ]), mock_stderr.getvalue()) 233 | 234 | 235 | class BenchmarkFlagsTest(absltest.TestCase): 236 | 237 | @flagsaver.flagsaver 238 | def test_project_source_present(self): 239 | # This mirrors the requirement in benchmark.py 240 | flags.mark_flag_as_required('project_source') 241 | # Assert that the script fails when no project_source is specified 242 | with mock.patch.object( 243 | sys, 'stderr', new=mock_stdio_type()) as mock_stderr, self.assertRaises( 244 | SystemExit) as context: 245 | benchmark.app.run(benchmark.main) 246 | self.assertIn( 247 | ''.join([ 248 | 'FATAL Flags parsing error: flag --project_source=None: ', 249 | 'Flag --project_source must have a value other than None.' 250 | ]), mock_stderr.getvalue()) 251 | 252 | @flagsaver.flagsaver(bazel_commits=['a', 'b'], project_commits=['c', 'd']) 253 | def test_either_bazel_commits_project_commits_single_element(self): 254 | with self.assertRaises(ValueError) as context: 255 | benchmark._flag_checks() 256 | value_err = context.exception 257 | self.assertEqual( 258 | str(value_err), 259 | 'Either --bazel_commits or --project_commits should be a single element.' 
260 | ) 261 | 262 | @flagsaver.flagsaver(clean=False) 263 | def test_single_run_skip_clean(self): 264 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 265 | benchmark._single_run( 266 | 'bazel_binary_path', 267 | 'build', 268 | options=[], 269 | targets=['//:all'], 270 | startup_options=[]) 271 | 272 | self.assertEqual( 273 | ''.join([ 274 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 275 | 'Executing Bazel command: bazel shutdown ' 276 | ]), mock_stderr.getvalue()) 277 | 278 | @flagsaver.flagsaver(shutdown=False) 279 | def test_single_run_skip_shutdown(self): 280 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 281 | benchmark._single_run( 282 | 'bazel_binary_path', 283 | 'build', 284 | options=[], 285 | targets=['//:all'], 286 | startup_options=[]) 287 | 288 | self.assertEqual( 289 | ''.join([ 290 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 291 | 'Executing Bazel command: bazel clean --color=no' 292 | ]), mock_stderr.getvalue()) 293 | 294 | 295 | if __name__ == '__main__': 296 | absltest.main() 297 | -------------------------------------------------------------------------------- /report/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_binary") 2 | load("@third_party//:requirements.bzl", "requirement") 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | py_binary( 7 | name = "generate_report", 8 | srcs = ["generate_report.py"], 9 | deps = [ 10 | # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14, 11 | # google-cloud-bigquery must be listed first. 12 | requirement("google-cloud-bigquery"), 13 | requirement("cachetools"), 14 | requirement("google-api-core"), 15 | requirement("google-auth"), 16 | requirement("google-cloud-core"), 17 | requirement("google-resumable-media"), 18 | requirement("googleapis-common-protos"), 19 | requirement("protobuf"), 20 | requirement("pytz"), 21 | requirement("requests"), 22 | ], 23 | ) 24 | 25 | py_binary( 26 | name = "generate_master_report", 27 | srcs = ["generate_master_report.py"], 28 | deps = [ 29 | # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14, 30 | # google-cloud-bigquery must be listed first. 31 | requirement("google-cloud-bigquery"), 32 | requirement("cachetools"), 33 | requirement("google-api-core"), 34 | requirement("google-auth"), 35 | requirement("google-cloud-core"), 36 | requirement("google-resumable-media"), 37 | requirement("googleapis-common-protos"), 38 | requirement("protobuf"), 39 | requirement("pytz"), 40 | requirement("requests"), 41 | ], 42 | ) 43 | -------------------------------------------------------------------------------- /report/generate_master_report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright 2019 The Bazel Authors. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http:#www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Generates a daily HTML report for the projects. 17 | 18 | The steps: 19 | 1. Get the necessary data from Storage for projects/date. 20 | 2. Manipulate the data to a format suitable for graphs. 21 | 3. Generate a HTML report containing the graphs. 22 | 4. Upload the generated HTMLs to GCP Storage. 23 | """ 24 | import argparse 25 | import collections 26 | import csv 27 | import datetime 28 | import json 29 | import io 30 | import os 31 | import statistics 32 | import subprocess 33 | import sys 34 | import tempfile 35 | import urllib.request 36 | from google.cloud import bigquery 37 | 38 | TMP = tempfile.gettempdir() 39 | REPORTS_DIRECTORY = os.path.join(TMP, ".bazel_bench", "reports") 40 | PLATFORMS = ["macos", "ubuntu1804"] 41 | PROJECT_SOURCE_TO_NAME = { 42 | "https://github.com/bazelbuild/bazel.git": "bazel", 43 | "https://github.com/tensorflow/tensorflow.git": "tensorflow" 44 | } 45 | 46 | 47 | def _upload_to_storage(src_file_path, storage_bucket, destination_dir): 48 | """Uploads the file from src_file_path to the specified location on Storage.""" 49 | args = [ 50 | "gsutil", "cp", src_file_path, 51 | "gs://{}/{}".format(storage_bucket, destination_dir) 52 | ] 53 | subprocess.run(args) 54 | 55 | 56 | def _get_storage_url(storage_bucket, dated_subdir): 57 | # In this case, the storage_bucket is a Domain-named bucket. 58 | # https://cloud.google.com/storage/docs/domain-name-verification 59 | return "https://{}/{}".format(storage_bucket, dated_subdir) 60 | 61 | 62 | def _short_hash(commit): 63 | return commit[:7] 64 | 65 | 66 | def _row_component(content): 67 | return """ 68 |
<div class="row">{content}</div>
69 | """.format(content=content) 70 | 71 | 72 | def _col_component(col_class, content): 73 | return """ 74 |
<div class="{col_class}">{content}</div>
75 | """.format( 76 | col_class=col_class, content=content) 77 | 78 | 79 | def _historical_graph(metric, metric_label, data, platform): 80 | """Returns the HTML
<div> component of a single graph.""" 81 | title = "[{}] Historical values of {}".format(platform, metric_label) 82 | hAxis = "Date (commit)" 83 | vAxis = metric_label 84 | chart_id = "{}-{}-time".format(platform, metric) 85 | 86 | return """ 87 | 125 |
126 | """.format( 127 | title=title, 128 | data=data, 129 | hAxis=hAxis, 130 | vAxis=vAxis, 131 | chart_id=chart_id, 132 | metric_label=metric_label) 133 | 134 | 135 | def _full_report(date, graph_components, project_reports_components): 136 | """Returns the full HTML of a complete report, from the graph components.""" 137 | return """ 138 | 139 | 140 | 141 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 158 | 159 | [{date}] Master Report 160 | 161 | 162 |
163 |
164 |
165 |

Report for {date}

166 |

Generated with https://github.com/bazelbuild/bazel-bench at {gentime}

167 |
168 |
169 |
170 |
171 | {reports} 172 |
173 |
174 | 175 |
176 |
177 | 178 | (Date & time are in UTC.) 179 |
180 | 201 |
202 |
203 | 204 | {graphs} 205 |
206 | 207 | 208 | """.format( 209 | gentime=datetime.datetime.now(), 210 | date=date.strftime("%Y/%m/%d"), 211 | graphs=graph_components, 212 | reports=project_reports_components) 213 | 214 | 215 | def _query_bq(bq_project, bq_table, date_cutoff, platform): 216 | bq_client = bigquery.Client(project=bq_project) 217 | query = """ 218 | SELECT 219 | MIN(wall) as min_wall, 220 | APPROX_QUANTILES(wall, 101)[OFFSET(50)] AS median_wall, 221 | MAX(wall) as max_wall, 222 | MIN(memory) as min_memory, 223 | APPROX_QUANTILES(memory, 101)[OFFSET(50)] AS median_memory, 224 | MAX(memory) as max_memory, 225 | bazel_commit, 226 | DATE(MIN(started_at)) as report_date, 227 | project_label 228 | FROM ( 229 | SELECT wall, memory, started_at, t1.bazel_commit, project_label FROM `{bq_project}.{bq_table}` t1 230 | JOIN ( 231 | SELECT DISTINCT bazel_commit, started_at_date 232 | FROM ( 233 | SELECT bazel_commit, DATE(started_at) started_at_date, 234 | RANK() OVER (PARTITION BY project_commit 235 | ORDER BY started_at DESC 236 | ) AS `Rank` 237 | FROM `{bq_project}.{bq_table}` 238 | WHERE DATE(started_at) <= "{date_cutoff}" 239 | AND platform = "{platform}" 240 | AND exit_status = 0 241 | ) 242 | WHERE Rank=1 243 | ORDER BY started_at_date DESC 244 | LIMIT 10 245 | ) t2 246 | ON t1.bazel_commit = t2.bazel_commit 247 | WHERE platform = "{platform}" 248 | AND exit_status = 0 249 | ) 250 | GROUP BY bazel_commit, project_label 251 | ORDER BY report_date, project_label ASC; 252 | """.format( 253 | bq_project=bq_project, 254 | bq_table=bq_table, 255 | date_cutoff=date_cutoff, 256 | platform=platform) 257 | 258 | return bq_client.query(query) 259 | 260 | 261 | # TODO(leba): Normalize data between projects. 262 | def _prepare_time_series_data(raw_data): 263 | """Massage the data to fit a format suitable for graph generation.""" 264 | headers = ["Date"] 265 | project_to_pos = {} 266 | date_to_wall = {} 267 | date_to_mem = {} 268 | 269 | # First pass to gather the projects and form the headers. 270 | for row in raw_data: 271 | if row.project_label not in project_to_pos: 272 | project_to_pos[row.project_label] = len(project_to_pos) 273 | headers.extend( 274 | [row.project_label, { 275 | "role": "interval" 276 | }, { 277 | "role": "interval" 278 | }]) 279 | 280 | for row in raw_data: 281 | if row.report_date not in date_to_wall: 282 | # Commits on day X are benchmarked on day X + 1. 
283 | date_str = "{} ({})".format( 284 | (row.report_date - datetime.timedelta(days=1)).strftime("%Y-%m-%d"), 285 | _short_hash(row.bazel_commit)) 286 | 287 | date_to_wall[row.report_date] = ["null"] * len(headers) 288 | date_to_mem[row.report_date] = ["null"] * len(headers) 289 | 290 | date_to_wall[row.report_date][0] = date_str 291 | date_to_mem[row.report_date][0] = date_str 292 | 293 | base_pos = project_to_pos[row.project_label] * 3 294 | date_to_wall[row.report_date][base_pos + 1] = row.median_wall 295 | date_to_wall[row.report_date][base_pos + 2] = row.min_wall 296 | date_to_wall[row.report_date][base_pos + 3] = row.max_wall 297 | date_to_mem[row.report_date][base_pos + 1] = row.median_memory 298 | date_to_mem[row.report_date][base_pos + 2] = row.min_memory 299 | date_to_mem[row.report_date][base_pos + 3] = row.max_memory 300 | 301 | return [headers] + list(date_to_wall.values()), [headers] + list( 302 | date_to_mem.values()), project_to_pos.keys() 303 | 304 | 305 | def _project_reports_components(date, projects): 306 | links = " - ".join([ 307 | '{project_label}' 308 | .format(date_subdir=date.strftime("%Y/%m/%d"), project_label=label) 309 | for label in projects 310 | ]) 311 | return "

Individual Project Reports: {}

".format(links) 312 | 313 | 314 | def _generate_report_for_date(date, storage_bucket, report_name, upload_report, 315 | bq_project, bq_table): 316 | """Generates a html report for the specified date & project. 317 | 318 | Args: 319 | date: the date to generate report for. 320 | storage_bucket: the Storage bucket to fetch data from/upload the report to. 321 | report_name: the name of the report on GS. 322 | upload_report: whether to upload the report to GCS. 323 | bq_project: the BigQuery project. 324 | bq_table: the BigQuery table. 325 | """ 326 | bq_date_cutoff = (date + datetime.timedelta(days=1)).strftime("%Y-%m-%d") 327 | 328 | graph_components = [] 329 | projects = set() 330 | 331 | for platform in PLATFORMS: 332 | 333 | historical_wall_data, historical_mem_data, platform_projects = _prepare_time_series_data( 334 | _query_bq(bq_project, bq_table, bq_date_cutoff, platform)) 335 | 336 | projects = projects.union(set(platform_projects)) 337 | # Generate a graph for that platform. 338 | row_content = [] 339 | 340 | row_content.append( 341 | _col_component( 342 | "col-sm-6", 343 | _historical_graph( 344 | metric="wall", 345 | metric_label="Wall Time (s)", 346 | data=historical_wall_data, 347 | platform=platform, 348 | ))) 349 | 350 | row_content.append( 351 | _col_component( 352 | "col-sm-6", 353 | _historical_graph( 354 | metric="memory", 355 | metric_label="Memory (MB)", 356 | data=historical_mem_data, 357 | platform=platform, 358 | ))) 359 | 360 | graph_components.append(_row_component("\n".join(row_content))) 361 | 362 | content = _full_report( 363 | date, 364 | graph_components="\n".join(graph_components), 365 | project_reports_components=_project_reports_components(date, projects)) 366 | 367 | if not os.path.exists(REPORTS_DIRECTORY): 368 | os.makedirs(REPORTS_DIRECTORY) 369 | 370 | report_tmp_file = "{}/report_master_{}.html".format(REPORTS_DIRECTORY, 371 | date.strftime("%Y%m%d")) 372 | with open(report_tmp_file, "w") as fo: 373 | fo.write(content) 374 | 375 | if upload_report: 376 | _upload_to_storage( 377 | report_tmp_file, storage_bucket, 378 | "all/{}/{}.html".format(date.strftime("%Y/%m/%d"), report_name)) 379 | else: 380 | print(content) 381 | 382 | 383 | def main(args=None): 384 | if args is None: 385 | args = sys.argv[1:] 386 | 387 | parser = argparse.ArgumentParser( 388 | description="Bazel Bench Daily Master Report") 389 | parser.add_argument("--date", type=str, help="Date in YYYY-mm-dd format.") 390 | parser.add_argument( 391 | "--storage_bucket", 392 | help="The GCP Storage bucket to fetch benchmark data from/upload the reports to." 393 | ) 394 | parser.add_argument( 395 | "--upload_report", 396 | type=bool, 397 | default=False, 398 | help="Whether to upload the report.") 399 | parser.add_argument( 400 | "--bigquery_table", 401 | help="The BigQuery table to fetch data from. In the format: project:table_identifier." 
402 | ) 403 | parser.add_argument( 404 | "--report_name", 405 | type=str, 406 | help="The name of the generated report.", 407 | default="report") 408 | parsed_args = parser.parse_args(args) 409 | 410 | date = ( 411 | datetime.datetime.strptime(parsed_args.date, "%Y-%m-%d").date() 412 | if parsed_args.date else datetime.date.today()) 413 | 414 | bq_project, bq_table = parsed_args.bigquery_table.split(":") 415 | _generate_report_for_date(date, parsed_args.storage_bucket, 416 | parsed_args.report_name, parsed_args.upload_report, 417 | bq_project, bq_table) 418 | 419 | 420 | if __name__ == "__main__": 421 | sys.exit(main()) 422 | -------------------------------------------------------------------------------- /report/generate_report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright 2019 The Bazel Authors. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http:#www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Generates a daily HTML report for the projects. 17 | 18 | The steps: 19 | 1. Get the necessary data from Storage for projects/date. 20 | 2. Manipulate the data to a format suitable for graphs. 21 | 3. Generate a HTML report containing the graphs. 22 | 4. Upload the generated HTMLs to GCP Storage. 23 | """ 24 | import argparse 25 | import collections 26 | import csv 27 | import datetime 28 | import json 29 | import io 30 | import os 31 | import statistics 32 | import subprocess 33 | import sys 34 | import tempfile 35 | import urllib.request 36 | from google.cloud import bigquery 37 | 38 | TMP = tempfile.gettempdir() 39 | REPORTS_DIRECTORY = os.path.join(TMP, ".bazel_bench", "reports") 40 | EVENTS_ORDER = [ 41 | "Launch Blaze", 42 | "Initialize command", 43 | "Load packages", 44 | "Analyze dependencies", 45 | "Analyze licenses", 46 | "Prepare for build", 47 | "Build artifacts", 48 | "Complete build", 49 | ] 50 | 51 | 52 | def _upload_to_storage(src_file_path, storage_bucket, destination_dir): 53 | """Uploads the file from src_file_path to the specified location on Storage.""" 54 | args = [ 55 | "gsutil", "cp", src_file_path, 56 | "gs://{}/{}".format(storage_bucket, destination_dir) 57 | ] 58 | subprocess.run(args) 59 | 60 | 61 | def _load_csv_from_remote_file(http_url): 62 | with urllib.request.urlopen(http_url) as resp: 63 | reader = csv.DictReader(io.TextIOWrapper(resp)) 64 | return [row for row in reader] 65 | 66 | 67 | def _load_json_from_remote_file(http_url): 68 | with urllib.request.urlopen(http_url) as resp: 69 | data = resp.read() 70 | encoding = resp.info().get_content_charset("utf-8") 71 | return json.loads(data.decode(encoding)) 72 | 73 | 74 | def _load_txt_from_remote_file(http_url): 75 | with urllib.request.urlopen(http_url) as resp: 76 | return resp.read().decode(resp.headers.get_content_charset() or "utf-8") 77 | 78 | 79 | def _get_storage_url(storage_bucket, dated_subdir): 80 | # In this case, the storage_bucket is a Domain-named bucket. 
81 | # https://cloud.google.com/storage/docs/domain-name-verification 82 | return "https://{}/{}".format(storage_bucket, dated_subdir) 83 | 84 | 85 | def _get_dated_subdir_for_project(project, date): 86 | return "{}/{}".format(project, date.strftime("%Y/%m/%d")) 87 | 88 | 89 | def _get_bazel_github_a_component(commit): 90 | return '{}'.format( 91 | "https://github.com/bazelbuild/bazel/commit/" + commit, commit) 92 | 93 | 94 | def _get_file_list_from_gs(bucket_name, gs_subdir): 95 | args = ["gsutil", "ls", "gs://{}/{}".format(bucket_name, gs_subdir)] 96 | command_output = subprocess.check_output(args) 97 | # The last element is just an empty string. 98 | decoded = command_output.decode("utf-8").split("\n")[:-1] 99 | 100 | return [line.strip("'").replace("gs://", "https://") for line in decoded] 101 | 102 | 103 | def _get_file_list_component(bucket_name, dated_subdir, platform): 104 | gs_subdir = "{}/{}".format(dated_subdir, platform) 105 | links = _get_file_list_from_gs(bucket_name, gs_subdir) 106 | li_components = [ 107 | '
  • {}
  • '.format(link, os.path.basename(link)) 108 | for link in links 109 | ] 110 | return """ 111 |
    112 |
      {}
    113 |
    114 | """.format(platform, "\n".join(li_components)) 115 | 116 | 117 | def _get_proportion_breakdown(aggr_json_profile): 118 | bazel_commit_to_phases = {} 119 | for entry in aggr_json_profile: 120 | bazel_commit = entry["bazel_source"] 121 | if bazel_commit not in bazel_commit_to_phases: 122 | bazel_commit_to_phases[bazel_commit] = [] 123 | bazel_commit_to_phases[bazel_commit].append({ 124 | "name": entry["name"], 125 | "dur": entry["dur"] 126 | }) 127 | 128 | bazel_commit_to_phase_proportion = {} 129 | for bazel_commit in bazel_commit_to_phases.keys(): 130 | total_time = sum( 131 | [float(entry["dur"]) for entry in bazel_commit_to_phases[bazel_commit]]) 132 | bazel_commit_to_phase_proportion[bazel_commit] = { 133 | entry["name"]: float(entry["dur"]) / total_time 134 | for entry in bazel_commit_to_phases[bazel_commit] 135 | } 136 | 137 | return bazel_commit_to_phase_proportion 138 | 139 | 140 | def _fit_data_to_phase_proportion(reading, proportion_breakdown): 141 | result = [] 142 | for phase in EVENTS_ORDER: 143 | if phase not in proportion_breakdown: 144 | result.append(0) 145 | else: 146 | result.append(reading * proportion_breakdown[phase]) 147 | return result 148 | 149 | 150 | def _short_form(commit): 151 | return commit[:7] 152 | 153 | 154 | def _prepare_data_for_graph(performance_data, aggr_json_profile): 155 | """Massage the data to fit a format suitable for graph generation.""" 156 | bazel_commit_to_phase_proportion = _get_proportion_breakdown( 157 | aggr_json_profile) 158 | ordered_commit_to_readings = collections.OrderedDict() 159 | for entry in performance_data: 160 | # Exclude measurements from failed runs in the graphs. 161 | # TODO(leba): Print the summary table, which includes info on which runs 162 | # failed. 163 | if entry["exit_status"] != "0": 164 | continue 165 | 166 | bazel_commit = entry["bazel_commit"] 167 | if bazel_commit not in ordered_commit_to_readings: 168 | ordered_commit_to_readings[bazel_commit] = { 169 | "bazel_commit": bazel_commit, 170 | "wall_readings": [], 171 | "memory_readings": [], 172 | } 173 | ordered_commit_to_readings[bazel_commit]["wall_readings"].append( 174 | float(entry["wall"])) 175 | ordered_commit_to_readings[bazel_commit]["memory_readings"].append( 176 | float(entry["memory"])) 177 | 178 | wall_data = [ 179 | ["Bazel Commit"] + EVENTS_ORDER + 180 | ["Median [Min, Max]", { 181 | "role": "interval" 182 | }, { 183 | "role": "interval" 184 | }] 185 | ] 186 | memory_data = [[ 187 | "Bazel Commit", "Memory (MB)", { 188 | "role": "interval" 189 | }, { 190 | "role": "interval" 191 | } 192 | ]] 193 | 194 | for obj in ordered_commit_to_readings.values(): 195 | commit = _short_form(obj["bazel_commit"]) 196 | 197 | median_wall = statistics.median(obj["wall_readings"]) 198 | min_wall = min(obj["wall_readings"]) 199 | max_wall = max(obj["wall_readings"]) 200 | wall_data.append([commit] + _fit_data_to_phase_proportion( 201 | median_wall, bazel_commit_to_phase_proportion[bazel_commit]) + 202 | [median_wall, min_wall, max_wall]) 203 | 204 | median_memory = statistics.median(obj["memory_readings"]) 205 | min_memory = min(obj["memory_readings"]) 206 | max_memory = max(obj["memory_readings"]) 207 | memory_data.append([commit, median_memory, min_memory, max_memory]) 208 | 209 | return wall_data, memory_data 210 | 211 | 212 | def _uncollapse_button(element_id, text): 213 | return """ 214 | 219 | """.format( 220 | element_id=element_id, text=text) 221 | 222 | 223 | def _row_component(content): 224 | return """ 225 |
<div class="row">{content}</div>
    226 | """.format(content=content) 227 | 228 | 229 | def _col_component(col_class, content): 230 | return """ 231 |
<div class="{col_class}">{content}</div>
    232 | """.format( 233 | col_class=col_class, content=content) 234 | 235 | 236 | def _commits_component(full_list, benchmarked_list): 237 | li_components = [] 238 | for commit in full_list: 239 | if commit in benchmarked_list: 240 | li_components.append("
  • {}
  • ".format( 241 | _get_bazel_github_a_component(commit))) 242 | else: 243 | li_components.append("
  • {}
  • ".format( 244 | _get_bazel_github_a_component(commit))) 245 | return """ 246 |
    247 | Commits: 248 |
      249 | {} 250 |
    251 |
    252 |
    253 | """.format("\n".join(li_components)) 254 | 255 | 256 | def _single_graph(metric, metric_label, data, platform, median_series=None): 257 | """Returns the HTML
    component of a single graph.""" 258 | title = "[{}] Bar Chart of {} vs Bazel commits".format(platform, metric_label) 259 | hAxis = "Bazel Commits (chronological order)" 260 | vAxis = metric_label 261 | chart_id = "{}-{}".format(platform, metric) 262 | 263 | return """ 264 | 298 |
    299 | """.format( 300 | title=title, 301 | data=data, 302 | hAxis=hAxis, 303 | vAxis=vAxis, 304 | chart_id=chart_id, 305 | median_series=median_series) 306 | 307 | 308 | def _historical_graph(metric, metric_label, data, platform, color): 309 | """Returns the HTML
<div> component of a single graph.""" 310 | title = "[{}] Historical values of {}".format(platform, metric_label) 311 | hAxis = "Date (commit)" 312 | vAxis = metric_label 313 | chart_id = "{}-{}-time".format(platform, metric) 314 | 315 | # Set viewWindow margins. 316 | minVal = sys.maxsize 317 | maxVal = 0 318 | for row in data[1:]: 319 | minVal = min(minVal, row[2]) 320 | maxVal = max(maxVal, row[3]) 321 | viewWindowMin = minVal * 0.95 322 | viewWindowMax = maxVal * 1.05 323 | 324 | return """ 325 | 362 |
    363 | """.format( 364 | title=title, 365 | data=data, 366 | hAxis=hAxis, 367 | vAxis=vAxis, 368 | chart_id=chart_id, 369 | viewWindowMin=viewWindowMin, 370 | viewWindowMax=viewWindowMax, 371 | color=color) 372 | 373 | 374 | def _summary_table(content, platform): 375 | """Returns the HTML
    component of the summary table.""" 376 | return """ 377 |
    {content}
    378 |
    379 | """.format( 380 | platform=platform, content=content) 381 | 382 | 383 | def _full_report(project, project_source, date, command, graph_components, 384 | raw_files_components): 385 | """Returns the full HTML of a complete report, from the graph components.""" 386 | return """ 387 | 388 | 389 | 390 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 406 | 407 | [{date}] {project} 408 | 409 | 410 |
    411 |
    412 |
    413 |

    [{project}] Report for {date}

    414 |
    415 |
    416 | 417 |
    418 |
    419 | 420 | (Date & time are in UTC.) 421 |
    422 | 443 |
    444 |
    445 | 446 |
    447 |
    448 | Command: {command} 449 |
    450 |
    451 | {graphs} 452 |

    Raw Files:

    453 | {files} 454 |
    455 | 456 | 457 | """.format( 458 | project=project, 459 | project_source=project_source, 460 | date=date.strftime("%Y/%m/%d"), 461 | command=command, 462 | graphs=graph_components, 463 | files=raw_files_components) 464 | 465 | 466 | def _query_bq(bq_project, bq_table, project_source, date_cutoff, platform): 467 | bq_client = bigquery.Client(project=bq_project) 468 | # Limit to the last 10 days. 469 | query = """ 470 | SELECT 471 | MIN(wall) as min_wall, 472 | APPROX_QUANTILES(wall, 101)[OFFSET(50)] AS median_wall, 473 | MAX(wall) as max_wall, 474 | MIN(memory) as min_memory, 475 | APPROX_QUANTILES(memory, 101)[OFFSET(50)] AS median_memory, 476 | MAX(memory) as max_memory, 477 | bazel_commit, 478 | DATE(MIN(started_at)) as report_date 479 | FROM ( 480 | SELECT 481 | wall, memory, bazel_commit, started_at 482 | FROM `{bq_project}.{bq_table}` 483 | WHERE 484 | bazel_commit IN ( 485 | SELECT 486 | bazel_commit 487 | FROM ( 488 | SELECT 489 | bazel_commit, started_at, 490 | RANK() OVER ( 491 | PARTITION BY project_commit 492 | ORDER BY started_at DESC 493 | ) AS `Rank` 494 | FROM `{bq_project}.{bq_table}` 495 | WHERE 496 | DATE(started_at) <= "{date_cutoff}" 497 | AND project_source = "{project_source}" 498 | AND exit_status = 0 499 | ) 500 | WHERE 501 | Rank=1 502 | ORDER BY started_at DESC 503 | LIMIT 10 504 | ) 505 | AND project_source = "{project_source}" 506 | AND exit_status = 0 507 | AND platform = "{platform}" 508 | ) 509 | GROUP BY bazel_commit 510 | ORDER BY report_date ASC; 511 | """.format( 512 | bq_project=bq_project, 513 | bq_table=bq_table, 514 | project_source=project_source, 515 | date_cutoff=date_cutoff, 516 | platform=platform) 517 | 518 | return bq_client.query(query) 519 | 520 | 521 | def _prepare_time_series_data(raw_data): 522 | """Massage the data to fit a format suitable for graph generation.""" 523 | wall_data = [[ 524 | "Date", "Wall Time", { 525 | "role": "interval" 526 | }, { 527 | "role": "interval" 528 | } 529 | ]] 530 | memory_data = [["Date", "Memory", {"role": "interval"}, {"role": "interval"}]] 531 | 532 | for row in raw_data: 533 | # Commits on day X are benchmarked on day X + 1. 534 | date_str = "{} ({})".format( 535 | (row.report_date - datetime.timedelta(days=1)).strftime("%Y-%m-%d"), 536 | row.bazel_commit[:7]) 537 | wall_data.append([date_str, row.median_wall, row.min_wall, row.max_wall]) 538 | memory_data.append( 539 | [date_str, row.median_memory, row.min_memory, row.max_memory]) 540 | 541 | return wall_data, memory_data 542 | 543 | 544 | def _generate_report_for_date(project, date, storage_bucket, report_name, 545 | upload_report, bq_project, bq_table): 546 | """Generates a html report for the specified date & project. 547 | 548 | Args: 549 | project: the project to generate report for. Check out bazel_bench.py. 550 | date: the date to generate report for. 551 | storage_bucket: the Storage bucket to fetch data from/upload the report to. 552 | report_name: the name of the report on GS. 553 | upload_report: whether to upload the report to GCS. 554 | bq_project: the BigQuery project. 555 | bq_table: the BigQuery table. 
556 | """ 557 | dated_subdir = _get_dated_subdir_for_project(project, date) 558 | bq_date_cutoff = (date + datetime.timedelta(days=1)).strftime("%Y-%m-%d") 559 | root_storage_url = _get_storage_url(storage_bucket, dated_subdir) 560 | metadata_file_url = "{}/METADATA".format(root_storage_url) 561 | metadata = _load_json_from_remote_file(metadata_file_url) 562 | 563 | graph_components = [] 564 | raw_files_components = [] 565 | graph_components.append(_uncollapse_button("commits", "Show commits")) 566 | graph_components.append( 567 | _row_component( 568 | _col_component( 569 | "col-sm-10", 570 | _commits_component(metadata["all_commits"], 571 | metadata["benchmarked_commits"])))) 572 | 573 | for platform_measurement in sorted( 574 | metadata["platforms"], key=lambda k: k["platform"]): 575 | # Get the data 576 | performance_data = _load_csv_from_remote_file("{}/{}".format( 577 | root_storage_url, platform_measurement["perf_data"])) 578 | aggr_json_profile = _load_csv_from_remote_file("{}/{}".format( 579 | root_storage_url, platform_measurement["aggr_json_profiles"])) 580 | summary_text = _load_txt_from_remote_file("{}/{}".format( 581 | root_storage_url, 582 | platform_measurement["perf_data"].replace(".csv", ".txt"))) 583 | 584 | wall_data, memory_data = _prepare_data_for_graph(performance_data, 585 | aggr_json_profile) 586 | platform = platform_measurement["platform"] 587 | 588 | historical_wall_data, historical_mem_data = _prepare_time_series_data( 589 | _query_bq(bq_project, bq_table, metadata["project_source"], 590 | bq_date_cutoff, platform)) 591 | 592 | # Generate a graph for that platform. 593 | row_content = [] 594 | row_content.append( 595 | _col_component( 596 | "col-sm-6", 597 | _single_graph( 598 | metric="wall", 599 | metric_label="Wall Time (s)", 600 | data=wall_data, 601 | platform=platform, 602 | median_series=len(EVENTS_ORDER)))) 603 | 604 | row_content.append( 605 | _col_component( 606 | "col-sm-6", 607 | _historical_graph( 608 | metric="wall", 609 | metric_label="Wall Time (s)", 610 | data=historical_wall_data, 611 | platform=platform, 612 | color="#dd4477"))) 613 | 614 | row_content.append( 615 | _col_component( 616 | "col-sm-6", 617 | _single_graph( 618 | metric="memory", 619 | metric_label="Memory (MB)", 620 | data=memory_data, 621 | platform=platform, 622 | ))) 623 | 624 | row_content.append( 625 | _col_component( 626 | "col-sm-6", 627 | _historical_graph( 628 | metric="memory", 629 | metric_label="Memory (MB)", 630 | data=historical_mem_data, 631 | platform=platform, 632 | color="#3366cc"))) 633 | 634 | row_content.append( 635 | _col_component( 636 | "col-sm-12", 637 | _uncollapse_button("summary-{}".format(platform), 638 | "Show Summary Table"))) 639 | row_content.append( 640 | _col_component("col-sm-12", 641 | _summary_table(content=summary_text, platform=platform))) 642 | 643 | graph_components.append( 644 | _row_component( 645 | _col_component( 646 | "col-sm-5", 647 | '

    {}

    '.format(platform)))) 648 | raw_files_components.append( 649 | _uncollapse_button("raw_files_%s" % platform, 650 | "Show raw files for %s" % platform)) 651 | raw_files_components.append( 652 | _row_component( 653 | _col_component( 654 | "col-sm-10", 655 | _get_file_list_component(storage_bucket, dated_subdir, 656 | platform)))) 657 | graph_components.append(_row_component("\n".join(row_content))) 658 | 659 | content = _full_report( 660 | project, 661 | metadata["project_source"], 662 | date, 663 | command=metadata["command"], 664 | graph_components="\n".join(graph_components), 665 | raw_files_components="\n".join(raw_files_components)) 666 | 667 | if not os.path.exists(REPORTS_DIRECTORY): 668 | os.makedirs(REPORTS_DIRECTORY) 669 | 670 | report_tmp_file = "{}/report_{}_{}.html".format(REPORTS_DIRECTORY, project, 671 | date.strftime("%Y%m%d")) 672 | with open(report_tmp_file, "w") as fo: 673 | fo.write(content) 674 | 675 | if upload_report: 676 | _upload_to_storage(report_tmp_file, storage_bucket, 677 | dated_subdir + "/{}.html".format(report_name)) 678 | else: 679 | print(content) 680 | 681 | 682 | def main(args=None): 683 | if args is None: 684 | args = sys.argv[1:] 685 | 686 | parser = argparse.ArgumentParser(description="Bazel Bench Daily Report") 687 | parser.add_argument("--date", type=str, help="Date in YYYY-mm-dd format.") 688 | parser.add_argument( 689 | "--project", 690 | action="append", 691 | help=( 692 | "Projects to generate report for. Use the storage_subdir defined " 693 | "in the main bazel-bench script in bazelbuild/continuous-integration." 694 | ), 695 | ) 696 | parser.add_argument( 697 | "--storage_bucket", 698 | help="The GCP Storage bucket to fetch benchmark data from/upload the reports to." 699 | ) 700 | parser.add_argument( 701 | "--upload_report", 702 | type=bool, 703 | default=False, 704 | help="Whether to upload the report.") 705 | parser.add_argument( 706 | "--bigquery_table", 707 | help="The BigQuery table to fetch data from. In the format: project:table_identifier." 708 | ) 709 | parser.add_argument( 710 | "--report_name", 711 | type=str, 712 | help="The name of the generated report.", 713 | default="report") 714 | parsed_args = parser.parse_args(args) 715 | 716 | date = ( 717 | datetime.datetime.strptime(parsed_args.date, "%Y-%m-%d").date() 718 | if parsed_args.date else datetime.date.today()) 719 | 720 | bq_project, bq_table = parsed_args.bigquery_table.split(":") 721 | for project in parsed_args.project: 722 | _generate_report_for_date(project, date, parsed_args.storage_bucket, 723 | parsed_args.report_name, 724 | parsed_args.upload_report, bq_project, bq_table) 725 | 726 | 727 | if __name__ == "__main__": 728 | sys.exit(main()) 729 | -------------------------------------------------------------------------------- /testutils/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_library") 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | filegroup( 6 | name = "testutils-srcs", 7 | srcs = glob(["*.py"]), 8 | ) 9 | 10 | py_library( 11 | name = "testutils", 12 | srcs = [":testutils-srcs"], 13 | ) 14 | -------------------------------------------------------------------------------- /testutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /testutils/fakes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fakes for some functions/classes.""" 15 | import sys 16 | 17 | 18 | def fake_log(text): 19 | """Fakes the log function. Prints to stderr.""" 20 | sys.stderr.write(text) 21 | 22 | 23 | def fake_exec_command(args, shell=False, fail_if_nonzero=True, cwd=None): 24 | """Fakes the _exec_command function.""" 25 | fake_log(' '.join(args)) 26 | 27 | 28 | class FakeBazel(object): 29 | """Fake class for utils.Bazel""" 30 | 31 | def __init__(self, bazel_binary_path, bazelrc): 32 | # Do nothing 33 | return 34 | 35 | def command(self, command_name, args=None, collect_memory=False): 36 | """Fake method to verify that the command is executed.""" 37 | args = args or [] 38 | fake_log('Executing Bazel command: bazel %s %s' % 39 | (command_name, ' '.join(args))) 40 | -------------------------------------------------------------------------------- /third_party/BUILD: -------------------------------------------------------------------------------- 1 | exports_files(glob(["**"])) 2 | -------------------------------------------------------------------------------- /third_party/requirements.in: -------------------------------------------------------------------------------- 1 | absl-py==2.0.0 2 | cachetools==5.3.1 3 | certifi==2023.7.22 4 | chardet==5.2.0 5 | funcsigs==1.0.2 6 | futures==3.1.1 7 | gitdb2==4.0.2 8 | GitPython==3.1.41 9 | google-api-core==2.12.0 10 | google-auth==2.23.3 11 | google-cloud-bigquery==3.12.0 12 | google-cloud-storage==2.12.0 13 | google-cloud-core==2.3.3 14 | google-resumable-media==2.6.0 15 | googleapis-common-protos==1.61.0 16 | idna==3.4 17 | mock==5.1.0 18 | numpy==1.26.1 19 | pbr==5.1.3 20 | protobuf==4.24.4 21 | psutil==5.9.6 22 | pyasn1==0.5.0 23 | pyasn1-modules==0.3.0 24 | pytz==2018.9 25 | requests==2.31.0 26 | rsa==4.9 27 | scipy==1.11.3 28 | six==1.16.0 29 | urllib3==2.2.2 30 | PyYAML==6.0.1 31 | cython==3.0.3 32 | -------------------------------------------------------------------------------- /third_party/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 
with Python 3.10 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements.txt requirements.in 6 | # 7 | absl-py==2.0.0 8 | # via -r third_party/requirements.in 9 | cachetools==5.3.1 10 | # via 11 | # -r third_party/requirements.in 12 | # google-auth 13 | certifi==2023.7.22 14 | # via 15 | # -r third_party/requirements.in 16 | # requests 17 | chardet==5.2.0 18 | # via -r third_party/requirements.in 19 | charset-normalizer==3.3.2 20 | # via requests 21 | cython==3.0.3 22 | # via -r third_party/requirements.in 23 | funcsigs==1.0.2 24 | # via -r third_party/requirements.in 25 | futures==3.1.1 26 | # via -r third_party/requirements.in 27 | gitdb==4.0.11 28 | # via 29 | # gitdb2 30 | # gitpython 31 | gitdb2==4.0.2 32 | # via -r third_party/requirements.in 33 | gitpython==3.1.41 34 | # via -r third_party/requirements.in 35 | google-api-core[grpc]==2.12.0 36 | # via 37 | # -r third_party/requirements.in 38 | # google-cloud-bigquery 39 | # google-cloud-core 40 | # google-cloud-storage 41 | google-auth==2.23.3 42 | # via 43 | # -r third_party/requirements.in 44 | # google-api-core 45 | # google-cloud-core 46 | # google-cloud-storage 47 | google-cloud-bigquery==3.12.0 48 | # via -r third_party/requirements.in 49 | google-cloud-core==2.3.3 50 | # via 51 | # -r third_party/requirements.in 52 | # google-cloud-bigquery 53 | # google-cloud-storage 54 | google-cloud-storage==2.12.0 55 | # via -r third_party/requirements.in 56 | google-crc32c==1.5.0 57 | # via 58 | # google-cloud-storage 59 | # google-resumable-media 60 | google-resumable-media==2.6.0 61 | # via 62 | # -r third_party/requirements.in 63 | # google-cloud-bigquery 64 | # google-cloud-storage 65 | googleapis-common-protos==1.61.0 66 | # via 67 | # -r third_party/requirements.in 68 | # google-api-core 69 | # grpcio-status 70 | grpcio==1.62.1 71 | # via 72 | # google-api-core 73 | # google-cloud-bigquery 74 | # grpcio-status 75 | grpcio-status==1.62.1 76 | # via google-api-core 77 | idna==3.4 78 | # via 79 | # -r third_party/requirements.in 80 | # requests 81 | mock==5.1.0 82 | # via -r third_party/requirements.in 83 | numpy==1.26.1 84 | # via 85 | # -r third_party/requirements.in 86 | # scipy 87 | packaging==24.0 88 | # via google-cloud-bigquery 89 | pbr==5.1.3 90 | # via -r third_party/requirements.in 91 | proto-plus==1.23.0 92 | # via google-cloud-bigquery 93 | protobuf==4.24.4 94 | # via 95 | # -r third_party/requirements.in 96 | # google-api-core 97 | # google-cloud-bigquery 98 | # googleapis-common-protos 99 | # grpcio-status 100 | # proto-plus 101 | psutil==5.9.6 102 | # via -r third_party/requirements.in 103 | pyasn1==0.5.0 104 | # via 105 | # -r third_party/requirements.in 106 | # pyasn1-modules 107 | # rsa 108 | pyasn1-modules==0.3.0 109 | # via 110 | # -r third_party/requirements.in 111 | # google-auth 112 | python-dateutil==2.9.0.post0 113 | # via google-cloud-bigquery 114 | pytz==2018.9 115 | # via -r third_party/requirements.in 116 | pyyaml==6.0.1 117 | # via -r third_party/requirements.in 118 | requests==2.31.0 119 | # via 120 | # -r third_party/requirements.in 121 | # google-api-core 122 | # google-cloud-bigquery 123 | # google-cloud-storage 124 | rsa==4.9 125 | # via 126 | # -r third_party/requirements.in 127 | # google-auth 128 | scipy==1.11.3 129 | # via -r third_party/requirements.in 130 | six==1.16.0 131 | # via 132 | # -r third_party/requirements.in 133 | # python-dateutil 134 | smmap==5.0.1 135 | # via gitdb 136 | urllib3==2.2.2 137 | # via 138 | # -r third_party/requirements.in 139 | # requests 
140 | -------------------------------------------------------------------------------- /utils/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test") 2 | load("@third_party//:requirements.bzl", "requirement") 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | filegroup( 7 | name = "utils-srcs", 8 | srcs = glob( 9 | ["*.py"], 10 | exclude = [ 11 | "*_test.py", 12 | "json_profile_merger.py", 13 | "bigquery_upload.py", 14 | "storage_upload.py", 15 | ], 16 | ), 17 | ) 18 | 19 | py_library( 20 | name = "utils", 21 | srcs = [":utils-srcs"], 22 | deps = [ 23 | requirement("absl-py"), 24 | requirement("certifi"), 25 | requirement("chardet"), 26 | requirement("funcsigs"), 27 | requirement("idna"), 28 | requirement("numpy"), 29 | requirement("pbr"), 30 | requirement("psutil"), 31 | requirement("pyasn1"), 32 | requirement("pyasn1-modules"), 33 | requirement("rsa"), 34 | requirement("scipy"), 35 | requirement("six"), 36 | requirement("urllib3"), 37 | requirement("PyYAML"), 38 | ], 39 | ) 40 | 41 | py_library( 42 | name = "google-common", 43 | deps = [ 44 | requirement("cachetools"), 45 | requirement("google-api-core"), 46 | requirement("google-auth"), 47 | requirement("google-cloud-core"), 48 | requirement("google-resumable-media"), 49 | requirement("googleapis-common-protos"), 50 | requirement("protobuf"), 51 | requirement("pytz"), 52 | requirement("requests"), 53 | ], 54 | ) 55 | 56 | py_binary( 57 | name = "bigquery_upload", 58 | srcs = ["bigquery_upload.py"], 59 | deps = [ 60 | # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14, 61 | # google-cloud-bigquery must be listed first. 62 | requirement("google-cloud-bigquery"), 63 | requirement("absl-py"), 64 | requirement("futures"), 65 | ":google-common", 66 | ], 67 | ) 68 | 69 | py_binary( 70 | name = "storage_upload", 71 | srcs = ["storage_upload.py"], 72 | deps = [ 73 | # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14, 74 | # google-cloud-storage must be listed first. 75 | requirement("google-cloud-storage"), 76 | requirement("absl-py"), 77 | ":google-common", 78 | ], 79 | ) 80 | 81 | py_binary( 82 | name = "json_profiles_merger", 83 | srcs = ["json_profiles_merger.py"], 84 | deps = [ 85 | ":utils", 86 | requirement("absl-py"), 87 | ], 88 | ) 89 | 90 | py_test( 91 | name = "bazel_test", 92 | size = "small", 93 | srcs = ["bazel_test.py"], 94 | deps = [ 95 | ":utils", 96 | requirement("mock"), 97 | ], 98 | ) 99 | 100 | py_test( 101 | name = "values_test", 102 | size = "small", 103 | srcs = ["values_test.py"], 104 | deps = [ 105 | ":utils", 106 | requirement("mock"), 107 | ], 108 | ) 109 | 110 | py_test( 111 | name = "json_profiles_merger_lib_test", 112 | size = "small", 113 | srcs = ["json_profiles_merger_lib_test.py"], 114 | deps = [ 115 | ":utils", 116 | requirement("mock"), 117 | ], 118 | ) 119 | 120 | py_test( 121 | name = "benchmark_config_test", 122 | size = "small", 123 | srcs = ["benchmark_config_test.py"], 124 | deps = [ 125 | ":utils", 126 | ], 127 | ) 128 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /utils/bazel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Handles Bazel invocations and measures their time/memory consumption.""" 15 | import subprocess 16 | import tempfile 17 | import os 18 | import time 19 | import psutil 20 | import datetime 21 | import utils.logger as logger 22 | 23 | 24 | class Bazel(object): 25 | """Class to handle Bazel invocations. 26 | 27 | Allows to measure resource consumption of each command. 28 | 29 | Attributes: 30 | bazel_binary_path: A string specifying the path to the bazel binary to be 31 | invoked. 32 | bazelrc: A string specifying the argument to the bazelrc flag. Uses 33 | /dev/null if not set explicitly. 34 | """ 35 | 36 | def __init__(self, bazel_binary_path, startup_options): 37 | self._bazel_binary_path = str(bazel_binary_path) 38 | self._startup_options = startup_options 39 | self._pid = None 40 | 41 | def command(self, command, args=None): 42 | """Invokes a command with a bazel binary. 43 | 44 | Args: 45 | command: A string specifying the bazel command to invoke. 46 | args: An optional list of strings representing additional arguments to the 47 | bazel command. 48 | 49 | Returns: 50 | A dict containing collected metrics (wall, cpu, system times and 51 | optionally memory), the exit_status of the Bazel invocation, and the 52 | start datetime (in UTC). 53 | Returns None instead if the command equals 'shutdown'. 
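    For example, a successful 'build' invocation might return (these readings
    mirror the values exercised in utils/bazel_test.py; real numbers vary):
      {'wall': 39.5, 'cpu': 26.8, 'system': 2.0, 'memory': 666,
       'exit_status': 0, 'started_at': datetime.datetime(...)}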
54 | """ 55 | args = args or [] 56 | logger.log('Executing Bazel command: bazel %s %s %s' % 57 | (' '.join(self._startup_options), command, ' '.join(args))) 58 | 59 | result = dict() 60 | result['started_at'] = datetime.datetime.utcnow() 61 | 62 | before_times = self._get_times() 63 | dev_null = open(os.devnull, 'w') 64 | exit_status = 0 65 | 66 | with tempfile.NamedTemporaryFile() as tmp_stdout: 67 | try: 68 | subprocess.check_call( 69 | [self._bazel_binary_path] + self._startup_options + [command] + args, 70 | stdout=dev_null, 71 | stderr=tmp_stdout.file) 72 | except subprocess.CalledProcessError as e: 73 | exit_status = e.returncode 74 | logger.log_error('Bazel command failed with exit code %s' % e.returncode) 75 | tmp_stdout.seek(0) 76 | logger.log_error(tmp_stdout.read().decode()) 77 | 78 | 79 | if command == 'shutdown': 80 | return None 81 | after_times = self._get_times() 82 | 83 | for kind in ['wall', 'cpu', 'system']: 84 | result[kind] = after_times[kind] - before_times[kind] 85 | result['exit_status'] = exit_status 86 | 87 | # We do a number of runs here to reduce the noise in the data. 88 | result['memory'] = min([self._get_heap_size() for _ in range(5)]) 89 | 90 | return result 91 | 92 | def _get_pid(self): 93 | """Returns the pid of the server. 94 | 95 | Has the side effect of starting the server if none is running. Caches the 96 | result. 97 | """ 98 | if not self._pid: 99 | self._pid = (int)( 100 | subprocess.check_output([self._bazel_binary_path] + 101 | self._startup_options + 102 | ['info', 'server_pid'])) 103 | return self._pid 104 | 105 | def _get_times(self): 106 | """Retrieves and returns the used times.""" 107 | # TODO(twerth): Getting the pid have the side effect of starting up the 108 | # Bazel server. There are benchmarks where we don't want this, so we 109 | # probably should make it configurable. 110 | process_data = psutil.Process(pid=self._get_pid()) 111 | cpu_times = process_data.cpu_times() 112 | return { 113 | 'wall': time.time(), 114 | 'cpu': cpu_times.user, 115 | 'system': cpu_times.system, 116 | } 117 | 118 | def _get_heap_size(self): 119 | """Retrieves and returns the used heap size.""" 120 | return (int)( 121 | subprocess.check_output([self._bazel_binary_path] + 122 | self._startup_options + 123 | ['info', 'used-heap-size-after-gc'])[:-3]) 124 | -------------------------------------------------------------------------------- /utils/bazel_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for utils.bazel.""" 15 | import collections 16 | import mock 17 | import unittest 18 | import bazel 19 | 20 | 21 | class BazelTest(unittest.TestCase): 22 | 23 | @mock.patch.object(bazel.subprocess, 'check_output', return_value='123\n') 24 | def test_get_pid(self, check_output_mock): 25 | b = bazel.Bazel('foo', []) 26 | self.assertEqual(123, b._get_pid()) 27 | self.assertEqual(123, b._get_pid()) 28 | # Verify that even that we called _get_pid twice, the we didn't spawn a 29 | # subprocess twice. 30 | self.assertEqual(1, check_output_mock.call_count) 31 | 32 | @mock.patch.object(bazel.subprocess, 'check_output', return_value='280MB\n') 33 | def test_get_heap_size(self, _): 34 | b = bazel.Bazel('foo', []) 35 | self.assertEqual(280, b._get_heap_size()) 36 | 37 | @mock.patch.object(bazel.Bazel, '_get_pid', return_value=123) 38 | @mock.patch.object(bazel.time, 'time', return_value=98.76) 39 | @mock.patch.object(bazel.psutil, 'Process') 40 | def test_get_times(self, process_mock, unused_time_mock, unused_get_pid_mock): 41 | cpu_times = collections.namedtuple('cpu_times', 'user system') 42 | cpu_times_mock = process_mock.return_value 43 | cpu_times_mock.cpu_times.return_value = cpu_times(user=47.11, system=23.42) 44 | 45 | b = bazel.Bazel('foo', []) 46 | self.assertEqual({ 47 | 'wall': 98.76, 48 | 'cpu': 47.11, 49 | 'system': 23.42, 50 | }, b._get_times()) 51 | 52 | @mock.patch.object(bazel.Bazel, '_get_pid', return_value=123) 53 | @mock.patch.object(bazel.Bazel, '_get_heap_size') 54 | @mock.patch.object(bazel.Bazel, '_get_times') 55 | @mock.patch.object(bazel.subprocess, 'check_call', return_value=0) 56 | @mock.patch('datetime.datetime') 57 | def test_command(self, datetime_mock, subprocess_mock, get_times_mock, 58 | get_heap_size_mock, _): 59 | get_times_mock.side_effect = [ 60 | { 61 | 'wall': 42, 62 | 'cpu': 0.5, 63 | 'system': 12.3, 64 | }, 65 | { 66 | 'wall': 81.5, 67 | 'cpu': 27.3, 68 | 'system': 14.3, 69 | }, 70 | ] 71 | get_heap_size_mock.side_effect = [700, 666, 668, 670, 667] 72 | datetime_mock.utcnow.return_value = 'fake_date' 73 | 74 | b = bazel.Bazel('foo', []) 75 | self.assertEqual( 76 | { 77 | 'wall': 39.5, 78 | 'cpu': 26.8, 79 | 'system': 2.0, 80 | 'memory': 666, 81 | 'exit_status': 0, 82 | 'started_at': 'fake_date' 83 | }, 84 | b.command( 85 | command='build', args=['bar', 'zoo'])) 86 | subprocess_mock.assert_called_with( 87 | ['foo', 'build', 'bar', 'zoo'], stdout=mock.ANY, stderr=mock.ANY) 88 | 89 | if __name__ == '__main__': 90 | unittest.main() 91 | -------------------------------------------------------------------------------- /utils/benchmark_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Manages the configuration file for benchmarking. 
15 | 
16 | Currently supported flags/attributes:
17 |   - project_commit
18 |   - project_source
19 |   - bazel_commit
20 |   - bazel_binary
21 |   - runs
22 |   - collect_profile
23 |   - the command (which includes startup options, command, targets, command
24 |     options)
25 | 
26 | Note that the pluralized options (e.g. --project_commits) have to be repeated
27 | across units or as a global option in their singular form.
28 | 
29 | Example of a config file:
30 | benchmark_project_commits: False
31 | global_options:
32 |   project_commit: 595a730
33 |   runs: 5
34 |   clean: true
35 |   shutdown: true
36 |   collect_profile: false
37 |   project_source: /path/to/project/repo
38 | units:
39 |  - bazel_commit: 595a730
40 |    command: info
41 |  - bazel_binary: /tmp/bazel
42 |    command: --host_jvm_debug build --nobuild //src:bazel
43 |  - bazel_binary: /tmp/bazel
44 |    command: info
45 |    project_commit: 595a731
46 | 
47 | The "benchmarking units" represent independent sets of conditions to be
48 | benchmarked.
49 | 
50 | """
51 | 
52 | import copy
53 | import shlex
54 | import sys
55 | import yaml
56 | 
57 | 
58 | 
59 | class BenchmarkConfig(object):
60 |   """Manages the configuration file for benchmarking."""
61 | 
62 |   # TODO(leba): have a single source of truth for this.
63 |   # TODO(leba): Consider replacing dict with collections.namedtuple.
64 |   _DEFAULT_VALS = {
65 |       'runs': 5,
66 |       'collect_profile': False,
67 |       'bazel_source': 'https://github.com/bazelbuild/bazel.git',
68 |       'env_configure': None,
69 |       'clean': True,
70 |       'shutdown': True,
71 |   }
72 | 
73 |   def __init__(self, units, benchmark_project_commits=False):
74 |     """Initializes the BenchmarkConfig with the given benchmarking units.
75 | 
76 |     Args:
77 |       units: the benchmarking units.
78 |       benchmark_project_commits: whether we're benchmarking project commits
79 |         (instead of bazel commits). This makes a difference in how we generate
80 |         our report.
81 |     """
82 |     self._units = units
83 |     self._benchmark_project_commits = benchmark_project_commits
84 | 
85 |   def get_units(self):
86 |     """Returns a copy of the parsed units."""
87 |     return copy.copy(self._units)
88 | 
89 |   def get_bazel_commits(self):
90 |     """Returns the list of specified bazel_commits."""
91 |     return [
92 |         unit['bazel_commit'] for unit in self._units if 'bazel_commit' in unit
93 |     ]
94 | 
95 |   def get_project_commits(self):
96 |     """Returns the list of specified project_commits."""
97 |     return [
98 |         unit['project_commit'] for unit in self._units if 'project_commit' in unit
99 |     ]
100 | 
101 |   def get_project_source(self):
102 |     """Returns the common project_source across the units.
103 | 
104 |     We don't allow multiple project_source for now.
105 |     """
106 |     return None if not self._units else self._units[0]['project_source']
107 | 
108 |   def get_bazel_source(self):
109 |     """Returns the common bazel_source across the units.
110 | 
111 |     We don't allow multiple bazel_source for now.
112 |     """
113 |     return None if not self._units else self._units[0]['bazel_source']
114 | 
115 |   def benchmark_project_commits(self):
116 |     """Returns whether we're benchmarking project commits (instead of bazel commits)."""
117 |     return self._benchmark_project_commits
118 | 
119 |   @classmethod
120 |   def from_file(cls, config_file_path):
121 |     """Loads the YAML config file and constructs a BenchmarkConfig.
122 | 
123 |     Args:
124 |       config_file_path: the path to the configuration file.
125 | 
126 |     Returns:
127 |       The created config object.
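   | 
   |     Example (the path is hypothetical):
   | 
   |       config = BenchmarkConfig.from_file('/path/to/benchmark_config.yml')
   |       units = config.get_units()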
128 |     """
129 |     with open(config_file_path, 'r') as fi:
130 |       return cls.from_string(fi.read())
131 | 
132 |   @classmethod
133 |   def from_string(cls, string):
134 |     """Parses the content of a YAML config file and constructs a BenchmarkConfig.
135 | 
136 |     Args:
137 |       string: a string in YAML file format. Usually the content of a config
138 |         file.
139 | 
140 |     Returns:
141 |       The created config object.
142 |     """
143 |     config = yaml.safe_load(string)
144 |     if 'units' not in config:
145 |       raise ValueError('Wrong config file format. Please check the example.')
146 | 
147 |     benchmark_project_commits = ('benchmark_project_commits' in config and
148 |                                  config['benchmark_project_commits'])
149 | 
150 |     global_options = (
151 |         config['global_options'] if 'global_options' in config else {})
152 | 
153 |     parsed_units = []
154 |     for local_options in config['units']:
155 |       unit = copy.copy(global_options)
156 |       # Local options override global ones.
157 |       unit.update(local_options)
158 |       parsed_units.append(cls._parse_unit(unit))
159 |     return cls(parsed_units, benchmark_project_commits)
160 | 
161 |   @classmethod
162 |   def from_flags(cls, bazel_commits, bazel_binaries, project_commits,
163 |                  bazel_source, project_source, env_configure, runs,
164 |                  collect_profile, command, clean, shutdown):
165 |     """Creates the BenchmarkConfig based on specified flags.
166 | 
167 |     Args:
168 |       bazel_commits: the bazel commits.
169 |       bazel_binaries: paths to pre-built bazel binaries.
170 |       project_commits: the project commits.
171 |       bazel_source: Either a path to the local Bazel repo or a https url to a
172 |         GitHub repository.
173 |       project_source: Either a path to the local git project to be built or a
174 |         https url to a GitHub repository.
175 |       env_configure: The command to run on the project repository before building it.
176 |       runs: The number of benchmark runs to perform for each combination.
177 |       collect_profile: Whether to collect a JSON profile.
178 |       command: the full command to benchmark, optionally with startup options
179 |         prepended, e.g. "--noexobazel build --nobuild ...".
180 |       clean: Whether to invoke `bazel clean` between runs.
181 |       shutdown: Whether to invoke `bazel shutdown` between runs.
182 | 
183 |     Returns:
184 |       The created config object.
185 |     """
186 |     units = []
187 |     for bazel_commit in bazel_commits:
188 |       for project_commit in project_commits:
189 |         units.append(
190 |             cls._parse_unit({
191 |                 'bazel_commit': bazel_commit,
192 |                 'project_commit': project_commit,
193 |                 'bazel_source': bazel_source,
194 |                 'project_source': project_source,
195 |                 'runs': runs,
196 |                 'collect_profile': collect_profile,
197 |                 'env_configure': env_configure,
198 |                 'command': command,
199 |                 'clean': clean,
200 |                 'shutdown': shutdown,
201 |             }))
202 |     for bazel_binary in bazel_binaries:
203 |       for project_commit in project_commits:
204 |         units.append(
205 |             cls._parse_unit({
206 |                 'bazel_binary': bazel_binary,
207 |                 'project_commit': project_commit,
208 |                 'bazel_source': bazel_source,
209 |                 'project_source': project_source,
210 |                 'runs': runs,
211 |                 'collect_profile': collect_profile,
212 |                 'env_configure': env_configure,
213 |                 'command': command,
214 |                 'clean': clean,
215 |                 'shutdown': shutdown,
216 |             }))
217 |     return cls(units, benchmark_project_commits=(len(project_commits) > 1))
218 | 
219 |   @classmethod
220 |   def _parse_unit(cls, unit):
221 |     """Performs parsing of a benchmarking unit.
222 | 
223 |     Also fills up default values for attributes if they're not specified.
224 | 
225 |     Args:
226 |       unit: the benchmarking unit.
227 | 
228 |     Returns:
229 |       A dictionary that contains various attributes of the benchmarking unit.
230 |     """
231 |     parsed_unit = copy.copy(cls._DEFAULT_VALS)
232 |     parsed_unit.update(unit)
233 | 
234 |     if 'command' not in unit or not isinstance(unit['command'], str):
235 |       raise ValueError('A command has to be specified either as a global option'
236 |                        ' or in each individual benchmarking unit.')
237 |     full_command_tokens = shlex.split(unit['command'])
238 |     startup_options = []
239 |     while full_command_tokens and full_command_tokens[0].startswith('--'):
240 |       startup_options.append(full_command_tokens.pop(0))
241 |     try:
242 |       command = full_command_tokens.pop(0)
243 |     except IndexError:
244 |       raise ValueError('\'%s\' does not contain a Bazel command (e.g. build)' %
245 |                        unit['command'])
246 |     options = []
247 |     # Next, parse the options. We identify these by tokens that start with `--`.
248 |     # The exception is the token `--`, which is a valid syntax used to separate
249 |     # the flags from the targets: https://bazel.build/run/build#specifying-build-targets
250 |     # Example: bazel build --flag_a -- //foo -//excluded/...
251 |     while full_command_tokens and full_command_tokens[0].startswith('--') and full_command_tokens[0] != '--':
252 |       options.append(full_command_tokens.pop(0))
253 |     # This is a workaround for https://github.com/bazelbuild/bazel/issues/3236.
254 |     if sys.platform.startswith('linux'):
255 |       options.append('--sandbox_tmpfs_path=/tmp')
256 | 
257 |     targets = full_command_tokens
258 | 
259 |     # Attributes that need special handling.
260 |     parsed_unit['startup_options'] = startup_options
261 |     parsed_unit['command'] = command
262 |     parsed_unit['options'] = options
263 |     parsed_unit['targets'] = targets
264 | 
265 |     return parsed_unit
266 | 
--------------------------------------------------------------------------------
/utils/benchmark_config_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tests for benchmark_config."""
15 | 
16 | import benchmark_config
17 | import unittest
18 | import os
19 | import sys
20 | import tempfile
21 | 
22 | 
23 | def _pad_test_command_options(options):
24 |   # This is a workaround for https://github.com/bazelbuild/bazel/issues/3236.
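   |   # benchmark_config._parse_unit appends this flag on Linux, so the expected
   |   # option lists in the assertions below have to be padded the same way.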
25 |   if sys.platform.startswith('linux'):
26 |     return options + ['--sandbox_tmpfs_path=/tmp']
27 |   return options
28 | 
29 | class BenchmarkConfigTest(unittest.TestCase):
30 | 
31 | 
32 |   def test_parsing_from_file(self):
33 |     file_content = """
34 | units:
35 |  - bazel_commit: hash1
36 |    project_commit: hash1
37 |    command: info
38 | """
39 |     _, config_file_path = tempfile.mkstemp()
40 |     with open(config_file_path, 'w') as tf:
41 |       tf.write(file_content)
42 |     result = benchmark_config.BenchmarkConfig.from_file(config_file_path)
43 | 
44 |     self.assertEqual(result._units, [{
45 |         'bazel_commit': 'hash1',
46 |         'project_commit': 'hash1',
47 |         'bazel_source': 'https://github.com/bazelbuild/bazel.git',
48 |         'runs': 5,
49 |         'collect_profile': False,
50 |         'command': 'info',
51 |         'startup_options': [],
52 |         'options': _pad_test_command_options([]),
53 |         'targets': [],
54 |         'env_configure': None,
55 |         'clean': True,
56 |         'shutdown': True,
57 |     }])
58 |     self.assertEqual(result._benchmark_project_commits, False)
59 |     os.remove(config_file_path)
60 | 
61 | 
62 |   def test_parsing_from_string(self):
63 |     file_content = """
64 | benchmark_project_commits: False
65 | global_options:
66 |   project_commit: 'hash3'
67 |   runs: 5
68 | units:
69 |  - bazel_commit: hash1
70 |    command: info
71 |  - bazel_path: /tmp/bazel
72 |    command: build --nobuild //abc
73 |    project_commit: 'hash2'
74 |    env_configure: 'some-command'
75 |  - bazel_path: /tmp/bazel
76 |    command: build --flag_a -- //foo -//excluded/...
77 | """
78 |     result = benchmark_config.BenchmarkConfig.from_string(file_content)
79 | 
80 |     self.assertEqual(result._units, [{
81 |         'bazel_commit': 'hash1',
82 |         'project_commit': 'hash3',
83 |         'bazel_source': 'https://github.com/bazelbuild/bazel.git',
84 |         'env_configure': None,
85 |         'runs': 5,
86 |         'collect_profile': False,
87 |         'command': 'info',
88 |         'startup_options': [],
89 |         'options': _pad_test_command_options([]),
90 |         'targets': [],
91 |         'clean': True,
92 |         'shutdown': True
93 |     }, {
94 |         'bazel_path': '/tmp/bazel',
95 |         'project_commit': 'hash2',
96 |         'bazel_source': 'https://github.com/bazelbuild/bazel.git',
97 |         'env_configure': 'some-command',
98 |         'runs': 5,
99 |         'collect_profile': False,
100 |         'command': 'build',
101 |         'startup_options': [],
102 |         'options': _pad_test_command_options(['--nobuild']),
103 |         'targets': ['//abc'],
104 |         'clean': True,
105 |         'shutdown': True
106 |     }, {
107 |         'bazel_path': '/tmp/bazel',
108 |         'project_commit': 'hash3',
109 |         'bazel_source': 'https://github.com/bazelbuild/bazel.git',
110 |         'env_configure': None,
111 |         'runs': 5,
112 |         'collect_profile': False,
113 |         'command': 'build',
114 |         'startup_options': [],
115 |         'options': _pad_test_command_options(['--flag_a']),
116 |         'targets': ['--', '//foo', '-//excluded/...'],
117 |         'clean': True,
118 |         'shutdown': True
119 |     }])
120 |     self.assertEqual(result._benchmark_project_commits, False)
121 | 
122 | 
123 |   def test_parsing_from_flags(self):
124 |     result = benchmark_config.BenchmarkConfig.from_flags(
125 |         bazel_commits=['hash1'],
126 |         bazel_binaries=['path/to/bazel'],
127 |         project_commits=['hash3'],
128 |         bazel_source='foo',
129 |         project_source='foo',
130 |         runs=5,
131 |         env_configure='some-command',
132 |         collect_profile=False,
133 |         command='build --nobuild //abc',
134 |         clean=False,
135 |         shutdown=False,
136 |     )
137 |     self.assertEqual(result._units, [{
138 |         'bazel_commit': 'hash1',
139 |         'project_commit': 'hash3',
140 |         'bazel_source': 'foo',
141 |         'project_source': 'foo',
142 |         'runs': 5,
143 |         'env_configure': 'some-command',
144 |         'collect_profile': False,
145 |         'command': 'build',
146 |         'startup_options': [],
147 |         'options': _pad_test_command_options(['--nobuild']),
148 |         'targets': ['//abc'],
149 |         'clean': False,
150 |         'shutdown': False,
151 |     }, {
152 |         'bazel_binary': 'path/to/bazel',
153 |         'project_commit': 'hash3',
154 |         'bazel_source': 'foo',
155 |         'project_source': 'foo',
156 |         'runs': 5,
157 |         'env_configure': 'some-command',
158 |         'collect_profile': False,
159 |         'command': 'build',
160 |         'startup_options': [],
161 |         'options': _pad_test_command_options(['--nobuild']),
162 |         'targets': ['//abc'],
163 |         'clean': False,
164 |         'shutdown': False,
165 |     }])
166 |     self.assertEqual(result._benchmark_project_commits, False)
167 | 
168 | 
169 |   def test_get_units(self):
170 |     config = benchmark_config.BenchmarkConfig([{
171 |         'bazel_commit': 'hash1',
172 |         'project_commit': 'hash2',
173 |         'runs': 5,
174 |         'bazelrc': None,
175 |         'collect_profile': False,
176 |         'warmup_runs': 1,
177 |         'shutdown': True,
178 |         'command': 'info',
179 |         'startup_options': [],
180 |         'options': _pad_test_command_options([]),
181 |         'targets': []
182 |     }, {
183 |         'bazel_commit': '/tmp/bazel',
184 |         'project_commit': 'hash2',
185 |         'runs': 5,
186 |         'bazelrc': None,
187 |         'collect_profile': False,
188 |         'warmup_runs': 1,
189 |         'shutdown': True,
190 |         'command': 'build',
191 |         'startup_options': [],
192 |         'options': _pad_test_command_options(['--nobuild']),
193 |         'targets': ['//abc']
194 |     }])
195 |     self.assertEqual(config.get_units() is config._units, False)
196 |     self.assertEqual(config.get_units() == config._units, True)
197 | 
198 | 
199 | if __name__ == '__main__':
200 |   unittest.main()
201 | 
--------------------------------------------------------------------------------
/utils/bigquery_upload.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Handles the uploading of result CSV to BigQuery."""
15 | import re
16 | import sys
17 | import utils.logger as logger
18 | 
19 | from absl import app
20 | from absl import flags
21 | from google.cloud import bigquery
22 | 
23 | 
24 | def upload_to_bigquery(csv_file_path, project_id, dataset_id, table_id,
25 |                        location):
26 |   """Uploads the csv file to BigQuery.
27 | 
28 |   Takes the configuration from GOOGLE_APPLICATION_CREDENTIALS.
29 | 
30 |   Args:
31 |     csv_file_path: the path to the csv to be uploaded.
32 |     project_id: the BigQuery project id.
33 |     dataset_id: the BigQuery dataset id.
34 |     table_id: the BigQuery table id.
35 |     location: the BigQuery table's location.
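   | 
   |   Example (all identifiers are hypothetical):
   | 
   |     upload_to_bigquery('/tmp/perf_data.csv', 'my-project', 'bazel_bench',
   |                        'perf_data', 'US')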
36 |   """
37 | 
38 |   logger.log('Uploading the data to bigquery.')
39 |   client = bigquery.Client(project=project_id)
40 | 
41 |   dataset_ref = client.dataset(dataset_id)
42 |   table_ref = dataset_ref.table(table_id)
43 | 
44 |   job_config = bigquery.LoadJobConfig()
45 |   job_config.source_format = bigquery.SourceFormat.CSV
46 |   job_config.skip_leading_rows = 1
47 |   job_config.autodetect = False
48 | 
49 |   # load table to get schema
50 |   table = client.get_table(table_ref)
51 |   job_config.schema = table.schema
52 | 
53 |   with open(str(csv_file_path), 'rb') as source_file:
54 |     job = client.load_table_from_file(
55 |         source_file, table_ref, location=location, job_config=job_config)
56 | 
57 |   try:
58 |     job.result()  # Waits for table load to complete.
59 |   except Exception:
60 |     print('Uploading failed with: %s' % str(job.errors))
61 |     sys.exit(-1)
62 |   logger.log('Uploaded {} rows into {}:{}.'.format(job.output_rows, dataset_id,
63 |                                                    table_id))
64 | 
65 | 
66 | FLAGS = flags.FLAGS
67 | flags.DEFINE_string('upload_to_bigquery', None,
68 |                     'The details of the BigQuery table to upload ' \
69 |                     'results to: <project_id>:<dataset_id>:<table_id>:<location>.')
70 | 
71 | 
72 | def main(argv):
73 |   if not re.match(r'^[\w-]+:[\w-]+:[\w-]+:[\w-]+$', FLAGS.upload_to_bigquery):
74 |     raise ValueError('--upload_to_bigquery should follow the pattern '
75 |                      '<project_id>:<dataset_id>:<table_id>:<location>.')
76 | 
77 |   # Discard the first argument.
78 |   csv_files_to_upload = argv[1:]
79 | 
80 |   project_id, dataset_id, table_id, location = FLAGS.upload_to_bigquery.split(
81 |       ':')
82 |   for filename in csv_files_to_upload:
83 |     upload_to_bigquery(filename, project_id, dataset_id, table_id, location)
84 | 
85 | 
86 | if __name__ == '__main__':
87 |   app.run(main)
88 | 
--------------------------------------------------------------------------------
/utils/json_profiles_merger.py:
--------------------------------------------------------------------------------
1 | r"""A simple script to aggregate JSON profiles.
2 | 
3 | Collect median duration of events across these profiles.
4 | Usage:
5 |   bazel run json_profiles_merger -- \
6 |     --bazel_source=/usr/bin/bazel \
7 |     --project_source=https://github.com/bazelbuild/bazel \
8 |     --project_commit=2 \
9 |     --output_path=/tmp/median_dur.csv \
10 |     --upload_data_to=project-id:dataset-id:table-id:location \
11 |     -- \
12 |     *.profile
13 | """
14 | from absl import app
15 | from absl import flags
16 | from glob import glob
17 | 
18 | import json_profiles_merger_lib as lib
19 | import output_handling
20 | 
21 | FLAGS = flags.FLAGS
22 | flags.DEFINE_string('output_path', None, 'The path to the output file.')
23 | flags.mark_flag_as_required('output_path')
24 | flags.DEFINE_string(
25 |     'bazel_source', None,
26 |     ('(Optional) The bazel commit or path to the bazel binary from which these '
27 |      'JSON profiles were collected.'))
28 | flags.DEFINE_string(
29 |     'project_source', None,
30 |     ('(Optional) The project on which the runs that generated these JSON '
31 |      'profiles were performed.'))
32 | flags.DEFINE_string(
33 |     'project_commit', None,
34 |     '(Optional) The project commit on which the Bazel runs were performed.')
35 | flags.DEFINE_string(
36 |     'upload_data_to', None,
37 |     'Uploads data to bigquery, requires output_path to be set. '
38 |     'The details of the BigQuery table to upload results to, specified in '
39 |     'the form: <project_id>:<dataset_id>:<table_id>:<location>.')
40 | flags.DEFINE_string(
41 |     'input_profile_dir', None, '(Optional) Folder to load input profiles from. '
42 |     'This is useful when your list of input profiles is quite large.')
43 | flags.DEFINE_boolean(
44 |     'only_phases', False,
45 |     'Whether to only include events from phase markers in the final output.')
46 | 
47 | 
48 | def main(argv):
49 |   # Discard the first argument (the binary).
50 |   input_profiles = argv[1:]
51 | 
52 |   if FLAGS.input_profile_dir:
53 |     # Add any globbed files from the input_dir to the list.
54 |     input_profiles += glob(FLAGS.input_profile_dir + '/*.profile.gz')
55 | 
56 |   if not input_profiles:
57 |     raise ValueError('At least one profile must be provided!')
58 | 
59 |   aggregated_data = lib.aggregate_data(input_profiles, FLAGS.only_phases)
60 | 
61 |   lib.write_to_csv(FLAGS.bazel_source, FLAGS.project_source,
62 |                    FLAGS.project_commit, aggregated_data, FLAGS.output_path)
63 | 
64 |   if FLAGS.upload_data_to:
65 |     project_id, dataset_id, table_id, location = FLAGS.upload_data_to.split(':')
66 |     output_handling.upload_csv(
67 |         csv_file_path=FLAGS.output_path,
68 |         project_id=project_id,
69 |         dataset_id=dataset_id,
70 |         table_id=table_id,
71 |         location=location)
72 | 
73 | 
74 | if __name__ == '__main__':
75 |   app.run(main)
76 | 
--------------------------------------------------------------------------------
/utils/json_profiles_merger_lib.py:
--------------------------------------------------------------------------------
1 | """A library that holds the bulk of the logic for merging JSON profiles.
2 | 
3 | Collect duration statistics of events across these profiles.
4 | 
5 | Duration is measured in milliseconds.
6 | """
7 | from __future__ import division
8 | 
9 | import csv
10 | import gzip
11 | import json
12 | import os
13 | 
14 | 
15 | def _median(lst):
16 |   """Returns the median of the input list.
17 | 
18 |   Args:
19 |     lst: the input list; assumed to be non-empty.
20 | 
21 |   Returns:
22 |     The median of the list.
23 |   """
24 |   sorted_lst = sorted(lst)
25 |   length = len(sorted_lst)
26 |   if length % 2:
27 |     return sorted_lst[length // 2]
28 |   return (sorted_lst[length // 2 - 1] + sorted_lst[length // 2]) / 2
29 | 
30 | 
31 | def write_to_csv(bazel_source, project_source, project_commit, event_list,
32 |                  output_csv_path):
33 |   """Writes the event_list to output_csv_path.
34 | 
35 |   event_list format:
36 |   [{'cat': ..., 'name': ..., 'min': ...,
37 |     'median': ..., 'max': ..., 'count': ...}, ...]
38 |   Args:
39 |     bazel_source: the bazel commit or path to the bazel binary from which these
40 |       JSON profiles were collected.
41 |     project_source: the project on which the runs that generated these JSON
42 |       profiles were performed.
43 |     project_commit: the project commit on which the Bazel runs were performed.
44 |     event_list: the list of events, aggregated from the JSON profiles.
45 |     output_csv_path: a path to the output CSV file.
46 |   """
47 |   output_dir = os.path.dirname(output_csv_path)
48 |   if output_dir and not os.path.exists(output_dir):
49 |     os.makedirs(output_dir)
50 | 
51 |   with open(output_csv_path, 'w') as csv_file:
52 |     csv_writer = csv.writer(csv_file)
53 |     csv_writer.writerow([
54 |         'bazel_source', 'project_source', 'project_commit', 'cat', 'name',
55 |         'min', 'median', 'max', 'count'
56 |     ])
57 | 
58 |     for event in event_list:
59 |       csv_writer.writerow([
60 |           bazel_source, project_source, project_commit, event['cat'],
61 |           event['name'], event['min'], event['median'], event['max'],
62 |           event['count']
63 |       ])
64 | 
65 | 
66 | def _accumulate_event_duration(event_list, accum_dict, only_phases=False):
67 |   """Fills up accum_dict by accumulating the durations of each event.
68 | 
69 |   Also creates the entries for each phase by subtracting consecutive build
70 |   phase markers' ts attributes.
71 |   Args:
72 |     event_list: the list of event objects.
73 |     accum_dict: the dict to be filled up with a mapping of the following format:
74 |       { <event name>: { name: ..., cat: ..., dur_list: [...]}, ...}
75 |     only_phases: only collect entries from phase markers.
76 |   """
77 |   # A list of tuples of the form (marker, occurrence time in microseconds).
78 |   build_markers_ts_pairs = []
79 |   max_ts = 0
80 | 
81 |   # Only collect events with a duration.
82 |   # Special case: markers that indicate the beginning/end of execution.
83 |   for event in event_list:
84 |     if 'ts' in event:
85 |       max_ts = max(max_ts, event['ts'])
86 | 
87 |     if 'cat' in event and event['cat'] == 'build phase marker':
88 |       build_markers_ts_pairs.append((event['name'], event['ts']))
89 | 
90 |     if 'dur' not in event:
91 |       continue
92 | 
93 |     if not only_phases:
94 |       if event['name'] not in accum_dict:
95 |         accum_dict[event['name']] = {
96 |             'name': event['name'],
97 |             'cat': event['cat'],
98 |             'dur_list': []
99 |         }
100 |       accum_dict[event['name']]['dur_list'].append(event['dur'])
101 | 
102 |   # Append an artificial marker that signifies the end of the run.
103 |   # This is to determine the duration from the last marker to the actual end of
104 |   # the run and will not end up in the final data.
105 |   build_markers_ts_pairs.append((None, max_ts))
106 | 
107 |   # Fill in the markers.
108 |   for i, marker_ts_pair in enumerate(build_markers_ts_pairs[:-1]):
109 |     marker, ts = marker_ts_pair
110 |     _, next_ts = build_markers_ts_pairs[i + 1]
111 | 
112 |     if marker not in accum_dict:
113 |       accum_dict[marker] = {
114 |           'name': marker,
115 |           'cat': 'build phase marker',
116 |           'dur_list': []
117 |       }
118 |     current_phase_duration_millis = (
119 |         next_ts - ts) / 1000  # Convert from microseconds to milliseconds.
120 |     accum_dict[marker]['dur_list'].append(current_phase_duration_millis)
121 | 
122 | 
123 | def _aggregate_from_accum_dict(accum_dict):
124 |   """Aggregates the result from the accumulated dict.
125 | 
126 |   Calculates statistics of the durations and counts for each event.
127 |   All measurements of time should be in milliseconds.
128 |   Args:
129 |     accum_dict: a dict that holds a mapping of the following format:
130 |       { <event name>: { name: ..., cat: ..., dur_list: [...]}, ...}
131 | 
132 |   Returns:
133 |     A list of the following format:
134 |     [{ name: ..., cat: ..., min: ..., median: ..., max: ...,
135 |       count: ... }]
136 |   """
137 |   result = []
138 |   for obj in accum_dict.values():
139 |     result.append({
140 |         'name': obj['name'],
141 |         'cat': obj['cat'],
142 |         'median': _median(obj['dur_list']),
143 |         'min': min(obj['dur_list']),
144 |         'max': max(obj['dur_list']),
145 |         'count': len(obj['dur_list'])
146 |     })
147 |   return result
148 | 
149 | 
150 | def aggregate_data(input_profiles, only_phases=False):
151 |   """Produces the aggregated data from the JSON profile inputs.
152 | 
153 |   Collects information on cat, name and median duration of the events in the
154 |   JSON profiles.
155 |   Args:
156 |     input_profiles: a list of paths to .profile or .profile.gz files.
157 |     only_phases: only output entries from phase markers.
158 | 
159 |   Returns:
160 |     The list of objects which contain the info about cat, name and
161 |     statistics on the duration of events.
162 | 
163 |   """
164 |   # A map from event name to an object which accumulates the durations.
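   |   # Each value has the shape {'name': ..., 'cat': ..., 'dur_list': [...]};
   |   # durations from every input profile are appended to dur_list so that
   |   # _aggregate_from_accum_dict can compute min/median/max across runs.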
165 |   accum_dict = dict()
166 |   for file_path in input_profiles:
167 |     if file_path.endswith('.gz'):
168 |       with gzip.GzipFile(file_path, 'r') as gz_input_file:
169 |         event_list = json.loads(gz_input_file.read().decode('utf-8'))
170 |     else:
171 |       with open(file_path, 'r') as input_file:
172 |         event_list = json.load(input_file)
173 | 
174 |     # The events in the JSON profiles can be presented directly as a list,
175 |     # or as the value of key 'traceEvents'.
176 |     if 'traceEvents' in event_list:
177 |       event_list = event_list['traceEvents']
178 |     _accumulate_event_duration(event_list, accum_dict, only_phases)
179 | 
180 |   return _aggregate_from_accum_dict(accum_dict)
181 | 
--------------------------------------------------------------------------------
/utils/json_profiles_merger_lib_test.py:
--------------------------------------------------------------------------------
1 | """Tests for json_profiles_merger_lib."""
2 | 
3 | import json_profiles_merger_lib as lib
4 | import unittest
5 | 
6 | 
7 | class JsonProfilesMergerLibTest(unittest.TestCase):
8 | 
9 |   def test_accumulate_event_duration(self):
10 |     event_list_1 = [
11 |         {
12 |             'name': 'to_skip_no_dur',
13 |         },
14 |         {
15 |             'cat': 'fake_cat',
16 |             'name': 'fake_name',
17 |             'dur': 3,
18 |             'non_dur': 'something'
19 |         },
20 |     ]
21 | 
22 |     event_list_2 = [
23 |         {
24 |             'name': 'to_skip_no_dur',
25 |         },
26 |         {
27 |             'cat': 'fake_cat',
28 |             'name': 'fake_name',
29 |             'dur': 1,
30 |             'non_dur': 'something'
31 |         },
32 |     ]
33 | 
34 |     accum_dict = {}
35 |     lib._accumulate_event_duration(event_list_1, accum_dict)
36 |     self.assertEqual(
37 |         {
38 |             'fake_name': {
39 |                 'cat': 'fake_cat',
40 |                 'name': 'fake_name',
41 |                 'dur_list': [3]
42 |             },
43 |         }, accum_dict)
44 |     lib._accumulate_event_duration(event_list_2, accum_dict)
45 |     self.assertEqual(
46 |         {
47 |             'fake_name': {
48 |                 'cat': 'fake_cat',
49 |                 'name': 'fake_name',
50 |                 'dur_list': [3, 1]
51 |             }
52 |         }, accum_dict)
53 | 
54 |   def test_accumulate_build_phase_marker(self):
55 |     event_list_3 = [
56 |         {
57 |             'name': 'to_skip_no_dur',
58 |         },
59 |         {
60 |             'cat': 'build phase marker',
61 |             'name': 'phase1',
62 |             'ts': 1000
63 |         },
64 |         {
65 |             'cat': 'build phase marker',
66 |             'name': 'phase2',
67 |             'ts': 10000
68 |         },
69 |         {
70 |             'cat': 'fake_cat',
71 |             'name': 'fake_name',
72 |             'dur': 1,
73 |             'ts': 10001,
74 |             'non_dur': 'something'
75 |         },
76 |     ]
77 | 
78 |     accum_dict = {}
79 |     lib._accumulate_event_duration(event_list_3, accum_dict)
80 |     self.assertEqual(
81 |         {
82 |             'phase1': {
83 |                 'cat': 'build phase marker',
84 |                 'name': 'phase1',
85 |                 'dur_list': [9.0]
86 |             },
87 |             'phase2': {
88 |                 'cat': 'build phase marker',
89 |                 'name': 'phase2',
90 |                 'dur_list': [0.001]
91 |             },
92 |             'fake_name': {
93 |                 'cat': 'fake_cat',
94 |                 'name': 'fake_name',
95 |                 'dur_list': [1]
96 |             },
97 |         }, accum_dict)
98 | 
99 |   def test_accumulate_only_phase_marker(self):
100 |     event_list = [
101 |         {
102 |             'name': 'to_skip_no_dur',
103 |         },
104 |         {
105 |             'cat': 'build phase marker',
106 |             'name': 'phase1',
107 |             'ts': 1000
108 |         },
109 |         {
110 |             'cat': 'build phase marker',
111 |             'name': 'phase2',
112 |             'ts': 10000
113 |         },
114 |         {
115 |             'cat': 'fake_cat',
116 |             'name': 'fake_name',
117 |             'dur': 1,
118 |             'ts': 10001,
119 |             'non_dur': 'something'
120 |         },
121 |     ]
122 | 
123 |     accum_dict = {}
124 |     lib._accumulate_event_duration(event_list, accum_dict, only_phases=True)
125 |     self.assertEqual(
126 |         {
127 |             'phase1': {
128 |                 'cat': 'build phase marker',
129 |                 'name': 'phase1',
130 |                 'dur_list': [9.0]
131 |             },
132 |             'phase2': {
133 |                 'cat': 'build phase marker',
134 |                 'name': 'phase2',
135 |                 'dur_list': [0.001]
136 |             },
137 |         }, accum_dict)
138 | 
139 |   def test_aggregate_from_accum_dict(self):
140 |     accum_dict = {
141 |         'fake_name': {
142 |             'cat': 'fake_cat',
143 |             'name': 'fake_name',
144 |             'dur_list': [3, 1]
145 |         },
146 |     }
147 | 
148 |     self.assertEqual([{
149 |         'cat': 'fake_cat',
150 |         'name': 'fake_name',
151 |         'median': 2.0,
152 |         'min': 1,
153 |         'max': 3,
154 |         'count': 2
155 |     }], lib._aggregate_from_accum_dict(accum_dict))
156 | 
157 | 
158 | if __name__ == '__main__':
159 |   unittest.main()
160 | 
--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utility module to handle logging for the benchmarking script."""
15 | import sys
16 | from absl import logging
17 | 
18 | _COLOR_TMPL = {
19 |     'info': '\033[32m%s\033[0m',  # Green
20 |     'warn': '\033[33m%s\033[0m',  # Yellow
21 |     'error': '\033[31m%s\033[0m',  # Red
22 | }
23 | 
24 | 
25 | def _maybe_colorize_text(text, color):
26 |   """Colorize the text if running on a terminal."""
27 |   if not sys.stdout.isatty():
28 |     return text
29 |   return _COLOR_TMPL[color] % text
30 | 
31 | 
32 | def log(text):
33 |   """Logs a message using the logger singleton."""
34 |   logging.info(_maybe_colorize_text(text, 'info'))
35 | 
36 | 
37 | def log_warn(text):
38 |   """Logs a warning message using the logger singleton."""
39 |   logging.warning(_maybe_colorize_text(text, 'warn'))
40 | 
41 | 
42 | def log_error(text):
43 |   """Logs an error message using the logger singleton."""
44 |   logging.error(_maybe_colorize_text(text, 'error'))
45 | 
--------------------------------------------------------------------------------
/utils/output_handling.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | import csv
16 | import socket
17 | import getpass
18 | 
19 | import utils.logger as logger
20 | 
21 | 
22 | def export_csv(data_directory, filename, data):
23 |   """Exports the content of data to a csv file in data_directory.
24 | 
25 |   Args:
26 |     data_directory: the directory to store the csv file.
27 |     filename: the name of the .csv file.
28 |     data: the collected data to be exported.
29 | 
30 |   Returns:
31 |     The path to the newly created csv file.
32 |   """
33 |   if not os.path.exists(data_directory):
34 |     os.makedirs(data_directory)
35 |   csv_file_path = os.path.join(data_directory, filename)
36 |   logger.log('Writing raw data into csv file: %s' % str(csv_file_path))
37 | 
38 |   with open(csv_file_path, 'w') as csv_file:
39 |     hostname = socket.gethostname()
40 |     username = getpass.getuser()
41 |     csv_writer = csv.writer(csv_file)
42 |     csv_writer.writerow([
43 |         'project_source', 'project_commit', 'bazel_commit', 'run', 'cpu',
44 |         'wall', 'system', 'memory', 'command', 'expressions', 'hostname',
45 |         'username', 'options', 'exit_status', 'started_at', 'platform',
46 |         'project_label'
47 |     ])
48 | 
49 |     for (bazel_commit, project_commit), data_item in data.items():
50 |       command, expressions, options = data_item['args']
51 |       non_measurables = data_item['non_measurables']
52 |       for idx, run in enumerate(data_item['results'], start=1):
53 |         csv_writer.writerow([
54 |             non_measurables['project_source'], project_commit, bazel_commit,
55 |             idx, run['cpu'], run['wall'], run['system'], run['memory'], command,
56 |             expressions, hostname, username, options, run['exit_status'],
57 |             run['started_at'], non_measurables['platform'],
58 |             non_measurables['project_label']
59 |         ])
60 |   return csv_file_path
61 | 
62 | 
63 | def export_file(data_directory, filename, content):
64 |   """Exports the content of data to a file in data_directory.
65 | 
66 |   Args:
67 |     data_directory: the directory to store the file.
68 |     filename: the name of the file.
69 |     content: the content to be exported.
70 | 
71 |   Returns:
72 |     The path to the newly created file.
73 |   """
74 |   if not os.path.exists(data_directory):
75 |     os.makedirs(data_directory)
76 |   out_file_path = os.path.join(data_directory, filename)
77 | 
78 |   with open(out_file_path, 'w') as out_file:
79 |     out_file.write(content)
80 | 
81 |   return out_file_path
82 | 
--------------------------------------------------------------------------------
/utils/storage_upload.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Handles the uploading of results to Storage."""
15 | import os
16 | import re
17 | import utils.logger as logger
18 | 
19 | from absl import app
20 | from absl import flags
21 | from google.cloud import storage
22 | 
23 | 
24 | def upload_to_storage(file_path, project_id, bucket_id, destination):
25 |   """Uploads the file to Storage.
26 | 
27 |   Takes the configuration from GOOGLE_APPLICATION_CREDENTIALS.
28 | 
29 |   Args:
30 |     file_path: the path to the file to be uploaded.
31 |     project_id: the GCP project id.
32 |     bucket_id: the Storage bucket.
33 |     destination: the path to the destination on the bucket.
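   | 
   |   Example (all names are hypothetical):
   | 
   |     upload_to_storage('/tmp/perf_data.csv', 'my-project',
   |                       'bazel-bench-results', 'reports/perf_data.csv')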
34 |   """
35 |   # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14
36 | 
37 |   logger.log('Uploading data to Storage.')
38 |   client = storage.Client(project=project_id)
39 |   bucket = client.get_bucket(bucket_id)
40 |   blob = bucket.blob(destination)
41 | 
42 |   blob.upload_from_filename(file_path)
43 | 
44 |   logger.log('Uploaded {} to {}/{}.'.format(file_path, bucket_id, destination))
45 | 
46 | 
47 | FLAGS = flags.FLAGS
48 | flags.DEFINE_string('upload_to_storage', None,
49 |                     'The details of the GCP Storage bucket to upload ' \
50 |                     'results to: <project_id>:<bucket_id>:<subdirectory>.')
51 | 
52 | 
53 | def main(argv):
54 |   if not re.match(r'^[\w-]+:[\w-]+:[\w\/-]+$', FLAGS.upload_to_storage):
55 |     raise ValueError('--upload_to_storage should follow the pattern '
56 |                      '<project_id>:<bucket_id>:<subdirectory>.')
57 | 
58 |   # Discard the first argument.
59 |   files_to_upload = argv[1:]
60 | 
61 |   project_id, bucket_id, subdirectory = FLAGS.upload_to_storage.split(':')
62 |   for filepath in files_to_upload:
63 |     filename = os.path.basename(filepath)
64 |     destination = '%s/%s' % (subdirectory, filename)
65 |     upload_to_storage(filepath, project_id, bucket_id, destination)
66 | 
67 | 
68 | if __name__ == '__main__':
69 |   app.run(main)
70 | 
--------------------------------------------------------------------------------
/utils/values.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Stores a set of numeric values and offers statistical operations on them."""
15 | import numpy
16 | import scipy.stats
17 | import copy
18 | 
19 | 
20 | class Values(object):
21 |   """Utility class to store numeric values.
22 | 
23 |   This class is used in order to collect and compare metrics during
24 |   benchmarking.
25 | 
26 |   Attributes:
27 |     items: An optional list of numeric values to initialize the data structure
28 |       with.
29 |   """
30 | 
31 |   def __init__(self, items=None):
32 |     self._items = items or []
33 | 
34 |   def add(self, value):
35 |     """Adds value to the list of stored values."""
36 |     self._items.append(value)
37 | 
38 |   def values(self):
39 |     """Returns the list of stored values."""
40 |     return self._items
41 | 
42 |   def mean(self):
43 |     """Returns the mean of the stored values."""
44 |     return numpy.mean(self._items)
45 | 
46 |   def median(self):
47 |     """Returns the median of the stored values."""
48 |     return numpy.median(self._items)
49 | 
50 |   def stddev(self):
51 |     """Returns the standard deviation of the stored values."""
52 |     return float(numpy.std(self._items))
53 | 
54 |   def pval(self, base_values):
55 |     """Computes the Kolmogorov-Smirnov statistic.
56 | 
57 |     Args:
58 |       base_values: A list of numeric values to compare self.values() with.
59 | 
60 |     Returns:
61 |       One minus the p-value for the null hypothesis that the samples were
62 |       drawn from the same distribution (values near 1 suggest they differ).
63 |       Returns -1 if it cannot be computed because one of the samples contains
64 |       less than 2 values.
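   | 
   |     Example (mirrors test_pval_significant in values_test.py):
   | 
   |       Values([1, 1, 1, 1, 1]).pval([10, 10, 10, 10, 10])  # ~0.992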
65 |     """
66 |     vals = self._items
67 |     if len(vals) > 1 and len(base_values) > 1:
68 |       _, p = scipy.stats.ks_2samp(vals, base_values)
69 |       return 1 - p
70 |     else:
71 |       return -1
72 | 
73 |   def items(self):
74 |     """Returns a copy of the items."""
75 |     return copy.copy(self._items)
76 | 
77 |   def exclude_from_indexes(self, indexes):
78 |     """Returns a copy of Values which excludes the items from certain indexes."""
79 |     filtered = []
80 |     for i, value in enumerate(self._items):
81 |       if i not in indexes:
82 |         filtered.append(value)
83 | 
84 |     return Values(filtered)
85 | 
--------------------------------------------------------------------------------
/utils/values_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Unit tests for benchmark values utility class."""
15 | import unittest
16 | 
17 | from values import Values
18 | 
19 | 
20 | class ValuesTest(unittest.TestCase):
21 | 
22 |   def test_initialize(self):
23 |     values = Values()
24 |     self.assertEqual([], values.values())
25 | 
26 |     values = Values([2.3, 4.2])
27 |     self.assertEqual([2.3, 4.2], values.values())
28 | 
29 |   def test_add(self):
30 |     values = Values()
31 |     self.assertEqual([], values.values())
32 | 
33 |     values.add(4.2)
34 |     values.add(2.3)
35 |     self.assertEqual([4.2, 2.3], values.values())
36 | 
37 |   def test_median(self):
38 |     values = Values([1, 10, 1])
39 |     self.assertEqual(1, values.median())
40 | 
41 |     # Returns the average of the two middle values when len(values()) is even.
42 |     values.add(20)
43 |     self.assertEqual(5.5, values.median())
44 | 
45 |     values.add(20)
46 |     self.assertEqual(10, values.median())
47 | 
48 |   def test_mean(self):
49 |     values = Values([1, 10, 1])
50 |     self.assertEqual(4, values.mean())
51 | 
52 |   def test_stddev(self):
53 |     values = Values([1, 10, 1])
54 |     self.assertAlmostEqual(4.24, values.stddev(), places=2)
55 | 
56 |   def test_pval_identical(self):
57 |     identical_list = [1, 10, 1]
58 |     values = Values(identical_list)
59 |     self.assertEqual(0, values.pval(identical_list))
60 | 
61 |   def test_pval_significant(self):
62 |     values = Values([1, 1, 1, 1, 1])
63 |     self.assertAlmostEqual(0.992, values.pval([10, 10, 10, 10, 10]), places=3)
64 | 
65 | 
66 | if __name__ == '__main__':
67 |   unittest.main()
68 | 
--------------------------------------------------------------------------------