├── .bazelci
│   ├── postsubmit.yml
│   └── presubmit.yml
├── .bazelrc
├── .bazelversion
├── .gitignore
├── BUILD
├── CONTRIBUTING.md
├── ISSUE_TEMPLATE.md
├── LICENSE
├── MODULE.bazel
├── PULL_REQUEST_TEMPLATE.md
├── README.md
├── WORKSPACE
├── WORKSPACE.bzlmod
├── bb-icon.png
├── benchmark.py
├── benchmark_test.py
├── report
│   ├── BUILD
│   ├── generate_master_report.py
│   └── generate_report.py
├── testutils
│   ├── BUILD
│   ├── __init__.py
│   └── fakes.py
├── third_party
│   ├── BUILD
│   ├── requirements.in
│   └── requirements.txt
└── utils
    ├── BUILD
    ├── __init__.py
    ├── bazel.py
    ├── bazel_test.py
    ├── benchmark_config.py
    ├── benchmark_config_test.py
    ├── bigquery_upload.py
    ├── json_profiles_merger.py
    ├── json_profiles_merger_lib.py
    ├── json_profiles_merger_lib_test.py
    ├── logger.py
    ├── output_handling.py
    ├── storage_upload.py
    ├── values.py
    └── values_test.py

/.bazelci/postsubmit.yml:
--------------------------------------------------------------------------------
1 | ---
2 | tasks:
3 |   ubuntu2204:
4 |     include_json_profile:
5 |       - build
6 |       - test
7 |     build_targets:
8 |       - "//..."
9 |     test_targets:
10 |       - "//..."
11 |   macos_arm64:
12 |     include_json_profile:
13 |       - build
14 |       - test
15 |     build_targets:
16 |       - "//..."
17 |     test_targets:
18 |       - "//..."
19 | 
--------------------------------------------------------------------------------
/.bazelci/presubmit.yml:
--------------------------------------------------------------------------------
1 | ---
2 | tasks:
3 |   ubuntu2204:
4 |     build_targets:
5 |       - "//..."
6 |     test_targets:
7 |       - "//..."
8 |   macos_arm64:
9 |     build_targets:
10 |       - "//..."
11 |     test_targets:
12 |       - "//..."
13 | 
--------------------------------------------------------------------------------
/.bazelrc:
--------------------------------------------------------------------------------
1 | build --incompatible_disallow_empty_glob
2 | 
--------------------------------------------------------------------------------
/.bazelversion:
--------------------------------------------------------------------------------
1 | 7.3.1
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore backup files.
2 | *~
3 | # Ignore Vim swap files.
4 | .*.swp
5 | # Ignore files generated by IDEs.
6 | /.classpath
7 | /.factorypath
8 | /.idea/
9 | /.ijwb/
10 | /.project
11 | /.settings
12 | /.vscode/
13 | 
14 | # Byte-compiled / optimized / DLL files
15 | __pycache__/
16 | *.py[cod]
17 | *$py.class
18 | 
19 | # Config file
20 | utils/config.py
21 | 
22 | # Bazel
23 | bazel-*
24 | MODULE.bazel.lock
25 | 
26 | # Python venv
27 | .venv/
--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
1 | load("@rules_python//python:defs.bzl", "py_binary", "py_test")
2 | load("@third_party//:requirements.bzl", "requirement")
3 | 
4 | # TODO(https://github.com/bazelbuild/bazel-bench/issues/36): Make these work for python3.
5 | py_binary(
6 |     name = "benchmark",
7 |     srcs = ["benchmark.py"],
8 |     deps = [
9 |         "//utils",
10 |         requirement("absl-py"),
11 |         requirement("GitPython"),
12 |         requirement("gitdb2"),
13 |     ],
14 | )
15 | 
16 | py_test(
17 |     name = "benchmark_test",
18 |     srcs = ["benchmark_test.py"],
19 |     deps = [
20 |         ":benchmark",
21 |         "//testutils",
22 |         requirement("mock"),
23 |     ],
24 | )
25 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 | 
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 | 
6 | ## Contributor License Agreement
7 | 
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 | 
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 | 
18 | ## Code reviews
19 | 
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 | 
25 | ## Community Guidelines
26 | 
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 | 
--------------------------------------------------------------------------------
/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | **Description of the problem / feature request:**
2 | 
3 | > Replace this line with your answer.
4 | 
5 | **Feature requests: what underlying problem are you trying to solve with this feature?**
6 | 
7 | > Replace this line with your answer.
8 | 
9 | **Bugs: what's the simplest, easiest way to reproduce this bug? Please provide a minimal example if possible.**
10 | 
11 | > Replace this line with your answer.
12 | 
13 | **Any other information, logs, or outputs that you want to share?**
14 | 
15 | > Replace these lines with your answer.
16 | > 
17 | > If the files are large, upload them as an attachment or provide a link.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | 
2 |                                  Apache License
3 |                            Version 2.0, January 2004
4 |                         http://www.apache.org/licenses/
5 | 
6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 | 
8 |    1. Definitions.
9 | 
10 |       "License" shall mean the terms and conditions for use, reproduction,
11 |       and distribution as defined by Sections 1 through 9 of this document.
12 | 
13 |       "Licensor" shall mean the copyright owner or entity authorized by
14 |       the copyright owner that is granting the License.
15 | 
16 |       "Legal Entity" shall mean the union of the acting entity and all
17 |       other entities that control, are controlled by, or are under common
18 |       control with that entity.
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 
--------------------------------------------------------------------------------
/MODULE.bazel:
--------------------------------------------------------------------------------
1 | module(
2 |     name = "bazel-bench",
3 |     version = "0.0.0",
4 | )
5 | 
6 | bazel_dep(
7 |     name = "rules_python",
8 |     version = "0.35.0",
9 | )
10 | 
11 | # -- extension definitions -- #
12 | python = use_extension(
13 |     "@rules_python//python/extensions:python.bzl",
14 |     "python",
15 |     dev_dependency = True,
16 | )
17 | python.toolchain(
18 |     python_version = "3.10",
19 | )
20 | 
21 | pip = use_extension(
22 |     "@rules_python//python/extensions:pip.bzl",
23 |     "pip",
24 |     dev_dependency = True,
25 | )
26 | pip.parse(
27 |     hub_name = "third_party",
28 |     python_version = "3.10",
29 |     requirements_lock = "//third_party:requirements.txt",
30 | )
31 | use_repo(pip, "third_party")
32 | 
--------------------------------------------------------------------------------
/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | **What this PR does and why we need it:**
2 | 
3 | > Replace this line with your answer.
4 | 
5 | **New changes / Issues that this PR fixes:**
6 | 
7 | > Replace this line with your answer.
8 | 
9 | **Special notes for the reviewer:**
10 | 
11 | > Replace this line with your answer.
12 | 
13 | **Does this require a change in the script's interface or the BigQuery table structure?**
14 | 
15 | > Replace this line with your answer.
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bazel Performance Benchmarking
2 | 
3 | [![Build Status](https://badge.buildkite.com/1499c911d1faf665b9f6ba28d0a61e64c26a8586321b9d63a8.svg)](https://buildkite.com/bazel/bazel-bench)
4 | 
5 | **Status**: WIP
6 | 
7 | ![logo](bb-icon.png)
8 | 
9 | # Setup
10 | 
11 | Prerequisites: `git` and `bazel`.
12 | 
13 | ```
14 | # Clone bazel-bench.
15 | $ git clone https://github.com/bazelbuild/bazel-bench.git
16 | $ cd bazel-bench
17 | ```
18 | 
19 | To do a test run, run the following command (if you're on Windows, populate
20 | `--data_directory` with an appropriate Windows-style path):
21 | 
22 | ```shell
23 | $ bazel run :benchmark \
24 |   -- \
25 |   --bazel_commits=b8468a6b68a405e1a5767894426d3ea9a1a2f22f,ad503849e78b98d762f03168de5a336904280150 \
26 |   --project_source=https://github.com/bazelbuild/rules_cc.git \
27 |   --data_directory=/tmp/bazel-bench-data \
28 |   --verbose \
29 |   -- build //:all
30 | ```
31 | 
32 | These Bazel commits might be too old and no longer buildable by your local Bazel; replace them with more recent commits from [bazelbuild/bazel](https://github.com/bazelbuild/bazel). The above command prints a result table on the terminal and writes a csv
33 | file to the specified `--data_directory`.
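
If those example commits have aged out of buildability, one quick way to pick two recent Bazel commits (a sketch, not part of bazel-bench itself; the `/tmp/bazel-head` path is arbitrary) is a shallow clone:

```shell
# Fetch only the two newest commits of bazelbuild/bazel and print their full
# hashes; substitute these into --bazel_commits in the command above.
$ git clone --depth=2 https://github.com/bazelbuild/bazel.git /tmp/bazel-head
$ git -C /tmp/bazel-head log --format=%H
```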
34 | 
35 | ## Syntax
36 | 
37 | Bazel-bench has the following syntax:
38 | 
39 | ```shell
40 | $ bazel run :benchmark -- <bazel-bench flags> -- <args to pass to the Bazel binary>
41 | 
42 | ```
43 | 
44 | For example, to benchmark the performance of two Bazel commits, A and B, on the same
45 | command `bazel build --nobuild //:all` of the `rules_cc` project, you'd do:
46 | 
47 | ```shell
48 | $ bazel run :benchmark \
49 |   -- \
50 |   --bazel_commits=A,B \
51 |   --project_source=https://github.com/bazelbuild/rules_cc.git \
52 |   -- build --nobuild //:all
53 | ```
54 | 
55 | Note the double-dash `--` before the command arguments. You can pass any
56 | arguments that you would normally pass to Bazel to the script. The performance of
57 | commands other than `build` (e.g. `query`) can also be benchmarked.
58 | 
59 | ### Config-file Interface
60 | 
61 | The flag-based approach does not support cases where the benchmarked Bazel
62 | commands differ. The most common use case: as a rule developer, I want
63 | to verify the effect of my flag on Bazel performance. For that, we need the
64 | config-file interface. An example config file looks like this:
65 | 
66 | ```yaml
67 | # config.yaml
68 | global_options:
69 |   project_commit: 595a730
70 |   runs: 5
71 |   collect_profile: false
72 |   project_source: /path/to/project/repo
73 | units:
74 |   - bazel_binary: /usr/bin/bazel
75 |     command: --startup_option1 build --nomy_flag //:all
76 |   - bazel_binary: /usr/bin/bazel
77 |     command: --startup_option2 build --my_flag //:all
78 | ```
79 | 
80 | To launch the benchmark:
81 | 
82 | ```shell
83 | $ bazel run :benchmark -- --benchmark_config=/absolute/path/to/config.yaml
84 | ```
85 | 
86 | The above config file would benchmark 2 "units". A unit is defined as a set of
87 | conditions that describes a scenario to be benchmarked. This setup allows
88 | maximum flexibility, as the conditions are independent between units. It's even
89 | possible to benchmark a `bazel_commit` against a pre-built `bazel_binary`.
90 | 
91 | `global_options` is the list of options applied to every unit. These global options are overridden by a unit's local options.
92 | 
93 | For the list of currently supported flags/attributes and their default values,
94 | refer to [utils/benchmark_config.py](utils/benchmark_config.py).
95 | 
96 | #### Known Limitations:
97 | 
98 | - `project_source` should be a global option, as we don't support benchmarking
99 | multiple projects in one benchmark run. `project_commit`, however, can differ between units.
100 | - Incremental benchmarks aren't available.
101 | - Commands have to be in canonical form (next section).
102 | 
103 | 
104 | ### Bazel Arguments Interpretation
105 | 
106 | Bazel arguments are parsed manually. It
107 | is _important_ that the supplied arguments in the command line strictly follow
108 | the canonical form:
109 | 
110 | ```
111 | <startup options> <command> <options> <targets>
112 | ```
113 | 
114 | Examples of non-canonical command-line arguments that could result in a wrong
115 | interpretation:
116 | 
117 | ```
118 | GOOD: (correct order, options in canonical form)
119 | build --nobuild --compilation_mode=opt //:all
120 | 
121 | BAD: (non-canonical options)
122 | build --nobuild -c opt //:all
123 | 
124 | BAD: (wrong order)
125 | build --nobuild //:all --compilation_mode=opt
126 | ```
127 | 
128 | ## Available flags
129 | 
130 | To show all the available flags:
131 | 
132 | ```
133 | $ bazel run :benchmark -- --helpshort
134 | ```
135 | 
136 | Some useful flags are:
137 | 
138 | ```
139 | --bazel_binaries: The pre-built bazel binaries to benchmark.
140 |   (a comma separated list)
141 | --bazel_commits: The commits at which bazel is built.
142 |   (default: 'latest')
143 |   (a comma separated list)
144 | --bazel_source: Either a path to the local Bazel repo or a https url to a GitHub repository.
145 |   (default: 'https://github.com/bazelbuild/bazel.git')
146 | --bazelrc: The path to a .bazelrc file.
147 | --csv_file_name: The name of the output csv, without the .csv extension.
148 | --data_directory: The directory in which the csv files should be stored.
149 | --[no]prefetch_ext_deps: Whether to do an initial run to pre-fetch external dependencies.
150 |   (default: 'true')
151 | --project_commits: The commits from the git project to be benchmarked.
152 |   (default: 'latest')
153 |   (a comma separated list)
154 | --project_source: Either a path to the local git project to be built or a https url to a GitHub repository.
155 | --runs: The number of benchmark runs.
156 |   (default: '5')
157 |   (an integer)
158 | --[no]verbose: Whether to include git/Bazel stdout logs.
159 |   (default: 'false')
160 | --[no]collect_profile: Whether to collect JSON profile for each run.
161 |   Requires --data_directory to be set.
162 |   (default: 'false')
163 | ```
164 | 
165 | ## Collecting JSON Profile
166 | 
167 | [Bazel's JSON Profile](https://docs.bazel.build/versions/master/skylark/performance.html#json-profile)
168 | is a useful tool to investigate the performance of Bazel. You can configure
169 | `bazel-bench` to export these JSON profiles on runs using the
170 | `--collect_profile` flag.
171 | 
172 | ### JSON Profile Aggregation
173 | 
174 | For each pair of `project_commit` and `bazel_commit`, we produce one JSON
175 | profile per run. To get a better overview of the
176 | performance of each phase and event, we can aggregate these profiles and
177 | produce the median duration of each event across them.
178 | 
179 | To run the tool:
180 | 
181 | ```
182 | bazel run utils:json_profiles_merger \
183 |   -- \
184 |   --bazel_source=<bazel_source> \
185 |   --project_source=<project_source> \
186 |   --project_commit=<project_commit> \
187 |   --output_path=/tmp/outfile.csv \
188 |   -- /tmp/my_json_profiles_*.profile
189 | ```
190 | 
191 | You can pass the pattern that selects the input profiles into the positional
192 | argument of the script, like in the above example
193 | (`/tmp/my_json_profiles_*.profile`).
194 | 
195 | ## Output Directory Layout
196 | 
197 | By default, bazel-bench will store the measurement results and other required
198 | files (project clones, built binaries, ...) under the `~/.bazel-bench`
199 | directory.
200 | 
201 | The layout is:
202 | 
203 | ```
204 | ~/.bazel-bench/                        <= The root of bazel-bench's output dir.
205 |   bazel/                               <= Where bazel's repository is cloned.
206 |   bazel-bin/                           <= Where the built bazel binaries are stored.
207 |     fba9a2c87ee9589d72889caf082f1029/  <= The bazel commit hash.
208 |       bazel                            <= The actual bazel binary.
209 |   project-clones/                      <= Where the projects' repositories are cloned.
210 |     7ffd56a6e4cb724ea575aba15733d113/  <= Each project is stored under a project hash,
211 |                                           computed from its source.
212 |   out/                                 <= This is the default output root. But
213 |                                           the output root can also be set via --data_directory.
214 | ```
215 | 
216 | To clear the caches, simply `rm -rf` where necessary.
217 | 
218 | ## Uploading to BigQuery & Storage
219 | 
220 | As an important part of our bazel-bench daily pipeline, we upload the csv output
221 | files to BigQuery and Storage, using separate targets.
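
The files to upload are the per-run CSVs described above; assuming the default output root (yours may differ if you set `--data_directory`), a quick way to find them:

```shell
# List csv outputs from previous runs; these become the positional file
# arguments of the upload commands below.
$ ls ~/.bazel-bench/out/*.csv
```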
222 | 
223 | To upload the output to BigQuery & Storage, you'll need the GCP credentials and
224 | the table details. Please contact leba@google.com.
225 | 
226 | BigQuery:
227 | 
228 | ```
229 | bazel run utils:bigquery_upload \
230 |   -- \
231 |   --upload_to_bigquery=<project_id>:<dataset_id>:<table_id>:<location> \
232 |   -- \
233 |   <file1> <file2> ...
234 | ```
235 | 
236 | Storage:
237 | 
238 | ```
239 | bazel run utils:storage_upload \
240 |   -- \
241 |   --upload_to_storage=<project_id>:<bucket_id>:<subdirectory> \
242 |   -- \
243 |   <file1> <file2> ...
244 | ```
245 | 
246 | ## Performance Report
247 | 
248 | We generate a performance report with BazelCI. The generator script can be found
249 | under the `/report` directory.
250 | 
251 | Example Usage: `$ python3 report/generate_report.py --date=2019-01-01
252 | --project=dummy --storage_bucket=dummy_bucket`
253 | 
254 | For more detailed usage information, run: `$ python3 report/generate_report.py
255 | --help`
256 | 
257 | ## Tests
258 | 
259 | The tests for each module are found in the same directory. To run the tests,
260 | simply:
261 | 
262 | ```
263 | $ bazel test ...
264 | ```
265 | 
--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
1 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
2 | 
3 | http_archive(
4 |     name = "rules_python",
5 |     sha256 = "e85ae30de33625a63eca7fc40a94fea845e641888e52f32b6beea91e8b1b2793",
6 |     strip_prefix = "rules_python-0.27.1",
7 |     url = "https://github.com/bazelbuild/rules_python/releases/download/0.27.1/rules_python-0.27.1.tar.gz",
8 | )
9 | 
10 | load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains")
11 | 
12 | py_repositories()
13 | 
14 | load("@rules_python//python:pip.bzl", "pip_parse")
15 | 
16 | # Use a hermetic Python interpreter so that builds are reproducible
17 | # irrespective of the Python version available on the host machine.
18 | python_register_toolchains(
19 |     name = "python3_10",
20 |     python_version = "3.10",
21 | )
22 | 
23 | load("@python3_10//:defs.bzl", "interpreter")
24 | 
25 | # Translate requirements.txt into a @third_party external repository.
26 | pip_parse(
27 |     name = "third_party",
28 |     python_interpreter_target = interpreter,
29 |     requirements_lock = "//third_party:requirements.txt",
30 | )
31 | 
32 | load("@third_party//:requirements.bzl", "install_deps")
33 | 
34 | # Initialize the repositories for the packages in requirements.txt.
35 | install_deps()
36 | 
--------------------------------------------------------------------------------
/WORKSPACE.bzlmod:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bazelbuild/bazel-bench/f0c8f585ad4733f184222be59c4401f9371991a6/WORKSPACE.bzlmod
--------------------------------------------------------------------------------
/bb-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bazelbuild/bazel-bench/f0c8f585ad4733f184222be59c4401f9371991a6/bb-icon.png
--------------------------------------------------------------------------------
/benchmark.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #    http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import csv
15 | import collections
16 | import datetime
17 | import os
18 | import subprocess
19 | import sys
20 | import hashlib
21 | import re
22 | import shutil
23 | import collections
24 | import tempfile
25 | import git
26 | import utils.logger as logger
27 | import utils.json_profiles_merger_lib as json_profiles_merger_lib
28 | import utils.output_handling as output_handling
29 | 
30 | from absl import app
31 | from absl import flags
32 | 
33 | from utils.values import Values
34 | from utils.bazel import Bazel
35 | from utils.benchmark_config import BenchmarkConfig
36 | 
37 | # BB_ROOT has different values, depending on the platform.
38 | BB_ROOT = os.path.join(os.path.expanduser('~'), '.bazel-bench')
39 | 
40 | # The path to the directory that stores Bazel clones.
41 | BAZEL_CLONE_BASE_PATH = os.path.join(BB_ROOT, 'bazel-clones')
42 | # The path to the directory that stores project clones.
43 | PROJECT_CLONE_BASE_PATH = os.path.join(BB_ROOT, 'project-clones')
44 | BAZEL_GITHUB_URL = 'https://github.com/bazelbuild/bazel.git'
45 | # The path to the directory that stores the bazel binaries.
46 | BAZEL_BINARY_BASE_PATH = os.path.join(BB_ROOT, 'bazel-bin')
47 | # The path to the directory that stores the output csv (if required).
48 | DEFAULT_OUT_BASE_PATH = os.path.join(BB_ROOT, 'out')
49 | # The default name of the aggr json profile.
50 | DEFAULT_AGGR_JSON_PROFILE_FILENAME = 'aggr_json_profiles.csv'
51 | 
52 | 
53 | def _get_clone_subdir(project_source):
54 |   """Calculates a hexdigest of project_source to serve as a unique subdir name."""
55 |   return hashlib.md5(project_source.encode('utf-8')).hexdigest()
56 | 
57 | 
58 | def _exec_command(args, shell=False, cwd=None):
59 |   logger.log('Executing: %s' % (args if shell else ' '.join(args)))
60 | 
61 |   return subprocess.run(
62 |       args,
63 |       shell=shell,
64 |       cwd=cwd,
65 |       check=True,
66 |       stdout=sys.stdout if FLAGS.verbose else subprocess.DEVNULL,
67 |       stderr=sys.stderr if FLAGS.verbose else subprocess.DEVNULL)
68 | 
69 | 
70 | def _get_commits_topological(commits_sha_list,
71 |                              repo,
72 |                              flag_name,
73 |                              fill_default=True):
74 |   """Returns a list of commits, sorted by topological order.
75 | 
76 |   e.g. for a commit history A -> B -> C -> D, commits_sha_list = [C, B]
77 |   Output: [B, C]
78 | 
79 |   If the input commits_sha_list is empty, fetch the latest commit on branch
80 |   'master'
81 |   of the repo.
82 | 
83 |   Args:
84 |     commits_sha_list: a list of commit SHA digest strings. Each can be a long
85 |       or short digest.
86 |     repo: the git.Repo instance of the repository.
87 |     flag_name: the flag that is supposed to specify commits_sha_list.
88 |     fill_default: whether to fill in a default latest commit if none is
89 |       specified.
90 | 
91 |   Returns:
92 |     A list of full SHA digest strings, sorted by topological commit order.
93 | """ 94 | if commits_sha_list: 95 | long_commits_sha_set = set( 96 | map(lambda x: _to_long_sha_digest(x, repo), commits_sha_list)) 97 | sorted_commit_list = [] 98 | for c in reversed(list(repo.iter_commits())): 99 | if c.hexsha in long_commits_sha_set: 100 | sorted_commit_list.append(c.hexsha) 101 | 102 | if len(sorted_commit_list) != len(long_commits_sha_set): 103 | raise ValueError( 104 | "The following commits weren't found in the repo in branch master: %s." 105 | % (long_commits_sha_set - set(sorted_commit_list))) 106 | return sorted_commit_list 107 | 108 | elif not fill_default: 109 | # If we have some binary paths specified, we don't need to fill in a default 110 | # commit. 111 | return [] 112 | 113 | # If no commit specified: take the repo's latest commit. 114 | latest_commit_sha = repo.commit().hexsha 115 | logger.log('No %s specified, using the latest one: %s' % 116 | (flag_name, latest_commit_sha)) 117 | return [latest_commit_sha] 118 | 119 | 120 | def _to_long_sha_digest(digest, repo): 121 | """Returns the full 40-char SHA digest of a commit.""" 122 | return repo.git.rev_parse(digest) if len(digest) < 40 else digest 123 | 124 | 125 | def _setup_project_repo(repo_path, project_source): 126 | """Returns a path to the cloned repository. 127 | 128 | If the repo_path exists, perform a `git fetch` to update the content. 129 | Else, clone the project to repo_path. 130 | 131 | Args: 132 | repo_path: the path to clone the repository to. 133 | project_source: the source to clone the repository from. Could be a local 134 | path or an URL. 135 | 136 | Returns: 137 | A git.Repo object of the cloned repository. 138 | """ 139 | if os.path.exists(repo_path): 140 | logger.log('Path %s exists. Updating...' % repo_path) 141 | repo = git.Repo(repo_path) 142 | repo.git.fetch('origin') 143 | else: 144 | logger.log('Cloning %s to %s...' % (project_source, repo_path)) 145 | repo = git.Repo.clone_from(project_source, repo_path) 146 | 147 | return repo 148 | 149 | 150 | def _build_bazel_binary(commit, repo, outroot, platform=None): 151 | """Builds bazel at the specified commit and copy the output binary to outroot. 152 | 153 | If the binary for this commit already exists at the destination path, simply 154 | return the path without re-building. 155 | 156 | Args: 157 | commit: the Bazel commit SHA. 158 | repo: the git.Repo instance of the Bazel clone. 159 | outroot: the directory inwhich the resulting binary is copied to. 160 | platform: the platform on which to build this binary. 161 | 162 | Returns: 163 | The path to the resulting binary (copied to outroot). 164 | """ 165 | outroot_for_commit = '%s/%s/%s' % ( 166 | outroot, platform, commit) if platform else '%s/%s' % (outroot, commit) 167 | destination = '%s/bazel' % outroot_for_commit 168 | if os.path.exists(destination): 169 | logger.log('Binary exists at %s, reusing...' 
170 |     return destination
171 | 
172 |   logger.log('Building Bazel binary at commit %s' % commit)
173 |   repo.git.checkout('-f', commit)
174 | 
175 |   _exec_command(['bazel', 'build', '//src:bazel'], cwd=repo.working_dir)
176 | 
177 |   # Copy to another location
178 |   binary_out = '%s/bazel-bin/src/bazel' % repo.working_dir
179 | 
180 |   if not os.path.exists(outroot_for_commit):
181 |     os.makedirs(outroot_for_commit)
182 |   logger.log('Copying bazel binary to %s' % destination)
183 |   shutil.copyfile(binary_out, destination)
184 |   _exec_command(['chmod', '+x', destination])
185 | 
186 |   return destination
187 | 
188 | 
189 | def _construct_json_profile_flags(out_file_path):
190 |   """Constructs the flags used to collect JSON profiles.
191 | 
192 |   Args:
193 |     out_file_path: The path to output the profile to.
194 | 
195 |   Returns:
196 |     A list of strings representing the flags.
197 |   """
198 |   return [
199 |       '--experimental_generate_json_trace_profile',
200 |       '--profile={}'.format(out_file_path)
201 |   ]
202 | 
203 | 
204 | def json_profile_filename(data_directory, bazel_bench_uid, bazel_commit,
205 |                           unit_num, project_commit, run_number, total_runs):
206 |   return (f'{data_directory}/{bazel_bench_uid}_{bazel_commit}_{unit_num}'
207 |           + f'_{project_commit}_{run_number}_of_{total_runs}.profile.gz')
208 | 
209 | 
210 | def _single_run(bazel_bin_path,
211 |                 command,
212 |                 options,
213 |                 targets,
214 |                 startup_options):
215 |   """Runs a single benchmarking iteration of the given Bazel command.
216 | 
217 |   Args:
218 |     bazel_bin_path: the path to the bazel binary to be run.
219 |     command: the command to be run with Bazel.
220 |     options: the list of options.
221 |     targets: the list of targets.
222 |     startup_options: the list of startup options.
223 | 
224 |   Returns:
225 |     A result object:
226 |     {
227 |       'wall': 1.000,
228 |       'cpu': 1.000,
229 |       'system': 1.000,
230 |       'memory': 1.000,
231 |       'exit_status': 0,
232 |       'started_at': datetime.datetime(2019, 1, 1, 0, 0, 0, 000000),
233 |     }
234 |   """
235 |   bazel = Bazel(bazel_bin_path, startup_options)
236 | 
237 |   default_arguments = collections.defaultdict(list)  # Note: currently unused.
238 | 
239 |   # Prepend some default options if the command is 'build'.
240 |   # The order in which the options appear matters.
241 |   if command == 'build':
242 |     options = options + ['--nostamp', '--noshow_progress', '--color=no']
243 |   measurements = bazel.command(command, args=options + targets)
244 | 
245 |   if measurements is not None:
246 |     logger.log('Results of this run: wall: ' +
247 |                '%.3fs, cpu %.3fs, system %.3fs, memory %.3fMB, exit_status: %d' % (
248 |                    measurements['wall'],
249 |                    measurements['cpu'],
250 |                    measurements['system'],
251 |                    measurements['memory'],
252 |                    measurements['exit_status']))
253 | 
254 |   if FLAGS.clean:
255 |     bazel.command('clean', ['--color=no'])
256 | 
257 |   if FLAGS.shutdown:
258 |     bazel.command('shutdown')
259 | 
260 |   return measurements
261 | 
262 | 
263 | def _run_benchmark(bazel_bin_path,
264 |                    project_path,
265 |                    runs,
266 |                    command,
267 |                    options,
268 |                    targets,
269 |                    startup_options,
270 |                    prefetch_ext_deps,
271 |                    bazel_bench_uid,
272 |                    unit_num,
273 |                    data_directory=None,
274 |                    collect_profile=False,
275 |                    bazel_identifier=None,
276 |                    project_commit=None):
277 |   """Runs the benchmarking for a combination of (bazel version, project version).
278 | 
279 |   Args:
280 |     bazel_bin_path: the path to the bazel binary to be run.
281 |     project_path: the path to the project clone to be built.
282 |     runs: the number of runs.
283 |     command, options, targets, startup_options: the parsed pieces of the Bazel invocation to benchmark.
284 |     prefetch_ext_deps: whether to do a first non-benchmarked run to fetch the
285 |       external dependencies.
286 |     bazel_bench_uid: a unique string identifier of this entire bazel-bench run.
287 |     unit_num: the numerical order of the current unit being benchmarked.
288 |     collect_profile: whether to collect JSON profile for each run.
289 |     data_directory: the path to the directory to store run data. Required if
290 |       collect_profile.
291 |     bazel_identifier: the commit hash of the bazel commit. Required if
292 |       collect_profile.
293 |     project_commit: the commit hash of the project commit. Required if
294 |       collect_profile.
295 | 
296 |   Returns:
297 |     A tuple of (the list of result objects from each _single_run, the parsed (command, targets, options)).
298 |   """
299 |   collected = []
300 |   os.chdir(project_path)
301 | 
302 |   logger.log('=== BENCHMARKING BAZEL [Unit #%d]: %s, PROJECT: %s ===' %
303 |              (unit_num, bazel_identifier, project_commit))
304 |   # Runs the command once to make sure external dependencies are fetched.
305 |   if prefetch_ext_deps:
306 |     logger.log('Pre-fetching external dependencies...')
307 |     _single_run(bazel_bin_path, command, options, targets, startup_options)
308 | 
309 |   if collect_profile:
310 |     if not os.path.exists(data_directory):
311 |       os.makedirs(data_directory)
312 | 
313 |   for i in range(1, runs + 1):
314 |     logger.log('Starting benchmark run %s/%s:' % (i, runs))
315 | 
316 |     maybe_include_json_profile_flags = options[:]
317 |     if collect_profile:
318 |       assert bazel_identifier, ('bazel_identifier is required when '
319 |                                 'collect_profile')
320 |       assert project_commit, ('project_commit is required when '
321 |                               'collect_profile')
322 |       maybe_include_json_profile_flags += _construct_json_profile_flags(
323 |           json_profile_filename(
324 |               data_directory=data_directory,
325 |               bazel_bench_uid=bazel_bench_uid,
326 |               bazel_commit=bazel_identifier.replace('/', '_'),
327 |               unit_num=unit_num,
328 |               project_commit=project_commit,
329 |               run_number=i,
330 |               total_runs=runs,
331 |           ))
332 |     collected.append(
333 |         _single_run(bazel_bin_path, command, maybe_include_json_profile_flags,
334 |                     targets, startup_options))
335 | 
336 |   return collected, (command, targets, options)
337 | 
338 | 
339 | def handle_json_profiles_aggr(bazel_bench_uid, unit_num, bazel_commits,
340 |                               project_source, project_commits, runs,
341 |                               output_path, data_directory):
342 |   """Aggregates the collected JSON profiles and writes the result to a CSV.
343 | 
344 |   Args:
345 |     bazel_bench_uid: a unique string identifier of this entire bazel-bench run.
346 |     unit_num: the numerical order of the current unit being benchmarked.
347 |     bazel_commits: the Bazel commits that bazel-bench ran on.
348 |     project_source: a path/url to a local/remote repository of the project on
349 |       which benchmarking was performed.
350 |     project_commits: the commits of the project when benchmarking was done.
351 |     runs: the total number of runs.
352 |     output_path: the path to the output csv file.
353 |     data_directory: the directory that stores output files.
354 | """ 355 | output_dir = os.path.dirname(output_path) 356 | if not os.path.exists(output_dir): 357 | os.makedirs(output_dir) 358 | 359 | with open(output_path, 'w') as f: 360 | csv_writer = csv.writer(f) 361 | csv_writer.writerow([ 362 | 'bazel_source', 'project_source', 'project_commit', 'cat', 'name', 'dur' 363 | ]) 364 | 365 | for bazel_commit in bazel_commits: 366 | for project_commit in project_commits: 367 | profiles_filenames = [ 368 | json_profile_filename( 369 | data_directory=data_directory, 370 | bazel_bench_uid=bazel_bench_uid, 371 | bazel_commit=bazel_commit, 372 | unit_num=unit_num, 373 | project_commit=project_commit, 374 | run_number=i, 375 | total_runs=runs, 376 | ) 377 | for i in range(1, runs + 1) 378 | ] 379 | event_list = json_profiles_merger_lib.aggregate_data( 380 | profiles_filenames, only_phases=True) 381 | for event in event_list: 382 | csv_writer.writerow([ 383 | bazel_commit, project_source, project_commit, event['cat'], 384 | event['name'], event['median'] 385 | ]) 386 | logger.log('Finished writing aggregate_json_profiles to %s' % output_path) 387 | 388 | 389 | def create_summary(data, project_source): 390 | """Creates the runs summary onto stdout. 391 | 392 | Excludes runs with non-zero exit codes from the final summary table. 393 | """ 394 | unit = { 395 | 'wall': 's ', 396 | 'cpu': 's ', 397 | 'system': 's ', 398 | 'memory': 'MB' 399 | } 400 | summary_builder = [] 401 | summary_builder.append('\nRESULTS:') 402 | last_collected = None 403 | for (i, bazel_commit, project_commit), collected in data.items(): 404 | header = ('[Unit #%d] Bazel version: %s, Project commit: %s, Project source: %s' % 405 | (i, bazel_commit, project_commit, project_source)) 406 | summary_builder.append(header) 407 | 408 | summary_builder.append( 409 | '%s %s %s %s %s' % 410 | ('metric'.rjust(8), 'mean'.center(20), 'median'.center(20), 411 | 'stddev'.center(10), 'pval'.center(10))) 412 | 413 | num_runs = len(collected['wall'].items()) 414 | # A map from run number to exit code, for runs with non-zero exit codes. 415 | non_zero_runs = {} 416 | for i, exit_code in enumerate(collected['exit_status'].items()): 417 | if exit_code != 0: 418 | non_zero_runs[i] = exit_code 419 | for metric, values in collected.items(): 420 | if metric in ['exit_status', 'started_at']: 421 | continue 422 | 423 | values_exclude_failures = values.exclude_from_indexes( 424 | non_zero_runs.keys()) 425 | # Skip if there's no value available after excluding failed runs. 426 | if not values_exclude_failures.items(): 427 | continue 428 | 429 | if last_collected: 430 | base = last_collected[metric] 431 | pval = '% 7.5f' % values_exclude_failures.pval(base.values()) 432 | mean_diff = '(% +6.2f%%)' % ( 433 | 100. * (values_exclude_failures.mean() - base.mean()) / base.mean()) 434 | median_diff = '(% +6.2f%%)' % ( 435 | 100. 
* 436 | (values_exclude_failures.median() - base.median()) / base.median()) 437 | else: 438 | pval = '' 439 | mean_diff = median_diff = ' ' 440 | summary_builder.append( 441 | '%s: %s %s %s %s' % 442 | (metric.rjust(8), 443 | ('% 8.3f%s %s' % 444 | (values_exclude_failures.mean(), unit[metric], mean_diff)).center(20), 445 | ('% 8.3f%s %s' % 446 | (values_exclude_failures.median(), unit[metric], median_diff)).center(20), 447 | ('% 7.3f%s' % (values_exclude_failures.stddev(), unit[metric])).center(10), 448 | pval.center(10))) 449 | last_collected = collected 450 | if non_zero_runs: 451 | summary_builder.append( 452 | ('The following runs contain non-zero exit code(s):\n %s\n' 453 | 'Please check the full log for more details. These runs are ' 454 | 'excluded from the above result table.' % 455 | '\n '.join('- run: %s/%s, exit_code: %s' % (k + 1, num_runs, v) 456 | for k, v in non_zero_runs.items()))) 457 | summary_builder.append('') 458 | 459 | return '\n'.join(summary_builder) 460 | 461 | 462 | FLAGS = flags.FLAGS 463 | # Flags for the bazel binaries. 464 | flags.DEFINE_list('bazel_commits', None, 'The commits at which bazel is built.') 465 | flags.DEFINE_list('bazel_binaries', None, 466 | 'The pre-built bazel binaries to benchmark.') 467 | flags.DEFINE_string('bazel_source', 468 | 'https://github.com/bazelbuild/bazel.git', 469 | 'Either a path to the local Bazel repo or a https url to ' \ 470 | 'a GitHub repository.') 471 | flags.DEFINE_string( 472 | 'bazel_bin_dir', None, 473 | 'The directory to store the bazel binaries from each commit.') 474 | 475 | # Flags for the project to be built. 476 | flags.DEFINE_string( 477 | 'project_label', None, 478 | 'The label of the project. Only relevant in the daily performance report.') 479 | flags.DEFINE_string('project_source', None, 480 | 'Either a path to the local git project to be built or ' \ 481 | 'a https url to a GitHub repository.') 482 | flags.DEFINE_list('project_commits', None, 483 | 'The commits from the git project to be benchmarked.') 484 | flags.DEFINE_string( 485 | 'env_configure', None, 486 | "The shell commands to configure the project's environment.") 487 | 488 | # Execution options. 489 | flags.DEFINE_integer('runs', 5, 'The number of benchmark runs.') 490 | flags.DEFINE_string('bazelrc', None, 'The path to a .bazelrc file.') 491 | flags.DEFINE_string('platform', None, 492 | ('The platform on which bazel-bench is run. This is just ' 493 | 'to categorize data and has no impact on the actual ' 494 | 'script execution.')) 495 | flags.DEFINE_boolean('clean', True, 'Whether to invoke clean between runs/builds.') 496 | flags.DEFINE_boolean('shutdown', True, 'Whether to invoke shutdown between runs/builds.') 497 | 498 | # Miscellaneous flags. 499 | flags.DEFINE_boolean('verbose', False, 500 | 'Whether to include git/Bazel stdout logs.') 501 | flags.DEFINE_boolean('prefetch_ext_deps', True, 502 | 'Whether to do an initial run to pre-fetch external ' \ 503 | 'dependencies.') 504 | flags.DEFINE_boolean('collect_profile', False, 505 | 'Whether to collect JSON profile for each run. Requires ' \ 506 | '--data_directory to be set.') 507 | flags.DEFINE_boolean('aggregate_json_profiles', False, 508 | 'Whether to aggregate the collected JSON profiles. Requires '\ 509 | '--collect_profile to be set.') 510 | flags.DEFINE_string( 511 | 'benchmark_config', None, 512 | 'Whether to use the config-file interface to define benchmark units.') 513 | 514 | # Output storage flags. 
515 | flags.DEFINE_string('data_directory', None, 516 | 'The directory in which the csv files should be stored.') 517 | # The daily report generation process on BazelCI requires the csv file name to 518 | # be determined before bazel-bench is launched, so that METADATA files are 519 | # properly filled. 520 | flags.DEFINE_string('csv_file_name', None, 521 | 'The name of the output csv, without the .csv extension.') 522 | 523 | 524 | def _flag_checks(): 525 | """Verify flags requirements.""" 526 | if (not FLAGS.benchmark_config and FLAGS.bazel_commits and 527 | FLAGS.project_commits and len(FLAGS.bazel_commits) > 1 and 528 | len(FLAGS.project_commits) > 1): 529 | raise ValueError( 530 | 'Either --bazel_commits or --project_commits should be a single element.' 531 | ) 532 | 533 | if FLAGS.aggregate_json_profiles and not FLAGS.collect_profile: 534 | raise ValueError('--aggregate_json_profiles requires ' 535 | '--collect_profile to be set.') 536 | 537 | 538 | def _get_benchmark_config_and_clone_repos(argv): 539 | """From the flags/config file, get the benchmark units. 540 | 541 | Args: 542 | argv: the command line arguments. 543 | 544 | Returns: 545 | An instance of BenchmarkConfig that contains the benchmark units. 546 | """ 547 | if FLAGS.benchmark_config: 548 | config = BenchmarkConfig.from_file(FLAGS.benchmark_config) 549 | project_source = config.get_project_source() 550 | project_clone_repo = _setup_project_repo( 551 | PROJECT_CLONE_BASE_PATH + '/' + _get_clone_subdir(project_source), 552 | project_source) 553 | bazel_source = config.get_bazel_source() 554 | bazel_clone_repo = _setup_project_repo( 555 | BAZEL_CLONE_BASE_PATH + '/' + _get_clone_subdir(bazel_source), 556 | bazel_source) 557 | 558 | return config, bazel_clone_repo, project_clone_repo 559 | 560 | # Strip off 'benchmark.py' from argv 561 | # argv would be something like: 562 | # ['benchmark.py', 'build', '--nobuild', '//:all'] 563 | bazel_args = argv[1:] 564 | 565 | # Building Bazel binaries 566 | bazel_binaries = FLAGS.bazel_binaries or [] 567 | logger.log('Preparing bazelbuild/bazel repository.') 568 | bazel_source = FLAGS.bazel_source if FLAGS.bazel_source else BAZEL_GITHUB_URL 569 | bazel_clone_repo = _setup_project_repo( 570 | PROJECT_CLONE_BASE_PATH + '/' + _get_clone_subdir(bazel_source), 571 | bazel_source) 572 | bazel_commits = _get_commits_topological( 573 | FLAGS.bazel_commits, 574 | bazel_clone_repo, 575 | 'bazel_commits', 576 | fill_default=not FLAGS.bazel_commits and not bazel_binaries) 577 | 578 | # Set up project repo 579 | logger.log('Preparing %s clone.' 
% FLAGS.project_source) 580 | project_clone_repo = _setup_project_repo( 581 | PROJECT_CLONE_BASE_PATH + '/' + _get_clone_subdir(FLAGS.project_source), 582 | FLAGS.project_source) 583 | 584 | project_commits = _get_commits_topological(FLAGS.project_commits, 585 | project_clone_repo, 586 | 'project_commits') 587 | 588 | config = BenchmarkConfig.from_flags( 589 | bazel_commits=bazel_commits, 590 | bazel_binaries=bazel_binaries, 591 | project_commits=project_commits, 592 | bazel_source=bazel_source, 593 | project_source=FLAGS.project_source, 594 | env_configure=FLAGS.env_configure, 595 | runs=FLAGS.runs, 596 | collect_profile=FLAGS.collect_profile, 597 | command=' '.join(bazel_args), 598 | clean=FLAGS.clean, 599 | shutdown=FLAGS.shutdown) 600 | 601 | return config, bazel_clone_repo, project_clone_repo 602 | 603 | 604 | def main(argv): 605 | _flag_checks() 606 | 607 | config, bazel_clone_repo, project_clone_repo = _get_benchmark_config_and_clone_repos( 608 | argv) 609 | 610 | # A dictionary that maps a (bazel_commit, project_commit) tuple 611 | # to its benchmarking result. 612 | data = collections.OrderedDict() 613 | csv_data = collections.OrderedDict() 614 | data_directory = FLAGS.data_directory or DEFAULT_OUT_BASE_PATH 615 | 616 | # We use the start time as a unique identifier of this bazel-bench run. 617 | bazel_bench_uid = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S') 618 | 619 | bazel_bin_base_path = FLAGS.bazel_bin_dir or BAZEL_BINARY_BASE_PATH 620 | 621 | # Build the bazel binaries, if necessary. 622 | for unit in config.get_units(): 623 | if 'bazel_binary' in unit: 624 | unit['bazel_bin_path'] = unit['bazel_binary'] 625 | elif 'bazel_commit' in unit: 626 | bazel_bin_path = _build_bazel_binary(unit['bazel_commit'], 627 | bazel_clone_repo, 628 | bazel_bin_base_path, FLAGS.platform) 629 | unit['bazel_bin_path'] = bazel_bin_path 630 | 631 | for i, unit in enumerate(config.get_units()): 632 | bazel_identifier = unit['bazel_commit'] if 'bazel_commit' in unit else unit['bazel_binary'] 633 | project_commit = unit['project_commit'] 634 | 635 | project_clone_repo.git.checkout('-f', project_commit) 636 | if unit['env_configure'] is not None: 637 | _exec_command( 638 | unit['env_configure'], shell=True, cwd=project_clone_repo.working_dir) 639 | 640 | results, args = _run_benchmark( 641 | bazel_bin_path=unit['bazel_bin_path'], 642 | project_path=project_clone_repo.working_dir, 643 | runs=unit['runs'], 644 | command=unit['command'], 645 | options=unit['options'], 646 | targets=unit['targets'], 647 | startup_options=unit['startup_options'], 648 | prefetch_ext_deps=FLAGS.prefetch_ext_deps, 649 | bazel_bench_uid=bazel_bench_uid, 650 | unit_num=i, 651 | collect_profile=unit['collect_profile'], 652 | data_directory=data_directory, 653 | bazel_identifier=bazel_identifier, 654 | project_commit=project_commit) 655 | collected = {} 656 | for benchmarking_result in results: 657 | for metric, value in benchmarking_result.items(): 658 | if metric not in collected: 659 | collected[metric] = Values() 660 | collected[metric].add(value) 661 | 662 | data[(i, bazel_identifier, project_commit)] = collected 663 | non_measurables = { 664 | 'project_source': unit['project_source'], 665 | 'platform': FLAGS.platform, 666 | 'project_label': FLAGS.project_label 667 | } 668 | csv_data[(bazel_identifier, project_commit)] = { 669 | 'results': results, 670 | 'args': args, 671 | 'non_measurables': non_measurables 672 | } 673 | 674 | summary_text = create_summary(data, config.get_project_source()) 675 | print(summary_text) 
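  # Note: the result table above is always printed to stdout; the artifacts
  # below (raw csv, text summary, optional aggregated profiles) are written
  # only when --data_directory is explicitly set.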
676 | 677 | if FLAGS.data_directory: 678 | csv_file_name = FLAGS.csv_file_name or '{}.csv'.format(bazel_bench_uid) 679 | txt_file_name = csv_file_name.replace('.csv', '.txt') 680 | 681 | output_handling.export_csv(data_directory, csv_file_name, csv_data) 682 | output_handling.export_file(data_directory, txt_file_name, summary_text) 683 | 684 | # This is mostly for the nightly benchmark. 685 | if FLAGS.aggregate_json_profiles: 686 | aggr_json_profiles_csv_path = ( 687 | '%s/%s' % (FLAGS.data_directory, DEFAULT_AGGR_JSON_PROFILE_FILENAME)) 688 | handle_json_profiles_aggr( 689 | bazel_bench_uid=bazel_bench_uid, 690 | unit_num=i, 691 | bazel_commits=config.get_bazel_commits(), 692 | project_source=config.get_project_source(), 693 | project_commits=config.get_project_commits(), 694 | runs=FLAGS.runs, 695 | output_path=aggr_json_profiles_csv_path, 696 | data_directory=FLAGS.data_directory, 697 | ) 698 | 699 | logger.log('Done.') 700 | 701 | 702 | if __name__ == '__main__': 703 | app.run(main) 704 | -------------------------------------------------------------------------------- /benchmark_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for the main benchmarking script.""" 15 | import mock 16 | import sys 17 | import benchmark 18 | import six 19 | 20 | from absl.testing import absltest 21 | from absl.testing import flagsaver 22 | from absl import flags 23 | from testutils.fakes import fake_log, fake_exec_command, FakeBazel 24 | 25 | # Setup custom fakes/mocks. 26 | benchmark.logger.log = fake_log 27 | benchmark._exec_command = fake_exec_command 28 | benchmark.Bazel = FakeBazel 29 | mock_stdio_type = six.StringIO 30 | 31 | 32 | class BenchmarkFunctionTests(absltest.TestCase): 33 | 34 | @mock.patch.object(benchmark.os.path, 'exists', return_value=True) 35 | @mock.patch.object(benchmark.os, 'chdir') 36 | def test_setup_project_repo_exists(self, unused_chdir_mock, 37 | unused_exists_mock): 38 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr, \ 39 | mock.patch('benchmark.git.Repo') as mock_repo_class: 40 | mock_repo = mock_repo_class.return_value 41 | benchmark._setup_project_repo('repo_path', 'project_source') 42 | 43 | mock_repo.git.fetch.assert_called_once_with('origin') 44 | self.assertEqual('Path repo_path exists. 
Updating...', 45 | mock_stderr.getvalue()) 46 | 47 | @mock.patch.object(benchmark.os.path, 'exists', return_value=False) 48 | @mock.patch.object(benchmark.os, 'chdir') 49 | def test_setup_project_repo_not_exists(self, unused_chdir_mock, 50 | unused_exists_mock): 51 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr, \ 52 | mock.patch('benchmark.git.Repo') as mock_repo_class: 53 | benchmark._setup_project_repo('repo_path', 'project_source') 54 | 55 | mock_repo_class.clone_from.assert_called_once_with('project_source', 56 | 'repo_path') 57 | self.assertEqual('Cloning project_source to repo_path...', 58 | mock_stderr.getvalue()) 59 | 60 | def test_get_commits_topological(self): 61 | with mock.patch('benchmark.git.Repo') as mock_repo_class: 62 | mock_repo = mock_repo_class.return_value 63 | mock_A = mock.MagicMock() 64 | mock_A.hexsha = 'A' 65 | mock_B = mock.MagicMock() 66 | mock_B.hexsha = 'B' 67 | mock_C = mock.MagicMock() 68 | mock_C.hexsha = 'C' 69 | mock_repo.iter_commits.return_value = [mock_C, mock_B, mock_A] 70 | mock_repo.git.rev_parse.side_effect = lambda x: x 71 | result = benchmark._get_commits_topological(['B', 'A'], mock_repo, 72 | 'flag_name') 73 | 74 | self.assertEqual(['A', 'B'], result) 75 | 76 | def test_get_commits_topological_latest(self): 77 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr, \ 78 | mock.patch('benchmark.git.Repo') as mock_repo_class: 79 | mock_repo = mock_repo_class.return_value 80 | mock_commit = mock.MagicMock() 81 | mock_repo.commit.return_value = mock_commit 82 | mock_commit.hexsha = 'A' 83 | result = benchmark._get_commits_topological(None, mock_repo, 84 | 'bazel_commits') 85 | 86 | self.assertEqual(['A'], result) 87 | self.assertEqual('No bazel_commits specified, using the latest one: A', 88 | mock_stderr.getvalue()) 89 | 90 | @mock.patch.object(benchmark.os.path, 'exists', return_value=True) 91 | @mock.patch.object(benchmark.os, 'makedirs') 92 | def test_build_bazel_binary_exists(self, unused_chdir_mock, 93 | unused_exists_mock): 94 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 95 | benchmark._build_bazel_binary('commit', 'repo_path', 'outroot') 96 | self.assertEqual('Binary exists at outroot/commit/bazel, reusing...', 97 | mock_stderr.getvalue()) 98 | 99 | @mock.patch.object(benchmark.os.path, 'exists', return_value=False) 100 | @mock.patch.object(benchmark.os, 'makedirs') 101 | @mock.patch.object(benchmark.os, 'chdir') 102 | @mock.patch.object(benchmark.shutil, 'copyfile') 103 | def test_build_bazel_binary_not_exists(self, unused_shutil_mock, 104 | unused_chdir_mock, 105 | unused_makedirs_mock, 106 | unused_exists_mock): 107 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr, \ 108 | mock.patch('benchmark.git.Repo') as mock_repo_class: 109 | mock_repo = mock_repo_class.return_value 110 | benchmark._build_bazel_binary('commit', mock_repo, 'outroot') 111 | 112 | mock_repo.git.checkout.assert_called_once_with('-f', 'commit') 113 | self.assertEqual( 114 | ''.join([ 115 | 'Building Bazel binary at commit commit', 'bazel build //src:bazel', 116 | 'Copying bazel binary to outroot/commit/bazel', 117 | 'chmod +x outroot/commit/bazel' 118 | ]), mock_stderr.getvalue()) 119 | 120 | def test_single_run(self): 121 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 122 | benchmark._single_run( 123 | 'bazel_binary_path', 124 | 'build', 125 | options=[], 126 | targets=['//:all'], 127 | startup_options=[]) 128 | 129 | 
self.assertEqual( 130 | ''.join([ 131 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 132 | 'Executing Bazel command: bazel clean --color=no', 133 | 'Executing Bazel command: bazel shutdown ' 134 | ]), mock_stderr.getvalue()) 135 | 136 | @mock.patch.object(benchmark.os, 'chdir') 137 | def test_run_benchmark_no_prefetch(self, _): 138 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 139 | benchmark._run_benchmark( 140 | 'bazel_binary_path', 141 | 'project_path', 142 | runs=2, 143 | bazel_bench_uid='fake_uid', 144 | command='build', 145 | options=[], 146 | targets=['//:all'], 147 | startup_options=[], 148 | prefetch_ext_deps=False, 149 | unit_num=0) 150 | 151 | self.assertEqual( 152 | ''.join([ 153 | '=== BENCHMARKING BAZEL [Unit #0]: None, PROJECT: None ===', 154 | 'Starting benchmark run 1/2:', 155 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 156 | 'Executing Bazel command: bazel clean --color=no', 157 | 'Executing Bazel command: bazel shutdown ', 158 | 'Starting benchmark run 2/2:', 159 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 160 | 'Executing Bazel command: bazel clean --color=no', 161 | 'Executing Bazel command: bazel shutdown ' 162 | ]), mock_stderr.getvalue()) 163 | 164 | @mock.patch.object(benchmark.os, 'chdir') 165 | def test_run_benchmark_prefetch(self, _): 166 | benchmark.DEFAULT_OUT_BASE_PATH = 'some_out_path' 167 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 168 | benchmark._run_benchmark( 169 | 'bazel_binary_path', 170 | 'project_path', 171 | runs=2, 172 | bazel_bench_uid='fake_uid', 173 | command='build', 174 | options=[], 175 | targets=['//:all'], 176 | startup_options=[], 177 | prefetch_ext_deps=True, 178 | unit_num=0) 179 | 180 | self.assertEqual( 181 | ''.join([ 182 | '=== BENCHMARKING BAZEL [Unit #0]: None, PROJECT: None ===', 183 | 'Pre-fetching external dependencies...', 184 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 185 | 'Executing Bazel command: bazel clean --color=no', 186 | 'Executing Bazel command: bazel shutdown ', 187 | 'Starting benchmark run 1/2:', 188 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 189 | 'Executing Bazel command: bazel clean --color=no', 190 | 'Executing Bazel command: bazel shutdown ', 191 | 'Starting benchmark run 2/2:', 192 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 193 | 'Executing Bazel command: bazel clean --color=no', 194 | 'Executing Bazel command: bazel shutdown ' 195 | ]), mock_stderr.getvalue()) 196 | 197 | @mock.patch.object(benchmark.os, 'chdir') 198 | def test_run_benchmark_collect_profile(self, _): 199 | benchmark.DEFAULT_OUT_BASE_PATH = 'some_out_path' 200 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 201 | benchmark._run_benchmark( 202 | 'bazel_binary_path', 203 | 'project_path', 204 | runs=2, 205 | bazel_bench_uid='fake_uid', 206 | command='build', 207 | options=[], 208 | targets=['//:all'], 209 | startup_options=[], 210 | prefetch_ext_deps=True, 211 | collect_profile=True, 212 | data_directory='fake_dir', 213 | bazel_identifier='fake_bazel_commit', 214 | project_commit='fake_project_commit', 215 | unit_num=0) 216 | 217 | self.assertEqual( 218 | ''.join([ 219 | '=== BENCHMARKING BAZEL [Unit #0]: fake_bazel_commit, PROJECT: fake_project_commit ===', 220 | 'Pre-fetching external 
dependencies...', 221 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 222 | 'Executing Bazel command: bazel clean --color=no', 223 | 'Executing Bazel command: bazel shutdown ', 224 | 'Starting benchmark run 1/2:', 225 | 'Executing Bazel command: bazel build --experimental_generate_json_trace_profile --profile=fake_dir/fake_uid_fake_bazel_commit_0_fake_project_commit_1_of_2.profile.gz --nostamp --noshow_progress --color=no //:all', 226 | 'Executing Bazel command: bazel clean --color=no', 227 | 'Executing Bazel command: bazel shutdown ', 228 | 'Starting benchmark run 2/2:', 229 | 'Executing Bazel command: bazel build --experimental_generate_json_trace_profile --profile=fake_dir/fake_uid_fake_bazel_commit_0_fake_project_commit_2_of_2.profile.gz --nostamp --noshow_progress --color=no //:all', 230 | 'Executing Bazel command: bazel clean --color=no', 231 | 'Executing Bazel command: bazel shutdown ' 232 | ]), mock_stderr.getvalue()) 233 | 234 | 235 | class BenchmarkFlagsTest(absltest.TestCase): 236 | 237 | @flagsaver.flagsaver 238 | def test_project_source_present(self): 239 | # This mirrors the requirement in benchmark.py 240 | flags.mark_flag_as_required('project_source') 241 | # Assert that the script fails when no project_source is specified 242 | with mock.patch.object( 243 | sys, 'stderr', new=mock_stdio_type()) as mock_stderr, self.assertRaises( 244 | SystemExit) as context: 245 | benchmark.app.run(benchmark.main) 246 | self.assertIn( 247 | ''.join([ 248 | 'FATAL Flags parsing error: flag --project_source=None: ', 249 | 'Flag --project_source must have a value other than None.' 250 | ]), mock_stderr.getvalue()) 251 | 252 | @flagsaver.flagsaver(bazel_commits=['a', 'b'], project_commits=['c', 'd']) 253 | def test_either_bazel_commits_project_commits_single_element(self): 254 | with self.assertRaises(ValueError) as context: 255 | benchmark._flag_checks() 256 | value_err = context.exception 257 | self.assertEqual( 258 | str(value_err), 259 | 'Either --bazel_commits or --project_commits should be a single element.' 
260 | ) 261 | 262 | @flagsaver.flagsaver(clean=False) 263 | def test_single_run_skip_clean(self): 264 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 265 | benchmark._single_run( 266 | 'bazel_binary_path', 267 | 'build', 268 | options=[], 269 | targets=['//:all'], 270 | startup_options=[]) 271 | 272 | self.assertEqual( 273 | ''.join([ 274 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 275 | 'Executing Bazel command: bazel shutdown ' 276 | ]), mock_stderr.getvalue()) 277 | 278 | @flagsaver.flagsaver(shutdown=False) 279 | def test_single_run_skip_shutdown(self): 280 | with mock.patch.object(sys, 'stderr', new=mock_stdio_type()) as mock_stderr: 281 | benchmark._single_run( 282 | 'bazel_binary_path', 283 | 'build', 284 | options=[], 285 | targets=['//:all'], 286 | startup_options=[]) 287 | 288 | self.assertEqual( 289 | ''.join([ 290 | 'Executing Bazel command: bazel build --nostamp --noshow_progress --color=no //:all', 291 | 'Executing Bazel command: bazel clean --color=no' 292 | ]), mock_stderr.getvalue()) 293 | 294 | 295 | if __name__ == '__main__': 296 | absltest.main() 297 | -------------------------------------------------------------------------------- /report/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_binary") 2 | load("@third_party//:requirements.bzl", "requirement") 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | py_binary( 7 | name = "generate_report", 8 | srcs = ["generate_report.py"], 9 | deps = [ 10 | # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14, 11 | # google-cloud-bigquery must be listed first. 12 | requirement("google-cloud-bigquery"), 13 | requirement("cachetools"), 14 | requirement("google-api-core"), 15 | requirement("google-auth"), 16 | requirement("google-cloud-core"), 17 | requirement("google-resumable-media"), 18 | requirement("googleapis-common-protos"), 19 | requirement("protobuf"), 20 | requirement("pytz"), 21 | requirement("requests"), 22 | ], 23 | ) 24 | 25 | py_binary( 26 | name = "generate_master_report", 27 | srcs = ["generate_master_report.py"], 28 | deps = [ 29 | # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14, 30 | # google-cloud-bigquery must be listed first. 31 | requirement("google-cloud-bigquery"), 32 | requirement("cachetools"), 33 | requirement("google-api-core"), 34 | requirement("google-auth"), 35 | requirement("google-cloud-core"), 36 | requirement("google-resumable-media"), 37 | requirement("googleapis-common-protos"), 38 | requirement("protobuf"), 39 | requirement("pytz"), 40 | requirement("requests"), 41 | ], 42 | ) 43 | -------------------------------------------------------------------------------- /report/generate_master_report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright 2019 The Bazel Authors. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http:#www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Generates a daily HTML report for the projects. 17 | 18 | The steps: 19 | 1. Get the necessary data from Storage for projects/date. 20 | 2. Manipulate the data to a format suitable for graphs. 21 | 3. Generate a HTML report containing the graphs. 22 | 4. Upload the generated HTMLs to GCP Storage. 23 | """ 24 | import argparse 25 | import collections 26 | import csv 27 | import datetime 28 | import json 29 | import io 30 | import os 31 | import statistics 32 | import subprocess 33 | import sys 34 | import tempfile 35 | import urllib.request 36 | from google.cloud import bigquery 37 | 38 | TMP = tempfile.gettempdir() 39 | REPORTS_DIRECTORY = os.path.join(TMP, ".bazel_bench", "reports") 40 | PLATFORMS = ["macos", "ubuntu1804"] 41 | PROJECT_SOURCE_TO_NAME = { 42 | "https://github.com/bazelbuild/bazel.git": "bazel", 43 | "https://github.com/tensorflow/tensorflow.git": "tensorflow" 44 | } 45 | 46 | 47 | def _upload_to_storage(src_file_path, storage_bucket, destination_dir): 48 | """Uploads the file from src_file_path to the specified location on Storage.""" 49 | args = [ 50 | "gsutil", "cp", src_file_path, 51 | "gs://{}/{}".format(storage_bucket, destination_dir) 52 | ] 53 | subprocess.run(args) 54 | 55 | 56 | def _get_storage_url(storage_bucket, dated_subdir): 57 | # In this case, the storage_bucket is a Domain-named bucket. 58 | # https://cloud.google.com/storage/docs/domain-name-verification 59 | return "https://{}/{}".format(storage_bucket, dated_subdir) 60 | 61 | 62 | def _short_hash(commit): 63 | return commit[:7] 64 | 65 | 66 | def _row_component(content): 67 | return """ 68 |
<div class="row">{content}</div>
69 | """.format(content=content) 70 | 71 | 72 | def _col_component(col_class, content): 73 | return """ 74 |
<div class="{col_class}">{content}</div>
75 | """.format( 76 | col_class=col_class, content=content) 77 | 78 | 79 | def _historical_graph(metric, metric_label, data, platform): 80 | """Returns the HTML
<div> component of a single graph.""" 81 | title = "[{}] Historical values of {}".format(platform, metric_label) 82 | hAxis = "Date (commit)" 83 | vAxis = metric_label 84 | chart_id = "{}-{}-time".format(platform, metric) 85 | 86 | return """ 87 | 125 |
126 | """.format( 127 | title=title, 128 | data=data, 129 | hAxis=hAxis, 130 | vAxis=vAxis, 131 | chart_id=chart_id, 132 | metric_label=metric_label) 133 | 134 | 135 | def _full_report(date, graph_components, project_reports_components): 136 | """Returns the full HTML of a complete report, from the graph components.""" 137 | return """ 138 | 139 | 140 | 141 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 158 | 159 | [{date}] Master Report 160 | 161 | 162 |
163 |
164 |
165 |

Report for {date}

166 |

Generated with https://github.com/bazelbuild/bazel-bench at {gentime}

167 |
168 |
169 |
170 |
171 | {reports} 172 |
173 |
174 | 175 |
176 |
177 | 178 | (Date & time are in UTC.) 179 |
180 | 201 |
202 |
203 | 204 | {graphs} 205 |
206 | 207 | 208 | """.format( 209 | gentime=datetime.datetime.now(), 210 | date=date.strftime("%Y/%m/%d"), 211 | graphs=graph_components, 212 | reports=project_reports_components) 213 | 214 | 215 | def _query_bq(bq_project, bq_table, date_cutoff, platform): 216 | bq_client = bigquery.Client(project=bq_project) 217 | query = """ 218 | SELECT 219 | MIN(wall) as min_wall, 220 | APPROX_QUANTILES(wall, 101)[OFFSET(50)] AS median_wall, 221 | MAX(wall) as max_wall, 222 | MIN(memory) as min_memory, 223 | APPROX_QUANTILES(memory, 101)[OFFSET(50)] AS median_memory, 224 | MAX(memory) as max_memory, 225 | bazel_commit, 226 | DATE(MIN(started_at)) as report_date, 227 | project_label 228 | FROM ( 229 | SELECT wall, memory, started_at, t1.bazel_commit, project_label FROM `{bq_project}.{bq_table}` t1 230 | JOIN ( 231 | SELECT DISTINCT bazel_commit, started_at_date 232 | FROM ( 233 | SELECT bazel_commit, DATE(started_at) started_at_date, 234 | RANK() OVER (PARTITION BY project_commit 235 | ORDER BY started_at DESC 236 | ) AS `Rank` 237 | FROM `{bq_project}.{bq_table}` 238 | WHERE DATE(started_at) <= "{date_cutoff}" 239 | AND platform = "{platform}" 240 | AND exit_status = 0 241 | ) 242 | WHERE Rank=1 243 | ORDER BY started_at_date DESC 244 | LIMIT 10 245 | ) t2 246 | ON t1.bazel_commit = t2.bazel_commit 247 | WHERE platform = "{platform}" 248 | AND exit_status = 0 249 | ) 250 | GROUP BY bazel_commit, project_label 251 | ORDER BY report_date, project_label ASC; 252 | """.format( 253 | bq_project=bq_project, 254 | bq_table=bq_table, 255 | date_cutoff=date_cutoff, 256 | platform=platform) 257 | 258 | return bq_client.query(query) 259 | 260 | 261 | # TODO(leba): Normalize data between projects. 262 | def _prepare_time_series_data(raw_data): 263 | """Massage the data to fit a format suitable for graph generation.""" 264 | headers = ["Date"] 265 | project_to_pos = {} 266 | date_to_wall = {} 267 | date_to_mem = {} 268 | 269 | # First pass to gather the projects and form the headers. 270 | for row in raw_data: 271 | if row.project_label not in project_to_pos: 272 | project_to_pos[row.project_label] = len(project_to_pos) 273 | headers.extend( 274 | [row.project_label, { 275 | "role": "interval" 276 | }, { 277 | "role": "interval" 278 | }]) 279 | 280 | for row in raw_data: 281 | if row.report_date not in date_to_wall: 282 | # Commits on day X are benchmarked on day X + 1. 
283 | date_str = "{} ({})".format( 284 | (row.report_date - datetime.timedelta(days=1)).strftime("%Y-%m-%d"), 285 | _short_hash(row.bazel_commit)) 286 | 287 | date_to_wall[row.report_date] = ["null"] * len(headers) 288 | date_to_mem[row.report_date] = ["null"] * len(headers) 289 | 290 | date_to_wall[row.report_date][0] = date_str 291 | date_to_mem[row.report_date][0] = date_str 292 | 293 | base_pos = project_to_pos[row.project_label] * 3 294 | date_to_wall[row.report_date][base_pos + 1] = row.median_wall 295 | date_to_wall[row.report_date][base_pos + 2] = row.min_wall 296 | date_to_wall[row.report_date][base_pos + 3] = row.max_wall 297 | date_to_mem[row.report_date][base_pos + 1] = row.median_memory 298 | date_to_mem[row.report_date][base_pos + 2] = row.min_memory 299 | date_to_mem[row.report_date][base_pos + 3] = row.max_memory 300 | 301 | return [headers] + list(date_to_wall.values()), [headers] + list( 302 | date_to_mem.values()), project_to_pos.keys() 303 | 304 | 305 | def _project_reports_components(date, projects): 306 | links = " - ".join([ 307 | '{project_label}' 308 | .format(date_subdir=date.strftime("%Y/%m/%d"), project_label=label) 309 | for label in projects 310 | ]) 311 | return "

Individual Project Reports: {}

".format(links) 312 | 313 | 314 | def _generate_report_for_date(date, storage_bucket, report_name, upload_report, 315 | bq_project, bq_table): 316 | """Generates a html report for the specified date & project. 317 | 318 | Args: 319 | date: the date to generate report for. 320 | storage_bucket: the Storage bucket to fetch data from/upload the report to. 321 | report_name: the name of the report on GS. 322 | upload_report: whether to upload the report to GCS. 323 | bq_project: the BigQuery project. 324 | bq_table: the BigQuery table. 325 | """ 326 | bq_date_cutoff = (date + datetime.timedelta(days=1)).strftime("%Y-%m-%d") 327 | 328 | graph_components = [] 329 | projects = set() 330 | 331 | for platform in PLATFORMS: 332 | 333 | historical_wall_data, historical_mem_data, platform_projects = _prepare_time_series_data( 334 | _query_bq(bq_project, bq_table, bq_date_cutoff, platform)) 335 | 336 | projects = projects.union(set(platform_projects)) 337 | # Generate a graph for that platform. 338 | row_content = [] 339 | 340 | row_content.append( 341 | _col_component( 342 | "col-sm-6", 343 | _historical_graph( 344 | metric="wall", 345 | metric_label="Wall Time (s)", 346 | data=historical_wall_data, 347 | platform=platform, 348 | ))) 349 | 350 | row_content.append( 351 | _col_component( 352 | "col-sm-6", 353 | _historical_graph( 354 | metric="memory", 355 | metric_label="Memory (MB)", 356 | data=historical_mem_data, 357 | platform=platform, 358 | ))) 359 | 360 | graph_components.append(_row_component("\n".join(row_content))) 361 | 362 | content = _full_report( 363 | date, 364 | graph_components="\n".join(graph_components), 365 | project_reports_components=_project_reports_components(date, projects)) 366 | 367 | if not os.path.exists(REPORTS_DIRECTORY): 368 | os.makedirs(REPORTS_DIRECTORY) 369 | 370 | report_tmp_file = "{}/report_master_{}.html".format(REPORTS_DIRECTORY, 371 | date.strftime("%Y%m%d")) 372 | with open(report_tmp_file, "w") as fo: 373 | fo.write(content) 374 | 375 | if upload_report: 376 | _upload_to_storage( 377 | report_tmp_file, storage_bucket, 378 | "all/{}/{}.html".format(date.strftime("%Y/%m/%d"), report_name)) 379 | else: 380 | print(content) 381 | 382 | 383 | def main(args=None): 384 | if args is None: 385 | args = sys.argv[1:] 386 | 387 | parser = argparse.ArgumentParser( 388 | description="Bazel Bench Daily Master Report") 389 | parser.add_argument("--date", type=str, help="Date in YYYY-mm-dd format.") 390 | parser.add_argument( 391 | "--storage_bucket", 392 | help="The GCP Storage bucket to fetch benchmark data from/upload the reports to." 393 | ) 394 | parser.add_argument( 395 | "--upload_report", 396 | type=bool, 397 | default=False, 398 | help="Whether to upload the report.") 399 | parser.add_argument( 400 | "--bigquery_table", 401 | help="The BigQuery table to fetch data from. In the format: project:table_identifier." 
402 | ) 403 | parser.add_argument( 404 | "--report_name", 405 | type=str, 406 | help="The name of the generated report.", 407 | default="report") 408 | parsed_args = parser.parse_args(args) 409 | 410 | date = ( 411 | datetime.datetime.strptime(parsed_args.date, "%Y-%m-%d").date() 412 | if parsed_args.date else datetime.date.today()) 413 | 414 | bq_project, bq_table = parsed_args.bigquery_table.split(":") 415 | _generate_report_for_date(date, parsed_args.storage_bucket, 416 | parsed_args.report_name, parsed_args.upload_report, 417 | bq_project, bq_table) 418 | 419 | 420 | if __name__ == "__main__": 421 | sys.exit(main()) 422 | -------------------------------------------------------------------------------- /report/generate_report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright 2019 The Bazel Authors. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http:#www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Generates a daily HTML report for the projects. 17 | 18 | The steps: 19 | 1. Get the necessary data from Storage for projects/date. 20 | 2. Manipulate the data to a format suitable for graphs. 21 | 3. Generate a HTML report containing the graphs. 22 | 4. Upload the generated HTMLs to GCP Storage. 23 | """ 24 | import argparse 25 | import collections 26 | import csv 27 | import datetime 28 | import json 29 | import io 30 | import os 31 | import statistics 32 | import subprocess 33 | import sys 34 | import tempfile 35 | import urllib.request 36 | from google.cloud import bigquery 37 | 38 | TMP = tempfile.gettempdir() 39 | REPORTS_DIRECTORY = os.path.join(TMP, ".bazel_bench", "reports") 40 | EVENTS_ORDER = [ 41 | "Launch Blaze", 42 | "Initialize command", 43 | "Load packages", 44 | "Analyze dependencies", 45 | "Analyze licenses", 46 | "Prepare for build", 47 | "Build artifacts", 48 | "Complete build", 49 | ] 50 | 51 | 52 | def _upload_to_storage(src_file_path, storage_bucket, destination_dir): 53 | """Uploads the file from src_file_path to the specified location on Storage.""" 54 | args = [ 55 | "gsutil", "cp", src_file_path, 56 | "gs://{}/{}".format(storage_bucket, destination_dir) 57 | ] 58 | subprocess.run(args) 59 | 60 | 61 | def _load_csv_from_remote_file(http_url): 62 | with urllib.request.urlopen(http_url) as resp: 63 | reader = csv.DictReader(io.TextIOWrapper(resp)) 64 | return [row for row in reader] 65 | 66 | 67 | def _load_json_from_remote_file(http_url): 68 | with urllib.request.urlopen(http_url) as resp: 69 | data = resp.read() 70 | encoding = resp.info().get_content_charset("utf-8") 71 | return json.loads(data.decode(encoding)) 72 | 73 | 74 | def _load_txt_from_remote_file(http_url): 75 | with urllib.request.urlopen(http_url) as resp: 76 | return resp.read().decode(resp.headers.get_content_charset() or "utf-8") 77 | 78 | 79 | def _get_storage_url(storage_bucket, dated_subdir): 80 | # In this case, the storage_bucket is a Domain-named bucket. 
81 | # https://cloud.google.com/storage/docs/domain-name-verification 82 | return "https://{}/{}".format(storage_bucket, dated_subdir) 83 | 84 | 85 | def _get_dated_subdir_for_project(project, date): 86 | return "{}/{}".format(project, date.strftime("%Y/%m/%d")) 87 | 88 | 89 | def _get_bazel_github_a_component(commit): 90 | return '{}'.format( 91 | "https://github.com/bazelbuild/bazel/commit/" + commit, commit) 92 | 93 | 94 | def _get_file_list_from_gs(bucket_name, gs_subdir): 95 | args = ["gsutil", "ls", "gs://{}/{}".format(bucket_name, gs_subdir)] 96 | command_output = subprocess.check_output(args) 97 | # The last element is just an empty string. 98 | decoded = command_output.decode("utf-8").split("\n")[:-1] 99 | 100 | return [line.strip("'").replace("gs://", "https://") for line in decoded] 101 | 102 | 103 | def _get_file_list_component(bucket_name, dated_subdir, platform): 104 | gs_subdir = "{}/{}".format(dated_subdir, platform) 105 | links = _get_file_list_from_gs(bucket_name, gs_subdir) 106 | li_components = [ 107 | '
  • {}
  • '.format(link, os.path.basename(link)) 108 | for link in links 109 | ] 110 | return """ 111 |
    112 |
      {}
    113 |
    114 | """.format(platform, "\n".join(li_components)) 115 | 116 | 117 | def _get_proportion_breakdown(aggr_json_profile): 118 | bazel_commit_to_phases = {} 119 | for entry in aggr_json_profile: 120 | bazel_commit = entry["bazel_source"] 121 | if bazel_commit not in bazel_commit_to_phases: 122 | bazel_commit_to_phases[bazel_commit] = [] 123 | bazel_commit_to_phases[bazel_commit].append({ 124 | "name": entry["name"], 125 | "dur": entry["dur"] 126 | }) 127 | 128 | bazel_commit_to_phase_proportion = {} 129 | for bazel_commit in bazel_commit_to_phases.keys(): 130 | total_time = sum( 131 | [float(entry["dur"]) for entry in bazel_commit_to_phases[bazel_commit]]) 132 | bazel_commit_to_phase_proportion[bazel_commit] = { 133 | entry["name"]: float(entry["dur"]) / total_time 134 | for entry in bazel_commit_to_phases[bazel_commit] 135 | } 136 | 137 | return bazel_commit_to_phase_proportion 138 | 139 | 140 | def _fit_data_to_phase_proportion(reading, proportion_breakdown): 141 | result = [] 142 | for phase in EVENTS_ORDER: 143 | if phase not in proportion_breakdown: 144 | result.append(0) 145 | else: 146 | result.append(reading * proportion_breakdown[phase]) 147 | return result 148 | 149 | 150 | def _short_form(commit): 151 | return commit[:7] 152 | 153 | 154 | def _prepare_data_for_graph(performance_data, aggr_json_profile): 155 | """Massage the data to fit a format suitable for graph generation.""" 156 | bazel_commit_to_phase_proportion = _get_proportion_breakdown( 157 | aggr_json_profile) 158 | ordered_commit_to_readings = collections.OrderedDict() 159 | for entry in performance_data: 160 | # Exclude measurements from failed runs in the graphs. 161 | # TODO(leba): Print the summary table, which includes info on which runs 162 | # failed. 163 | if entry["exit_status"] != "0": 164 | continue 165 | 166 | bazel_commit = entry["bazel_commit"] 167 | if bazel_commit not in ordered_commit_to_readings: 168 | ordered_commit_to_readings[bazel_commit] = { 169 | "bazel_commit": bazel_commit, 170 | "wall_readings": [], 171 | "memory_readings": [], 172 | } 173 | ordered_commit_to_readings[bazel_commit]["wall_readings"].append( 174 | float(entry["wall"])) 175 | ordered_commit_to_readings[bazel_commit]["memory_readings"].append( 176 | float(entry["memory"])) 177 | 178 | wall_data = [ 179 | ["Bazel Commit"] + EVENTS_ORDER + 180 | ["Median [Min, Max]", { 181 | "role": "interval" 182 | }, { 183 | "role": "interval" 184 | }] 185 | ] 186 | memory_data = [[ 187 | "Bazel Commit", "Memory (MB)", { 188 | "role": "interval" 189 | }, { 190 | "role": "interval" 191 | } 192 | ]] 193 | 194 | for obj in ordered_commit_to_readings.values(): 195 | commit = _short_form(obj["bazel_commit"]) 196 | 197 | median_wall = statistics.median(obj["wall_readings"]) 198 | min_wall = min(obj["wall_readings"]) 199 | max_wall = max(obj["wall_readings"]) 200 | wall_data.append([commit] + _fit_data_to_phase_proportion( 201 | median_wall, bazel_commit_to_phase_proportion[bazel_commit]) + 202 | [median_wall, min_wall, max_wall]) 203 | 204 | median_memory = statistics.median(obj["memory_readings"]) 205 | min_memory = min(obj["memory_readings"]) 206 | max_memory = max(obj["memory_readings"]) 207 | memory_data.append([commit, median_memory, min_memory, max_memory]) 208 | 209 | return wall_data, memory_data 210 | 211 | 212 | def _uncollapse_button(element_id, text): 213 | return """ 214 | 219 | """.format( 220 | element_id=element_id, text=text) 221 | 222 | 223 | def _row_component(content): 224 | return """ 225 |
<div class="row">{content}</div>
    226 | """.format(content=content) 227 | 228 | 229 | def _col_component(col_class, content): 230 | return """ 231 |
<div class="{col_class}">{content}</div>
    232 | """.format( 233 | col_class=col_class, content=content) 234 | 235 | 236 | def _commits_component(full_list, benchmarked_list): 237 | li_components = [] 238 | for commit in full_list: 239 | if commit in benchmarked_list: 240 | li_components.append("
  • {}
  • ".format( 241 | _get_bazel_github_a_component(commit))) 242 | else: 243 | li_components.append("
  • {}
  • ".format( 244 | _get_bazel_github_a_component(commit))) 245 | return """ 246 |
    247 | Commits: 248 |
      249 | {} 250 |
    251 |
    252 |
    253 | """.format("\n".join(li_components)) 254 | 255 | 256 | def _single_graph(metric, metric_label, data, platform, median_series=None): 257 | """Returns the HTML
    component of a single graph.""" 258 | title = "[{}] Bar Chart of {} vs Bazel commits".format(platform, metric_label) 259 | hAxis = "Bazel Commits (chronological order)" 260 | vAxis = metric_label 261 | chart_id = "{}-{}".format(platform, metric) 262 | 263 | return """ 264 | 298 |
    299 | """.format( 300 | title=title, 301 | data=data, 302 | hAxis=hAxis, 303 | vAxis=vAxis, 304 | chart_id=chart_id, 305 | median_series=median_series) 306 | 307 | 308 | def _historical_graph(metric, metric_label, data, platform, color): 309 | """Returns the HTML
<div> component of a single graph.""" 310 | title = "[{}] Historical values of {}".format(platform, metric_label) 311 | hAxis = "Date (commit)" 312 | vAxis = metric_label 313 | chart_id = "{}-{}-time".format(platform, metric) 314 | 315 | # Set viewWindow margins. 316 | minVal = sys.maxsize 317 | maxVal = 0 318 | for row in data[1:]: 319 | minVal = min(minVal, row[2]) 320 | maxVal = max(maxVal, row[3]) 321 | viewWindowMin = minVal * 0.95 322 | viewWindowMax = maxVal * 1.05 323 | 324 | return """ 325 | 362 |
    363 | """.format( 364 | title=title, 365 | data=data, 366 | hAxis=hAxis, 367 | vAxis=vAxis, 368 | chart_id=chart_id, 369 | viewWindowMin=viewWindowMin, 370 | viewWindowMax=viewWindowMax, 371 | color=color) 372 | 373 | 374 | def _summary_table(content, platform): 375 | """Returns the HTML
    component of the summary table.""" 376 | return """ 377 |
    {content}
    378 |
    379 | """.format( 380 | platform=platform, content=content) 381 | 382 | 383 | def _full_report(project, project_source, date, command, graph_components, 384 | raw_files_components): 385 | """Returns the full HTML of a complete report, from the graph components.""" 386 | return """ 387 | 388 | 389 | 390 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 406 | 407 | [{date}] {project} 408 | 409 | 410 |
    411 |
    412 |
    413 |

    [{project}] Report for {date}

    414 |
    415 |
    416 | 417 |
    418 |
    419 | 420 | (Date & time are in UTC.) 421 |
    422 | 443 |
    444 |
    445 | 446 |
    447 |
    448 | Command: {command} 449 |
    450 |
    451 | {graphs} 452 |

    Raw Files:

    453 | {files} 454 |
    455 | 456 | 457 | """.format( 458 | project=project, 459 | project_source=project_source, 460 | date=date.strftime("%Y/%m/%d"), 461 | command=command, 462 | graphs=graph_components, 463 | files=raw_files_components) 464 | 465 | 466 | def _query_bq(bq_project, bq_table, project_source, date_cutoff, platform): 467 | bq_client = bigquery.Client(project=bq_project) 468 | # Limit to the last 10 days. 469 | query = """ 470 | SELECT 471 | MIN(wall) as min_wall, 472 | APPROX_QUANTILES(wall, 101)[OFFSET(50)] AS median_wall, 473 | MAX(wall) as max_wall, 474 | MIN(memory) as min_memory, 475 | APPROX_QUANTILES(memory, 101)[OFFSET(50)] AS median_memory, 476 | MAX(memory) as max_memory, 477 | bazel_commit, 478 | DATE(MIN(started_at)) as report_date 479 | FROM ( 480 | SELECT 481 | wall, memory, bazel_commit, started_at 482 | FROM `{bq_project}.{bq_table}` 483 | WHERE 484 | bazel_commit IN ( 485 | SELECT 486 | bazel_commit 487 | FROM ( 488 | SELECT 489 | bazel_commit, started_at, 490 | RANK() OVER ( 491 | PARTITION BY project_commit 492 | ORDER BY started_at DESC 493 | ) AS `Rank` 494 | FROM `{bq_project}.{bq_table}` 495 | WHERE 496 | DATE(started_at) <= "{date_cutoff}" 497 | AND project_source = "{project_source}" 498 | AND exit_status = 0 499 | ) 500 | WHERE 501 | Rank=1 502 | ORDER BY started_at DESC 503 | LIMIT 10 504 | ) 505 | AND project_source = "{project_source}" 506 | AND exit_status = 0 507 | AND platform = "{platform}" 508 | ) 509 | GROUP BY bazel_commit 510 | ORDER BY report_date ASC; 511 | """.format( 512 | bq_project=bq_project, 513 | bq_table=bq_table, 514 | project_source=project_source, 515 | date_cutoff=date_cutoff, 516 | platform=platform) 517 | 518 | return bq_client.query(query) 519 | 520 | 521 | def _prepare_time_series_data(raw_data): 522 | """Massage the data to fit a format suitable for graph generation.""" 523 | wall_data = [[ 524 | "Date", "Wall Time", { 525 | "role": "interval" 526 | }, { 527 | "role": "interval" 528 | } 529 | ]] 530 | memory_data = [["Date", "Memory", {"role": "interval"}, {"role": "interval"}]] 531 | 532 | for row in raw_data: 533 | # Commits on day X are benchmarked on day X + 1. 534 | date_str = "{} ({})".format( 535 | (row.report_date - datetime.timedelta(days=1)).strftime("%Y-%m-%d"), 536 | row.bazel_commit[:7]) 537 | wall_data.append([date_str, row.median_wall, row.min_wall, row.max_wall]) 538 | memory_data.append( 539 | [date_str, row.median_memory, row.min_memory, row.max_memory]) 540 | 541 | return wall_data, memory_data 542 | 543 | 544 | def _generate_report_for_date(project, date, storage_bucket, report_name, 545 | upload_report, bq_project, bq_table): 546 | """Generates a html report for the specified date & project. 547 | 548 | Args: 549 | project: the project to generate report for. Check out bazel_bench.py. 550 | date: the date to generate report for. 551 | storage_bucket: the Storage bucket to fetch data from/upload the report to. 552 | report_name: the name of the report on GS. 553 | upload_report: whether to upload the report to GCS. 554 | bq_project: the BigQuery project. 555 | bq_table: the BigQuery table. 
556 | """ 557 | dated_subdir = _get_dated_subdir_for_project(project, date) 558 | bq_date_cutoff = (date + datetime.timedelta(days=1)).strftime("%Y-%m-%d") 559 | root_storage_url = _get_storage_url(storage_bucket, dated_subdir) 560 | metadata_file_url = "{}/METADATA".format(root_storage_url) 561 | metadata = _load_json_from_remote_file(metadata_file_url) 562 | 563 | graph_components = [] 564 | raw_files_components = [] 565 | graph_components.append(_uncollapse_button("commits", "Show commits")) 566 | graph_components.append( 567 | _row_component( 568 | _col_component( 569 | "col-sm-10", 570 | _commits_component(metadata["all_commits"], 571 | metadata["benchmarked_commits"])))) 572 | 573 | for platform_measurement in sorted( 574 | metadata["platforms"], key=lambda k: k["platform"]): 575 | # Get the data 576 | performance_data = _load_csv_from_remote_file("{}/{}".format( 577 | root_storage_url, platform_measurement["perf_data"])) 578 | aggr_json_profile = _load_csv_from_remote_file("{}/{}".format( 579 | root_storage_url, platform_measurement["aggr_json_profiles"])) 580 | summary_text = _load_txt_from_remote_file("{}/{}".format( 581 | root_storage_url, 582 | platform_measurement["perf_data"].replace(".csv", ".txt"))) 583 | 584 | wall_data, memory_data = _prepare_data_for_graph(performance_data, 585 | aggr_json_profile) 586 | platform = platform_measurement["platform"] 587 | 588 | historical_wall_data, historical_mem_data = _prepare_time_series_data( 589 | _query_bq(bq_project, bq_table, metadata["project_source"], 590 | bq_date_cutoff, platform)) 591 | 592 | # Generate a graph for that platform. 593 | row_content = [] 594 | row_content.append( 595 | _col_component( 596 | "col-sm-6", 597 | _single_graph( 598 | metric="wall", 599 | metric_label="Wall Time (s)", 600 | data=wall_data, 601 | platform=platform, 602 | median_series=len(EVENTS_ORDER)))) 603 | 604 | row_content.append( 605 | _col_component( 606 | "col-sm-6", 607 | _historical_graph( 608 | metric="wall", 609 | metric_label="Wall Time (s)", 610 | data=historical_wall_data, 611 | platform=platform, 612 | color="#dd4477"))) 613 | 614 | row_content.append( 615 | _col_component( 616 | "col-sm-6", 617 | _single_graph( 618 | metric="memory", 619 | metric_label="Memory (MB)", 620 | data=memory_data, 621 | platform=platform, 622 | ))) 623 | 624 | row_content.append( 625 | _col_component( 626 | "col-sm-6", 627 | _historical_graph( 628 | metric="memory", 629 | metric_label="Memory (MB)", 630 | data=historical_mem_data, 631 | platform=platform, 632 | color="#3366cc"))) 633 | 634 | row_content.append( 635 | _col_component( 636 | "col-sm-12", 637 | _uncollapse_button("summary-{}".format(platform), 638 | "Show Summary Table"))) 639 | row_content.append( 640 | _col_component("col-sm-12", 641 | _summary_table(content=summary_text, platform=platform))) 642 | 643 | graph_components.append( 644 | _row_component( 645 | _col_component( 646 | "col-sm-5", 647 | '

    {}

    '.format(platform)))) 648 | raw_files_components.append( 649 | _uncollapse_button("raw_files_%s" % platform, 650 | "Show raw files for %s" % platform)) 651 | raw_files_components.append( 652 | _row_component( 653 | _col_component( 654 | "col-sm-10", 655 | _get_file_list_component(storage_bucket, dated_subdir, 656 | platform)))) 657 | graph_components.append(_row_component("\n".join(row_content))) 658 | 659 | content = _full_report( 660 | project, 661 | metadata["project_source"], 662 | date, 663 | command=metadata["command"], 664 | graph_components="\n".join(graph_components), 665 | raw_files_components="\n".join(raw_files_components)) 666 | 667 | if not os.path.exists(REPORTS_DIRECTORY): 668 | os.makedirs(REPORTS_DIRECTORY) 669 | 670 | report_tmp_file = "{}/report_{}_{}.html".format(REPORTS_DIRECTORY, project, 671 | date.strftime("%Y%m%d")) 672 | with open(report_tmp_file, "w") as fo: 673 | fo.write(content) 674 | 675 | if upload_report: 676 | _upload_to_storage(report_tmp_file, storage_bucket, 677 | dated_subdir + "/{}.html".format(report_name)) 678 | else: 679 | print(content) 680 | 681 | 682 | def main(args=None): 683 | if args is None: 684 | args = sys.argv[1:] 685 | 686 | parser = argparse.ArgumentParser(description="Bazel Bench Daily Report") 687 | parser.add_argument("--date", type=str, help="Date in YYYY-mm-dd format.") 688 | parser.add_argument( 689 | "--project", 690 | action="append", 691 | help=( 692 | "Projects to generate report for. Use the storage_subdir defined " 693 | "in the main bazel-bench script in bazelbuild/continuous-integration." 694 | ), 695 | ) 696 | parser.add_argument( 697 | "--storage_bucket", 698 | help="The GCP Storage bucket to fetch benchmark data from/upload the reports to." 699 | ) 700 | parser.add_argument( 701 | "--upload_report", 702 | type=bool, 703 | default=False, 704 | help="Whether to upload the report.") 705 | parser.add_argument( 706 | "--bigquery_table", 707 | help="The BigQuery table to fetch data from. In the format: project:table_identifier." 708 | ) 709 | parser.add_argument( 710 | "--report_name", 711 | type=str, 712 | help="The name of the generated report.", 713 | default="report") 714 | parsed_args = parser.parse_args(args) 715 | 716 | date = ( 717 | datetime.datetime.strptime(parsed_args.date, "%Y-%m-%d").date() 718 | if parsed_args.date else datetime.date.today()) 719 | 720 | bq_project, bq_table = parsed_args.bigquery_table.split(":") 721 | for project in parsed_args.project: 722 | _generate_report_for_date(project, date, parsed_args.storage_bucket, 723 | parsed_args.report_name, 724 | parsed_args.upload_report, bq_project, bq_table) 725 | 726 | 727 | if __name__ == "__main__": 728 | sys.exit(main()) 729 | -------------------------------------------------------------------------------- /testutils/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_library") 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | filegroup( 6 | name = "testutils-srcs", 7 | srcs = glob(["*.py"]), 8 | ) 9 | 10 | py_library( 11 | name = "testutils", 12 | srcs = [":testutils-srcs"], 13 | ) 14 | -------------------------------------------------------------------------------- /testutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /testutils/fakes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fakes for some functions/classes.""" 15 | import sys 16 | 17 | 18 | def fake_log(text): 19 | """Fakes the log function. Prints to stderr.""" 20 | sys.stderr.write(text) 21 | 22 | 23 | def fake_exec_command(args, shell=False, fail_if_nonzero=True, cwd=None): 24 | """Fakes the _exec_command function.""" 25 | fake_log(' '.join(args)) 26 | 27 | 28 | class FakeBazel(object): 29 | """Fake class for utils.Bazel""" 30 | 31 | def __init__(self, bazel_binary_path, bazelrc): 32 | # Do nothing 33 | return 34 | 35 | def command(self, command_name, args=None, collect_memory=False): 36 | """Fake method to verify that the command is executed.""" 37 | args = args or [] 38 | fake_log('Executing Bazel command: bazel %s %s' % 39 | (command_name, ' '.join(args))) 40 | -------------------------------------------------------------------------------- /third_party/BUILD: -------------------------------------------------------------------------------- 1 | exports_files(glob(["**"])) 2 | -------------------------------------------------------------------------------- /third_party/requirements.in: -------------------------------------------------------------------------------- 1 | absl-py==2.0.0 2 | cachetools==5.3.1 3 | certifi==2023.7.22 4 | chardet==5.2.0 5 | funcsigs==1.0.2 6 | futures==3.1.1 7 | gitdb2==4.0.2 8 | GitPython==3.1.41 9 | google-api-core==2.12.0 10 | google-auth==2.23.3 11 | google-cloud-bigquery==3.12.0 12 | google-cloud-storage==2.12.0 13 | google-cloud-core==2.3.3 14 | google-resumable-media==2.6.0 15 | googleapis-common-protos==1.61.0 16 | idna==3.4 17 | mock==5.1.0 18 | numpy==1.26.1 19 | pbr==5.1.3 20 | protobuf==4.24.4 21 | psutil==5.9.6 22 | pyasn1==0.5.0 23 | pyasn1-modules==0.3.0 24 | pytz==2018.9 25 | requests==2.31.0 26 | rsa==4.9 27 | scipy==1.11.3 28 | six==1.16.0 29 | urllib3==2.2.2 30 | PyYAML==6.0.1 31 | cython==3.0.3 32 | -------------------------------------------------------------------------------- /third_party/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 
with Python 3.10 3 | # by the following command: 4 | # 5 | # pip-compile --output-file=requirements.txt requirements.in 6 | # 7 | absl-py==2.0.0 8 | # via -r third_party/requirements.in 9 | cachetools==5.3.1 10 | # via 11 | # -r third_party/requirements.in 12 | # google-auth 13 | certifi==2023.7.22 14 | # via 15 | # -r third_party/requirements.in 16 | # requests 17 | chardet==5.2.0 18 | # via -r third_party/requirements.in 19 | charset-normalizer==3.3.2 20 | # via requests 21 | cython==3.0.3 22 | # via -r third_party/requirements.in 23 | funcsigs==1.0.2 24 | # via -r third_party/requirements.in 25 | futures==3.1.1 26 | # via -r third_party/requirements.in 27 | gitdb==4.0.11 28 | # via 29 | # gitdb2 30 | # gitpython 31 | gitdb2==4.0.2 32 | # via -r third_party/requirements.in 33 | gitpython==3.1.41 34 | # via -r third_party/requirements.in 35 | google-api-core[grpc]==2.12.0 36 | # via 37 | # -r third_party/requirements.in 38 | # google-cloud-bigquery 39 | # google-cloud-core 40 | # google-cloud-storage 41 | google-auth==2.23.3 42 | # via 43 | # -r third_party/requirements.in 44 | # google-api-core 45 | # google-cloud-core 46 | # google-cloud-storage 47 | google-cloud-bigquery==3.12.0 48 | # via -r third_party/requirements.in 49 | google-cloud-core==2.3.3 50 | # via 51 | # -r third_party/requirements.in 52 | # google-cloud-bigquery 53 | # google-cloud-storage 54 | google-cloud-storage==2.12.0 55 | # via -r third_party/requirements.in 56 | google-crc32c==1.5.0 57 | # via 58 | # google-cloud-storage 59 | # google-resumable-media 60 | google-resumable-media==2.6.0 61 | # via 62 | # -r third_party/requirements.in 63 | # google-cloud-bigquery 64 | # google-cloud-storage 65 | googleapis-common-protos==1.61.0 66 | # via 67 | # -r third_party/requirements.in 68 | # google-api-core 69 | # grpcio-status 70 | grpcio==1.62.1 71 | # via 72 | # google-api-core 73 | # google-cloud-bigquery 74 | # grpcio-status 75 | grpcio-status==1.62.1 76 | # via google-api-core 77 | idna==3.4 78 | # via 79 | # -r third_party/requirements.in 80 | # requests 81 | mock==5.1.0 82 | # via -r third_party/requirements.in 83 | numpy==1.26.1 84 | # via 85 | # -r third_party/requirements.in 86 | # scipy 87 | packaging==24.0 88 | # via google-cloud-bigquery 89 | pbr==5.1.3 90 | # via -r third_party/requirements.in 91 | proto-plus==1.23.0 92 | # via google-cloud-bigquery 93 | protobuf==4.24.4 94 | # via 95 | # -r third_party/requirements.in 96 | # google-api-core 97 | # google-cloud-bigquery 98 | # googleapis-common-protos 99 | # grpcio-status 100 | # proto-plus 101 | psutil==5.9.6 102 | # via -r third_party/requirements.in 103 | pyasn1==0.5.0 104 | # via 105 | # -r third_party/requirements.in 106 | # pyasn1-modules 107 | # rsa 108 | pyasn1-modules==0.3.0 109 | # via 110 | # -r third_party/requirements.in 111 | # google-auth 112 | python-dateutil==2.9.0.post0 113 | # via google-cloud-bigquery 114 | pytz==2018.9 115 | # via -r third_party/requirements.in 116 | pyyaml==6.0.1 117 | # via -r third_party/requirements.in 118 | requests==2.31.0 119 | # via 120 | # -r third_party/requirements.in 121 | # google-api-core 122 | # google-cloud-bigquery 123 | # google-cloud-storage 124 | rsa==4.9 125 | # via 126 | # -r third_party/requirements.in 127 | # google-auth 128 | scipy==1.11.3 129 | # via -r third_party/requirements.in 130 | six==1.16.0 131 | # via 132 | # -r third_party/requirements.in 133 | # python-dateutil 134 | smmap==5.0.1 135 | # via gitdb 136 | urllib3==2.2.2 137 | # via 138 | # -r third_party/requirements.in 139 | # requests 
140 | -------------------------------------------------------------------------------- /utils/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test") 2 | load("@third_party//:requirements.bzl", "requirement") 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | filegroup( 7 | name = "utils-srcs", 8 | srcs = glob( 9 | ["*.py"], 10 | exclude = [ 11 | "*_test.py", 12 | "json_profile_merger.py", 13 | "bigquery_upload.py", 14 | "storage_upload.py", 15 | ], 16 | ), 17 | ) 18 | 19 | py_library( 20 | name = "utils", 21 | srcs = [":utils-srcs"], 22 | deps = [ 23 | requirement("absl-py"), 24 | requirement("certifi"), 25 | requirement("chardet"), 26 | requirement("funcsigs"), 27 | requirement("idna"), 28 | requirement("numpy"), 29 | requirement("pbr"), 30 | requirement("psutil"), 31 | requirement("pyasn1"), 32 | requirement("pyasn1-modules"), 33 | requirement("rsa"), 34 | requirement("scipy"), 35 | requirement("six"), 36 | requirement("urllib3"), 37 | requirement("PyYAML"), 38 | ], 39 | ) 40 | 41 | py_library( 42 | name = "google-common", 43 | deps = [ 44 | requirement("cachetools"), 45 | requirement("google-api-core"), 46 | requirement("google-auth"), 47 | requirement("google-cloud-core"), 48 | requirement("google-resumable-media"), 49 | requirement("googleapis-common-protos"), 50 | requirement("protobuf"), 51 | requirement("pytz"), 52 | requirement("requests"), 53 | ], 54 | ) 55 | 56 | py_binary( 57 | name = "bigquery_upload", 58 | srcs = ["bigquery_upload.py"], 59 | deps = [ 60 | # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14, 61 | # google-cloud-bigquery must be listed first. 62 | requirement("google-cloud-bigquery"), 63 | requirement("absl-py"), 64 | requirement("futures"), 65 | ":google-common", 66 | ], 67 | ) 68 | 69 | py_binary( 70 | name = "storage_upload", 71 | srcs = ["storage_upload.py"], 72 | deps = [ 73 | # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14, 74 | # google-cloud-storage must be listed first. 75 | requirement("google-cloud-storage"), 76 | requirement("absl-py"), 77 | ":google-common", 78 | ], 79 | ) 80 | 81 | py_binary( 82 | name = "json_profiles_merger", 83 | srcs = ["json_profiles_merger.py"], 84 | deps = [ 85 | ":utils", 86 | requirement("absl-py"), 87 | ], 88 | ) 89 | 90 | py_test( 91 | name = "bazel_test", 92 | size = "small", 93 | srcs = ["bazel_test.py"], 94 | deps = [ 95 | ":utils", 96 | requirement("mock"), 97 | ], 98 | ) 99 | 100 | py_test( 101 | name = "values_test", 102 | size = "small", 103 | srcs = ["values_test.py"], 104 | deps = [ 105 | ":utils", 106 | requirement("mock"), 107 | ], 108 | ) 109 | 110 | py_test( 111 | name = "json_profiles_merger_lib_test", 112 | size = "small", 113 | srcs = ["json_profiles_merger_lib_test.py"], 114 | deps = [ 115 | ":utils", 116 | requirement("mock"), 117 | ], 118 | ) 119 | 120 | py_test( 121 | name = "benchmark_config_test", 122 | size = "small", 123 | srcs = ["benchmark_config_test.py"], 124 | deps = [ 125 | ":utils", 126 | ], 127 | ) 128 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /utils/bazel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Handles Bazel invocations and measures their time/memory consumption.""" 15 | import subprocess 16 | import tempfile 17 | import os 18 | import time 19 | import psutil 20 | import datetime 21 | import utils.logger as logger 22 | 23 | 24 | class Bazel(object): 25 | """Class to handle Bazel invocations. 26 | 27 | Allows to measure resource consumption of each command. 28 | 29 | Attributes: 30 | bazel_binary_path: A string specifying the path to the bazel binary to be 31 | invoked. 32 | bazelrc: A string specifying the argument to the bazelrc flag. Uses 33 | /dev/null if not set explicitly. 34 | """ 35 | 36 | def __init__(self, bazel_binary_path, startup_options): 37 | self._bazel_binary_path = str(bazel_binary_path) 38 | self._startup_options = startup_options 39 | self._pid = None 40 | 41 | def command(self, command, args=None): 42 | """Invokes a command with a bazel binary. 43 | 44 | Args: 45 | command: A string specifying the bazel command to invoke. 46 | args: An optional list of strings representing additional arguments to the 47 | bazel command. 48 | 49 | Returns: 50 | A dict containing collected metrics (wall, cpu, system times and 51 | optionally memory), the exit_status of the Bazel invocation, and the 52 | start datetime (in UTC). 53 | Returns None instead if the command equals 'shutdown'. 
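    For example, a successful 'build' invocation might return (these readings
    mirror the values exercised in utils/bazel_test.py; real numbers vary):
      {'wall': 39.5, 'cpu': 26.8, 'system': 2.0, 'memory': 666,
       'exit_status': 0, 'started_at': datetime.datetime(...)}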
54 | """ 55 | args = args or [] 56 | logger.log('Executing Bazel command: bazel %s %s %s' % 57 | (' '.join(self._startup_options), command, ' '.join(args))) 58 | 59 | result = dict() 60 | result['started_at'] = datetime.datetime.utcnow() 61 | 62 | before_times = self._get_times() 63 | dev_null = open(os.devnull, 'w') 64 | exit_status = 0 65 | 66 | with tempfile.NamedTemporaryFile() as tmp_stdout: 67 | try: 68 | subprocess.check_call( 69 | [self._bazel_binary_path] + self._startup_options + [command] + args, 70 | stdout=dev_null, 71 | stderr=tmp_stdout.file) 72 | except subprocess.CalledProcessError as e: 73 | exit_status = e.returncode 74 | logger.log_error('Bazel command failed with exit code %s' % e.returncode) 75 | tmp_stdout.seek(0) 76 | logger.log_error(tmp_stdout.read().decode()) 77 | 78 | 79 | if command == 'shutdown': 80 | return None 81 | after_times = self._get_times() 82 | 83 | for kind in ['wall', 'cpu', 'system']: 84 | result[kind] = after_times[kind] - before_times[kind] 85 | result['exit_status'] = exit_status 86 | 87 | # We do a number of runs here to reduce the noise in the data. 88 | result['memory'] = min([self._get_heap_size() for _ in range(5)]) 89 | 90 | return result 91 | 92 | def _get_pid(self): 93 | """Returns the pid of the server. 94 | 95 | Has the side effect of starting the server if none is running. Caches the 96 | result. 97 | """ 98 | if not self._pid: 99 | self._pid = (int)( 100 | subprocess.check_output([self._bazel_binary_path] + 101 | self._startup_options + 102 | ['info', 'server_pid'])) 103 | return self._pid 104 | 105 | def _get_times(self): 106 | """Retrieves and returns the used times.""" 107 | # TODO(twerth): Getting the pid have the side effect of starting up the 108 | # Bazel server. There are benchmarks where we don't want this, so we 109 | # probably should make it configurable. 110 | process_data = psutil.Process(pid=self._get_pid()) 111 | cpu_times = process_data.cpu_times() 112 | return { 113 | 'wall': time.time(), 114 | 'cpu': cpu_times.user, 115 | 'system': cpu_times.system, 116 | } 117 | 118 | def _get_heap_size(self): 119 | """Retrieves and returns the used heap size.""" 120 | return (int)( 121 | subprocess.check_output([self._bazel_binary_path] + 122 | self._startup_options + 123 | ['info', 'used-heap-size-after-gc'])[:-3]) 124 | -------------------------------------------------------------------------------- /utils/bazel_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for utils.bazel.""" 15 | import collections 16 | import mock 17 | import unittest 18 | import bazel 19 | 20 | 21 | class BazelTest(unittest.TestCase): 22 | 23 | @mock.patch.object(bazel.subprocess, 'check_output', return_value='123\n') 24 | def test_get_pid(self, check_output_mock): 25 | b = bazel.Bazel('foo', []) 26 | self.assertEqual(123, b._get_pid()) 27 | self.assertEqual(123, b._get_pid()) 28 | # Verify that even that we called _get_pid twice, the we didn't spawn a 29 | # subprocess twice. 30 | self.assertEqual(1, check_output_mock.call_count) 31 | 32 | @mock.patch.object(bazel.subprocess, 'check_output', return_value='280MB\n') 33 | def test_get_heap_size(self, _): 34 | b = bazel.Bazel('foo', []) 35 | self.assertEqual(280, b._get_heap_size()) 36 | 37 | @mock.patch.object(bazel.Bazel, '_get_pid', return_value=123) 38 | @mock.patch.object(bazel.time, 'time', return_value=98.76) 39 | @mock.patch.object(bazel.psutil, 'Process') 40 | def test_get_times(self, process_mock, unused_time_mock, unused_get_pid_mock): 41 | cpu_times = collections.namedtuple('cpu_times', 'user system') 42 | cpu_times_mock = process_mock.return_value 43 | cpu_times_mock.cpu_times.return_value = cpu_times(user=47.11, system=23.42) 44 | 45 | b = bazel.Bazel('foo', []) 46 | self.assertEqual({ 47 | 'wall': 98.76, 48 | 'cpu': 47.11, 49 | 'system': 23.42, 50 | }, b._get_times()) 51 | 52 | @mock.patch.object(bazel.Bazel, '_get_pid', return_value=123) 53 | @mock.patch.object(bazel.Bazel, '_get_heap_size') 54 | @mock.patch.object(bazel.Bazel, '_get_times') 55 | @mock.patch.object(bazel.subprocess, 'check_call', return_value=0) 56 | @mock.patch('datetime.datetime') 57 | def test_command(self, datetime_mock, subprocess_mock, get_times_mock, 58 | get_heap_size_mock, _): 59 | get_times_mock.side_effect = [ 60 | { 61 | 'wall': 42, 62 | 'cpu': 0.5, 63 | 'system': 12.3, 64 | }, 65 | { 66 | 'wall': 81.5, 67 | 'cpu': 27.3, 68 | 'system': 14.3, 69 | }, 70 | ] 71 | get_heap_size_mock.side_effect = [700, 666, 668, 670, 667] 72 | datetime_mock.utcnow.return_value = 'fake_date' 73 | 74 | b = bazel.Bazel('foo', []) 75 | self.assertEqual( 76 | { 77 | 'wall': 39.5, 78 | 'cpu': 26.8, 79 | 'system': 2.0, 80 | 'memory': 666, 81 | 'exit_status': 0, 82 | 'started_at': 'fake_date' 83 | }, 84 | b.command( 85 | command='build', args=['bar', 'zoo'])) 86 | subprocess_mock.assert_called_with( 87 | ['foo', 'build', 'bar', 'zoo'], stdout=mock.ANY, stderr=mock.ANY) 88 | 89 | if __name__ == '__main__': 90 | unittest.main() 91 | -------------------------------------------------------------------------------- /utils/benchmark_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The Bazel Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http:#www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Manages the configuration file for benchmarking. 
15 | 
16 | Currently supported flags/attributes:
17 |   - project_commit
18 |   - project_source
19 |   - bazel_commit
20 |   - bazel_binary
21 |   - runs
22 |   - collect_profile
23 |   - the command (which includes startup options, command, targets, command
24 |     options)
25 | 
26 | Note that the pluralized options (e.g. --project_commits) have to be repeated
27 | across units or as a global option in their singular form.
28 | 
29 | Example of a config file:
30 | benchmark_project_commits: False
31 | global_options:
32 |   project_commit: 595a730
33 |   runs: 5
34 |   clean: true
35 |   shutdown: true
36 |   collect_profile: false
37 |   project_source: /path/to/project/repo
38 | units:
39 |  - bazel_commit: 595a730
40 |    command: info
41 |  - bazel_binary: /tmp/bazel
42 |    command: --host_jvm_debug build --nobuild //src:bazel
43 |  - bazel_binary: /tmp/bazel
44 |    command: info
45 |    project_commit: 595a731
46 | 
47 | The "benchmarking units" represent independent sets of conditions to be
48 | benchmarked.
49 | 
50 | """
51 | 
52 | import copy
53 | import shlex
54 | import sys
55 | import yaml
56 | 
57 | 
58 | 
59 | class BenchmarkConfig(object):
60 |   """Manages the configuration file for benchmarking."""
61 | 
62 |   # TODO(leba): have a single source of truth for this.
63 |   # TODO(leba): Consider replacing dict with collections.namedtuple.
64 |   _DEFAULT_VALS = {
65 |       'runs': 5,
66 |       'collect_profile': False,
67 |       'bazel_source': 'https://github.com/bazelbuild/bazel.git',
68 |       'env_configure': None,
69 |       'clean': True,
70 |       'shutdown': True,
71 |   }
72 | 
73 |   def __init__(self, units, benchmark_project_commits=False):
74 |     """Initializes the BenchmarkConfig with the given benchmarking units.
75 | 
76 |     Args:
77 |       units: the benchmarking units.
78 |       benchmark_project_commits: whether we're benchmarking project commits
79 |         (instead of bazel commits). This makes a difference in how we generate
80 |         our report.
81 |     """
82 |     self._units = units
83 |     self._benchmark_project_commits = benchmark_project_commits
84 | 
85 |   def get_units(self):
86 |     """Returns a copy of the parsed units."""
87 |     return copy.copy(self._units)
88 | 
89 |   def get_bazel_commits(self):
90 |     """Returns the list of specified bazel_commits."""
91 |     return [
92 |         unit['bazel_commit'] for unit in self._units if 'bazel_commit' in unit
93 |     ]
94 | 
95 |   def get_project_commits(self):
96 |     """Returns the list of specified project_commits."""
97 |     return [
98 |         unit['project_commit'] for unit in self._units if 'project_commit' in unit
99 |     ]
100 | 
101 |   def get_project_source(self):
102 |     """Returns the common project_source across the units.
103 | 
104 |     We don't allow multiple project_source for now.
105 |     """
106 |     return None if not self._units else self._units[0]['project_source']
107 | 
108 |   def get_bazel_source(self):
109 |     """Returns the common bazel_source across the units.
110 | 
111 |     We don't allow multiple bazel_source for now.
112 |     """
113 |     return None if not self._units else self._units[0]['bazel_source']
114 | 
115 |   def benchmark_project_commits(self):
116 |     """Returns whether we're benchmarking project commits (instead of bazel commits)."""
117 |     return self._benchmark_project_commits
118 | 
119 |   @classmethod
120 |   def from_file(cls, config_file_path):
121 |     """Loads the YAML config file and constructs a BenchmarkConfig.
122 | 
123 |     Args:
124 |       config_file_path: the path to the configuration file.
125 | 
126 |     Returns:
127 |       The created config object.
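   | 
   |     Example (the path is hypothetical):
   | 
   |       config = BenchmarkConfig.from_file('/path/to/benchmark_config.yml')
   |       units = config.get_units()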
128 |     """
129 |     with open(config_file_path, 'r') as fi:
130 |       return cls.from_string(fi.read())
131 | 
132 |   @classmethod
133 |   def from_string(cls, string):
134 |     """Parses the content of a YAML config file and constructs a BenchmarkConfig.
135 | 
136 |     Args:
137 |       string: a string in YAML file format. Usually the content of a config
138 |         file.
139 | 
140 |     Returns:
141 |       The created config object.
142 |     """
143 |     config = yaml.safe_load(string)
144 |     if 'units' not in config:
145 |       raise ValueError('Wrong config file format. Please check the example.')
146 | 
147 |     benchmark_project_commits = ('benchmark_project_commits' in config and
148 |                                  config['benchmark_project_commits'])
149 | 
150 |     global_options = (
151 |         config['global_options'] if 'global_options' in config else {})
152 | 
153 |     parsed_units = []
154 |     for local_options in config['units']:
155 |       unit = copy.copy(global_options)
156 |       # Local options override global ones.
157 |       unit.update(local_options)
158 |       parsed_units.append(cls._parse_unit(unit))
159 |     return cls(parsed_units, benchmark_project_commits)
160 | 
161 |   @classmethod
162 |   def from_flags(cls, bazel_commits, bazel_binaries, project_commits,
163 |                  bazel_source, project_source, env_configure, runs,
164 |                  collect_profile, command, clean, shutdown):
165 |     """Creates the BenchmarkConfig based on specified flags.
166 | 
167 |     Args:
168 |       bazel_commits: the bazel commits.
169 |       bazel_binaries: paths to pre-built bazel binaries.
170 |       project_commits: the project commits.
171 |       bazel_source: Either a path to the local Bazel repo or a https url to a
172 |         GitHub repository.
173 |       project_source: Either a path to the local git project to be built or a
174 |         https url to a GitHub repository.
175 |       env_configure: The command to run on the project repository before building it.
176 |       runs: The number of benchmark runs to perform for each combination.
177 |       collect_profile: Whether to collect a JSON profile.
178 |       command: the full command to benchmark, optionally with startup options
179 |         prepended, e.g. "--noexobazel build --nobuild ...".
180 |       clean: Whether to invoke `bazel clean` between runs.
181 |       shutdown: Whether to invoke `bazel shutdown` between runs.
182 | 
183 |     Returns:
184 |       The created config object.
185 |     """
186 |     units = []
187 |     for bazel_commit in bazel_commits:
188 |       for project_commit in project_commits:
189 |         units.append(
190 |             cls._parse_unit({
191 |                 'bazel_commit': bazel_commit,
192 |                 'project_commit': project_commit,
193 |                 'bazel_source': bazel_source,
194 |                 'project_source': project_source,
195 |                 'runs': runs,
196 |                 'collect_profile': collect_profile,
197 |                 'env_configure': env_configure,
198 |                 'command': command,
199 |                 'clean': clean,
200 |                 'shutdown': shutdown,
201 |             }))
202 |     for bazel_binary in bazel_binaries:
203 |       for project_commit in project_commits:
204 |         units.append(
205 |             cls._parse_unit({
206 |                 'bazel_binary': bazel_binary,
207 |                 'project_commit': project_commit,
208 |                 'bazel_source': bazel_source,
209 |                 'project_source': project_source,
210 |                 'runs': runs,
211 |                 'collect_profile': collect_profile,
212 |                 'env_configure': env_configure,
213 |                 'command': command,
214 |                 'clean': clean,
215 |                 'shutdown': shutdown,
216 |             }))
217 |     return cls(units, benchmark_project_commits=(len(project_commits) > 1))
218 | 
219 |   @classmethod
220 |   def _parse_unit(cls, unit):
221 |     """Performs parsing of a benchmarking unit.
222 | 
223 |     Also fills up default values for attributes if they're not specified.
224 | 
225 |     Args:
226 |       unit: the benchmarking unit.
227 | 
228 |     Returns:
229 |       A dictionary that contains various attributes of the benchmarking unit.
230 |     """
231 |     parsed_unit = copy.copy(cls._DEFAULT_VALS)
232 |     parsed_unit.update(unit)
233 | 
234 |     if 'command' not in unit or not isinstance(unit['command'], str):
235 |       raise ValueError('A command has to be specified either as a global option'
236 |                        ' or in each individual benchmarking unit.')
237 |     full_command_tokens = shlex.split(unit['command'])
238 |     startup_options = []
239 |     while full_command_tokens and full_command_tokens[0].startswith('--'):
240 |       startup_options.append(full_command_tokens.pop(0))
241 |     try:
242 |       command = full_command_tokens.pop(0)
243 |     except IndexError:
244 |       raise ValueError('\'%s\' does not contain a Bazel command (e.g. build)' %
245 |                        unit['command'])
246 |     options = []
247 |     # Next, parse the options. We identify these by tokens that start with `--`.
248 |     # The exception is the token `--`, which is a valid syntax used to separate
249 |     # the flags from the targets: https://bazel.build/run/build#specifying-build-targets
250 |     # Example: bazel build --flag_a -- //foo -//excluded/...
251 |     while full_command_tokens and full_command_tokens[0].startswith('--') and full_command_tokens[0] != '--':
252 |       options.append(full_command_tokens.pop(0))
253 |     # This is a workaround for https://github.com/bazelbuild/bazel/issues/3236.
254 |     if sys.platform.startswith('linux'):
255 |       options.append('--sandbox_tmpfs_path=/tmp')
256 | 
257 |     targets = full_command_tokens
258 | 
259 |     # Attributes that need special handling.
260 |     parsed_unit['startup_options'] = startup_options
261 |     parsed_unit['command'] = command
262 |     parsed_unit['options'] = options
263 |     parsed_unit['targets'] = targets
264 | 
265 |     return parsed_unit
266 | 
--------------------------------------------------------------------------------
/utils/benchmark_config_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tests for benchmark_config."""
15 | 
16 | import benchmark_config
17 | import unittest
18 | import os
19 | import sys
20 | import tempfile
21 | 
22 | 
23 | def _pad_test_command_options(options):
24 |   # This is a workaround for https://github.com/bazelbuild/bazel/issues/3236.
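   |   # benchmark_config._parse_unit appends this flag on Linux, so the expected
   |   # option lists in the assertions below have to be padded the same way.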
25 |   if sys.platform.startswith('linux'):
26 |     return options + ['--sandbox_tmpfs_path=/tmp']
27 |   return options
28 | 
29 | class BenchmarkConfigTest(unittest.TestCase):
30 | 
31 | 
32 |   def test_parsing_from_file(self):
33 |     file_content = """
34 | units:
35 |  - bazel_commit: hash1
36 |    project_commit: hash1
37 |    command: info
38 | """
39 |     _, config_file_path = tempfile.mkstemp()
40 |     with open(config_file_path, 'w') as tf:
41 |       tf.write(file_content)
42 |     result = benchmark_config.BenchmarkConfig.from_file(config_file_path)
43 | 
44 |     self.assertEqual(result._units, [{
45 |         'bazel_commit': 'hash1',
46 |         'project_commit': 'hash1',
47 |         'bazel_source': 'https://github.com/bazelbuild/bazel.git',
48 |         'runs': 5,
49 |         'collect_profile': False,
50 |         'command': 'info',
51 |         'startup_options': [],
52 |         'options': _pad_test_command_options([]),
53 |         'targets': [],
54 |         'env_configure': None,
55 |         'clean': True,
56 |         'shutdown': True,
57 |     }])
58 |     self.assertEqual(result._benchmark_project_commits, False)
59 |     os.remove(config_file_path)
60 | 
61 | 
62 |   def test_parsing_from_string(self):
63 |     file_content = """
64 | benchmark_project_commits: False
65 | global_options:
66 |   project_commit: 'hash3'
67 |   runs: 5
68 | units:
69 |  - bazel_commit: hash1
70 |    command: info
71 |  - bazel_path: /tmp/bazel
72 |    command: build --nobuild //abc
73 |    project_commit: 'hash2'
74 |    env_configure: 'some-command'
75 |  - bazel_path: /tmp/bazel
76 |    command: build --flag_a -- //foo -//excluded/...
77 | """
78 |     result = benchmark_config.BenchmarkConfig.from_string(file_content)
79 | 
80 |     self.assertEqual(result._units, [{
81 |         'bazel_commit': 'hash1',
82 |         'project_commit': 'hash3',
83 |         'bazel_source': 'https://github.com/bazelbuild/bazel.git',
84 |         'env_configure': None,
85 |         'runs': 5,
86 |         'collect_profile': False,
87 |         'command': 'info',
88 |         'startup_options': [],
89 |         'options': _pad_test_command_options([]),
90 |         'targets': [],
91 |         'clean': True,
92 |         'shutdown': True
93 |     }, {
94 |         'bazel_path': '/tmp/bazel',
95 |         'project_commit': 'hash2',
96 |         'bazel_source': 'https://github.com/bazelbuild/bazel.git',
97 |         'env_configure': 'some-command',
98 |         'runs': 5,
99 |         'collect_profile': False,
100 |         'command': 'build',
101 |         'startup_options': [],
102 |         'options': _pad_test_command_options(['--nobuild']),
103 |         'targets': ['//abc'],
104 |         'clean': True,
105 |         'shutdown': True
106 |     }, {
107 |         'bazel_path': '/tmp/bazel',
108 |         'project_commit': 'hash3',
109 |         'bazel_source': 'https://github.com/bazelbuild/bazel.git',
110 |         'env_configure': None,
111 |         'runs': 5,
112 |         'collect_profile': False,
113 |         'command': 'build',
114 |         'startup_options': [],
115 |         'options': _pad_test_command_options(['--flag_a']),
116 |         'targets': ['--', '//foo', '-//excluded/...'],
117 |         'clean': True,
118 |         'shutdown': True
119 |     }])
120 |     self.assertEqual(result._benchmark_project_commits, False)
121 | 
122 | 
123 |   def test_parsing_from_flags(self):
124 |     result = benchmark_config.BenchmarkConfig.from_flags(
125 |         bazel_commits=['hash1'],
126 |         bazel_binaries=['path/to/bazel'],
127 |         project_commits=['hash3'],
128 |         bazel_source='foo',
129 |         project_source='foo',
130 |         runs=5,
131 |         env_configure='some-command',
132 |         collect_profile=False,
133 |         command='build --nobuild //abc',
134 |         clean=False,
135 |         shutdown=False,
136 |     )
137 |     self.assertEqual(result._units, [{
138 |         'bazel_commit': 'hash1',
139 |         'project_commit': 'hash3',
140 |         'bazel_source': 'foo',
141 |         'project_source': 'foo',
142 |         'runs': 5,
143 |         'env_configure': 'some-command',
144 |         'collect_profile': False,
145 |         'command': 'build',
146 |         'startup_options': [],
147 |         'options': _pad_test_command_options(['--nobuild']),
148 |         'targets': ['//abc'],
149 |         'clean': False,
150 |         'shutdown': False,
151 |     }, {
152 |         'bazel_binary': 'path/to/bazel',
153 |         'project_commit': 'hash3',
154 |         'bazel_source': 'foo',
155 |         'project_source': 'foo',
156 |         'runs': 5,
157 |         'env_configure': 'some-command',
158 |         'collect_profile': False,
159 |         'command': 'build',
160 |         'startup_options': [],
161 |         'options': _pad_test_command_options(['--nobuild']),
162 |         'targets': ['//abc'],
163 |         'clean': False,
164 |         'shutdown': False,
165 |     }])
166 |     self.assertEqual(result._benchmark_project_commits, False)
167 | 
168 | 
169 |   def test_get_units(self):
170 |     config = benchmark_config.BenchmarkConfig([{
171 |         'bazel_commit': 'hash1',
172 |         'project_commit': 'hash2',
173 |         'runs': 5,
174 |         'bazelrc': None,
175 |         'collect_profile': False,
176 |         'warmup_runs': 1,
177 |         'shutdown': True,
178 |         'command': 'info',
179 |         'startup_options': [],
180 |         'options': _pad_test_command_options([]),
181 |         'targets': []
182 |     }, {
183 |         'bazel_commit': '/tmp/bazel',
184 |         'project_commit': 'hash2',
185 |         'runs': 5,
186 |         'bazelrc': None,
187 |         'collect_profile': False,
188 |         'warmup_runs': 1,
189 |         'shutdown': True,
190 |         'command': 'build',
191 |         'startup_options': [],
192 |         'options': _pad_test_command_options(['--nobuild']),
193 |         'targets': ['//abc']
194 |     }])
195 |     self.assertEqual(config.get_units() is config._units, False)
196 |     self.assertEqual(config.get_units() == config._units, True)
197 | 
198 | 
199 | if __name__ == '__main__':
200 |   unittest.main()
201 | 
--------------------------------------------------------------------------------
/utils/bigquery_upload.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Handles the uploading of result CSV to BigQuery."""
15 | import re
16 | import sys
17 | import utils.logger as logger
18 | 
19 | from absl import app
20 | from absl import flags
21 | from google.cloud import bigquery
22 | 
23 | 
24 | def upload_to_bigquery(csv_file_path, project_id, dataset_id, table_id,
25 |                        location):
26 |   """Uploads the csv file to BigQuery.
27 | 
28 |   Takes the configuration from GOOGLE_APPLICATION_CREDENTIALS.
29 | 
30 |   Args:
31 |     csv_file_path: the path to the csv to be uploaded.
32 |     project_id: the BigQuery project id.
33 |     dataset_id: the BigQuery dataset id.
34 |     table_id: the BigQuery table id.
35 |     location: the BigQuery table's location.
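   | 
   |   Example (all identifiers are hypothetical):
   | 
   |     upload_to_bigquery('/tmp/perf_data.csv', 'my-project', 'bazel_bench',
   |                        'perf_data', 'US')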
36 |   """
37 | 
38 |   logger.log('Uploading the data to bigquery.')
39 |   client = bigquery.Client(project=project_id)
40 | 
41 |   dataset_ref = client.dataset(dataset_id)
42 |   table_ref = dataset_ref.table(table_id)
43 | 
44 |   job_config = bigquery.LoadJobConfig()
45 |   job_config.source_format = bigquery.SourceFormat.CSV
46 |   job_config.skip_leading_rows = 1
47 |   job_config.autodetect = False
48 | 
49 |   # load table to get schema
50 |   table = client.get_table(table_ref)
51 |   job_config.schema = table.schema
52 | 
53 |   with open(str(csv_file_path), 'rb') as source_file:
54 |     job = client.load_table_from_file(
55 |         source_file, table_ref, location=location, job_config=job_config)
56 | 
57 |   try:
58 |     job.result()  # Waits for table load to complete.
59 |   except Exception:
60 |     print('Uploading failed with: %s' % str(job.errors))
61 |     sys.exit(-1)
62 |   logger.log('Uploaded {} rows into {}:{}.'.format(job.output_rows, dataset_id,
63 |                                                    table_id))
64 | 
65 | 
66 | FLAGS = flags.FLAGS
67 | flags.DEFINE_string('upload_to_bigquery', None,
68 |                     'The details of the BigQuery table to upload ' \
69 |                     'results to: <project_id>:<dataset_id>:<table_id>:<location>.')
70 | 
71 | 
72 | def main(argv):
73 |   if not re.match(r'^[\w-]+:[\w-]+:[\w-]+:[\w-]+$', FLAGS.upload_to_bigquery):
74 |     raise ValueError('--upload_to_bigquery should follow the pattern '
75 |                      '<project_id>:<dataset_id>:<table_id>:<location>.')
76 | 
77 |   # Discard the first argument.
78 |   csv_files_to_upload = argv[1:]
79 | 
80 |   project_id, dataset_id, table_id, location = FLAGS.upload_to_bigquery.split(
81 |       ':')
82 |   for filename in csv_files_to_upload:
83 |     upload_to_bigquery(filename, project_id, dataset_id, table_id, location)
84 | 
85 | 
86 | if __name__ == '__main__':
87 |   app.run(main)
88 | 
--------------------------------------------------------------------------------
/utils/json_profiles_merger.py:
--------------------------------------------------------------------------------
1 | r"""A simple script to aggregate JSON profiles.
2 | 
3 | Collect median duration of events across these profiles.
4 | Usage:
5 |   bazel run json_profiles_merger -- \
6 |     --bazel_source=/usr/bin/bazel \
7 |     --project_source=https://github.com/bazelbuild/bazel \
8 |     --project_commit=2 \
9 |     --output_path=/tmp/median_dur.csv \
10 |     --upload_data_to=project-id:dataset-id:table-id:location \
11 |     -- \
12 |     *.profile
13 | """
14 | from absl import app
15 | from absl import flags
16 | from glob import glob
17 | 
18 | import json_profiles_merger_lib as lib
19 | import output_handling
20 | 
21 | FLAGS = flags.FLAGS
22 | flags.DEFINE_string('output_path', None, 'The path to the output file.')
23 | flags.mark_flag_as_required('output_path')
24 | flags.DEFINE_string(
25 |     'bazel_source', None,
26 |     ('(Optional) The bazel commit or path to the bazel binary from which these '
27 |      'JSON profiles were collected.'))
28 | flags.DEFINE_string(
29 |     'project_source', None,
30 |     ('(Optional) The project on which the runs that generated these JSON '
31 |      'profiles were performed.'))
32 | flags.DEFINE_string(
33 |     'project_commit', None,
34 |     '(Optional) The project commit on which the Bazel runs were performed.')
35 | flags.DEFINE_string(
36 |     'upload_data_to', None,
37 |     'Uploads data to bigquery, requires output_path to be set. '
38 |     'The details of the BigQuery table to upload results to, specified in '
39 |     'the form: <project_id>:<dataset_id>:<table_id>:<location>.')
40 | flags.DEFINE_string(
41 |     'input_profile_dir', None, '(Optional) Folder to load input profiles from. '
42 |     'This is useful when your list of input profiles is quite large.')
43 | flags.DEFINE_boolean(
44 |     'only_phases', False,
45 |     'Whether to only include events from phase markers in the final output.')
46 | 
47 | 
48 | def main(argv):
49 |   # Discard the first argument (the binary).
50 |   input_profiles = argv[1:]
51 | 
52 |   if FLAGS.input_profile_dir:
53 |     # Add any globbed files from the input_dir to the list.
54 |     input_profiles += glob(FLAGS.input_profile_dir + '/*.profile.gz')
55 | 
56 |   if not input_profiles:
57 |     raise ValueError('At least one profile must be provided!')
58 | 
59 |   aggregated_data = lib.aggregate_data(input_profiles, FLAGS.only_phases)
60 | 
61 |   lib.write_to_csv(FLAGS.bazel_source, FLAGS.project_source,
62 |                    FLAGS.project_commit, aggregated_data, FLAGS.output_path)
63 | 
64 |   if FLAGS.upload_data_to:
65 |     project_id, dataset_id, table_id, location = FLAGS.upload_data_to.split(':')
66 |     output_handling.upload_csv(
67 |         csv_file_path=FLAGS.output_path,
68 |         project_id=project_id,
69 |         dataset_id=dataset_id,
70 |         table_id=table_id,
71 |         location=location)
72 | 
73 | 
74 | if __name__ == '__main__':
75 |   app.run(main)
76 | 
--------------------------------------------------------------------------------
/utils/json_profiles_merger_lib.py:
--------------------------------------------------------------------------------
1 | """A library that holds the bulk of the logic for merging JSON profiles.
2 | 
3 | Collect duration statistics of events across these profiles.
4 | 
5 | Duration is measured in milliseconds.
6 | """
7 | from __future__ import division
8 | 
9 | import csv
10 | import gzip
11 | import json
12 | import os
13 | 
14 | 
15 | def _median(lst):
16 |   """Returns the median of the input list.
17 | 
18 |   Args:
19 |     lst: the input list; assumed to be non-empty.
20 | 
21 |   Returns:
22 |     The median of the list.
23 |   """
24 |   sorted_lst = sorted(lst)
25 |   length = len(sorted_lst)
26 |   if length % 2:
27 |     return sorted_lst[length // 2]
28 |   return (sorted_lst[length // 2 - 1] + sorted_lst[length // 2]) / 2
29 | 
30 | 
31 | def write_to_csv(bazel_source, project_source, project_commit, event_list,
32 |                  output_csv_path):
33 |   """Writes the event_list to output_csv_path.
34 | 
35 |   event_list format:
36 |   [{'cat': ..., 'name': ..., 'min': ...,
37 |     'median': ..., 'max': ..., 'count': ...}, ...]
38 |   Args:
39 |     bazel_source: the bazel commit or path to the bazel binary from which these
40 |       JSON profiles were collected.
41 |     project_source: the project on which the runs that generated these JSON
42 |       profiles were performed.
43 |     project_commit: the project commit on which the Bazel runs were performed.
44 |     event_list: the list of events, aggregated from the JSON profiles.
45 |     output_csv_path: a path to the output CSV file.
46 |   """
47 |   output_dir = os.path.dirname(output_csv_path)
48 |   if output_dir and not os.path.exists(output_dir):
49 |     os.makedirs(output_dir)
50 | 
51 |   with open(output_csv_path, 'w') as csv_file:
52 |     csv_writer = csv.writer(csv_file)
53 |     csv_writer.writerow([
54 |         'bazel_source', 'project_source', 'project_commit', 'cat', 'name',
55 |         'min', 'median', 'max', 'count'
56 |     ])
57 | 
58 |     for event in event_list:
59 |       csv_writer.writerow([
60 |           bazel_source, project_source, project_commit, event['cat'],
61 |           event['name'], event['min'], event['median'], event['max'],
62 |           event['count']
63 |       ])
64 | 
65 | 
66 | def _accumulate_event_duration(event_list, accum_dict, only_phases=False):
67 |   """Fills up accum_dict by accumulating the durations of each event.
68 | 
69 |   Also creates the entries for each phase by subtracting consecutive build
70 |   phase markers' ts attributes.
71 |   Args:
72 |     event_list: the list of event objects.
73 |     accum_dict: the dict to be filled up with a mapping of the following format:
74 |       { <event name>: { name: ..., cat: ..., dur_list: [...]}, ...}
75 |     only_phases: only collect entries from phase markers.
76 |   """
77 |   # A list of tuples of the form (marker, occurrence time in microseconds).
78 |   build_markers_ts_pairs = []
79 |   max_ts = 0
80 | 
81 |   # Only collect events with a duration.
82 |   # Special case: markers that indicate the beginning/end of execution.
83 |   for event in event_list:
84 |     if 'ts' in event:
85 |       max_ts = max(max_ts, event['ts'])
86 | 
87 |     if 'cat' in event and event['cat'] == 'build phase marker':
88 |       build_markers_ts_pairs.append((event['name'], event['ts']))
89 | 
90 |     if 'dur' not in event:
91 |       continue
92 | 
93 |     if not only_phases:
94 |       if event['name'] not in accum_dict:
95 |         accum_dict[event['name']] = {
96 |             'name': event['name'],
97 |             'cat': event['cat'],
98 |             'dur_list': []
99 |         }
100 |       accum_dict[event['name']]['dur_list'].append(event['dur'])
101 | 
102 |   # Append an artificial marker that signifies the end of the run.
103 |   # This is to determine the duration from the last marker to the actual end of
104 |   # the run and will not end up in the final data.
105 |   build_markers_ts_pairs.append((None, max_ts))
106 | 
107 |   # Fill in the markers.
108 |   for i, marker_ts_pair in enumerate(build_markers_ts_pairs[:-1]):
109 |     marker, ts = marker_ts_pair
110 |     _, next_ts = build_markers_ts_pairs[i + 1]
111 | 
112 |     if marker not in accum_dict:
113 |       accum_dict[marker] = {
114 |           'name': marker,
115 |           'cat': 'build phase marker',
116 |           'dur_list': []
117 |       }
118 |     current_phase_duration_millis = (
119 |         next_ts - ts) / 1000  # Convert from microseconds to milliseconds.
120 |     accum_dict[marker]['dur_list'].append(current_phase_duration_millis)
121 | 
122 | 
123 | def _aggregate_from_accum_dict(accum_dict):
124 |   """Aggregates the result from the accumulated dict.
125 | 
126 |   Calculates statistics of the durations and counts for each event.
127 |   All measurements of time should be in milliseconds.
128 |   Args:
129 |     accum_dict: a dict that holds a mapping of the following format:
130 |       { <event name>: { name: ..., cat: ..., dur_list: [...]}, ...}
131 | 
132 |   Returns:
133 |     A list of the following format:
134 |     [{ name: ..., cat: ..., min: ..., median: ..., max: ...,
135 |       count: ... }]
136 |   """
137 |   result = []
138 |   for obj in accum_dict.values():
139 |     result.append({
140 |         'name': obj['name'],
141 |         'cat': obj['cat'],
142 |         'median': _median(obj['dur_list']),
143 |         'min': min(obj['dur_list']),
144 |         'max': max(obj['dur_list']),
145 |         'count': len(obj['dur_list'])
146 |     })
147 |   return result
148 | 
149 | 
150 | def aggregate_data(input_profiles, only_phases=False):
151 |   """Produces the aggregated data from the JSON profile inputs.
152 | 
153 |   Collects information on cat, name and median duration of the events in the
154 |   JSON profiles.
155 |   Args:
156 |     input_profiles: a list of paths to .profile or .profile.gz files.
157 |     only_phases: only output entries from phase markers.
158 | 
159 |   Returns:
160 |     The list of objects which contain the info about cat, name and
161 |     statistics on the duration of events.
162 | 
163 |   """
164 |   # A map from event name to an object which accumulates the durations.
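   |   # Each value has the shape {'name': ..., 'cat': ..., 'dur_list': [...]};
   |   # durations from every input profile are appended to dur_list so that
   |   # _aggregate_from_accum_dict can compute min/median/max across runs.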
165 |   accum_dict = dict()
166 |   for file_path in input_profiles:
167 |     if file_path.endswith('.gz'):
168 |       with gzip.GzipFile(file_path, 'r') as gz_input_file:
169 |         event_list = json.loads(gz_input_file.read().decode('utf-8'))
170 |     else:
171 |       with open(file_path, 'r') as input_file:
172 |         event_list = json.load(input_file)
173 | 
174 |     # The events in the JSON profiles can be presented directly as a list,
175 |     # or as the value of key 'traceEvents'.
176 |     if 'traceEvents' in event_list:
177 |       event_list = event_list['traceEvents']
178 |     _accumulate_event_duration(event_list, accum_dict, only_phases)
179 | 
180 |   return _aggregate_from_accum_dict(accum_dict)
181 | 
--------------------------------------------------------------------------------
/utils/json_profiles_merger_lib_test.py:
--------------------------------------------------------------------------------
1 | """Tests for json_profiles_merger_lib."""
2 | 
3 | import json_profiles_merger_lib as lib
4 | import unittest
5 | 
6 | 
7 | class JsonProfilesMergerLibTest(unittest.TestCase):
8 | 
9 |   def test_accumulate_event_duration(self):
10 |     event_list_1 = [
11 |         {
12 |             'name': 'to_skip_no_dur',
13 |         },
14 |         {
15 |             'cat': 'fake_cat',
16 |             'name': 'fake_name',
17 |             'dur': 3,
18 |             'non_dur': 'something'
19 |         },
20 |     ]
21 | 
22 |     event_list_2 = [
23 |         {
24 |             'name': 'to_skip_no_dur',
25 |         },
26 |         {
27 |             'cat': 'fake_cat',
28 |             'name': 'fake_name',
29 |             'dur': 1,
30 |             'non_dur': 'something'
31 |         },
32 |     ]
33 | 
34 |     accum_dict = {}
35 |     lib._accumulate_event_duration(event_list_1, accum_dict)
36 |     self.assertEqual(
37 |         {
38 |             'fake_name': {
39 |                 'cat': 'fake_cat',
40 |                 'name': 'fake_name',
41 |                 'dur_list': [3]
42 |             },
43 |         }, accum_dict)
44 |     lib._accumulate_event_duration(event_list_2, accum_dict)
45 |     self.assertEqual(
46 |         {
47 |             'fake_name': {
48 |                 'cat': 'fake_cat',
49 |                 'name': 'fake_name',
50 |                 'dur_list': [3, 1]
51 |             }
52 |         }, accum_dict)
53 | 
54 |   def test_accumulate_build_phase_marker(self):
55 |     event_list_3 = [
56 |         {
57 |             'name': 'to_skip_no_dur',
58 |         },
59 |         {
60 |             'cat': 'build phase marker',
61 |             'name': 'phase1',
62 |             'ts': 1000
63 |         },
64 |         {
65 |             'cat': 'build phase marker',
66 |             'name': 'phase2',
67 |             'ts': 10000
68 |         },
69 |         {
70 |             'cat': 'fake_cat',
71 |             'name': 'fake_name',
72 |             'dur': 1,
73 |             'ts': 10001,
74 |             'non_dur': 'something'
75 |         },
76 |     ]
77 | 
78 |     accum_dict = {}
79 |     lib._accumulate_event_duration(event_list_3, accum_dict)
80 |     self.assertEqual(
81 |         {
82 |             'phase1': {
83 |                 'cat': 'build phase marker',
84 |                 'name': 'phase1',
85 |                 'dur_list': [9.0]
86 |             },
87 |             'phase2': {
88 |                 'cat': 'build phase marker',
89 |                 'name': 'phase2',
90 |                 'dur_list': [0.001]
91 |             },
92 |             'fake_name': {
93 |                 'cat': 'fake_cat',
94 |                 'name': 'fake_name',
95 |                 'dur_list': [1]
96 |             },
97 |         }, accum_dict)
98 | 
99 |   def test_accumulate_only_phase_marker(self):
100 |     event_list = [
101 |         {
102 |             'name': 'to_skip_no_dur',
103 |         },
104 |         {
105 |             'cat': 'build phase marker',
106 |             'name': 'phase1',
107 |             'ts': 1000
108 |         },
109 |         {
110 |             'cat': 'build phase marker',
111 |             'name': 'phase2',
112 |             'ts': 10000
113 |         },
114 |         {
115 |             'cat': 'fake_cat',
116 |             'name': 'fake_name',
117 |             'dur': 1,
118 |             'ts': 10001,
119 |             'non_dur': 'something'
120 |         },
121 |     ]
122 | 
123 |     accum_dict = {}
124 |     lib._accumulate_event_duration(event_list, accum_dict, only_phases=True)
125 |     self.assertEqual(
126 |         {
127 |             'phase1': {
128 |                 'cat': 'build phase marker',
129 |                 'name': 'phase1',
130 |                 'dur_list': [9.0]
131 |             },
132 |             'phase2': {
133 |                 'cat': 'build phase marker',
134 |                 'name': 'phase2',
135 |                 'dur_list': [0.001]
136 |             },
137 |         }, accum_dict)
138 | 
139 |   def test_aggregate_from_accum_dict(self):
140 |     accum_dict = {
141 |         'fake_name': {
142 |             'cat': 'fake_cat',
143 |             'name': 'fake_name',
144 |             'dur_list': [3, 1]
145 |         },
146 |     }
147 | 
148 |     self.assertEqual([{
149 |         'cat': 'fake_cat',
150 |         'name': 'fake_name',
151 |         'median': 2.0,
152 |         'min': 1,
153 |         'max': 3,
154 |         'count': 2
155 |     }], lib._aggregate_from_accum_dict(accum_dict))
156 | 
157 | 
158 | if __name__ == '__main__':
159 |   unittest.main()
160 | 
--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utility module to handle logging for the benchmarking script."""
15 | import sys
16 | from absl import logging
17 | 
18 | _COLOR_TMPL = {
19 |     'info': '\033[32m%s\033[0m',  # Green
20 |     'warn': '\033[33m%s\033[0m',  # Yellow
21 |     'error': '\033[31m%s\033[0m',  # Red
22 | }
23 | 
24 | 
25 | def _maybe_colorize_text(text, color):
26 |   """Colorize the text if running on a terminal."""
27 |   if not sys.stdout.isatty():
28 |     return text
29 |   return _COLOR_TMPL[color] % text
30 | 
31 | 
32 | def log(text):
33 |   """Logs a message using the logger singleton."""
34 |   logging.info(_maybe_colorize_text(text, 'info'))
35 | 
36 | 
37 | def log_warn(text):
38 |   """Logs a warning message using the logger singleton."""
39 |   logging.warning(_maybe_colorize_text(text, 'warn'))
40 | 
41 | 
42 | def log_error(text):
43 |   """Logs an error message using the logger singleton."""
44 |   logging.error(_maybe_colorize_text(text, 'error'))
45 | 
--------------------------------------------------------------------------------
/utils/output_handling.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | import csv
16 | import socket
17 | import getpass
18 | 
19 | import utils.logger as logger
20 | 
21 | 
22 | def export_csv(data_directory, filename, data):
23 |   """Exports the content of data to a csv file in data_directory.
24 | 
25 |   Args:
26 |     data_directory: the directory to store the csv file.
27 |     filename: the name of the .csv file.
28 |     data: the collected data to be exported.
29 | 
30 |   Returns:
31 |     The path to the newly created csv file.
32 |   """
33 |   if not os.path.exists(data_directory):
34 |     os.makedirs(data_directory)
35 |   csv_file_path = os.path.join(data_directory, filename)
36 |   logger.log('Writing raw data into csv file: %s' % str(csv_file_path))
37 | 
38 |   with open(csv_file_path, 'w') as csv_file:
39 |     hostname = socket.gethostname()
40 |     username = getpass.getuser()
41 |     csv_writer = csv.writer(csv_file)
42 |     csv_writer.writerow([
43 |         'project_source', 'project_commit', 'bazel_commit', 'run', 'cpu',
44 |         'wall', 'system', 'memory', 'command', 'expressions', 'hostname',
45 |         'username', 'options', 'exit_status', 'started_at', 'platform',
46 |         'project_label'
47 |     ])
48 | 
49 |     for (bazel_commit, project_commit), data_item in data.items():
50 |       command, expressions, options = data_item['args']
51 |       non_measurables = data_item['non_measurables']
52 |       for idx, run in enumerate(data_item['results'], start=1):
53 |         csv_writer.writerow([
54 |             non_measurables['project_source'], project_commit, bazel_commit,
55 |             idx, run['cpu'], run['wall'], run['system'], run['memory'], command,
56 |             expressions, hostname, username, options, run['exit_status'],
57 |             run['started_at'], non_measurables['platform'],
58 |             non_measurables['project_label']
59 |         ])
60 |   return csv_file_path
61 | 
62 | 
63 | def export_file(data_directory, filename, content):
64 |   """Exports the content of data to a file in data_directory.
65 | 
66 |   Args:
67 |     data_directory: the directory to store the file.
68 |     filename: the name of the file.
69 |     content: the content to be exported.
70 | 
71 |   Returns:
72 |     The path to the newly created file.
73 |   """
74 |   if not os.path.exists(data_directory):
75 |     os.makedirs(data_directory)
76 |   out_file_path = os.path.join(data_directory, filename)
77 | 
78 |   with open(out_file_path, 'w') as out_file:
79 |     out_file.write(content)
80 | 
81 |   return out_file_path
82 | 
--------------------------------------------------------------------------------
/utils/storage_upload.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Handles the uploading of results to Storage."""
15 | import os
16 | import re
17 | import utils.logger as logger
18 | 
19 | from absl import app
20 | from absl import flags
21 | from google.cloud import storage
22 | 
23 | 
24 | def upload_to_storage(file_path, project_id, bucket_id, destination):
25 |   """Uploads the file to Storage.
26 | 
27 |   Takes the configuration from GOOGLE_APPLICATION_CREDENTIALS.
28 | 
29 |   Args:
30 |     file_path: the path to the file to be uploaded.
31 |     project_id: the GCP project id.
32 |     bucket_id: the Storage bucket.
33 |     destination: the path to the destination on the bucket.
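   | 
   |   Example (all names are hypothetical):
   | 
   |     upload_to_storage('/tmp/perf_data.csv', 'my-project',
   |                       'bazel-bench-results', 'reports/perf_data.csv')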
34 |   """
35 |   # This is a workaround for https://github.com/bazelbuild/rules_python/issues/14
36 | 
37 |   logger.log('Uploading data to Storage.')
38 |   client = storage.Client(project=project_id)
39 |   bucket = client.get_bucket(bucket_id)
40 |   blob = bucket.blob(destination)
41 | 
42 |   blob.upload_from_filename(file_path)
43 | 
44 |   logger.log('Uploaded {} to {}/{}.'.format(file_path, bucket_id, destination))
45 | 
46 | 
47 | FLAGS = flags.FLAGS
48 | flags.DEFINE_string('upload_to_storage', None,
49 |                     'The details of the GCP Storage bucket to upload ' \
50 |                     'results to: <project_id>:<bucket_id>:<subdirectory>.')
51 | 
52 | 
53 | def main(argv):
54 |   if not re.match(r'^[\w-]+:[\w-]+:[\w\/-]+$', FLAGS.upload_to_storage):
55 |     raise ValueError('--upload_to_storage should follow the pattern '
56 |                      '<project_id>:<bucket_id>:<subdirectory>.')
57 | 
58 |   # Discard the first argument.
59 |   files_to_upload = argv[1:]
60 | 
61 |   project_id, bucket_id, subdirectory = FLAGS.upload_to_storage.split(':')
62 |   for filepath in files_to_upload:
63 |     filename = os.path.basename(filepath)
64 |     destination = '%s/%s' % (subdirectory, filename)
65 |     upload_to_storage(filepath, project_id, bucket_id, destination)
66 | 
67 | 
68 | if __name__ == '__main__':
69 |   app.run(main)
70 | 
--------------------------------------------------------------------------------
/utils/values.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Stores a set of numeric values and offers statistical operations on them."""
15 | import numpy
16 | import scipy.stats
17 | import copy
18 | 
19 | 
20 | class Values(object):
21 |   """Utility class to store numeric values.
22 | 
23 |   This class is used in order to collect and compare metrics during
24 |   benchmarking.
25 | 
26 |   Attributes:
27 |     items: An optional list of numeric values to initialize the data structure
28 |       with.
29 |   """
30 | 
31 |   def __init__(self, items=None):
32 |     self._items = items or []
33 | 
34 |   def add(self, value):
35 |     """Adds value to the list of stored values."""
36 |     self._items.append(value)
37 | 
38 |   def values(self):
39 |     """Returns the list of stored values."""
40 |     return self._items
41 | 
42 |   def mean(self):
43 |     """Returns the mean of the stored values."""
44 |     return numpy.mean(self._items)
45 | 
46 |   def median(self):
47 |     """Returns the median of the stored values."""
48 |     return numpy.median(self._items)
49 | 
50 |   def stddev(self):
51 |     """Returns the standard deviation of the stored values."""
52 |     return float(numpy.std(self._items))
53 | 
54 |   def pval(self, base_values):
55 |     """Computes the Kolmogorov-Smirnov statistic.
56 | 
57 |     Args:
58 |       base_values: A list of numeric values to compare self.values() with.
59 | 
60 |     Returns:
61 |       One minus the p-value for the null hypothesis that the samples were
62 |       drawn from the same distribution (values near 1 suggest they differ).
63 |       Returns -1 if it cannot be computed because one of the samples contains
64 |       less than 2 values.
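   | 
   |     Example (mirrors test_pval_significant in values_test.py):
   | 
   |       Values([1, 1, 1, 1, 1]).pval([10, 10, 10, 10, 10])  # ~0.992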
65 |     """
66 |     vals = self._items
67 |     if len(vals) > 1 and len(base_values) > 1:
68 |       _, p = scipy.stats.ks_2samp(vals, base_values)
69 |       return 1 - p
70 |     else:
71 |       return -1
72 | 
73 |   def items(self):
74 |     """Returns a copy of the items."""
75 |     return copy.copy(self._items)
76 | 
77 |   def exclude_from_indexes(self, indexes):
78 |     """Returns a copy of Values which excludes the items from certain indexes."""
79 |     filtered = []
80 |     for i, value in enumerate(self._items):
81 |       if i not in indexes:
82 |         filtered.append(value)
83 | 
84 |     return Values(filtered)
85 | 
--------------------------------------------------------------------------------
/utils/values_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The Bazel Authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Unit tests for benchmark values utility class."""
15 | import unittest
16 | 
17 | from values import Values
18 | 
19 | 
20 | class ValuesTest(unittest.TestCase):
21 | 
22 |   def test_initialize(self):
23 |     values = Values()
24 |     self.assertEqual([], values.values())
25 | 
26 |     values = Values([2.3, 4.2])
27 |     self.assertEqual([2.3, 4.2], values.values())
28 | 
29 |   def test_add(self):
30 |     values = Values()
31 |     self.assertEqual([], values.values())
32 | 
33 |     values.add(4.2)
34 |     values.add(2.3)
35 |     self.assertEqual([4.2, 2.3], values.values())
36 | 
37 |   def test_median(self):
38 |     values = Values([1, 10, 1])
39 |     self.assertEqual(1, values.median())
40 | 
41 |     # Returns the average of the two middle values when len(values()) is even.
42 |     values.add(20)
43 |     self.assertEqual(5.5, values.median())
44 | 
45 |     values.add(20)
46 |     self.assertEqual(10, values.median())
47 | 
48 |   def test_mean(self):
49 |     values = Values([1, 10, 1])
50 |     self.assertEqual(4, values.mean())
51 | 
52 |   def test_stddev(self):
53 |     values = Values([1, 10, 1])
54 |     self.assertAlmostEqual(4.24, values.stddev(), places=2)
55 | 
56 |   def test_pval_identical(self):
57 |     identical_list = [1, 10, 1]
58 |     values = Values(identical_list)
59 |     self.assertEqual(0, values.pval(identical_list))
60 | 
61 |   def test_pval_significant(self):
62 |     values = Values([1, 1, 1, 1, 1])
63 |     self.assertAlmostEqual(0.992, values.pval([10, 10, 10, 10, 10]), places=3)
64 | 
65 | 
66 | if __name__ == '__main__':
67 |   unittest.main()
68 | 
--------------------------------------------------------------------------------