├── .gitignore ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── analyze-token-benchmark-results.ipynb ├── llm_correctness.py ├── pre-commit.sh ├── pyproject.toml ├── requirements-dev.txt ├── src └── llmperf │ ├── __init__.py │ ├── common.py │ ├── common_metrics.py │ ├── models.py │ ├── ray_clients │ ├── __init__.py │ ├── litellm_client.py │ ├── openai_chat_completions_client.py │ ├── sagemaker_client.py │ └── vertexai_client.py │ ├── ray_llm_client.py │ ├── requests_launcher.py │ ├── sonnet.txt │ └── utils.py └── token_benchmark_ray.py /.gitignore: -------------------------------------------------------------------------------- 1 | # The build output should clearly not be checked in 2 | *test-output.xml 3 | /bazel-* 4 | /python/ray/core 5 | /python/ray/pickle5_files/ 6 | /python/ray/thirdparty_files/ 7 | /python/ray/pyarrow_files/ 8 | /python/ray/jars/ 9 | /python/ray/cpp/ 10 | /python/build 11 | /python/dist 12 | /python/python-driver-* 13 | /python/ray/serve/generated 14 | /thirdparty/pkg/ 15 | /build/java 16 | .jar 17 | /dashboard/client/build 18 | 19 | # Files generated by flatc should be ignored 20 | /src/ray/gcs/format/*_generated.h 21 | /src/ray/object_manager/format/*_generated.h 22 | /src/ray/raylet/format/*_generated.h 23 | /java/runtime/src/main/java/io/ray/runtime/generated/* 24 | /java/serve/src/main/java/io/ray/serve/generated/* 25 | 26 | # Files genrated by c++ worker should be ignored. 27 | /cpp/example/thirdparty/ 28 | /cpp/example/bazel-* 29 | /python/ray/cpp 30 | 31 | # Redis temporary files 32 | *dump.rdb 33 | 34 | # Python byte code files 35 | *.pyc 36 | python/.eggs 37 | *.egg-info 38 | 39 | # Backup files 40 | *.bak 41 | 42 | # Emacs temporary files 43 | *~ 44 | *# 45 | 46 | # Compiled Object files 47 | *.slo 48 | *.lo 49 | *.o 50 | *.xo 51 | *.obj 52 | 53 | # Precompiled Headers 54 | *.gch 55 | *.pch 56 | 57 | # Compiled Dynamic libraries 58 | *.so 59 | *.dylib 60 | *.dll 61 | python/ray/_raylet.pyd 62 | 63 | # Incremental linking files 64 | *.ilk 65 | 66 | # Library export files 67 | *.exp 68 | 69 | # Debug symbols 70 | *.pdb 71 | 72 | # Fortran module files 73 | *.mod 74 | !deploy/ray-operator/go.mod 75 | 76 | # Compiled Static libraries 77 | *.lai 78 | *.la 79 | *.a 80 | *.lib 81 | 82 | # Executables 83 | *.exe 84 | *.out 85 | *.app 86 | 87 | # Visual Studio files 88 | /packages 89 | *.suo 90 | *.user 91 | *.VC.db 92 | *.VC.opendb 93 | 94 | # Protobuf-generated files 95 | *_pb2.py 96 | *.pb.h 97 | *.pb.cc 98 | 99 | # Ray cluster configuration 100 | scripts/nodes.txt 101 | 102 | # OS X folder attributes 103 | .DS_Store 104 | 105 | # Debug files 106 | *.dSYM/ 107 | *.su 108 | 109 | # Python setup files 110 | *.egg-info 111 | 112 | # Compressed files 113 | *.gz 114 | 115 | # Datasets from examples 116 | **/MNIST_data/ 117 | **/cifar-10-batches-bin/ 118 | 119 | # Generated documentation files 120 | /doc/_build 121 | /doc/source/_static/thumbs 122 | /doc/source/tune/generated_guides/ 123 | /doc/source/**/doc/ 124 | 125 | # User-specific stuff: 126 | .idea/**/workspace.xml 127 | .idea/**/tasks.xml 128 | .idea/dictionaries 129 | .llvm-local.bazelrc 130 | 131 | # Sensitive or high-churn files: 132 | .idea/**/dataSources/ 133 | .idea/**/dataSources.ids 134 | .idea/**/dataSources.xml 135 | .idea/**/dataSources.local.xml 136 | .idea/**/sqlDataSources.xml 137 | .idea/**/dynamic.xml 138 | .idea/**/uiDesigner.xml 139 | 140 | # Gradle: 141 | .idea/**/gradle.xml 142 | .idea/**/libraries 143 | .idea 144 | 145 | # Website 146 | /site/Gemfile.lock 147 | /site/.sass-cache 148 | 
/site/_site 149 | 150 | # Pytest Cache 151 | **/.pytest_cache 152 | **/.cache 153 | .benchmarks 154 | python-driver-* 155 | 156 | # Vscode 157 | .vscode/ 158 | 159 | *.iml 160 | 161 | # Java 162 | java/**/target 163 | java/**/lib 164 | java/**/.settings 165 | java/**/.classpath 166 | java/**/.project 167 | java/runtime/native_dependencies/ 168 | java/testng_custom.xml 169 | 170 | dependency-reduced-pom.xml 171 | 172 | # Cpp 173 | cpp/example/thirdparty/ 174 | 175 | .clwb 176 | 177 | # pom.xml files generated from pom_template.xml 178 | java/**/pom.xml 179 | 180 | # python virtual env 181 | venv 182 | 183 | # pyenv version file 184 | .python-version 185 | 186 | # Vim 187 | .*.swp 188 | *.swp 189 | .*.swo 190 | *.swo 191 | tags 192 | tags.lock 193 | tags.temp 194 | *.vim 195 | 196 | # Emacs 197 | .#* 198 | 199 | # tools 200 | tools/prometheus* 201 | 202 | # ray project files 203 | project-id 204 | .mypy_cache/ 205 | 206 | # release test related 207 | .anyscale.yaml 208 | test_state.json 209 | 210 | # workflow storage 211 | workflow_data/ 212 | 213 | # vscode java extention generated 214 | .factorypath 215 | 216 | # Jupyter Notebooks 217 | **/.ipynb_checkpoints/ 218 | 219 | ### Added by Hedron's Bazel Compile Commands Extractor: https://github.com/hedronvision/bazel-compile-commands-extractor 220 | # The external link: Differs on Windows vs macOS/Linux, so we can't check it in. The pattern needs to not have a trailing / because it's a symlink on macOS/Linux. 221 | /external 222 | # Compiled output -> don't check in 223 | /compile_commands.json 224 | # Directory where clangd puts its indexing work 225 | /.cache/ 226 | 227 | # Auto-generated tag mapping 228 | tag-mapping.json 229 | 230 | .bazeliskrc 231 | 232 | # ignore tmp files 233 | *.tmp 234 | out 235 | temp* 236 | 237 | # build output 238 | build/ 239 | dist/ 240 | 241 | # results 242 | output/ 243 | *.json 244 | result_outputs/ 245 | 246 | __pycache__ 247 | **/__pycache__/ -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
-------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | [Project Name] 2 | Copyright 2023-onwards Anyscale, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLMPerf 2 | 3 | A tool for evaluating the performance of LLM APIs. 4 | 5 | # Installation 6 | ```bash 7 | git clone https://github.com/ray-project/llmperf.git 8 | cd llmperf 9 | pip install -e . 10 | ``` 11 | 12 | # Basic Usage 13 | 14 | We implement two tests for evaluating LLMs: a load test for measuring performance and a correctness test for checking correctness. 15 | 16 | ## Load test 17 | 18 | The load test spawns a number of concurrent requests to the LLM API and measures the inter-token latency and generation throughput per request and across concurrent requests. The prompt that is sent with each request is of the format: 19 | 20 | ``` 21 | Randomly stream lines from the following text. Don't generate eos tokens: 22 | LINE 1, 23 | LINE 2, 24 | LINE 3, 25 | ... 26 | ``` 27 | 28 | The lines are randomly sampled from a collection of lines from Shakespeare's sonnets. Tokens are counted using the `LlamaTokenizer` regardless of which LLM API is being tested, which ensures that the prompts are consistent across different LLM APIs (a short token-counting sketch follows the caveats below). 29 | 30 | To run the most basic load test you can run the token_benchmark_ray.py script. 31 | 32 | 33 | ### Caveats and Disclaimers 34 | 35 | - The endpoint providers' backends can vary widely, so these results are not a reflection of how the software runs on any particular hardware. 36 | - The results may vary with time of day. 37 | - The results may vary with the load. 38 | - The results may not correlate with users’ workloads.
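For reference, the token counting mentioned above can be reproduced in a few lines. This is a minimal sketch (not part of llmperf itself), assuming the same `hf-internal-testing/llama-tokenizer` checkpoint that the Advanced Usage example later in this README loads:

```python
# Minimal sketch of how prompt tokens are counted consistently across providers.
# Uses the same tokenizer checkpoint as the Advanced Usage example in this README.
from transformers import LlamaTokenizerFast

tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")

prompt = "Randomly stream lines from the following text. Don't generate eos tokens:\nLINE 1,\nLINE 2,\nLINE 3,"
num_prompt_tokens = len(tokenizer.encode(prompt))
print(num_prompt_tokens)  # the same counting is applied to generated output, whichever API served it
```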
39 | 40 | ### OpenAI Compatible APIs 41 | ```bash 42 | export OPENAI_API_KEY=secret_abcdefg 43 | export OPENAI_API_BASE="https://api.endpoints.anyscale.com/v1" 44 | 45 | python token_benchmark_ray.py \ 46 | --model "meta-llama/Llama-2-7b-chat-hf" \ 47 | --mean-input-tokens 550 \ 48 | --stddev-input-tokens 150 \ 49 | --mean-output-tokens 150 \ 50 | --stddev-output-tokens 10 \ 51 | --max-num-completed-requests 2 \ 52 | --timeout 600 \ 53 | --num-concurrent-requests 1 \ 54 | --results-dir "result_outputs" \ 55 | --llm-api openai \ 56 | --additional-sampling-params '{}' 57 | 58 | ``` 59 | 60 | ### Anthropic 61 | ```bash 62 | export ANTHROPIC_API_KEY=secret_abcdefg 63 | 64 | python token_benchmark_ray.py \ 65 | --model "claude-2" \ 66 | --mean-input-tokens 550 \ 67 | --stddev-input-tokens 150 \ 68 | --mean-output-tokens 150 \ 69 | --stddev-output-tokens 10 \ 70 | --max-num-completed-requests 2 \ 71 | --timeout 600 \ 72 | --num-concurrent-requests 1 \ 73 | --results-dir "result_outputs" \ 74 | --llm-api anthropic \ 75 | --additional-sampling-params '{}' 76 | 77 | ``` 78 | 79 | ### TogetherAI 80 | 81 | ```bash 82 | export TOGETHERAI_API_KEY="YOUR_TOGETHER_KEY" 83 | 84 | python token_benchmark_ray.py \ 85 | --model "together_ai/togethercomputer/CodeLlama-7b-Instruct" \ 86 | --mean-input-tokens 550 \ 87 | --stddev-input-tokens 150 \ 88 | --mean-output-tokens 150 \ 89 | --stddev-output-tokens 10 \ 90 | --max-num-completed-requests 2 \ 91 | --timeout 600 \ 92 | --num-concurrent-requests 1 \ 93 | --results-dir "result_outputs" \ 94 | --llm-api "litellm" \ 95 | --additional-sampling-params '{}' 96 | 97 | ``` 98 | 99 | ### Hugging Face 100 | 101 | ```bash 102 | export HUGGINGFACE_API_KEY="YOUR_HUGGINGFACE_API_KEY" 103 | export HUGGINGFACE_API_BASE="YOUR_HUGGINGFACE_API_ENDPOINT" 104 | 105 | python token_benchmark_ray.py \ 106 | --model "huggingface/meta-llama/Llama-2-7b-chat-hf" \ 107 | --mean-input-tokens 550 \ 108 | --stddev-input-tokens 150 \ 109 | --mean-output-tokens 150 \ 110 | --stddev-output-tokens 10 \ 111 | --max-num-completed-requests 2 \ 112 | --timeout 600 \ 113 | --num-concurrent-requests 1 \ 114 | --results-dir "result_outputs" \ 115 | --llm-api "litellm" \ 116 | --additional-sampling-params '{}' 117 | 118 | ``` 119 | 120 | ### LiteLLM 121 | 122 | LLMPerf can use LiteLLM to send prompts to LLM APIs. 123 | 124 | To see which environment variables to set for the provider, and which values to use for --model and --additional-sampling-params, see the [LiteLLM Provider Documentation](https://docs.litellm.ai/docs/providers). 125 | 126 | ```bash 127 | python token_benchmark_ray.py \ 128 | --model "meta-llama/Llama-2-7b-chat-hf" \ 129 | --mean-input-tokens 550 \ 130 | --stddev-input-tokens 150 \ 131 | --mean-output-tokens 150 \ 132 | --stddev-output-tokens 10 \ 133 | --max-num-completed-requests 2 \ 134 | --timeout 600 \ 135 | --num-concurrent-requests 1 \ 136 | --results-dir "result_outputs" \ 137 | --llm-api "litellm" \ 138 | --additional-sampling-params '{}' 139 | 140 | ``` 141 | 142 | ### Vertex AI 143 | 144 | Here, --model is used for logging, not for selecting the model. The model is specified in the Vertex AI Endpoint ID. 145 | 146 | GCLOUD_ACCESS_TOKEN needs to be refreshed fairly regularly, as the token generated by `gcloud auth print-access-token` expires after roughly 15 minutes. 147 | 148 | Vertex AI doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer.
149 | 150 | ```bash 151 | 152 | gcloud auth application-default login 153 | gcloud config set project YOUR_PROJECT_ID 154 | 155 | export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token) 156 | export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID 157 | export GCLOUD_REGION=YOUR_REGION 158 | export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID 159 | 160 | python token_benchmark_ray.py \ 161 | --model "meta-llama/Llama-2-7b-chat-hf" \ 162 | --mean-input-tokens 550 \ 163 | --stddev-input-tokens 150 \ 164 | --mean-output-tokens 150 \ 165 | --stddev-output-tokens 10 \ 166 | --max-num-completed-requests 2 \ 167 | --timeout 600 \ 168 | --num-concurrent-requests 1 \ 169 | --results-dir "result_outputs" \ 170 | --llm-api "vertexai" \ 171 | --additional-sampling-params '{}' 172 | 173 | ``` 174 | 175 | ### SageMaker 176 | 177 | SageMaker doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer. 178 | 179 | ```bash 180 | 181 | export AWS_ACCESS_KEY_ID="YOUR_ACCESS_KEY_ID" 182 | export AWS_SECRET_ACCESS_KEY="YOUR_SECRET_ACCESS_KEY" 183 | export AWS_SESSION_TOKEN="YOUR_SESSION_TOKEN" 184 | export AWS_REGION_NAME="YOUR_ENDPOINTS_REGION_NAME" 185 | 186 | python token_benchmark_ray.py \ 187 | --model "llama-2-7b" \ 188 | --llm-api "sagemaker" \ 189 | --max-num-completed-requests 2 \ 190 | --timeout 600 \ 191 | --num-concurrent-requests 1 \ 192 | --results-dir "result_outputs" \ 193 | 194 | ``` 195 | 196 | See `python token_benchmark_ray.py --help` for more details on the arguments. 197 | 198 | ## Correctness Test 199 | 200 | The correctness test spawns a number of concurrent requests to the LLM API with the following format: 201 | 202 | ``` 203 | Convert the following sequence of words into a number: {random_number_in_word_format}. Output just your final answer. 204 | ``` 205 | 206 | where random_number_in_word_format could be, for example, "one hundred and twenty three". The test then checks that the response contains that number in digit format, which in this case would be 123. 207 | 208 | The test does this for a number of randomly generated numbers and reports the number of responses that contain a mismatch. 209 | 210 | To run the most basic correctness test you can run the llm_correctness.py script.
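The pass/fail check itself reduces to looking for the expected number, written in digits, anywhere in the response. The snippet below is an illustrative sketch only; the helper name and regex are assumptions and are not taken from llm_correctness.py:

```python
# Illustrative sketch of the mismatch check described above (not the exact code in llm_correctness.py).
import re

def contains_expected_number(response_text: str, expected_number: int) -> bool:
    # Drop thousands separators, then look for the expected number in digit form.
    digits = re.findall(r"\d+", response_text.replace(",", ""))
    return str(expected_number) in digits

print(contains_expected_number("The answer is 123.", 123))  # True  -> counted as correct
print(contains_expected_number("one two three", 123))       # False -> counted as a mismatch
```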
211 | 212 | ### OpenAI Compatible APIs 213 | 214 | ```bash 215 | export OPENAI_API_KEY=secret_abcdefg 216 | export OPENAI_API_BASE=https://console.endpoints.anyscale.com/m/v1 217 | 218 | python llm_correctness.py \ 219 | --model "meta-llama/Llama-2-7b-chat-hf" \ 220 | --max-num-completed-requests 150 \ 221 | --timeout 600 \ 222 | --num-concurrent-requests 10 \ 223 | --results-dir "result_outputs" 224 | ``` 225 | 226 | ### Anthropic 227 | 228 | ```bash 229 | export ANTHROPIC_API_KEY=secret_abcdefg 230 | 231 | python llm_correctness.py \ 232 | --model "claude-2" \ 233 | --llm-api "anthropic" \ 234 | --max-num-completed-requests 5 \ 235 | --timeout 600 \ 236 | --num-concurrent-requests 1 \ 237 | --results-dir "result_outputs" 238 | ``` 239 | 240 | ### TogetherAI 241 | 242 | ```bash 243 | export TOGETHERAI_API_KEY="YOUR_TOGETHER_KEY" 244 | 245 | python llm_correctness.py \ 246 | --model "together_ai/togethercomputer/CodeLlama-7b-Instruct" \ 247 | --llm-api "litellm" \ 248 | --max-num-completed-requests 2 \ 249 | --timeout 600 \ 250 | --num-concurrent-requests 1 \ 251 | --results-dir "result_outputs" \ 252 | 253 | ``` 254 | 255 | ### Hugging Face 256 | 257 | ```bash 258 | export HUGGINGFACE_API_KEY="YOUR_HUGGINGFACE_API_KEY" 259 | export HUGGINGFACE_API_BASE="YOUR_HUGGINGFACE_API_ENDPOINT" 260 | 261 | python llm_correctness.py \ 262 | --model "huggingface/meta-llama/Llama-2-7b-chat-hf" \ 263 | --llm-api "litellm" \ 264 | --max-num-completed-requests 2 \ 265 | --timeout 600 \ 266 | --num-concurrent-requests 1 \ 267 | --results-dir "result_outputs" \ 268 | 269 | ``` 270 | 271 | ### LiteLLM 272 | 273 | LLMPerf can use LiteLLM to send prompts to LLM APIs. 274 | 275 | To see which environment variables to set for the provider, and which values to use for --model and --additional-sampling-params, see the [LiteLLM Provider Documentation](https://docs.litellm.ai/docs/providers). 276 | 277 | ```bash 278 | python llm_correctness.py \ 279 | --model "meta-llama/Llama-2-7b-chat-hf" \ 280 | --llm-api "litellm" \ 281 | --max-num-completed-requests 2 \ 282 | --timeout 600 \ 283 | --num-concurrent-requests 1 \ 284 | --results-dir "result_outputs" \ 285 | 286 | ``` 287 | 288 | See `python llm_correctness.py --help` for more details on the arguments. 289 | 290 | 291 | ### Vertex AI 292 | 293 | Here, --model is used for logging, not for selecting the model. The model is specified in the Vertex AI Endpoint ID. 294 | 295 | GCLOUD_ACCESS_TOKEN needs to be refreshed fairly regularly, as the token generated by `gcloud auth print-access-token` expires after roughly 15 minutes. 296 | 297 | Vertex AI doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer. 298 | 299 | 300 | ```bash 301 | 302 | gcloud auth application-default login 303 | gcloud config set project YOUR_PROJECT_ID 304 | 305 | export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token) 306 | export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID 307 | export GCLOUD_REGION=YOUR_REGION 308 | export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID 309 | 310 | python llm_correctness.py \ 311 | --model "meta-llama/Llama-2-7b-chat-hf" \ 312 | --llm-api "vertexai" \ 313 | --max-num-completed-requests 2 \ 314 | --timeout 600 \ 315 | --num-concurrent-requests 1 \ 316 | --results-dir "result_outputs" \ 317 | 318 | ``` 319 | 320 | ### SageMaker 321 | 322 | SageMaker doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer.
323 | 324 | ```bash 325 | 326 | export AWS_ACCESS_KEY_ID="YOUR_ACCESS_KEY_ID" 327 | export AWS_SECRET_ACCESS_KEY="YOUR_SECRET_ACCESS_KEY" 328 | export AWS_SESSION_TOKEN="YOUR_SESSION_TOKEN" 329 | export AWS_REGION_NAME="YOUR_ENDPOINTS_REGION_NAME" 330 | 331 | python llm_correctness.py \ 332 | --model "llama-2-7b" \ 333 | --llm-api "sagemaker" \ 334 | --max-num-completed-requests 2 \ 335 | --timeout 600 \ 336 | --num-concurrent-requests 1 \ 337 | --results-dir "result_outputs" \ 338 | 339 | ``` 340 | 341 | ## Saving Results 342 | 343 | The results of the load test and correctness test are saved in the results directory specified by the `--results-dir` argument. The results are saved in two files: one with the summary metrics of the test, and one with metrics from each individual request that is returned. 344 | 345 | # Advanced Usage 346 | 347 | The correctness tests were implemented with the following workflow in mind: 348 | 349 | ```python 350 | import ray 351 | from transformers import LlamaTokenizerFast 352 | 353 | from llmperf.ray_clients.openai_chat_completions_client import ( 354 | OpenAIChatCompletionsClient, 355 | ) 356 | from llmperf.models import RequestConfig 357 | from llmperf.requests_launcher import RequestsLauncher 358 | 359 | 360 | # Copying the environment variables and passing them to ray.init() is necessary 361 | # for making any clients work. 362 | ray.init(runtime_env={"env_vars": {"OPENAI_API_BASE" : "https://api.endpoints.anyscale.com/v1", 363 | "OPENAI_API_KEY" : "YOUR_API_KEY"}}) 364 | 365 | base_prompt = "hello_world" 366 | tokenizer = LlamaTokenizerFast.from_pretrained( 367 | "hf-internal-testing/llama-tokenizer" 368 | ) 369 | base_prompt_len = len(tokenizer.encode(base_prompt)) 370 | prompt = (base_prompt, base_prompt_len) 371 | 372 | # Create a client for spawning requests 373 | clients = [OpenAIChatCompletionsClient.remote()] 374 | 375 | req_launcher = RequestsLauncher(clients) 376 | 377 | req_config = RequestConfig( 378 | model="meta-llama/Llama-2-7b-chat-hf", 379 | prompt=prompt 380 | ) 381 | 382 | req_launcher.launch_requests(req_config) 383 | result = req_launcher.get_next_ready(block=True) 384 | print(result) 385 | 386 | ``` 387 | 388 | # Implementing New LLM Clients 389 | 390 | To implement a new LLM client, you need to implement the base class `llmperf.ray_llm_client.LLMClient` and decorate it as a Ray actor. 391 | 392 | ```python 393 | from typing import Tuple 394 | 395 | import ray 396 | 397 | from llmperf.models import RequestConfig 398 | from llmperf.ray_llm_client import LLMClient 399 | 400 | 401 | @ray.remote 402 | class CustomLLMClient(LLMClient): 403 | 404 | def llm_request(self, request_config: RequestConfig) -> Tuple[Metrics, str, RequestConfig]: 405 | """Make a single completion request to an LLM API 406 | 407 | Returns: 408 | Metrics about the performance characteristics of the request. 409 | The text generated by the request to the LLM API. 410 | The request_config used to make the request. This is mainly for logging purposes. 411 | 412 | """ 413 | ... 414 | 415 | ``` 416 | 417 | # Legacy Codebase 418 | The old LLMPerf codebase can be found in the [llmperf-legacy](https://github.com/ray-project/llmval-legacy) repo.
416 | -------------------------------------------------------------------------------- /analyze-token-benchmark-results.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "56950450", 6 | "metadata": {}, 7 | "source": [ 8 | "# Token Benchmark Example Analysis\n", 9 | "The following is an example of the analysis that can be done on individual responses that are saved when running `token_benchmark_ray.py` with the flag `--results-dir` which enables the saving of all responses." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "id": "dacfe98a-e81b-4089-9506-97a652993b5b", 16 | "metadata": { 17 | "tags": [] 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import pandas as pd" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 6, 27 | "id": "17f7abe9-ed9e-466c-b034-577489aaf98b", 28 | "metadata": { 29 | "tags": [] 30 | }, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/html": [ 35 | "
\n", 36 | "\n", 49 | "\n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | "
error_codeerror_msginter_token_latency_sttft_send_to_end_latency_srequest_output_throughput_token_per_snumber_total_tokensnumber_output_tokensnumber_input_tokens
0NaN[0.5549881670012831, 0.0009654169989510001, 0....0.5549881.61073444.07927270671635
1NaN[0.6019128750049271, 0.007011749999946, 0.0144...0.6019131.72572944.03935773076654
\n", 91 | "
" 92 | ], 93 | "text/plain": [ 94 | " error_code error_msg inter_token_latency_s \\\n", 95 | "0 NaN [0.5549881670012831, 0.0009654169989510001, 0.... \n", 96 | "1 NaN [0.6019128750049271, 0.007011749999946, 0.0144... \n", 97 | "\n", 98 | " ttft_s end_to_end_latency_s request_output_throughput_token_per_s \\\n", 99 | "0 0.554988 1.610734 44.079272 \n", 100 | "1 0.601913 1.725729 44.039357 \n", 101 | "\n", 102 | " number_total_tokens number_output_tokens number_input_tokens \n", 103 | "0 706 71 635 \n", 104 | "1 730 76 654 " 105 | ] 106 | }, 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "# path to the individual responses json file\n", 114 | "df = pd.read_json('/home/ray/default/llmperf/result_outputs/550_150_individual_responses.json')\n" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 12, 120 | "id": "565a59e4", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "valid_df = df[(df[\"error_code\"] != \"\")]" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 13, 130 | "id": "102894bc", 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/html": [ 136 | "
\n", 137 | "\n", 150 | "\n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | "
error_codeerror_msginter_token_latency_sttft_send_to_end_latency_srequest_output_throughput_token_per_snumber_total_tokensnumber_output_tokensnumber_input_tokens
0NaN[0.5549881670012831, 0.0009654169989510001, 0....0.5549881.61073444.07927270671635
1NaN[0.6019128750049271, 0.007011749999946, 0.0144...0.6019131.72572944.03935773076654
\n", 192 | "
" 193 | ], 194 | "text/plain": [ 195 | " error_code error_msg inter_token_latency_s \\\n", 196 | "0 NaN [0.5549881670012831, 0.0009654169989510001, 0.... \n", 197 | "1 NaN [0.6019128750049271, 0.007011749999946, 0.0144... \n", 198 | "\n", 199 | " ttft_s end_to_end_latency_s request_output_throughput_token_per_s \\\n", 200 | "0 0.554988 1.610734 44.079272 \n", 201 | "1 0.601913 1.725729 44.039357 \n", 202 | "\n", 203 | " number_total_tokens number_output_tokens number_input_tokens \n", 204 | "0 706 71 635 \n", 205 | "1 730 76 654 " 206 | ] 207 | }, 208 | "execution_count": 13, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "valid_df" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 14, 220 | "id": "c7519fc9", 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "name": "stdout", 225 | "output_type": "stream", 226 | "text": [ 227 | "Mean number of input tokens: 644.5. Mean number of output tokens: 73.5\n" 228 | ] 229 | }, 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "" 234 | ] 235 | }, 236 | "execution_count": 14, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | }, 240 | { 241 | "data": { 242 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+u0lEQVR4nO3deVgW9f7/8dcNsqrgAgIqgop7uYSKYIkVbp1TVp4yWzBOmpWmRllRuWSLmll2mSeXcknLXKqv+tP0JLlUmpZmaZnghkuCogKiBgmf3x9d3MdbFsFYnefjuua6vD8z85nPe4aBlzNz37fNGGMEAABgIU4VPQAAAIDyRgACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACKsCGDRtks9m0bNmyih5KsaSkpOhf//qX6tatK5vNpqlTp1b0kCq1cePGyWazKTU1taKHAqAQBCBcs+bNmyebzSZ3d3cdO3Ys3/zu3bvruuuuq4CRVT1PPfWU1q5dq7i4OC1YsEC9e/cudFmbzaZhw4aV4+iK9vHHHxcrsOWFlitN3bt3L/MxX+uKs59LMm3YsEGHDh0qdH6XLl04vsinWkUPAChrWVlZmjhxoqZNm1bRQ6myvvrqK/Xt21fPPPNMRQ+lxD7++GPt3r1bI0eOLHK5u+++WyEhIfbXmZmZevzxx3XXXXfp7rvvtrf7+fmV1VAtY8GCBQ6vP/zwQ3355Zf52nNycuTs7HzF5Vq1aqULFy5IkgYMGKDbbrvNYb6vr68CAgI4vnBAAMI1r3379po9e7bi4uJUv379ih5OuTp37pyqV6/+t/s5ceKEatWq9fcHVIm1bdtWbdu2tb9OTU3V448/rrZt2+rBBx+swJFdey7fn999952+/PLLK+7nopY7dOiQJOmGG24otB+OLy7FLTBc81544QXl5ORo4sSJRS6Xdwl93rx5+ebZbDaNGzfO/jrvcnpCQoIefPBBeXt7y9fXV6NHj5YxRkeOHFHfvn3l5eUlf39/TZkypcBt5uTk6IUXXpC/v7+qV6+uO+64Q0eOHMm33NatW9W7d295e3vL09NTkZGR+vbbbx2WyRvTr7/+qvvvv1+1a9fWjTfeWGTNBw4c0D333KM6derI09NTXbp00apVq+zz824jGmM0ffp0+22Cksh73mnJkiV67bXX1LBhQ7m7u+vWW2/Vvn37HJbNuy25fft2RUREyMPDQ40bN9aMGTMclssbV94fvcu3tWHDBnt/q1atUlJSkn3swcHBJRr/5b766ivddNNNql69umrVqqW+fftqz549V1wvKSlJISEhuu6665SSkiJJSktL08iRIxUYGCg3NzeFhIRo0qRJys3Nta+X93P55ptvatasWWratKnc3NzUqVMnff/99w7bSE5OVkxMjBo2bCg3NzcFBASob9+++fbTpd58803ZbDYlJSXlmxcXFydXV1edOXNGkpSYmKh+/frJ399f7u7uatiwoe677z6lp6cXZ9cBlQpXgHDNa9y4saKjozV79mw9//zzpXoVqH///mrVqpUmTpyoVatW6dVXX1WdOnU0c+ZM3XLLLZo0aZI++ugjPfPMM+rUqZO6devmsP5rr70mm82m5557TidOnNDUqVMVFRWlnTt3ysPDQ9Jff3D79Omj0NBQjR07Vk5OTpo7d65uueUWff311+rcubNDn/fcc4+aNWum119/XcaYQseekpKiiIgInT9/XsOHD1fdunU1f/583XHHHVq2bJnuuusudevWTQsWLNBDDz2kHj16KDo6+qr31cSJE+Xk5KRnnnlG6enpeuONN/TAAw9o69atDsudOXNGt912m+69914NGDBAS5Ys0eOPPy5XV1f9+9//LtE2X3zxRaWnp+vo0aN6++23JUk1atS46hrWrVunPn36qEmTJho3bpwuXLigadOmqWvXrtqxY0eh4Wr//v265ZZbVKdOHX355Zfy8fHR+fPnFRkZqWPHjmnIkCFq1KiRNm/erLi4OB0/fjzfc0sff/yxzp49qyFDhshms+mNN97Q3XffrQMHDsjFxUWS1K9fP/3yyy968sknFRwcrBMnTujLL7/U4cOHCx3bvffeq2effVZLlizRqFGjHOYtWbJEPXv2VO3atZWdna1evXopKytLTz75pPz9/XXs2DH9v//3/5SWliZvb++r3q+l6fz58/kePvf29rbvI8DOANeouXPnGknm+++/N/v37zfVqlUzw4cPt8+PjIw0bdq0sb8+ePCgkWTmzp2bry9JZuzYsfbXY8eONZLMo48+am+7ePGiadi
wobHZbGbixIn29jNnzhgPDw8zcOBAe9v69euNJNOgQQOTkZFhb1+yZImRZN555x1jjDG5ubmmWbNmplevXiY3N9e+3Pnz503jxo1Njx498o1pwIABxdo/I0eONJLM119/bW87e/asady4sQkODjY5OTkO9Q8dOrRY/V6+bF6trVq1MllZWfb2d955x0gyu3btsrdFRkYaSWbKlCn2tqysLNO+fXtTr149k52dbYz537E9ePCgw7bztrV+/Xp72z/+8Q8TFBRUrLFf6uTJk/mOe944Tp06ZW/76aefjJOTk4mOjra35R2LkydPmj179pj69eubTp06mdOnT9uXeeWVV0z16tVNQkKCw3aff/554+zsbA4fPmyM+d/PZd26dR3WX758uZFkVq5caYz56+dMkpk8eXKJaw0PDzehoaEObdu2bTOSzIcffmiMMebHH380kszSpUtL3P+VDB061BTnz1FRy+Xtp4KmS38e8hR0fGEt3AKDJTRp0kQPPfSQZs2apePHj5dav4MGDbL/29nZWR07dpQxRo888oi9vVatWmrRooUOHDiQb/3o6GjVrFnT/vpf//qXAgICtHr1aknSzp07lZiYqPvvv1+nTp1SamqqUlNTde7cOd16663atGmTw+0SSXrssceKNfbVq1erc+fODrfJatSooUcffVSHDh3Sr7/+WrydUEwxMTFydXW1v77pppskKd9+qVatmoYMGWJ/7erqqiFDhujEiRPavn17qY6pJI4fP66dO3fq4YcfVp06deztbdu2VY8ePezH7FK7d+9WZGSkgoODtW7dOtWuXds+b+nSpbrppptUu3Zt+3FNTU1VVFSUcnJytGnTJoe++vfv77D+5fvPw8NDrq6u2rBhg/2WVXH1799f27dv1/79++1tixcvlpubm/r27StJ9is8a9eu1fnz50vUf3l69NFH9eWXXzpM7dq1q+hhoRIiAMEyXnrpJV28ePGKzwKVRKNGjRxee3t7y93dXT4+PvnaC/qj1KxZM4fXNptNISEh9mc2EhMTJUkDBw6Ur6+vw/T+++8rKysr3/MXjRs3LtbYk5KS1KJFi3ztrVq1ss8vTZfvq7w/5pfvl/r16+d7cLt58+aSVOSzLGUtb38Uts/ygumlbr/9dtWsWVNr166Vl5eXw7zExEStWbMm33GNioqS9NeD55e60v5zc3PTpEmT9MUXX8jPz0/dunXTG2+8oeTk5CvWds8998jJyUmLFy+WJBljtHTpUvXp08c+7saNGys2Nlbvv/++fHx81KtXL02fPr3SPf/TrFkzRUVFOUyXBkcgDwEIltGkSRM9+OCDhV4FKuzh3pycnEL7vPQtukW1SSryeZzC5F3dmTx5cr7/1eZNlz/TkvfsUGVTmvvlao5VRejXr5/279+vjz76KN+83Nxc9ejRo9Dj2q9fP4fli7P/Ro4cqYSEBE2YMEHu7u4aPXq0WrVqpR9//LHIcdavX1833XSTlixZIumvd1sdPnxY/fv3d1huypQp+vnnn/XCCy/owoULGj58uNq0aaOjR48Wa38AlQkPQcNSXnrpJS1cuFCTJk3KNy/vf4lpaWkO7aV9JeRSeVd48hhjtG/fPvvbdZs2bSpJ8vLysl8ZKC1BQUHau3dvvvbffvvNPr8i/P777/nevp+QkCBJ9gd5S3KsSvqutcLk7Y/C9pmPj0++K1eTJ09WtWrV9MQTT6hmzZq6//777fOaNm2qzMzMUj+uTZs21dNPP62nn35aiYmJat++vaZMmaKFCxcWuV7//v31xBNPaO/evVq8eLE8PT11++2351vu+uuv1/XXX6+XXnpJmzdvVteuXTVjxgy9+uqrpVoHUNa4AgRLadq0qR588EHNnDkz360BLy8v+fj45Hv24j//+U+ZjefDDz/U2bNn7a+XLVum48ePq0+fPpKk0NBQNW3aVG+++aYyMzPzrX/y5Mmr3vZtt92mbdu2acuWLfa2c+fOadasWQoODlbr1q2vuu+/4+LFi5o5c6b9dXZ2tmbOnClfX1+FhoZK+l8wvPRY5eTkaNasWfn6q169eqncpgkICFD79u01f/58h+C1e/du/fe//8334XvSX+Fr1qxZ+te//qWBAwdqxYoV9nn33nuvtmzZorVr1+ZbLy0tTRcvXizR+M6fP68//vjDoa1p06aqWbOmsrKyrrh+v3795OzsrEWLFmnp0qX65z//6RDoMjIy8o3p+uuvl5OTk0P/hw8ftodooDLjChAs58UXX9SCBQu0d+9etWnTxmHeoEGDNHHiRA0aNEgdO3bUpk2b7FcfykKdOnV04403KiYmRikpKZo6dapCQkI0ePBgSZKTk5Pef/999enTR23atFFMTIwaNGigY8eOaf369fLy8tLKlSuvatvPP/+8Fi1apD59+mj48OGqU6eO5s+fr4MHD+rTTz+Vk1PF/P+ofv36mjRpkg4dOqTmzZtr8eLF2rlzp2bNmmV/K3ObNm3UpUsXxcXF6fTp06pTp44++eSTAkNDaGioFi9erNjYWHXq1Ek1atQo8MpGcUyePFl9+vRReHi4HnnkEfvb4L29vR0+J+pSTk5OWrhwoe68807de++9Wr16tW655RaNGjVKK1as0D//+U89/PDDCg0N1blz57Rr1y4tW7ZMhw4dyvcsWVESEhJ066236t5771Xr1q1VrVo1ff7550pJSdF99913xfXr1aunm2++WW+99ZbOnj2b7/bXV199pWHDhumee+5R8+bNdfHiRS1YsEDOzs4Ot+uio6O1cePGq7q1CZQnAhAsJyQkRA8++KDmz5+fb96YMWN08uRJLVu2TEuWLFGfPn30xRdfqF69emUylhdeeEE///yzJkyYoLNnz+rWW2/Vf/7zH3l6etqX6d69u7Zs2aJXXnlF7777rjIzM+Xv76+wsDCHd0uVlJ+fnzZv3qznnntO06ZN0x9//KG2bdtq5cqV+sc//lEa5V2V2rVra/78+XryySc1e/Zs+fn56d1337WHwjwfffSRhgwZookTJ6pWrVp65JFHdPPNN6tHjx4Oyz3xxBPauXOn5s6dq7fffltBQUFXHYCioqK0Zs0ajR07VmPGjJGLi4siIyM1adKkIh8+d3Fx0bJly9SnTx/17dtX69atU1hYmDZu3KjXX39dS5cu1YcffigvLy81b95cL7/8cok/VycwMFADBgxQfHy8FixYoGrVqqlly5ZasmRJvueJCtO/f3+tW7dONWvWzHdFq127durVq5dWrlypY8eOydPTU+3atdMXX3yhLl26lGisQGVgM8R0AJVE9+7dlZqaqt27d1f0UABc43gGCAAAWA4BCAAAWA4BCAAAWA7PAAEAAMvhChAAALAcAhAAALAcPgeoALm5ufr9999Vs2bNUvsYfQAAULaMMTp79qzq169/xQ9zJQAV4Pfff1dgYGBFDwMAAFyFI0eOqGHDhkUuQwAqQM2aNSX9tQO9vLwqeDQAAKA4MjIyFBgYaP87XhQCUAHybnt5eXkRgAAAqGKK8/gKD0EDAADLIQABAADLIQABAADLIQABAA
DLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADL4aswAABAuTpwMlNJp88ruG51NfapXiFjIAABAIBykXY+W8MX7dSmxJP2tm7NfDVtQAd5e7qU61i4BQYAAMrF8EU79e2+VIe2b/el6slFP5b7WAhAAACgzB04malNiSeVY4xDe44x2pR4UgdTz5XreAhAAACgzCWdPl/k/EOnCEAAAOAaE1THs8j5wXXL92FoAhAAAChzTXxrqFszXznbbA7tzjabujXzLfd3gxGAAABAuZg2oIO6hvg4tHUN8dG0AR3KfSy8DR4AAJQLb08XffhIZx1MPadDp87xOUAAAMA6GvtUXPDJwy0wAABgOQQgAABgOQQgAABgOZUiAE2fPl3BwcFyd3dXWFiYtm3bVuTyaWlpGjp0qAICAuTm5qbmzZtr9erVf6tPAABgHRUegBYvXqzY2FiNHTtWO3bsULt27dSrVy+dOHGiwOWzs7PVo0cPHTp0SMuWLdPevXs1e/ZsNWjQ4Kr7BAAA1mIz5rIv5ShnYWFh6tSpk959911JUm5urgIDA/Xkk0/q+eefz7f8jBkzNHnyZP32229ycSn4m2NL2uflMjIy5O3trfT0dHl5ef2N6gAAQHkpyd/vCr0ClJ2dre3btysqKsre5uTkpKioKG3ZsqXAdVasWKHw8HANHTpUfn5+uu666/T6668rJyfnqvvMyspSRkaGwwQAAK5dFRqAUlNTlZOTIz8/P4d2Pz8/JScnF7jOgQMHtGzZMuXk5Gj16tUaPXq0pkyZoldfffWq+5wwYYK8vb3tU2BgYClUBwAAKqsKfwaopHJzc1WvXj3NmjVLoaGh6t+/v1588UXNmDHjqvuMi4tTenq6fTpy5EgpjhgAAFQ2FfpJ0D4+PnJ2dlZKSopDe0pKivz9/QtcJyAgQC4uLnJ2dra3tWrVSsnJycrOzr6qPt3c3OTm5vY3qwEAAFVFhV4BcnV1VWhoqOLj4+1tubm5io+PV3h4eIHrdO3aVfv27VNubq69LSEhQQEBAXJ1db2qPgEAgLVU+C2w2NhYzZ49W/Pnz9eePXv0+OOP69y5c4qJiZEkRUdHKy4uzr78448/rtOnT2vEiBFKSEjQqlWr9Prrr2vo0KHF7hMAAFhbhX8Zav/+/XXy5EmNGTNGycnJat++vdasWWN/iPnw4cNycvpfTgsMDNTatWv11FNPqW3btmrQoIFGjBih5557rth9AgAAa6vwzwGqjPgcIAAAqp4q8zlAAAAAFYEABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALKdSBKDp06crODhY7u7uCgsL07Zt2wpddt68ebLZbA6Tu7u7wzIpKSl6+OGHVb9+fXl6eqp3795KTEws6zIAAEAVUeEBaPHixYqNjdXYsWO1Y8cOtWvXTr169dKJEycKXcfLy0vHjx+3T0lJSfZ5xhjdeeedOnDggJYvX64ff/xRQUFBioqK0rlz58qjJAAAUMlVeAB66623NHjwYMXExKh169aaMWOGPD09NWfOnELXsdls8vf3t09+fn72eYmJifruu+/03nvvqVOnTmrRooXee+89XbhwQYsWLSqPkgAAQCVXoQEoOztb27dvV1RUlL3NyclJUVFR2rJlS6HrZWZmKigoSIGBgerbt69++eUX+7ysrCxJcrgt5uTkJDc3N33zzTcF9peVlaWMjAyHCQAAXLsqNAClpqYqJyfH4QqOJPn5+Sk5ObnAdVq0aKE5c+Zo+fLlWrhwoXJzcxUREaGjR49Kklq2bKlGjRopLi5OZ86cUXZ2tiZNmqSjR4/q+PHjBfY5YcIEeXt726fAwMDSLRQAAFQqFX4LrKTCw8MVHR2t9u3bKzIyUp999pl8fX01c+ZMSZKLi4s+++wzJSQkqE6dOvL09NT69evVp08fOTkVXG5cXJzS09Pt05EjR8qzJAAAUM6qVeTGfXx85OzsrJSUFIf2lJQU+fv7F6sPFxcXdejQQfv27bO3hYaGaufOnUpPT1d2drZ8fX0VFhamjh07FtiHm5ub3Nzcrr4QAABQpVToFSBXV1eFhoYqPj7e3pabm6v4+HiFh4cXq4+cnBzt2rVLAQEB+eZ5e3vL19dXiYmJ+uGHH9S3b99SGzsAAKi6KvQKkCTFxsZq4MCB6tixozp37qypU6fq3LlziomJkSRFR0erQYMGmjBhgiRp/Pjx6tKli0JCQpSWlqbJkycrKSlJgwYNsve5dOlS+fr6qlGjRtq1a5dGjBihO++8Uz179qyQGgEAQOVS4QGof//+OnnypMaMGaPk5GS1b99ea9assT8YffjwYYdnd86cOaPBgwcrOTlZtWvXVmhoqDZv3qzWrVvblzl+/LhiY2OVkpKigIAARUdHa/To0eVeGwAAqJxsxhhT0YOobDIyMuTt7a309HR5eXlV9HAAAEAxlOTvd5V7FxgAAMDfRQACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWc1UBaMeOHdq1a5f99fLly3XnnXfqhRdeUHZ2don7mz59uoKDg+Xu7q6wsDBt27at0GXnzZsnm83mMLm7uzssk5mZqWHDhqlhw4by8PBQ69atNWPGjBKPCwAAXJuuKgANGTJECQkJkqQDBw7ovvvuk6enp5YuXapnn322R
H0tXrxYsbGxGjt2rHbs2KF27dqpV69eOnHiRKHreHl56fjx4/YpKSnJYX5sbKzWrFmjhQsXas+ePRo5cqSGDRumFStWlLxYAABwzbmqAJSQkKD27dtLkpYuXapu3brp448/1rx58/Tpp5+WqK+33npLgwcPVkxMjP1Kjaenp+bMmVPoOjabTf7+/vbJz8/PYf7mzZs1cOBAde/eXcHBwXr00UfVrl27Iq8sAQAA67iqAGSMUW5uriRp3bp1uu222yRJgYGBSk1NLXY/2dnZ2r59u6Kiov43ICcnRUVFacuWLYWul5mZqaCgIAUGBqpv37765ZdfHOZHRERoxYoVOnbsmIwxWr9+vRISEtSzZ88C+8vKylJGRobDBAAArl1XFYA6duyoV199VQsWLNDGjRv1j3/8Q5J08ODBfFdjipKamqqcnJx86/j5+Sk5ObnAdVq0aKE5c+Zo+fLlWrhwoXJzcxUREaGjR4/al5k2bZpat26thg0bytXVVb1799b06dPVrVu3AvucMGGCvL297VNgYGCxawAAAFXPVQWgqVOnaseOHRo2bJhefPFFhYSESJKWLVumiIiIUh3g5cLDwxUdHa327dsrMjJSn332mXx9fTVz5kz7MtOmTdN3332nFStWaPv27ZoyZYqGDh2qdevWFdhnXFyc0tPT7dORI0fKtAYAAFCxql3NSm3btnV4F1ieyZMny9nZ2f560aJFuuOOO1S9evUC+/Hx8ZGzs7NSUlIc2lNSUuTv71+ssbi4uKhDhw7at2+fJOnChQt64YUX9Pnnn9uvTLVt21Y7d+7Um2++6XC7LY+bm5vc3NyKtT0AAFD1lernALm7u8vFxcX+esiQIfnCzaVcXV0VGhqq+Ph4e1tubq7i4+MVHh5erG3m5ORo165dCggIkCT9+eef+vPPP+Xk5Fias7Oz/bklAABgbVd1Bai4jDFXXCY2NlYDBw5Ux44d1blzZ02dOlXnzp1TTEyMJCk6OloNGjTQhAkTJEnjx49Xly5dFBISorS0NE2ePFlJSUkaNGiQpL/eIh8ZGalRo0bJw8NDQUFB2rhxoz788EO99dZbZVcsAACoMso0ABVH//79dfLkSY0ZM0bJyclq37691qxZY38w+vDhww5Xc86cOaPBgwcrOTlZtWvXVmhoqDZv3qzWrVvbl/nkk08UFxenBx54QKdPn1ZQUJBee+01PfbYY+VeHwAAqHxspjiXaa5SzZo19dNPP6lJkyZltYkykZGRIW9vb6Wnp8vLy6uihwMAAIqhJH+/+S4wAABgOQQgAABgOWUagIKCghzeFQYAAFAZXFUAatKkiU6dOpWvPS0tzeF5n927d/OpygAAoNK5qgB06NAh5eTk5GvPysrSsWPH/vagAAAAylKJ3ga/YsUK+7/Xrl0rb29v++ucnBzFx8crODi41AYHAABQFkoUgO688077vwcOHOgwz8XFRcHBwZoyZUqpDAwAAKCsFDsA/fzzz/rzzz/l7Oysxo0b6/vvv5ePj09Zjg0AAKBMFPsZoA4dOuj06dOSJJvNJpvNVmaDAgAAKEvFDkC1atXSgQMHJElJSUl8sSgAAKiyin0LrF+/foqMjLR/63rHjh3l7Oxc4LJ5QQkAAKAyKnYAmjVrlu6++27t27dPw4cP1+DBg1WzZs2yHBsAAECZKNG7wHr37i1J2r59u0aMGEEAAgAAVdJVfRBiYQ9Anzt3Tv/+97//1oAAAADK2lUFoPnz5+vChQv52i9cuKAPP/zwbw8KAACgLJXoFlhGRoaMMTLG6OzZs3J3d7fPy8nJ0erVq1WvXr1SHyQAAEBpKlEAqlWrlv0zgJo3b55vvs1m08svv1xqgwMAACgLJQpA69evlzFGt9xyi5YtW6a6deva57m6uiooKEgXL14s9UECAACUphIFoMjISPu/w8PD7Z8JlOfUqVMKDAws8JviAQAAKoureghakqpVy5+dMjMzHZ4LAgAAqIxKdAUoNjZW0l/P+owePVqenp72eTk5Odq6davat29fqgMEAAAobSUKQD/++KMkyRijXbt2ydXV1T7P1dVV7dq10zPPPFO6IwQAAChlJX4IWpJiYmL0zjvvyMvLq0wGBQAAUJZKFIDyzJ07t7THAQAAUG6u+iFoAACAqooABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALKdSBKDp06crODhY7u7uCgsL07Zt2wpddt68ebLZbA6Tu7u7wzKXz8+bJk+eXNalAACAKqDCA9DixYsVGxursWPHaseOHWrXrp169eqlEydOFLqOl5eXjh8/bp+SkpIc5l867/jx45ozZ45sNpv69etX1uUAAIAqoMID0FtvvaXBgwcrJiZGrVu31owZM+Tp6ak5c+YUuo7NZpO/v7998vPzc5h/6Tx/f38tX75cN998s5o0aVLW5QAAgCqgQgNQdna2tm/frqioKHubk5OToqKitGXLlkLXy8zMVFBQkAIDA9W3b1/98ssvhS6bkpKiVatW6ZFHHil0maysLGVkZDhMAADg2lWhASg1NVU5OTn5ruD4+fkpOTm5wHVatGihOXPmaPny5Vq4cKFyc3MVERGho0ePFrj8/PnzVbNmTd19992FjmPChAny9va2T4GBgVdfFAAAqPQq/BZYSYWHhys6Olrt27dXZGSkPvvsM/n6+mrmzJkFLj9nzhw98MAD+R6UvlRcXJzS09Pt05EjR8pq+AAAoBKoVpEb9/HxkbOzs1JSUhzaU1JS5O/vX6w+XFxc1KFDB+3bty/fvK+//lp79+7V4sWLi+zDzc1Nbm5uxR84AACo0ir0CpCrq6tCQ0MVHx9vb8vNzVV8fLzCw8OL1UdOTo527dqlgICAfPM++OADhYaGql27dqU2ZgAAUPVV6BUgSYqNjdXAgQPVsWNHde7cWVOnTtW5c+cUExMjSYqOjlaDBg00YcIESdL48ePVpUsXhYSEKC0tTZMnT1ZSUpIGDRrk0G9GRoaWLl2qKVOmlHtNAACgcqvwANS/f3+dPHlSY8aMUXJystq3b681a9bYH4w+fPiwnJz+d6HqzJkzGjx4sJKTk1W7dm2FhoZq8+bNat26tUO/n3zyiYwxGjBgQLnWAwAAKj+bMcZU9CAqm4yMDHl7eys9PV1eXl4VPRwAAFAMJfn7XeXeBQYAAPB3EYAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAA
AIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlVIoANH36dAUHB8vd3V1hYWHatm1bocvOmzdPNpvNYXJ3d8+33J49e3THHXfI29tb1atXV6dOnXT48OGyLAMAAFQRFR6AFi9erNjYWI0dO1Y7duxQu3bt1KtXL504caLQdby8vHT8+HH7lJSU5DB///79uvHGG9WyZUtt2LBBP//8s0aPHl1gUAIAANZjM8aYihxAWFiYOnXqpHfffVeSlJubq8DAQD355JN6/vnn8y0/b948jRw5UmlpaYX2ed9998nFxUULFiy4qjFlZGTI29tb6enp8vLyuqo+AABA+SrJ3+8KvQKUnZ2t7du3Kyoqyt7m5OSkqKgobdmypdD1MjMzFRQUpMDAQPXt21e//PKLfV5ubq5WrVql5s2bq1evXqpXr57CwsL0f//3f4X2l5WVpYyMDIcJAABcuyo0AKWmpionJ0d+fn4O7X5+fkpOTi5wnRYtWmjOnDlavny5Fi5cqNzcXEVEROjo0aOSpBMnTigzM1MTJ05U79699d///ld33XWX7r77bm3cuLHAPidMmCBvb2/7FBgYWLqFAgCASqVaRQ+gpMLDwxUeHm5/HRERoVatWmnmzJl65ZVXlJubK0nq27evnnrqKUlS+/bttXnzZs2YMUORkZH5+oyLi1NsbKz9dUZGBiEIAIBrWIUGIB8fHzk7OyslJcWhPSUlRf7+/sXqw8XFRR06dNC+ffvsfVarVk2tW7d2WK5Vq1b65ptvCuzDzc1Nbm5uV1EBAACoiir0Fpirq6tCQ0MVHx9vb8vNzVV8fLzDVZ6i5OTkaNeuXQoICLD32alTJ+3du9dhuYSEBAUFBZXe4AEAQJVV4bfAYmNjNXDgQHXs2FGdO3fW1KlTde7cOcXExEiSoqOj1aBBA02YMEGSNH78eHXp0kUhISFKS0vT5MmTlZSUpEGDBtn7HDVqlPr3769u3brp5ptv1po1a7Ry5Upt2LChIkoEAACVTIUHoP79++vkyZMaM2aMkpOT1b59e61Zs8b+YPThw4fl5PS/C1VnzpzR4MGDlZycrNq1ays0NFSbN292uOV11113acaMGZowYYKGDx+uFi1a6NNPP9WNN95Y7vUBAIDKp8I/B6gy4nOAAACoeqrM5wABAABUBAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwnGoVPQCrOXAyU0mnzyu4bnU19qle0cMBAMCSCEDlJO18toYv2qlNiSftbd2a+WragA7y9nSpwJEBAGA93AIrJ8MX7dS3+1Id2r7dl6onF/1YQSMCAMC6CEDl4MDJTG1KPKkcYxzac4zRpsSTOph6roJGBgCANRGAykHS6fNFzj90igAEAEB5IgCVg6A6nkXOD67Lw9AAAJQnAlA5aOJbQ92a+crZZnNod7bZ1K2ZL+8GAwCgnBGAysm0AR3UNcTHoa1riI+mDehQQSMCAMC6eBt8OfH2dNGHj3TWwdRzOnTqHJ8DBABABSIAlbPGPgQfAAAqGrfAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5fBVGAUwxkiSMjIyKngkAACguPL+buf9HS8KAagAZ8+elSQFBgZW8EgAAEBJnT17Vt7e3kUuYzPFiUkWk5ubq99//101a9aUzWYr1b4zMjIUGBioI0eOyMvLq1T7rmyo9dplpXqp9dplpXqtUqsxRmfPnlX9+vXl5FT0Uz5cASqAk5OTGjZsWKbb8PLyuqZ/CC9FrdcuK9VLrdcuK9VrhVqvdOUnDw9BAwAAyyEAAQAAyyEAlTM3NzeNHTtWbm5uFT2UMket1y4r1Uut1y4r1WulWouLh6ABAIDlcAUIAABYDgEIAABYDgEIAABYDgEIAABYDgGoBI4dO6YHH3xQdevWlYeHh66//nr98MMP9vnjxo1Ty5YtVb16ddWuXVtRUVHaunWrQx/BwcGy2WwO08SJE4vc7h9//KGhQ4eqbt26qlGjhvr166eUlJQyqTHP3611w4YN+erMm77//vtCt9u9e/d8yz/22GNlWqt05Xov9dhjj8lms2nq1KkO7adPn9YDDzwgLy8v1apVS4888ogyMzOL3G5lPLaXKqjWQ4cO6ZFHHlHjxo3l4eGhpk2bauzYscrOzi5yuxVxbEvjuFaVc1b6+/VWpfP2SrU+/PDD+cbUu3dvhz6ulXP2SrVWpXO2PPFJ0MV05swZde3aVTfffLO++OIL+fr6KjExUbVr17Yv07x5c7377rtq0qSJLly4oLfffls9e/bUvn375Ovra19u/PjxGjx4sP11zZo1i9z2U089pVWrVmnp0qXy9vbWsGHDdPfdd+vbb78t/UJVOrVGRETo+PHjDv2OHj1a8fHx6tixY5HbHzx4sMaPH29/7enpWboFXqY49eb5/PPP9d1336l+/fr55j3wwAM6fvy4vvzyS/3555+KiYnRo48+qo8//rjQbVfGY5unsFp/++035ebmaubMmQoJCdHu3bs1ePBgnTt3Tm+++WaR2y/PY1tax1Wq/OesVDr1VpXztri19u7dW3PnzrW/vvwt4NfSOVtUrVXlnC13BsXy3HPPmRtvvLFE66SnpxtJZt26dfa2oKAg8/bbbxe7j7S0NOPi4mKWLl1qb9uzZ4+RZLZs2VKi8RRXadV6qezsbOPr62vGjx9fZD+RkZFmxIgRJdr231Xceo8ePWoaNGhgdu/ene84/vrrr0aS+f777+1tX3zxhbHZbObYsWMF9leZj21RtRbkjTfeMI0bNy5ymfI+tqVVa1U4Z40pm2NbWc/b4tQ6cOBA07dv30LnX0vn7JVqLUhlPGfLG7fAimnFihXq2LGj7rnnHtWrV08dOnTQ7NmzC10+Oztbs2bNkre3t9q1a+cwb+LEiapbt646dOigyZMn6+LFi4X2s337dv3555+Kioqyt7Vs2VKNGjXSli1b/n5hBSjNWi/t89SpU4qJibni9j/66CP5+PjouuuuU1xcnM6fP3/VtRRHcerNzc3VQw89pFGjRqlNmzb5+tiyZYtq1arl8L/kqKg
[... base64-encoded PNG data omitted -- the cell's rendered matplotlib figure, the "Number of Input Tokens vs. TTFT" scatter plot ...]", 243 | "text/plain": [ 244 | "
" 245 | ] 246 | }, 247 | "metadata": {}, 248 | "output_type": "display_data" 249 | } 250 | ], 251 | "source": [ 252 | "final_df = pd.DataFrame()\n", 253 | "final_df[\"number_input_tokens\"] = valid_df[\"number_input_tokens\"]\n", 254 | "final_df[\"number_output_tokens\"] = valid_df[\"number_output_tokens\"]\n", 255 | "final_df[\"ttft_s\"] = valid_df[\"ttft_s\"]\n", 256 | "final_df[\"end_to_end_latency_s\"] = valid_df[\"end_to_end_latency_s\"]\n", 257 | "final_df[\"generation_throughput\"] = valid_df[\"request_output_throughput_token_per_s\"]\n", 258 | "\n", 259 | "mean_tokens_in = final_df[\"number_input_tokens\"].mean()\n", 260 | "mean_tokens_out = valid_df[\"number_output_tokens\"].mean()\n", 261 | "print(f\"Mean number of input tokens: {mean_tokens_in}. Mean number of output tokens: {mean_tokens_out}\")\n", 262 | "final_df.plot.scatter(x=\"number_input_tokens\", y=\"ttft_s\", title=\"Number of Input Tokens vs. TTFT\")" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 15, 268 | "id": "a14de79c", 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | "text/plain": [ 274 | "" 275 | ] 276 | }, 277 | "execution_count": 15, 278 | "metadata": {}, 279 | "output_type": "execute_result" 280 | }, 281 | { 282 | "data": { 283 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGzCAYAAADT4Tb9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvAklEQVR4nO3de1TUVb/H8c+AAl7wUioocsRILTO19EiEVhaJ6aHMfLyVIpkdE08m6VNmiaaJWZKdMk3zkqdj+pSXWqWWktbpaI8nL1k9XlIjvIGQFxQTEPb5w+U8TWDCODCwfb/WmrWaPXv/5rt31HzW77d/Mw5jjBEAAIAlfLxdAAAAgCcRbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuABTjcDg0cuRIb5dxVXA4HJo4caK3ywCsQrgBLOFwOEr12Lhxo7dLLZO77rpLbdq08cixNm3apIkTJ+rkyZMeOR6AyqmatwsA4Bn/9V//5fJ88eLFWrduXbH2G2+8sSLLqlQ2bdqkSZMmaciQIapXr563y5Ek/fbbb6pWjf8VA57Ef1GAJR555BGX5998843WrVtXrB2VS0BAgLdLAKzDZSngKpKbm6unn35aoaGh8vf3V6tWrfTqq6/KGHPZsVOmTJGPj4/eeOMNZ9uaNWvUpUsX1apVS4GBgerZs6d+/PFHl3FDhgxR7dq1dfjwYfXq1Uu1a9dWw4YNNWbMGBUWFnpkXjt37tSQIUN03XXXKSAgQMHBwXr00Uf166+/OvtMnDhRY8eOlSQ1b97ceZkuLS3N2ee9995Thw4dVKNGDV1zzTXq37+/Dh486PJeFy+T/eMf/1DXrl1Vs2ZNhYSEaPr06cXqOnfunCZOnKiWLVsqICBAjRs3Vu/evbV//35nn5L23Bw+fFiPPvqogoKC5O/vr5tuukkLFiwodvw33nhDN910k2rWrKn69eurY8eOWrJkiTtLCFiFMzfAVcIYo/vvv18bNmzQ0KFD1b59e3322WcaO3asDh8+rNdee+2SY59//nlNnTpVb7/9toYNGybpwmWwuLg4xcTE6OWXX9bZs2c1e/Zsde7cWdu3b1dYWJhzfGFhoWJiYhQREaFXX31V69ev14wZMxQeHq4nnnjiiue2bt06HThwQPHx8QoODtaPP/6ouXPn6scff9Q333wjh8Oh3r17a+/evXr//ff12muvqUGDBpKkhg0bSpJeeuklvfDCC+rbt68ee+wxZWVl6Y033tAdd9yh7du3u1zGOnHihLp3767evXurb9+++vDDD/XMM8/o5ptv1n333eec87/9278pNTVV/fv316hRo3T69GmtW7dOP/zwg8LDw0ucS2Zmpm677Tbnpu6GDRtqzZo1Gjp0qHJycvTUU09JkubNm6cnn3xSffr00ahRo3Tu3Dnt3LlTf//73zVw4MArXlOgSjMArJSQkGB+/5/4qlWrjCQzZcoUl359+vQxDofD7Nu3z9kmySQkJBhjjHn66aeNj4+PWbRokfP106dPm3r16plhw4a5HCsjI8PUrVvXpT0uLs5IMi+++KJL31tuucV06NDhsvO48847zU033fSnfc6ePVus7f333zeSzFdffeVse+WVV4wk8/PPP7v0TUtLM76+vuall15yaf/+++9NtWrVXNrvvPNOI8ksXrzY2ZaXl2eCg4PNQw895GxbsGCBkWRSUlKK1VZUVOT8Z0kmKSnJ+Xzo0KGmcePGJjs722VM//79Td26dZ1zfeCBBy67LsDVistSwFVi9erV8vX11ZNPPunS/vTTT8sYozVr1ri0G2M0cuRIvf7663rvvfcUFxfnfG3dunU6efKkBgwYoOzsbOfD19dXERER2rBhQ7H3Hz58uMvzLl266MCBAx6ZW40aNZz/fO7cOWVnZ+u2226TJG3btu2y41esWKGioiL17dvXZT7BwcFq0aJFsfnUrl3bZS+Tn5+fOnXq5DKf5cuXq0GDBvqP//iPYu/ncDhKrMMYo+XLlys2NlbGGJdaYmJidOrUKed86tWrp0OHDun//u//Ljs/4GrDZSngKvHLL7+oSZMmCgwMdGm/ePfUL7/84tK+ePFinTlzRrNnz9aAAQNcXvvpp58kSXfffXeJ71WnTh2X5wEBAc7LPxfVr19fJ06cKPtESnD8+HFNmjRJS5cu1bFjx1xeO3Xq1GXH//TTTzLGqEWLFiW+Xr16dZfnTZs2LRZQ6tevr507dzqf79+
/X61atSrTnVBZWVk6efKk5s6dq7lz55bY5+L8nnnmGa1fv16dOnXS9ddfr27dumngwIGKiooq9fsBtiLcAChRVFSUduzYoTfffFN9+/bVNddc43ytqKhI0oV9N8HBwcXG/vED3dfXt1xr7du3rzZt2qSxY8eqffv2ql27toqKitS9e3dnrX+mqKhIDodDa9asKbHW2rVruzy/1HxMKTZmX64O6cKdb78/U/Z7bdu2lXQhlO7Zs0effPKJ1q5dq+XLl+utt97ShAkTNGnSpCuqA6jqCDfAVaJZs2Zav369Tp8+7XL2Zvfu3c7Xf+/666/X9OnTddddd6l79+5KTU11jru4GbZRo0aKjo6uoBmU7MSJE0pNTdWkSZM0YcIEZ/vFs0u/d6nLQeHh4TLGqHnz5mrZsqVH6goPD9ff//53FRQUFDvzcykNGzZUYGCgCgsLS7WutWrVUr9+/dSvXz/l5+erd+/eeumllzRu3DhuMcdVjT03wFWiR48eKiws1JtvvunS/tprr8nhcDjv8vm9tm3bavXq1dq1a5diY2P122+/SZJiYmJUp04dTZ06VQUFBcXGZWVllc8kSnDxLMofz5rMnDmzWN9atWpJUrFvKO7du7d8fX01adKkYscxxrjcUl5aDz30kLKzs4utd0m1XuTr66uHHnpIy5cv1w8//FDs9d+v6x9r8vPzU+vWrWWMKfHfCXA14cwNcJWIjY1V165dNX78eKWlpaldu3b6/PPP9dFHH+mpp5665K3Jt912mz766CP16NFDffr00apVq1SnTh3Nnj1bgwYN0q233qr+/furYcOGSk9P16effqqoqKgSP9TdlZWVpSlTphRrb968uR5++GHdcccdmj59ugoKChQSEqLPP/9cP//8c7H+HTp0kCSNHz9e/fv3V/Xq1RUbG6vw8HBNmTJF48aNU1pamnr16qXAwED9/PPPWrlypR5//HGNGTOmTDUPHjxYixcvVmJiorZs2aIuXbooNzdX69ev14gRI/TAAw+UOG7atGnasGGDIiIiNGzYMLVu3VrHjx/Xtm3btH79eh0/flyS1K1bNwUHBysqKkpBQUHatWuX3nzzTfXs2bPYvirgquOdm7QAlLc/3gpuzIVbuEePHm2aNGliqlevblq0aGFeeeUVl1uTjXG9Ffyijz76yFSrVs3069fPFBYWGmOM2bBhg4mJiTF169Y1AQEBJjw83AwZMsR8++23znFxcXGmVq1axepLSkoqVl9JLt56XdLjnnvuMcYYc+jQIfPggw+aevXqmbp165q//OUv5siRI8VuszbGmMmTJ5uQkBDj4+NT7Lbw5cuXm86dO5tatWqZWrVqmRtuuMEkJCSYPXv2uNRT0i3YcXFxplmzZi5tZ8+eNePHjzfNmzc31atXN8HBwaZPnz5m//79zj4l1ZiZmWkSEhJMaGioc9w999xj5s6d6+zz9ttvmzvuuMNce+21xt/f34SHh5uxY8eaU6dOXXZNAds5jLnCHXAAAACVCHtuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsctV9iV9RUZGOHDmiwMDAS34VOwAAqFyMMTp9+rSaNGkiH58/Pzdz1YWbI0eOKDQ01NtlAAAANxw8eFBNmzb90z5XXbi5+LXkBw8eVJ06dbxcDQAAKI2cnByFhoaW6udFrrpwc/FSVJ06dQg3AABUMaXZUsKGYgAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwilfDzVdffaXY2Fg1adJEDodDq1atuuyYjRs36tZbb5W/v7+uv/56LVq0qNzrBAAAVYdXw01ubq7atWunWbNmlar/zz//rJ49e6pr167asWOHnnrqKT322GP67LPPyrlSAABQVXj1hzPvu+8+3XfffaXuP2fOHDVv3lwzZsyQJN144436+uuv9dprrykmJqa8ygQAAFVIldpzs3nzZkVHR7u0xcTEaPPmzZcck5eXp5ycHJcHAACwl1fP3JRVRkaGgoKCXNqCgoKUk5Oj3377TTVq1Cg2Jjk5WZMmTaqoEhX27KcV9l6ekjatp7dLAABcAp8rZVelzty4Y9y4cTp16pTzcfDgQW+XBAAAylGVOnMTHByszMxMl7bMzEzVqVOnxLM2kuTv7y9/f/+KKA8AAFQCVerMTWRkpFJTU13a1q1bp8jISC9VBAAAKhuvhpszZ85ox44d2rFjh6QLt3rv2LFD6enpki5cUho8eLCz//Dhw3XgwAH99a9/1e7du/XWW2/pb3/7m0aPHu2N8gEAQCXk1XDz7bff6pZbbtEtt9wiSUpMTNQtt9yiCRMmSJKOHj3qDDqS1Lx5c3366adat26d2rVrpxkzZuidd97hNnAAAODk1T03d911l4wxl3y9pG8fvuuuu7R9+/ZyrAoAAFRlVWrPDQAAwOUQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFa+Hm1mzZiksLEwBAQGKiIjQli1b/rT/zJkz1apVK9WoUUOhoaEaPXq0zp07V0HVAgCAys6r4WbZsmVKTExUUlKStm3bpnbt2ikmJkbHjh0rsf+SJUv07LPPKikpSbt27dL8+fO1bNkyPffccxVcOQAAqKy8Gm5SUlI0bNgwxcfHq3Xr1pozZ45q1qypBQsWlNh/06ZNioqK0sCBAxUWFqZu3bppwIABlz3bAwAArh5eCzf5+fnaunWroqOj/1mMj4+io6O1efPmEsfcfvvt2rp1qzPMHDhwQKtXr1aPHj0u+T55eXnKyclxeQAAAHtV89YbZ2dnq7CwUEFBQS7tQUFB2r17d4ljBg4cqOzsbHXu3FnGGJ0/f17Dhw//08tSycnJmjRpkkdrBwAAlZfXNxSXxcaNGzV16lS99dZb2rZtm1asWKFPP/1UkydPvuSYcePG6dSpU87HwYMHK7BiAABQ0bx25qZBgwby9fVVZmamS3tmZqaCg4NLHPPCCy9o0KBBeuyxxyRJN998s3Jzc/X4449r/Pjx8vEpntX8/f3l7+/v+QkAAIBKyWtnbvz8/NShQwelpqY624qKipSamqrIyMgSx5w9e7ZYgPH19ZUkGWPKr1
gAAFBleO3MjSQlJiYqLi5OHTt2VKdOnTRz5kzl5uYqPj5ekjR48GCFhIQoOTlZkhQbG6uUlBTdcsstioiI0L59+/TCCy8oNjbWGXIAAMDVzavhpl+/fsrKytKECROUkZGh9u3ba+3atc5Nxunp6S5nap5//nk5HA49//zzOnz4sBo2bKjY2Fi99NJL3poCAACoZBzmKruek5OTo7p16+rUqVOqU6eOx48f9uynHj9meUub1tPbJQAALoHPlQvK8vldpe6WAgAAuBzCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFbfCzYEDBzxdBwAAgEe4FW6uv/56de3aVe+9957OnTvn6ZoAAADc5la42bZtm9q2bavExEQFBwfr3//937Vlyxa3Cpg1a5bCwsIUEBCgiIiIyx7n5MmTSkhIUOPGjeXv76+WLVtq9erVbr03AACwj1vhpn379nr99dd15MgRLViwQEePHlXnzp3Vpk0bpaSkKCsrq1THWbZsmRITE5WUlKRt27apXbt2iomJ0bFjx0rsn5+fr3vvvVdpaWn68MMPtWfPHs2bN08hISHuTAMAAFjoijYUV6tWTb1799YHH3ygl19+Wfv27dOYMWMUGhqqwYMH6+jRo386PiUlRcOGDVN8fLxat26tOXPmqGbNmlqwYEGJ/RcsWKDjx49r1apVioqKUlhYmO688061a9fuSqYBAAAsckXh5ttvv9WIESPUuHFjpaSkaMyYMdq/f7/WrVunI0eO6IEHHrjk2Pz8fG3dulXR0dH/LMbHR9HR0dq8eXOJYz7++GNFRkYqISFBQUFBatOmjaZOnarCwsJLvk9eXp5ycnJcHgAAwF7V3BmUkpKihQsXas+ePerRo4cWL16sHj16yMfnQlZq3ry5Fi1apLCwsEseIzs7W4WFhQoKCnJpDwoK0u7du0scc+DAAX3xxRd6+OGHtXr1au3bt08jRoxQQUGBkpKSShyTnJysSZMmuTNNAABQBbkVbmbPnq1HH31UQ4YMUePGjUvs06hRI82fP/+KivujoqIiNWrUSHPnzpWvr686dOigw4cP65VXXrlkuBk3bpwSExOdz3NychQaGurRugAAQOXhVrj56aefLtvHz89PcXFxl3y9QYMG8vX1VWZmpkt7ZmamgoODSxzTuHFjVa9eXb6+vs62G2+8URkZGcrPz5efn1+xMf7+/vL3979svQAAwA5u7blZuHChPvjgg2LtH3zwgd59991SHcPPz08dOnRQamqqs62oqEipqamKjIwscUxUVJT27dunoqIiZ9vevXvVuHHjEoMNAAC4+rgVbpKTk9WgQYNi7Y0aNdLUqVNLfZzExETNmzdP7777rnbt2qUnnnhCubm5io+PlyQNHjxY48aNc/Z/4okndPz4cY0aNUp79+7Vp59+qqlTpyohIcGdaQAAAAu5dVkqPT1dzZs3L9berFkzpaenl/o4/fr1U1ZWliZMmKCMjAy1b99ea9eudW4yTk9Pd25SlqTQ0FB99tlnGj16tNq2bauQkBCNGjVKzzzzjDvTAAAAFnIr3DRq1Eg7d+4sdjfUd999p2uvvbZMxxo5cqRGjhxZ4msbN24s1hYZGalvvvmmTO8BAACuHm5dlhowYICefPJJbdiwQYWFhSosLNQXX3yhUaNGqX///p6uEQAAoNTcOnMzefJkpaWl6Z577lG1ahcOUVRUpMGDB5dpzw0AAICnuRVu/Pz8tGzZMk2ePFnfffedatSooZtvvlnNmjXzdH0AAABl4la4uahly5Zq2bKlp2oBAAC4Ym6Fm8LCQi1atEipqak6duyYy/fOSNIXX3zhkeIAAADKyq1wM2rUKC1atEg9e/ZUmzZt5HA4PF0XAACAW9wKN0uXLtXf/vY39ejRw9P1AAAAXBG3bgX38/PT9ddf7+laAAAArphb4ebpp5/W66+/LmOMp+sBAAC4Im5dlvr666+1YcMGrVmzRjfddJOqV6/u8vqKFSs8UhwAAEBZuRVu6tWrpwcffNDTtQAAAFwxt8LNwoULPV0HAACAR7i150aSzp8/r/Xr1+vtt9/W6dOnJUlHjhzRmTNnPFYcAABAWbl15uaXX35R9+7dlZ6erry8PN17770KDAzUyy+/rLy8PM2ZM8fTdQIAAJSKW2duRo0apY4dO+rEiROqUaOGs/3BBx9Uamqqx4oDAAAoK7fO3PzP//yPNm3aJD8/P5f2sLAwHT582COFAQAAuMOtMzdFRUUqLCws1n7o0CEFBgZecVEAAADucivcdOvWTTNnznQ+dzgcOnPmjJKSkvhJBgAA4FVuXZaaMWOGYmJi1Lp1a507d04DBw7UTz/9pAYNGuj999/3dI0AAACl5la4adq0qb777jstXbpUO3fu1JkzZzR06FA9/PDDLhuMAQAAKppb4UaSqlWrpkceecSTtQAAAFwxt8LN4sWL//T1wYMHu1UMAADAlXIr3IwaNcrleUFBgc6ePSs/Pz/VrFmTcAMAALzGrbulTpw44fI4c+aM9uzZo86dO7OhGAAAeJXbvy31Ry1atNC0adOKndUBAACoSB4LN9KFTcZHjhzx5CEBAADKxK09Nx9//LHLc2OMjh49qjfffFNRUVEeKQwAAMAdboWbXr16uTx3OBxq2LCh7r77bs2YMcMTdQEAALjFrXBTVFTk6ToAAAA8wqN7bgAAALzNrTM3iYmJpe6bkpLizlsAAAC4xa1ws337dm3fvl0FBQVq1aqVJGnv3r3y9fXVrbfe6uzncDg8UyUAAEApuRVuYmNjFRgYqHfffVf169eXdOGL/eLj49WlSxc9/fTTHi0SAACgtNzaczNjxgwlJyc7g40k1a9fX1OmTOFuKQAA4FVuhZucnBxlZWUVa8/KytLp06evuCgAAAB3uRVuHnzwQcXHx2vFihU6dOiQDh06pOXLl2vo0KHq3bu3p2sEAAAoNbf23MyZM0djxozRwIEDVVBQcOFA1app6NCheuWVVzxaIAAAQFm4FW5q1qypt956S6+88or2798vSQoPD1etWrU8WhwAAEBZXdGX+B09elRHjx5VixYtVKtWLRljPFUXAACAW9wKN7/++qvuuecetWzZUj169NDRo0clSUOHDuU2cAAA4FVuhZvRo0erevXqSk9PV82aN
Z3t/fr109q1az1WHAAAQFm5tefm888/12effaamTZu6tLdo0UK//PKLRwoDAABwh1tnbnJzc13O2Fx0/Phx+fv7X3FRAAAA7nIr3HTp0kWLFy92Pnc4HCoqKtL06dPVtWtXjxUHAABQVm5dlpo+fbruueceffvtt8rPz9df//pX/fjjjzp+/Lj+93//19M1AgAAlJpbZ27atGmjvXv3qnPnznrggQeUm5ur3r17a/v27QoPD/d0jQAAAKVW5jM3BQUF6t69u+bMmaPx48eXR00AAABuK/OZm+rVq2vnzp3lUQsAAMAVc+uy1COPPKL58+d7uhYAAIAr5taG4vPnz2vBggVav369OnToUOw3pVJSUjxSHAAAQFmVKdwcOHBAYWFh+uGHH3TrrbdKkvbu3evSx+FweK46AACAMipTuGnRooWOHj2qDRs2SLrwcwv/+Z//qaCgoHIpDgAAoKzKtOfmj7/6vWbNGuXm5nq0IAAAgCvh1obii/4YdgAAALytTOHG4XAU21PDHhsAAFCZlGnPjTFGQ4YMcf445rlz5zR8+PBid0utWLHCcxUCAACUQZnCTVxcnMvzRx55xKPFAAAAXKkyhZuFCxeWVx0AAAAecUUbigEAACobwg0AALBKpQg3s2bNUlhYmAICAhQREaEtW7aUatzSpUvlcDjUq1ev8i0QAABUGV4PN8uWLVNiYqKSkpK0bds2tWvXTjExMTp27NifjktLS9OYMWPUpUuXCqoUAABUBV4PNykpKRo2bJji4+PVunVrzZkzRzVr1tSCBQsuOaawsFAPP/ywJk2apOuuu+5Pj5+Xl6ecnByXBwAAsJdXw01+fr62bt2q6OhoZ5uPj4+io6O1efPmS4578cUX1ahRIw0dOvSy75GcnKy6des6H6GhoR6pHQAAVE5eDTfZ2dkqLCws9sObQUFBysjIKHHM119/rfnz52vevHmleo9x48bp1KlTzsfBgwevuG4AAFB5lel7brzt9OnTGjRokObNm6cGDRqUaoy/v7/zG5UBAID9vBpuGjRoIF9fX2VmZrq0Z2ZmKjg4uFj//fv3Ky0tTbGxsc62oqIiSVK1atW0Z88ehYeHl2/RAACgUvPqZSk/Pz916NBBqampzraioiKlpqYqMjKyWP8bbrhB33//vXbs2OF83H///eratat27NjBfhoAAOD9y1KJiYmKi4tTx44d1alTJ82cOVO5ubmKj4+XJA0ePFghISFKTk5WQECA2rRp4zK+Xr16klSsHQAAXJ28Hm769eunrKwsTZgwQRkZGWrfvr3Wrl3r3GScnp4uHx+v37EOAACqCK+HG0kaOXKkRo4cWeJrGzdu/NOxixYt8nxBAACgyuKUCAAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsUinCzaxZsxQWFqaAgABFRERoy5Ytl+w7b948denSRfXr11f9+vUVHR39p/0BAMDVxevhZtmyZUpMTFRSUpK2bdumdu3aKSYmRseOHSux/8aNGzVgwABt2LBBmzdvVmhoqLp166bDhw9XcOUAAKAy8nq4SUlJ0bBhwxQfH6/WrVtrzpw5qlmzphYsWFBi///+7//WiBEj1L59e91www165513VFRUpNTU1AquHAAAVEZeDTf5+fnaunWroqOjnW0+Pj6Kjo7W5s2bS3WMs2fPqqCgQNdcc02Jr+fl5SknJ8flAQAA7OXVcJOdna3CwkIFBQW5tAcFBSkjI6NUx3jmmWfUpEkTl4D0e8nJyapbt67zERoaesV1AwCAysvrl6WuxLRp07R06VKtXLlSAQEBJfYZN26cTp065XwcPHiwgqsEAAAVqZo337xBgwby9fVVZmamS3tmZqaCg4P/dOyrr76qadOmaf369Wrbtu0l+/n7+8vf398j9QIAgMrPq2du/Pz81KFDB5fNwBc3B0dGRl5y3PTp0zV58mStXbtWHTt2rIhSAQBAFeHVMzeSlJiYqLi4OHXs2FGdOnXSzJkzlZubq/j4eEnS4MGDFRISouTkZEnSyy+/rAkTJmjJkiUKCwtz7s2pXbu2ateu7bV5AACAysHr4aZfv37KysrShAkTlJGRofbt22vt2rXOTcbp6eny8fnnCabZs2crPz9fffr0cTlOUlKSJk6cWJGlAwCASsjr4UaSRo4cqZEjR5b42saNG12ep6WllX9BAACgyqrSd0sBAAD8EeEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxSKcLNrFmzFBYWpoCAAEVERGjLli1/2v+DDz7QDTfcoICAAN18881avXp1BVUKAAAqO6+Hm2XLlikxMVFJSUnatm2b2rVrp5iYGB07dqzE/ps2bdKAAQM0dOhQbd++Xb169VKvXr30ww8/VHDlAACgMvJ6uElJSdGwYcMUHx+v1q1ba86cOapZs6YWLFhQYv/XX39d3bt319ixY3XjjTdq8uTJuvXWW/Xmm29WcOUAAKAyqubNN8/Pz9fWrVs1btw4Z5uPj4+io6O1efPmEsds3rxZiYmJLm0xMTFatWpVif3z8vKUl5fnfH7q1ClJUk5OzhVWX7KivLPlctzyVF5rAQC4cnyuuB7TGHPZvl4NN9nZ2SosLFRQUJBLe1BQkHbv3l3imIyMjBL7Z2RklNg/OTlZkyZNKtYeGhrqZtX2qTvT2xUAAGxSnp8rp0+fVt26df+0j1fDTUUYN26cy5meoqIi
HT9+XNdee60cDocXKyt/OTk5Cg0N1cGDB1WnTh1vl1OlsHbuYd3cw7q5j7VzT1VcN2OMTp8+rSZNmly2r1fDTYMGDeTr66vMzEyX9szMTAUHB5c4Jjg4uEz9/f395e/v79JWr14994uugurUqVNl/ngrG9bOPaybe1g397F27qlq63a5MzYXeXVDsZ+fnzp06KDU1FRnW1FRkVJTUxUZGVnimMjISJf+krRu3bpL9gcAAFcXr1+WSkxMVFxcnDp27KhOnTpp5syZys3NVXx8vCRp8ODBCgkJUXJysiRp1KhRuvPOOzVjxgz17NlTS5cu1bfffqu5c+d6cxoAAKCS8Hq46devn7KysjRhwgRlZGSoffv2Wrt2rXPTcHp6unx8/nmC6fbbb9eSJUv0/PPP67nnnlOLFi20atUqtWnTxltTqLT8/f2VlJRU7LIcLo+1cw/r5h7WzX2snXtsXzeHKc09VQAAAFWE17/EDwAAwJMINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwU0V89dVXio2NVZMmTeRwOC75Q6G/l5eXp/Hjx6tZs2by9/dXWFiYy6+tz5s3T126dFH9+vVVv359RUdHa8uWLeU4C+8oj7X7vaVLl8rhcKhXr16eLdzLymvdTp48qYSEBDVu3Fj+/v5q2bKlVq9eXU6zqHjltW4zZ85Uq1atVKNGDYWGhmr06NE6d+5cOc3CO8q6dkOGDJHD4Sj2uOmmm1z6zZo1S2FhYQoICFBERIR1/58rj3VLTk7Wv/7rvyowMFCNGjVSr169tGfPnnKeiecQbqqI3NxctWvXTrNmzSr1mL59+yo1NVXz58/Xnj179P7776tVq1bO1zdu3KgBAwZow4YN2rx5s0JDQ9WtWzcdPny4PKbgNeWxdhelpaVpzJgx6tKliydLrhTKY93y8/N17733Ki0tTR9++KH27NmjefPmKSQkpDym4BXlsW5LlizRs88+q6SkJO3atUvz58/XsmXL9Nxzz5XHFLymrGv3+uuv6+jRo87HwYMHdc011+gvf/mLs8+yZcuUmJiopKQkbdu2Te3atVNMTIyOHTtWXtOocOWxbl9++aUSEhL0zTffaN26dSooKFC3bt2Um5tbXtPwLIMqR5JZuXLln/ZZs2aNqVu3rvn1119Lfdzz58+bwMBA8+67715hhZWXJ9fu/Pnz5vbbbzfvvPOOiYuLMw888IDnCq1kPLVus2fPNtddd53Jz8/3cIWVk6fWLSEhwdx9990ubYmJiSYqKsoTZVZKpVm7P1q5cqVxOBwmLS3N2dapUyeTkJDgfF5YWGiaNGlikpOTPVVqpeKpdfujY8eOGUnmyy+/vMIKKwZnbiz18ccfq2PHjpo+fbpCQkLUsmVLjRkzRr/99tslx5w9e1YFBQW65pprKrDSyqe0a/fiiy+qUaNGGjp0qJcqrVxKs24ff/yxIiMjlZCQoKCgILVp00ZTp05VYWGhFyv3rtKs2+23366tW7c6L6ccOHBAq1evVo8ePbxVdqU0f/58RUdHq1mzZpIunCncunWroqOjnX18fHwUHR2tzZs3e6vMSueP61aSU6dOSVKV+Xzw+s8voHwcOHBAX3/9tQICArRy5UplZ2drxIgR+vXXX7Vw4cISxzzzzDNq0qSJy/8IrkalWbuvv/5a8+fP144dO7xbbCVSmnU7cOCAvvjiCz388MNavXq19u3bpxEjRqigoEBJSUlenoF3lGbdBg4cqOzsbHXu3FnGGJ0/f17Dhw+37rLUlThy5IjWrFmjJUuWONuys7NVWFjo/Dmfi4KCgrR79+6KLrFSKmnd/qioqEhPPfWUoqKiqs5PHXn71BHKTqU47XjvvfeagIAAc/LkSWfb8uXLjcPhMGfPni3WPzk52dSvX9989913ni63UvHE2uXk5JiwsDCzevVq5+tclird31yLFi1MaGioOX/+vLPPjBkzTHBwcLnU7W2eWrcNGzaYoKAgM2/ePLNz506zYsUKExoaal588cXyLN+rSrN2vzd16lRz7bXXmry8PGfb4cOHjSSzadMml75jx441nTp18lSplYon1u2Phg8fbpo1a2YOHjzogQorBmduLNW4cWOFhISobt26zrYbb7xRxhgdOnRILVq0cLa/+uqrmjZtmtavX6+2bdt6o9xK5XJrl5ubq7S0NMXGxjpfLyoqkiRVq1ZNe/bsUXh4eIXX7W2l+Ztr3LixqlevLl9fX5c+GRkZys/Pl5+fnzdK96rSrNsLL7ygQYMG6bHHHpMk3XzzzcrNzdXjjz+u8ePHu/y48NXIGKMFCxZo0KBBLn9DDRo0kK+vrzIzM136Z2ZmKjg4uKLLrHQutW6/N3LkSH3yySf66quv1LRp0wqu0H1X938RFouKitKRI0d05swZZ9vevXvl4+Pj8gc6ffp0TZ48WWvXrlXHjh29UWqlc7m1u+GGG/T9999rx44dzsf999+vrl27aseOHQoNDfVi9d5Tmr+5qKgo7du3zxkGL/Zp3LjxVRlspNKt29mzZ4sFmIsB0fDbx/ryyy+1b9++Yvvf/Pz81KFDB6WmpjrbioqKlJqaqsjIyIous9K51LpJF/6uRo4cqZUrV+qLL75Q8+bNvVDhFfDeSSOUxenTp8327dvN9u3bjSSTkpJitm/fbn755RdjjDHPPvusGTRokEv/pk2bmj59+pgff/zRfPnll6ZFixbmsccec/aZNm2a8fPzMx9++KE5evSo83H69OkKn195Ko+1+yMbL0uVx7qlp6ebwMBAM3LkSLNnzx7zySefmEaNGpkpU6ZU+PzKS3msW1JSkgkMDDTvv/++OXDggPn8889NeHi46du3b4XPrzyVde0ueuSRR0xERESJx1y6dKnx9/c3ixYtMv/4xz/M448/burVq2cyMjLKdS4VqTzW7YknnjB169Y1GzdudPl8KGlbQ2VEuKkiNmzYYCQVe8TFxRljLny43nnnnS5jdu3aZaKjo02NGjVM06ZNTWJiossfZrNmzUo8ZlJSUsVNrAKUx9r9kY3hprzWbdOmTSYiIsL4+/ub6667zrz00ksue3CquvJYt4KCAjNx4kQTHh5uAgICTGhoqBkxYoQ5ceJExU2sArizdidPnjQ1atQwc+fOveRx33jjDfMv//Ivxs/Pz3Tq1Ml888035TiLilce61bS8SSZhQsXlu9kPMRhDOc0AQCAPdhzAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACr/D/vevnJpwE9FgAAAABJRU5ErkJggg==", 284 | "text/plain": [ 285 | "
" 286 | ] 287 | }, 288 | "metadata": {}, 289 | "output_type": "display_data" 290 | } 291 | ], 292 | "source": [ 293 | "all_token_latencies = valid_df['end_to_end_latency_s'].apply(pd.Series).stack()\n", 294 | "all_token_latencies = all_token_latencies.reset_index(drop=True)\n", 295 | "all_token_latencies.plot.hist(title=\"Token Latencies\")\n" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [] 304 | } 305 | ], 306 | "metadata": { 307 | "kernelspec": { 308 | "display_name": "Python 3 (ipykernel)", 309 | "language": "python", 310 | "name": "python3" 311 | }, 312 | "language_info": { 313 | "codemirror_mode": { 314 | "name": "ipython", 315 | "version": 3 316 | }, 317 | "file_extension": ".py", 318 | "mimetype": "text/x-python", 319 | "name": "python", 320 | "nbconvert_exporter": "python", 321 | "pygments_lexer": "ipython3", 322 | "version": "3.10.13" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 5 327 | } 328 | -------------------------------------------------------------------------------- /llm_correctness.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | from pathlib import Path 5 | import random 6 | import re 7 | import time 8 | from typing import Any, Dict, List, Optional, Tuple 9 | 10 | import num2words 11 | import ray 12 | from tqdm import tqdm 13 | 14 | from llmperf import common_metrics 15 | from llmperf.common import SUPPORTED_APIS, construct_clients 16 | from llmperf.models import RequestConfig 17 | from llmperf.requests_launcher import RequestsLauncher 18 | from llmperf.utils import ( 19 | LLMPerfResults, 20 | ) 21 | 22 | MAX_RANDOM_NUMBER = 10000 23 | 24 | 25 | def llm_correctness( 26 | model: str, 27 | additional_sampling_params: Optional[Dict[str, Any]] = None, 28 | num_concurrent_requests: int = 1, 29 | max_num_completed_requests: int = 500, 30 | test_timeout_s=90, 31 | llm_api="chat", 32 | ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]: 33 | """Get the token throughput and latencies for the given model. 34 | 35 | Args: 36 | model: The name of the model to query. 37 | additional_sampling_params: Additional sampling parameters to send with the request. 38 | For more information see the LLM APIs documentation for the completions 39 | num_concurrent_requests: The number of concurrent requests to make. Increase 40 | this to increase the amount of load and vice versa. 41 | test_timeout_s: The amount of time to run the test for before reporting results. 42 | llm_api: The type of request to make. Either "chat" or "litellm". 43 | 44 | Returns: 45 | A tuple containing summary metrics and raw results from the test. 
46 | 47 | """ 48 | 49 | if not additional_sampling_params: 50 | additional_sampling_params = {} 51 | 52 | clients = construct_clients(llm_api=llm_api, num_clients=num_concurrent_requests) 53 | req_launcher = RequestsLauncher(clients) 54 | start_time = time.monotonic() 55 | 56 | num_errored_requests = 0 57 | num_mismatched_requests = 0 58 | num_completed_requests = 0 59 | 60 | sampling_params = {"temperature": 0.0} 61 | sampling_params.update(additional_sampling_params) 62 | completed_requests = [] 63 | iter = 0 64 | pbar = tqdm(total=max_num_completed_requests) 65 | while ( 66 | time.monotonic() - start_time < test_timeout_s 67 | and num_completed_requests < max_num_completed_requests 68 | ): 69 | iter += 1 70 | rnd_number = random.randint(0, MAX_RANDOM_NUMBER) 71 | rnd_num_words = num2words.num2words(rnd_number) 72 | 73 | prompt = f"Convert the following sequence of words into a number: {rnd_num_words}.\nPrint the number first." 74 | 75 | request_config = RequestConfig( 76 | model=model, 77 | prompt=(prompt, 0), 78 | sampling_params=sampling_params, 79 | metadata={"rnd_number": rnd_number}, 80 | llm_api=llm_api, 81 | ) 82 | req_launcher.launch_requests(request_config) 83 | 84 | if not (iter % num_concurrent_requests): 85 | completed_requests.extend(req_launcher.get_next_ready()) 86 | pbar.update(len(completed_requests) - num_completed_requests) 87 | num_completed_requests = len(completed_requests) 88 | 89 | pbar.close() 90 | end_time = time.monotonic() 91 | if end_time - start_time >= test_timeout_s: 92 | print("Test timed out before all requests could be completed.") 93 | 94 | raw_results = [] 95 | 96 | print("Mismatched and errored requests.") 97 | for out in completed_requests: 98 | metrics, generated_text, completed_request_config = out 99 | 100 | raw_results.append( 101 | { 102 | "metrics": metrics, 103 | "generated_text": generated_text, 104 | "request_config": dict(completed_request_config), 105 | } 106 | ) 107 | 108 | # if there were no errors when making request. 
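        # Correctness check, as implemented below: commas inside numbers in the
        # generated text (e.g. "9,876") are stripped, every digit run is extracted,
        # and the response only counts as a match if the expected random number
        # appears among those digit runs.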
109 | if not metrics[common_metrics.ERROR_CODE]: 110 | try: 111 | commas_between_numbers_re = r"(\d+),(?=\d)" 112 | gen_text_commas_removed = re.sub( 113 | commas_between_numbers_re, r"\1", generated_text 114 | ) 115 | nums = re.findall(r"\d+", gen_text_commas_removed) 116 | generated_text = gen_text_commas_removed.replace("\n", " ") 117 | 118 | assert str(completed_request_config.metadata["rnd_number"]) in nums 119 | except: 120 | num_mismatched_requests += 1 121 | print( 122 | f" mismatched request: {generated_text}, expected: {completed_request_config.metadata['rnd_number']}" 123 | ) 124 | else: 125 | num_errored_requests += 1 126 | print( 127 | f" The request errored: {metrics[common_metrics.ERROR_CODE]}, " 128 | f"{metrics[common_metrics.ERROR_MSG]} " 129 | ) 130 | print() 131 | 132 | error_rate = num_errored_requests / num_completed_requests 133 | mismatch_rate = num_mismatched_requests / num_completed_requests 134 | num_non_errored_requests = num_completed_requests - num_errored_requests 135 | summary_metrics = {} 136 | summary_metrics[common_metrics.NUM_ERRORS] = num_errored_requests 137 | summary_metrics["num_mismatched_requests"] = num_mismatched_requests 138 | summary_metrics["error_rate"] = error_rate 139 | summary_metrics["mismatch_rate"] = mismatch_rate 140 | summary_metrics[common_metrics.NUM_COMPLETED_REQUESTS] = num_completed_requests 141 | summary_metrics["num_non_errored_requests"] = num_non_errored_requests 142 | 143 | # Metadata 144 | summary_metrics["model"] = model 145 | summary_metrics["num_concurrent_requests"] = num_concurrent_requests 146 | summary_metrics["additional_sampling_params"] = additional_sampling_params 147 | summary_metrics["llm_api"] = llm_api 148 | 149 | return summary_metrics, raw_results 150 | 151 | 152 | def run( 153 | llm_api: str, 154 | model: str, 155 | test_timeout_s: int, 156 | max_num_completed_requests: int, 157 | num_concurrent_requests: int, 158 | additional_sampling_params: str, 159 | results_dir: str, 160 | user_metadata: Dict[str, str], 161 | ): 162 | """ 163 | Args: 164 | llm_api: The type of request to make. Either "chat" or "litellm". 165 | model: The name of the model to query. 166 | max_num_completed_requests: The number of requests to complete before finishing the test. 167 | test_timeout_s: The amount of time to run the test for before reporting results. 168 | num_concurrent_requests: The number of concurrent requests to make. Increase 169 | this to increase the amount of load and vice versa. 170 | mean_input_tokens: The mean number of tokens to send in the prompt for the request. 171 | stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request. 172 | mean_output_tokens: The mean number of tokens to generate per request. 173 | stddev_output_tokens: The standard deviation of the number of tokens to generate per request. 174 | additional_sampling_params: Additional sampling parameters to send with the request. 175 | For more information see the LLM APIs documentation for the completions. 176 | results_dir: The directory to save the results to. 
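        user_metadata: Additional key-value metadata supplied by the caller; it is
            merged into the summary metrics before the results are written to disk.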
177 | 178 | """ 179 | 180 | summary_metrics, raw_results = llm_correctness( 181 | model=model, 182 | llm_api=llm_api, 183 | test_timeout_s=test_timeout_s, 184 | max_num_completed_requests=max_num_completed_requests, 185 | num_concurrent_requests=num_concurrent_requests, 186 | additional_sampling_params=json.loads(additional_sampling_params), 187 | ) 188 | 189 | time.sleep(2) 190 | 191 | print( 192 | f"Results for llm correctness test for {model} queried with the {llm_api} api." 193 | ) 194 | print( 195 | f"Errors: {summary_metrics[common_metrics.NUM_ERRORS]}, " 196 | f"Error rate: {summary_metrics['error_rate']}" 197 | ) 198 | 199 | print( 200 | f"Mismatched: {summary_metrics['num_mismatched_requests']}, " 201 | f"Mismatch rate: {summary_metrics['mismatch_rate']}" 202 | ) 203 | print(f"Completed: {summary_metrics[common_metrics.NUM_COMPLETED_REQUESTS]}") 204 | print(f"Completed without errors: {summary_metrics['num_non_errored_requests']}") 205 | 206 | if results_dir: 207 | file_name = f"{model}_correctness" 208 | file_name = re.sub(r"[^\w\d-]+", "-", file_name) 209 | file_name = re.sub(r"-{2,}", "-", file_name) 210 | summary_file_name = f"{file_name}_summary" 211 | individual_responses_filename = f"{file_name}_individual_responses" 212 | summary_metrics.update(user_metadata) 213 | results = LLMPerfResults(name=summary_file_name, metadata=summary_metrics) 214 | results_dir = Path(results_dir) 215 | if not results_dir.exists(): 216 | results_dir.mkdir(parents=True) 217 | elif not results_dir.is_dir(): 218 | raise ValueError(f"{results_dir} is not a directory") 219 | with open(results_dir / f"{summary_file_name}.json", "w") as f: 220 | json.dump(results.to_dict(), f, indent=4) 221 | with open(results_dir / f"{individual_responses_filename}.json", "w") as f: 222 | json.dump(raw_results, f, indent=4) 223 | 224 | 225 | args = argparse.ArgumentParser(description="Run a correctness test for a given model.") 226 | 227 | args.add_argument( 228 | "--model", type=str, required=True, help="The model to use for this load test." 229 | ) 230 | args.add_argument( 231 | "--num-concurrent-requests", 232 | type=int, 233 | default=10, 234 | help=("The number of concurrent requests to send. (default: %(default)s)"), 235 | ) 236 | args.add_argument( 237 | "--timeout", 238 | type=int, 239 | default=90, 240 | help="The amount of time to run the load test for. (default: %(default)s)", 241 | ) 242 | args.add_argument( 243 | "--max-num-completed-requests", 244 | type=int, 245 | default=50, 246 | help=( 247 | "The number of requests to complete before finishing the test. Note " 248 | "that its possible for the test to timeout first. (default: %(default)s)" 249 | ), 250 | ) 251 | args.add_argument( 252 | "--additional-sampling-params", 253 | type=str, 254 | default="{}", 255 | help=( 256 | "Additional sampling params to send with the each request to the LLM API. " 257 | "(default: %(default)s) No additional sampling params are sent." 258 | ), 259 | ) 260 | args.add_argument( 261 | "--results-dir", 262 | type=str, 263 | default="", 264 | help=( 265 | "The directory to save the results to. " 266 | "(`default: %(default)s`) No results are saved)" 267 | ), 268 | ) 269 | args.add_argument( 270 | "--llm-api", 271 | type=str, 272 | default="openai", 273 | help=( 274 | f"The type of request to make. 
The supported llm apis are {SUPPORTED_APIS} " 275 | " (`default: %(default)s`)" 276 | ), 277 | ) 278 | args.add_argument( 279 | "--metadata", 280 | type=str, 281 | default="", 282 | help=( 283 | "A comma separated list of metadata to include in the results, e.g. " 284 | "name=foo,bar=1. These will be added to the metadata field of the results. " 285 | ), 286 | ) 287 | 288 | if __name__ == "__main__": 289 | args = args.parse_args() 290 | 291 | env_vars = dict(os.environ) 292 | ray.init(runtime_env={"env_vars": env_vars}) 293 | # Parse user metadata. 294 | user_metadata = {} 295 | if args.metadata: 296 | for item in args.metadata.split(","): 297 | key, value = item.split("=") 298 | user_metadata[key] = value 299 | 300 | run( 301 | llm_api=args.llm_api, 302 | model=args.model, 303 | test_timeout_s=args.timeout, 304 | max_num_completed_requests=args.max_num_completed_requests, 305 | num_concurrent_requests=args.num_concurrent_requests, 306 | additional_sampling_params=args.additional_sampling_params, 307 | results_dir=args.results_dir, 308 | user_metadata=user_metadata, 309 | ) 310 | -------------------------------------------------------------------------------- /pre-commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Running pre-hooks before committing..." 3 | 4 | echo "======FORMAT=====" 5 | black . -q 6 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=43.0.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "LLMPerf" 7 | version = "0.1.0" 8 | description = "A framework for load testing LLM APIs" 9 | authors = [{name="Avnish Narayan", email="avnish@anyscale.com"}] 10 | license = {text= "Apache-2.0"} 11 | requires-python = ">=3.8, <3.11" 12 | dependencies = ["pydantic<2.5", 13 | "ray", 14 | "pytest>=6.0", 15 | "seaborn>=0.11", 16 | "awscli>=1.22", 17 | "typer>=0.4", 18 | "litellm>=0.1.738", 19 | "num2words", 20 | "transformers", 21 | "tqdm", 22 | "boto3", 23 | "google-cloud-aiplatform"] 24 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # For lints 2 | black -------------------------------------------------------------------------------- /src/llmperf/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/llmperf/common.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from llmperf.ray_clients.litellm_client import LiteLLMClient 3 | from llmperf.ray_clients.openai_chat_completions_client import ( 4 | OpenAIChatCompletionsClient, 5 | ) 6 | from llmperf.ray_clients.sagemaker_client import SageMakerClient 7 | from llmperf.ray_clients.vertexai_client import VertexAIClient 8 | from llmperf.ray_llm_client import LLMClient 9 | 10 | 11 | SUPPORTED_APIS = ["openai", "anthropic", "litellm"] 12 | 13 | 14 | def construct_clients(llm_api: str, num_clients: int) -> List[LLMClient]: 15 | """Construct LLMClients that will be used to make requests to the LLM API. 16 | 17 | Args: 18 | llm_api: The name of the LLM API to use. 19 | num_clients: The number of concurrent requests to make. 
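
    Note:
        "sagemaker" and "vertexai" are handled by their dedicated clients even though
        they are not listed in SUPPORTED_APIS, while the supported values other than
        "openai" (i.e. "anthropic" and "litellm") are served by the LiteLLM client.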
20 | 21 | Returns: 22 | The constructed LLMCLients 23 | 24 | """ 25 | if llm_api == "openai": 26 | clients = [OpenAIChatCompletionsClient.remote() for _ in range(num_clients)] 27 | elif llm_api == "sagemaker": 28 | clients = [SageMakerClient.remote() for _ in range(num_clients)] 29 | elif llm_api == "vertexai": 30 | clients = [VertexAIClient.remote() for _ in range(num_clients)] 31 | elif llm_api in SUPPORTED_APIS: 32 | clients = [LiteLLMClient.remote() for _ in range(num_clients)] 33 | else: 34 | raise ValueError( 35 | f"llm_api must be one of the supported LLM APIs: {SUPPORTED_APIS}" 36 | ) 37 | 38 | return clients 39 | -------------------------------------------------------------------------------- /src/llmperf/common_metrics.py: -------------------------------------------------------------------------------- 1 | # TODO (Avnishn): compute metrics in class 2 | INTER_TOKEN_LAT = "inter_token_latency_s" 3 | TTFT = "ttft_s" 4 | E2E_LAT = "end_to_end_latency_s" 5 | NUM_INPUT_TOKENS = "number_input_tokens" 6 | NUM_OUTPUT_TOKENS = "number_output_tokens" 7 | NUM_TOTAL_TOKENS = "number_total_tokens" 8 | REQ_OUTPUT_THROUGHPUT = "request_output_throughput_token_per_s" 9 | ERROR_MSG = "error_msg" 10 | ERROR_CODE = "error_code" 11 | ERROR_CODE_FREQ = "error_code_frequency" 12 | NUM_ERRORS = "number_errors" 13 | OUTPUT_THROUGHPUT = "mean_output_throughput_token_per_s" 14 | NUM_COMPLETED_REQUESTS = "num_completed_requests" 15 | COMPLETED_REQUESTS_PER_MIN = "num_completed_requests_per_min" 16 | ERROR_RATE = "error_rate" 17 | NUM_REQ_STARTED = "num_requests_started" 18 | -------------------------------------------------------------------------------- /src/llmperf/models.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple 2 | from pydantic import BaseModel 3 | 4 | 5 | class RequestConfig(BaseModel): 6 | """The configuration for a request to the LLM API. 7 | 8 | Args: 9 | model: The model to use. 10 | prompt: The prompt to provide to the LLM API. 11 | sampling_params: Additional sampling parameters to send with the request. 12 | For more information see the Router app's documentation for the completions 13 | llm_api: The name of the LLM API to send the request to. 14 | metadata: Additional metadata to attach to the request for logging or validation purposes. 
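
    Example (illustrative; "my-endpoint-model" is a placeholder):
        RequestConfig(
            model="my-endpoint-model",
            prompt=("Hello world", 3),
            sampling_params={"max_tokens": 64},
            llm_api="openai",
        )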
15 | """ 16 | 17 | model: str 18 | prompt: Tuple[str, int] 19 | sampling_params: Optional[Dict[str, Any]] = None 20 | llm_api: Optional[str] = None 21 | metadata: Optional[Dict[str, Any]] = None 22 | -------------------------------------------------------------------------------- /src/llmperf/ray_clients/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/llmperf/f1d6bed47e4501b0e371082b41601b59ab55269f/src/llmperf/ray_clients/__init__.py -------------------------------------------------------------------------------- /src/llmperf/ray_clients/litellm_client.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Any, Dict 3 | import ray 4 | 5 | from llmperf.ray_llm_client import LLMClient 6 | from llmperf.models import RequestConfig 7 | from llmperf import common_metrics 8 | 9 | 10 | @ray.remote 11 | class LiteLLMClient(LLMClient): 12 | """Client for LiteLLM Completions API.""" 13 | 14 | def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]: 15 | # litellm package isn't serializable, so we import it within the function 16 | # to maintain compatibility with ray. 17 | from litellm import completion, validate_environment 18 | 19 | prompt = request_config.prompt 20 | prompt, prompt_len = prompt 21 | 22 | message = [ 23 | {"role": "system", "content": ""}, 24 | {"role": "user", "content": prompt}, 25 | ] 26 | assert ( 27 | request_config.llm_api is not None 28 | ), "the request config's llm_api must be set." 29 | if request_config.llm_api == "litellm": 30 | model = request_config.model 31 | else: 32 | model = request_config.llm_api + "/" + request_config.model 33 | validation_result = validate_environment(model) 34 | if validation_result["missing_keys"]: 35 | raise ValueError( 36 | f"The following environment vars weren't found but were necessary for " 37 | f"the model {request_config.model}: {validation_result['missing_keys']}" 38 | ) 39 | body = { 40 | "model": model, 41 | "messages": message, 42 | "stream": True, 43 | } 44 | sampling_params = request_config.sampling_params 45 | body.update(sampling_params or {}) 46 | 47 | time_to_next_token = [] 48 | tokens_received = 0 49 | ttft = 0 50 | error_response_code = -1 51 | generated_text = "" 52 | error_msg = "" 53 | output_throughput = 0 54 | total_request_time = 0 55 | 56 | metrics = {} 57 | 58 | metrics[common_metrics.ERROR_CODE] = None 59 | metrics[common_metrics.ERROR_MSG] = "" 60 | 61 | try: 62 | start_time = time.monotonic() 63 | most_recent_received_token_time = time.monotonic() 64 | 65 | response = completion(**body) 66 | ttft = 0 67 | for tok in response: 68 | if tok.choices[0].delta: 69 | delta = tok.choices[0].delta 70 | if delta.get("content", None): 71 | if ttft == 0: 72 | ttft = time.monotonic() - start_time 73 | time_to_next_token.append(ttft) 74 | else: 75 | time_to_next_token.append( 76 | time.monotonic() - most_recent_received_token_time 77 | ) 78 | generated_text += delta["content"] 79 | most_recent_received_token_time = time.monotonic() 80 | tokens_received += 1 81 | 82 | total_request_time = time.monotonic() - start_time 83 | 84 | output_throughput = tokens_received / total_request_time 85 | 86 | except Exception as e: 87 | metrics[common_metrics.ERROR_MSG] = error_msg 88 | metrics[common_metrics.ERROR_CODE] = error_response_code 89 | 90 | print(f"Warning Or Error: {e}") 91 | print(error_response_code) 92 | 93 | metrics[common_metrics.INTER_TOKEN_LAT] = 
sum(time_to_next_token) 94 | metrics[common_metrics.TTFT] = ttft 95 | metrics[common_metrics.E2E_LAT] = total_request_time 96 | metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput 97 | metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len 98 | metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received 99 | metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len 100 | return metrics, generated_text, request_config 101 | -------------------------------------------------------------------------------- /src/llmperf/ray_clients/openai_chat_completions_client.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | from typing import Any, Dict 5 | 6 | import ray 7 | import requests 8 | 9 | from llmperf.ray_llm_client import LLMClient 10 | from llmperf.models import RequestConfig 11 | from llmperf import common_metrics 12 | 13 | 14 | @ray.remote 15 | class OpenAIChatCompletionsClient(LLMClient): 16 | """Client for OpenAI Chat Completions API.""" 17 | 18 | def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]: 19 | prompt = request_config.prompt 20 | prompt, prompt_len = prompt 21 | 22 | message = [ 23 | {"role": "system", "content": ""}, 24 | {"role": "user", "content": prompt}, 25 | ] 26 | model = request_config.model 27 | body = { 28 | "model": model, 29 | "messages": message, 30 | "stream": True, 31 | } 32 | sampling_params = request_config.sampling_params 33 | body.update(sampling_params or {}) 34 | time_to_next_token = [] 35 | tokens_received = 0 36 | ttft = 0 37 | error_response_code = -1 38 | generated_text = "" 39 | error_msg = "" 40 | output_throughput = 0 41 | total_request_time = 0 42 | 43 | metrics = {} 44 | 45 | metrics[common_metrics.ERROR_CODE] = None 46 | metrics[common_metrics.ERROR_MSG] = "" 47 | 48 | start_time = time.monotonic() 49 | most_recent_received_token_time = time.monotonic() 50 | address = os.environ.get("OPENAI_API_BASE") 51 | if not address: 52 | raise ValueError("the environment variable OPENAI_API_BASE must be set.") 53 | key = os.environ.get("OPENAI_API_KEY") 54 | if not key: 55 | raise ValueError("the environment variable OPENAI_API_KEY must be set.") 56 | headers = {"Authorization": f"Bearer {key}"} 57 | if not address: 58 | raise ValueError("No host provided.") 59 | if not address.endswith("/"): 60 | address = address + "/" 61 | address += "chat/completions" 62 | try: 63 | with requests.post( 64 | address, 65 | json=body, 66 | stream=True, 67 | timeout=180, 68 | headers=headers, 69 | ) as response: 70 | if response.status_code != 200: 71 | error_msg = response.text 72 | error_response_code = response.status_code 73 | response.raise_for_status() 74 | for chunk in response.iter_lines(chunk_size=None): 75 | chunk = chunk.strip() 76 | 77 | if not chunk: 78 | continue 79 | stem = "data: " 80 | chunk = chunk[len(stem) :] 81 | if chunk == b"[DONE]": 82 | continue 83 | tokens_received += 1 84 | data = json.loads(chunk) 85 | 86 | if "error" in data: 87 | error_msg = data["error"]["message"] 88 | error_response_code = data["error"]["code"] 89 | raise RuntimeError(data["error"]["message"]) 90 | 91 | delta = data["choices"][0]["delta"] 92 | if delta.get("content", None): 93 | if not ttft: 94 | ttft = time.monotonic() - start_time 95 | time_to_next_token.append(ttft) 96 | else: 97 | time_to_next_token.append( 98 | time.monotonic() - most_recent_received_token_time 99 | ) 100 | most_recent_received_token_time = time.monotonic() 101 | generated_text 
+= delta["content"] 102 | 103 | total_request_time = time.monotonic() - start_time 104 | output_throughput = tokens_received / total_request_time 105 | 106 | except Exception as e: 107 | metrics[common_metrics.ERROR_MSG] = error_msg 108 | metrics[common_metrics.ERROR_CODE] = error_response_code 109 | print(f"Warning Or Error: {e}") 110 | print(error_response_code) 111 | 112 | metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token) #This should be same as metrics[common_metrics.E2E_LAT]. Leave it here for now 113 | metrics[common_metrics.TTFT] = ttft 114 | metrics[common_metrics.E2E_LAT] = total_request_time 115 | metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput 116 | metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len 117 | metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received 118 | metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len 119 | 120 | return metrics, generated_text, request_config 121 | -------------------------------------------------------------------------------- /src/llmperf/ray_clients/sagemaker_client.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | import os 4 | import time 5 | from typing import Any, Dict 6 | 7 | import boto3 8 | import ray 9 | from transformers import LlamaTokenizerFast 10 | 11 | from llmperf.ray_llm_client import LLMClient 12 | from llmperf.models import RequestConfig 13 | from llmperf import common_metrics 14 | 15 | 16 | @ray.remote 17 | class SageMakerClient(LLMClient): 18 | """Client for OpenAI Chat Completions API.""" 19 | 20 | def __init__(self): 21 | # Sagemaker doesn't return the number of tokens that are generated so we approximate it by 22 | # using the llama tokenizer. 23 | self.tokenizer = LlamaTokenizerFast.from_pretrained( 24 | "hf-internal-testing/llama-tokenizer" 25 | ) 26 | 27 | def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]: 28 | if not os.environ.get("AWS_ACCESS_KEY_ID"): 29 | raise ValueError("AWS_ACCESS_KEY_ID must be set.") 30 | if not os.environ.get("AWS_SECRET_ACCESS_KEY"): 31 | raise ValueError("AWS_SECRET_ACCESS_KEY must be set.") 32 | if not os.environ.get("AWS_REGION_NAME"): 33 | raise ValueError("AWS_REGION_NAME must be set.") 34 | 35 | prompt = request_config.prompt 36 | prompt, prompt_len = prompt 37 | 38 | message = [ 39 | {"role": "system", "content": ""}, 40 | {"role": "user", "content": prompt}, 41 | ] 42 | model = request_config.model 43 | sm_runtime = boto3.client( 44 | "sagemaker-runtime", region_name=os.environ.get("AWS_REGION_NAME") 45 | ) 46 | 47 | sampling_params = request_config.sampling_params 48 | 49 | if "max_tokens" in sampling_params: 50 | sampling_params["max_new_tokens"] = sampling_params["max_tokens"] 51 | del sampling_params["max_tokens"] 52 | 53 | message = { 54 | "inputs": [ 55 | [ 56 | {"role": "system", "content": ""}, 57 | {"role": "user", "content": prompt}, 58 | ] 59 | ], 60 | "parameters": { 61 | **request_config.sampling_params, 62 | }, 63 | } 64 | 65 | time_to_next_token = [] 66 | tokens_received = 0 67 | ttft = 0 68 | error_response_code = None 69 | generated_text = "" 70 | error_msg = "" 71 | output_throughput = 0 72 | total_request_time = 0 73 | metrics = {} 74 | 75 | start_time = time.monotonic() 76 | most_recent_received_token_time = time.monotonic() 77 | 78 | try: 79 | response = sm_runtime.invoke_endpoint_with_response_stream( 80 | EndpointName=model, 81 | ContentType="application/json", 82 | Body=json.dumps(message), 83 | 
CustomAttributes="accept_eula=true", 84 | ) 85 | 86 | event_stream = response["Body"] 87 | json_byte = b"" 88 | for line, ttft, _ in LineIterator(event_stream): 89 | json_byte += line 90 | time_to_next_token.append( 91 | time.monotonic() - most_recent_received_token_time 92 | ) 93 | most_recent_received_token_time = time.monotonic() 94 | ttft = ttft - start_time 95 | resp = json.loads(json_byte) 96 | total_request_time = time.monotonic() - start_time 97 | generated_text = resp[0]["generation"]["content"] 98 | tokens_received = len(self.tokenizer.encode(generated_text)) 99 | output_throughput = tokens_received / total_request_time 100 | 101 | except Exception as e: 102 | print(f"Warning Or Error: {e}") 103 | print(error_response_code) 104 | error_msg = str(e) 105 | error_response_code = 500 106 | 107 | metrics[common_metrics.ERROR_MSG] = error_msg 108 | metrics[common_metrics.ERROR_CODE] = error_response_code 109 | metrics[common_metrics.INTER_TOKEN_LAT] = time_to_next_token 110 | metrics[common_metrics.TTFT] = ttft 111 | metrics[common_metrics.E2E_LAT] = total_request_time 112 | metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput 113 | metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len 114 | metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received 115 | metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len 116 | 117 | return metrics, generated_text, request_config 118 | 119 | 120 | class LineIterator: 121 | """ 122 | A helper class for parsing the byte stream input. 123 | Reference: https://aws.amazon.com/blogs/machine-learning/elevating-the-generative-ai-experience-introducing-streaming-support-in-amazon-sagemaker-hosting/ 124 | """ 125 | 126 | def __init__(self, stream): 127 | self.byte_iterator = iter(stream) 128 | self.buffer = io.BytesIO() 129 | self.read_pos = 0 130 | self.ttft = 0 131 | 132 | def __iter__(self): 133 | return self 134 | 135 | def __next__(self): 136 | while True: 137 | self.buffer.seek(self.read_pos) 138 | line = self.buffer.readline() 139 | if line and line[-1] == ord("\n"): 140 | if self.ttft == 0: 141 | self.ttft = time.monotonic() 142 | self.read_pos += len(line) 143 | return line[:-1], self.ttft, time.monotonic() 144 | # kyle: dealing with last ']' for chat output 145 | if line and self.read_pos == self.buffer.getbuffer().nbytes - 1: 146 | self.read_pos += 1 147 | return line, self.ttft, time.monotonic() 148 | try: 149 | chunk = next(self.byte_iterator) 150 | except StopIteration: 151 | if self.read_pos < self.buffer.getbuffer().nbytes: 152 | continue 153 | raise 154 | if "PayloadPart" not in chunk: 155 | print("Unknown event type:" + chunk) 156 | continue 157 | self.buffer.seek(0, io.SEEK_END) 158 | self.buffer.write(chunk["PayloadPart"]["Bytes"]) 159 | -------------------------------------------------------------------------------- /src/llmperf/ray_clients/vertexai_client.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | from typing import Any, Dict 5 | 6 | import ray 7 | import requests 8 | from transformers import LlamaTokenizerFast 9 | 10 | from llmperf.ray_llm_client import LLMClient 11 | from llmperf.models import RequestConfig 12 | from llmperf import common_metrics 13 | 14 | 15 | @ray.remote 16 | class VertexAIClient(LLMClient): 17 | """Client for VertexAI API.""" 18 | 19 | def __init__(self): 20 | # VertexAI doesn't return the number of tokens that are generated so we approximate it by 21 | # using the llama tokenizer. 
22 | self.tokenizer = LlamaTokenizerFast.from_pretrained( 23 | "hf-internal-testing/llama-tokenizer" 24 | ) 25 | 26 | def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]: 27 | project_id = os.environ.get("GCLOUD_PROJECT_ID") 28 | region = os.environ.get("GCLOUD_REGION") 29 | endpoint_id = os.environ.get("VERTEXAI_ENDPOINT_ID") 30 | access_token = os.environ.get("GCLOUD_ACCESS_TOKEN").strip() 31 | if not project_id: 32 | raise ValueError("the environment variable GCLOUD_PROJECT_ID must be set.") 33 | if not region: 34 | raise ValueError("the environment variable GCLOUD_REGION must be set.") 35 | if not endpoint_id: 36 | raise ValueError( 37 | "the environment variable VERTEXAI_ENDPOINT_ID must be set." 38 | ) 39 | if not access_token: 40 | raise ValueError( 41 | "the environment variable GCLOUD_ACCESS_TOKEN must be set." 42 | ) 43 | prompt = request_config.prompt 44 | prompt, prompt_len = prompt 45 | 46 | time_to_next_token = [] 47 | tokens_received = 0 48 | ttft = 0 49 | generated_text = "" 50 | output_throughput = 0 51 | total_request_time = 0 52 | 53 | metrics = {} 54 | 55 | metrics[common_metrics.ERROR_CODE] = None 56 | metrics[common_metrics.ERROR_MSG] = "" 57 | 58 | try: 59 | # Define the URL for the request 60 | url = ( 61 | f"https://{region}-aiplatform.googleapis.com/v1/projects/" 62 | f"{project_id}/locations/{region}/endpoints/{endpoint_id}:predict" 63 | ) 64 | 65 | # Define the headers 66 | headers = { 67 | "Authorization": f"Bearer {access_token}", 68 | "Content-Type": "application/json", 69 | } 70 | 71 | sampling_params = request_config.sampling_params 72 | if "max_new_tokens" in sampling_params: 73 | sampling_params["maxOutputTokens"] = sampling_params.pop( 74 | "max_new_tokens" 75 | ) 76 | 77 | # Define the data payload 78 | data = {"instances": [{"prompt": prompt}], "parameters": sampling_params} 79 | 80 | # Make the POST request 81 | start_time = time.monotonic() 82 | response = requests.post(url, headers=headers, data=json.dumps(data)) 83 | total_request_time = time.monotonic() - start_time 84 | response_code = response.status_code 85 | response.raise_for_status() 86 | # output from the endpoint is in the form: 87 | # {"predictions": ["Input: ... 
\nOutput:\n ..."]} 88 | generated_text = response.json()["predictions"][0].split("\nOutput:\n")[1] 89 | tokens_received = len(self.tokenizer.encode(generated_text)) 90 | ttft = -1 91 | output_throughput = tokens_received / total_request_time 92 | time_to_next_token = [ 93 | total_request_time / tokens_received for _ in range(tokens_received) 94 | ] 95 | 96 | except Exception as e: 97 | metrics[common_metrics.ERROR_MSG] = str(e) 98 | metrics[common_metrics.ERROR_CODE] = response_code 99 | print(f"Warning Or Error: {e}") 100 | print(response_code) 101 | print(response_code) 102 | 103 | metrics[common_metrics.INTER_TOKEN_LAT] = time_to_next_token 104 | metrics[common_metrics.TTFT] = ttft 105 | metrics[common_metrics.E2E_LAT] = total_request_time 106 | metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput 107 | metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len 108 | metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received 109 | metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len 110 | 111 | return metrics, generated_text, request_config 112 | 113 | 114 | if __name__ == "__main__": 115 | # Run these before hand: 116 | 117 | # gcloud auth application-default login 118 | # gcloud config set project YOUR_PROJECT_ID 119 | # export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token) 120 | # export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID 121 | # export GCLOUD_REGION=YOUR_REGION 122 | # export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID 123 | 124 | client = VertexAIClient.remote() 125 | request_config = RequestConfig( 126 | prompt=("Give me ten interview questions for the role of program manager.", 10), 127 | model="gpt3", 128 | sampling_params={ 129 | "temperature": 0.2, 130 | "max_new_tokens": 256, 131 | "top_k": 40, 132 | "top_p": 0.95, 133 | }, 134 | ) 135 | ray.get(client.llm_request.remote(request_config)) 136 | -------------------------------------------------------------------------------- /src/llmperf/ray_llm_client.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Dict, Tuple 3 | 4 | from llmperf.models import RequestConfig 5 | 6 | 7 | class LLMClient: 8 | """A client for making requests to a LLM API e.g Anyscale Endpoints.""" 9 | 10 | @abc.abstractmethod 11 | def llm_request( 12 | self, request_config: RequestConfig 13 | ) -> Tuple[Dict[str, Any], str, RequestConfig]: 14 | """Make a single completion request to a LLM API 15 | 16 | Returns: 17 | Metrics about the performance charateristics of the request. 18 | The text generated by the request to the LLM API. 19 | The request_config used to make the request. This is mainly for logging purposes. 20 | 21 | """ 22 | ... 23 | -------------------------------------------------------------------------------- /src/llmperf/requests_launcher.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | 3 | from llmperf.ray_llm_client import LLMClient 4 | from llmperf.models import RequestConfig 5 | from ray.util import ActorPool 6 | 7 | 8 | class RequestsLauncher: 9 | """Launch requests from LLMClients to their respective LLM APIs.""" 10 | 11 | def __init__(self, llm_clients: List[LLMClient]): 12 | self._llm_client_pool = ActorPool(llm_clients) 13 | 14 | def launch_requests(self, request_config: RequestConfig) -> None: 15 | """Launch requests to the LLM API. 16 | 17 | Args: 18 | request_config: The configuration for the request. 
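
        Note:
            If no client in the actor pool is free when this is called, the request
            is dropped rather than queued (see the has_free() check below).

        Example (illustrative):
            launcher = RequestsLauncher(construct_clients("openai", num_clients=2))  # construct_clients from llmperf.common
            launcher.launch_requests(request_config)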
19 | 20 | """ 21 | if self._llm_client_pool.has_free(): 22 | self._llm_client_pool.submit( 23 | lambda client, _request_config: client.llm_request.remote( 24 | _request_config 25 | ), 26 | request_config, 27 | ) 28 | 29 | def get_next_ready(self, block: bool = False) -> List[Any]: 30 | """Return results that are ready from completed requests. 31 | 32 | Args: 33 | block: Whether to block until a result is ready. 34 | 35 | Returns: 36 | A list of results that are ready. 37 | 38 | """ 39 | results = [] 40 | if not block: 41 | while self._llm_client_pool.has_next(): 42 | results.append(self._llm_client_pool.get_next_unordered()) 43 | else: 44 | while not self._llm_client_pool.has_next(): 45 | pass 46 | while self._llm_client_pool.has_next(): 47 | results.append(self._llm_client_pool.get_next_unordered()) 48 | return results 49 | -------------------------------------------------------------------------------- /src/llmperf/sonnet.txt: -------------------------------------------------------------------------------- 1 | Shall I compare thee to a summer's day? 2 | Thou art more lovely and more temperate: 3 | Rough winds do shake the darling buds of May, 4 | And summer's lease hath all too short a date: 5 | Sometime too hot the eye of heaven shines, 6 | And often is his gold complexion dimm'd; 7 | And every fair from fair sometime declines, 8 | By chance or nature's changing course untrimm'd; 9 | But thy eternal summer shall not fade 10 | Nor lose possession of that fair thou owest; 11 | Nor shall Death brag thou wander'st in his shade, 12 | When in eternal lines to time thou growest: 13 | So long as men can breathe or eyes can see, 14 | So long lives this and this gives life to thee. 15 | Then let not winter's ragged hand deface 16 | In thee thy summer, ere thou be distill'd: 17 | Make sweet some vial; treasure thou some place 18 | With beauty's treasure, ere it be self-kill'd. 19 | That use is not forbidden usury, 20 | Which happies those that pay the willing loan; 21 | That's for thyself to breed another thee, 22 | Or ten times happier, be it ten for one; 23 | Ten times thyself were happier than thou art, 24 | If ten of thine ten times refigured thee: 25 | Then what could death do, if thou shouldst depart, 26 | Leaving thee living in posterity? 27 | Be not self-will'd, for thou art much too fair 28 | To be death's conquest and make worms thine heir. 29 | Where art thou, Muse, that thou forget'st so long 30 | To speak of that which gives thee all thy might? 31 | Spend'st thou thy fury on some worthless song, 32 | Darkening thy power to lend base subjects light? 33 | Return, forgetful Muse, and straight redeem 34 | In gentle numbers time so idly spent; 35 | Sing to the ear that doth thy lays esteem 36 | And gives thy pen both skill and argument. 37 | Rise, resty Muse, my love's sweet face survey, 38 | If Time have any wrinkle graven there; 39 | If any, be a satire to decay, 40 | And make Time's spoils despised every where. 41 | Give my love fame faster than Time wastes life; 42 | So thou prevent'st his scythe and crooked knife. 43 | My glass shall not persuade me I am old, 44 | So long as youth and thou are of one date; 45 | But when in thee time's furrows I behold, 46 | Then look I death my days should expiate. 47 | For all that beauty that doth cover thee 48 | Is but the seemly raiment of my heart, 49 | Which in thy breast doth live, as thine in me: 50 | How can I then be elder than thou art? 
51 | O, therefore, love, be of thyself so wary 52 | As I, not for myself, but for thee will; 53 | Bearing thy heart, which I will keep so chary 54 | As tender nurse her babe from faring ill. 55 | Presume not on thy heart when mine is slain; 56 | Thou gavest me thine, not to give back again. 57 | So am I as the rich, whose blessed key 58 | Can bring him to his sweet up-locked treasure, 59 | The which he will not every hour survey, 60 | For blunting the fine point of seldom pleasure. 61 | Therefore are feasts so solemn and so rare, 62 | Since, seldom coming, in the long year set, 63 | Like stones of worth they thinly placed are, 64 | Or captain jewels in the carcanet. 65 | So is the time that keeps you as my chest, 66 | Or as the wardrobe which the robe doth hide, 67 | To make some special instant special blest, 68 | By new unfolding his imprison'd pride. 69 | Blessed are you, whose worthiness gives scope, 70 | Being had, to triumph, being lack'd, to hope. 71 | If there be nothing new, but that which is 72 | Hath been before, how are our brains beguiled, 73 | Which, labouring for invention, bear amiss 74 | The second burden of a former child! 75 | O, that record could with a backward look, 76 | Even of five hundred courses of the sun, 77 | Show me your image in some antique book, 78 | Since mind at first in character was done! 79 | That I might see what the old world could say 80 | To this composed wonder of your frame; 81 | Whether we are mended, or whether better they, 82 | Or whether revolution be the same. 83 | O, sure I am, the wits of former days 84 | To subjects worse have given admiring praise. -------------------------------------------------------------------------------- /src/llmperf/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import pathlib 4 | import random 5 | import subprocess 6 | import time 7 | from typing import Any, Dict, Tuple 8 | 9 | from transformers import LlamaTokenizerFast 10 | 11 | 12 | RESULTS_VERSION = "2023-08-31" 13 | 14 | 15 | class LLMPerfResults: 16 | def __init__( 17 | self, 18 | name: str, 19 | metadata: Dict[str, Any] = None, 20 | ): 21 | self.name = name 22 | self.metadata = metadata or {} 23 | self.timestamp = int(time.time()) 24 | self.metadata["timestamp"] = self.timestamp 25 | self.version = RESULTS_VERSION 26 | 27 | def to_dict(self): 28 | data = { 29 | "version": self.version, 30 | "name": self.name, 31 | } 32 | data.update(self.metadata) 33 | data = flatten_dict(data) 34 | return data 35 | 36 | def json(self): 37 | data = self.to_dict() 38 | return json.dumps(data) 39 | 40 | 41 | def upload_to_s3(results_path: str, s3_path: str) -> None: 42 | """Upload the results to s3. 43 | 44 | Args: 45 | results_path: The path to the results file. 46 | s3_path: The s3 path to upload the results to. 47 | 48 | """ 49 | 50 | command = ["aws", "s3", "sync", results_path, f"{s3_path}/"] 51 | result = subprocess.run(command) 52 | if result.returncode == 0: 53 | print("Files uploaded successfully!") 54 | else: 55 | print("An error occurred:") 56 | print(result.stderr) 57 | 58 | 59 | def randomly_sample_sonnet_lines_prompt( 60 | prompt_tokens_mean: int = 550, 61 | prompt_tokens_stddev: int = 250, 62 | expect_output_tokens: int = 150, 63 | tokenizer = LlamaTokenizerFast.from_pretrained( 64 | "hf-internal-testing/llama-tokenizer") 65 | ) -> Tuple[str, int]: 66 | """Generate a prompt that randomly samples lines from a the shakespeare sonnet at sonnet.txt. 
67 | 
68 |     Args:
69 |         prompt_tokens_mean: The mean number of tokens in the prompt to generate.
70 |         prompt_tokens_stddev: The standard deviation of the number of tokens in the prompt to generate.
71 |         expect_output_tokens: The number of tokens to expect in the output. This is used to
72 |             determine the length of the prompt. The prompt will be generated such that the output
73 |             will be approximately this many tokens.
74 | 
75 |     Note:
76 |         Tokens will be counted from the sonnet using the Llama tokenizer. Using one tokenizer
77 |         ensures a fairer comparison across different LLMs. For example, if GPT-3.5 tokenizes
78 |         a prompt in fewer tokens than Llama 2, then this will be reflected in the results since
79 |         they will be fed identical prompts.
80 | 
81 |     Returns:
82 |         A tuple of the prompt and the length of the prompt.
83 |     """
84 | 
85 |     get_token_length = lambda text: len(tokenizer.encode(text))
86 | 
87 |     prompt = (
88 |         "Randomly stream lines from the following text "
89 |         f"with {expect_output_tokens} output tokens. "
90 |         "Don't generate eos tokens:\n\n"
91 |     )
92 |     # get a prompt length that is at least as long as the base
93 |     num_prompt_tokens = sample_random_positive_int(
94 |         prompt_tokens_mean, prompt_tokens_stddev
95 |     )
96 |     while num_prompt_tokens < get_token_length(prompt):
97 |         num_prompt_tokens = sample_random_positive_int(
98 |             prompt_tokens_mean, prompt_tokens_stddev
99 |         )
100 |     remaining_prompt_tokens = num_prompt_tokens - get_token_length(prompt)
101 |     sonnet_path = pathlib.Path(__file__).parent.resolve() / "sonnet.txt"
102 |     with open(sonnet_path, "r") as f:
103 |         sonnet_lines = f.readlines()
104 |     random.shuffle(sonnet_lines)
105 |     sampling_lines = True
106 |     while sampling_lines:
107 |         for line in sonnet_lines:
108 |             line_to_add = line
109 |             if remaining_prompt_tokens - get_token_length(line_to_add) < 0:
110 |                 # This will cut off a line in the middle of a word, but that's ok since an
111 |                 # LLM should be able to handle that.
112 |                 line_to_add = line_to_add[: int(math.ceil(remaining_prompt_tokens))]
113 |                 sampling_lines = False
114 |                 prompt += line_to_add
115 |                 break
116 |             prompt += line_to_add
117 |             remaining_prompt_tokens -= get_token_length(line_to_add)
118 |     return (prompt, num_prompt_tokens)
119 | 
120 | 
121 | def sample_random_positive_int(mean: int, stddev: int) -> int:
122 |     """Sample random numbers from a gaussian distribution until a positive number is sampled.
123 | 
124 |     Args:
125 |         mean: The mean of the gaussian distribution to sample from.
126 |         stddev: The standard deviation of the gaussian distribution to sample from.
127 | 
128 |     Returns:
129 |         A random positive integer sampled from the gaussian distribution.
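
    Example (illustrative):
        >>> n = sample_random_positive_int(550, 250)  # redraws until the sample is > 0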
130 | """ 131 | ret = -1 132 | while ret <= 0: 133 | ret = int(random.gauss(mean, stddev)) 134 | return ret 135 | 136 | 137 | def flatten_dict(d, parent_key="", sep="_"): 138 | items = [] 139 | for k, v in d.items(): 140 | new_key = f"{parent_key}{sep}{k}" if parent_key else k 141 | if isinstance(v, dict): 142 | items.extend(flatten_dict(v, new_key, sep=sep).items()) 143 | else: 144 | items.append((new_key, v)) 145 | return dict(items) 146 | -------------------------------------------------------------------------------- /token_benchmark_ray.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import argparse 3 | from collections.abc import Iterable 4 | import json 5 | import os 6 | from pathlib import Path 7 | import re 8 | import time 9 | import random 10 | from typing import Any, Dict, List, Optional, Tuple 11 | 12 | import pandas as pd 13 | import ray 14 | 15 | from llmperf import common_metrics 16 | from llmperf.common import SUPPORTED_APIS, construct_clients 17 | 18 | from llmperf.models import RequestConfig 19 | from llmperf.requests_launcher import RequestsLauncher 20 | from llmperf.utils import ( 21 | randomly_sample_sonnet_lines_prompt, 22 | LLMPerfResults, 23 | sample_random_positive_int, 24 | ) 25 | from tqdm import tqdm 26 | 27 | from transformers import LlamaTokenizerFast 28 | 29 | def get_token_throughput_latencies( 30 | model: str, 31 | mean_input_tokens: int, 32 | stddev_input_tokens: int, 33 | mean_output_tokens: int, 34 | stddev_output_tokens: int, 35 | additional_sampling_params: Optional[Dict[str, Any]] = None, 36 | num_concurrent_requests: int = 1, 37 | max_num_completed_requests: int = 500, 38 | test_timeout_s=90, 39 | llm_api="openai", 40 | ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]: 41 | """Get the token throughput and latencies for the given model. 42 | 43 | Args: 44 | model: The name of the model to query. 45 | mean_input_tokens: The mean number of tokens to send in the prompt for the request. 46 | stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request. 47 | mean_output_tokens: The mean number of tokens to generate per request. 48 | stddev_output_tokens: The standard deviation of the number of tokens to generate per request. 49 | additional_sampling_params: Additional sampling parameters to send with the request. 50 | For more information see the LLM APIs documentation for the completions 51 | num_concurrent_requests: The number of concurrent requests to make. Increase 52 | this to increase the amount of load and vice versa. 53 | test_timeout_s: The amount of time to run the test for before reporting results. 54 | llm_api: The name of the llm api to use. Either "openai" or "litellm". 55 | 56 | Returns: 57 | A summary of the performance metrics collected across all completed requests 58 | (e.g. throughput, latencies, etc.) 59 | The individual metrics for each request. 
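
    Example (illustrative call; the model name and values are placeholders):
        summary, per_request_metrics = get_token_throughput_latencies(
            model="meta-llama/Llama-2-7b-chat-hf",
            mean_input_tokens=550,
            stddev_input_tokens=150,
            mean_output_tokens=150,
            stddev_output_tokens=10,
            num_concurrent_requests=10,
            max_num_completed_requests=100,
            test_timeout_s=600,
            llm_api="openai",
        )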
60 | """ 61 | random.seed(11111) 62 | 63 | tokenizer = LlamaTokenizerFast.from_pretrained( 64 | "hf-internal-testing/llama-tokenizer" 65 | ) 66 | get_token_length = lambda text: len(tokenizer.encode(text)) 67 | 68 | if not additional_sampling_params: 69 | additional_sampling_params = {} 70 | 71 | completed_requests_lock = threading.Lock() 72 | completed_requests = [] 73 | num_completed_requests = 0 74 | # make up prompts outside of send loop for faster benchmarking loop 75 | num_output_tokens_list = [] 76 | prompts = [] 77 | for i in range(max_num_completed_requests): 78 | num_output_tokens = (sample_random_positive_int( 79 | mean_output_tokens, stddev_output_tokens 80 | )) 81 | num_output_tokens_list.append(num_output_tokens) 82 | 83 | prompts.append(randomly_sample_sonnet_lines_prompt( 84 | prompt_tokens_mean=mean_input_tokens, 85 | prompt_tokens_stddev=stddev_input_tokens, 86 | expect_output_tokens=num_output_tokens, 87 | tokenizer=tokenizer 88 | )) 89 | start_time = time.monotonic() 90 | pbar = tqdm(total=max_num_completed_requests) 91 | 92 | def launch_request(thread_index): 93 | nonlocal num_completed_requests 94 | clients = construct_clients(llm_api=llm_api, num_clients=1) 95 | req_launcher = RequestsLauncher(clients) 96 | request_index = thread_index % max_num_completed_requests 97 | 98 | while ( 99 | time.monotonic() - start_time < test_timeout_s 100 | and num_completed_requests < max_num_completed_requests 101 | ): 102 | 103 | default_sampling_params = {"max_tokens": num_output_tokens_list[request_index] } 104 | default_sampling_params.update(additional_sampling_params) 105 | request_config = RequestConfig( 106 | model=model, 107 | prompt=prompts[request_index], 108 | sampling_params=default_sampling_params, 109 | llm_api=llm_api, 110 | ) 111 | req_launcher.launch_requests(request_config) 112 | 113 | outs = req_launcher.get_next_ready() 114 | all_metrics = [] 115 | for out in outs: 116 | request_metrics, gen_text, _ = out 117 | num_output_tokens = get_token_length(gen_text) 118 | with completed_requests_lock: 119 | if num_completed_requests < max_num_completed_requests: 120 | if num_output_tokens: 121 | request_metrics[common_metrics.INTER_TOKEN_LAT] /= request_metrics[common_metrics.NUM_OUTPUT_TOKENS] 122 | else: 123 | request_metrics[common_metrics.INTER_TOKEN_LAT] = 0 124 | request_metrics[common_metrics.NUM_OUTPUT_TOKENS] = num_output_tokens 125 | request_metrics[common_metrics.NUM_TOTAL_TOKENS] = request_metrics[common_metrics.NUM_INPUT_TOKENS] + num_output_tokens 126 | request_metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = num_output_tokens / request_metrics[common_metrics.E2E_LAT] 127 | all_metrics.append(request_metrics) 128 | completed_requests.extend(all_metrics) 129 | pbar.update(len(all_metrics)) 130 | num_completed_requests += len(all_metrics) 131 | request_index = (request_index + num_concurrent_requests) % max_num_completed_requests 132 | 133 | threads = [] 134 | for i in range(num_concurrent_requests): 135 | thread = threading.Thread(target=launch_request, args=(i,)) 136 | threads.append(thread) 137 | thread.start() 138 | 139 | for thread in threads: 140 | thread.join() 141 | 142 | pbar.close() 143 | end_time = time.monotonic() 144 | if end_time - start_time >= test_timeout_s: 145 | print("Test timed out before all requests could be completed.") 146 | 147 | # check one last time that there are no remaining results to collect. 
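    # NOTE: this pass constructs a fresh client pool and launcher that is
    # separate from the per-thread launchers used above, so get_next_ready()
    # here only sees requests submitted to this new launcher (none), not any
    # responses still pending in the worker threads' own launchers.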
148 |     clients = construct_clients(llm_api=llm_api, num_clients=1)
149 |     req_launcher = RequestsLauncher(clients)
150 |     outs = req_launcher.get_next_ready()
151 |     all_metrics = []
152 |     for out in outs:
153 |         request_metrics, gen_text, _ = out
154 |         num_output_tokens = get_token_length(gen_text)
155 |         with completed_requests_lock:
156 |             if num_completed_requests < max_num_completed_requests:
157 |                 if num_output_tokens:
158 |                     request_metrics[common_metrics.INTER_TOKEN_LAT] /= num_output_tokens
159 |                 else:
160 |                     request_metrics[common_metrics.INTER_TOKEN_LAT] = 0
161 |                 request_metrics[common_metrics.NUM_OUTPUT_TOKENS] = num_output_tokens
162 |                 request_metrics[common_metrics.NUM_TOTAL_TOKENS] = request_metrics[common_metrics.NUM_INPUT_TOKENS] + num_output_tokens
163 |                 request_metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = num_output_tokens / request_metrics[common_metrics.E2E_LAT]
164 |                 completed_requests.append(request_metrics)
165 | 
166 |     print(f"\nResults for token benchmark for {model} queried with the {llm_api} API.\n")
167 |     ret = metrics_summary(completed_requests, start_time, end_time)
168 | 
169 |     metadata = {
170 |         "model": model,
171 |         "mean_input_tokens": mean_input_tokens,
172 |         "stddev_input_tokens": stddev_input_tokens,
173 |         "mean_output_tokens": mean_output_tokens,
174 |         "stddev_output_tokens": stddev_output_tokens,
175 |         "num_concurrent_requests": num_concurrent_requests,
176 |         "additional_sampling_params": additional_sampling_params,
177 |     }
178 | 
179 |     metadata["results"] = ret
180 | 
181 |     return metadata, completed_requests
182 | 
183 | 
184 | def metrics_summary(
185 |     metrics: List[Dict[str, Any]], start_time: int, end_time: int
186 | ) -> Dict[str, Any]:
187 |     """Generate a summary over metrics generated from potentially multiple instances of this client.
188 | 
189 |     Args:
190 |         metrics: The metrics to summarize.
191 |         start_time: The time the test started.
192 |         end_time: The time the test ended.
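
    Note:
        Per-metric quantiles, means, and extremes are computed only over
        requests that completed without errors; error counts and rates are
        reported separately.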
193 | 194 | Returns: 195 | A summary with the following information: 196 | - Overall throughput (generated tokens / total test time) 197 | - Number of completed requests 198 | - Error rate 199 | - Error code frequency 200 | - Quantiles (p25-p99) for the following metrics: 201 | - Inter token latency 202 | - Time to first token 203 | - User total request time 204 | - Number of tokens processed per request 205 | - Number of tokens generated per request 206 | - User throughput (tokens / s) 207 | """ 208 | ret = {} 209 | 210 | def flatten(item): 211 | for sub_item in item: 212 | if isinstance(sub_item, Iterable) and not isinstance(sub_item, str): 213 | yield from flatten(sub_item) 214 | else: 215 | yield sub_item 216 | 217 | df = pd.DataFrame(metrics) 218 | df_without_errored_req = df[df[common_metrics.ERROR_CODE].isna()] 219 | 220 | for key in [ 221 | common_metrics.INTER_TOKEN_LAT, 222 | common_metrics.TTFT, 223 | common_metrics.E2E_LAT, 224 | common_metrics.REQ_OUTPUT_THROUGHPUT, 225 | common_metrics.NUM_INPUT_TOKENS, 226 | common_metrics.NUM_OUTPUT_TOKENS 227 | ]: 228 | print(key) 229 | ret[key] = {} 230 | series = pd.Series(list(flatten(df_without_errored_req[key]))).dropna() 231 | quantiles = series.quantile([0.25, 0.5, 0.75, 0.9, 0.95, 0.99]).to_dict() 232 | quantiles_reformatted_keys = {} 233 | for quantile, value in quantiles.items(): 234 | reformatted_key = f"p{int(quantile * 100)}" 235 | print(f" {reformatted_key} = {value}") 236 | quantiles_reformatted_keys[reformatted_key] = value 237 | ret[key]["quantiles"] = quantiles_reformatted_keys 238 | mean = series.mean() 239 | print(f" mean = {mean}") 240 | ret[key]["mean"] = mean 241 | print(f" min = {series.min()}") 242 | ret[key]["min"] = series.min() 243 | print(f" max = {series.max()}") 244 | ret[key]["max"] = series.max() 245 | print(f" stddev = {series.std()}") 246 | ret[key]["stddev"] = series.std() 247 | 248 | ret[common_metrics.NUM_REQ_STARTED] = len(metrics) 249 | 250 | error_codes = df[common_metrics.ERROR_CODE].dropna() 251 | num_errors = len(error_codes) 252 | ret[common_metrics.ERROR_RATE] = num_errors / len(metrics) if len(metrics) else 0 253 | ret[common_metrics.NUM_ERRORS] = num_errors 254 | print(f"Number Of Errored Requests: {num_errors}") 255 | error_code_frequency = dict(error_codes.value_counts()) 256 | if num_errors: 257 | error_code_frequency = dict(error_codes.value_counts()) 258 | print("Error Code Frequency") 259 | print(error_code_frequency) 260 | ret[common_metrics.ERROR_CODE_FREQ] = str(error_code_frequency) 261 | 262 | overall_output_throughput = df_without_errored_req[ 263 | common_metrics.NUM_OUTPUT_TOKENS 264 | ].sum() / (end_time - start_time) 265 | 266 | print(f"Overall Output Throughput: {overall_output_throughput}") 267 | ret[common_metrics.OUTPUT_THROUGHPUT] = overall_output_throughput 268 | 269 | num_completed_requests = len(df_without_errored_req) 270 | num_completed_requests_per_min = ( 271 | num_completed_requests / (end_time - start_time) * 60 272 | ) 273 | print(f"Number Of Completed Requests: {num_completed_requests}") 274 | print(f"Completed Requests Per Minute: {num_completed_requests_per_min}") 275 | 276 | ret[common_metrics.NUM_COMPLETED_REQUESTS] = num_completed_requests 277 | ret[common_metrics.COMPLETED_REQUESTS_PER_MIN] = num_completed_requests_per_min 278 | 279 | return ret 280 | 281 | 282 | def run_token_benchmark( 283 | llm_api: str, 284 | model: str, 285 | test_timeout_s: int, 286 | max_num_completed_requests: int, 287 | num_concurrent_requests: int, 288 | mean_input_tokens: int, 
289 | stddev_input_tokens: int, 290 | mean_output_tokens: int, 291 | stddev_output_tokens: int, 292 | additional_sampling_params: str, 293 | results_dir: str, 294 | user_metadata: Dict[str, Any], 295 | ): 296 | """ 297 | Args: 298 | llm_api: The name of the llm api to use. 299 | model: The name of the model to query. 300 | max_num_completed_requests: The number of requests to complete before finishing the test. 301 | test_timeout_s: The amount of time to run the test for before reporting results. 302 | num_concurrent_requests: The number of concurrent requests to make. Increase 303 | this to increase the amount of load and vice versa. 304 | mean_input_tokens: The mean number of tokens to send in the prompt for the request. 305 | stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request. 306 | mean_output_tokens: The mean number of tokens to generate per request. 307 | stddev_output_tokens: The standard deviation of the number of tokens to generate per request. 308 | additional_sampling_params: Additional sampling parameters to send with the request. 309 | For more information see the LLM APIs documentation for the completions. 310 | results_dir: The directory to save the results to. 311 | user_metadata: Additional metadata to include in the results. 312 | """ 313 | if mean_input_tokens < 40: 314 | print( 315 | "the minimum number of input tokens that will be sent is 41" 316 | " because of the prompting logic right now" 317 | ) 318 | 319 | summary, individual_responses = get_token_throughput_latencies( 320 | model=model, 321 | llm_api=llm_api, 322 | test_timeout_s=test_timeout_s, 323 | max_num_completed_requests=max_num_completed_requests, 324 | mean_input_tokens=mean_input_tokens, 325 | stddev_input_tokens=stddev_input_tokens, 326 | mean_output_tokens=mean_output_tokens, 327 | stddev_output_tokens=stddev_output_tokens, 328 | num_concurrent_requests=num_concurrent_requests, 329 | additional_sampling_params=json.loads(additional_sampling_params), 330 | ) 331 | 332 | if results_dir: 333 | filename = f"{model}_{mean_input_tokens}_{mean_output_tokens}" 334 | filename = re.sub(r"[^\w\d-]+", "-", filename) 335 | filename = re.sub(r"-{2,}", "-", filename) 336 | summary_filename = f"{filename}_summary" 337 | individual_responses_filename = f"{filename}_individual_responses" 338 | 339 | # Update to metadata. 340 | summary.update(user_metadata) 341 | 342 | results = LLMPerfResults(name=summary_filename, metadata=summary) 343 | results_dir = Path(results_dir) 344 | if not results_dir.exists(): 345 | results_dir.mkdir(parents=True) 346 | elif not results_dir.is_dir(): 347 | raise ValueError(f"{results_dir} is not a directory") 348 | 349 | try: 350 | with open(results_dir / f"{summary_filename}.json", "w") as f: 351 | json.dump(results.to_dict(), f, indent=4, default=str) 352 | except Exception as e: 353 | print(results.to_dict()) 354 | raise e 355 | 356 | try: 357 | with open(results_dir / f"{individual_responses_filename}.json", "w") as f: 358 | json.dump(individual_responses, f, indent=4) 359 | except Exception as e: 360 | print(individual_responses) 361 | raise e 362 | 363 | 364 | args = argparse.ArgumentParser( 365 | description="Run a token throughput and latency benchmark." 366 | ) 367 | 368 | args.add_argument( 369 | "--model", type=str, required=True, help="The model to use for this load test." 
370 | )
371 | args.add_argument(
372 |     "--mean-input-tokens",
373 |     type=int,
374 |     default=550,
375 |     help=(
376 |         "The mean number of tokens to send in the prompt for the request. "
377 |         " (default: %(default)s)"
378 |     ),
379 | )
380 | args.add_argument(
381 |     "--stddev-input-tokens",
382 |     type=int,
383 |     default=150,
384 |     help=(
385 |         "The standard deviation of the number of tokens to send in the prompt for the request. "
386 |         "(default: %(default)s)"
387 |     ),
388 | )
389 | args.add_argument(
390 |     "--mean-output-tokens",
391 |     type=int,
392 |     default=150,
393 |     help=(
394 |         "The mean number of tokens to generate from each LLM request. This is the max_tokens param "
395 |         "for the completions API. Note that this is not always the number of tokens returned. "
396 |         "(default: %(default)s)"
397 |     ),
398 | )
399 | args.add_argument(
400 |     "--stddev-output-tokens",
401 |     type=int,
402 |     default=80,
403 |     help=(
404 |         "The standard deviation of the number of tokens to generate per LLM request. "
405 |         "(default: %(default)s)"
406 |     ),
407 | )
408 | args.add_argument(
409 |     "--num-concurrent-requests",
410 |     type=int,
411 |     default=10,
412 |     help=("The number of concurrent requests to send. (default: %(default)s)"),
413 | )
414 | args.add_argument(
415 |     "--timeout",
416 |     type=int,
417 |     default=90,
418 |     help="The amount of time to run the load test for. (default: %(default)s)",
419 | )
420 | args.add_argument(
421 |     "--max-num-completed-requests",
422 |     type=int,
423 |     default=10,
424 |     help=(
425 |         "The number of requests to complete before finishing the test. Note "
426 |         "that it's possible for the test to time out first. (default: %(default)s)"
427 |     ),
428 | )
429 | args.add_argument(
430 |     "--additional-sampling-params",
431 |     type=str,
432 |     default="{}",
433 |     help=(
434 |         "Additional sampling params to send with each request to the LLM API. "
435 |         "(default: %(default)s; no additional sampling params are sent)"
436 |     ),
437 | )
438 | args.add_argument(
439 |     "--results-dir",
440 |     type=str,
441 |     default="",
442 |     help=(
443 |         "The directory to save the results to. "
444 |         "(default: %(default)s) If not specified, no results are saved."
445 |     ),
446 | )
447 | args.add_argument(
448 |     "--llm-api",
449 |     type=str,
450 |     default="openai",
451 |     help=(
452 |         f"The name of the LLM API to use. Can select from {SUPPORTED_APIS}"
453 |         " (default: %(default)s)"
454 |     ),
455 | )
456 | args.add_argument(
457 |     "--metadata",
458 |     type=str,
459 |     default="",
460 |     help=(
461 |         "A comma-separated list of metadata to include in the results, e.g. "
462 |         "name=foo,bar=1. These will be added to the metadata field of the results. "
463 |     ),
464 | )
465 | 
466 | if __name__ == "__main__":
467 |     env_vars = dict(os.environ)
468 |     ray.init(runtime_env={"env_vars": env_vars})
469 |     args = args.parse_args()
470 | 
471 |     # Parse user metadata.
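    # For example, --metadata "name=llama2-70b,replicas=2" (an illustrative value)
    # becomes {"name": "llama2-70b", "replicas": "2"}; values are kept as strings.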
472 | user_metadata = {} 473 | if args.metadata: 474 | for item in args.metadata.split(","): 475 | key, value = item.split("=") 476 | user_metadata[key] = value 477 | 478 | run_token_benchmark( 479 | llm_api=args.llm_api, 480 | model=args.model, 481 | test_timeout_s=args.timeout, 482 | max_num_completed_requests=args.max_num_completed_requests, 483 | mean_input_tokens=args.mean_input_tokens, 484 | stddev_input_tokens=args.stddev_input_tokens, 485 | mean_output_tokens=args.mean_output_tokens, 486 | stddev_output_tokens=args.stddev_output_tokens, 487 | num_concurrent_requests=args.num_concurrent_requests, 488 | additional_sampling_params=args.additional_sampling_params, 489 | results_dir=args.results_dir, 490 | user_metadata=user_metadata, 491 | ) 492 | --------------------------------------------------------------------------------
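Usage sketch (not part of the repository): the snippet below illustrates how a custom client could plug into the LLMClient interface, RequestConfig, and RequestsLauncher defined above. EchoClient, its timings, the model name, and the prompt/token values are hypothetical stand-ins for a real backend; an actual benchmark would construct clients with construct_clients and drive them through token_benchmark_ray.py as shown in the listings.

# sketch_echo_client.py -- illustrative only; not part of the repository.
import time
from typing import Any, Dict, Tuple

import ray

from llmperf import common_metrics
from llmperf.models import RequestConfig
from llmperf.ray_llm_client import LLMClient
from llmperf.requests_launcher import RequestsLauncher


@ray.remote
class EchoClient(LLMClient):
    """Fake client that echoes the prompt back; useful for dry runs."""

    def llm_request(
        self, request_config: RequestConfig
    ) -> Tuple[Dict[str, Any], str, RequestConfig]:
        prompt, prompt_len = request_config.prompt
        start = time.monotonic()
        time.sleep(0.01)  # pretend the API call took some time
        generated_text = prompt  # pretend the model echoed the prompt
        total_request_time = time.monotonic() - start
        metrics = {
            common_metrics.ERROR_CODE: None,
            common_metrics.TTFT: total_request_time,
            common_metrics.E2E_LAT: total_request_time,
            common_metrics.INTER_TOKEN_LAT: [],
            common_metrics.REQ_OUTPUT_THROUGHPUT: prompt_len / total_request_time,
            common_metrics.NUM_INPUT_TOKENS: prompt_len,
            common_metrics.NUM_OUTPUT_TOKENS: prompt_len,
            common_metrics.NUM_TOTAL_TOKENS: 2 * prompt_len,
        }
        return metrics, generated_text, request_config


if __name__ == "__main__":
    ray.init()
    launcher = RequestsLauncher([EchoClient.remote()])
    launcher.launch_requests(
        RequestConfig(
            model="echo-model",  # hypothetical model name
            prompt=("Give me ten interview questions.", 8),
            sampling_params={"max_tokens": 64},
        )
    )
    # block=True waits until at least one result is available.
    for metrics, text, _ in launcher.get_next_ready(block=True):
        print(metrics[common_metrics.E2E_LAT], text)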