├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── Makefile ├── README.md ├── docs ├── 01.create-benchmark.md ├── 01.overview.md ├── 02.getting-started.md ├── 03.benchmark-model.md ├── Makefile ├── api │ ├── 01.api.md │ └── index.md ├── conf.py ├── images │ ├── codeformodel.png │ ├── image_classification.png │ ├── imagenet.png │ └── pwcsearch.png └── index.md ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── sotabenchapi ├── __init__.py ├── __main__.py ├── check.py ├── client.py ├── commands ├── __init__.py ├── benchmark.py ├── build.py ├── check.py ├── cli.py ├── dataset.py ├── repo.py └── utils.py ├── config.py ├── consts.py ├── core ├── __init__.py ├── inputs.py └── results.py ├── errors.py ├── http.py ├── tests ├── __init__.py └── test_client.py ├── uploader ├── __init__.py ├── consts.py ├── models.py ├── upload.py └── utils.py └── version.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Run artifacts 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # Build artifacts 6 | sotabenchapi.egg-info 7 | build/ 8 | dist/ 9 | 10 | # Test artifacts 11 | .coverage 12 | 13 | # IDE Configuration 14 | .idea 15 | .vscode 16 | 17 | # Virtualenv 18 | .venv 19 | venv 20 | .envrc 21 | 22 | # Documentation artefacts 23 | docs/_build/ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: https://github.com/ambv/black 2 | rev: stable 3 | hooks: 4 | - id: black 5 | name: Format Python Code 6 | language: python 7 | entry: black 8 | args: 9 | - --safe 10 | - --line-length=79 11 | - --target-version=py37 12 | - . 
13 | 14 | - repo: https://github.com/pre-commit/pre-commit-hooks 15 | rev: v2.2.3 16 | hooks: 17 | - id: requirements-txt-fixer 18 | name: Requirements file fixer 19 | language: python 20 | args: 21 | - requirements.txt 22 | - requirements-dev.txt 23 | 24 | - repo: https://gitlab.com/pycqa/flake8 25 | rev: 3.7.8 26 | hooks: 27 | - id: flake8 28 | name: Flake8 Check 29 | language: python 30 | entry: flake8 31 | args: 32 | - sotabenchapi 33 | 34 | - repo: https://github.com/pycqa/pydocstyle 35 | rev: master 36 | hooks: 37 | - id: pydocstyle 38 | name: Python Documentation Style Check 39 | language: python 40 | entry: pydocstyle 41 | args: 42 | - sotabenchapi 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help default docs build release clean test check fmt 2 | .DEFAULT_GOAL := help 3 | PROJECT := sotabenchapi 4 | 5 | help: ## Show help. 6 | @grep -E '^[a-zA-Z2_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 7 | 8 | 9 | docs: ## Build documentation. 10 | @cd docs && make html && open _build/html/index.html 11 | 12 | 13 | build: ## Build the source and wheel distribution packages. 14 | @python3 setup.py sdist bdist_wheel 15 | 16 | 17 | release: build ## Build and upload the package to PyPI. 18 | @twine upload --repository-url https://upload.pypi.org/legacy/ dist/* 19 | @rm -fr build dist sotabenchapi.egg-info 20 | 21 | 22 | clean: ## Cleanup the project 23 | @find . -type d -name __pycache__ -delete 24 | @find . 
-type f -name "*.py[cod]" -delete 25 | @rm -fr build dist sotabenchapi.egg-info 26 | @rm -fr docs/_build/* 27 | 28 | 29 | test: ## Run tests and code checks. 30 | @py.test -v --cov "$(PROJECT)" "$(PROJECT)" 31 | 32 | 33 | check: ## Run code checks. 34 | @flake8 "$(PROJECT)" 35 | @pydocstyle "$(PROJECT)" 36 | 37 | 38 | fmt: ## Format the code. 39 | @black --target-version=py37 --safe --line-length=79 "$(PROJECT)" 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sotabench-api 2 | 3 | Easily benchmark machine learning models on selected tasks and datasets. 4 | 5 | This is a core package with common functions and APIs to interact with the 6 | [sotabench](https://sotabench.com) website. 7 | 8 | For benchmarks, see the 9 | [torchbench](https://github.com/paperswithcode/torchbench) repository with 10 | PyTorch benchmarks (more frameworks to come soon). 11 | -------------------------------------------------------------------------------- /docs/01.create-benchmark.md: -------------------------------------------------------------------------------- 1 | # How to Create a Benchmark 2 | 3 | To create a benchmark, you need to write a function that returns a `BenchmarkResult` instance. 4 | This object is dictionary-like and holds information about the model results on the benchmark. 5 | 6 | For example if you submitted an 7 | EfficientNet model to an ImageNet benchmark, the instance would contain information on its performance (Top 1/5 Accuracy), the model name, the name 8 | of the dataset and task, and so on. The object also contains methods for serialising the results to JSON, and some server checking methods that call the sotabench.com API to check if the results can be accepted. 9 | 10 | If you want to see the full API for `BenchmarkResult`, then skip to the end of this section. 
11 | Otherwise we will go through a step-by-step example in PyTorch for creating a benchmark. 12 | 13 | ## The Bare Necessities 14 | 15 | Start a new project and make a benchmark file, e.g. `mnist.py`. Begin by writing a skeleton function 16 | as follows: 17 | 18 | ```python 19 | from sotabenchapi.core import BenchmarkResult 20 | 21 | def evaluate_mnist(...) -> BenchmarkResult: 22 | 23 | # your evaluation logic here 24 | results = {...} # dict with keys as metric names, values as metric results 25 | 26 | return BenchmarkResult(results=results) 27 | ``` 28 | 29 | This is the core structure of an evaluation method for sotabench: we have a function that takes in user inputs, 30 | we do some evaluation, and we pass in some results and other outputs to a `BenchmarkResult` instance. Essentially you can write any benchmark around this format, 31 | and take in any input that you want for your evaluation. It is designed to be flexible. 32 | 33 | For example, it could be as simple as taking a json of predictions as an input if that's all you need. Or if you 34 | want more information about the model, you could request a model function or class as an input and pass the data to the 35 | model yourself. It is up to you how you want to design your benchmark. 36 | 37 | ## Sotabench Metadata 38 | 39 | So that benchmark results can be displayed on sotabench.com, you will need your submissions to have metadata about the model name, 40 | the dataset name and the task. For example, "EfficientNet", "Imagenet", "Image Classification". 41 | 42 | In the context of your benchmark function: 43 | 44 | ```python 45 | from sotabenchapi.core import BenchmarkResult 46 | 47 | DATASET_NAME = 'ImageNet' 48 | TASK = 'Image Classification' 49 | 50 | def evaluate_mnist(model_name, ...) 
-> BenchmarkResult: 51 | 52 | # your evaluation logic here 53 | results = {...} # dict with keys as metric names, values as metric results 54 | 55 | return BenchmarkResult(results=results, model=model_name, dataset=DATASET_NAME, task=TASK) 56 | ``` 57 | 58 | Here the dataset name and task name will be fixed for the benchmark, but the model name 59 | can be specified as an input. You can add additional metadata to connect things like the 60 | ArXiv paper id - see the API documentation at the end of this section for more information. 61 | 62 | ## Example: An MNIST benchmark in PyTorch 63 | 64 | Let's see how we might make a PyTorch friendly benchmark which adheres to the framework's abstractions. 65 | 66 | The first thing we need for evaluation is a dataset! Let's use the MNIST dataset from the `torchvision` library, 67 | along with a `DataLoader`: 68 | 69 | ```python 70 | from sotabenchapi.core import BenchmarkResult 71 | from torch.utils.data import DataLoader 72 | import torchvision.datasets as datasets 73 | 74 | def evaluate_mnist(data_root: str, batch_size: int = 32, num_workers: int = 4) -> BenchmarkResult: 75 | 76 | dataset = datasets.MNIST(data_root, train=False, download=True) 77 | loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True) 78 | 79 | return BenchmarkResult(dataset=dataset.__name__) 80 | ``` 81 | 82 | We've set `train=False` since we want to use the testing split for evaluation. We've also added a `data_root` parameter 83 | just so the user can specify where they want the data downloaded. 
84 | 85 | We should also probably allow for the user to put in their own transforms since this is a vision dataset, so 86 | let's modify further: 87 | 88 | ```python 89 | from sotabenchapi.core import BenchmarkResult 90 | from torch.utils.data import DataLoader 91 | import torchvision.datasets as datasets 92 | 93 | def evaluate_mnist(data_root: str, batch_size: int = 32, num_workers: int = 4, 94 | input_transform=None, target_transform=None) -> BenchmarkResult: 95 | 96 | dataset = datasets.MNIST(data_root, transform=input_transform, target_transform=target_transform, 97 | train=False, download=True) 98 | loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True) 99 | 100 | return BenchmarkResult(dataset=dataset.__name__) 101 | ``` 102 | 103 | Great, so we have a dataset set up. Let's now take in a model. We could do this in a number of ways, for example, 104 | we could accept a model function as an input (that takes in data and outputs predictions). 
Since we are using PyTorch, 105 | where most modules are subclasses of `nn.Module`, let's do it in an object-oriented way by accepting a model object input: 106 | 107 | ```python 108 | from sotabenchapi.core import BenchmarkResult 109 | import torchvision.datasets as datasets 110 | from torchbench.utils import send_model_to_device 111 | 112 | def evaluate_mnist(model, data_root: str, batch_size: int = 32, num_workers: int = 4, 113 | input_transform=None, target_transform=None) -> BenchmarkResult: 114 | 115 | model, device = send_model_to_device(model, device='cuda') 116 | model.eval() 117 | 118 | dataset = datasets.MNIST(data_root, transform=input_transform, target_transform=target_transform, 119 | train=False, download=True) 120 | loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True) 121 | 122 | return BenchmarkResult(dataset=dataset.__name__) 123 | ``` 124 | 125 | Here we have reused a function from `torchbench` for sending the model to a cuda device, but this is optional - you can 126 | decide how models are processed in your own benchmark however you see fit. 
127 | 128 | Now that we have a model and a dataset, let's loop through and evaluate the model: 129 | 130 | ```python 131 | from sotabenchapi.core import BenchmarkResult 132 | from torch.utils.data import DataLoader 133 | import torchvision.datasets as datasets 134 | from torchbench.utils import send_model_to_device, default_data_to_device, AverageMeter, accuracy 135 | import tqdm 136 | import torch 137 | 138 | def evaluate_mnist(model, data_root: str, batch_size: int = 32, num_workers: int = 4, 139 | input_transform=None, target_transform=None) -> BenchmarkResult: 140 | 141 | model, device = send_model_to_device(model, device='cuda') 142 | model.eval() 143 | 144 | dataset = datasets.MNIST(data_root, transform=input_transform, target_transform=target_transform, 145 | train=False, download=True) 146 | loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True) 147 | 148 | top1 = AverageMeter() 149 | top5 = AverageMeter() 150 | 151 | with torch.no_grad(): 152 | for i, (input, target) in enumerate(tqdm.tqdm(loader)): 153 | 154 | input, target = default_data_to_device(input, target, device=device) 155 | output = model(input) 156 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 157 | top1.update(prec1.item(), input.size(0)) 158 | top5.update(prec5.item(), input.size(0)) 159 | 160 | results = {'Top 1 Accuracy': top1.avg, 'Top 5 Accuracy': top5.avg} 161 | 162 | return BenchmarkResult(dataset=dataset.__name__, results=results) 163 | ``` 164 | 165 | We've used some more utility functions from `torchbench`, but again, you can use whatever you want to do evaluation. 166 | You can see we've passed a results dictionary into the `BenchmarkResult` object. Great! So we have a function that 167 | takes in a model and evaluates on a dataset. But how do we connect to Sotabench? Well, we need to have the user pass 168 | in some metadata information about the model name and paper id. 
We also need to specify a bit more about our benchmark, 169 | e.g. the task in this case is "Image Classification": 170 | 171 | ```python 172 | from sotabenchapi.core import BenchmarkResult 173 | from torch.utils.data import DataLoader 174 | import torchvision.datasets as datasets 175 | from torchbench.utils import send_model_to_device, default_data_to_device, AverageMeter, accuracy 176 | import tqdm 177 | import torch 178 | 179 | def evaluate_mnist(model, data_root: str, batch_size: int = 32, num_workers: int = 4, 180 | input_transform=None, target_transform=None, model_name:str = None, 181 | arxiv_id:str = None) -> BenchmarkResult: 182 | 183 | model, device = send_model_to_device(model, device='cuda') 184 | model.eval() 185 | 186 | dataset = datasets.MNIST(data_root, transform=input_transform, target_transform=target_transform, 187 | train=False, download=True) 188 | loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True) 189 | 190 | top1 = AverageMeter() 191 | top5 = AverageMeter() 192 | 193 | with torch.no_grad(): 194 | for i, (input, target) in enumerate(tqdm.tqdm(loader)): 195 | 196 | input, target = default_data_to_device(input, target, device=device) 197 | output = model(input) 198 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 199 | top1.update(prec1.item(), input.size(0)) 200 | top5.update(prec5.item(), input.size(0)) 201 | 202 | results = {'Top 1 Accuracy': top1.avg, 'Top 5 Accuracy': top5.avg} 203 | 204 | return BenchmarkResult(task='Image Classification', dataset=dataset.__name__, results=results, 205 | model=model_name, arxiv_id=arxiv_id) 206 | ``` 207 | 208 | And you're set! The task string connects to the taxonomy on sotabench, the rest gives context to the 209 | result - for example the model's name and the paper it is from. 210 | 211 | The final step is to publish this as a PyPi library. 
This will enable your users to write a `sotabench.py` file 212 | that imports your benchmark and passes their model and other parameters into it. When they connect to sotabench.com, 213 | sotabench.com will download your library and evaluate their model with it, and then publish the results to your 214 | benchmark page. 215 | 216 | ## Other Examples 217 | 218 | The [torchbench](https://www.github.com/paperswithcode/torchbench) library is a good reference for benchmark implementations, 219 | which you can base your own benchmarks on. 220 | 221 | ## API for BenchmarkResult 222 | 223 | ```eval_rst 224 | 225 | .. automodule:: sotabenchapi.core.results 226 | :members: 227 | ``` 228 | -------------------------------------------------------------------------------- /docs/01.overview.md: -------------------------------------------------------------------------------- 1 | # Sotabench Documentation 2 | 3 | ## Overview 4 | 5 | You have reached the docs for [Sotabench](https://www.sotabench.com)! These 6 | docs will explain how the website and library work, how you can benchmark your 7 | own machine learning models, and how you can create your own benchmarks. 8 | 9 | ### What is Sotabench? 10 | 11 | Sotabench is a new resource for benchmarking machine learning models. 12 | Pre-trained models are a growing dependency for machine learning projects, but 13 | it is hard to verify their quality: 14 | 15 | - How do I know if a model reproduces the results of the original paper? 16 | - How does this model compare to other models on the same task? Which to 17 | choose? 18 | 19 | Sotabench solves this problem through a build system that benchmarks pretrained 20 | models on public benchmarks. With minimal setup, people can submit their models 21 | for evaluation on popular benchmarks like ImageNet. 
Benchmarks enable 22 | comparison: using Sotabench the community can easily compare models to see 23 | whether they reproduced the results of the original paper and whether they are 24 | state-of-the-art for a task. This helps the community decide which models to 25 | use as a starting point for a project. 26 | 27 | ### How Do I Benchmark My Model? 28 | 29 | The full documentation for this use case can be accessed here: 30 | [Benchmarking Your Model](03.benchmark-model.md). 31 | 32 | **TLDR Summary:** You add a `benchmark.py` file to your GitHub repo and connect 33 | your repo to Sotabench. Sotabench will automatically evaluate your models on 34 | benchmarks for free with GPUs, and feature your repository on the site's public 35 | benchmark pages. 36 | 37 | ### How Do I Create A Benchmark? 38 | 39 | The full documentation for this use case can be accessed here: 40 | [Create a Benchmark](01.create-benchmark.md). 41 | 42 | **TLDR Summary:** You write a function that takes in a model and writes results 43 | to an `evaluation.json`, and publish it as a Python library. When other users 44 | import your library for their model's `benchmark.py` file, their model results 45 | will be published to your benchmark page. 46 | 47 | ### How Do I Contribute to the Resource? 48 | 49 | The full documentation for this use case can be accessed here: 50 | [Getting Started](02.getting-started.md). 51 | 52 | **TLDR Summary:** Sotabench is a great place to publish paper implementations! 53 | You can use sotabench to discover what implementations the community is crying 54 | out for, publish them, and get awards and recognition for your work. 55 | 56 | ## Library Installation 57 | 58 | If you just want to submit models, you do not need the sotabench library, but 59 | will probably want to install the libraries that power the benchmarks you are 60 | interested in - for example [torchbench](https://pypi.org/project/torchbench/). 
61 | 62 | If you want to create custom benchmarks, then you can use the sotabench library 63 | as a starting point: 64 | 65 | There are two ways to install sotabench: 66 | 67 | **Install Sotabench from PyPI** 68 | 69 | ```bash 70 | pip install sotabenchapi 71 | ``` 72 | 73 | 74 | **Install Sotabench from GitHub source** 75 | 76 | ```bash 77 | git clone https://github.com/sotabench/sotabenchapi.git 78 | cd sotabenchapi 79 | python setup.py install 80 | ``` 81 | 82 | 83 | ## Support 84 | 85 | If you get stuck you can head to our [Discourse]() forum where you can ask 86 | questions on how to use the project. You can also find ideas for contributions, 87 | and work with others on exciting projects. 88 | -------------------------------------------------------------------------------- /docs/02.getting-started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | ## What You Will Learn 4 | 5 | In this tutorial, you will submit your first machine learning model 6 | implementation to the Sotabench resource. You will: 7 | 8 | - 🔎 **Find** an implementation of a modern image classification model 9 | - 📊 **Configure** a `benchmark.py` file to evaluate the model 10 | - 🎉 **Submit** the repository to sotabench and obtain *state-of-the-art* 11 | results! 12 | 13 | Sound fun? Let's get started! 14 | 15 | 16 | ## Find an Image Classification Model 17 | 18 | To find model implementations, we use [Papers With Code](https://paperswithcode.com), which has over 19 | 10,000 implementations and also ranks them according to their performance on 20 | popular research benchmarks. 
21 | 22 | Search for **Image Classification**: 23 | 24 | ![Search PWC](images/pwcsearch.png) 25 | 26 | This will bring you to the **Task Page**, where you can see various different 27 | *benchmarks* for evaluating the performance of models on image classification: 28 | 29 | ![Image Classification](images/image_classification.png) 30 | 31 | Let's click on **ImageNet** since this is now the most famous image 32 | classification benchmark: 33 | 34 | ![ImageNet](images/imagenet.png) 35 | 36 | This is the **Evaluation Page**. From the graph, we can see the historical 37 | progress on the task, and how models have got better over time. Below we can 38 | see the leaderboard of the best methods, ranked by performance. 39 | 40 | Let's click on the top paper **Exploring the Limits of Weakly Supervised 41 | Pretraining**. This brings us to the paper page where we can see publicly 42 | available implementations: 43 | 44 | ![Model Code](images/codeformodel.png) 45 | 46 | There is an implementation in PyTorch, which we can evaluate using sotabench. 47 | Head to the repository: 48 | -------------------------------------------------------------------------------- /docs/03.benchmark-model.md: -------------------------------------------------------------------------------- 1 | # Benchmark your model 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api/01.api.md: -------------------------------------------------------------------------------- 1 | # CLasses 2 | 3 | ## Config 4 | 5 | ```eval_rst 6 | 7 | .. automodule:: sotabenchapi.config 8 | :members: 9 | ``` 10 | 11 | 12 | 13 | 14 | ## Results 15 | 16 | ```eval_rst 17 | 18 | .. automodule:: sotabenchapi.core.results 19 | :members: 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/api/index.md: -------------------------------------------------------------------------------- 1 | # API Documentation 2 | 3 | ```eval_rst 4 | .. toctree:: 5 | :maxdepth: 3 6 | 7 | 01.api.md 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | import os 20 | import sys 21 | from datetime import datetime 22 | from recommonmark.transform import AutoStructify 23 | 24 | # Add project root to pythonpath 25 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 26 | 27 | from sotabenchapi.version import __version__ 28 | 29 | 30 | project = "Sotabench API" 31 | author = "Sotabench Team " 32 | description = "Easily benchmark deep learning models" 33 | copyright = f"{datetime.now():%Y}, {author}" 34 | 35 | 36 | # The full version, including alpha/beta/rc tags 37 | version = __version__ 38 | release = __version__ 39 | 40 | 41 | # -- General configuration --------------------------------------------------- 42 | 43 | # Add any Sphinx extension module names here, as strings. They can be 44 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 45 | # ones. 46 | extensions = [ 47 | "sphinx.ext.autodoc", 48 | "sphinx.ext.todo", 49 | "sphinx.ext.mathjax", 50 | "sphinx.ext.ifconfig", 51 | "sphinx.ext.viewcode", 52 | "sphinx.ext.napoleon", 53 | "sphinx.ext.githubpages", 54 | "recommonmark", 55 | ] 56 | 57 | # Add any paths that contain templates here, relative to this directory. 58 | templates_path = ["_templates"] 59 | 60 | # The suffix(es) of source filenames. 61 | # You can specify multiple suffix as a list of string: 62 | # 63 | # source_suffix = ['.rst', '.md'] 64 | source_suffix = ".md" 65 | 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | # This pattern also affects html_static_path and html_extra_path. 
70 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 71 | 72 | 73 | # -- Options for HTML output ------------------------------------------------- 74 | 75 | # The theme to use for HTML and HTML Help pages. See the documentation for 76 | # a list of builtin themes. 77 | # 78 | html_theme = "sphinx_rtd_theme" 79 | 80 | # Add any paths that contain custom static files (such as style sheets) here, 81 | # relative to this directory. They are copied after the builtin static files, 82 | # so a file named "default.css" will overwrite the builtin "default.css". 83 | html_static_path = ["_static"] 84 | 85 | # -- Extension configuration ------------------------------------------------- 86 | 87 | 88 | # autodoc 89 | 90 | autoclass_content = "both" 91 | autodoc_default_options = { 92 | "member-order": "bysource", 93 | "special-members": "__init__", 94 | "undoc-members": True, 95 | } 96 | 97 | 98 | # recommonmark 99 | 100 | 101 | def setup(app): 102 | app.add_config_value( 103 | "recommonmark_config", 104 | { 105 | "enable_auto_toc_tree": True, 106 | "auto_toc_tree_section": True, 107 | "enable_math": True, 108 | "enable_inline_math": True, 109 | "enable_eval_rst": True, 110 | }, 111 | True, 112 | ) 113 | app.add_transform(AutoStructify) 114 | -------------------------------------------------------------------------------- /docs/images/codeformodel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paperswithcode/sotabench-api/513c0095328a41485d4d13ec3d77da919d8de8b1/docs/images/codeformodel.png -------------------------------------------------------------------------------- /docs/images/image_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paperswithcode/sotabench-api/513c0095328a41485d4d13ec3d77da919d8de8b1/docs/images/image_classification.png 
-------------------------------------------------------------------------------- /docs/images/imagenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paperswithcode/sotabench-api/513c0095328a41485d4d13ec3d77da919d8de8b1/docs/images/imagenet.png -------------------------------------------------------------------------------- /docs/images/pwcsearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paperswithcode/sotabench-api/513c0095328a41485d4d13ec3d77da919d8de8b1/docs/images/pwcsearch.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to the Sotabench API Documentation 2 | 3 | This documentation details how to use the `sotabenchapi` library to connect 4 | with [sotabench](http://www.sotabench.com). 5 | 6 | Using this library you will be able to create your own public research benchmarks, allowing the community to submit and 7 | evaluate models on them, and have the results submitted to the [sotabench](http://www.sotabench.com) resource. 8 | 9 | ## Installation 10 | 11 | The library requires Python 3.6+. You can install via pip: 12 | 13 | pip install sotabenchapi 14 | 15 | ## Contents 16 | 17 | ```eval_rst 18 | .. 
toctree:: 19 | :maxdepth: 2 20 | 21 | 01.create-benchmark.md 22 | ``` 23 | 24 | 25 | ## Indices and tables 26 | 27 | ```eval_rst 28 | * :ref:`genindex` 29 | * :ref:`modindex` 30 | ``` 31 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | black==19.10b0 3 | flake8==3.8.3 4 | pre-commit==2.6.0 5 | pydocstyle==5.0.2 6 | pytest==5.4.3 7 | pytest-cov==2.10.0 8 | recommonmark==0.6.0 9 | sphinx==3.1.2 10 | sphinx-rtd-theme==0.5.0 11 | twine==3.2.0 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click==7.1.2 2 | requests==2.24.0 3 | tabulate==0.8.7 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203,E402,W503,E701 3 | 4 | [pydocstyle] 5 | ignore = D10,D202,D203,D212,D213,D401,D403,D406,D407,D413 6 | 7 | [tool:pytest] 8 | testpaths = sotabenchapi/tests 9 | python_files = test_*.py 10 | norecursedirs = .git 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | from setuptools import setup, find_packages 3 | from sotabenchapi.version import __version__ 4 | 5 | name = "sotabenchapi" 6 | author = "Robert Stojnic" 7 | author_email = "hello@sotabench.com" 8 | license = "Apache-2.0" 9 | url = "https://sotabench.com" 10 | description = ( 11 | "Easily benchmark Machine Learning models on selected tasks and datasets." 
12 | ) 13 | 14 | 15 | setup( 16 | name=name, 17 | version=__version__, 18 | author=author, 19 | author_email=author_email, 20 | maintainer=author, 21 | maintainer_email=author_email, 22 | description=description, 23 | long_description=io.open("README.md", "r", encoding="utf-8").read(), 24 | long_description_content_type="text/markdown", 25 | url=url, 26 | platforms=["Windows", "POSIX", "MacOSX"], 27 | license=license, 28 | packages=find_packages(), 29 | install_requires=io.open("requirements.txt").read().splitlines(), 30 | entry_points=""" 31 | [console_scripts] 32 | sb=sotabenchapi.__main__:cli 33 | """, 34 | ) 35 | -------------------------------------------------------------------------------- /sotabenchapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paperswithcode/sotabench-api/513c0095328a41485d4d13ec3d77da919d8de8b1/sotabenchapi/__init__.py -------------------------------------------------------------------------------- /sotabenchapi/__main__.py: -------------------------------------------------------------------------------- 1 | from sotabenchapi.commands import cli 2 | 3 | 4 | if __name__ == "__main__": 5 | cli() 6 | -------------------------------------------------------------------------------- /sotabenchapi/check.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def in_check_mode(): 5 | """Return True/False if we are running the library in check mode. 6 | 7 | In check mode we perform a dry run to make sure the benchmarks are 8 | going to run and the parameters (such as paper IDs and model names) 9 | have been specified correctly. 10 | 11 | Returns: 12 | bool: True if we are in check mode. 
13 | """ 14 | check_mode = os.environ.get("SOTABENCH_CHECK") 15 | 16 | if check_mode == "full" or check_mode == "params": 17 | return True 18 | else: 19 | return False 20 | 21 | 22 | def get_check_mode_type(): 23 | """Get the type of checking we are doing. 24 | 25 | Returns: 26 | str: Either "full" for a full check including running the benchmark on 27 | the first batch, or "params" which only check input parameters of 28 | the benchmark function. 29 | """ 30 | check_mode = os.environ.get("SOTABENCH_CHECK") 31 | 32 | if not in_check_mode(): 33 | return None 34 | elif check_mode in ["full", "params"]: 35 | return check_mode 36 | else: 37 | return "n/a" 38 | -------------------------------------------------------------------------------- /sotabenchapi/client.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Optional 3 | 4 | from sotabenchapi import uploader 5 | from sotabenchapi.config import Config 6 | from sotabenchapi.http import HttpClient 7 | 8 | 9 | class Client(object): 10 | """NewReleases client. 11 | 12 | Args: 13 | config (sotabenchapi.config.Config): Instance of the sotabenchapi 14 | configuration. 15 | """ 16 | 17 | def __init__(self, config: Config): 18 | self.config = config 19 | self.http = HttpClient(url=config.url, token=config.token) 20 | 21 | @classmethod 22 | def public(cls) -> "Client": 23 | """Get the public access sotabench client. 24 | 25 | Returns: 26 | Client: A client instance that can be used to make API 27 | requests to sotabench.com. 28 | """ 29 | config = Config(None) 30 | return Client(config) 31 | 32 | def login(self, username: str, password: str) -> str: 33 | """Obtain authentication token. 34 | 35 | Args: 36 | username (str): SotaBench username. 37 | password (str): SotaBench password. 38 | 39 | Returns: 40 | str: Authentication token. 
41 | """ 42 | response = self.http.post( 43 | "auth/token/", data={"username": username, "password": password} 44 | ) 45 | return response["token"] 46 | 47 | # Check 48 | def check_run_hashes(self, hashes: List[str]) -> dict: 49 | """Check if the hash exist in the database. 50 | 51 | Args: 52 | hashes (list of str): List of run hashes. 53 | 54 | Returns: 55 | dict: Dictionary of ``{hash: True/False}`` pairs. ``True`` 56 | represents an existing hash, ``False`` a non existing. 57 | """ 58 | response = self.http.post("check/run-hashes/", data={"hashes": hashes}) 59 | return response 60 | 61 | def get_results_by_run_hash(self, run_hash: str) -> dict: 62 | """Get cached results by run_hash. 63 | 64 | Args: 65 | run_hash (str): SHA256 run_hash that identifies the run 66 | 67 | Returns: 68 | dict: A dictionary of results, e.g:: 69 | 70 | { 71 | "Top 1 Accuracy": 0.85, 72 | "Top 5 Accuracy": 0.90 73 | } 74 | """ 75 | 76 | response = self.http.get( 77 | "check/get_results_by_hash/", params={"run_hash": run_hash} 78 | ) 79 | return response 80 | 81 | def check_results(self, results: List[dict]) -> List[dict]: 82 | """Check if the results would be accepted by sotabench.com. 83 | 84 | Args: 85 | results: A list of results dictionaries (ie same format as 86 | sotabench-results.json. 87 | 88 | Returns: 89 | List[dict]: A list of dictionaries highlighting any errors with the 90 | submitted results. 91 | """ 92 | 93 | return self.http.post("check/results/", data={"results": results}) 94 | 95 | # Repository 96 | def repository_list(self, username: Optional[str] = None): 97 | """List repositories. 98 | 99 | Optionally filter by repository owner. 100 | """ 101 | if username is None: 102 | return self.http.get("repositories/") 103 | else: 104 | return self.http.get(f"repositories/{username}/") 105 | 106 | def repository_get(self, repository: str): 107 | """Get repository. 108 | 109 | Args: 110 | repository (str): Repository in ``owner/project`` format. 
111 | """ 112 | return self.http.get(f"repositories/{repository}/") 113 | 114 | def repository_update(self, repository: str, build_enabled: bool): 115 | """Update build_enabled flag. 116 | 117 | Args: 118 | repository (str): Repository in ``owner/project`` format. 119 | build_enabled (bool): Should the build be enabled or not. 120 | """ 121 | return self.http.patch( 122 | f"repositories/{repository}/", 123 | data={"build_enabled": build_enabled}, 124 | ) 125 | 126 | # Dataset 127 | def dataset_list(self, repository: str): 128 | """List all datasets for the given repository. 129 | 130 | Args: 131 | repository (str): Repository in ``owner/project`` format. 132 | """ 133 | return self.http.get(f"repositories/{repository}/datasets/") 134 | 135 | def dataset_upload( 136 | self, 137 | dataset: str, 138 | repository: str, 139 | path: Optional[str] = None, 140 | part_size: Optional[int] = None, 141 | ): 142 | """Upload dataset for a repository. 143 | 144 | Args: 145 | dataset (str): Path to a dataset file. 146 | repository (str): repository slug. 147 | path (str): Path under the .data folder where the dataset should be 148 | downloaded. Default: `basename(dataset). 149 | part_size (int, optional): Optional user defined part size. 150 | """ 151 | 152 | uploader.multipart_upload( 153 | http=self.http, 154 | filename=dataset, 155 | repository=repository, 156 | path=path or os.path.basename(dataset), 157 | part_size=part_size, 158 | ) 159 | 160 | def dataset_get(self, repository: str, dataset: str): 161 | """Get dataset. 162 | 163 | Args: 164 | repository (str): Repository in ``owner/project`` format. 165 | dataset (str): Path to the dataset. 166 | """ 167 | return self.http.get( 168 | f"repositories/{repository}/datasets/?dataset={dataset}" 169 | ) 170 | 171 | def dataset_delete(self, repository: str, dataset: str): 172 | """Update build_enabled flag. 173 | 174 | Args: 175 | repository (str): Repository in ``owner/project`` format. 176 | dataset (str): Path to the dataset. 
177 | """ 178 | return self.http.delete( 179 | f"repositories/{repository}/datasets/", data={"dataset": dataset}, 180 | ) 181 | 182 | # Build 183 | def build_start(self, repository: str): 184 | """Initiate repository build. 185 | 186 | Args: 187 | repository (str): Repository in ``owner/project`` format. 188 | """ 189 | return self.http.post(f"builds/{repository}/") 190 | 191 | def build_list(self, repository: str): 192 | """List builds for a given repository. 193 | 194 | Args: 195 | repository (str): Repository in ``owner/project`` format. 196 | """ 197 | return self.http.get(f"builds/{repository}/") 198 | 199 | def build_get(self, repository: str, run_number: int): 200 | """Get build. 201 | 202 | Args: 203 | repository (str): Repository in ``owner/project`` format. 204 | run_number (int): Run number of the build. 205 | """ 206 | return self.http.get(f"builds/{repository}/{run_number}/") 207 | 208 | # Benchmark 209 | def benchmark_list(self): 210 | """List users benchmarks.""" 211 | return self.http.get("benchmarks/") 212 | 213 | def benchmark_get(self, benchmark: str): 214 | """Get benchmark. 215 | 216 | Args: 217 | benchmark (str): Benchmark slug. 
218 | """ 219 | return self.http.get(f"benchmarks/{benchmark}/") 220 | -------------------------------------------------------------------------------- /sotabenchapi/commands/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "cli", 3 | "check", 4 | "repo_cli", 5 | "build_cli", 6 | "benchmark_cli", 7 | "dataset_cli", 8 | ] 9 | 10 | from sotabenchapi.commands.cli import cli 11 | from sotabenchapi.commands.check import check 12 | from sotabenchapi.commands.repo import repo_cli 13 | from sotabenchapi.commands.build import build_cli 14 | from sotabenchapi.commands.benchmark import benchmark_cli 15 | from sotabenchapi.commands.dataset import dataset_cli 16 | -------------------------------------------------------------------------------- /sotabenchapi/commands/benchmark.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from sotabenchapi.config import Config 4 | from sotabenchapi.client import Client 5 | from sotabenchapi.commands.cli import cli 6 | from sotabenchapi.commands.utils import handle_errors, table 7 | 8 | 9 | @cli.group("benchmark") 10 | def benchmark_cli(): 11 | """Benchmark operations.""" 12 | pass 13 | 14 | 15 | @benchmark_cli.command("list") 16 | @click.pass_obj 17 | @handle_errors() 18 | def benchmark_list(config: Config): 19 | """List users benchmarks.""" 20 | client = Client(config) 21 | table(client.benchmark_list()) 22 | 23 | 24 | @benchmark_cli.command("get") 25 | @click.argument("benchmark", required=True) 26 | @click.pass_obj 27 | @handle_errors() 28 | def benchmark_get(config: Config, benchmark: str): 29 | """Get benchmark. 30 | 31 | Provide benchmark slug as an argument. 
32 | """ 33 | client = Client(config) 34 | table(client.benchmark_get(benchmark=benchmark)) 35 | -------------------------------------------------------------------------------- /sotabenchapi/commands/build.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from sotabenchapi.config import Config 4 | from sotabenchapi.client import Client 5 | from sotabenchapi.commands.cli import cli 6 | from sotabenchapi.commands.utils import handle_errors, check_repo, table 7 | 8 | 9 | @cli.group("build") 10 | def build_cli(): 11 | """Build operations.""" 12 | pass 13 | 14 | 15 | @build_cli.command("start") 16 | @click.argument("repository", required="True") 17 | @click.pass_obj 18 | @handle_errors() 19 | def build_start(config: Config, repository): 20 | """Start build.""" 21 | repository = check_repo(repository) 22 | client = Client(config) 23 | table(client.build_start(repository=repository)) 24 | 25 | 26 | @build_cli.command("list") 27 | @click.argument("repository", required=True) 28 | @click.pass_obj 29 | @handle_errors() 30 | def build_list(config: Config, repository: str): 31 | """List builds for a given repository..""" 32 | repository = check_repo(repository) 33 | client = Client(config) 34 | table(client.build_list(repository=repository)) 35 | 36 | 37 | @build_cli.command("get") 38 | @click.argument("repository", required=True) 39 | @click.argument("run_number", type=int, required=True) 40 | @click.pass_obj 41 | @handle_errors() 42 | def build_get(config: Config, repository: str, run_number: int): 43 | """Get build details.""" 44 | repository = check_repo(repository) 45 | client = Client(config) 46 | table(client.build_get(repository=repository, run_number=run_number)) 47 | -------------------------------------------------------------------------------- /sotabenchapi/commands/check.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import 
subprocess 4 | from pathlib import Path 5 | 6 | import click 7 | 8 | from sotabenchapi.config import Config 9 | from sotabenchapi.commands.cli import cli 10 | from sotabenchapi.commands.utils import handle_errors 11 | 12 | 13 | @cli.command("check") 14 | @click.pass_obj 15 | @click.option( 16 | "-p", 17 | "--params", 18 | is_flag=True, 19 | default=False, 20 | help="Checks the parameters, such as model names and arxiv paper ids, " 21 | "to ensure they are correct. Does not perform evaluation, but is a pure " 22 | "API call with the inputs. You can use this command to check input string " 23 | "validity before submission.", 24 | ) 25 | @handle_errors() 26 | def check(config: Config, params: bool = False): 27 | """Check if the benchmarking setup is correct.""" 28 | cwd = Path(os.getcwd()).absolute() 29 | 30 | if not (cwd / "sotabench.py").is_file(): 31 | click.secho("sotabench.py is missing.", fg="red") 32 | sys.exit(1) 33 | 34 | if not (cwd / "requirements.txt").is_file(): 35 | click.secho("requirements.txt is missing.", fg="red") 36 | sys.exit(1) 37 | 38 | check_var = config.sotabench_check or "full" 39 | 40 | if params is True: 41 | check_var = "params" 42 | 43 | process = subprocess.Popen( 44 | [sys.executable, "sotabench.py"], 45 | env={"SOTABENCH_CHECK": check_var}, 46 | stdout=sys.stdout, 47 | stderr=sys.stderr, 48 | ) 49 | process.wait() 50 | -------------------------------------------------------------------------------- /sotabenchapi/commands/cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import click 4 | 5 | from sotabenchapi import consts 6 | from sotabenchapi.config import Config 7 | from sotabenchapi.client import Client 8 | 9 | 10 | @click.group() 11 | @click.option( 12 | "--config", 13 | "config_path", 14 | type=click.Path(exists=True), 15 | envvar="SOTABENCH_CONFIG", 16 | help="Path to the alternative configuration file.", 17 | ) 18 | @click.option( 19 | "--profile", 20 | 
default="default", 21 | envvar="SOTABENCH_PROFILE", 22 | help="Configuration file profile.", 23 | ) 24 | @click.pass_context 25 | def cli(ctx, config_path, profile): 26 | """sotabench command line client.""" 27 | if config_path is None: 28 | config_path = os.path.expanduser(consts.DEFAULT_CONFIG_PATH) 29 | ctx.obj = Config(config_path, profile) 30 | 31 | 32 | @cli.command("login") 33 | @click.pass_obj 34 | def login(config: Config): 35 | """Obtain authentication token.""" 36 | username = click.prompt("Username") 37 | password = click.prompt("Password", hide_input=True) 38 | 39 | client = Client(config) 40 | config.token = client.login(username=username, password=password) 41 | config.save() 42 | -------------------------------------------------------------------------------- /sotabenchapi/commands/dataset.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from sotabenchapi.config import Config 4 | from sotabenchapi.client import Client 5 | from sotabenchapi.commands.cli import cli 6 | from sotabenchapi.commands.utils import handle_errors, check_repo, table 7 | 8 | 9 | @cli.group("dataset") 10 | def dataset_cli(): 11 | """Dataset operations (upload, management).""" 12 | pass 13 | 14 | 15 | part_size_type = click.IntRange(min=5) 16 | part_size_type.name = "integer" 17 | 18 | 19 | @dataset_cli.command("list") 20 | @click.argument("repository", required=True) 21 | @click.pass_obj 22 | @handle_errors(m404="Repository not found.") 23 | def dataset_list(config: Config, repository: str): 24 | """List all uploaded datasets for a repository. 25 | 26 | Repository name must be in ``owner/project`` format. 
27 | """ 28 | repository = check_repo(repository) 29 | client = Client(config) 30 | table(client.dataset_list(repository=repository)) 31 | 32 | 33 | @dataset_cli.command("upload") 34 | @click.argument( 35 | "dataset", 36 | required=True, 37 | type=click.Path(exists=True, dir_okay=False, resolve_path=True), 38 | ) 39 | @click.option("-r", "--repository", required=True, help="Repository slug.") 40 | @click.option( 41 | "-p", 42 | "--path", 43 | required=False, 44 | default=None, 45 | help="Path in .data folder where the dataset should be downloaded. " 46 | "Default: `basename(dataset)`", 47 | ) 48 | @click.option( 49 | "-s", 50 | "--part-size", 51 | type=part_size_type, 52 | default=None, 53 | help=( 54 | "Set the part size in MB (min 5MB). If not provided the part size " 55 | "will be calculated based on the file size." 56 | ), 57 | ) 58 | @click.pass_obj 59 | @handle_errors(m404="Repository not found.") 60 | def dataset_upload( 61 | config: Config, dataset: str, repository: str, path: str, part_size: int 62 | ): 63 | """Upload dataset for a repository.""" 64 | client = Client(config) 65 | client.dataset_upload( 66 | dataset=dataset, 67 | repository=check_repo(repository), 68 | path=path, 69 | part_size=part_size, 70 | ) 71 | 72 | 73 | @dataset_cli.command("get") 74 | @click.argument("repository", required=True) 75 | @click.argument("dataset", required=True) 76 | @click.pass_obj 77 | @handle_errors(m404="Either the repository or the dataset is not found.") 78 | def dataset_get(config: Config, repository: str, dataset: str): 79 | """Get dataset details. 80 | 81 | Repository name must be in ``owner/project`` format. 
82 | """ 83 | repository = check_repo(repository) 84 | client = Client(config) 85 | table(client.dataset_get(repository=repository, dataset=dataset)) 86 | 87 | 88 | @dataset_cli.command("delete") 89 | @click.argument("repository", required=True) 90 | @click.argument("dataset", required=True) 91 | @click.pass_obj 92 | @handle_errors(m404="Either the repository or the dataset is not found.") 93 | def dataset_delete(config: Config, repository: str, dataset: str): 94 | """Delete dataset. 95 | 96 | Repository name must be in ``owner/project`` format. 97 | """ 98 | repository = check_repo(repository) 99 | client = Client(config) 100 | result = client.dataset_delete(repository=repository, dataset=dataset) 101 | if result["status"] == "OK": 102 | click.secho("Dataset successfully deleted.", fg="green") 103 | else: 104 | click.secho("An error occurred please try again later..", fg="red") 105 | -------------------------------------------------------------------------------- /sotabenchapi/commands/repo.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from sotabenchapi.config import Config 4 | from sotabenchapi.client import Client 5 | from sotabenchapi.commands.cli import cli 6 | from sotabenchapi.commands.utils import handle_errors, check_repo, table 7 | 8 | 9 | @cli.group("repo") 10 | def repo_cli(): 11 | """Repository operations.""" 12 | pass 13 | 14 | 15 | @repo_cli.command("list") 16 | @click.option( 17 | "-o", "--owner", help="Filter by repository owner.", default=None 18 | ) 19 | @click.pass_obj 20 | @handle_errors() 21 | def repo_list(config: Config, owner): 22 | """List repositories.""" 23 | client = Client(config) 24 | table(client.repository_list(username=owner)) 25 | 26 | 27 | @repo_cli.command("get") 28 | @click.argument("repository", required=True) 29 | @click.pass_obj 30 | @handle_errors() 31 | def repo_get(config: Config, repository: str): 32 | """Get repository. 
33 | 34 | Repository name must be in ``owner/project`` format. 35 | """ 36 | repository = check_repo(repository) 37 | client = Client(config) 38 | table(client.repository_get(repository=repository)) 39 | -------------------------------------------------------------------------------- /sotabenchapi/commands/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import functools 3 | 4 | import click 5 | from tabulate import tabulate 6 | 7 | from sotabenchapi import errors 8 | 9 | 10 | def handle_errors(m404=None): 11 | def decorator(func): 12 | @functools.wraps(func) 13 | def wrapper(*args, **kwargs): 14 | try: 15 | return func(*args, **kwargs) 16 | except errors.HttpClientError as e: 17 | if m404 and e.status_code == 404: 18 | click.secho(m404, fg="red") 19 | else: 20 | click.secho(e.message, fg="red") 21 | try: 22 | data = e.response.json() 23 | if "error" in data: 24 | click.secho(data["error"], fg="red") 25 | except Exception: 26 | pass 27 | 28 | return wrapper 29 | 30 | return decorator 31 | 32 | 33 | def check_repo(repository: str): 34 | parts = repository.split("/") 35 | if len(parts) != 2: 36 | click.secho("Invalid repository name: ", fg="red", nl=False) 37 | click.secho(repository) 38 | click.secho( 39 | "Repository name must be in owner/project format.", fg="cyan" 40 | ) 41 | sys.exit(1) 42 | return repository.strip("/") 43 | 44 | 45 | def table(data): 46 | """Show data as a table.""" 47 | if not isinstance(data, (list, tuple)): 48 | data = [data] 49 | if len(data) == 0: 50 | click.secho("No items found.", fg="cyan") 51 | else: 52 | click.secho(tabulate(data, headers="keys", tablefmt="fancy_grid")) 53 | -------------------------------------------------------------------------------- /sotabenchapi/config.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | from typing import Optional 4 | from configparser import ConfigParser 5 | 6 | from 
sotabenchapi import consts 7 | 8 | 9 | class Config(object): 10 | """Configuration. 11 | 12 | Args: 13 | config_path (str, optional): Path to the configuration `ini` file. If 14 | the file is not provided, default configuration file 15 | `~/.sotabench/sotabenchapi.ini` will be used. 16 | profile (str, optional): Selected profile from the `ini` file. Default: 17 | `default`. 18 | 19 | Attributes: 20 | config_path (str): Absolute path to the configuration `ini` file. 21 | profile (str): Selected profile. 22 | url (str): URL to the sotabench api. 23 | sotabench_check (str, optional): Defines what should be checked when 24 | doing checking operations. 25 | """ 26 | 27 | def __init__( 28 | self, config_path: Optional[str] = None, profile: str = "default" 29 | ): 30 | self.config_path = os.path.abspath( 31 | config_path or os.path.expanduser(consts.DEFAULT_CONFIG_PATH) 32 | ) 33 | self.profile = profile 34 | if not os.path.isfile(self.config_path): 35 | data = {} 36 | else: 37 | cp = ConfigParser() 38 | cp.read(self.config_path) 39 | data = cp[self.profile] if cp.has_section(self.profile) else {} 40 | 41 | self.url = os.environ.get( 42 | "SOTABENCH_URL", data.get("url", consts.SOTABENCH_API_URL) 43 | ) 44 | self.token = os.environ.get("SOTABENCH_TOKEN", data.get("token", "")) 45 | self.sotabench_check = os.environ.get( 46 | "SOTABENCH_CHECK", data.get("sotabench_check", "full") 47 | ) 48 | 49 | def save(self): 50 | """Save the configuration file.""" 51 | # Create config dir if it doesn't exist 52 | config_dir = os.path.dirname(self.config_path) 53 | os.makedirs(config_dir, exist_ok=True) 54 | 55 | cp = ConfigParser() 56 | # Read existing configuration if exists 57 | if os.path.isfile(self.config_path): 58 | cp.read(self.config_path) 59 | 60 | # Create profile if it doesn't exist 61 | if self.profile not in cp.sections(): 62 | cp.add_section(self.profile) 63 | 64 | # Write the current configuration to the profile 65 | cp[self.profile]["token"] = self.token 66 | 
def check_inputs(func):
    """A decorator for checking inputs to a benchmark method.

    Args:
        func (callable): a benchmark method, e.g. ImageNet.benchmark(...)

    Returns:
        callable: If regular evaluation, then func; if parameter check only
            then skips evaluation and returns inputs in a BenchmarkResult
            object so they can be checked for correctness (e.g. if model
            name is correct).
    """
    check_mode = os.environ.get("SOTABENCH_CHECK")

    def param_check_only(*args, **kwargs):
        """Return a BenchmarkResult with only parameters.

        No evaluation - so we can check the inputs to see if sotabench.com
        will accept them.

        Args:
            args: args for the benchmark() method.
            kwargs: kwargs for the benchmark() method.

        Returns:
            BenchmarkResult: BenchmarkResult instance.
        """
        # Bug fix: the result was constructed but never returned, so the
        # caller always received None in parameter-check mode despite the
        # documented contract.
        return BenchmarkResult(
            task=args[0].task,
            config=None,
            dataset=args[0].dataset.__name__,
            results={},
            pytorch_hub_id=None,
            model=kwargs.get("paper_model_name", None),
            model_description=kwargs.get("model_description", ""),
            arxiv_id=kwargs.get("paper_arxiv_id", None),
            pwc_id=kwargs.get("paper_pwc_id", None),
            paper_results={},
            run_hash=None,
        )

    def regular_evaluation(*args, **kwargs):
        """A regular call to benchmark().

        If a SOTABENCH_SERVER environment variable is set then we enforce
        some parameters so it works on the server (e.g. number of gpus,
        device type, data location).

        Args:
            args: args for the benchmark() method.
            kwargs: kwargs for the benchmark() method.

        Returns:
            BenchmarkResult: BenchmarkResult instance.
        """
        check_server = os.environ.get("SOTABENCH_SERVER")

        # If being run on a server, we enforce some parameters.
        if check_server == "true":
            # The server provides the data location itself.
            kwargs.pop("data_root", None)

            if "num_gpu" in kwargs and kwargs["num_gpu"] != "1":
                kwargs["num_gpu"] = 1
                print(
                    "Changing number of GPUs to 1 for sotabench.com "
                    "server \n"
                )

            if "device" in kwargs and kwargs["device"] != "cuda":
                kwargs["device"] = "cuda"
                print(
                    "Changing device to cuda for sotabench.com server \n"
                )

        # Bug fix: propagate the benchmark result to the caller instead of
        # discarding func's return value.
        return func(*args, **kwargs)

    if check_mode == "params":
        return param_check_only
    return regular_evaluation
class BenchmarkResult:
    """Class encapsulates data for the results of a model on a benchmark.

    It also provides methods for serialising that data and checking the
    parameters with the sotabench.com resource.

    Most of the inputs are optional - so when you create a benchmark, you
    can choose which subset of arguments you want to store (that are
    relevant for your benchmark).

    Arguments:
        model (str): Name of the model, e.g. ``EfficientNet-B0``.
        task (str): String describing a task, e.g. ``Image Classification``.
        dataset (str): String representing the name of a dataset, e.g.
            ``CIFAR-10``.
        results (dict): Dictionary with keys as metric names, e.g.
            ``Top 1 Accuracy``, and values as floats, e.g. ``0.80``.
        model_description (str, optional): Optional description of the model.
        config (dict, optional): Dictionary storing user configuration
            arguments (inputs to the evaluation function), e.g. the
            transforms that were passed to the dataset object (resizing,
            cropping...)
        arxiv_id (str, optional): String describing the paper where the
            model comes from, e.g. ``1901.07518``.
        pwc_id (str, optional): Describing the paperswithcode.com page -
            e.g.: ``hybrid-task-cascade-for-instance-segmentation``.
        pytorch_hub_id (str, optional): Describing the location of the
            PyTorch Hub model, e.g.:
            ``mateuszbuda_brain-segmentation-pytorch_unet``
        paper_results (dict, optional): Dictionary with original results
            from the PAPER, e.g.::

                {
                    'Top 1 Accuracy': 0.543,
                    'Top 5 Accuracy': 0.743
                }

            The metric names should match those used in the existing
            leaderboard.
        run_hash (str): The run_hash that uniquely identifies this run,
            based on results from the first batch. It is used to cache runs
            so we don't have to re-run benchmarks when nothing has changed.
    """

    def __init__(
        self,
        model: str,
        task: str,
        dataset: str,
        results: dict,
        model_description: Optional[str] = None,
        config: Optional[dict] = None,
        arxiv_id: Optional[str] = None,
        pwc_id: Optional[str] = None,
        pytorch_hub_id: Optional[str] = None,
        paper_results: Optional[dict] = None,
        run_hash: Optional[str] = None,
    ):

        self.model = model
        self.task = task
        self.dataset = dataset
        self.results = results
        self.model_description = model_description
        self.config = config
        self.arxiv_id = arxiv_id
        self.pwc_id = pwc_id
        self.pytorch_hub_id = pytorch_hub_id
        self.paper_results = paper_results
        self.run_hash = run_hash

        # A JSON file is only produced when the user asked for it through
        # the SOTABENCH_STORE_FILENAME environment variable.
        # (Was: ``True if os.environ.get(...) else False``.)
        self.create_json = bool(os.environ.get("SOTABENCH_STORE_FILENAME"))

        self.in_check_mode = in_check_mode()
        self.check_mode_type = get_check_mode_type()

        # Serialise immediately so that check-mode validation (or the JSON
        # dump) happens as soon as the result object is created.
        self.to_dict()

    def to_dict(self) -> dict:
        """Serialises the benchmark result data.

        If an environmental variable is set, e.g.
        (``SOTABENCH_STORE_FILENAME == 'evaluation.json'``) then will also
        save a JSON called ``evaluation.json``

        The method also checks for errors with the sotabench.com server if
        in check mode.

        Returns:
            dict: A dictionary containing results
        """

        build_dict = {
            # The server expects an ASCII model name; non-ASCII characters
            # are dropped rather than raising.
            "model": self.model.encode("ascii", "ignore").decode("ascii"),
            "model_description": self.model_description,
            "task": self.task,
            "dataset_name": self.dataset,
            "results": self.results,
            "arxiv_id": self.arxiv_id,
            "pwc_id": self.pwc_id,
            "pytorch_hub_id": self.pytorch_hub_id,
            "paper_results": self.paper_results,
            "run_hash": self.run_hash,
        }

        if self.in_check_mode:
            # Ask the server to validate the result without submitting it.
            client = Client.public()
            r = client.check_results([build_dict])
            errors = r["response"]["errors"]
            click.secho("\n---\n", fg="white")
            print("Model: {name}\n".format(name=build_dict["model"]))
            if errors:
                click.secho("Error while checking:\n", fg="red")
                for error_dict in errors:
                    print(error_dict["error"])
            else:
                click.secho("No errors detected, looks good!", fg="green")
            click.secho("\n---\n", fg="white")
        elif self.create_json:
            # Append the result to the store file, creating it when missing.
            file_name = os.environ.get("SOTABENCH_STORE_FILENAME")

            if not os.path.isfile(file_name):
                models_dict = [build_dict]
            else:
                with io.open(file_name) as f:
                    models_dict = json.load(f)
                models_dict.append(build_dict)

            with io.open(file_name, "w") as f:
                json.dump(models_dict, f, ensure_ascii=False)

        return build_dict
19 | def __init__(self, message, response=None): 20 | super().__init__(message) 21 | self.response: Response = response 22 | self.status_code = ( 23 | response.status_code if response is not None else 500 24 | ) 25 | 26 | @property 27 | def data(self): 28 | return self.response.json() 29 | 30 | def __str__(self): 31 | return ( 32 | f"{self.name}(message={self.message}, " 33 | f"status_code={self.status_code})" 34 | ) 35 | 36 | __repr__ = __str__ 37 | 38 | 39 | class HttpClientTimeout(HttpClientError): 40 | """Http timeout error. 41 | 42 | From http://docs.python-requests.org/en/master/user/quickstart/#timeouts: 43 | 44 | timeout is not a time limit on the entire response download; rather, an 45 | exception is raised if the server has not issued a response for timeout 46 | seconds (more precisely, if no bytes have been received on the 47 | underlying socket for timeout seconds). If no timeout is specified 48 | explicitly, requests do not time out. 49 | 50 | Catching this error will catch both 51 | :exc:`~requests.exceptions.ConnectTimeout` and 52 | :exc:`~requests.exceptions.ReadTimeout` errors. 53 | 54 | ConnectTimeout: 55 | The request timed out while trying to connect to the remote server. 56 | Requests that produced this error are safe to retry. 57 | 58 | ReadTimeout: 59 | The server did not send any data in the allotted amount of time. 
class HttpRateLimitExceeded(HttpClientError):
    """Raised when the server reports that the rate limit was exhausted."""

    def __init__(self, response, limit, remaining, reset, retry):
        """Initialize.

        Args:
            response: HTTP response that reported the rate limiting.
            limit: Total number of requests allowed in the window.
            remaining: Number of requests left in the current window.
            reset: Seconds until the window resets.
            retry: Seconds to wait before retrying.
        """
        super().__init__("Rate limit exceeded.", response=response)
        self.limit = limit
        self.remaining = remaining
        self.reset = reset
        self.retry = retry

    def __str__(self):
        return (
            f"{self.name}(limit={self.limit}, remaining={self.remaining}, "
            f"reset={self.reset}s, retry={self.retry}s)"
        )

    __repr__ = __str__


class SerializationError(SotaBenchError):
    """Thrown when the client cannot serialize or deserialize an object."""

    def __init__(self, errors):
        """Initialize.

        Args:
            errors (dict): Dictionary of found errors
        """
        super().__init__("Serialization error.")
        self.errors = errors
    def __init__(
        self,
        url,
        token="",
        timeout=60,
        max_retries=3,
        backoff_factor=0.05,
        backoff_max=10,
        status_forcelist=(500, 502, 503, 504),
    ):
        """Initialize.

        Args:
            url (str): URL to the SotaBench server.
            token (str): SotaBench authentication token.
            timeout (int): Request timeout time.
            max_retries (int): Maximal number of retries.
            backoff_factor (float): Backoff factor.
            backoff_max (int): Maximal number of backoffs.
            status_forcelist (tuple of int): Tuple of HTTP statuses for
                which the service should retry.
        """
        self.url = url
        self.token = token
        self.timeout = timeout
        self.max_retries = max_retries
        self.backoff_factor = backoff_factor
        self.backoff_max = backoff_max
        self.status_forcelist = status_forcelist

        # Setup headers. The Authorization header is only attached when a
        # non-blank token is configured.
        self.headers = {"Content-Type": "application/json"}
        if self.token.strip() != "":
            self.headers["Authorization"] = f"JWT {self.token}"

        # Last response received; kept for debugging after a call.
        self.response = None

        # setup connection pool; mounting the adapter on the base URL
        # applies the retry policy to every request made by this client.
        self.session = Session()
        retry = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        retry.BACKOFF_MAX = backoff_max
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount(self.url, adapter)

    def request(
        self, method, url, headers=None, params=None, data=None, timeout=None
    ):
        """Request method.

        Request method handles all the url joining, header merging, logging and
        error handling.

        Args:
            method (str): Method for the request - GET or POST
            url (str): Partial url of the request. It is added to the base url
            headers (dict): Dictionary of additional HTTP headers
            params (dict): Dictionary of query parameters for the request
            data (dict): A JSON serializable Python object to send in the body
                of the request. Used only in POST requests.
            timeout (float): How many seconds to wait for the server to send
                data before giving up.
        """
        # NOTE(review): os.path.join is used for URL joining; this works on
        # POSIX but would produce backslashes on Windows - confirm intended.
        full_url = os.path.join(self.url, url.lstrip("/"))
        # Per-call headers override the client-wide defaults.
        headers = {**self.headers, **(headers or {})}
        timeout = timeout or self.timeout

        try:
            if method.lower() == "get":
                self.response = self.session.get(
                    url=full_url,
                    headers=headers,
                    params=params,
                    timeout=timeout,
                )
            elif method.lower() == "patch":
                self.response = self.session.patch(
                    url=full_url,
                    headers=headers,
                    params=params,
                    json=data,
                    timeout=timeout,
                )
            elif method.lower() == "post":
                self.response = self.session.post(
                    url=full_url,
                    headers=headers,
                    params=params,
                    json=data,
                    timeout=timeout,
                )
            elif method.lower() == "delete":
                self.response = self.session.delete(
                    url=full_url,
                    headers=headers,
                    params=params,
                    json=data,
                    timeout=timeout,
                )
            else:
                raise HttpClientError(f"Unsupported method: {method}")
        except Timeout as e:
            # If request timed out, let upper level handle it the way it
            # sees fit: one place might want to retry, another might not.
            raise HttpClientTimeout() from e

        except ConnectionError as e:
            raise HttpClientError("SotaBench server not reachable.") from e

        except Exception as e:
            raise HttpClientError(f"Unknown error. {e!r}") from e

        # Success: an empty body is normalized to an empty dict.
        if self.response.status_code == 200:
            try:
                return self.response.json() if self.response.text else {}
            except Exception as e:
                raise HttpClientError(
                    f"Error while parsing server response: {e!r}",
                    response=self.response,
                ) from e
        # Check rate limit headers before the generic error mapping so the
        # caller gets the richer rate-limit exception when applicable.
        limit = self.response.headers.get("X-Ratelimit-Limit", None)
        if limit is not None:
            remaining = self.response.headers["X-Ratelimit-Remaining"]
            reset = self.response.headers["X-Ratelimit-Reset"]
            retry = self.response.headers["X-Ratelimit-Retry"]

            if remaining == 0:
                raise HttpRateLimitExceeded(
                    response=self.response,
                    limit=limit,
                    remaining=remaining,
                    reset=reset,
                    retry=retry,
                )

        # Try known error messages
        message = self.ERRORS.get(self.response.status_code, None)
        if message is not None:
            raise HttpClientError(message, response=self.response)

        # 400: prefer the server-provided list of validation errors.
        if self.response.status_code == 400:
            try:
                message = "\n".join(self.response.json()["errors"])
            except Exception:
                message = "Bad Request."
            raise HttpClientError(message, response=self.response)

        # Generalize unknown messages.
        try:
            message = self.response.json()["message"]
        except Exception:
            message = "Unknown error."
        raise HttpClientError(message, response=self.response)
201 | 202 | """ 203 | return self.request( 204 | method="get", 205 | url=url, 206 | headers=headers, 207 | params=params, 208 | timeout=timeout, 209 | ) 210 | 211 | def patch(self, url, headers=None, params=None, data=None, timeout=None): 212 | """Perform patch request. 213 | 214 | Args: 215 | url (str): Partial url of the request. It is added to the base url 216 | headers (dict): Dictionary of additional HTTP headers 217 | params (dict): Dictionary of query parameters for the request 218 | data (dict): A JSON serializable Python object to send in the body 219 | of the request. 220 | timeout (float): How many seconds to wait for the server to send 221 | data before giving up 222 | 223 | Returns: 224 | dict: Deserialized json response. 225 | 226 | """ 227 | return self.request( 228 | method="patch", 229 | url=url, 230 | headers=headers, 231 | params=params, 232 | data=data, 233 | timeout=timeout, 234 | ) 235 | 236 | def post(self, url, headers=None, params=None, data=None, timeout=None): 237 | """Perform post request. 238 | 239 | Args: 240 | url (str): Partial url of the request. It is added to the base url 241 | headers (dict): Dictionary of additional HTTP headers 242 | params (dict): Dictionary of query parameters for the request 243 | data (dict): A JSON serializable Python object to send in the body 244 | of the request. 245 | timeout (float): How many seconds to wait for the server to send 246 | data before giving up 247 | 248 | Returns: 249 | dict: Deserialized json response. 250 | 251 | """ 252 | return self.request( 253 | method="post", 254 | url=url, 255 | headers=headers, 256 | params=params, 257 | data=data, 258 | timeout=timeout, 259 | ) 260 | 261 | def delete(self, url, headers=None, params=None, data=None, timeout=None): 262 | """Perform delete request. 263 | 264 | Args: 265 | url (str): Partial url of the request. 
@pytest.fixture
def run_hash():
    """Run hash of a known, previously submitted benchmark run."""
    return "c474595718e06d524fa4eaeba35347181f1fa18b28f123e68eeaeca8c52336aa"


def test_run_hash(run_hash):
    """Fetch cached results for a known run hash.

    NOTE(review): integration test - it requires a valid local
    configuration (token) and network access to the sotabench API.
    """
    config_path = os.path.expanduser(consts.DEFAULT_CONFIG_PATH)
    config = Config(config_path)
    client = Client(config)

    res = client.get_results_by_run_hash(run_hash=run_hash)

    # Known leaderboard values for this specific run.
    assert isinstance(res, dict)
    assert res["Top 5 Accuracy"] == 0.9795
class UploadState(enum.Enum):
    """Lifecycle states shared by uploads and their parts."""

    exists = "exists"
    queued = "queued"
    started = "started"
    finished = "finished"
    error = "error"


@dataclass
class Upload:
    """A multipart upload as tracked by the server."""

    # Alias so callers can write ``Upload.State.queued``.
    State = UploadState

    id: str
    sha256: str
    size: int
    part_size: int
    part_number: int
    state: UploadState

    def to_dict(self) -> dict:
        """Serialize to a json-safe dict (sizes are sent as strings)."""
        return {
            "id": self.id,
            "sha256": self.sha256,
            "size": str(self.size),
            "part_size": str(self.part_size),
            "part_number": self.part_number,
            "state": self.state.value,
        }

    @classmethod
    def from_dict(cls, d: dict) -> Optional["Upload"]:
        """Deserialize from a dict; an empty dict yields ``None``."""
        if not d:
            return None
        return cls(
            id=d["id"],
            sha256=d["sha256"],
            size=int(d["size"]),
            part_size=int(d["part_size"]),
            part_number=d["part_number"],
            state=UploadState(d["state"]),
        )


@dataclass
class Part:
    """A single part of a multipart upload."""

    # Alias so callers can write ``Part.State.finished``.
    State = UploadState

    upload: str
    no: int
    size: int
    state: UploadState
    sha256: Optional[str] = None
    etag: Optional[str] = None
    started_time: Optional[datetime] = None
    finished_time: Optional[datetime] = None
    presigned_url: Optional[str] = None

    def to_dict(self) -> dict:
        """Serialize to a json-safe dict (timestamps become strings)."""
        return {
            "upload": self.upload,
            "no": self.no,
            "size": str(self.size),
            "sha256": self.sha256,
            "etag": self.etag,
            "state": self.state.value,
            "started_time": strftime(self.started_time),
            "finished_time": strftime(self.finished_time),
            "presigned_url": self.presigned_url,
        }

    @classmethod
    def from_dict(cls, d: dict) -> Optional["Part"]:
        """Deserialize from a dict; an empty dict yields ``None``."""
        if not d:
            return None
        return cls(
            upload=d["upload"],
            no=int(d["no"]),
            size=int(d["size"]),
            sha256=d["sha256"],
            etag=d["etag"],
            state=UploadState(d["state"]),
            started_time=safe_timestamp(d["started_time"]) or utcnow(),
            finished_time=safe_timestamp(d["finished_time"]),
            presigned_url=d["presigned_url"],
        )
class Buffer(io.BytesIO):
    """In-memory byte buffer that reports read progress.

    Wraps ``io.BytesIO`` so that every ``read`` advances a click progress
    bar; passing the buffer to ``requests.put`` then shows upload progress
    while the data is streamed.
    """

    def __init__(self, buffer, label=None):
        # The total size is needed up front to size the progress bar.
        self.size = len(buffer)
        if label is None:
            self.bar = None
        else:
            self.bar = click.progressbar(length=self.size, label=label)
        super().__init__(buffer)

    def read(self, n=-1):
        """Read ``n`` bytes (all when -1) and advance the progress bar."""
        chunk = super().read(n)
        if self.bar is not None:
            self.bar.update(len(chunk))
        return chunk

    def reset(self, label=None):
        """Rewind to the start and attach a fresh progress bar."""
        self.seek(0)
        if label is None:
            self.bar = None
        else:
            self.bar = click.progressbar(length=self.size, label=label)


def multipart_upload(
    http: HttpClient,
    filename: str,
    repository: str,
    path: str,
    part_size: Optional[int] = None,
):
    """Upload a file to a repository in server-coordinated parts.

    Args:
        http (HttpClient): Client used to talk to the upload API.
        filename (str): Local path of the file to upload.
        repository (str): Target repository.
        path (str): Destination path inside the repository.
        part_size (int, optional): Requested part size; when omitted the
            server picks one.
    """
    size = os.stat(filename).st_size
    file = io.open(filename, "rb")
    try:
        # The server identifies uploads by content hash, so compute it
        # before starting the upload session.
        sha256 = get_sha256(file, size=size, label="Calculating file SHA 256")
        file.seek(0)

        upload = Upload.from_dict(
            http.post(
                "/upload/start/",
                data={
                    "repository": repository,
                    "path": path,
                    "size": size,
                    "sha256": sha256,
                    "part_size": part_size,
                },
            )
        )

        # If the dataset is already uploaded it will just be added to the
        # repository, no additional uploading will be done.
        if upload.state == Upload.State.exists:
            click.secho(
                f"Dataset already uploaded."
                f"\nAdded to repository: {repository}",
                fg="cyan",
            )
            return

        while True:
            # The server hands out parts one at a time; an empty response
            # (part is None) means every part has been handled.
            part = Part.from_dict(
                http.post(
                    "/upload/part/reserve/", data={"upload_id": upload.id}
                )
            )
            if part is None:
                # No more parts to upload, we finished
                break
            # Parts are 1-indexed; seek to this part's byte offset.
            offset = (part.no - 1) * part.size
            file.seek(offset)

            # buffer = io.BytesIO(file.read(part.size))
            buffer = Buffer(file.read(part.size))
            part.sha256 = get_sha256(
                buffer,
                size=part.size,
                label=f"Calculating SHA 256 for part #{part.no}",
            )
            # Registering the part returns it enriched with a presigned URL.
            part = Part.from_dict(
                http.post("/upload/part/start/", data=part.to_dict())
            )
            buffer.reset(label=f"Uploading part #{part.no}")
            try:
                # The raw bytes go straight to storage via the presigned
                # URL, not through the SotaBench API itself.
                result = requests.put(part.presigned_url, data=buffer)
                part.etag = result.headers.get("ETag", "")
                part.state = Part.State.finished
            except Exception as e:
                logger.exception("Failed to upload: %s", e)
                part.state = Part.State.error
            # Report the part outcome (success or failure) to the server.
            http.post("/upload/part/end/", data=part.to_dict())

        http.post("/upload/end/", data={"upload_id": upload.id})
        click.secho("Upload successfully finished.", fg="cyan")
    finally:
        file.close()
def utcnow() -> datetime:
    """Return tz aware UTC now.

    Bug fix: ``datetime.utcnow()`` returns a *naive* datetime and calling
    ``astimezone`` on a naive value interprets it as local time, so on any
    machine not running in UTC the previous implementation produced a
    timestamp shifted by the local UTC offset. ``datetime.now(timezone.utc)``
    is correct and is the form recommended by the Python documentation.
    """
    return datetime.now(timezone.utc)


# Format used in serialization and deserialization from json.
timestamp_format = "%Y.%m.%dT%H:%M:%S"


def strftime(dt: Optional[datetime]):
    """Format datetime as string (``None`` passes through unchanged)."""
    if dt is None:
        return None
    return dt.strftime(timestamp_format)


# Unsafe timestamp can either be None (when no timestamp is provided),
# string (when we deserialized json) or datetime object.
UnsafeTimestamp = Optional[Union[str, datetime]]

# Safe timestamp is either None (when no timestamp is provided) or a
# timezone aware datetime.
SafeTimestamp = Optional[datetime]


def safe_timestamp(dt: UnsafeTimestamp) -> SafeTimestamp:
    """Returns tz aware UTC SafeTimestamp from UnsafeTimestamp.

    It can receive either str, datetime or None. If it receives None or
    datetime it will return them, since both are valid in object
    serialization.

    None represents missing object and datetime is a valid datetime.

    If it receives a string it's probably a product of json deserialization
    and it should be parsed to a valid datetime object.

    Args:
        dt: None, string serialized datetime or datetime object.
    """
    # If it's None return None
    if dt is None:
        return None

    # If it's a string, it's from deserialized json so strptime it.
    if isinstance(dt, str):
        # Bug fix: strftime serializes UTC timestamps without an offset, so
        # the parsed naive datetime must be tagged as UTC explicitly -
        # astimezone on a naive value would treat it as *local* time.
        return datetime.strptime(dt, timestamp_format).replace(
            tzinfo=timezone.utc
        )

    # Already a datetime: return it converted to timezone aware UTC.
    return dt.astimezone(timezone.utc)
class Version:
    """Package version triple with string formatting helpers."""

    __slots__ = ("major", "minor", "build")

    def __init__(self, major, minor, build):
        self.major = major
        self.minor = minor
        self.build = build

    def __str__(self):
        # Dotted "major.minor.build" form, e.g. "0.0.14".
        parts = (self.major, self.minor, self.build)
        return ".".join(str(part) for part in parts)

    def __repr__(self):
        return (
            f"Version(major={self.major}, minor={self.minor}, "
            f"build={self.build})"
        )


# The single source of truth for the package version.
version = Version(0, 0, 14)
__version__ = str(version)