├── .gitignore ├── README.md ├── environment.yml ├── evals ├── nni │ ├── README.md │ ├── base_model.onnx │ ├── base_model_weight │ ├── env.yml │ ├── keeper_nasbench201_graph.py │ ├── keeper_offline.py │ └── parser.py └── ray_tune │ ├── __init_.py │ ├── keeper_offline.py │ ├── models │ ├── cifarmodels │ │ ├── __init__.py │ │ ├── densenet.py │ │ ├── dla.py │ │ ├── dla_simple.py │ │ ├── dpn.py │ │ ├── efficientnet.py │ │ ├── mobilenet.py │ │ ├── mobilenetv2.py │ │ ├── mobilenetv3.py │ │ ├── model_provider.py │ │ ├── pnasnet.py │ │ ├── preact_resnet.py │ │ ├── preactresnet.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── senet.py │ │ ├── shufflenet.py │ │ ├── shufflenetv2.py │ │ ├── stochasticdepth.py │ │ └── vgg.py │ ├── nasbench │ │ ├── __init__.py │ │ ├── cell_infers │ │ │ ├── __init__.py │ │ │ ├── cells.py │ │ │ ├── nasnet_cifar.py │ │ │ └── tiny_network.py │ │ ├── cell_operations.py │ │ ├── configure_utils.py │ │ └── genotypes.py │ ├── torchcv │ │ ├── __init__.py │ │ ├── model_provider.py │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── airnet.py │ │ │ ├── airnext.py │ │ │ ├── alexnet.py │ │ │ ├── alphapose_coco.py │ │ │ ├── bagnet.py │ │ │ ├── bamresnet.py │ │ │ ├── bisenet.py │ │ │ ├── bninception.py │ │ │ ├── cbamresnet.py │ │ │ ├── centernet.py │ │ │ ├── cgnet.py │ │ │ ├── channelnet.py │ │ │ ├── common.py │ │ │ ├── condensenet.py │ │ │ ├── contextnet.py │ │ │ ├── dabnet.py │ │ │ ├── danet.py │ │ │ ├── darknet.py │ │ │ ├── darknet53.py │ │ │ ├── darts.py │ │ │ ├── deeplabv3.py │ │ │ ├── densenet.py │ │ │ ├── densenet_cifar.py │ │ │ ├── diapreresnet.py │ │ │ ├── diapreresnet_cifar.py │ │ │ ├── diaresnet.py │ │ │ ├── diaresnet_cifar.py │ │ │ ├── dicenet.py │ │ │ ├── diracnetv2.py │ │ │ ├── dla.py │ │ │ ├── dpn.py │ │ │ ├── drn.py │ │ │ ├── edanet.py │ │ │ ├── efficientnet.py │ │ │ ├── efficientnetedge.py │ │ │ ├── enet.py │ │ │ ├── erfnet.py │ │ │ ├── esnet.py │ │ │ ├── espcnet.py │ │ │ ├── espnetv2.py │ │ │ ├── fastscnn.py │ │ │ ├── fastseresnet.py │ │ │ ├── fbnet.py │ │ │ ├── fcn8sd.py │ │ │ ├── fdmobilenet.py │ │ │ ├── fishnet.py │ │ │ ├── fpenet.py │ │ │ ├── fractalnet_cifar.py │ │ │ ├── ghostnet.py │ │ │ ├── hardnet.py │ │ │ ├── hrnet.py │ │ │ ├── ibnbresnet.py │ │ │ ├── ibndensenet.py │ │ │ ├── ibnresnet.py │ │ │ ├── ibnresnext.py │ │ │ ├── ibppose_coco.py │ │ │ ├── icnet.py │ │ │ ├── igcv3.py │ │ │ ├── inceptionresnetv1.py │ │ │ ├── inceptionresnetv2.py │ │ │ ├── inceptionv3.py │ │ │ ├── inceptionv4.py │ │ │ ├── irevnet.py │ │ │ ├── isqrtcovresnet.py │ │ │ ├── jasper.py │ │ │ ├── jasperdr.py │ │ │ ├── lednet.py │ │ │ ├── lffd.py │ │ │ ├── linknet.py │ │ │ ├── lwopenpose_cmupan.py │ │ │ ├── menet.py │ │ │ ├── mixnet.py │ │ │ ├── mnasnet.py │ │ │ ├── mobilenet.py │ │ │ ├── mobilenet_cub.py │ │ │ ├── mobilenetb.py │ │ │ ├── mobilenetv2.py │ │ │ ├── mobilenetv3.py │ │ │ ├── model_store.py │ │ │ ├── msdnet.py │ │ │ ├── msdnet_cifar10.py │ │ │ ├── nasnet.py │ │ │ ├── nin_cifar.py │ │ │ ├── ntsnet_cub.py │ │ │ ├── nvpattexp.py │ │ │ ├── octresnet.py │ │ │ ├── others │ │ │ ├── __init__.py │ │ │ ├── _espnet.py │ │ │ ├── _inceptionresnetv1_.py │ │ │ ├── oth_espnet.py │ │ │ ├── oth_inception_resnet_v1.py │ │ │ ├── oth_quartznet.py │ │ │ └── oth_vit.py │ │ │ ├── peleenet.py │ │ │ ├── pfpcnet.py │ │ │ ├── pnasnet.py │ │ │ ├── polynet.py │ │ │ ├── preresnet.py │ │ │ ├── preresnet_cifar.py │ │ │ ├── prnet.py │ │ │ ├── proxylessnas.py │ │ │ ├── proxylessnas_cub.py │ │ │ ├── pspnet.py │ │ │ ├── pyramidnet.py │ │ │ ├── pyramidnet_cifar.py │ │ │ ├── quartznet.py │ │ │ ├── regnet.py │ │ │ ├── 
resattnet.py │ │ │ ├── resdropresnet_cifar.py │ │ │ ├── resnesta.py │ │ │ ├── resnet.py │ │ │ ├── resnet_cifar.py │ │ │ ├── resnet_cub.py │ │ │ ├── resneta.py │ │ │ ├── resnetd.py │ │ │ ├── resnext.py │ │ │ ├── resnext_cifar.py │ │ │ ├── revnet.py │ │ │ ├── rir_cifar.py │ │ │ ├── ror_cifar.py │ │ │ ├── scnet.py │ │ │ ├── segnet.py │ │ │ ├── selecsls.py │ │ │ ├── senet.py │ │ │ ├── sepreresnet.py │ │ │ ├── sepreresnet_cifar.py │ │ │ ├── seresnet.py │ │ │ ├── seresnet_cifar.py │ │ │ ├── seresnet_cub.py │ │ │ ├── seresnext.py │ │ │ ├── shakedropresnet_cifar.py │ │ │ ├── shakeshakeresnet_cifar.py │ │ │ ├── sharesnet.py │ │ │ ├── shufflenet.py │ │ │ ├── shufflenetv2.py │ │ │ ├── shufflenetv2b.py │ │ │ ├── simplepose_coco.py │ │ │ ├── simpleposemobile_coco.py │ │ │ ├── sinet.py │ │ │ ├── sknet.py │ │ │ ├── sparsenet.py │ │ │ ├── spnasnet.py │ │ │ ├── sqnet.py │ │ │ ├── squeezenet.py │ │ │ ├── squeezenext.py │ │ │ ├── superpointnet.py │ │ │ ├── tresnet.py │ │ │ ├── unet.py │ │ │ ├── vgg.py │ │ │ ├── visemenet.py │ │ │ ├── voca.py │ │ │ ├── vovnet.py │ │ │ ├── wrn.py │ │ │ ├── wrn1bit_cifar.py │ │ │ ├── wrn_cifar.py │ │ │ ├── xception.py │ │ │ ├── xdensenet.py │ │ │ ├── xdensenet_cifar.py │ │ │ └── zfnet.py │ └── vgg.py │ ├── onlinescheduler.py │ ├── ray_tuner.py │ ├── setup │ ├── cluster_manager.py │ └── conf.yml │ ├── thirdparty │ ├── __init__.py │ ├── calculate_ged.py │ ├── custom_rnn.py │ ├── data.py │ ├── embed_regularize.py │ ├── locked_dropout.py │ ├── main_one_model_train.py │ ├── model.py │ ├── multilinear.py │ ├── nas_environment.py │ ├── search_space.py │ ├── splitcross.py │ ├── train.py │ ├── utils.py │ └── weight_drop.py │ └── workloads │ ├── imgclsmob-large.csv │ ├── nlp_list.csv │ ├── nlp_nwp.csv │ ├── torchcv_list │ ├── torchcv_list.csv │ ├── workload-1.csv │ ├── workload-2.csv │ ├── workload-3.csv │ ├── workload-4.csv │ ├── workload-5.csv │ ├── workload-6.csv │ ├── workload-7.csv │ └── workload-8.csv ├── examples ├── __init__.py ├── train_cifar10.py └── train_mnist.py ├── install.sh ├── modelkeeper ├── __init__.py ├── aed_matcher.py ├── analyze_zoo.py ├── backend │ ├── Make │ ├── bin │ │ └── matcher.so │ ├── json.hpp │ ├── main.cpp │ ├── matcher.cpp │ ├── matcher.hpp │ ├── test.cpp │ └── test.py ├── clientservice.py ├── clustering.py ├── config.py ├── dryrun_clustering.py ├── evictor.py ├── generate_zoo.py ├── gml_export.py ├── keeper_start.py ├── mapper.py ├── matcher.py ├── nettransformer.py └── test.py ├── setup.py └── tests ├── test_map.py └── tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | plot/ 4 | *.py[cod] 5 | *$py.class 6 | *.swp 7 | # C extensions 8 | *.png 9 | data/ 10 | /build/ 11 | /dist/ 12 | /modelkeeper_backend.chu_liu_edmonds.egg-info/ 13 | 14 | # Distribution / packaging 15 | .Python 16 | __pycache__/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | .static_storage/ 62 | .media/ 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | 112 | #log 113 | *.e 114 | *.o 115 | _generated_model/ 116 | lightning_logs/ 117 | .DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ModelKeeper 2 | 3 | This repository contains the evaluation artifacts of our NSDI '23 paper "[ModelKeeper: Accelerating DNN Training via Automated Training Warmup](https://symbioticlab.org/publications/files/modelkeeper:nsdi23/modelkeeper-nsdi23.pdf)". 4 | 5 | **ModelKeeper is being merged into [FedScale](https://github.com/SymbioticLab/FedScale) and is actively maintained there. Please try it!** 6 | 7 | # Overview 8 | 9 | * [Getting Started](#getting-started) 10 | * [Run Experiments](#run-experiments) 11 | * [Repo Structure](#repo-structure) 12 | * [Contact](#contact) 13 | 14 | # Getting Started 15 | 16 | Our ```install.sh``` will install the following automatically: 17 | 18 | * Anaconda Package Manager 19 | * CUDA 10.2 20 | 21 | Note: if you prefer different versions of conda and CUDA, please check the comments in `install.sh` for details. 22 | 23 | Run the following commands to install ModelKeeper. 24 | 25 | ``` 26 | source install.sh 27 | pip install -e . 28 | ``` 29 | 30 | # Run Experiments 31 | 32 | # Repo Structure 33 | 34 | ``` 35 | Repo Root 36 | |---- modelkeeper # Core implementation (e.g., Matcher). 37 | |---- evals # MK support for different training backends 38 | |---- ray_tune # Ray experiments 39 | |---- nni # Retiarii experiments 40 | |---- examples # Toy experiments of model transformation 41 | ``` 42 | 43 | # Notes 44 | Please consider citing our paper if you use the code or data in your research project. 45 | ```bibtex 46 | @inproceedings{modelkeeper-nsdi23, 47 | title={ModelKeeper: Accelerating DNN Training via Automated Training Warmup}, 48 | author={Fan Lai and Yinwei Dai and Harsha V. Madhyastha and Mosharaf Chowdhury}, 49 | booktitle={USENIX Symposium on Networked Systems Design and Implementation (NSDI)}, 50 | year={2023} 51 | } 52 | ``` 53 | 54 | # Contact 55 | Fan Lai (fanlai@umich.edu) and Yinwei Dai (yinweid@princeton.edu).
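For quick reference, here is a minimal sketch of registering a trained model to the zoo offline, mirroring the client calls in `evals/ray_tune/keeper_offline.py` (the ONNX file path and accuracy value below are illustrative placeholders):

```python
# Minimal sketch: offline registration of a model to the ModelKeeper zoo,
# mirroring evals/ray_tune/keeper_offline.py. The ONNX path and the
# accuracy value are illustrative placeholders.
from modelkeeper.clientservice import ModelKeeperClient
from modelkeeper.config import modelkeeper_config

client = ModelKeeperClient(modelkeeper_config)  # connect using the default config
client.register_model_to_zoo('my_model.onnx', accuracy=92.5)  # add the model and its accuracy to the zoo
client.stop()
```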
56 | 57 | 58 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: modelkeeper 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - anaconda 6 | - defaults 7 | dependencies: 8 | - python=3.6 9 | - numba=0.49.1 10 | - pip 11 | - pip: 12 | - torchvision 13 | - transformers 14 | - matplotlib==3.1.3 15 | - tensorboardX==2.1 16 | - scipy 17 | - PyYAML 18 | - gdown 19 | - ray==1.8.0 20 | - networkx 21 | - onnx 22 | - paramiko 23 | - scp 24 | - pandas 25 | - ray[tune] 26 | - sentencepiece 27 | - datasets 28 | - ortools 29 | - torch 30 | -------------------------------------------------------------------------------- /evals/nni/README.md: -------------------------------------------------------------------------------- 1 | ## Install the dependency 2 | 3 | You can simply run `install.sh`. 4 | 5 | ``` 6 | conda env create -f environment.yml 7 | ``` 8 | 9 | ## Setting Up GPU Cluster 10 | 11 | **Note:** 12 | Please ensure that these paths are consistent across all nodes so that ModelKeeper can find the right path. 13 | 14 | - ***Coordinator node***: Make sure that the coordinator (master node) has access to other worker nodes via ```ssh```. 15 | 16 | - ***All nodes***: Follow the same dependency setup. 17 | 18 | ## Running the experiment 19 | ``` 20 | python keeper_nasbench201_graph.py --use_keeper --user=xxx --num_nodes=xxx --num_gpu_per_nodes=xxx --max_trial_number_per_gpu=xxx --max_trial_number=xxx 21 | ``` -------------------------------------------------------------------------------- /evals/nni/base_model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/nni/base_model.onnx -------------------------------------------------------------------------------- /evals/nni/base_model_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/nni/base_model_weight -------------------------------------------------------------------------------- /evals/nni/env.yml: -------------------------------------------------------------------------------- 1 | name: nni-mk 2 | channels: 3 | - gurobi 4 | - pytorch 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1 9 | - _openmp_mutex=4.5 10 | - audioread=2.1.9 11 | - bzip2=1.0.8 12 | - ca-certificates=2021.10.8 13 | - certifi=2021.5.30 14 | - cffi=1.14.6 15 | - cycler=0.10.0 16 | - ffmpeg=4.3 17 | - freetype=2.10.4 18 | - gettext=0.19.8.1 19 | - gmp=6.2.1 20 | - gnutls=3.6.13 21 | - gurobi=9.1.0 22 | - jbig=2.1 23 | - joblib=1.1.0 24 | - jpeg=9d 25 | - kiwisolver=1.3.1 26 | - lame=3.100 27 | - lcms2=2.12 28 | - ld_impl_linux-64=2.36.1 29 | - lerc=3.0 30 | - libblas=3.9.0 31 | - libcblas=3.9.0 32 | - libdeflate=1.8 33 | - libffi=3.4.2 34 | - libflac=1.3.3 35 | - libgcc-ng=11.2.0 36 | - libgfortran-ng=11.2.0 37 | - libgfortran5=11.2.0 38 | - libgomp=11.2.0 39 | - libiconv=1.16 40 | - liblapack=3.9.0 41 | - libllvm8=8.0.1 42 | - libogg=1.3.4 43 | - libopenblas=0.3.17 44 | - libopus=1.3.1 45 | - libpng=1.6.37 46 | - librosa=0.7.2 47 | - libsndfile=1.0.31 48 | - libstdcxx-ng=11.2.0 49 | - libtiff=4.3.0 50 | - libvorbis=1.3.7 51 | - libwebp-base=1.2.1 52 | - libzlib=1.2.11 53 | - llvmlite=0.32.1 54 | - lz4-c=1.9.3 55 | - mad=0.15.1b 56 | -
ncurses=6.2 57 | - nettle=3.6 58 | - numba=0.49.1 59 | - numpy=1.17.5 60 | - olefile=0.46 61 | - openh264=2.1.1 62 | - openjpeg=2.4.0 63 | - openssl=1.1.1l 64 | - pillow=8.3.2 65 | - pip=20.0.2 66 | - pycparser=2.20 67 | - pyparsing=2.4.7 68 | - pysoundfile=0.10.3.post1 69 | - python=3.6.13 70 | - python-dateutil=2.8.2 71 | - python_abi=3.6 72 | - readline=8.1 73 | - resampy=0.2.2 74 | - scikit-learn=0.24.2 75 | - setuptools=58.0.4 76 | - sqlite=3.36.0 77 | - threadpoolctl=3.0.0 78 | - tk=8.6.11 79 | - tornado=6.1 80 | - wheel=0.37.0 81 | - xz=5.2.5 82 | - zlib=1.2.11 83 | - zstd=1.5.0 84 | - pip: 85 | - absl-py==0.10.0 86 | - aiohttp==3.7.4.post0 87 | - astor==0.8.1 88 | - astunparse==1.6.3 89 | - async-timeout==3.0.1 90 | - attrs==20.3.0 91 | - bcrypt==4.0.0 92 | - cached-property==1.5.2 93 | - cachetools==4.2.4 94 | - chardet==4.0.0 95 | - charset-normalizer==2.0.7 96 | - click==8.0.3 97 | - colorama==0.4.4 98 | - contextlib2==21.6.0 99 | - cryptography==38.0.1 100 | - dataclasses==0.8 101 | - decorator==4.4.2 102 | - dill==0.3.4 103 | - drill==1.2.0 104 | - filelock==3.3.0 105 | - fsspec==2021.10.1 106 | - future==0.18.2 107 | - gast==0.3.3 108 | - gdown==4.0.2 109 | - google-auth==1.35.0 110 | - google-auth-oauthlib==0.4.6 111 | - google-pasta==0.2.0 112 | - grpcio==1.41.0 113 | - h5py==3.1.0 114 | - huggingface-hub==0.0.19 115 | - hyperopt==0.1.2 116 | - idna==3.3 117 | - idna-ssl==1.1.0 118 | - importlib-metadata==4.8.1 119 | - json-tricks==3.15.5 120 | - keras-preprocessing==1.1.2 121 | - keras-tuner==1.0.2 122 | - markdown==3.3.4 123 | - matplotlib==3.1.3 124 | - ml-metadata==0.26.0 125 | - mock==4.0.3 126 | - multidict==5.2.0 127 | - networkx==2.5.1 128 | - nni==2.4 129 | - oauthlib==3.1.1 130 | - onnx==1.4.1 131 | - opt-einsum==3.3.0 132 | - ortools==9.4.1874 133 | - packaging==21.0 134 | - pandas==1.1.0 135 | - paramiko==2.11.0 136 | - prettytable==2.2.1 137 | - protobuf==3.18.1 138 | - psutil==5.8.0 139 | - pyasn1==0.4.8 140 | - pyasn1-modules==0.2.8 141 | - pybind11==2.8.0 142 | - pydeprecate==0.3.1 143 | - pyemd==0.5.1 144 | - pymongo==3.12.0 145 | - pynacl==1.5.0 146 | - python-levenshtein==0.12.0 147 | - pythonwebhdfs==0.2.3 148 | - pytorch-lightning==1.4.9 149 | - pytz==2021.3 150 | - pyyaml==6.0 151 | - regex==2021.10.8 152 | - requests==2.26.0 153 | - requests-oauthlib==1.3.0 154 | - responses==0.14.0 155 | - rsa==4.7.2 156 | - sacremoses==0.0.46 157 | - schema==0.7.4 158 | - scipy==1.4.1 159 | - scp==0.14.4 160 | - simplejson==3.17.5 161 | - six==1.15.0 162 | - sklearn==0.0 163 | - sox==1.3.7 164 | - tabulate==0.8.9 165 | - tensorboard==2.2.2 166 | - tensorboard-plugin-wit==1.8.0 167 | - tensorboardx==2.1 168 | - tensorflow==2.2.0 169 | - tensorflow-estimator==2.2.0 170 | - termcolor==1.1.0 171 | - terminaltables==3.1.0 172 | - tf-slim==1.1.0 173 | - timm==0.4.12 174 | - tokenizers==0.10.3 175 | - torch==1.9.1 176 | - torch-baidu-ctc==0.3.0 177 | - torchmetrics==0.5.1 178 | - torchvision==0.10.1 179 | - tqdm==4.62.3 180 | - transformers==4.11.3 181 | - typing==3.7.4.3 182 | - typing-extensions==3.10.0.2 183 | - urllib3==1.26.7 184 | - wcwidth==0.2.5 185 | - websockets==9.1 186 | - werkzeug==2.0.2 187 | - wrapt==1.13.2 188 | - yarl==1.7.0 189 | - zipp==3.6.0 -------------------------------------------------------------------------------- /evals/nni/keeper_offline.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Offline register of model keeper client service 3 | ''' 4 | 5 | from modelkeeper.config import modelkeeper_config 
6 | from modelkeeper.clientservice import ModelKeeperClient 7 | 8 | import argparse 9 | import logging 10 | import pickle 11 | import torch 12 | import time 13 | import os 14 | 15 | log_path = './modelkeeper_log' 16 | logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)s %(message)s', 17 | datefmt='%H:%M:%S', 18 | level=logging.INFO, 19 | handlers=[ 20 | logging.FileHandler(log_path, mode='a'), 21 | logging.StreamHandler() 22 | ]) 23 | 24 | parser = argparse.ArgumentParser(description="ModelKeeper offline client APIs") 25 | parser.add_argument('--task', type=str, default='cv') 26 | parser.add_argument('--model_file', type=str, default=None) 27 | parser.add_argument('--export_path', type=str, default=None) 28 | parser.add_argument('--accuracy', type=float, default=-1) 29 | 30 | def register_model(model_file, export_path, accuracy): 31 | # with open(model_file, 'rb') as fin: 32 | # model = pickle.load(fin) 33 | # dummy_input = pickle.load(fin) 34 | 35 | #os.remove(model_file) 36 | # torch.onnx.export(model, dummy_input, export_path, export_params=True, verbose=0, training=1) 37 | 38 | # register model to the zoo 39 | modelkeeper_client = ModelKeeperClient(modelkeeper_config) 40 | modelkeeper_client.register_model_to_zoo(export_path, accuracy=accuracy) 41 | modelkeeper_client.stop() 42 | os.remove(export_path) 43 | 44 | 45 | args, unknown = parser.parse_known_args() 46 | 47 | logging.info(f"Starting to upload {args.model_file}") 48 | register_model(args.model_file, args.model_file, args.accuracy) 49 | logging.info(f"Successfully uploaded model {args.model_file} to the zoo") 50 | -------------------------------------------------------------------------------- /evals/nni/parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | 4 | json_file_path = "./experiment_random.json" 5 | 6 | choices = ["layerchoice_cell__0_1_", "layerchoice_cell__0_2_", "layerchoice_cell__1_2_", "layerchoice_cell__0_3_", "layerchoice_cell__1_3_", "layerchoice_cell__2_3_"] 7 | 8 | trial_count = defaultdict(int) 9 | trial_idx = [] 10 | 11 | with open(json_file_path, 'r') as j: 12 | contents = json.loads(j.read()) 13 | trials = contents.get("trialMessage") 14 | count = 0 15 | for i, trial in enumerate(trials): 16 | if trial["status"] == "SUCCEEDED": 17 | hp = json.loads(trial["hyperParameters"][0]) 18 | if trial_count[hp["parameter_id"]] != 0: 19 | continue 20 | else: 21 | trial_count[hp["parameter_id"]] += 1 22 | script = hp["parameters"]["model_script"] 23 | config = [] 24 | for choice in choices: 25 | result = script.find(choice) 26 | config.append(script[result: result+len(choice)+18].split()[0].split("_", 5)[-1]) 27 | config = '-'.join(config) 28 | print(hp["parameter_id"], config) 29 | # print(trial["finalMetricData"]) 30 | count += 1 31 | # if count > 200: 32 | # break 33 | print(len(trial_count)) 34 | print(count) 35 | -------------------------------------------------------------------------------- /evals/ray_tune/__init_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/__init_.py -------------------------------------------------------------------------------- /evals/ray_tune/keeper_offline.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Offline register of model keeper client service 3 | ''' 4 | 5 | import argparse 6 |
import logging 7 | import os 8 | import pickle 9 | import time 10 | 11 | import torch 12 | 13 | from modelkeeper.clientservice import ModelKeeperClient 14 | from modelkeeper.config import modelkeeper_config 15 | 16 | log_path = './modelkeeper_log' 17 | logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)s %(message)s', 18 | datefmt='%H:%M:%S', 19 | level=logging.INFO, 20 | handlers=[ 21 | logging.FileHandler(log_path, mode='a'), 22 | logging.StreamHandler() 23 | ]) 24 | 25 | parser = argparse.ArgumentParser(description="ModelKeeper offline client APIs") 26 | parser.add_argument('--task', type=str, default='cv') 27 | parser.add_argument('--model_file', type=str, default=None) 28 | parser.add_argument('--export_path', type=str, default=None) 29 | parser.add_argument('--accuracy', type=float, default=-1) 30 | 31 | def register_model(model_file, export_path, accuracy): 32 | with open(model_file, 'rb') as fin: 33 | model = pickle.load(fin) 34 | dummy_input = pickle.load(fin) 35 | 36 | #os.remove(model_file) 37 | torch.onnx.export(model, dummy_input, export_path, export_params=True, verbose=0, training=1, do_constant_folding=False) 38 | 39 | # register model to the zoo 40 | modelkeeper_client = ModelKeeperClient(modelkeeper_config) 41 | modelkeeper_client.register_model_to_zoo(export_path, accuracy=accuracy) 42 | modelkeeper_client.stop() 43 | os.remove(export_path) 44 | 45 | 46 | args, unknown = parser.parse_known_args() 47 | 48 | logging.info(f"Starting to upload {args.model_file}") 49 | register_model(args.model_file, args.model_file, args.accuracy) 50 | logging.info(f"Successfully uploaded model {args.model_file} to the zoo") 51 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/densenet.py: -------------------------------------------------------------------------------- 1 | '''DenseNet in PyTorch.''' 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Bottleneck(nn.Module): 10 | def __init__(self, in_planes, growth_rate): 11 | super(Bottleneck, self).__init__() 12 | self.bn1 = nn.BatchNorm2d(in_planes) 13 | self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) 14 | self.bn2 = nn.BatchNorm2d(4*growth_rate) 15 | self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) 16 | 17 | def forward(self, x): 18 | out = self.conv1(F.relu(self.bn1(x))) 19 | out = self.conv2(F.relu(self.bn2(out))) 20 | out = torch.cat([out,x], 1) 21 | return out 22 | 23 | 24 | class Transition(nn.Module): 25 | def __init__(self, in_planes, out_planes): 26 | super(Transition, self).__init__() 27 | self.bn = nn.BatchNorm2d(in_planes) 28 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) 29 | 30 | def forward(self, x): 31 | out = self.conv(F.relu(self.bn(x))) 32 | out = F.avg_pool2d(out, 2) 33 | return out 34 | 35 | 36 | class DenseNet(nn.Module): 37 | def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): 38 | super(DenseNet, self).__init__() 39 | self.growth_rate = growth_rate 40 | 41 | num_planes = 2*growth_rate 42 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) 43 | 44 | self.dense1 = self._make_dense_layers(block, num_planes,
nblocks[0]) 45 | num_planes += nblocks[0]*growth_rate 46 | out_planes = int(math.floor(num_planes*reduction)) 47 | self.trans1 = Transition(num_planes, out_planes) 48 | num_planes = out_planes 49 | 50 | self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) 51 | num_planes += nblocks[1]*growth_rate 52 | out_planes = int(math.floor(num_planes*reduction)) 53 | self.trans2 = Transition(num_planes, out_planes) 54 | num_planes = out_planes 55 | 56 | self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) 57 | num_planes += nblocks[2]*growth_rate 58 | out_planes = int(math.floor(num_planes*reduction)) 59 | self.trans3 = Transition(num_planes, out_planes) 60 | num_planes = out_planes 61 | 62 | self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) 63 | num_planes += nblocks[3]*growth_rate 64 | 65 | self.bn = nn.BatchNorm2d(num_planes) 66 | self.linear = nn.Linear(num_planes, num_classes) 67 | 68 | def _make_dense_layers(self, block, in_planes, nblock): 69 | layers = [] 70 | for i in range(nblock): 71 | layers.append(block(in_planes, self.growth_rate)) 72 | in_planes += self.growth_rate 73 | return nn.Sequential(*layers) 74 | 75 | def forward(self, x): 76 | out = self.conv1(x) 77 | out = self.trans1(self.dense1(out)) 78 | out = self.trans2(self.dense2(out)) 79 | out = self.trans3(self.dense3(out)) 80 | out = self.dense4(out) 81 | out = F.avg_pool2d(F.relu(self.bn(out)), 4) 82 | out = out.view(out.size(0), -1) 83 | out = self.linear(out) 84 | return out 85 | 86 | def DenseNet121(num_classes=10): 87 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32, num_classes=num_classes) 88 | 89 | def DenseNet169(num_classes=10): 90 | return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32, num_classes=num_classes) 91 | 92 | def DenseNet201(num_classes=10): 93 | return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32, num_classes=num_classes) 94 | 95 | def DenseNet161(num_classes=10): 96 | return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48, num_classes=num_classes) 97 | 98 | def densenet_cifar(): 99 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) 100 | 101 | def test(): 102 | net = densenet_cifar() 103 | x = torch.randn(1,3,32,32) 104 | y = net(x) 105 | print(y) 106 | 107 | # test() 108 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/dla.py: -------------------------------------------------------------------------------- 1 | '''DLA in PyTorch. 2 | 3 | Reference: 4 | Deep Layer Aggregation. 
https://arxiv.org/abs/1707.06484 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class BasicBlock(nn.Module): 12 | expansion = 1 13 | 14 | def __init__(self, in_planes, planes, stride=1): 15 | super(BasicBlock, self).__init__() 16 | self.conv1 = nn.Conv2d( 17 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 20 | stride=1, padding=1, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | 23 | self.shortcut = nn.Sequential() 24 | if stride != 1 or in_planes != self.expansion*planes: 25 | self.shortcut = nn.Sequential( 26 | nn.Conv2d(in_planes, self.expansion*planes, 27 | kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = self.bn2(self.conv2(out)) 34 | out += self.shortcut(x) 35 | out = F.relu(out) 36 | return out 37 | 38 | 39 | class Root(nn.Module): 40 | def __init__(self, in_channels, out_channels, kernel_size=1): 41 | super(Root, self).__init__() 42 | self.conv = nn.Conv2d( 43 | in_channels, out_channels, kernel_size, 44 | stride=1, padding=(kernel_size - 1) // 2, bias=False) 45 | self.bn = nn.BatchNorm2d(out_channels) 46 | 47 | def forward(self, xs): 48 | x = torch.cat(xs, 1) 49 | out = F.relu(self.bn(self.conv(x))) 50 | return out 51 | 52 | 53 | class Tree(nn.Module): 54 | def __init__(self, block, in_channels, out_channels, level=1, stride=1): 55 | super(Tree, self).__init__() 56 | self.level = level 57 | if level == 1: 58 | self.root = Root(2*out_channels, out_channels) 59 | self.left_node = block(in_channels, out_channels, stride=stride) 60 | self.right_node = block(out_channels, out_channels, stride=1) 61 | else: 62 | self.root = Root((level+2)*out_channels, out_channels) 63 | for i in reversed(range(1, level)): 64 | subtree = Tree(block, in_channels, out_channels, 65 | level=i, stride=stride) 66 | self.__setattr__('level_%d' % i, subtree) 67 | self.prev_root = block(in_channels, out_channels, stride=stride) 68 | self.left_node = block(out_channels, out_channels, stride=1) 69 | self.right_node = block(out_channels, out_channels, stride=1) 70 | 71 | def forward(self, x): 72 | xs = [self.prev_root(x)] if self.level > 1 else [] 73 | for i in reversed(range(1, self.level)): 74 | level_i = self.__getattr__('level_%d' % i) 75 | x = level_i(x) 76 | xs.append(x) 77 | x = self.left_node(x) 78 | xs.append(x) 79 | x = self.right_node(x) 80 | xs.append(x) 81 | out = self.root(xs) 82 | return out 83 | 84 | 85 | class DLA(nn.Module): 86 | def __init__(self, block=BasicBlock, num_classes=10): 87 | super(DLA, self).__init__() 88 | self.base = nn.Sequential( 89 | nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False), 90 | nn.BatchNorm2d(16), 91 | nn.ReLU(True) 92 | ) 93 | 94 | self.layer1 = nn.Sequential( 95 | nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False), 96 | nn.BatchNorm2d(16), 97 | nn.ReLU(True) 98 | ) 99 | 100 | self.layer2 = nn.Sequential( 101 | nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False), 102 | nn.BatchNorm2d(32), 103 | nn.ReLU(True) 104 | ) 105 | 106 | self.layer3 = Tree(block, 32, 64, level=1, stride=1) 107 | self.layer4 = Tree(block, 64, 128, level=2, stride=2) 108 | self.layer5 = Tree(block, 128, 256, level=2, stride=2) 109 | self.layer6 = Tree(block, 256, 512, level=1, stride=2) 110 | self.linear = nn.Linear(512, num_classes) 111 | 112 
| def forward(self, x): 113 | out = self.base(x) 114 | out = self.layer1(out) 115 | out = self.layer2(out) 116 | out = self.layer3(out) 117 | out = self.layer4(out) 118 | out = self.layer5(out) 119 | out = self.layer6(out) 120 | out = F.avg_pool2d(out, 4) 121 | out = out.view(out.size(0), -1) 122 | out = self.linear(out) 123 | return out 124 | 125 | 126 | def test(): 127 | net = DLA() 128 | print(net) 129 | x = torch.randn(1, 3, 32, 32) 130 | y = net(x) 131 | print(y.size()) 132 | 133 | 134 | if __name__ == '__main__': 135 | test() 136 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/dla_simple.py: -------------------------------------------------------------------------------- 1 | '''Simplified version of DLA in PyTorch. 2 | 3 | Note that this implementation is not identical to the original paper version, 4 | but it seems to work fine. 5 | 6 | See dla.py for the original paper version. 7 | 8 | Reference: 9 | Deep Layer Aggregation. https://arxiv.org/abs/1707.06484 10 | ''' 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, in_planes, planes, stride=1): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = nn.Conv2d( 22 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 23 | self.bn1 = nn.BatchNorm2d(planes) 24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 25 | stride=1, padding=1, bias=False) 26 | self.bn2 = nn.BatchNorm2d(planes) 27 | 28 | self.shortcut = nn.Sequential() 29 | if stride != 1 or in_planes != self.expansion*planes: 30 | self.shortcut = nn.Sequential( 31 | nn.Conv2d(in_planes, self.expansion*planes, 32 | kernel_size=1, stride=stride, bias=False), 33 | nn.BatchNorm2d(self.expansion*planes) 34 | ) 35 | 36 | def forward(self, x): 37 | out = F.relu(self.bn1(self.conv1(x))) 38 | out = self.bn2(self.conv2(out)) 39 | out += self.shortcut(x) 40 | out = F.relu(out) 41 | return out 42 | 43 | 44 | class Root(nn.Module): 45 | def __init__(self, in_channels, out_channels, kernel_size=1): 46 | super(Root, self).__init__() 47 | self.conv = nn.Conv2d( 48 | in_channels, out_channels, kernel_size, 49 | stride=1, padding=(kernel_size - 1) // 2, bias=False) 50 | self.bn = nn.BatchNorm2d(out_channels) 51 | 52 | def forward(self, xs): 53 | x = torch.cat(xs, 1) 54 | out = F.relu(self.bn(self.conv(x))) 55 | return out 56 | 57 | 58 | class Tree(nn.Module): 59 | def __init__(self, block, in_channels, out_channels, level=1, stride=1): 60 | super(Tree, self).__init__() 61 | self.root = Root(2*out_channels, out_channels) 62 | if level == 1: 63 | self.left_tree = block(in_channels, out_channels, stride=stride) 64 | self.right_tree = block(out_channels, out_channels, stride=1) 65 | else: 66 | self.left_tree = Tree(block, in_channels, 67 | out_channels, level=level-1, stride=stride) 68 | self.right_tree = Tree(block, out_channels, 69 | out_channels, level=level-1, stride=1) 70 | 71 | def forward(self, x): 72 | out1 = self.left_tree(x) 73 | out2 = self.right_tree(out1) 74 | out = self.root([out1, out2]) 75 | return out 76 | 77 | 78 | class SimpleDLA(nn.Module): 79 | def __init__(self, block=BasicBlock, num_classes=10): 80 | super(SimpleDLA, self).__init__() 81 | self.base = nn.Sequential( 82 | nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False), 83 | nn.BatchNorm2d(16), 84 | nn.ReLU(True) 85 | ) 86 | 87 | self.layer1 = nn.Sequential( 88 | nn.Conv2d(16, 16, kernel_size=3,
stride=1, padding=1, bias=False), 89 | nn.BatchNorm2d(16), 90 | nn.ReLU(True) 91 | ) 92 | 93 | self.layer2 = nn.Sequential( 94 | nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False), 95 | nn.BatchNorm2d(32), 96 | nn.ReLU(True) 97 | ) 98 | 99 | self.layer3 = Tree(block, 32, 64, level=1, stride=1) 100 | self.layer4 = Tree(block, 64, 128, level=2, stride=2) 101 | self.layer5 = Tree(block, 128, 256, level=2, stride=2) 102 | self.layer6 = Tree(block, 256, 512, level=1, stride=2) 103 | self.linear = nn.Linear(512, num_classes) 104 | 105 | def forward(self, x): 106 | out = self.base(x) 107 | out = self.layer1(out) 108 | out = self.layer2(out) 109 | out = self.layer3(out) 110 | out = self.layer4(out) 111 | out = self.layer5(out) 112 | out = self.layer6(out) 113 | out = F.avg_pool2d(out, 4) 114 | out = out.view(out.size(0), -1) 115 | out = self.linear(out) 116 | return out 117 | 118 | 119 | def test(): 120 | net = SimpleDLA() 121 | print(net) 122 | x = torch.randn(1, 3, 32, 32) 123 | y = net(x) 124 | print(y.size()) 125 | 126 | 127 | if __name__ == '__main__': 128 | test() 129 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/dpn.py: -------------------------------------------------------------------------------- 1 | '''Dual Path Networks in PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Bottleneck(nn.Module): 8 | def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): 9 | super(Bottleneck, self).__init__() 10 | self.out_planes = out_planes 11 | self.dense_depth = dense_depth 12 | 13 | self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(in_planes) 15 | self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) 16 | self.bn2 = nn.BatchNorm2d(in_planes) 17 | self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False) 18 | self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) 19 | 20 | self.shortcut = nn.Sequential() 21 | if first_layer: 22 | self.shortcut = nn.Sequential( 23 | nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), 24 | nn.BatchNorm2d(out_planes+dense_depth) 25 | ) 26 | 27 | def forward(self, x): 28 | out = F.relu(self.bn1(self.conv1(x))) 29 | out = F.relu(self.bn2(self.conv2(out))) 30 | out = self.bn3(self.conv3(out)) 31 | x = self.shortcut(x) 32 | d = self.out_planes 33 | out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) 34 | out = F.relu(out) 35 | return out 36 | 37 | 38 | class DPN(nn.Module): 39 | def __init__(self, cfg, num_classes=10): 40 | super(DPN, self).__init__() 41 | in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] 42 | num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] 43 | 44 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 45 | self.bn1 = nn.BatchNorm2d(64) 46 | self.last_planes = 64 47 | self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) 48 | self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) 49 | self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) 50 | self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) 51 | self.linear = 
nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], num_classes) 52 | 53 | def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): 54 | strides = [stride] + [1]*(num_blocks-1) 55 | layers = [] 56 | for i,stride in enumerate(strides): 57 | layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) 58 | self.last_planes = out_planes + (i+2) * dense_depth 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = self.layer1(out) 64 | out = self.layer2(out) 65 | out = self.layer3(out) 66 | out = self.layer4(out) 67 | out = F.avg_pool2d(out, 4) 68 | out = out.view(out.size(0), -1) 69 | out = self.linear(out) 70 | return out 71 | 72 | 73 | def DPN26(num_classes=10): 74 | cfg = { 75 | 'in_planes': (96,192,384,768), 76 | 'out_planes': (256,512,1024,2048), 77 | 'num_blocks': (2,2,2,2), 78 | 'dense_depth': (16,32,24,128) 79 | } 80 | return DPN(cfg, num_classes=num_classes) 81 | 82 | def DPN68(num_classes=10): 83 | cfg = { 84 | 'in_planes': (96,192,384,768), 85 | 'out_planes': (256,512,1024,2048), 86 | 'num_blocks': (2,2,12,3), 87 | 'dense_depth': (16,32,32,64) 88 | } 89 | return DPN(cfg, num_classes=num_classes) 90 | 91 | 92 | def DPN92(num_classes=10): 93 | cfg = { 94 | 'in_planes': (96,192,384,768), 95 | 'out_planes': (256,512,1024,2048), 96 | 'num_blocks': (3,4,20,3), 97 | 'dense_depth': (16,32,24,128) 98 | } 99 | return DPN(cfg, num_classes=num_classes) 100 | 101 | 102 | def DPN98(num_classes=10): 103 | cfg = { 104 | 'in_planes': (96,192,384,768), 105 | 'out_planes': (256,512,1024,2048), 106 | 'num_blocks': (3,6,20,3), 107 | 'dense_depth': (16,32,32,128) 108 | } 109 | return DPN(cfg, num_classes=num_classes) 110 | 111 | def DPN107(num_classes=10): 112 | cfg = { 113 | 'in_planes': (96,192,384,768), 114 | 'out_planes': (256,512,1024,2048), 115 | 'num_blocks': (4,8,20,3), 116 | 'dense_depth': (20,64,64,128) 117 | } 118 | return DPN(cfg, num_classes=num_classes) 119 | 120 | def test(): 121 | net = DPN92() 122 | x = torch.randn(1,3,32,32) 123 | y = net(x) 124 | print(y) 125 | 126 | # test() 127 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/mobilenet.py: -------------------------------------------------------------------------------- 1 | '''MobileNet in PyTorch. 2 | 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" 4 | for more details. 
5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class Block(nn.Module): 12 | '''Depthwise conv + Pointwise conv''' 13 | def __init__(self, in_planes, out_planes, stride=1): 14 | super(Block, self).__init__() 15 | self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False) 16 | self.bn1 = nn.BatchNorm2d(in_planes) 17 | self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 18 | self.bn2 = nn.BatchNorm2d(out_planes) 19 | 20 | def forward(self, x): 21 | out = F.relu(self.bn1(self.conv1(x))) 22 | out = F.relu(self.bn2(self.conv2(out))) 23 | return out 24 | 25 | 26 | class MobileNet(nn.Module): 27 | # (128,2) means conv planes=128, conv stride=2, by default conv stride=1 28 | cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] 29 | 30 | def __init__(self, num_classes=10): 31 | super(MobileNet, self).__init__() 32 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) 33 | self.bn1 = nn.BatchNorm2d(32) 34 | self.layers = self._make_layers(in_planes=32) 35 | self.linear = nn.Linear(1024, num_classes) 36 | 37 | def _make_layers(self, in_planes): 38 | layers = [] 39 | for x in self.cfg: 40 | out_planes = x if isinstance(x, int) else x[0] 41 | stride = 1 if isinstance(x, int) else x[1] 42 | layers.append(Block(in_planes, out_planes, stride)) 43 | in_planes = out_planes 44 | return nn.Sequential(*layers) 45 | 46 | def forward(self, x): 47 | out = F.relu(self.bn1(self.conv1(x))) 48 | out = self.layers(out) 49 | out = F.avg_pool2d(out, 2) 50 | out = out.view(out.size(0), -1) 51 | out = self.linear(out) 52 | return out 53 | 54 | 55 | def test(): 56 | net = MobileNet() 57 | x = torch.randn(1,3,32,32) 58 | y = net(x) 59 | print(y.size()) 60 | 61 | # test() 62 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class BaseBlock(nn.Module): 9 | alpha = 1 10 | 11 | def __init__(self, input_channel, output_channel, t = 6, downsample = False): 12 | """ 13 | t: expansion factor, t*input_channel is channel of expansion layer 14 | alpha: width multiplier, to get thinner models 15 | rho: resolution multiplier, to get reduced representation 16 | """ 17 | super(BaseBlock, self).__init__() 18 | self.stride = 2 if downsample else 1 19 | self.downsample = downsample 20 | self.shortcut = (not downsample) and (input_channel == output_channel) 21 | 22 | # apply alpha 23 | input_channel = int(self.alpha * input_channel) 24 | output_channel = int(self.alpha * output_channel) 25 | 26 | # for main path: 27 | c = t * input_channel 28 | # 1x1 point wise conv 29 | self.conv1 = nn.Conv2d(input_channel, c, kernel_size = 1, bias = False) 30 | self.bn1 = nn.BatchNorm2d(c) 31 | # 3x3 depth wise conv 32 | self.conv2 = nn.Conv2d(c, c, kernel_size = 3, stride = self.stride, padding = 1, groups = c, bias = False) 33 | self.bn2 = nn.BatchNorm2d(c) 34 | # 1x1 point wise conv 35 | self.conv3 = nn.Conv2d(c, output_channel, kernel_size = 1, bias = False) 36 | self.bn3 = nn.BatchNorm2d(output_channel) 37 | 38 | 39 | def forward(self, inputs): 40 | # main path 41 | x = F.relu6(self.bn1(self.conv1(inputs)), inplace = True) 42 | x = F.relu6(self.bn2(self.conv2(x)), inplace 
= True) 43 | x = self.bn3(self.conv3(x)) 44 | 45 | # shortcut path 46 | x = x + inputs if self.shortcut else x 47 | 48 | return x 49 | 50 | 51 | 52 | 53 | class MobileNetV2(nn.Module): 54 | def __init__(self, num_classes, alpha = 1): 55 | super(MobileNetV2, self).__init__() 56 | 57 | # first conv layer 58 | self.conv0 = nn.Conv2d(3, int(32*alpha), kernel_size = 3, stride = 1, padding = 1, bias = False) 59 | self.bn0 = nn.BatchNorm2d(int(32*alpha)) 60 | 61 | # build bottlenecks 62 | BaseBlock.alpha = alpha 63 | self.bottlenecks = nn.Sequential( 64 | BaseBlock(32, 16, t = 1, downsample = False), 65 | BaseBlock(16, 24, downsample = False), 66 | BaseBlock(24, 24), 67 | BaseBlock(24, 32, downsample = False), 68 | BaseBlock(32, 32), 69 | BaseBlock(32, 32), 70 | BaseBlock(32, 64, downsample = True), 71 | BaseBlock(64, 64), 72 | BaseBlock(64, 64), 73 | BaseBlock(64, 64), 74 | BaseBlock(64, 96, downsample = False), 75 | BaseBlock(96, 96), 76 | BaseBlock(96, 96), 77 | BaseBlock(96, 160, downsample = True), 78 | BaseBlock(160, 160), 79 | BaseBlock(160, 160), 80 | BaseBlock(160, 320, downsample = False)) 81 | 82 | # last conv layers and fc layer 83 | self.conv1 = nn.Conv2d(int(320*alpha), 1280, kernel_size = 1, bias = False) 84 | self.bn1 = nn.BatchNorm2d(1280) 85 | self.fc = nn.Linear(1280, num_classes) 86 | 87 | # weights init 88 | self.weights_init() 89 | 90 | 91 | def weights_init(self): 92 | for m in self.modules(): 93 | if isinstance(m, nn.Conv2d): 94 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 95 | m.weight.data.normal_(0, math.sqrt(2. / n)) 96 | 97 | elif isinstance(m, nn.BatchNorm2d): 98 | m.weight.data.fill_(1) 99 | m.bias.data.zero_() 100 | 101 | 102 | def forward(self, inputs): 103 | 104 | # first conv layer 105 | x = F.relu6(self.bn0(self.conv0(inputs)), inplace = True) 106 | # assert x.shape[1:] == torch.Size([32, 32, 32]) 107 | 108 | # bottlenecks 109 | x = self.bottlenecks(x) 110 | # assert x.shape[1:] == torch.Size([320, 8, 8]) 111 | 112 | # last conv layer 113 | x = F.relu6(self.bn1(self.conv1(x)), inplace = True) 114 | # assert x.shape[1:] == torch.Size([1280,8,8]) 115 | 116 | # global pooling and fc (in place of conv 1x1 in paper) 117 | x = F.adaptive_avg_pool2d(x, 1) 118 | x = x.view(x.shape[0], -1) 119 | x = self.fc(x) 120 | 121 | return x 122 | 123 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/model_provider.py: -------------------------------------------------------------------------------- 1 | from .densenet import * 2 | from .dla import * 3 | from .dla_simple import * 4 | from .dpn import * 5 | from .efficientnet import * 6 | from .mobilenetv2 import * 7 | from .mobilenetv3 import * 8 | from .pnasnet import * 9 | from .preact_resnet import * 10 | from .preactresnet import * 11 | from .resnet import * 12 | from .resnext import * 13 | from .senet import * 14 | from .shufflenet import * 15 | from .shufflenetv2 import * 16 | from .stochasticdepth import * 17 | from .vgg import * 18 | 19 | __all__ = ['get_cv_model'] 20 | 21 | 22 | _models = { 23 | "DLA": DLA, 24 | "DPN107": DPN107, 25 | "DPN26": DPN26, 26 | "DPN68": DPN68, 27 | "DPN92": DPN92, 28 | "DPN98": DPN98, 29 | "DenseNet121": DenseNet121, 30 | "DenseNet161": DenseNet161, 31 | "DenseNet169": DenseNet169, 32 | "DenseNet201": DenseNet201, 33 | "MobileNetV2": MobileNetV2, 34 | "MobileNetV3": MobileNetV3, 35 | "PreActResNet101": PreActResNet101, 36 | "PreActResNet152": PreActResNet152, 37 | "PreActResNet18": PreActResNet18, 38 |
"PreActResNet34": PreActResNet34, 39 | "PreActResNet50": PreActResNet50, 40 | "ResNeXt29_2x64d": ResNeXt29_2x64d, 41 | "ResNeXt29_32x4d": ResNeXt29_32x4d, 42 | "ResNeXt29_4x64d": ResNeXt29_4x64d, 43 | "ResNeXt29_8x64d": ResNeXt29_8x64d, 44 | "ResNet101": ResNet101, 45 | "ResNet152": ResNet152, 46 | "ResNet18": ResNet18, 47 | "ResNet34": ResNet34, 48 | "ResNet50": ResNet50, 49 | "ShuffleNetG2": ShuffleNetG2, 50 | "ShuffleNetG3": ShuffleNetG3, 51 | "ShuffleNetV2": ShuffleNetV2, 52 | "SimpleDLA": SimpleDLA, 53 | "VGG": VGG, 54 | "preactresnet101": preactresnet101, 55 | "preactresnet152": preactresnet152, 56 | "preactresnet18": preactresnet18, 57 | "preactresnet34": preactresnet34, 58 | "preactresnet50": preactresnet50, 59 | "seresnet101": seresnet101, 60 | "seresnet152": seresnet152, 61 | "seresnet18": seresnet18, 62 | "seresnet34": seresnet34, 63 | "seresnet50": seresnet50, 64 | "stochastic_depth_resnet101": stochastic_depth_resnet101, 65 | "stochastic_depth_resnet152": stochastic_depth_resnet152, 66 | "stochastic_depth_resnet18": stochastic_depth_resnet18, 67 | "stochastic_depth_resnet34": stochastic_depth_resnet34, 68 | "stochastic_depth_resnet50": stochastic_depth_resnet50, 69 | } 70 | 71 | 72 | def get_cv_model(name, **kwargs): 73 | """ 74 | Get supported model. 75 | 76 | Parameters: 77 | ---------- 78 | name : str 79 | Name of model. 80 | 81 | Returns: 82 | ------- 83 | Module 84 | Resulted model. 85 | """ 86 | if name not in _models: 87 | raise ValueError("Unsupported model: {}".format(name)) 88 | net = _models[name](**kwargs) 89 | return net 90 | 91 | 92 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/pnasnet.py: -------------------------------------------------------------------------------- 1 | '''PNASNet in PyTorch. 
2 | 3 | Paper: Progressive Neural Architecture Search 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SepConv(nn.Module): 11 | '''Separable Convolution.''' 12 | def __init__(self, in_planes, out_planes, kernel_size, stride): 13 | super(SepConv, self).__init__() 14 | self.conv1 = nn.Conv2d(in_planes, out_planes, 15 | kernel_size, stride, 16 | padding=(kernel_size-1)//2, 17 | bias=False, groups=in_planes) 18 | self.bn1 = nn.BatchNorm2d(out_planes) 19 | 20 | def forward(self, x): 21 | return self.bn1(self.conv1(x)) 22 | 23 | 24 | class CellA(nn.Module): 25 | def __init__(self, in_planes, out_planes, stride=1): 26 | super(CellA, self).__init__() 27 | self.stride = stride 28 | self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) 29 | if stride==2: 30 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 31 | self.bn1 = nn.BatchNorm2d(out_planes) 32 | 33 | def forward(self, x): 34 | y1 = self.sep_conv1(x) 35 | y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) 36 | if self.stride==2: 37 | y2 = self.bn1(self.conv1(y2)) 38 | return F.relu(y1+y2) 39 | 40 | class CellB(nn.Module): 41 | def __init__(self, in_planes, out_planes, stride=1): 42 | super(CellB, self).__init__() 43 | self.stride = stride 44 | # Left branch 45 | self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) 46 | self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) 47 | # Right branch 48 | self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) 49 | if stride==2: 50 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 51 | self.bn1 = nn.BatchNorm2d(out_planes) 52 | # Reduce channels 53 | self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 54 | self.bn2 = nn.BatchNorm2d(out_planes) 55 | 56 | def forward(self, x): 57 | # Left branch 58 | y1 = self.sep_conv1(x) 59 | y2 = self.sep_conv2(x) 60 | # Right branch 61 | y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) 62 | if self.stride==2: 63 | y3 = self.bn1(self.conv1(y3)) 64 | y4 = self.sep_conv3(x) 65 | # Concat & reduce channels 66 | b1 = F.relu(y1+y2) 67 | b2 = F.relu(y3+y4) 68 | y = torch.cat([b1,b2], 1) 69 | return F.relu(self.bn2(self.conv2(y))) 70 | 71 | class PNASNet(nn.Module): 72 | def __init__(self, cell_type, num_cells, num_planes): 73 | super(PNASNet, self).__init__() 74 | self.in_planes = num_planes 75 | self.cell_type = cell_type 76 | 77 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) 78 | self.bn1 = nn.BatchNorm2d(num_planes) 79 | 80 | self.layer1 = self._make_layer(num_planes, num_cells=6) 81 | self.layer2 = self._downsample(num_planes*2) 82 | self.layer3 = self._make_layer(num_planes*2, num_cells=6) 83 | self.layer4 = self._downsample(num_planes*4) 84 | self.layer5 = self._make_layer(num_planes*4, num_cells=6) 85 | 86 | self.linear = nn.Linear(num_planes*4, 10) 87 | 88 | def _make_layer(self, planes, num_cells): 89 | layers = [] 90 | for _ in range(num_cells): 91 | layers.append(self.cell_type(self.in_planes, planes, stride=1)) 92 | self.in_planes = planes 93 | return nn.Sequential(*layers) 94 | 95 | def _downsample(self, planes): 96 | layer = self.cell_type(self.in_planes, planes, stride=2) 97 | self.in_planes = planes 98 | return layer 99 | 100 | def forward(self, x): 101 | out = F.relu(self.bn1(self.conv1(x))) 102 | 
out = self.layer1(out) 103 | out = self.layer2(out) 104 | out = self.layer3(out) 105 | out = self.layer4(out) 106 | out = self.layer5(out) 107 | out = F.avg_pool2d(out, 8) 108 | out = self.linear(out.view(out.size(0), -1)) 109 | return out 110 | 111 | 112 | def PNASNetA(): 113 | return PNASNet(CellA, num_cells=6, num_planes=44) 114 | 115 | def PNASNetB(): 116 | return PNASNet(CellB, num_cells=6, num_planes=32) 117 | 118 | 119 | def test(): 120 | net = PNASNetB() 121 | x = torch.randn(1,3,32,32) 122 | y = net(x) 123 | print(y) 124 | 125 | # test() 126 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/preact_resnet.py: -------------------------------------------------------------------------------- 1 | '''Pre-activation ResNet in PyTorch. 2 | 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Identity Mappings in Deep Residual Networks. arXiv:1603.05027 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | class PreActBlock(nn.Module): 13 | '''Pre-activation version of the BasicBlock.''' 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(PreActBlock, self).__init__() 18 | self.bn1 = nn.BatchNorm2d(in_planes) 19 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | 23 | if stride != 1 or in_planes != self.expansion*planes: 24 | self.shortcut = nn.Sequential( 25 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 26 | ) 27 | 28 | def forward(self, x): 29 | out = F.relu(self.bn1(x)) 30 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 31 | out = self.conv1(out) 32 | out = self.conv2(F.relu(self.bn2(out))) 33 | out += shortcut 34 | return out 35 | 36 | 37 | class PreActBottleneck(nn.Module): 38 | '''Pre-activation version of the original Bottleneck module.''' 39 | expansion = 4 40 | 41 | def __init__(self, in_planes, planes, stride=1): 42 | super(PreActBottleneck, self).__init__() 43 | self.bn1 = nn.BatchNorm2d(in_planes) 44 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 45 | self.bn2 = nn.BatchNorm2d(planes) 46 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 47 | self.bn3 = nn.BatchNorm2d(planes) 48 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 49 | 50 | if stride != 1 or in_planes != self.expansion*planes: 51 | self.shortcut = nn.Sequential( 52 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 53 | ) 54 | 55 | def forward(self, x): 56 | out = F.relu(self.bn1(x)) 57 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 58 | out = self.conv1(out) 59 | out = self.conv2(F.relu(self.bn2(out))) 60 | out = self.conv3(F.relu(self.bn3(out))) 61 | out += shortcut 62 | return out 63 | 64 | 65 | class PreActResNet(nn.Module): 66 | def __init__(self, block, num_blocks, num_classes=10): 67 | super(PreActResNet, self).__init__() 68 | self.in_planes = 64 69 | 70 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 71 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 72 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 73 | self.layer3 = self._make_layer(block, 256, num_blocks[2], 
stride=2) 74 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 75 | self.linear = nn.Linear(512*block.expansion, num_classes) 76 | 77 | def _make_layer(self, block, planes, num_blocks, stride): 78 | strides = [stride] + [1]*(num_blocks-1) 79 | layers = [] 80 | for stride in strides: 81 | layers.append(block(self.in_planes, planes, stride)) 82 | self.in_planes = planes * block.expansion 83 | return nn.Sequential(*layers) 84 | 85 | def forward(self, x): 86 | out = self.conv1(x) 87 | out = self.layer1(out) 88 | out = self.layer2(out) 89 | out = self.layer3(out) 90 | out = self.layer4(out) 91 | out = F.avg_pool2d(out, 4) 92 | out = out.view(out.size(0), -1) 93 | out = self.linear(out) 94 | return out 95 | 96 | 97 | def PreActResNet18(num_classes=10): 98 | return PreActResNet(PreActBlock, [2,2,2,2], num_classes=num_classes) 99 | 100 | def PreActResNet34(num_classes=10): 101 | return PreActResNet(PreActBlock, [3,4,6,3], num_classes=num_classes) 102 | 103 | def PreActResNet50(num_classes=10): 104 | return PreActResNet(PreActBottleneck, [3,4,6,3], num_classes=num_classes) 105 | 106 | def PreActResNet101(num_classes=10): 107 | return PreActResNet(PreActBottleneck, [3,4,23,3], num_classes=num_classes) 108 | 109 | def PreActResNet152(num_classes=10): 110 | return PreActResNet(PreActBottleneck, [3,8,36,3], num_classes=num_classes) 111 | 112 | 113 | def test(): 114 | net = PreActResNet18() 115 | y = net((torch.randn(1,3,32,32))) 116 | print(y.size()) 117 | 118 | # test() 119 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/preactresnet.py: -------------------------------------------------------------------------------- 1 | """preactresnet in pytorch 2 | 3 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 4 | 5 | Identity Mappings in Deep Residual Networks 6 | https://arxiv.org/abs/1603.05027 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class PreActBasic(nn.Module): 15 | 16 | expansion = 1 17 | def __init__(self, in_channels, out_channels, stride): 18 | super().__init__() 19 | self.residual = nn.Sequential( 20 | nn.BatchNorm2d(in_channels), 21 | nn.ReLU(inplace=True), 22 | nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1), 23 | nn.BatchNorm2d(out_channels), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(out_channels, out_channels * PreActBasic.expansion, kernel_size=3, padding=1) 26 | ) 27 | 28 | self.shortcut = nn.Sequential() 29 | if stride != 1 or in_channels != out_channels * PreActBasic.expansion: 30 | self.shortcut = nn.Conv2d(in_channels, out_channels * PreActBasic.expansion, 1, stride=stride) 31 | 32 | def forward(self, x): 33 | 34 | res = self.residual(x) 35 | shortcut = self.shortcut(x) 36 | 37 | return res + shortcut 38 | 39 | 40 | class PreActBottleNeck(nn.Module): 41 | 42 | expansion = 4 43 | def __init__(self, in_channels, out_channels, stride): 44 | super().__init__() 45 | 46 | self.residual = nn.Sequential( 47 | nn.BatchNorm2d(in_channels), 48 | nn.ReLU(inplace=True), 49 | nn.Conv2d(in_channels, out_channels, 1, stride=stride), 50 | 51 | nn.BatchNorm2d(out_channels), 52 | nn.ReLU(inplace=True), 53 | nn.Conv2d(out_channels, out_channels, 3, padding=1), 54 | 55 | nn.BatchNorm2d(out_channels), 56 | nn.ReLU(inplace=True), 57 | nn.Conv2d(out_channels, out_channels * PreActBottleNeck.expansion, 1) 58 | ) 59 | 60 | self.shortcut = nn.Sequential() 61 | 62 | if stride != 1 or in_channels != out_channels * 
PreActBottleNeck.expansion: 63 | self.shortcut = nn.Conv2d(in_channels, out_channels * PreActBottleNeck.expansion, 1, stride=stride) 64 | 65 | def forward(self, x): 66 | 67 | res = self.residual(x) 68 | shortcut = self.shortcut(x) 69 | 70 | return res + shortcut 71 | 72 | class PreActResNet(nn.Module): 73 | 74 | def __init__(self, block, num_block, num_classes=100): 75 | super().__init__() 76 | self.input_channels = 64 77 | 78 | self.pre = nn.Sequential( 79 | nn.Conv2d(3, 64, 3, padding=1), 80 | nn.BatchNorm2d(64), 81 | nn.ReLU(inplace=True) 82 | ) 83 | 84 | self.stage1 = self._make_layers(block, num_block[0], 64, 1) 85 | self.stage2 = self._make_layers(block, num_block[1], 128, 2) 86 | self.stage3 = self._make_layers(block, num_block[2], 256, 2) 87 | self.stage4 = self._make_layers(block, num_block[3], 512, 2) 88 | 89 | self.linear = nn.Linear(self.input_channels, num_classes) 90 | 91 | def _make_layers(self, block, block_num, out_channels, stride): 92 | layers = [] 93 | 94 | layers.append(block(self.input_channels, out_channels, stride)) 95 | self.input_channels = out_channels * block.expansion 96 | 97 | while block_num - 1: 98 | layers.append(block(self.input_channels, out_channels, 1)) 99 | self.input_channels = out_channels * block.expansion 100 | block_num -= 1 101 | 102 | return nn.Sequential(*layers) 103 | 104 | def forward(self, x): 105 | x = self.pre(x) 106 | 107 | x = self.stage1(x) 108 | x = self.stage2(x) 109 | x = self.stage3(x) 110 | x = self.stage4(x) 111 | 112 | x = F.adaptive_avg_pool2d(x, 1) 113 | x = x.view(x.size(0), -1) 114 | x = self.linear(x) 115 | 116 | return x 117 | 118 | def preactresnet18(num_classes=100): 119 | return PreActResNet(PreActBasic, [2, 2, 2, 2], num_classes=num_classes) 120 | 121 | def preactresnet34(num_classes=100): 122 | return PreActResNet(PreActBasic, [3, 4, 6, 3], num_classes=num_classes) 123 | 124 | def preactresnet50(num_classes=100): 125 | return PreActResNet(PreActBottleNeck, [3, 4, 6, 3], num_classes=num_classes) 126 | 127 | def preactresnet101(num_classes=100): 128 | return PreActResNet(PreActBottleNeck, [3, 4, 23, 3], num_classes=num_classes) 129 | 130 | def preactresnet152(num_classes=100): 131 | return PreActResNet(PreActBottleNeck, [3, 8, 36, 3], num_classes=num_classes) 132 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | 3 | For Pre-activation ResNet, see 'preact_resnet.py'. 4 | 5 | Reference: 6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 7 | Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class BasicBlock(nn.Module): 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, stride=1): 18 | super(BasicBlock, self).__init__() 19 | self.conv1 = nn.Conv2d( 20 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 23 | stride=1, padding=1, bias=False) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | 26 | self.shortcut = nn.Sequential() 27 | if stride != 1 or in_planes != self.expansion*planes: 28 | self.shortcut = nn.Sequential( 29 | nn.Conv2d(in_planes, self.expansion*planes, 30 | kernel_size=1, stride=stride, bias=False), 31 | nn.BatchNorm2d(self.expansion*planes) 32 | ) 33 | 34 | def forward(self, x): 35 | out = F.relu(self.bn1(self.conv1(x))) 36 | out = self.bn2(self.conv2(out)) 37 | out += self.shortcut(x) 38 | out = F.relu(out) 39 | return out 40 | 41 | 42 | class Bottleneck(nn.Module): 43 | expansion = 4 44 | 45 | def __init__(self, in_planes, planes, stride=1): 46 | super(Bottleneck, self).__init__() 47 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 48 | self.bn1 = nn.BatchNorm2d(planes) 49 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 50 | stride=stride, padding=1, bias=False) 51 | self.bn2 = nn.BatchNorm2d(planes) 52 | self.conv3 = nn.Conv2d(planes, self.expansion * 53 | planes, kernel_size=1, bias=False) 54 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 55 | 56 | self.shortcut = nn.Sequential() 57 | if stride != 1 or in_planes != self.expansion*planes: 58 | self.shortcut = nn.Sequential( 59 | nn.Conv2d(in_planes, self.expansion*planes, 60 | kernel_size=1, stride=stride, bias=False), 61 | nn.BatchNorm2d(self.expansion*planes) 62 | ) 63 | 64 | def forward(self, x): 65 | out = F.relu(self.bn1(self.conv1(x))) 66 | out = F.relu(self.bn2(self.conv2(out))) 67 | out = self.bn3(self.conv3(out)) 68 | out += self.shortcut(x) 69 | out = F.relu(out) 70 | return out 71 | 72 | 73 | class ResNet(nn.Module): 74 | def __init__(self, block, num_blocks, num_classes=10): 75 | super(ResNet, self).__init__() 76 | self.in_planes = 64 77 | 78 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, 79 | stride=1, padding=1, bias=False) 80 | self.bn1 = nn.BatchNorm2d(64) 81 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 82 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 83 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 84 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 85 | self.linear = nn.Linear(512*block.expansion, num_classes) 86 | 87 | def _make_layer(self, block, planes, num_blocks, stride): 88 | strides = [stride] + [1]*(num_blocks-1) 89 | layers = [] 90 | for stride in strides: 91 | layers.append(block(self.in_planes, planes, stride)) 92 | self.in_planes = planes * block.expansion 93 | return nn.Sequential(*layers) 94 | 95 | def forward(self, x): 96 | out = F.relu(self.bn1(self.conv1(x))) 97 | out = self.layer1(out) 98 | out = self.layer2(out) 99 | out = self.layer3(out) 100 | out = self.layer4(out) 101 | out = F.avg_pool2d(out, 4) 102 | out = out.view(out.size(0), -1) 103 | out = self.linear(out) 104 | return out 105 | 106 | 107 | def ResNet18(num_classes=10): 108 | return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes) 109 | 110 | 111 | def ResNet34(num_classes=10): 112 | return ResNet(BasicBlock, [3, 4, 6, 3], 
num_classes=num_classes) 113 | 114 | 115 | def ResNet50(num_classes=10): 116 | return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes) 117 | 118 | 119 | def ResNet101(num_classes=10): 120 | return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes) 121 | 122 | 123 | def ResNet152(num_classes=10): 124 | return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes) 125 | 126 | 127 | def test(): 128 | net = ResNet18() 129 | y = net(torch.randn(1, 3, 32, 32)) 130 | print(y.size()) 131 | 132 | # test() 133 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/resnext.py: -------------------------------------------------------------------------------- 1 | '''ResNeXt in PyTorch. 2 | 3 | See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class Block(nn.Module): 11 | '''Grouped convolution block.''' 12 | expansion = 2 13 | 14 | def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): 15 | super(Block, self).__init__() 16 | group_width = cardinality * bottleneck_width 17 | self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(group_width) 19 | self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False) 20 | self.bn2 = nn.BatchNorm2d(group_width) 21 | self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(self.expansion*group_width) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*group_width: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*group_width) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | out = self.bn3(self.conv3(out)) 35 | out += self.shortcut(x) 36 | out = F.relu(out) 37 | return out 38 | 39 | 40 | class ResNeXt(nn.Module): 41 | def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): 42 | super(ResNeXt, self).__init__() 43 | self.cardinality = cardinality 44 | self.bottleneck_width = bottleneck_width 45 | self.in_planes = 64 46 | 47 | self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) 48 | self.bn1 = nn.BatchNorm2d(64) 49 | self.layer1 = self._make_layer(num_blocks[0], 1) 50 | self.layer2 = self._make_layer(num_blocks[1], 2) 51 | self.layer3 = self._make_layer(num_blocks[2], 2) 52 | # self.layer4 = self._make_layer(num_blocks[3], 2) 53 | self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes) 54 | 55 | def _make_layer(self, num_blocks, stride): 56 | strides = [stride] + [1]*(num_blocks-1) 57 | layers = [] 58 | for stride in strides: 59 | layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)) 60 | self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width 61 | # Increase bottleneck_width by 2 after each stage. 
62 | self.bottleneck_width *= 2 63 | return nn.Sequential(*layers) 64 | 65 | def forward(self, x): 66 | out = F.relu(self.bn1(self.conv1(x))) 67 | out = self.layer1(out) 68 | out = self.layer2(out) 69 | out = self.layer3(out) 70 | # out = self.layer4(out) 71 | out = F.avg_pool2d(out, 8) 72 | out = out.view(out.size(0), -1) 73 | out = self.linear(out) 74 | return out 75 | 76 | 77 | def ResNeXt29_2x64d(num_classes=10): 78 | return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64, num_classes=num_classes) 79 | 80 | def ResNeXt29_4x64d(num_classes=10): 81 | return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64, num_classes=num_classes) 82 | 83 | def ResNeXt29_8x64d(num_classes=10): 84 | return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64, num_classes=num_classes) 85 | 86 | def ResNeXt29_32x4d(num_classes=10): 87 | return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4, num_classes=num_classes) 88 | 89 | def test_resnext(): 90 | net = ResNeXt29_2x64d() 91 | x = torch.randn(1,3,32,32) 92 | y = net(x) 93 | print(y.size()) 94 | 95 | # test_resnext() 96 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/shufflenet.py: -------------------------------------------------------------------------------- 1 | '''ShuffleNet in PyTorch. 2 | 3 | See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class ShuffleBlock(nn.Module): 11 | def __init__(self, groups): 12 | super(ShuffleBlock, self).__init__() 13 | self.groups = groups 14 | 15 | def forward(self, x): 16 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]''' 17 | N,C,H,W = x.size() 18 | g = self.groups 19 | return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W) 20 | 21 | 22 | class Bottleneck(nn.Module): 23 | def __init__(self, in_planes, out_planes, stride, groups): 24 | super(Bottleneck, self).__init__() 25 | self.stride = stride 26 | 27 | mid_planes = int(out_planes/4) 28 | g = 1 if in_planes==24 else groups 29 | 30 | self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False) 31 | self.bn1 = nn.BatchNorm2d(mid_planes) 32 | self.shuffle1 = ShuffleBlock(groups=g) 33 | self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False) 34 | self.bn2 = nn.BatchNorm2d(mid_planes) 35 | self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False) 36 | self.bn3 = nn.BatchNorm2d(out_planes) 37 | 38 | self.shortcut = nn.Sequential() 39 | if stride == 2: 40 | self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) 41 | 42 | def forward(self, x): 43 | out = F.relu(self.bn1(self.conv1(x))) 44 | out = self.shuffle1(out) 45 | out = F.relu(self.bn2(self.conv2(out))) 46 | out = self.bn3(self.conv3(out)) 47 | res = self.shortcut(x) 48 | out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res) 49 | return out 50 | 51 | 52 | class ShuffleNet(nn.Module): 53 | def __init__(self, cfg, num_classes=10): 54 | super(ShuffleNet, self).__init__() 55 | out_planes = cfg['out_planes'] 56 | num_blocks = cfg['num_blocks'] 57 | groups = cfg['groups'] 58 | 59 | self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False) 60 | self.bn1 = nn.BatchNorm2d(24) 61 | self.in_planes = 24 62 | self.layer1 = self._make_layer(out_planes[0],
num_blocks[0], groups) 63 | self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups) 64 | self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups) 65 | self.linear = nn.Linear(out_planes[2], num_classes) 66 | 67 | def _make_layer(self, out_planes, num_blocks, groups): 68 | layers = [] 69 | for i in range(num_blocks): 70 | stride = 2 if i == 0 else 1 71 | cat_planes = self.in_planes if i == 0 else 0 72 | layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups)) 73 | self.in_planes = out_planes 74 | return nn.Sequential(*layers) 75 | 76 | def forward(self, x): 77 | out = F.relu(self.bn1(self.conv1(x))) 78 | out = self.layer1(out) 79 | out = self.layer2(out) 80 | out = self.layer3(out) 81 | out = F.avg_pool2d(out, 4) 82 | out = out.view(out.size(0), -1) 83 | out = self.linear(out) 84 | return out 85 | 86 | 87 | def ShuffleNetG2(num_classes=10): 88 | cfg = { 89 | 'out_planes': [200,400,800], 90 | 'num_blocks': [4,8,4], 91 | 'groups': 2 92 | } 93 | return ShuffleNet(cfg, num_classes=num_classes) 94 | 95 | def ShuffleNetG3(num_classes=10): 96 | cfg = { 97 | 'out_planes': [240,480,960], 98 | 'num_blocks': [4,8,4], 99 | 'groups': 3 100 | } 101 | return ShuffleNet(cfg, num_classes=num_classes) 102 | 103 | 104 | def test(): 105 | net = ShuffleNetG2() 106 | x = torch.randn(1,3,32,32) 107 | y = net(x) 108 | print(y) 109 | 110 | # test() 111 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/vgg.py: -------------------------------------------------------------------------------- 1 | """vgg in pytorch 2 | 3 | 4 | [1] Karen Simonyan, Andrew Zisserman 5 | 6 | Very Deep Convolutional Networks for Large-Scale Image Recognition. 7 | https://arxiv.org/abs/1409.1556v6 8 | """ 9 | '''VGG11/13/16/19 in Pytorch.''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | vggcfg = { 15 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 16 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 17 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 18 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 19 | } 20 | 21 | class VGG(nn.Module): 22 | 23 | def __init__(self, vgg_block, use_bn=False, num_classes=10): 24 | super(VGG, self).__init__() 25 | self.use_bn = use_bn 26 | self.features = self._make_layers(vggcfg[f"VGG{vgg_block}"]) 27 | self.classifier = nn.Linear(512, num_classes) 28 | 29 | def forward(self, x): 30 | out = self.features(x) 31 | out = out.view(out.size(0), -1) 32 | out = self.classifier(out) 33 | return out 34 | 35 | def _make_layers(self, cfg): 36 | layers = [] 37 | in_channels = 3 38 | for x in cfg: 39 | if x == 'M': 40 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 41 | else: 42 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1)] 43 | if self.use_bn: 44 | layers += [nn.BatchNorm2d(x)] 45 | layers += [nn.ReLU(inplace=True)] 46 | in_channels = x 47 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 48 | return nn.Sequential(*layers) 49 | -------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/cell_infers/__init__.py: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # 3 | 
##################################################### 4 | from .nasnet_cifar import NASNetonCIFAR 5 | from .tiny_network import TinyNetwork 6 | -------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/cell_infers/cells.py: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # 3 | ##################################################### 4 | 5 | from copy import deepcopy 6 | 7 | import torch 8 | import torch.nn as nn 9 | from models.cell_operations import OPS 10 | 11 | 12 | # Cell for NAS-Bench-201 13 | class InferCell(nn.Module): 14 | 15 | def __init__(self, genotype, C_in, C_out, stride, affine=True, track_running_stats=True): 16 | super(InferCell, self).__init__() 17 | 18 | self.layers = nn.ModuleList() 19 | self.node_IN = [] 20 | self.node_IX = [] 21 | self.genotype = deepcopy(genotype) 22 | for i in range(1, len(genotype)): 23 | node_info = genotype[i-1] 24 | cur_index = [] 25 | cur_innod = [] 26 | for (op_name, op_in) in node_info: 27 | if op_in == 0: 28 | layer = OPS[op_name](C_in , C_out, stride, affine, track_running_stats) 29 | else: 30 | layer = OPS[op_name](C_out, C_out, 1, affine, track_running_stats) 31 | cur_index.append( len(self.layers) ) 32 | cur_innod.append( op_in ) 33 | self.layers.append( layer ) 34 | self.node_IX.append( cur_index ) 35 | self.node_IN.append( cur_innod ) 36 | self.nodes = len(genotype) 37 | self.in_dim = C_in 38 | self.out_dim = C_out 39 | 40 | def extra_repr(self): 41 | string = 'info :: nodes={nodes}, inC={in_dim}, outC={out_dim}'.format(**self.__dict__) 42 | laystr = [] 43 | for i, (node_layers, node_innods) in enumerate(zip(self.node_IX,self.node_IN)): 44 | y = ['I{:}-L{:}'.format(_ii, _il) for _il, _ii in zip(node_layers, node_innods)] 45 | x = '{:}<-({:})'.format(i+1, ','.join(y)) 46 | laystr.append( x ) 47 | return string + ', [{:}]'.format( ' | '.join(laystr) ) + ', {:}'.format(self.genotype.tostr()) 48 | 49 | def forward(self, inputs): 50 | nodes = [inputs] 51 | for i, (node_layers, node_innods) in enumerate(zip(self.node_IX,self.node_IN)): 52 | node_feature = sum( self.layers[_il](nodes[_ii]) for _il, _ii in zip(node_layers, node_innods) ) 53 | nodes.append( node_feature ) 54 | return nodes[-1] 55 | 56 | 57 | 58 | # Learning Transferable Architectures for Scalable Image Recognition, CVPR 2018 59 | class NASNetInferCell(nn.Module): 60 | 61 | def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev, affine, track_running_stats): 62 | super(NASNetInferCell, self).__init__() 63 | self.reduction = reduction 64 | if reduction_prev: self.preprocess0 = OPS['skip_connect'](C_prev_prev, C, 2, affine, track_running_stats) 65 | else : self.preprocess0 = OPS['nor_conv_1x1'](C_prev_prev, C, 1, affine, track_running_stats) 66 | self.preprocess1 = OPS['nor_conv_1x1'](C_prev, C, 1, affine, track_running_stats) 67 | 68 | if not reduction: 69 | nodes, concats = genotype['normal'], genotype['normal_concat'] 70 | else: 71 | nodes, concats = genotype['reduce'], genotype['reduce_concat'] 72 | self._multiplier = len(concats) 73 | self._concats = concats 74 | self._steps = len(nodes) 75 | self._nodes = nodes 76 | self.edges = nn.ModuleDict() 77 | for i, node in enumerate(nodes): 78 | for in_node in node: 79 | name, j = in_node[0], in_node[1] 80 | stride = 2 if reduction and j < 2 else 1 81 | node_str = '{:}<-{:}'.format(i+2, j) 82 | self.edges[node_str] 
= OPS[name](C, C, stride, affine, track_running_stats) 83 | 84 | # [TODO] to support drop_prob in this function.. 85 | def forward(self, s0, s1, unused_drop_prob): 86 | s0 = self.preprocess0(s0) 87 | s1 = self.preprocess1(s1) 88 | 89 | states = [s0, s1] 90 | for i, node in enumerate(self._nodes): 91 | clist = [] 92 | for in_node in node: 93 | name, j = in_node[0], in_node[1] 94 | node_str = '{:}<-{:}'.format(i+2, j) 95 | op = self.edges[ node_str ] 96 | clist.append( op(states[j]) ) 97 | states.append( sum(clist) ) 98 | return torch.cat([states[x] for x in self._concats], dim=1) 99 | 100 | 101 | class AuxiliaryHeadCIFAR(nn.Module): 102 | 103 | def __init__(self, C, num_classes): 104 | """assuming input size 8x8""" 105 | super(AuxiliaryHeadCIFAR, self).__init__() 106 | self.features = nn.Sequential( 107 | nn.ReLU(inplace=True), 108 | nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2 109 | nn.Conv2d(C, 128, 1, bias=False), 110 | nn.BatchNorm2d(128), 111 | nn.ReLU(inplace=True), 112 | nn.Conv2d(128, 768, 2, bias=False), 113 | nn.BatchNorm2d(768), 114 | nn.ReLU(inplace=True) 115 | ) 116 | self.classifier = nn.Linear(768, num_classes) 117 | 118 | def forward(self, x): 119 | x = self.features(x) 120 | x = self.classifier(x.view(x.size(0),-1)) 121 | return x 122 | -------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/cell_infers/nasnet_cifar.py: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # 3 | ##################################################### 4 | from copy import deepcopy 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from .cells import AuxiliaryHeadCIFAR 10 | from .cells import NASNetInferCell as InferCell 11 | 12 | 13 | # The macro structure is based on NASNet 14 | class NASNetonCIFAR(nn.Module): 15 | 16 | def __init__(self, C, N, stem_multiplier, num_classes, genotype, auxiliary, affine=True, track_running_stats=True): 17 | super(NASNetonCIFAR, self).__init__() 18 | self._C = C 19 | self._layerN = N 20 | self.stem = nn.Sequential( 21 | nn.Conv2d(3, C*stem_multiplier, kernel_size=3, padding=1, bias=False), 22 | nn.BatchNorm2d(C*stem_multiplier)) 23 | 24 | # config for each layer 25 | layer_channels = [C ] * N + [C*2 ] + [C*2 ] * (N-1) + [C*4 ] + [C*4 ] * (N-1) 26 | layer_reductions = [False] * N + [True] + [False] * (N-1) + [True] + [False] * (N-1) 27 | 28 | C_prev_prev, C_prev, C_curr, reduction_prev = C*stem_multiplier, C*stem_multiplier, C, False 29 | self.auxiliary_index = None 30 | self.auxiliary_head = None 31 | self.cells = nn.ModuleList() 32 | for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)): 33 | cell = InferCell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev, affine, track_running_stats) 34 | self.cells.append( cell ) 35 | C_prev_prev, C_prev, reduction_prev = C_prev, cell._multiplier*C_curr, reduction 36 | if reduction and C_curr == C*4 and auxiliary: 37 | self.auxiliary_head = AuxiliaryHeadCIFAR(C_prev, num_classes) 38 | self.auxiliary_index = index 39 | self._Layer = len(self.cells) 40 | self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True)) 41 | self.global_pooling = nn.AdaptiveAvgPool2d(1) 42 | self.classifier = nn.Linear(C_prev, num_classes) 43 | self.drop_path_prob = -1 44 | 45 | def update_drop_path(self, drop_path_prob): 46 | self.drop_path_prob = 
drop_path_prob 47 | 48 | def auxiliary_param(self): 49 | if self.auxiliary_head is None: return [] 50 | else: return list( self.auxiliary_head.parameters() ) 51 | 52 | def get_message(self): 53 | string = self.extra_repr() 54 | for i, cell in enumerate(self.cells): 55 | string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr()) 56 | return string 57 | 58 | def extra_repr(self): 59 | return ('{name}(C={_C}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__)) 60 | 61 | def forward(self, inputs): 62 | stem_feature, logits_aux = self.stem(inputs), None 63 | cell_results = [stem_feature, stem_feature] 64 | for i, cell in enumerate(self.cells): 65 | cell_feature = cell(cell_results[-2], cell_results[-1], self.drop_path_prob) 66 | cell_results.append( cell_feature ) 67 | if self.auxiliary_index is not None and i == self.auxiliary_index and self.training: 68 | logits_aux = self.auxiliary_head( cell_results[-1] ) 69 | out = self.lastact(cell_results[-1]) 70 | out = self.global_pooling( out ) 71 | out = out.view(out.size(0), -1) 72 | logits = self.classifier(out) 73 | if logits_aux is None: return out, logits 74 | else: return out, [logits, logits_aux] 75 | -------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/cell_infers/tiny_network.py: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # 3 | ##################################################### 4 | import torch.nn as nn 5 | 6 | from ..cell_operations import ResNetBasicblock 7 | from .cells import InferCell 8 | 9 | 10 | # The macro structure for architectures in NAS-Bench-201 11 | class TinyNetwork(nn.Module): 12 | 13 | def __init__(self, C, N, genotype, num_classes): 14 | super(TinyNetwork, self).__init__() 15 | self._C = C 16 | self._layerN = N 17 | 18 | self.stem = nn.Sequential( 19 | nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False), 20 | nn.BatchNorm2d(C)) 21 | 22 | layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N 23 | layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N 24 | 25 | C_prev = C 26 | self.cells = nn.ModuleList() 27 | for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)): 28 | if reduction: 29 | cell = ResNetBasicblock(C_prev, C_curr, 2, True) 30 | else: 31 | cell = InferCell(genotype, C_prev, C_curr, 1) 32 | self.cells.append( cell ) 33 | C_prev = cell.out_dim 34 | self._Layer= len(self.cells) 35 | 36 | self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True)) 37 | self.global_pooling = nn.AdaptiveAvgPool2d(1) 38 | self.classifier = nn.Linear(C_prev, num_classes) 39 | 40 | def get_message(self): 41 | string = self.extra_repr() 42 | for i, cell in enumerate(self.cells): 43 | string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr()) 44 | return string 45 | 46 | def extra_repr(self): 47 | return ('{name}(C={_C}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__)) 48 | 49 | def forward(self, inputs): 50 | feature = self.stem(inputs) 51 | for i, cell in enumerate(self.cells): 52 | feature = cell(feature) 53 | 54 | out = self.lastact(feature) 55 | out = self.global_pooling( out ) 56 | out = out.view(out.size(0), -1) 57 | logits = self.classifier(out) 58 | 59 | return out, logits 60 | 
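For reference, a minimal smoke-test sketch for TinyNetwork in the spirit of the commented-out test() helpers in the cifarmodels files above; the NAS-Bench-201 defaults (C=16, N=5) and the pre-built genotype are assumptions here, since genotypes are constructed by the sibling genotypes.py module and are not shown in this file:
# Hypothetical usage (not part of the original file):
# genotype = ...  # a cell structure parsed from a NAS-Bench-201 arch string via genotypes.py
# net = TinyNetwork(C=16, N=5, genotype=genotype, num_classes=10)
# features, logits = net(torch.randn(2, 3, 32, 32))  # forward() returns (features, logits)
# assert logits.shape == (2, 10)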
-------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/configure_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import json 8 | import os 9 | from collections import namedtuple 10 | from os import path as osp 11 | from pathlib import Path 12 | 13 | support_types = ('str', 'int', 'bool', 'float', 'none') 14 | 15 | 16 | def convert_param(original_lists): 17 | assert isinstance(original_lists, list), 'The type is not right : {:}'.format(original_lists) 18 | ctype, value = original_lists[0], original_lists[1] 19 | assert ctype in support_types, 'Ctype={:}, support={:}'.format(ctype, support_types) 20 | is_list = isinstance(value, list) 21 | if not is_list: value = [value] 22 | outs = [] 23 | for x in value: 24 | if ctype == 'int': 25 | x = int(x) 26 | elif ctype == 'str': 27 | x = str(x) 28 | elif ctype == 'bool': 29 | x = bool(int(x)) 30 | elif ctype == 'float': 31 | x = float(x) 32 | elif ctype == 'none': 33 | if x.lower() != 'none': 34 | raise ValueError('For the none type, the value must be none instead of {:}'.format(x)) 35 | x = None 36 | else: 37 | raise TypeError('Does not know this type : {:}'.format(ctype)) 38 | outs.append(x) 39 | if not is_list: outs = outs[0] 40 | return outs 41 | 42 | 43 | def load_config(path, extra, logger): 44 | path = str(path) 45 | if hasattr(logger, 'log'): logger.log(path) 46 | assert os.path.exists(path), 'Can not find {:}'.format(path) 47 | # Reading data back 48 | with open(path, 'r') as f: 49 | data = json.load(f) 50 | content = { k: convert_param(v) for k,v in data.items()} 51 | assert extra is None or isinstance(extra, dict), 'invalid type of extra : {:}'.format(extra) 52 | if isinstance(extra, dict): content = {**content, **extra} 53 | Arguments = namedtuple('Configure', ' '.join(content.keys())) 54 | content = Arguments(**content) 55 | if hasattr(logger, 'log'): logger.log('{:}'.format(content)) 56 | return content 57 | 58 | 59 | def configure2str(config, xpath=None): 60 | if not isinstance(config, dict): 61 | config = config._asdict() 62 | def cstring(x): 63 | return "\"{:}\"".format(x) 64 | def gtype(x): 65 | if isinstance(x, list): x = x[0] 66 | if isinstance(x, str) : return 'str' 67 | elif isinstance(x, bool) : return 'bool' 68 | elif isinstance(x, int): return 'int' 69 | elif isinstance(x, float): return 'float' 70 | elif x is None : return 'none' 71 | else: raise ValueError('invalid : {:}'.format(x)) 72 | def cvalue(x, xtype): 73 | if isinstance(x, list): is_list = True 74 | else: 75 | is_list, x = False, [x] 76 | temps = [] 77 | for temp in x: 78 | if xtype == 'bool' : temp = cstring(int(temp)) 79 | elif xtype == 'none': temp = cstring('None') 80 | else : temp = cstring(temp) 81 | temps.append( temp ) 82 | if is_list: 83 | return "[{:}]".format( ', '.join( temps ) ) 84 | else: 85 | return temps[0] 86 | 87 | xstrings = [] 88 | for key, value in config.items(): 89 | xtype = gtype(value) 90 | string = ' {:20s} : [{:8s}, {:}]'.format(cstring(key), cstring(xtype), cvalue(value, xtype)) 91 | xstrings.append(string) 92 | Fstring = '{\n' + ',\n'.join(xstrings) + '\n}' 93 | if xpath is not None: 94 | parent = Path(xpath).resolve().parent 95 | parent.mkdir(parents=True, exist_ok=True) 96 | if osp.isfile(xpath): os.remove(xpath) 
97 | with open(xpath, "w") as text_file: 98 | text_file.write('{:}'.format(Fstring)) 99 | return Fstring 100 | 101 | 102 | def dict2config(xdict, logger): 103 | assert isinstance(xdict, dict), 'invalid type : {:}'.format( type(xdict) ) 104 | Arguments = namedtuple('Configure', ' '.join(xdict.keys())) 105 | content = Arguments(**xdict) 106 | if hasattr(logger, 'log'): logger.log('{:}'.format(content)) 107 | return content 108 | -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/models/torchcv/__init__.py -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/models/torchcv/models/__init__.py -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/jasperdr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Jasper DR (Dense Residual) for ASR, implemented in PyTorch. 3 | Original paper: 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' https://arxiv.org/abs/1904.03288. 4 | """ 5 | 6 | __all__ = ['jasperdr10x5_en', 'jasperdr10x5_en_nr'] 7 | 8 | from .jasper import get_jasper 9 | 10 | 11 | def jasperdr10x5_en(num_classes=29, **kwargs): 12 | """ 13 | Jasper DR 10x5 model for English language from 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' 14 | https://arxiv.org/abs/1904.03288. 15 | 16 | Parameters: 17 | ---------- 18 | num_classes : int, default 29 19 | Number of classification classes (number of graphemes). 20 | pretrained : bool, default False 21 | Whether to load the pretrained weights for model. 22 | root : str, default '~/.torch/models' 23 | Location for keeping the model parameters. 24 | """ 25 | return get_jasper(num_classes=num_classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en", 26 | **kwargs) 27 | 28 | 29 | def jasperdr10x5_en_nr(num_classes=29, **kwargs): 30 | """ 31 | Jasper DR 10x5 model for English language (with presence of noise) from 'Jasper: An End-to-End Convolutional Neural 32 | Acoustic Model,' https://arxiv.org/abs/1904.03288. 33 | 34 | Parameters: 35 | ---------- 36 | num_classes : int, default 29 37 | Number of classification classes (number of graphemes). 38 | pretrained : bool, default False 39 | Whether to load the pretrained weights for model. 40 | root : str, default '~/.torch/models' 41 | Location for keeping the model parameters. 
42 | """ 43 | return get_jasper(num_classes=num_classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en_nr", 44 | **kwargs) 45 | 46 | 47 | def _calc_width(net): 48 | import numpy as np 49 | net_params = filter(lambda p: p.requires_grad, net.parameters()) 50 | weight_count = 0 51 | for param in net_params: 52 | weight_count += np.prod(param.size()) 53 | return weight_count 54 | 55 | 56 | def _test(): 57 | import numpy as np 58 | import torch 59 | 60 | pretrained = False 61 | audio_features = 64 62 | 63 | models = [ 64 | jasperdr10x5_en, 65 | jasperdr10x5_en_nr, 66 | ] 67 | 68 | for model in models: 69 | 70 | net = model( 71 | in_channels=audio_features, 72 | pretrained=pretrained) 73 | 74 | # net.train() 75 | net.eval() 76 | weight_count = _calc_width(net) 77 | print("m={}, {}".format(model.__name__, weight_count)) 78 | assert (model != jasperdr10x5_en or weight_count == 332632349) 79 | assert (model != jasperdr10x5_en_nr or weight_count == 332632349) 80 | 81 | batch = 3 82 | seq_len = np.random.randint(60, 150, batch) 83 | seq_len_max = seq_len.max() + 2 84 | x = torch.randn(batch, audio_features, seq_len_max) 85 | x_len = torch.tensor(seq_len, dtype=torch.long, device=x.device) 86 | 87 | y, y_len = net(x, x_len) 88 | # y.sum().backward() 89 | assert (tuple(y.size())[:2] == (batch, net.num_classes)) 90 | assert (y.size()[2] in [seq_len_max // 2, seq_len_max // 2 + 1]) 91 | 92 | 93 | if __name__ == "__main__": 94 | _test() 95 | -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/mobilenetb.py: -------------------------------------------------------------------------------- 1 | """ 2 | MobileNet(B) with simplified depthwise separable convolution block for ImageNet-1K, implemented in Gluon. 3 | Original paper: 'MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,' 4 | https://arxiv.org/abs/1704.04861. 5 | """ 6 | 7 | __all__ = ['mobilenetb_w1', 'mobilenetb_w3d4', 'mobilenetb_wd2', 'mobilenetb_wd4'] 8 | 9 | from .mobilenet import get_mobilenet 10 | 11 | 12 | def mobilenetb_w1(**kwargs): 13 | """ 14 | 1.0 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 15 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 16 | 17 | Parameters: 18 | ---------- 19 | pretrained : bool, default False 20 | Whether to load the pretrained weights for model. 21 | root : str, default '~/.torch/models' 22 | Location for keeping the model parameters. 23 | """ 24 | return get_mobilenet(width_scale=1.0, dws_simplified=True, model_name="mobilenetb_w1", **kwargs) 25 | 26 | 27 | def mobilenetb_w3d4(**kwargs): 28 | """ 29 | 0.75 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 30 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 31 | 32 | Parameters: 33 | ---------- 34 | pretrained : bool, default False 35 | Whether to load the pretrained weights for model. 36 | root : str, default '~/.torch/models' 37 | Location for keeping the model parameters. 
38 | """ 39 | return get_mobilenet(width_scale=0.75, dws_simplified=True, model_name="mobilenetb_w3d4", **kwargs) 40 | 41 | 42 | def mobilenetb_wd2(**kwargs): 43 | """ 44 | 0.5 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 45 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 46 | 47 | Parameters: 48 | ---------- 49 | pretrained : bool, default False 50 | Whether to load the pretrained weights for model. 51 | root : str, default '~/.torch/models' 52 | Location for keeping the model parameters. 53 | """ 54 | return get_mobilenet(width_scale=0.5, dws_simplified=True, model_name="mobilenetb_wd2", **kwargs) 55 | 56 | 57 | def mobilenetb_wd4(**kwargs): 58 | """ 59 | 0.25 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 60 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 61 | 62 | Parameters: 63 | ---------- 64 | pretrained : bool, default False 65 | Whether to load the pretrained weights for model. 66 | root : str, default '~/.torch/models' 67 | Location for keeping the model parameters. 68 | """ 69 | return get_mobilenet(width_scale=0.25, dws_simplified=True, model_name="mobilenetb_wd4", **kwargs) 70 | 71 | 72 | def _calc_width(net): 73 | import numpy as np 74 | net_params = filter(lambda p: p.requires_grad, net.parameters()) 75 | weight_count = 0 76 | for param in net_params: 77 | weight_count += np.prod(param.size()) 78 | return weight_count 79 | 80 | 81 | def _test(): 82 | import torch 83 | 84 | pretrained = False 85 | 86 | models = [ 87 | mobilenetb_w1, 88 | mobilenetb_w3d4, 89 | mobilenetb_wd2, 90 | mobilenetb_wd4, 91 | ] 92 | 93 | for model in models: 94 | 95 | net = model(pretrained=pretrained) 96 | 97 | # net.train() 98 | net.eval() 99 | weight_count = _calc_width(net) 100 | print("m={}, {}".format(model.__name__, weight_count)) 101 | assert (model != mobilenetb_w1 or weight_count == 4222056) 102 | assert (model != mobilenetb_w3d4 or weight_count == 2578120) 103 | assert (model != mobilenetb_wd2 or weight_count == 1326632) 104 | assert (model != mobilenetb_wd4 or weight_count == 467592) 105 | 106 | x = torch.randn(1, 3, 224, 224) 107 | y = net(x) 108 | y.sum().backward() 109 | assert (tuple(y.size()) == (1, 1000)) 110 | 111 | 112 | if __name__ == "__main__": 113 | _test() 114 | -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/others/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/models/torchcv/models/others/__init__.py -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/proxylessnas_cub.py: -------------------------------------------------------------------------------- 1 | """ 2 | ProxylessNAS for CUB-200-2011, implemented in Gluon. 3 | Original paper: 'ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware,' 4 | https://arxiv.org/abs/1812.00332. 
5 | """ 6 | 7 | __all__ = ['proxylessnas_cpu_cub', 'proxylessnas_gpu_cub', 'proxylessnas_mobile_cub', 'proxylessnas_mobile14_cub'] 8 | 9 | from .proxylessnas import get_proxylessnas 10 | 11 | 12 | def proxylessnas_cpu_cub(num_classes=200, **kwargs): 13 | """ 14 | ProxylessNAS (CPU) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task and 15 | Hardware,' https://arxiv.org/abs/1812.00332. 16 | 17 | Parameters: 18 | ---------- 19 | num_classes : int, default 200 20 | Number of classification classes. 21 | pretrained : bool, default False 22 | Whether to load the pretrained weights for model. 23 | root : str, default '~/.torch/models' 24 | Location for keeping the model parameters. 25 | """ 26 | return get_proxylessnas(num_classes=num_classes, version="cpu", model_name="proxylessnas_cpu_cub", **kwargs) 27 | 28 | 29 | def proxylessnas_gpu_cub(num_classes=200, **kwargs): 30 | """ 31 | ProxylessNAS (GPU) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task and 32 | Hardware,' https://arxiv.org/abs/1812.00332. 33 | 34 | Parameters: 35 | ---------- 36 | num_classes : int, default 200 37 | Number of classification classes. 38 | pretrained : bool, default False 39 | Whether to load the pretrained weights for model. 40 | root : str, default '~/.torch/models' 41 | Location for keeping the model parameters. 42 | """ 43 | return get_proxylessnas(num_classes=num_classes, version="gpu", model_name="proxylessnas_gpu_cub", **kwargs) 44 | 45 | 46 | def proxylessnas_mobile_cub(num_classes=200, **kwargs): 47 | """ 48 | ProxylessNAS (Mobile) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task 49 | and Hardware,' https://arxiv.org/abs/1812.00332. 50 | 51 | Parameters: 52 | ---------- 53 | num_classes : int, default 200 54 | Number of classification classes. 55 | pretrained : bool, default False 56 | Whether to load the pretrained weights for model. 57 | root : str, default '~/.torch/models' 58 | Location for keeping the model parameters. 59 | """ 60 | return get_proxylessnas(num_classes=num_classes, version="mobile", model_name="proxylessnas_mobile_cub", **kwargs) 61 | 62 | 63 | def proxylessnas_mobile14_cub(num_classes=200, **kwargs): 64 | """ 65 | ProxylessNAS (Mobile-14) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task 66 | and Hardware,' https://arxiv.org/abs/1812.00332. 67 | 68 | Parameters: 69 | ---------- 70 | num_classes : int, default 200 71 | Number of classification classes. 72 | pretrained : bool, default False 73 | Whether to load the pretrained weights for model. 74 | root : str, default '~/.torch/models' 75 | Location for keeping the model parameters. 
76 | """ 77 | return get_proxylessnas(num_classes=num_classes, version="mobile14", model_name="proxylessnas_mobile14_cub", 78 | **kwargs) 79 | 80 | 81 | def _calc_width(net): 82 | import numpy as np 83 | net_params = filter(lambda p: p.requires_grad, net.parameters()) 84 | weight_count = 0 85 | for param in net_params: 86 | weight_count += np.prod(param.size()) 87 | return weight_count 88 | 89 | 90 | def _test(): 91 | import torch 92 | 93 | pretrained = False 94 | 95 | models = [ 96 | proxylessnas_cpu_cub, 97 | proxylessnas_gpu_cub, 98 | proxylessnas_mobile_cub, 99 | proxylessnas_mobile14_cub, 100 | ] 101 | 102 | for model in models: 103 | 104 | net = model(pretrained=pretrained) 105 | 106 | # net.train() 107 | net.eval() 108 | weight_count = _calc_width(net) 109 | print("m={}, {}".format(model.__name__, weight_count)) 110 | assert (model != proxylessnas_cpu_cub or weight_count == 3215248) 111 | assert (model != proxylessnas_gpu_cub or weight_count == 5736648) 112 | assert (model != proxylessnas_mobile_cub or weight_count == 3055712) 113 | assert (model != proxylessnas_mobile14_cub or weight_count == 5423168) 114 | 115 | x = torch.randn(14, 3, 224, 224) 116 | y = net(x) 117 | y.sum().backward() 118 | assert (tuple(y.size()) == (14, 200)) 119 | 120 | 121 | if __name__ == "__main__": 122 | _test() 123 | -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/zfnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | ZFNet for ImageNet-1K, implemented in PyTorch. 3 | Original paper: 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 4 | """ 5 | 6 | __all__ = ['zfnet', 'zfnetb'] 7 | 8 | import os 9 | 10 | from .alexnet import AlexNet 11 | 12 | 13 | def get_zfnet(version="a", 14 | model_name=None, 15 | pretrained=False, 16 | root=os.path.join("~", ".torch", "models"), 17 | **kwargs): 18 | """ 19 | Create ZFNet model with specific parameters. 20 | 21 | Parameters: 22 | ---------- 23 | version : str, default 'a' 24 | Version of ZFNet ('a' or 'b'). 25 | model_name : str or None, default None 26 | Model name for loading pretrained model. 27 | pretrained : bool, default False 28 | Whether to load the pretrained weights for model. 29 | root : str, default '~/.torch/models' 30 | Location for keeping the model parameters. 
31 | """ 32 | if version == "a": 33 | channels = [[96], [256], [384, 384, 256]] 34 | kernel_sizes = [[7], [5], [3, 3, 3]] 35 | strides = [[2], [2], [1, 1, 1]] 36 | paddings = [[1], [0], [1, 1, 1]] 37 | use_lrn = True 38 | elif version == "b": 39 | channels = [[96], [256], [512, 1024, 512]] 40 | kernel_sizes = [[7], [5], [3, 3, 3]] 41 | strides = [[2], [2], [1, 1, 1]] 42 | paddings = [[1], [0], [1, 1, 1]] 43 | use_lrn = True 44 | else: 45 | raise ValueError("Unsupported ZFNet version {}".format(version)) 46 | 47 | net = AlexNet( 48 | channels=channels, 49 | kernel_sizes=kernel_sizes, 50 | strides=strides, 51 | paddings=paddings, 52 | use_lrn=use_lrn, 53 | **kwargs) 54 | 55 | if pretrained: 56 | if (model_name is None) or (not model_name): 57 | raise ValueError("Parameter `model_name` should be properly initialized for loading pretrained model.") 58 | from .model_store import download_model 59 | download_model( 60 | net=net, 61 | model_name=model_name, 62 | local_model_store_dir_path=root) 63 | 64 | return net 65 | 66 | 67 | def zfnet(**kwargs): 68 | """ 69 | ZFNet model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 70 | 71 | Parameters: 72 | ---------- 73 | pretrained : bool, default False 74 | Whether to load the pretrained weights for model. 75 | root : str, default '~/.torch/models' 76 | Location for keeping the model parameters. 77 | """ 78 | return get_zfnet(model_name="zfnet", **kwargs) 79 | 80 | 81 | def zfnetb(**kwargs): 82 | """ 83 | ZFNet-b model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 84 | 85 | Parameters: 86 | ---------- 87 | pretrained : bool, default False 88 | Whether to load the pretrained weights for model. 89 | root : str, default '~/.torch/models' 90 | Location for keeping the model parameters. 
91 | """ 92 | return get_zfnet(version="b", model_name="zfnetb", **kwargs) 93 | 94 | 95 | def _calc_width(net): 96 | import numpy as np 97 | net_params = filter(lambda p: p.requires_grad, net.parameters()) 98 | weight_count = 0 99 | for param in net_params: 100 | weight_count += np.prod(param.size()) 101 | return weight_count 102 | 103 | 104 | def _test(): 105 | import torch 106 | 107 | pretrained = False 108 | 109 | models = [ 110 | zfnet, 111 | zfnetb, 112 | ] 113 | 114 | for model in models: 115 | 116 | net = model(pretrained=pretrained) 117 | 118 | # net.train() 119 | net.eval() 120 | weight_count = _calc_width(net) 121 | print("m={}, {}".format(model.__name__, weight_count)) 122 | assert (model != zfnet or weight_count == 62357608) 123 | assert (model != zfnetb or weight_count == 107627624) 124 | 125 | x = torch.randn(1, 3, 224, 224) 126 | y = net(x) 127 | y.sum().backward() 128 | assert (tuple(y.size()) == (1, 1000)) 129 | 130 | 131 | if __name__ == "__main__": 132 | _test() 133 | -------------------------------------------------------------------------------- /evals/ray_tune/models/vgg.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | import torch.nn.init as init 5 | 6 | __all__ = [ 7 | 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 8 | 'vgg19_bn', 'vgg19', 'make_layers', 'vgg_zoo' 9 | ] 10 | 11 | 12 | class VGG(nn.Module): 13 | ''' 14 | VGG model 15 | ''' 16 | def __init__(self, features): 17 | super(VGG, self).__init__() 18 | self.features, self.classifier = features 19 | # Initialize weights 20 | for m in self.modules(): 21 | if isinstance(m, nn.Conv2d): 22 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 23 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 24 | m.bias.data.zero_() 25 | 26 | 27 | def forward(self, x): 28 | x = self.features(x) 29 | x = x.view(x.size(0), -1) 30 | x = self.classifier(x) 31 | return x 32 | 33 | 34 | def make_layers(cfg, batch_norm=False, k = 3, num_of_class = 10): 35 | layers = [] 36 | in_channels = 3 37 | for v in cfg: 38 | if v == 'M': 39 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 40 | else: 41 | conv2d = nn.Conv2d(in_channels, v, kernel_size=k, padding=int((k-1)/2)) 42 | if batch_norm: 43 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 44 | else: 45 | layers += [conv2d, nn.ReLU(inplace=True)] 46 | in_channels = v 47 | classifier = nn.Sequential( 48 | nn.Dropout(), 49 | nn.Linear(in_channels, 512), 50 | nn.ReLU(True), 51 | nn.Dropout(), 52 | nn.Linear(512, 512), 53 | nn.ReLU(True), 54 | nn.Linear(512, num_of_class), 55 | ) 56 | return nn.Sequential(*layers), classifier 57 | 58 | 59 | cfg = { 60 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 61 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 62 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 63 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 64 | 512, 512, 512, 512, 'M'], 65 | } 66 | 67 | 68 | def vgg11(): 69 | """VGG 11-layer model (configuration "A")""" 70 | return VGG(make_layers(cfg['A'])) 71 | 72 | 73 | def vgg11_bn(): 74 | """VGG 11-layer model (configuration "A") with batch normalization""" 75 | return VGG(make_layers(cfg['A'], batch_norm=True)) 76 | 77 | 78 | def vgg13(): 79 | """VGG 13-layer model (configuration "B")""" 80 | return VGG(make_layers(cfg['B'])) 81 | 82 | 83 | def vgg13_bn(): 84 | """VGG 13-layer model (configuration "B") with batch normalization""" 85 | return VGG(make_layers(cfg['B'], batch_norm=True)) 86 | 87 | 88 | def vgg16(): 89 | """VGG 16-layer model (configuration "D")""" 90 | return VGG(make_layers(cfg['D'])) 91 | 92 | 93 | def vgg16_bn(): 94 | """VGG 16-layer model (configuration "D") with batch normalization""" 95 | return VGG(make_layers(cfg['D'], batch_norm=True)) 96 | 97 | 98 | def vgg19(): 99 | """VGG 19-layer model (configuration "E")""" 100 | return VGG(make_layers(cfg['E'])) 101 | 102 | 103 | def vgg19_bn(): 104 | """VGG 19-layer model (configuration 'E') with batch normalization""" 105 | return VGG(make_layers(cfg['E'], batch_norm=True)) 106 | 107 | 108 | def vgg_zoo(): 109 | """VGG 16 model zoo""" 110 | 111 | config_list = [] 112 | init = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'] 113 | filter_num = [1.25, 1.5, 1.75, 2] 114 | k_size = [3, 5] 115 | 116 | counter = 0 117 | for num in filter_num: 118 | temp = [i for i in init] 119 | for j in range(len(init)): 120 | if temp[j] != "M": 121 | temp[j] = int(temp[j]*num) 122 | counter += 1 123 | for k in k_size: 124 | config_list.append(([t for t in temp], k)) 125 | return config_list 126 | 127 | #vggzoo = vgg_zoo() 128 | #print(vggzoo) 129 | #for i in range(len(vggzoo)): 130 | # for j in range(i+1, len(vggzoo)): 131 | # assert (vggzoo[i] != vggzoo[j]) 132 | # 133 | #print(len(vggzoo)) 134 | -------------------------------------------------------------------------------- /evals/ray_tune/onlinescheduler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from typing import Dict, Optional 4 | 5 | from ray.tune import trial_runner 6 | from ray.tune.schedulers import TrialScheduler 7 | from 
ray.tune.trial import Trial 8 | 9 | 10 | class OnlineScheduler(TrialScheduler): 11 | """Wrapper scheduler that holds each trial until its configured arrival time has passed, deferring all other scheduling decisions to the wrapped scheduler.""" 12 | 13 | def __init__(self, scheduler): 14 | 15 | self.scheduler = scheduler 16 | self.start_time = time.time() 17 | 18 | def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", 19 | trial: Trial): 20 | return self.scheduler.on_trial_add(trial_runner, trial) 21 | 22 | def on_trial_error(self, trial_runner: "trial_runner.TrialRunner", 23 | trial: Trial): 24 | return self.scheduler.on_trial_error(trial_runner, trial) 25 | 26 | def on_trial_result(self, trial_runner: "trial_runner.TrialRunner", 27 | trial: Trial, result: Dict) -> str: 28 | return self.scheduler.on_trial_result(trial_runner, trial, result) 29 | 30 | def on_trial_complete(self, trial_runner: "trial_runner.TrialRunner", 31 | trial: Trial, result: Dict): 32 | return self.scheduler.on_trial_complete(trial_runner, trial, result) 33 | 34 | def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner", 35 | trial: Trial): 36 | return self.scheduler.on_trial_remove(trial_runner, trial) 37 | 38 | def choose_trial_to_run( 39 | self, trial_runner: "trial_runner.TrialRunner") -> Optional[Trial]: 40 | 41 | # trial = self.scheduler.choose_trial_to_run(trial_runner) 42 | 43 | # if trial is None: 44 | # return trial 45 | # # get submission time 46 | # arrival_time = trial.config.get('config', {}).get('arrival', 0) 47 | # #job_name = trial.config.get('config', {}).get('name', None) 48 | # pending_time = arrival_time - (time.time() - self.start_time) 49 | 50 | # #logging.info(f"Supposed to submit job {job_name} at {arrival_time}, now is {time.time()-self.start_time}") 51 | # if pending_time > 0: 52 | # time.sleep(pending_time) 53 | 54 | # #logging.info(f"Submit job {job_name} at {time.time()-self.start_time}, Supposed at {arrival_time}") 55 | # return trial 56 | 57 | for trial in trial_runner.get_trials(): 58 | if (trial.status == Trial.PENDING and trial_runner.has_resources_for_trial(trial)): 59 | arrival_time = trial.config.get('config', {}).get('arrival', 0) 60 | #job_name = trial.config.get('config', {}).get('name', None) 61 | pending_time = arrival_time - (time.time() - self.start_time) 62 | if pending_time < 0: 63 | return trial 64 | for trial in trial_runner.get_trials(): 65 | if (trial.status == Trial.PAUSED and trial_runner.has_resources_for_trial(trial)): 66 | arrival_time = trial.config.get('config', {}).get('arrival', 0) 67 | #job_name = trial.config.get('config', {}).get('name', None) 68 | pending_time = arrival_time - (time.time() - self.start_time) 69 | if pending_time < 0: 70 | return trial 71 | return None 72 | 73 | def debug_string(self) -> str: 74 | return self.scheduler.debug_string() 75 | -------------------------------------------------------------------------------- /evals/ray_tune/setup/cluster_manager.py: -------------------------------------------------------------------------------- 1 | # Submit job to the remote cluster 2 | 3 | import datetime 4 | import os 5 | import pickle 6 | import random 7 | import subprocess 8 | import sys 9 | import time 10 | 11 | import yaml 12 | 13 | 14 | def load_yaml_conf(yaml_file): 15 | with open(yaml_file) as fin: 16 | data = yaml.load(fin, Loader=yaml.FullLoader) 17 | return data 18 | 19 | def process_cmd(yaml_file): 20 | 21 | yaml_conf = load_yaml_conf(yaml_file) 22 | 23 | master_ip = yaml_conf['master_ip'] 24 | worker_ips, total_gpus = [], [] 25 | cmd_script_list = [] 26 | 27 | executor_configs = 
";".join(yaml_conf['worker_ips']) 28 | for ip_gpu in yaml_conf['worker_ips']: 29 | ip, num_gpu = ip_gpu.strip().split(':') 30 | worker_ips.append(ip) 31 | total_gpus.append(num_gpu) 32 | 33 | time_stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%m%d_%H%M%S') 34 | running_vms = set() 35 | job_name = 'modelkeeper' 36 | log_path = './logs' 37 | submit_user = f"{yaml_conf['auth']['ssh_user']}@" if len(yaml_conf['auth']['ssh_user']) else "" 38 | 39 | job_conf = {'time_stamp':time_stamp, 40 | 'master_ip':master_ip, 41 | } 42 | 43 | for conf in yaml_conf['job_conf']: 44 | job_conf.update(conf) 45 | 46 | conf_script = '' 47 | setup_cmd = '' 48 | if yaml_conf['setup_commands'] is not None: 49 | setup_cmd += (yaml_conf['setup_commands'][0] + ' && ') 50 | for item in yaml_conf['setup_commands'][1:]: 51 | setup_cmd += (item + ' && ') 52 | 53 | cmd_sufix = f" " 54 | 55 | 56 | for conf_name in job_conf: 57 | conf_script = conf_script + f' --{conf_name}={job_conf[conf_name]}' 58 | if conf_name == "job_name": 59 | job_name = job_conf[conf_name] 60 | if conf_name == "log_path": 61 | log_path = os.path.join(job_conf[conf_name], 'log', job_name, time_stamp) 62 | 63 | # =========== Submit job to parameter server ============ 64 | running_vms.add(master_ip) 65 | ps_cmd = f'ray start --head --address={master_ip}:6379 --redis-password="5241590000000000" --num-cpus=1 --num-gpus=0 && sleep 240h' 66 | 67 | with open(f"{job_name}_logging", 'wb') as fout: 68 | pass 69 | 70 | print(f"Starting master on {master_ip}...") 71 | with open(f"{job_name}_logging", 'a') as fout: 72 | subprocess.Popen(f'ssh {submit_user}{master_ip} "{setup_cmd} {ps_cmd}"', 73 | shell=True, stdout=fout, stderr=fout) 74 | 75 | time.sleep(5) 76 | # =========== Submit job to each worker ============ 77 | rank_id = 1 78 | for worker, n_gpu in zip(worker_ips, total_gpus): 79 | running_vms.add(worker) 80 | print(f"Starting workers on {worker} ...") 81 | 82 | worker_cmd = f'ray start --address={master_ip}:6379 --redis-password="5241590000000000" --num-cpus={10} --num-gpus={n_gpu} && sleep 240h' 83 | 84 | with open(f"{job_name}_logging", 'a') as fout: 85 | time.sleep(0.5) 86 | subprocess.Popen(f'ssh {submit_user}{worker} "{setup_cmd} {worker_cmd}"', 87 | shell=True, stdout=fout, stderr=fout) 88 | 89 | # dump the address of running workers 90 | current_path = os.path.dirname(os.path.abspath(__file__)) 91 | job_name = os.path.join(current_path, job_name) 92 | with open(job_name, 'wb') as fout: 93 | job_meta = {'user':submit_user, 'vms': running_vms} 94 | pickle.dump(job_meta, fout) 95 | 96 | print(f"Submitted job, please check your logs ({log_path}) for status") 97 | 98 | 99 | def terminate(job_name): 100 | 101 | current_path = os.path.dirname(os.path.abspath(__file__)) 102 | job_meta_path = os.path.join(current_path, job_name) 103 | 104 | if not os.path.isfile(job_meta_path): 105 | print(f"Fail to terminate {job_name}, as it does not exist") 106 | 107 | with open(job_meta_path, 'rb') as fin: 108 | job_meta = pickle.load(fin) 109 | 110 | for vm_ip in job_meta['vms']: 111 | # os.system(f'scp shutdown.py {job_meta["user"]}{vm_ip}:~/') 112 | print(f"Shutting down job on {vm_ip}") 113 | with open(f"{job_name}_logging", 'a') as fout: 114 | subprocess.Popen(f'ssh {job_meta["user"]}{vm_ip} "ray stop; killall sleep"', 115 | shell=True, stdout=fout, stderr=fout) 116 | 117 | 118 | if sys.argv[1] == 'init': 119 | process_cmd(sys.argv[2]) 120 | elif sys.argv[1] == 'stop': 121 | terminate(sys.argv[2]) 122 | else: 123 | print("Unknown cmds ...") 124 | 
-------------------------------------------------------------------------------- /evals/ray_tune/setup/conf.yml: --------------------------------------------------------------------------------
1 | # Configuration file of the Ray experiment
2 | 
3 | # ========== Cluster configuration ==========
4 | # ip address of the master
5 | master_ip: 10.0.0.1
6 | 
7 | # each entry is <worker ip>:<number of available GPUs on that node>
8 | worker_ips:
9 |     - 10.0.0.1:4
10 |     - 10.0.0.2:4
11 |     - 10.0.0.3:4
12 | 
13 | auth:
14 |     ssh_user: ""
15 |     ssh_private_key: ~/.ssh/id_rsa
16 | 
17 | # commands to run (in order) before Ray itself can be started
18 | setup_commands:
19 |     - source $HOME/experiment/anaconda3/bin/activate modelkeeper
20 | 
21 | # ========== Additional job configuration ==========
22 | # Default parameters are specified in argParser.py, where each parameter is described in more detail
23 | 
24 | job_conf:
25 |     - job_name: modelkeeper # Generate logs under this folder: log_path/job_name/time_stamp
26 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/thirdparty/__init__.py
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/calculate_ged.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import contextlib
3 | import itertools
4 | import json
5 | import random
6 | import time
7 | from pathlib import Path
8 | 
9 | import joblib
10 | import networkx as nx
11 | from interruptingcow import Quota, timeout
12 | from joblib import Parallel, delayed
13 | from tqdm.auto import tqdm
14 | from utils import make_graph
15 | 
16 | 
17 | @contextlib.contextmanager
18 | def tqdm_joblib(tqdm_object):
19 |     """Context manager to patch joblib to report into tqdm progress bar given as argument"""
20 |     class TqdmBatchCompletionCallback:
21 |         def __init__(self, time, index, parallel):
22 |             self.index = index
23 |             self.parallel = parallel
24 | 
25 |         def __call__(self, index):
26 |             tqdm_object.update()
27 |             if self.parallel._original_iterator is not None:
28 |                 self.parallel.dispatch_next()
29 | 
30 |     old_batch_callback = joblib.parallel.BatchCompletionCallBack
31 |     joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
32 |     try:
33 |         yield tqdm_object
34 |     finally:
35 |         joblib.parallel.BatchCompletionCallBack = old_batch_callback
36 |         tqdm_object.close()
37 | 
38 | def calc_ged(recepie1, recepie2, timeout_val=600):
39 |     start_time = time.time()
40 |     G1 = make_graph(recepie1)
41 |     G2 = make_graph(recepie2)
42 |     ged = None
43 | 
44 |     try:
45 |         status = "OK"
46 |         with timeout(Quota(timeout_val), exception=RuntimeError):
47 |             for ged in nx.optimize_graph_edit_distance(G1, G2, lambda n1, n2: n1['op'] == n2['op']):
48 |                 pass
49 | 
50 |     except RuntimeError as e:
51 |         status = "Timeout"
52 | 
53 |     except Exception as e:
54 |         status = "Exception: " + str(e)
55 | 
56 |     return {
57 |         "recepie_i": recepie1,
58 |         "recepie_j": recepie2,
59 |         "ged": ged,
60 |         "time": time.time() - start_time,
61 |         "status": status
62 |     }
63 | 
64 | if __name__ == "__main__":
65 |     parser = argparse.ArgumentParser(description='Calculate GED')
66 |     parser.add_argument('--recepies', type=str, default="./new_recepies_fix.json",
67 |                         help='path to JSON file with recepies')
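    # The recepies JSON is assumed to be a list of recipe dicts, each mapping a
    # node name to {'op': ..., 'input': [...]} -- the format that make_graph()
    # in thirdparty/utils.py consumes.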
68 |     parser.add_argument('--num', type=int, default=10,
69 |                         help='number of randomly sampled recepies whose GED to all others is computed')
70 |     parser.add_argument('--timeout', type=int, default=600, help="timeout for calculating one GED value in seconds")
71 |     parser.add_argument('--n_jobs', type=int, default=-2,
72 |                         help="n_jobs in scikit-learn style")
73 |     parser.add_argument('--num_parts', type=int, default=10,
74 |                         help="number of parts to split the results into when saving")
75 | 
76 |     args = parser.parse_args()
77 | 
78 |     with open(args.recepies, "r") as f:
79 |         recepies = json.load(f)
80 | 
81 |     key_recepies = random.sample(recepies, args.num)
82 |     part_size = len(recepies)//args.num_parts
83 |     for part in range(1, args.num_parts+1):
84 |         _recepies = recepies[(part-1)*part_size:part*part_size]
85 |         combs = list(itertools.product(key_recepies, _recepies))
86 | 
87 |         with tqdm_joblib(tqdm(desc="GED part {} of {}".format(part, args.num_parts), total=len(combs))) as progress_bar:
88 |             results = Parallel(n_jobs=args.n_jobs, backend='multiprocessing')(delayed(calc_ged)(r1, r2, args.timeout) for r1, r2 in combs)
89 | 
90 |         with open("GED_CALC_RESULTS_part_{}.json".format(part), 'w') as f:
91 |             json.dump(results, f)
92 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/data.py: --------------------------------------------------------------------------------
1 | import os
2 | from collections import Counter
3 | 
4 | import torch
5 | 
6 | 
7 | class Dictionary(object):
8 |     def __init__(self):
9 |         self.word2idx = {}
10 |         self.idx2word = []
11 |         self.counter = Counter()
12 |         self.total = 0
13 | 
14 |     def add_word(self, word):
15 |         if word not in self.word2idx:
16 |             self.idx2word.append(word)
17 |             self.word2idx[word] = len(self.idx2word) - 1
18 |         token_id = self.word2idx[word]
19 |         self.counter[token_id] += 1
20 |         self.total += 1
21 |         return self.word2idx[word]
22 | 
23 |     def __len__(self):
24 |         return len(self.idx2word)
25 | 
26 | 
27 | class Corpus(object):
28 |     def __init__(self, path):
29 |         self.dictionary = Dictionary()
30 |         self.train = self.tokenize(os.path.join(path, 'train.txt'))
31 |         self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
32 |         self.test = self.tokenize(os.path.join(path, 'test.txt'))
33 | 
34 |     def tokenize(self, path):
35 |         """Tokenizes a text file."""
36 |         assert os.path.exists(path)
37 |         # Add words to the dictionary
38 |         with open(path, 'r') as f:
39 |             tokens = 0
40 |             for line in f:
41 |                 words = line.split() + ['<eos>']
42 |                 tokens += len(words)
43 |                 for word in words:
44 |                     self.dictionary.add_word(word)
45 | 
46 |         # Tokenize file content
47 |         with open(path, 'r') as f:
48 |             ids = torch.LongTensor(tokens)
49 |             token = 0
50 |             for line in f:
51 |                 words = line.split() + ['<eos>']
52 |                 for word in words:
53 |                     ids[token] = self.dictionary.word2idx[word]
54 |                     token += 1
55 | 
56 |         return ids
57 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/embed_regularize.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | 
5 | def embedded_dropout(embed, words, dropout=0.1, scale=None):
6 |     if dropout:  # drop whole embedding rows (words), rescaling survivors by 1/(1-dropout)
7 |         mask = embed.weight.data.new().resize_((embed.weight.size(0), 1)).bernoulli_(1 - dropout).expand_as(embed.weight) / (1 - dropout)
8 |         masked_embed_weight = mask * embed.weight
9 |     else:
10 |         masked_embed_weight = embed.weight
11 |     if scale:
12 |         masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight
13 | 
14 |     padding_idx = embed.padding_idx
15 |     if padding_idx is None:
16 |         padding_idx = -1
17 | 
18 |     X = torch.nn.functional.embedding(words, masked_embed_weight,
19 |         padding_idx, embed.max_norm, embed.norm_type,
20 |         embed.scale_grad_by_freq, embed.sparse
21 |     )
22 |     return X
23 | 
24 | if __name__ == '__main__':
25 |     V = 50
26 |     h = 4
27 |     bptt = 10
28 |     batch_size = 2
29 | 
30 |     embed = torch.nn.Embedding(V, h)
31 |     # sample random token ids in [0, V); randint's upper bound is exclusive
32 |     words = np.random.randint(low=0, high=V, size=(batch_size, bptt))
33 |     words = torch.LongTensor(words)
34 | 
35 |     origX = embed(words)
36 |     X = embedded_dropout(embed, words)
37 | 
38 |     print(origX)
39 |     print(X)
40 | 
41 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/locked_dropout.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | 
5 | 
6 | class LockedDropout(nn.Module):
7 |     def __init__(self):
8 |         super().__init__()
9 | 
10 |     def forward(self, x, dropout=0.5):  # variational dropout: one mask per sequence, shared across time steps
11 |         if not self.training or not dropout:
12 |             return x
13 |         m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
14 |         mask = Variable(m, requires_grad=False) / (1 - dropout)
15 |         mask = mask.expand_as(x)
16 |         return mask * x
17 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/multilinear.py: --------------------------------------------------------------------------------
1 | import math
2 | 
3 | import torch
4 | import torch.nn
5 | import torch.nn.functional as F
6 | 
7 | 
8 | class MultiLinear(torch.nn.Module):
9 |     """Linear layer over several inputs: y = sum_i (x_i @ W_i^T) + b."""
10 |     def __init__(self, input_sizes, output_size):
11 |         super(MultiLinear, self).__init__()
12 |         self.input_sizes = input_sizes
13 |         self.output_size = output_size
14 | 
15 |         weights = []
16 |         for input_size in input_sizes:
17 |             weights.append(torch.nn.Parameter(torch.Tensor(output_size, input_size)))
18 |         self.weights = torch.nn.ParameterList(weights)
19 | 
20 |         self.bias = torch.nn.Parameter(torch.Tensor(output_size))
21 | 
22 |         self.reset_parameters()
23 | 
24 |     def reset_parameters(self):
25 |         for i in range(len(self.weights)):
26 |             torch.nn.init.kaiming_uniform_(self.weights[i], a=math.sqrt(5))
27 | 
28 |         fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weights[0])
29 |         bound = 1 / math.sqrt(fan_in)
30 |         torch.nn.init.uniform_(self.bias, -bound, bound)
31 | 
32 |     def forward(self, *inputs):
33 |         result = F.linear(inputs[0], self.weights[0], self.bias)
34 |         for i in range(1, len(self.weights)):
35 |             result = result + F.linear(inputs[i], self.weights[i])
36 |         return result
37 | 
38 |     def extra_repr(self):
39 |         return 'input_sizes={}, output_size={}'.format(
40 |             self.input_sizes, self.output_size
41 |         )
42 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/nas_environment.py: --------------------------------------------------------------------------------
1 | import json
2 | import os
3 | 
4 | import numpy as np
5 | 
6 | 
7 | class Environment:
8 |     '''
9 |     Simulates a NAS environment. Architectures can be trained for a specified number of epochs.
10 |     Training results are cached, so training the same model for more epochs
11 |     is timed as a continuation from the model's checkpoint.
12 | ''' 13 | def __init__(self, logs_dir): 14 | self._logs = [] 15 | self._arch_to_id = {} 16 | 17 | arch_id = 0 18 | for i, filename in enumerate(os.listdir(logs_dir)): 19 | if filename.endswith('.json'): 20 | log_path = os.path.join(logs_dir, filename) 21 | x = json.load(open(log_path, 'r')) 22 | self._logs.append(x) 23 | assert x['recepie'] not in self._arch_to_id 24 | self._arch_to_id[x['recepie']] = arch_id 25 | arch_id += 1 26 | 27 | self._training_states = {} 28 | 29 | def get_total_time(self): 30 | return sum([x['wall_time'] for x in self._training_states.values()]) 31 | 32 | 33 | def get_best_possible_test_loss(self): 34 | min_loss = np.inf 35 | for log in self._logs: 36 | if len(log['test_losses']) > 0: 37 | cur_loss = np.nanmin(log['test_losses']) 38 | if cur_loss < min_loss: 39 | min_loss = cur_loss 40 | return min_loss 41 | 42 | def get_test_loss_of_the_best_validated_architecture(self): 43 | return self._logs[self.best_arch_id]['test_losses'][self.best_arch_epoch] 44 | 45 | def get_precomputed_recepies(self): 46 | return [json.loads(x['recepie']) for x in self._logs] 47 | 48 | def get_recepie_ids(self): 49 | return [x['recepie_id'] for x in self._logs] 50 | 51 | def reset(self): 52 | self.best_arch_id = -1 53 | self.best_arch_epoch = -1 54 | self._training_states = {} 55 | 56 | def _make_state_dict(self, arch_id, epoch): 57 | state_dict = {f'{phase}_loss':self._logs[arch_id][f'{phase}_losses'][epoch] if epoch >= 0 else np.nan 58 | for phase in ['train', 'val', 'test']} 59 | state_dict['wall_time'] = np.sum(self._logs[arch_id]['wall_times'][:epoch]) 60 | state_dict['cur_epoch'] = epoch 61 | state_dict['status'] = 'OK' if epoch < len(self._logs[arch_id]['train_losses']) - 1 else self._logs[arch_id]['status'] 62 | return state_dict 63 | 64 | def simulated_train(self, arch, max_epoch): 65 | arch_id = self._arch_to_id[json.dumps(arch)] 66 | if (arch_id not in self._training_states) or (max_epoch > self._training_states[arch_id]['cur_epoch']): 67 | max_epoch = min([max_epoch, len(self._logs[arch_id]['train_losses']) - 1]) 68 | self._training_states[arch_id] = self._make_state_dict(arch_id, max_epoch) 69 | 70 | # update best result 71 | val_losses = self._logs[arch_id]['val_losses'][:self._training_states[arch_id]['cur_epoch'] + 1] 72 | if np.sum(~np.isnan(val_losses)) > 0: 73 | cur_best_epoch = np.nanargmin(val_losses) 74 | if (self.best_arch_id == -1) or\ 75 | (self._logs[self.best_arch_id]['val_losses'][self.best_arch_epoch] > val_losses[cur_best_epoch]): 76 | self.best_arch_id = arch_id 77 | self.best_arch_epoch = cur_best_epoch 78 | 79 | def get_model_status(self, arch): 80 | arch_id = self._arch_to_id[json.dumps(arch)] 81 | return self._training_states[arch_id]['status'] 82 | 83 | def get_model_stats(self, arch, epoch): 84 | arch_id = self._arch_to_id[json.dumps(arch)] 85 | if self._training_states[arch_id]['cur_epoch'] < epoch: 86 | raise Exception('Required epoch exceeds current training epochs.') 87 | 88 | return self._make_state_dict(arch_id, epoch) -------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/train.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn 7 | from thirdparty.utils import get_batch, repackage_hidden 8 | 9 | 10 | def eval_nlp(model, criterion, data_source, batch_size, args, device=torch.device("cpu")): 11 | # Turn on evaluation mode which disables dropout. 
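    # Note: this evaluation loop could also run under torch.no_grad() to avoid
    # building autograd graphs; results are identical either way because only
    # detached `.data` values are accumulated below.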
    model.to(device).eval()
13 |     total_loss = 0
14 |     hidden = model.init_hidden(batch_size)
15 |     for i in range(0, data_source.size(0) - 1, args.bptt):
16 |         data, targets = get_batch(data_source, i, args, evaluation=True)
17 |         output, hidden = model(data, hidden)
18 |         total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
19 |         hidden = repackage_hidden(hidden)
20 |     return total_loss.item() / len(data_source)
21 | 
22 | 
23 | def train_nlp(model, optimizer, params, criterion, train_data, args, epoch, device=torch.device("cpu")):
24 |     # Turn on training mode which enables dropout.
25 |     total_loss = 0
26 |     start_time = time.time()
27 |     model.to(device).train()
28 |     hidden = model.init_hidden(args.batch_size)
29 |     batch, i = 0, 0
30 |     while i < train_data.size(0) - 1 - 1:
31 |         bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
32 |         # Prevent excessively small or negative sequence lengths
33 |         seq_len = max(5, int(np.random.normal(bptt, 5)))
34 |         # There's a very small chance that it could select a very long sequence length resulting in OOM
35 |         # seq_len = min(seq_len, args.bptt + 10)
36 | 
37 |         lr2 = optimizer.param_groups[0]['lr']
38 |         optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
39 |         model.train()
40 |         data, targets = get_batch(train_data, i, args, seq_len=seq_len)
41 | 
42 |         # Starting each batch, we detach the hidden state from how it was previously produced.
43 |         # If we didn't, the model would try backpropagating all the way to start of the dataset.
44 |         hidden = repackage_hidden(hidden)
45 |         optimizer.zero_grad()
46 | 
47 |         output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
48 |         raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)
49 | 
50 |         loss = raw_loss
51 |         # Activation Regularization
52 |         if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
53 |         # Temporal Activation Regularization (slowness)
54 |         if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
55 |         loss.backward()
56 | 
57 |         # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
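        # (torch.nn.utils.clip_grad_norm_ below is the in-place variant; the
        # older clip_grad_norm spelling is deprecated in PyTorch.)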
58 | if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip) 59 | optimizer.step() 60 | 61 | total_loss += raw_loss.data 62 | optimizer.param_groups[0]['lr'] = lr2 63 | if batch % args.log_interval == 0 and batch > 0: 64 | cur_loss = total_loss.item() / args.log_interval 65 | elapsed = time.time() - start_time 66 | print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | ' 67 | 'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format( 68 | epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'], 69 | elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2))) 70 | total_loss = 0 71 | start_time = time.time() 72 | ### 73 | batch += 1 74 | i += seq_len -------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/utils.py: -------------------------------------------------------------------------------- 1 | from itertools import permutations 2 | 3 | import networkx as nx 4 | import numpy as np 5 | import torch 6 | 7 | 8 | def repackage_hidden(h): 9 | """Wraps hidden states in new Tensors, 10 | to detach them from their history.""" 11 | if isinstance(h, torch.Tensor): 12 | return h.detach() 13 | else: 14 | return tuple(repackage_hidden(v) for v in h) 15 | 16 | 17 | def batchify(data, bsz, args, cuda='cuda'): 18 | # Work out how cleanly we can divide the dataset into bsz parts. 19 | nbatch = data.size(0) // bsz 20 | # Trim off any extra elements that wouldn't cleanly fit (remainders). 21 | data = data.narrow(0, 0, nbatch * bsz) 22 | # Evenly divide the data across the bsz batches. 23 | data = data.view(bsz, -1).t().contiguous() 24 | if args.cuda: 25 | data = data.to(cuda) 26 | return data 27 | 28 | 29 | def get_batch(source, i, args, seq_len=None, evaluation=False): 30 | seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i) 31 | data = source[i:i+seq_len] 32 | target = source[i+1:i+1+seq_len].view(-1) 33 | return data, target 34 | 35 | 36 | def make_graph(recepie): 37 | G = nx.DiGraph() 38 | 39 | for key in recepie.keys(): 40 | op = recepie[key]['op'] 41 | if key.startswith("h_new_"): 42 | op = key+":"+op 43 | G.add_node(key, name=key, op=op) 44 | for inp in recepie[key]['input']: 45 | if "h_prev" in inp or inp == "x": 46 | G.add_node(inp, name=inp, op=inp) 47 | else: 48 | G.add_node(inp, name=inp) 49 | G.add_edge(inp, key) 50 | return G 51 | 52 | 53 | def recepie2matrixops(recepie): 54 | G = make_graph(recepie) 55 | labels = nx.get_node_attributes(G, "op") 56 | nodelist_with_ops = np.array(list(labels.items())) 57 | 58 | matrix = nx.to_numpy_array(G, nodelist=nodelist_with_ops[:, 0]) 59 | ops = nodelist_with_ops[:, 1] 60 | 61 | return matrix, ops 62 | 63 | 64 | 65 | def graph_edit_distance(matrixops1, matrixops2): 66 | m1, l1 = matrixops1 67 | m2, l2 = matrixops2 68 | 69 | # Pad 70 | n1, n2 = m1.shape[0], m2.shape[0] 71 | max_n = max(n1, n2) 72 | m1 = np.pad(m1, ((0, max_n - m1.shape[0]), (0, max_n - m1.shape[0]))) 73 | m2 = np.pad(m2, ((0, max_n - m2.shape[0]), (0, max_n - m2.shape[0]))) 74 | l1 = np.pad(l1, (0, max_n - l1.shape[0]), constant_values=None) 75 | l2 = np.pad(l2, (0, max_n - l2.shape[0]), constant_values=None) 76 | 77 | 78 | d = 100000000 79 | for p in permutations(range(len(m1))): 80 | p = list(p) 81 | d_p = (m1 != m2[p][:, p]).sum() + (l1 != l2[p]).sum() 82 | d = min(d, d_p) 83 | return d 84 | -------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/weight_drop.py: 
-------------------------------------------------------------------------------- 1 | import functools 2 | from functools import wraps 3 | 4 | import torch 5 | from torch.nn import Parameter 6 | 7 | 8 | class WeightDrop(torch.nn.Module): 9 | def __init__(self, module, weights, dropout=0, variational=False): 10 | super(WeightDrop, self).__init__() 11 | self.module = module 12 | self.weights = weights 13 | self.dropout = dropout 14 | self.variational = variational 15 | self._setup() 16 | 17 | def widget_demagnetizer_y2k_edition(*args, **kwargs): 18 | # We need to replace flatten_parameters with a nothing function 19 | # It must be a function rather than a lambda as otherwise pickling explodes 20 | # We can't write boring code though, so ... WIDGET DEMAGNETIZER Y2K EDITION! 21 | # (╯°□°)╯︵ ┻━┻ 22 | return 23 | 24 | def _setup(self): 25 | # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN 26 | if issubclass(type(self.module), torch.nn.RNNBase): 27 | self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition 28 | 29 | for name_w in self.weights: 30 | #print('Applying weight drop of {} to {}'.format(self.dropout, name_w)) 31 | w = getattr(self.module, name_w) 32 | del self.module._parameters[name_w] 33 | self.module.register_parameter(name_w + '_raw', Parameter(w.data)) 34 | 35 | def _setweights(self): 36 | for name_w in self.weights: 37 | raw_w = getattr(self.module, name_w + '_raw') 38 | w = None 39 | if self.variational: 40 | mask = torch.autograd.Variable(torch.ones(raw_w.size(0), 1)) 41 | if raw_w.is_cuda: mask = mask.cuda() 42 | mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True) 43 | w = torch.nn.Parameter(mask.expand_as(raw_w) * raw_w) 44 | else: 45 | w = torch.nn.Parameter(torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training)) 46 | setattr(self.module, name_w, w) 47 | 48 | def forward(self, *args): 49 | self._setweights() 50 | return self.module.forward(*args) 51 | 52 | def rsetattr(obj, attr, val): 53 | pre, _, post = attr.rpartition('.') 54 | return setattr(rgetattr(obj, pre) if pre else obj, post, val) 55 | 56 | def rgetattr(obj, attr, *args): 57 | def _getattr(obj, attr): 58 | return getattr(obj, attr, *args) 59 | return functools.reduce(_getattr, [obj] + attr.split('.')) 60 | 61 | class ParameterListWeightDrop(torch.nn.Module): 62 | def __init__(self, module, weights, dropout=0, variational=False): 63 | super(ParameterListWeightDrop, self).__init__() 64 | self.module = module 65 | self.weights = weights 66 | self.parents = {} 67 | for w in self.weights: 68 | p = '.'.join(w.split('.')[:-1]) 69 | i = int(w.split('.')[-1]) 70 | if p not in self.parents: 71 | self.parents[p] = [] 72 | self.parents[p].append(i) 73 | self.dropout = dropout 74 | self.variational = variational 75 | self._setup() 76 | 77 | 78 | def _setup(self): 79 | for name_w in self.parents: 80 | #print('Applying weight drop of {} to {}'.format(self.dropout, name_w)) 81 | ws = rgetattr(self.module, name_w) 82 | rsetattr(self.module, name_w, None) 83 | rsetattr(self.module, name_w + '_raw', torch.nn.ParameterList(ws)) 84 | 85 | def _setweights(self): 86 | for name_w in self.parents: 87 | raw_ws = rgetattr(self.module, name_w + '_raw') 88 | ws = [] 89 | for i, raw_w in enumerate(raw_ws): 90 | if i in self.parents[name_w]: 91 | if self.variational: 92 | mask = torch.autograd.Variable(torch.ones(raw_w.size(0), 1)) 93 | if raw_w.is_cuda: mask = mask.cuda() 94 | mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True) 95 | w = 
torch.nn.Parameter(mask.expand_as(raw_w) * raw_w) 96 | else: 97 | w = torch.nn.Parameter(torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training)) 98 | else: 99 | w = raw_w 100 | ws.append(w) 101 | rsetattr(self.module, name_w, torch.nn.ParameterList(ws)) 102 | 103 | def forward(self, *args): 104 | self._setweights() 105 | return self.module.forward(*args) -------------------------------------------------------------------------------- /evals/ray_tune/workloads/nlp_list.csv: -------------------------------------------------------------------------------- 1 | name,arrival 2 | albert-base-v1,424 3 | YituTech/conv-bert-small,1232 4 | funnel-transformer/small,2608 5 | xlm-mlm-en-2048,2944 6 | roberta-base,5880 7 | microsoft/mpnet-base,10520 8 | junnyu/roformer_small_discriminator,12680 9 | google/electra-small-discriminator,14536 10 | xlm-roberta-large,15328 11 | albert-large-v1,15392 12 | bert-base-multilingual-cased,19504 13 | bert-base-cased,20144 14 | roberta-large,20280 15 | distilroberta-base,21880 16 | flaubert/flaubert_small_cased,22800 17 | funnel-transformer/intermediate,24584 18 | google/rembert,24856 19 | squeezebert/squeezebert-mnli,27224 20 | bert-large-cased,27776 21 | bert-large-uncased,29496 22 | albert-base-v2,29864 23 | microsoft/deberta-v2-xxlarge,32024 24 | google/mobilebert-uncased,32056 25 | flaubert/flaubert_large_cased,32624 26 | distilbert-base-uncased,32808 27 | flaubert/flaubert_base_cased,39584 28 | microsoft/deberta-large,39592 29 | bert-base-uncased,39640 30 | distilbert-base-cased,41376 31 | distilbert-base-multilingual-cased,42216 32 | microsoft/deberta-v2-xlarge,42280 33 | YituTech/conv-bert-medium-small,49088 34 | albert-xlarge-v2,50592 35 | google/electra-base-generator,51248 36 | facebook/bart-large,53848 37 | funnel-transformer/xlarge,55176 38 | funnel-transformer/large,62016 39 | microsoft/layoutlm-base-uncased,66296 40 | albert-xxlarge-v1,67552 41 | microsoft/deberta-xlarge,68256 42 | google/electra-large-generator,68912 43 | microsoft/layoutlm-large-uncased,73808 44 | google/bigbird-roberta-base,76400 45 | albert-xlarge-v1,76896 46 | xlm-mlm-100-1280,77856 47 | YituTech/conv-bert-base,79008 48 | google/bigbird-roberta-large,81208 49 | flaubert/flaubert_base_uncased,82544 50 | albert-xxlarge-v2,82552 51 | google/bigbird-base-trivia-itc,88264 52 | xlm-clm-enfr-1024,90048 53 | xlm-mlm-tlm-xnli15-1024,90304 54 | xlm-roberta-base,90336 55 | camembert-base,91568 56 | albert-large-v2,91928 57 | bert-base-multilingual-uncased,92120 58 | microsoft/deberta-base,94816 59 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/nlp_nwp.csv: -------------------------------------------------------------------------------- 1 | name,arrival 2 | albert-base-v1,0 3 | YituTech/conv-bert-small,0 4 | funnel-transformer/small,0 5 | xlm-mlm-en-2048,0 6 | roberta-base,0 7 | microsoft/mpnet-base,0 8 | junnyu/roformer_small_discriminator,0 9 | google/electra-small-discriminator,0 10 | bert-base-cased,0 11 | flaubert/flaubert_base_uncased,11544 12 | bert-base-multilingual-cased,14628 13 | distilbert-base-uncased,15108 14 | camembert-base,15210 15 | microsoft/deberta-v2-xxlarge,16410 16 | microsoft/layoutlm-base-uncased,17100 17 | funnel-transformer/intermediate,18438 18 | google/rembert,18642 19 | squeezebert/squeezebert-mnli,20418 20 | YituTech/conv-bert-base,20832 21 | bert-large-uncased,22122 22 | albert-base-v2,22398 23 | distilroberta-base,24018 24 | google/mobilebert-uncased,24042 25 | 
flaubert/flaubert_large_cased,24468 26 | funnel-transformer/large,24606 27 | google/bigbird-base-trivia-itc,29688 28 | microsoft/deberta-large,29694 29 | bert-base-uncased,29730 30 | distilbert-base-cased,31032 31 | distilbert-base-multilingual-cased,31662 32 | microsoft/deberta-v2-xlarge,31710 33 | roberta-large,36816 34 | albert-xlarge-v2,37944 35 | xlm-clm-enfr-1024,38436 36 | facebook/bart-large,40386 37 | funnel-transformer/xlarge,41382 38 | xlm-roberta-large,46512 39 | flaubert/flaubert_small_cased,49722 40 | albert-xxlarge-v1,50664 41 | microsoft/deberta-xlarge,51192 42 | google/electra-large-generator,51684 43 | bert-large-cased,55356 44 | google/bigbird-roberta-base,57300 45 | albert-xlarge-v1,57672 46 | xlm-mlm-100-1280,58392 47 | google/electra-base-generator,59256 48 | microsoft/deberta-base,60906 49 | albert-large-v1,61908 50 | albert-xxlarge-v2,61914 51 | flaubert/flaubert_base_cased,66198 52 | microsoft/layoutlm-large-uncased,67536 53 | xlm-mlm-tlm-xnli15-1024,67728 54 | xlm-roberta-base,67752 55 | YituTech/conv-bert-medium-small,68676 56 | albert-large-v2,68946 57 | bert-base-multilingual-uncased,69090 58 | google/bigbird-roberta-large,71112 59 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/torchcv_list: -------------------------------------------------------------------------------- 1 | DPN26() 2 | ResNet18() 3 | MobileNetV2(alpha=0.5) 4 | VGG(vgg_block=11) 5 | regnety004 6 | rir_cifar10 7 | DLA() 8 | wrn16_10_cifar10 9 | xdensenet40_2_k36_bc_cifar10 10 | ShuffleNetV2(net_size=0.5) 11 | efficientnet_b0 12 | msdnet22_cifar10 13 | DenseNet121() 14 | DenseNet161() 15 | DenseNet169() 16 | DenseNet201() 17 | DPN68() 18 | DPN92() 19 | DPN98() 20 | DPN107() 21 | SimpleDLA() 22 | ResNet34() 23 | ResNet50() 24 | ResNet101() 25 | ResNet152() 26 | stochastic_depth_resnet18() 27 | stochastic_depth_resnet34() 28 | stochastic_depth_resnet50() 29 | stochastic_depth_resnet101() 30 | stochastic_depth_resnet152() 31 | ResNeXt29_2x64d() 32 | ResNeXt29_4x64d() 33 | ResNeXt29_8x64d() 34 | seresnet18() 35 | seresnet34() 36 | seresnet50() 37 | seresnet101() 38 | seresnet152() 39 | preactresnet18() 40 | preactresnet34() 41 | preactresnet50() 42 | preactresnet101() 43 | preactresnet152() 44 | ShuffleNetG2() 45 | ShuffleNetG3() 46 | ShuffleNetV2(net_size=1) 47 | ShuffleNetV2(net_size=1.5) 48 | ShuffleNetV2(net_size=2) 49 | MobileNetV2(alpha=0.75) 50 | MobileNetV2(alpha=1) 51 | MobileNetV2(alpha=1.5) 52 | MobileNetV2(alpha=2) 53 | MobileNetV3(is_large=0, multiplier=0.5) 54 | MobileNetV3(is_large=0, multiplier=0.75) 55 | MobileNetV3(is_large=0, multiplier=1) 56 | MobileNetV3(is_large=0, multiplier=1.25) 57 | MobileNetV3(is_large=0, multiplier=1.5) 58 | MobileNetV3(is_large=0, multiplier=2) 59 | MobileNetV3(is_large=1, multiplier=0.5) 60 | MobileNetV3(is_large=1, multiplier=0.75) 61 | MobileNetV3(is_large=1, multiplier=1) 62 | MobileNetV3(is_large=1, multiplier=1.25) 63 | MobileNetV3(is_large=1, multiplier=1.5) 64 | MobileNetV3(is_large=1, multiplier=2) 65 | VGG(vgg_block=13) 66 | VGG(vgg_block=16) 67 | VGG(vgg_block=19) 68 | VGG(vgg_block=11, use_bn=True) 69 | VGG(vgg_block=13, use_bn=True) 70 | VGG(vgg_block=16, use_bn=True) 71 | VGG(vgg_block=19, use_bn=True) 72 | ror3_110_cifar10 73 | resnesta18 74 | efficientnet_b4 75 | resnet164bn_cifar10 76 | preresnet20_cifar10 77 | resnext272_1x64d_cifar10 78 | diaresnet20_cifar10 79 | regnetx320 80 | pyramidnet272_a200_bn_cifar10 81 | regnety002 82 | wrn40_8_cifar10 83 | regnety160 84 | 
xdensenet40_2_k24_bc_cifar10 85 | diapreresnet20_cifar10 86 | resnext20_32x4d_cifar10 87 | pyramidnet200_a240_bn_cifar10 88 | regnety016 89 | regnety008 90 | resnext29_16x64d_cifar10 91 | seresnet20_cifar10 92 | regnetx016 93 | resneta18 94 | densenet100_k24_cifar10 95 | sepreresnet20_cifar10 96 | regnetx080 97 | regnety080 98 | resnetd50b 99 | densenet100_k12_bc_cifar10 100 | resnesta152 101 | densenet40_k12_bc_cifar10 102 | resnext20_16x4d_cifar10 103 | diapreresnet1001_cifar10 104 | diapreresnet164bn_cifar10 105 | seresnet542bn_cifar10 106 | resnesta50 107 | regnetx160 108 | resnet110_cifar10 109 | resnesta269 110 | nin_cifar10 111 | regnetx008 112 | seresnet110_cifar10 113 | seresnet1202_cifar10 114 | wrn28_10_cifar10 115 | sepreresnet272bn_cifar10 116 | regnety006 117 | regnetx040 118 | regnetx006 119 | efficientnet_b3 120 | sepreresnet542bn_cifar10 121 | regnety040 122 | resneta101b 123 | preresnet1202_cifar10 124 | efficientnet_b8 125 | seresnet272bn_cifar10 126 | diaresnet110_cifar10 127 | regnetx002 128 | regnetx064 129 | diaresnet164bn_cifar10 130 | densenet40_k36_bc_cifar10 131 | regnety120 132 | prnet 133 | efficientnet_b6 134 | preresnet110_cifar10 135 | sepreresnet1001_cifar10 136 | resnesta101 137 | diaresnet1202_cifar10 138 | resnetd152b 139 | resnet272bn_cifar10 140 | regnetx004 141 | resneta10 142 | resnext29_32x4d_cifar10 143 | pyramidnet110_a84_cifar10 144 | efficientnet_b7 145 | diapreresnet1202_cifar10 146 | diapreresnet110_cifar10 147 | densenet190_k40_bc_cifar10 148 | resnext20_32x2d_cifar10 149 | densenet40_k24_bc_cifar10 150 | sepreresnet1202_cifar10 151 | pyramidnet236_a220_bn_cifar10 152 | resneta50b 153 | resnetabc14b 154 | resnesta200 155 | resnet20_cifar10 156 | resnet14_cifar10 157 | regnety032 158 | efficientnet_b5 159 | regnetx120 160 | resnet56_cifar10 161 | resnext272_2x32d_cifar10 162 | diaresnet1001_cifar10 163 | sepreresnet110_cifar10 164 | diaresnet56_cifar10 165 | preresnet56_cifar10 166 | seresnet1001_cifar10 167 | resnestabc26 168 | resnetd101b 169 | resnet1001_cifar10 170 | preresnet1001_cifar10 171 | resnestabc14 172 | fractalnet_cifar10 173 | resneta152b 174 | pyramidnet110_a48_cifar10 175 | seresnet56_cifar10 176 | sepreresnet164bn_cifar10 177 | densenet250_k24_bc_cifar10 178 | diapreresnet56_cifar10 179 | wrn20_10_32bit_cifar10 180 | sepreresnet56_cifar10 181 | ror3_56_cifar10 182 | pyramidnet164_a270_bn_cifar10 183 | densenet40_k12_cifar10 184 | resnet1202_cifar10 185 | efficientnet_b2 186 | pyramidnet110_a270_cifar10 187 | ror3_164_cifar10 188 | preresnet542bn_cifar10 189 | seresnet164bn_cifar10 190 | regnety320 191 | regnetx032 192 | resnet542bn_cifar10 193 | regnety064 194 | preresnet164bn_cifar10 195 | densenet100_k12_cifar10 196 | preresnet272bn_cifar10 197 | efficientnet_b1 198 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-1.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,107,cifar10,1,128 3 | deepspeech2-1,110,deepspeech2,1,20 4 | ncf-2,135,ncf,1,256 5 | deepspeech2-3,262,deepspeech2,1,20 6 | cifar10-4,356,cifar10,4,512 7 | ncf-5,839,ncf,1,256 8 | ncf-6,1131,ncf,1,256 9 | ncf-7,1282,ncf,1,256 10 | cifar10-8,1332,cifar10,1,128 11 | ncf-9,1453,ncf,1,256 12 | cifar10-10,1496,cifar10,1,128 13 | imagenet-11,1604,imagenet,8,1600 14 | cifar10-12,1611,cifar10,1,128 15 | cifar10-13,1740,cifar10,1,128 16 | cifar10-14,1778,cifar10,1,128 17 | cifar10-15,1932,cifar10,1,128 18 
| ncf-16,2308,ncf,1,256 19 | cifar10-17,2354,cifar10,1,128 20 | ncf-18,2420,ncf,1,256 21 | ncf-19,2615,ncf,1,256 22 | ncf-20,2862,ncf,1,256 23 | ncf-21,3488,ncf,1,256 24 | deepspeech2-22,3917,deepspeech2,1,20 25 | ncf-23,4277,ncf,1,256 26 | cifar10-24,4326,cifar10,1,128 27 | cifar10-25,4407,cifar10,1,128 28 | ncf-26,4757,ncf,1,256 29 | bert-27,5813,bert,1,12 30 | cifar10-28,5841,cifar10,1,128 31 | ncf-29,5880,ncf,8,2048 32 | ncf-30,6103,ncf,1,256 33 | yolov3-31,6357,yolov3,4,32 34 | ncf-32,6487,ncf,1,256 35 | cifar10-33,6701,cifar10,1,128 36 | cifar10-34,6732,cifar10,1,128 37 | cifar10-35,6899,cifar10,1,128 38 | cifar10-36,7025,cifar10,1,128 39 | ncf-37,7332,ncf,1,256 40 | ncf-38,7333,ncf,1,256 41 | ncf-39,7744,ncf,1,256 42 | cifar10-40,8223,cifar10,1,128 43 | cifar10-41,8817,cifar10,1,128 44 | ncf-42,8825,ncf,1,256 45 | cifar10-43,9202,cifar10,1,128 46 | cifar10-44,10069,cifar10,1,128 47 | ncf-45,10477,ncf,1,256 48 | cifar10-46,10648,cifar10,1,128 49 | cifar10-47,10819,cifar10,1,128 50 | cifar10-48,10932,cifar10,1,128 51 | cifar10-49,11207,cifar10,1,128 52 | ncf-50,11331,ncf,1,256 53 | yolov3-51,11382,yolov3,1,8 54 | yolov3-52,11528,yolov3,1,8 55 | cifar10-53,12608,cifar10,1,128 56 | cifar10-54,12683,cifar10,1,128 57 | ncf-55,13223,ncf,1,256 58 | cifar10-56,13262,cifar10,1,128 59 | cifar10-57,13276,cifar10,1,128 60 | ncf-58,13307,ncf,1,256 61 | ncf-59,13608,ncf,1,256 62 | deepspeech2-60,13759,deepspeech2,1,20 63 | ncf-61,13806,ncf,1,256 64 | ncf-62,13891,ncf,1,256 65 | cifar10-63,13927,cifar10,1,128 66 | ncf-64,14065,ncf,1,256 67 | cifar10-65,14220,cifar10,1,128 68 | ncf-66,14242,ncf,1,256 69 | cifar10-67,14265,cifar10,1,128 70 | ncf-68,14439,ncf,1,256 71 | cifar10-69,14448,cifar10,1,128 72 | cifar10-70,14524,cifar10,1,128 73 | ncf-71,14533,ncf,1,256 74 | ncf-72,14735,ncf,1,256 75 | deepspeech2-73,14773,deepspeech2,1,20 76 | ncf-74,14878,ncf,1,256 77 | cifar10-75,14906,cifar10,1,128 78 | ncf-76,15010,ncf,1,256 79 | cifar10-77,15146,cifar10,1,128 80 | bert-78,15172,bert,1,12 81 | cifar10-79,15262,cifar10,1,128 82 | cifar10-80,15602,cifar10,1,128 83 | ncf-81,15603,ncf,1,256 84 | ncf-82,15715,ncf,1,256 85 | cifar10-83,15831,cifar10,1,128 86 | cifar10-84,15944,cifar10,1,128 87 | bert-85,15947,bert,1,12 88 | cifar10-86,16331,cifar10,1,128 89 | ncf-87,16336,ncf,1,256 90 | ncf-88,16834,ncf,1,256 91 | ncf-89,17168,ncf,1,256 92 | cifar10-90,17553,cifar10,1,128 93 | ncf-91,17636,ncf,1,256 94 | cifar10-92,17739,cifar10,1,128 95 | ncf-93,17911,ncf,1,256 96 | deepspeech2-94,17987,deepspeech2,1,20 97 | ncf-95,18115,ncf,1,256 98 | deepspeech2-96,18242,deepspeech2,4,80 99 | deepspeech2-97,18488,deepspeech2,1,20 100 | cifar10-98,18573,cifar10,1,128 101 | ncf-99,18660,ncf,1,256 102 | ncf-100,18719,ncf,1,256 103 | cifar10-101,18955,cifar10,1,128 104 | bert-102,18978,bert,1,12 105 | ncf-103,19331,ncf,1,256 106 | deepspeech2-104,19353,deepspeech2,1,20 107 | cifar10-105,19362,cifar10,1,128 108 | cifar10-106,19395,cifar10,1,128 109 | ncf-107,19436,ncf,1,256 110 | ncf-108,19464,ncf,1,256 111 | ncf-109,19658,ncf,1,256 112 | deepspeech2-110,19750,deepspeech2,1,20 113 | cifar10-111,19760,cifar10,1,128 114 | ncf-112,19916,ncf,1,256 115 | ncf-113,20014,ncf,1,256 116 | ncf-114,20104,ncf,1,256 117 | cifar10-115,20156,cifar10,1,128 118 | cifar10-116,20310,cifar10,1,128 119 | deepspeech2-117,20440,deepspeech2,1,20 120 | cifar10-118,20517,cifar10,1,128 121 | bert-119,20529,bert,1,12 122 | ncf-120,20602,ncf,1,256 123 | ncf-121,20701,ncf,1,256 124 | cifar10-122,21069,cifar10,1,128 125 | ncf-123,21084,ncf,1,256 126 | 
ncf-124,21132,ncf,1,256 127 | cifar10-125,21170,cifar10,1,128 128 | imagenet-126,21308,imagenet,8,1600 129 | ncf-127,21472,ncf,1,256 130 | cifar10-128,21664,cifar10,1,128 131 | cifar10-129,21733,cifar10,1,128 132 | bert-130,21744,bert,1,12 133 | cifar10-131,21928,cifar10,1,128 134 | cifar10-132,22185,cifar10,1,128 135 | ncf-133,22311,ncf,1,256 136 | cifar10-134,23533,cifar10,1,128 137 | ncf-135,23642,ncf,1,256 138 | cifar10-136,23646,cifar10,1,128 139 | ncf-137,24116,ncf,1,256 140 | cifar10-138,24129,cifar10,1,128 141 | ncf-139,24480,ncf,1,256 142 | cifar10-140,24554,cifar10,1,128 143 | ncf-141,24823,ncf,1,256 144 | cifar10-142,25003,cifar10,1,128 145 | ncf-143,25117,ncf,1,256 146 | ncf-144,25240,ncf,1,256 147 | cifar10-145,25555,cifar10,4,512 148 | cifar10-146,25682,cifar10,1,128 149 | cifar10-147,25691,cifar10,1,128 150 | ncf-148,25716,ncf,1,256 151 | ncf-149,25727,ncf,1,256 152 | ncf-150,25915,ncf,1,256 153 | cifar10-151,25931,cifar10,1,128 154 | bert-152,25971,bert,1,12 155 | ncf-153,26532,ncf,1,256 156 | cifar10-154,26599,cifar10,1,128 157 | deepspeech2-155,26752,deepspeech2,1,20 158 | imagenet-156,26786,imagenet,8,1600 159 | imagenet-157,27630,imagenet,16,3200 160 | ncf-158,27694,ncf,1,256 161 | ncf-159,27695,ncf,4,1024 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-2.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,498,cifar10,1,128 3 | ncf-1,1427,ncf,1,256 4 | ncf-2,1559,ncf,1,256 5 | cifar10-3,1597,cifar10,1,128 6 | cifar10-4,1642,cifar10,1,128 7 | ncf-5,1832,ncf,1,256 8 | ncf-6,1915,ncf,1,256 9 | imagenet-7,2173,imagenet,8,1600 10 | ncf-8,2420,ncf,1,256 11 | ncf-9,2576,ncf,1,256 12 | imagenet-10,3075,imagenet,8,1600 13 | cifar10-11,3117,cifar10,1,128 14 | ncf-12,3118,ncf,1,256 15 | cifar10-13,3341,cifar10,1,128 16 | ncf-14,3434,ncf,1,256 17 | ncf-15,3565,ncf,1,256 18 | cifar10-16,3730,cifar10,1,128 19 | cifar10-17,3756,cifar10,1,128 20 | ncf-18,3786,ncf,1,256 21 | cifar10-19,4101,cifar10,1,128 22 | cifar10-20,4221,cifar10,1,128 23 | ncf-21,4265,ncf,1,256 24 | ncf-22,4276,ncf,1,256 25 | ncf-23,4559,ncf,1,256 26 | cifar10-24,4732,cifar10,1,128 27 | cifar10-25,4843,cifar10,1,128 28 | ncf-26,4931,ncf,1,256 29 | ncf-27,4960,ncf,1,256 30 | yolov3-28,4970,yolov3,4,32 31 | cifar10-29,5533,cifar10,1,128 32 | yolov3-30,5676,yolov3,1,8 33 | bert-31,5750,bert,1,12 34 | ncf-32,5776,ncf,1,256 35 | cifar10-33,5799,cifar10,1,128 36 | cifar10-34,7331,cifar10,1,128 37 | cifar10-35,8448,cifar10,1,128 38 | cifar10-36,8917,cifar10,1,128 39 | cifar10-37,9075,cifar10,1,128 40 | ncf-38,9635,ncf,4,1024 41 | cifar10-39,10875,cifar10,1,128 42 | ncf-40,10881,ncf,1,256 43 | ncf-41,10923,ncf,4,1024 44 | deepspeech2-42,11268,deepspeech2,1,20 45 | ncf-43,11418,ncf,1,256 46 | ncf-44,12114,ncf,1,256 47 | cifar10-45,12294,cifar10,1,128 48 | ncf-46,12731,ncf,1,256 49 | cifar10-47,13331,cifar10,1,128 50 | cifar10-48,13332,cifar10,1,128 51 | ncf-49,13414,ncf,1,256 52 | ncf-50,13526,ncf,1,256 53 | cifar10-51,13746,cifar10,1,128 54 | yolov3-52,13875,yolov3,4,32 55 | cifar10-53,14230,cifar10,1,128 56 | cifar10-54,14305,cifar10,1,128 57 | cifar10-55,14372,cifar10,1,128 58 | ncf-56,14554,ncf,1,256 59 | ncf-57,14559,ncf,1,256 60 | cifar10-58,14681,cifar10,1,128 61 | yolov3-59,15025,yolov3,8,64 62 | ncf-60,15101,ncf,1,256 63 | deepspeech2-61,15168,deepspeech2,1,20 64 | cifar10-62,15196,cifar10,1,128 65 | ncf-63,15543,ncf,1,256 66 | 
yolov3-64,15698,yolov3,1,8 67 | ncf-65,15795,ncf,1,256 68 | ncf-66,15835,ncf,1,256 69 | cifar10-67,16005,cifar10,1,128 70 | ncf-68,16064,ncf,1,256 71 | deepspeech2-69,16120,deepspeech2,1,20 72 | deepspeech2-70,16254,deepspeech2,1,20 73 | ncf-71,16283,ncf,1,256 74 | bert-72,16418,bert,1,12 75 | ncf-73,16519,ncf,1,256 76 | yolov3-74,16696,yolov3,1,8 77 | cifar10-75,16699,cifar10,1,128 78 | ncf-76,16710,ncf,1,256 79 | ncf-77,16917,ncf,1,256 80 | ncf-78,17038,ncf,1,256 81 | imagenet-79,17294,imagenet,8,1600 82 | cifar10-80,17503,cifar10,1,128 83 | ncf-81,17532,ncf,1,256 84 | deepspeech2-82,17630,deepspeech2,1,20 85 | cifar10-83,17758,cifar10,1,128 86 | deepspeech2-84,17839,deepspeech2,1,20 87 | cifar10-85,17841,cifar10,1,128 88 | cifar10-86,17932,cifar10,1,128 89 | bert-87,17980,bert,1,12 90 | deepspeech2-88,18028,deepspeech2,1,20 91 | cifar10-89,18037,cifar10,1,128 92 | cifar10-90,18041,cifar10,1,128 93 | ncf-91,18133,ncf,1,256 94 | deepspeech2-92,18152,deepspeech2,1,20 95 | cifar10-93,18381,cifar10,1,128 96 | cifar10-94,18401,cifar10,1,128 97 | cifar10-95,18624,cifar10,1,128 98 | deepspeech2-96,18696,deepspeech2,1,20 99 | ncf-97,18732,ncf,1,256 100 | ncf-98,18836,ncf,1,256 101 | cifar10-99,18877,cifar10,1,128 102 | cifar10-100,18925,cifar10,1,128 103 | bert-101,18983,bert,1,12 104 | deepspeech2-102,19055,deepspeech2,1,20 105 | cifar10-103,19239,cifar10,1,128 106 | cifar10-104,19284,cifar10,1,128 107 | ncf-105,19340,ncf,1,256 108 | ncf-106,19382,ncf,1,256 109 | cifar10-107,19537,cifar10,1,128 110 | deepspeech2-108,19590,deepspeech2,1,20 111 | cifar10-109,19615,cifar10,1,128 112 | cifar10-110,20086,cifar10,1,128 113 | cifar10-111,20129,cifar10,1,128 114 | deepspeech2-112,20481,deepspeech2,1,20 115 | bert-113,20652,bert,1,12 116 | ncf-114,20803,ncf,1,256 117 | ncf-115,20804,ncf,1,256 118 | cifar10-116,20823,cifar10,1,128 119 | deepspeech2-117,20879,deepspeech2,4,80 120 | bert-118,20907,bert,1,12 121 | cifar10-119,20932,cifar10,1,128 122 | cifar10-120,21016,cifar10,1,128 123 | cifar10-121,21034,cifar10,1,128 124 | deepspeech2-122,21135,deepspeech2,1,20 125 | cifar10-123,21212,cifar10,1,128 126 | deepspeech2-124,21303,deepspeech2,1,20 127 | ncf-125,21332,ncf,1,256 128 | ncf-126,21512,ncf,1,256 129 | cifar10-127,21715,cifar10,1,128 130 | ncf-128,21720,ncf,1,256 131 | cifar10-129,21777,cifar10,1,128 132 | ncf-130,21787,ncf,1,256 133 | bert-131,22492,bert,1,12 134 | cifar10-132,22611,cifar10,1,128 135 | ncf-133,22713,ncf,1,256 136 | bert-134,23230,bert,1,12 137 | cifar10-135,23548,cifar10,1,128 138 | cifar10-136,23929,cifar10,1,128 139 | cifar10-137,23967,cifar10,1,128 140 | ncf-138,23981,ncf,1,256 141 | ncf-139,24135,ncf,1,256 142 | deepspeech2-140,24156,deepspeech2,1,20 143 | ncf-141,24250,ncf,1,256 144 | cifar10-142,24744,cifar10,1,128 145 | ncf-143,24835,ncf,1,256 146 | yolov3-144,25001,yolov3,2,16 147 | cifar10-145,25041,cifar10,1,128 148 | cifar10-146,25079,cifar10,1,128 149 | cifar10-147,25166,cifar10,1,128 150 | cifar10-148,25262,cifar10,1,128 151 | ncf-149,25573,ncf,1,256 152 | cifar10-150,25587,cifar10,1,128 153 | ncf-151,25591,ncf,1,256 154 | cifar10-152,25876,cifar10,1,128 155 | ncf-153,25901,ncf,1,256 156 | deepspeech2-154,25952,deepspeech2,1,20 157 | ncf-155,26190,ncf,1,256 158 | ncf-156,26311,ncf,1,256 159 | deepspeech2-157,26533,deepspeech2,1,20 160 | cifar10-158,26873,cifar10,1,128 161 | ncf-159,27131,ncf,1,256 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-3.csv: 
-------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | ncf-0,791,ncf,1,256 3 | cifar10-1,1151,cifar10,1,128 4 | cifar10-2,1315,cifar10,1,128 5 | ncf-3,1335,ncf,1,256 6 | deepspeech2-4,1386,deepspeech2,1,20 7 | ncf-5,1437,ncf,1,256 8 | ncf-6,1753,ncf,1,256 9 | cifar10-7,1855,cifar10,1,128 10 | cifar10-8,1937,cifar10,1,128 11 | ncf-9,1955,ncf,1,256 12 | bert-10,2957,bert,1,12 13 | ncf-11,3114,ncf,1,256 14 | cifar10-12,3143,cifar10,1,128 15 | cifar10-13,3159,cifar10,1,128 16 | cifar10-14,3189,cifar10,1,128 17 | cifar10-15,3701,cifar10,8,1024 18 | ncf-16,3789,ncf,1,256 19 | ncf-17,4232,ncf,1,256 20 | ncf-18,4252,ncf,1,256 21 | ncf-19,4305,ncf,1,256 22 | cifar10-20,4966,cifar10,1,128 23 | cifar10-21,4993,cifar10,1,128 24 | ncf-22,5125,ncf,1,256 25 | yolov3-23,5439,yolov3,1,8 26 | cifar10-24,5533,cifar10,1,128 27 | cifar10-25,5693,cifar10,1,128 28 | cifar10-26,6029,cifar10,1,128 29 | ncf-27,6117,ncf,1,256 30 | deepspeech2-28,6211,deepspeech2,1,20 31 | deepspeech2-29,6570,deepspeech2,8,160 32 | cifar10-30,6585,cifar10,1,128 33 | ncf-31,6732,ncf,1,256 34 | ncf-32,7188,ncf,1,256 35 | ncf-33,7331,ncf,1,256 36 | ncf-34,7332,ncf,1,256 37 | ncf-35,7736,ncf,1,256 38 | deepspeech2-36,8268,deepspeech2,1,20 39 | cifar10-37,8635,cifar10,1,128 40 | ncf-38,9115,ncf,1,256 41 | ncf-39,10315,ncf,1,256 42 | cifar10-40,10666,cifar10,1,128 43 | yolov3-41,10785,yolov3,1,8 44 | cifar10-42,10917,cifar10,1,128 45 | ncf-43,11281,ncf,1,256 46 | yolov3-44,11314,yolov3,1,8 47 | yolov3-45,11487,yolov3,4,32 48 | yolov3-46,11554,yolov3,8,64 49 | ncf-47,11582,ncf,1,256 50 | cifar10-48,11929,cifar10,1,128 51 | cifar10-49,12117,cifar10,1,128 52 | yolov3-50,12282,yolov3,4,32 53 | cifar10-51,12660,cifar10,1,128 54 | ncf-52,13071,ncf,1,256 55 | ncf-53,13118,ncf,1,256 56 | deepspeech2-54,13449,deepspeech2,1,20 57 | ncf-55,13458,ncf,1,256 58 | ncf-56,13497,ncf,1,256 59 | ncf-57,13632,ncf,1,256 60 | ncf-58,13674,ncf,1,256 61 | cifar10-59,13712,cifar10,1,128 62 | ncf-60,13958,ncf,1,256 63 | cifar10-61,14020,cifar10,1,128 64 | cifar10-62,14059,cifar10,1,128 65 | cifar10-63,14141,cifar10,1,128 66 | cifar10-64,14172,cifar10,1,128 67 | ncf-65,14322,ncf,1,256 68 | ncf-66,14477,ncf,1,256 69 | cifar10-67,14478,cifar10,1,128 70 | ncf-68,14515,ncf,1,256 71 | ncf-69,14536,ncf,1,256 72 | bert-70,14711,bert,1,12 73 | cifar10-71,14926,cifar10,1,128 74 | yolov3-72,15569,yolov3,1,8 75 | ncf-73,16581,ncf,1,256 76 | ncf-74,16935,ncf,1,256 77 | ncf-75,17062,ncf,1,256 78 | bert-76,17166,bert,1,12 79 | ncf-77,17552,ncf,1,256 80 | bert-78,17646,bert,1,12 81 | ncf-79,17804,ncf,1,256 82 | deepspeech2-80,17817,deepspeech2,1,20 83 | ncf-81,17941,ncf,1,256 84 | deepspeech2-82,17968,deepspeech2,1,20 85 | cifar10-83,18116,cifar10,1,128 86 | bert-84,18180,bert,1,12 87 | cifar10-85,18227,cifar10,1,128 88 | yolov3-86,18756,yolov3,1,8 89 | bert-87,18922,bert,1,12 90 | deepspeech2-88,18929,deepspeech2,1,20 91 | deepspeech2-89,18985,deepspeech2,1,20 92 | bert-90,19037,bert,1,12 93 | ncf-91,19074,ncf,1,256 94 | deepspeech2-92,19298,deepspeech2,1,20 95 | ncf-93,19420,ncf,1,256 96 | ncf-94,19434,ncf,1,256 97 | deepspeech2-95,19448,deepspeech2,1,20 98 | cifar10-96,19559,cifar10,1,128 99 | cifar10-97,19594,cifar10,1,128 100 | ncf-98,19847,ncf,1,256 101 | ncf-99,20105,ncf,1,256 102 | yolov3-100,20545,yolov3,16,128 103 | ncf-101,20704,ncf,1,256 104 | cifar10-102,20728,cifar10,1,128 105 | cifar10-103,20855,cifar10,1,128 106 | ncf-104,20870,ncf,1,256 107 | ncf-105,20873,ncf,1,256 108 | 
cifar10-106,20900,cifar10,1,128 109 | cifar10-107,20924,cifar10,1,128 110 | cifar10-108,20992,cifar10,1,128 111 | yolov3-109,21071,yolov3,1,8 112 | ncf-110,21115,ncf,1,256 113 | ncf-111,21177,ncf,1,256 114 | cifar10-112,21206,cifar10,1,128 115 | ncf-113,21208,ncf,1,256 116 | ncf-114,21550,ncf,1,256 117 | cifar10-115,21667,cifar10,1,128 118 | bert-116,21723,bert,1,12 119 | yolov3-117,21919,yolov3,1,8 120 | cifar10-118,21920,cifar10,1,128 121 | ncf-119,22012,ncf,1,256 122 | cifar10-120,22468,cifar10,1,128 123 | cifar10-121,22932,cifar10,1,128 124 | ncf-122,23312,ncf,1,256 125 | cifar10-123,23531,cifar10,1,128 126 | cifar10-124,23660,cifar10,1,128 127 | cifar10-125,23781,cifar10,1,128 128 | cifar10-126,23816,cifar10,1,128 129 | ncf-127,23882,ncf,1,256 130 | ncf-128,24250,ncf,1,256 131 | cifar10-129,24300,cifar10,1,128 132 | ncf-130,24435,ncf,1,256 133 | ncf-131,24607,ncf,1,256 134 | cifar10-132,24732,cifar10,1,128 135 | ncf-133,24803,ncf,1,256 136 | ncf-134,24808,ncf,1,256 137 | cifar10-135,25188,cifar10,1,128 138 | ncf-136,25216,ncf,1,256 139 | ncf-137,25329,ncf,1,256 140 | ncf-138,25558,ncf,1,256 141 | ncf-139,25683,ncf,1,256 142 | bert-140,25741,bert,1,12 143 | cifar10-141,25748,cifar10,1,128 144 | deepspeech2-142,25856,deepspeech2,1,20 145 | yolov3-143,25896,yolov3,4,32 146 | yolov3-144,25960,yolov3,1,8 147 | ncf-145,26037,ncf,1,256 148 | deepspeech2-146,26188,deepspeech2,1,20 149 | bert-147,26311,bert,1,12 150 | yolov3-148,26322,yolov3,4,32 151 | bert-149,26381,bert,1,12 152 | ncf-150,26406,ncf,1,256 153 | ncf-151,26436,ncf,1,256 154 | deepspeech2-152,26464,deepspeech2,1,20 155 | bert-153,26644,bert,1,12 156 | bert-154,26871,bert,1,12 157 | bert-155,27106,bert,1,12 158 | cifar10-156,27115,cifar10,1,128 159 | ncf-157,28335,ncf,1,256 160 | ncf-158,28388,ncf,1,256 161 | cifar10-159,28739,cifar10,1,128 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-4.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | bert-0,259,bert,1,12 3 | cifar10-1,289,cifar10,1,128 4 | cifar10-2,447,cifar10,1,128 5 | deepspeech2-3,543,deepspeech2,1,20 6 | ncf-4,1107,ncf,1,256 7 | cifar10-5,1314,cifar10,1,128 8 | ncf-6,1403,ncf,1,256 9 | ncf-7,1543,ncf,1,256 10 | ncf-8,1760,ncf,1,256 11 | ncf-9,1873,ncf,1,256 12 | cifar10-10,1915,cifar10,1,128 13 | ncf-11,1916,ncf,1,256 14 | cifar10-12,1917,cifar10,1,128 15 | imagenet-13,2245,imagenet,16,3200 16 | cifar10-14,2353,cifar10,1,128 17 | ncf-15,3050,ncf,1,256 18 | cifar10-16,3087,cifar10,1,128 19 | cifar10-17,3131,cifar10,1,128 20 | cifar10-18,3178,cifar10,1,128 21 | cifar10-19,3261,cifar10,1,128 22 | cifar10-20,3692,cifar10,1,128 23 | ncf-21,4192,ncf,1,256 24 | cifar10-22,4252,cifar10,1,128 25 | cifar10-23,4276,cifar10,1,128 26 | cifar10-24,4501,cifar10,1,128 27 | ncf-25,4815,ncf,1,256 28 | cifar10-26,5334,cifar10,1,128 29 | ncf-27,5515,ncf,1,256 30 | cifar10-28,5531,cifar10,1,128 31 | ncf-29,5729,ncf,1,256 32 | cifar10-30,5801,cifar10,1,128 33 | ncf-31,6029,ncf,1,256 34 | cifar10-32,6117,cifar10,1,128 35 | ncf-33,6296,ncf,1,256 36 | deepspeech2-34,6379,deepspeech2,1,20 37 | cifar10-35,6668,cifar10,1,128 38 | cifar10-36,6739,cifar10,1,128 39 | ncf-37,6747,ncf,1,256 40 | cifar10-38,6784,cifar10,1,128 41 | cifar10-39,7062,cifar10,1,128 42 | imagenet-40,7161,imagenet,8,1600 43 | ncf-41,7623,ncf,1,256 44 | cifar10-42,7922,cifar10,1,128 45 | bert-43,8293,bert,1,12 46 | cifar10-44,8444,cifar10,1,128 47 | 
ncf-45,8916,ncf,1,256 48 | cifar10-46,9152,cifar10,1,128 49 | ncf-47,10073,ncf,1,256 50 | cifar10-48,10578,cifar10,1,128 51 | ncf-49,10916,ncf,1,256 52 | yolov3-50,10930,yolov3,1,8 53 | cifar10-51,10932,cifar10,1,128 54 | ncf-52,10989,ncf,1,256 55 | ncf-53,11078,ncf,1,256 56 | ncf-54,11188,ncf,1,256 57 | cifar10-55,11333,cifar10,1,128 58 | ncf-56,11354,ncf,1,256 59 | imagenet-57,11592,imagenet,16,3200 60 | deepspeech2-58,11769,deepspeech2,1,20 61 | ncf-59,12180,ncf,1,256 62 | bert-60,12291,bert,1,12 63 | ncf-61,13354,ncf,1,256 64 | ncf-62,13360,ncf,1,256 65 | cifar10-63,13468,cifar10,1,128 66 | cifar10-64,13515,cifar10,1,128 67 | ncf-65,13568,ncf,1,256 68 | bert-66,13616,bert,1,12 69 | cifar10-67,14156,cifar10,1,128 70 | ncf-68,14213,ncf,1,256 71 | cifar10-69,14228,cifar10,1,128 72 | ncf-70,14372,ncf,1,256 73 | ncf-71,14426,ncf,1,256 74 | cifar10-72,14459,cifar10,1,128 75 | cifar10-73,14550,cifar10,1,128 76 | ncf-74,14855,ncf,1,256 77 | cifar10-75,15460,cifar10,1,128 78 | ncf-76,15550,ncf,1,256 79 | ncf-77,15799,ncf,1,256 80 | deepspeech2-78,15807,deepspeech2,4,80 81 | ncf-79,16245,ncf,1,256 82 | ncf-80,16346,ncf,1,256 83 | cifar10-81,16430,cifar10,1,128 84 | ncf-82,17026,ncf,1,256 85 | ncf-83,17035,ncf,1,256 86 | ncf-84,17515,ncf,1,256 87 | ncf-85,17565,ncf,1,256 88 | bert-86,17758,bert,1,12 89 | ncf-87,17801,ncf,1,256 90 | deepspeech2-88,17829,deepspeech2,1,20 91 | deepspeech2-89,17904,deepspeech2,1,20 92 | ncf-90,18105,ncf,1,256 93 | cifar10-91,18202,cifar10,1,128 94 | ncf-92,18391,ncf,1,256 95 | deepspeech2-93,18616,deepspeech2,1,20 96 | bert-94,18618,bert,1,12 97 | ncf-95,18719,ncf,1,256 98 | cifar10-96,18771,cifar10,1,128 99 | cifar10-97,18810,cifar10,1,128 100 | ncf-98,18870,ncf,1,256 101 | ncf-99,18870,ncf,1,256 102 | cifar10-100,18942,cifar10,1,128 103 | ncf-101,19029,ncf,1,256 104 | ncf-102,19052,ncf,1,256 105 | ncf-103,19160,ncf,1,256 106 | ncf-104,19213,ncf,1,256 107 | deepspeech2-105,19267,deepspeech2,1,20 108 | imagenet-106,19368,imagenet,8,1600 109 | ncf-107,19382,ncf,1,256 110 | cifar10-108,19472,cifar10,1,128 111 | ncf-109,19698,ncf,1,256 112 | cifar10-110,19887,cifar10,1,128 113 | cifar10-111,19932,cifar10,1,128 114 | cifar10-112,19946,cifar10,1,128 115 | ncf-113,20043,ncf,1,256 116 | deepspeech2-114,20103,deepspeech2,1,20 117 | cifar10-115,20197,cifar10,1,128 118 | cifar10-116,20379,cifar10,1,128 119 | ncf-117,20454,ncf,1,256 120 | cifar10-118,20557,cifar10,1,128 121 | cifar10-119,20613,cifar10,1,128 122 | cifar10-120,20712,cifar10,1,128 123 | cifar10-121,21177,cifar10,1,128 124 | cifar10-122,21193,cifar10,1,128 125 | cifar10-123,21481,cifar10,1,128 126 | deepspeech2-124,21758,deepspeech2,1,20 127 | ncf-125,21810,ncf,1,256 128 | ncf-126,21815,ncf,1,256 129 | cifar10-127,22048,cifar10,1,128 130 | cifar10-128,22122,cifar10,1,128 131 | deepspeech2-129,22288,deepspeech2,1,20 132 | ncf-130,22535,ncf,1,256 133 | yolov3-131,22647,yolov3,2,16 134 | ncf-132,22721,ncf,1,256 135 | ncf-133,22915,ncf,1,256 136 | ncf-134,23332,ncf,1,256 137 | yolov3-135,23498,yolov3,4,32 138 | ncf-136,23660,ncf,1,256 139 | bert-137,24110,bert,1,12 140 | ncf-138,24816,ncf,1,256 141 | ncf-139,24884,ncf,1,256 142 | cifar10-140,24934,cifar10,1,128 143 | ncf-141,25054,ncf,1,256 144 | deepspeech2-142,25145,deepspeech2,1,20 145 | ncf-143,25254,ncf,1,256 146 | ncf-144,25726,ncf,1,256 147 | cifar10-145,25750,cifar10,1,128 148 | ncf-146,25762,ncf,1,256 149 | ncf-147,25782,ncf,1,256 150 | ncf-148,25915,ncf,1,256 151 | ncf-149,26014,ncf,1,256 152 | cifar10-150,26412,cifar10,1,128 153 | ncf-151,26525,ncf,1,256 154 
| cifar10-152,26530,cifar10,1,128 155 | deepspeech2-153,26599,deepspeech2,1,20 156 | yolov3-154,26756,yolov3,4,32 157 | deepspeech2-155,27019,deepspeech2,1,20 158 | yolov3-156,27077,yolov3,4,32 159 | yolov3-157,27132,yolov3,1,8 160 | cifar10-158,27403,cifar10,1,128 161 | cifar10-159,27731,cifar10,1,128 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-5.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,16,cifar10,1,128 3 | yolov3-1,167,yolov3,4,32 4 | cifar10-2,181,cifar10,1,128 5 | cifar10-3,582,cifar10,1,128 6 | bert-4,1133,bert,1,12 7 | ncf-5,1286,ncf,1,256 8 | cifar10-6,1296,cifar10,1,128 9 | cifar10-7,1433,cifar10,1,128 10 | ncf-8,1506,ncf,1,256 11 | ncf-9,1621,ncf,1,256 12 | ncf-10,1812,ncf,1,256 13 | ncf-11,2041,ncf,1,256 14 | ncf-12,2070,ncf,1,256 15 | imagenet-13,2260,imagenet,8,1600 16 | cifar10-14,2590,cifar10,1,128 17 | cifar10-15,2856,cifar10,1,128 18 | cifar10-16,3317,cifar10,1,128 19 | ncf-17,3464,ncf,1,256 20 | cifar10-18,3659,cifar10,1,128 21 | imagenet-19,3942,imagenet,32,6400 22 | cifar10-20,4021,cifar10,1,128 23 | cifar10-21,4050,cifar10,1,128 24 | cifar10-22,4170,cifar10,1,128 25 | cifar10-23,4199,cifar10,1,128 26 | cifar10-24,4263,cifar10,1,128 27 | ncf-25,4292,ncf,1,256 28 | cifar10-26,4384,cifar10,1,128 29 | ncf-27,4404,ncf,1,256 30 | ncf-28,4513,ncf,1,256 31 | ncf-29,4646,ncf,1,256 32 | ncf-30,4892,ncf,1,256 33 | cifar10-31,4930,cifar10,1,128 34 | yolov3-32,5169,yolov3,1,8 35 | cifar10-33,5202,cifar10,1,128 36 | ncf-34,5367,ncf,1,256 37 | cifar10-35,5533,cifar10,1,128 38 | deepspeech2-36,5892,deepspeech2,1,20 39 | cifar10-37,7070,cifar10,1,128 40 | bert-38,7152,bert,1,12 41 | cifar10-39,7271,cifar10,1,128 42 | cifar10-40,7713,cifar10,1,128 43 | ncf-41,8183,ncf,1,256 44 | cifar10-42,8526,cifar10,1,128 45 | cifar10-43,8880,cifar10,1,128 46 | cifar10-44,8943,cifar10,1,128 47 | cifar10-45,9115,cifar10,1,128 48 | ncf-46,9164,ncf,1,256 49 | ncf-47,9303,ncf,1,256 50 | ncf-48,9309,ncf,1,256 51 | yolov3-49,9442,yolov3,4,32 52 | cifar10-50,9733,cifar10,1,128 53 | ncf-51,10158,ncf,1,256 54 | ncf-52,10333,ncf,1,256 55 | yolov3-53,10408,yolov3,4,32 56 | bert-54,10465,bert,1,12 57 | ncf-55,10603,ncf,1,256 58 | ncf-56,10996,ncf,1,256 59 | ncf-57,11061,ncf,1,256 60 | ncf-58,11161,ncf,1,256 61 | cifar10-59,11346,cifar10,1,128 62 | ncf-60,12252,ncf,1,256 63 | cifar10-61,12518,cifar10,1,128 64 | cifar10-62,12543,cifar10,1,128 65 | ncf-63,12717,ncf,1,256 66 | ncf-64,12732,ncf,1,256 67 | deepspeech2-65,12849,deepspeech2,1,20 68 | ncf-66,13413,ncf,1,256 69 | cifar10-67,13666,cifar10,1,128 70 | ncf-68,13742,ncf,1,256 71 | cifar10-69,13915,cifar10,1,128 72 | cifar10-70,14230,cifar10,1,128 73 | ncf-71,14464,ncf,1,256 74 | ncf-72,14531,ncf,1,256 75 | ncf-73,14773,ncf,1,256 76 | cifar10-74,14812,cifar10,1,128 77 | ncf-75,14925,ncf,1,256 78 | cifar10-76,15016,cifar10,1,128 79 | yolov3-77,15148,yolov3,1,8 80 | imagenet-78,15159,imagenet,8,1600 81 | cifar10-79,15242,cifar10,1,128 82 | ncf-80,15629,ncf,1,256 83 | ncf-81,15674,ncf,1,256 84 | ncf-82,15814,ncf,1,256 85 | ncf-83,15881,ncf,1,256 86 | ncf-84,15890,ncf,1,256 87 | cifar10-85,15914,cifar10,1,128 88 | cifar10-86,16149,cifar10,1,128 89 | ncf-87,16247,ncf,1,256 90 | ncf-88,16709,ncf,1,256 91 | bert-89,16918,bert,1,12 92 | deepspeech2-90,16967,deepspeech2,1,20 93 | ncf-91,17251,ncf,1,256 94 | ncf-92,17409,ncf,1,256 95 | ncf-93,17532,ncf,1,256 96 | 
deepspeech2-94,17535,deepspeech2,1,20 97 | ncf-95,17622,ncf,1,256 98 | cifar10-96,17804,cifar10,1,128 99 | cifar10-97,17921,cifar10,1,128 100 | deepspeech2-98,17977,deepspeech2,1,20 101 | bert-99,18008,bert,1,12 102 | ncf-100,18435,ncf,1,256 103 | cifar10-101,18721,cifar10,1,128 104 | cifar10-102,18753,cifar10,1,128 105 | deepspeech2-103,18773,deepspeech2,1,20 106 | ncf-104,18805,ncf,1,256 107 | cifar10-105,18931,cifar10,1,128 108 | ncf-106,18996,ncf,1,256 109 | ncf-107,19206,ncf,1,256 110 | cifar10-108,19237,cifar10,1,128 111 | ncf-109,19380,ncf,1,256 112 | cifar10-110,19396,cifar10,1,128 113 | ncf-111,19401,ncf,1,256 114 | cifar10-112,19550,cifar10,1,128 115 | cifar10-113,19626,cifar10,1,128 116 | ncf-114,19749,ncf,1,256 117 | bert-115,19839,bert,1,12 118 | cifar10-116,19918,cifar10,1,128 119 | bert-117,19991,bert,1,12 120 | ncf-118,20057,ncf,1,256 121 | deepspeech2-119,20143,deepspeech2,1,20 122 | bert-120,20274,bert,1,12 123 | ncf-121,20299,ncf,1,256 124 | cifar10-122,20342,cifar10,1,128 125 | deepspeech2-123,20386,deepspeech2,1,20 126 | yolov3-124,20478,yolov3,4,32 127 | deepspeech2-125,20515,deepspeech2,1,20 128 | ncf-126,20578,ncf,1,256 129 | ncf-127,20602,ncf,1,256 130 | ncf-128,20645,ncf,1,256 131 | cifar10-129,20696,cifar10,1,128 132 | bert-130,21016,bert,1,12 133 | ncf-131,21243,ncf,1,256 134 | ncf-132,21549,ncf,1,256 135 | ncf-133,21667,ncf,1,256 136 | ncf-134,21719,ncf,1,256 137 | ncf-135,22191,ncf,1,256 138 | cifar10-136,22555,cifar10,1,128 139 | ncf-137,22930,ncf,1,256 140 | ncf-138,22932,ncf,1,256 141 | deepspeech2-139,22968,deepspeech2,1,20 142 | bert-140,23424,bert,1,12 143 | ncf-141,24102,ncf,1,256 144 | ncf-142,24300,ncf,1,256 145 | deepspeech2-143,24524,deepspeech2,1,20 146 | ncf-144,24932,ncf,1,256 147 | cifar10-145,24993,cifar10,1,128 148 | cifar10-146,25014,cifar10,1,128 149 | bert-147,25095,bert,1,12 150 | imagenet-148,25511,imagenet,16,3200 151 | ncf-149,25555,ncf,1,256 152 | ncf-150,25638,ncf,1,256 153 | cifar10-151,25866,cifar10,1,128 154 | ncf-152,26311,ncf,1,256 155 | ncf-153,26639,ncf,1,256 156 | ncf-154,27012,ncf,1,256 157 | ncf-155,27101,ncf,1,256 158 | cifar10-156,27286,cifar10,1,128 159 | ncf-157,27578,ncf,1,256 160 | ncf-158,27759,ncf,1,256 161 | ncf-159,28755,ncf,1,256 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-6.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,53,cifar10,1,128 3 | cifar10-1,154,cifar10,1,128 4 | deepspeech2-2,326,deepspeech2,1,20 5 | cifar10-3,368,cifar10,1,128 6 | bert-4,735,bert,1,12 7 | cifar10-5,1315,cifar10,1,128 8 | ncf-6,1585,ncf,1,256 9 | ncf-7,1817,ncf,1,256 10 | cifar10-8,1916,cifar10,1,128 11 | ncf-9,1924,ncf,1,256 12 | bert-10,2438,bert,1,12 13 | cifar10-11,2518,cifar10,1,128 14 | ncf-12,2535,ncf,1,256 15 | bert-13,2735,bert,1,12 16 | ncf-14,2850,ncf,1,256 17 | cifar10-15,3073,cifar10,1,128 18 | cifar10-16,3107,cifar10,1,128 19 | ncf-17,3403,ncf,1,256 20 | imagenet-18,3472,imagenet,8,1600 21 | cifar10-19,3687,cifar10,1,128 22 | cifar10-20,3733,cifar10,1,128 23 | cifar10-21,4003,cifar10,1,128 24 | cifar10-22,4007,cifar10,1,128 25 | ncf-23,4078,ncf,1,256 26 | cifar10-24,4101,cifar10,1,128 27 | ncf-25,4948,ncf,1,256 28 | ncf-26,4949,ncf,1,256 29 | cifar10-27,4955,cifar10,1,128 30 | cifar10-28,5172,cifar10,1,128 31 | cifar10-29,5277,cifar10,1,128 32 | cifar10-30,5285,cifar10,1,128 33 | cifar10-31,6136,cifar10,1,128 34 | ncf-32,6324,ncf,1,256 35 | 
deepspeech2-33,6406,deepspeech2,1,20 36 | ncf-34,6731,ncf,1,256 37 | ncf-35,6897,ncf,1,256 38 | cifar10-36,7752,cifar10,1,128 39 | cifar10-37,8287,cifar10,1,128 40 | cifar10-38,8444,cifar10,1,128 41 | ncf-39,8532,ncf,1,256 42 | cifar10-40,8614,cifar10,1,128 43 | yolov3-41,9226,yolov3,4,32 44 | yolov3-42,9550,yolov3,1,8 45 | yolov3-43,9612,yolov3,8,64 46 | ncf-44,9732,ncf,1,256 47 | ncf-45,9876,ncf,1,256 48 | bert-46,10151,bert,1,12 49 | ncf-47,10318,ncf,1,256 50 | ncf-48,10319,ncf,1,256 51 | ncf-49,11033,ncf,1,256 52 | bert-50,11256,bert,1,12 53 | deepspeech2-51,11288,deepspeech2,1,20 54 | bert-52,11292,bert,1,12 55 | cifar10-53,11446,cifar10,1,128 56 | deepspeech2-54,11491,deepspeech2,1,20 57 | cifar10-55,11515,cifar10,1,128 58 | deepspeech2-56,11852,deepspeech2,1,20 59 | ncf-57,12204,ncf,1,256 60 | ncf-58,12558,ncf,1,256 61 | cifar10-59,12866,cifar10,1,128 62 | cifar10-60,12938,cifar10,1,128 63 | cifar10-61,14331,cifar10,1,128 64 | cifar10-62,14377,cifar10,1,128 65 | cifar10-63,14377,cifar10,1,128 66 | bert-64,14708,bert,4,48 67 | cifar10-65,14817,cifar10,1,128 68 | ncf-66,15115,ncf,1,256 69 | ncf-67,15563,ncf,1,256 70 | cifar10-68,15879,cifar10,1,128 71 | cifar10-69,16110,cifar10,1,128 72 | ncf-70,16319,ncf,1,256 73 | cifar10-71,16331,cifar10,1,128 74 | ncf-72,16437,ncf,1,256 75 | ncf-73,16560,ncf,1,256 76 | ncf-74,16585,ncf,1,256 77 | cifar10-75,16925,cifar10,1,128 78 | bert-76,16946,bert,1,12 79 | cifar10-77,16957,cifar10,1,128 80 | bert-78,17490,bert,1,12 81 | ncf-79,17515,ncf,1,256 82 | deepspeech2-80,17628,deepspeech2,1,20 83 | imagenet-81,18010,imagenet,8,1600 84 | cifar10-82,18159,cifar10,1,128 85 | ncf-83,18289,ncf,1,256 86 | ncf-84,18391,ncf,1,256 87 | cifar10-85,18510,cifar10,4,512 88 | deepspeech2-86,18699,deepspeech2,1,20 89 | ncf-87,18780,ncf,1,256 90 | ncf-88,18785,ncf,4,1024 91 | ncf-89,18806,ncf,1,256 92 | cifar10-90,18877,cifar10,1,128 93 | deepspeech2-91,19165,deepspeech2,1,20 94 | ncf-92,19202,ncf,1,256 95 | deepspeech2-93,19239,deepspeech2,1,20 96 | cifar10-94,19306,cifar10,1,128 97 | cifar10-95,19316,cifar10,1,128 98 | ncf-96,19332,ncf,1,256 99 | ncf-97,19335,ncf,1,256 100 | cifar10-98,19393,cifar10,1,128 101 | bert-99,19414,bert,1,12 102 | ncf-100,19450,ncf,1,256 103 | bert-101,19689,bert,1,12 104 | cifar10-102,20051,cifar10,1,128 105 | deepspeech2-103,20103,deepspeech2,1,20 106 | cifar10-104,20139,cifar10,1,128 107 | cifar10-105,20269,cifar10,4,512 108 | cifar10-106,20313,cifar10,1,128 109 | cifar10-107,20463,cifar10,1,128 110 | deepspeech2-108,20574,deepspeech2,1,20 111 | bert-109,20613,bert,4,48 112 | cifar10-110,20643,cifar10,1,128 113 | cifar10-111,20665,cifar10,1,128 114 | ncf-112,20669,ncf,1,256 115 | ncf-113,20703,ncf,1,256 116 | cifar10-114,20726,cifar10,1,128 117 | ncf-115,20851,ncf,1,256 118 | ncf-116,21008,ncf,1,256 119 | cifar10-117,21017,cifar10,1,128 120 | cifar10-118,21061,cifar10,1,128 121 | deepspeech2-119,21157,deepspeech2,8,160 122 | cifar10-120,21190,cifar10,1,128 123 | cifar10-121,21209,cifar10,1,128 124 | ncf-122,21433,ncf,1,256 125 | ncf-123,21686,ncf,1,256 126 | cifar10-124,21706,cifar10,1,128 127 | cifar10-125,21738,cifar10,1,128 128 | cifar10-126,21776,cifar10,1,128 129 | yolov3-127,21835,yolov3,8,64 130 | cifar10-128,21886,cifar10,1,128 131 | cifar10-129,22332,cifar10,1,128 132 | bert-130,22769,bert,1,12 133 | deepspeech2-131,22783,deepspeech2,1,20 134 | cifar10-132,23032,cifar10,1,128 135 | ncf-133,23040,ncf,1,256 136 | cifar10-134,23136,cifar10,1,128 137 | deepspeech2-135,23293,deepspeech2,1,20 138 | ncf-136,23478,ncf,1,256 139 | 
imagenet-137,23900,imagenet,8,1600 140 | ncf-138,24040,ncf,1,256 141 | cifar10-139,24132,cifar10,1,128 142 | ncf-140,24362,ncf,1,256 143 | ncf-141,24755,ncf,1,256 144 | ncf-142,24780,ncf,1,256 145 | cifar10-143,24855,cifar10,1,128 146 | ncf-144,24869,ncf,1,256 147 | cifar10-145,25125,cifar10,1,128 148 | cifar10-146,25129,cifar10,1,128 149 | cifar10-147,25679,cifar10,1,128 150 | cifar10-148,25875,cifar10,1,128 151 | ncf-149,25897,ncf,1,256 152 | ncf-150,25898,ncf,1,256 153 | bert-151,25952,bert,1,12 154 | deepspeech2-152,26080,deepspeech2,1,20 155 | deepspeech2-153,26243,deepspeech2,1,20 156 | ncf-154,26853,ncf,1,256 157 | yolov3-155,26876,yolov3,4,32 158 | ncf-156,27132,ncf,1,256 159 | bert-157,27142,bert,1,12 160 | ncf-158,27229,ncf,1,256 161 | cifar10-159,28189,cifar10,1,128 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-7.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,131,cifar10,1,128 3 | cifar10-1,132,cifar10,1,128 4 | cifar10-2,397,cifar10,1,128 5 | cifar10-3,505,cifar10,1,128 6 | cifar10-4,642,cifar10,1,128 7 | bert-5,812,bert,1,12 8 | ncf-6,848,ncf,4,1024 9 | ncf-7,916,ncf,4,1024 10 | cifar10-8,1397,cifar10,1,128 11 | cifar10-9,1606,cifar10,1,128 12 | cifar10-10,1728,cifar10,1,128 13 | cifar10-11,1908,cifar10,1,128 14 | bert-12,2126,bert,1,12 15 | cifar10-13,2285,cifar10,1,128 16 | cifar10-14,2440,cifar10,1,128 17 | cifar10-15,2712,cifar10,1,128 18 | deepspeech2-16,2931,deepspeech2,1,20 19 | ncf-17,3659,ncf,1,256 20 | cifar10-18,3714,cifar10,1,128 21 | cifar10-19,3788,cifar10,1,128 22 | yolov3-20,4001,yolov3,1,8 23 | cifar10-21,4081,cifar10,1,128 24 | ncf-22,4316,ncf,1,256 25 | cifar10-23,4317,cifar10,1,128 26 | ncf-24,4353,ncf,1,256 27 | imagenet-25,5021,imagenet,16,3200 28 | cifar10-26,5207,cifar10,1,128 29 | cifar10-27,5495,cifar10,1,128 30 | ncf-28,5627,ncf,1,256 31 | ncf-29,6014,ncf,1,256 32 | yolov3-30,6114,yolov3,8,64 33 | cifar10-31,6131,cifar10,1,128 34 | ncf-32,6842,ncf,1,256 35 | ncf-33,6963,ncf,1,256 36 | bert-34,7153,bert,1,12 37 | ncf-35,7392,ncf,1,256 38 | yolov3-36,7516,yolov3,1,8 39 | cifar10-37,7899,cifar10,1,128 40 | cifar10-38,7933,cifar10,1,128 41 | cifar10-39,8387,cifar10,1,128 42 | ncf-40,8537,ncf,1,256 43 | ncf-41,8617,ncf,1,256 44 | ncf-42,9037,ncf,1,256 45 | ncf-43,9727,ncf,1,256 46 | ncf-44,10529,ncf,1,256 47 | cifar10-45,10550,cifar10,1,128 48 | cifar10-46,10599,cifar10,1,128 49 | ncf-47,11332,ncf,1,256 50 | bert-48,11449,bert,1,12 51 | ncf-49,11695,ncf,1,256 52 | cifar10-50,12185,cifar10,1,128 53 | cifar10-51,12239,cifar10,1,128 54 | ncf-52,12718,ncf,1,256 55 | cifar10-53,12942,cifar10,1,128 56 | cifar10-54,12991,cifar10,1,128 57 | ncf-55,13332,ncf,1,256 58 | ncf-56,13486,ncf,1,256 59 | bert-57,13740,bert,1,12 60 | cifar10-58,13807,cifar10,1,128 61 | imagenet-59,13924,imagenet,16,3200 62 | deepspeech2-60,14199,deepspeech2,1,20 63 | ncf-61,14242,ncf,1,256 64 | ncf-62,14572,ncf,1,256 65 | ncf-63,14973,ncf,1,256 66 | ncf-64,14978,ncf,1,256 67 | ncf-65,15051,ncf,1,256 68 | cifar10-66,15226,cifar10,1,128 69 | ncf-67,15357,ncf,1,256 70 | yolov3-68,15814,yolov3,1,8 71 | cifar10-69,15841,cifar10,1,128 72 | bert-70,15876,bert,1,12 73 | ncf-71,16151,ncf,1,256 74 | deepspeech2-72,16303,deepspeech2,1,20 75 | ncf-73,16411,ncf,1,256 76 | yolov3-74,16852,yolov3,1,8 77 | cifar10-75,16932,cifar10,1,128 78 | ncf-76,17035,ncf,1,256 79 | ncf-77,17069,ncf,1,256 80 | bert-78,17174,bert,1,12 81 | 
cifar10-79,17263,cifar10,1,128 82 | cifar10-80,17452,cifar10,1,128 83 | cifar10-81,17515,cifar10,1,128 84 | cifar10-82,17517,cifar10,1,128 85 | ncf-83,17587,ncf,1,256 86 | cifar10-84,17626,cifar10,1,128 87 | cifar10-85,17677,cifar10,1,128 88 | cifar10-86,17830,cifar10,1,128 89 | imagenet-87,17991,imagenet,8,1600 90 | cifar10-88,18132,cifar10,1,128 91 | cifar10-89,18159,cifar10,1,128 92 | ncf-90,18238,ncf,1,256 93 | ncf-91,18288,ncf,1,256 94 | yolov3-92,18343,yolov3,8,64 95 | ncf-93,18607,ncf,1,256 96 | ncf-94,18634,ncf,1,256 97 | cifar10-95,18852,cifar10,1,128 98 | cifar10-96,18912,cifar10,1,128 99 | cifar10-97,18914,cifar10,1,128 100 | cifar10-98,18935,cifar10,1,128 101 | ncf-99,18983,ncf,1,256 102 | deepspeech2-100,19063,deepspeech2,1,20 103 | bert-101,19067,bert,1,12 104 | bert-102,19212,bert,1,12 105 | cifar10-103,19221,cifar10,1,128 106 | cifar10-104,19312,cifar10,1,128 107 | cifar10-105,19332,cifar10,1,128 108 | cifar10-106,19414,cifar10,1,128 109 | yolov3-107,19467,yolov3,4,32 110 | cifar10-108,19571,cifar10,1,128 111 | cifar10-109,19697,cifar10,1,128 112 | ncf-110,19775,ncf,1,256 113 | ncf-111,19999,ncf,1,256 114 | cifar10-112,20163,cifar10,1,128 115 | cifar10-113,20428,cifar10,1,128 116 | ncf-114,20466,ncf,1,256 117 | ncf-115,20519,ncf,1,256 118 | ncf-116,20519,ncf,1,256 119 | cifar10-117,20535,cifar10,1,128 120 | cifar10-118,20546,cifar10,1,128 121 | ncf-119,20766,ncf,1,256 122 | ncf-120,20919,ncf,1,256 123 | deepspeech2-121,20926,deepspeech2,1,20 124 | cifar10-122,21131,cifar10,1,128 125 | ncf-123,21365,ncf,1,256 126 | ncf-124,21561,ncf,1,256 127 | bert-125,21838,bert,1,12 128 | yolov3-126,21874,yolov3,8,64 129 | deepspeech2-127,21965,deepspeech2,1,20 130 | ncf-128,22034,ncf,1,256 131 | cifar10-129,22084,cifar10,1,128 132 | deepspeech2-130,22152,deepspeech2,1,20 133 | ncf-131,22657,ncf,1,256 134 | imagenet-132,22848,imagenet,8,1600 135 | yolov3-133,23886,yolov3,1,8 136 | bert-134,23932,bert,1,12 137 | cifar10-135,24504,cifar10,4,512 138 | ncf-136,24663,ncf,1,256 139 | yolov3-137,24663,yolov3,4,32 140 | cifar10-138,24677,cifar10,1,128 141 | ncf-139,24832,ncf,1,256 142 | cifar10-140,25116,cifar10,1,128 143 | ncf-141,25473,ncf,1,256 144 | ncf-142,25530,ncf,1,256 145 | cifar10-143,25598,cifar10,1,128 146 | ncf-144,25626,ncf,1,256 147 | ncf-145,25675,ncf,1,256 148 | cifar10-146,25749,cifar10,4,512 149 | ncf-147,25800,ncf,1,256 150 | cifar10-148,25857,cifar10,1,128 151 | deepspeech2-149,25884,deepspeech2,1,20 152 | imagenet-150,25903,imagenet,8,1600 153 | deepspeech2-151,26206,deepspeech2,1,20 154 | cifar10-152,26269,cifar10,1,128 155 | cifar10-153,26375,cifar10,8,1024 156 | bert-154,26403,bert,1,12 157 | ncf-155,26617,ncf,1,256 158 | cifar10-156,27359,cifar10,1,128 159 | ncf-157,27715,ncf,1,256 160 | cifar10-158,28671,cifar10,1,128 161 | ncf-159,28775,ncf,1,256 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-8.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | ncf-0,35,ncf,1,256 3 | yolov3-1,520,yolov3,1,8 4 | ncf-2,1198,ncf,1,256 5 | bert-3,1270,bert,1,12 6 | deepspeech2-4,1303,deepspeech2,1,20 7 | ncf-5,1497,ncf,1,256 8 | cifar10-6,1519,cifar10,1,128 9 | ncf-7,1619,ncf,1,256 10 | ncf-8,1858,ncf,1,256 11 | ncf-9,2017,ncf,1,256 12 | cifar10-10,2254,cifar10,1,128 13 | ncf-11,2749,ncf,1,256 14 | cifar10-12,2911,cifar10,1,128 15 | ncf-13,3134,ncf,1,256 16 | cifar10-14,3208,cifar10,1,128 17 | ncf-15,3421,ncf,1,256 18 | 
ncf-16,3565,ncf,1,256 19 | cifar10-17,3732,cifar10,1,128 20 | cifar10-18,4316,cifar10,1,128 21 | cifar10-19,4692,cifar10,1,128 22 | cifar10-20,4702,cifar10,1,128 23 | ncf-21,5256,ncf,1,256 24 | ncf-22,5430,ncf,1,256 25 | cifar10-23,5500,cifar10,1,128 26 | ncf-24,5611,ncf,1,256 27 | cifar10-25,5707,cifar10,1,128 28 | imagenet-26,5774,imagenet,16,3200 29 | cifar10-27,5801,cifar10,1,128 30 | ncf-28,5854,ncf,1,256 31 | cifar10-29,5884,cifar10,1,128 32 | ncf-30,7110,ncf,1,256 33 | cifar10-31,7347,cifar10,1,128 34 | yolov3-32,7562,yolov3,2,16 35 | ncf-33,7626,ncf,1,256 36 | deepspeech2-34,7858,deepspeech2,4,80 37 | ncf-35,7906,ncf,1,256 38 | yolov3-36,8037,yolov3,2,16 39 | cifar10-37,8313,cifar10,1,128 40 | ncf-38,9268,ncf,1,256 41 | ncf-39,9311,ncf,1,256 42 | yolov3-40,9391,yolov3,1,8 43 | ncf-41,9513,ncf,2,512 44 | ncf-42,9582,ncf,1,256 45 | ncf-43,9612,ncf,1,256 46 | ncf-44,9715,ncf,1,256 47 | cifar10-45,10317,cifar10,1,128 48 | cifar10-46,10392,cifar10,1,128 49 | cifar10-47,10660,cifar10,1,128 50 | cifar10-48,10737,cifar10,1,128 51 | cifar10-49,10859,cifar10,1,128 52 | ncf-50,11060,ncf,1,256 53 | cifar10-51,11782,cifar10,1,128 54 | deepspeech2-52,12205,deepspeech2,1,20 55 | ncf-53,12379,ncf,1,256 56 | cifar10-54,12421,cifar10,1,128 57 | ncf-55,12712,ncf,1,256 58 | ncf-56,13106,ncf,1,256 59 | ncf-57,13457,ncf,1,256 60 | ncf-58,13554,ncf,1,256 61 | cifar10-59,13734,cifar10,1,128 62 | ncf-60,13797,ncf,1,256 63 | cifar10-61,13938,cifar10,1,128 64 | yolov3-62,14090,yolov3,1,8 65 | ncf-63,14186,ncf,1,256 66 | cifar10-64,14205,cifar10,1,128 67 | ncf-65,14245,ncf,1,256 68 | bert-66,14499,bert,1,12 69 | cifar10-67,14623,cifar10,1,128 70 | cifar10-68,14715,cifar10,1,128 71 | deepspeech2-69,14737,deepspeech2,1,20 72 | ncf-70,14871,ncf,1,256 73 | ncf-71,14932,ncf,1,256 74 | deepspeech2-72,14937,deepspeech2,1,20 75 | deepspeech2-73,14992,deepspeech2,1,20 76 | cifar10-74,15132,cifar10,1,128 77 | yolov3-75,15245,yolov3,1,8 78 | cifar10-76,15340,cifar10,1,128 79 | ncf-77,15735,ncf,1,256 80 | cifar10-78,15779,cifar10,1,128 81 | cifar10-79,15835,cifar10,1,128 82 | bert-80,15863,bert,1,12 83 | cifar10-81,16026,cifar10,1,128 84 | ncf-82,16067,ncf,1,256 85 | bert-83,16077,bert,1,12 86 | bert-84,16142,bert,1,12 87 | deepspeech2-85,16332,deepspeech2,1,20 88 | cifar10-86,16381,cifar10,1,128 89 | cifar10-87,16588,cifar10,1,128 90 | ncf-88,16610,ncf,1,256 91 | bert-89,17011,bert,1,12 92 | bert-90,17109,bert,1,12 93 | deepspeech2-91,17238,deepspeech2,1,20 94 | deepspeech2-92,17277,deepspeech2,1,20 95 | cifar10-93,17414,cifar10,1,128 96 | bert-94,17521,bert,1,12 97 | ncf-95,17749,ncf,1,256 98 | cifar10-96,17866,cifar10,1,128 99 | bert-97,18060,bert,1,12 100 | cifar10-98,18115,cifar10,1,128 101 | cifar10-99,18139,cifar10,1,128 102 | cifar10-100,18211,cifar10,1,128 103 | ncf-101,18244,ncf,1,256 104 | ncf-102,18284,ncf,1,256 105 | yolov3-103,18385,yolov3,1,8 106 | cifar10-104,18415,cifar10,1,128 107 | cifar10-105,18549,cifar10,1,128 108 | cifar10-106,18711,cifar10,1,128 109 | cifar10-107,18744,cifar10,1,128 110 | ncf-108,18800,ncf,1,256 111 | deepspeech2-109,18851,deepspeech2,1,20 112 | ncf-110,18853,ncf,1,256 113 | bert-111,18925,bert,1,12 114 | ncf-112,19084,ncf,1,256 115 | cifar10-113,19141,cifar10,1,128 116 | cifar10-114,19260,cifar10,1,128 117 | ncf-115,19331,ncf,1,256 118 | cifar10-116,19492,cifar10,1,128 119 | deepspeech2-117,19595,deepspeech2,1,20 120 | ncf-118,19915,ncf,1,256 121 | cifar10-119,20057,cifar10,1,128 122 | cifar10-120,20362,cifar10,1,128 123 | cifar10-121,20530,cifar10,1,128 124 | 
ncf-122,20531,ncf,1,256 125 | ncf-123,20531,ncf,1,256 126 | cifar10-124,20548,cifar10,1,128 127 | cifar10-125,20673,cifar10,1,128 128 | cifar10-126,20694,cifar10,1,128 129 | cifar10-127,20742,cifar10,1,128 130 | cifar10-128,20884,cifar10,1,128 131 | ncf-129,20937,ncf,1,256 132 | cifar10-130,21132,cifar10,1,128 133 | ncf-131,21335,ncf,1,256 134 | ncf-132,21487,ncf,1,256 135 | yolov3-133,21615,yolov3,4,32 136 | ncf-134,21680,ncf,1,256 137 | ncf-135,21732,ncf,1,256 138 | cifar10-136,21813,cifar10,1,128 139 | ncf-137,21894,ncf,1,256 140 | cifar10-138,22042,cifar10,1,128 141 | imagenet-139,22196,imagenet,8,1600 142 | bert-140,22968,bert,1,12 143 | cifar10-141,23480,cifar10,1,128 144 | cifar10-142,23609,cifar10,1,128 145 | cifar10-143,23641,cifar10,1,128 146 | deepspeech2-144,24063,deepspeech2,1,20 147 | cifar10-145,24115,cifar10,1,128 148 | deepspeech2-146,24140,deepspeech2,1,20 149 | deepspeech2-147,24263,deepspeech2,1,20 150 | cifar10-148,24918,cifar10,1,128 151 | cifar10-149,25240,cifar10,1,128 152 | ncf-150,25553,ncf,1,256 153 | ncf-151,25640,ncf,1,256 154 | cifar10-152,25661,cifar10,1,128 155 | cifar10-153,25690,cifar10,1,128 156 | deepspeech2-154,26246,deepspeech2,1,20 157 | cifar10-155,26565,cifar10,1,128 158 | bert-156,27082,bert,1,12 159 | imagenet-157,27176,imagenet,32,6400 160 | deepspeech2-158,27744,deepspeech2,1,20 161 | deepspeech2-159,28253,deepspeech2,1,20 162 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/examples/__init__.py -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | isPackageNotInstalled() { 3 | $1 --version &> /dev/null 4 | if [ $? -eq 0 ]; then 5 | echo "$1: Already installed" 6 | else 7 | install_dir=$HOME/anaconda3 8 | wget https://repo.anaconda.com/archive/Anaconda3-2020.11-Linux-x86_64.sh 9 | bash Anaconda3-2020.11-Linux-x86_64.sh -b -p $install_dir 10 | export PATH=$install_dir/bin:$PATH 11 | fi 12 | } 13 | 14 | # un-comment to install anaconda 15 | isPackageNotInstalled conda 16 | 17 | 18 | # create conda env 19 | conda init bash 20 | . 
~/.bashrc 21 | conda env create -f environment.yml # Install dependencies 22 | conda activate modelkeeper 23 | 24 | 25 | if [ "$1" == "--cuda" ]; then 26 | wget https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run 27 | sudo apt-get purge nvidia-* -y 28 | sudo sh -c "echo 'blacklist nouveau\noptions nouveau modeset=0' > /etc/modprobe.d/blacklist-nouveau.conf" 29 | sudo update-initramfs -u 30 | sudo sh cuda_10.2.89_440.33.01_linux.run --override --driver --toolkit --samples --silent 31 | export PATH=$PATH:/usr/local/cuda-10.2/bin 32 | conda install cudatoolkit=10.2 -y 33 | fi -------------------------------------------------------------------------------- /modelkeeper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/modelkeeper/__init__.py -------------------------------------------------------------------------------- /modelkeeper/aed_matcher.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | 4 | class AEDMatcher(object): 5 | """Serves precomputed graph edit distance (GED) mappings from a pickle store.""" 6 | 7 | def __init__(self, path, name): 8 | self.name = name 9 | self.mappings = self.load_mapping(path) 10 | 11 | def load_mapping(self, path): 12 | with open(path, 'rb') as fin: 13 | stores = pickle.load(fin) 14 | 15 | return stores.get(self.name, None) 16 | 17 | def query_child(self, child): 18 | child = child.split('.onnx')[0] 19 | if self.mappings is None or child not in self.mappings: 20 | return -float('inf'), [] 21 | 22 | dist = self.mappings[child]['GED'] 23 | mappings = list(zip(self.mappings[child]['Path'][0], self.mappings[child]['Path'][1])) 24 | # distance is negated so that a larger score means a closer match 25 | return -dist, mappings 26 |
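27 | # Example usage (hypothetical: the pickle path and store name below are 28 | # placeholders, not files shipped with this repo): 29 | #   matcher = AEDMatcher('/tmp/aed_mappings.pkl', 'my_zoo') 30 | #   score, mapping = matcher.query_child('resnet18.onnx') 31 | #   # score is the negated GED (higher is better); mapping is a list of 32 | #   # (parent_node_id, child_node_id) pairs. 33 |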
-------------------------------------------------------------------------------- /modelkeeper/analyze_zoo.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import ctypes 3 | import functools 4 | import gc 5 | import heapq 6 | import json 7 | import logging 8 | import multiprocessing 9 | import os 10 | import sys 11 | import time 12 | from multiprocessing import Manager 13 | 14 | import networkx as nx 15 | import numpy 16 | import onnx 17 | import torch 18 | from matcher import ModelKeeper 19 | from onnx import numpy_helper 20 | 21 | # Call C backend 22 | clib_matcher = ctypes.cdll.LoadLibrary('./backend/bin/matcher.so') 23 | clib_matcher.get_matching_score.restype = ctypes.c_char_p 24 | 25 | sys.setrecursionlimit(10000) 26 | logging.basicConfig(filename='logging', level=logging.INFO) 27 | 28 | 29 | def get_mapped(file): 30 | black_list = set() 31 | with open(file) as fin: 32 | lines = fin.readlines() 33 | for line in lines: 34 | if 'Find best mappings' in line: 35 | model_name = line.split('/')[3].split()[0] 36 | black_list.add(model_name) 37 | return black_list 38 | 39 | 40 | def analyze_zoo(): 41 | from config import modelkeeper_config 42 | 43 | start_time = time.time() 44 | zoo_path = "/users/fanlai/experiment/keeper/model_zoo" # '/mnt/zoo' 45 | 46 | modelkeeper_config.zoo_path = zoo_path 47 | mapper = ModelKeeper(modelkeeper_config) 48 | 49 | # ["/users/fanlai/model_zoo/ShuffleNetV2_net_size_2_@0.7142.onnx"] 50 | models = [x for x in os.listdir(zoo_path) if '.onnx' in x] 51 | 52 | #black_list = get_mapped('/users/fanlai/torchcv_scores') 53 | #models = [x for x in os.listdir(zoo_path) if x not in black_list] 54 | # print(models) 55 | # print(len(models)) 56 | all_models = [os.path.join(zoo_path, x) for x in models] 57 | for idx, model_name in enumerate(models): 58 | try: 59 | child_onnx_path = os.path.join(zoo_path, model_name) 60 | # child, child_onnx = mapper.load_model_meta(child_onnx_path) 61 | # child.graph['model_id'] = str(idx) 62 | 63 | black_list = set(all_models[:]) 64 | black_list.discard(child_onnx_path) 65 | black_list = set()  # NOTE: overrides the exclusion above, so each model is matched against the full zoo 66 | # find the best mapping from the zoo 67 | weights, meta_data = mapper.map_for_onnx( 68 | child_onnx_path, black_list, model_name) 69 | print(meta_data) 70 | gc.collect() 71 | except Exception as e: 72 | print(e) 73 | 74 | print("==============") 75 | print(f"total duration is {time.time()-start_time:.2f} sec") 76 | 77 | 78 | def analyze_zoo_folder(): 79 | from config import modelkeeper_config 80 | 81 | start_time = time.time() 82 | zoo_path = '/users/fanlai/experiment/data/my_zoo' 83 | 84 | modelkeeper_config.zoo_path = zoo_path 85 | mapper = ModelKeeper(modelkeeper_config) 86 | 87 | model_folders = [ 88 | x for x in os.listdir(zoo_path) if os.path.isdir( 89 | os.path.join( 90 | zoo_path, x))] 91 | models = [] # model_folders#["/users/fanlai/experiment/data/my_zoo/funnel_transformer_small/funnel_transformer_small.onnx"] 92 | #model_folders = models 93 | for idx, model_path in enumerate(model_folders): 94 | model_name = [ 95 | x for x in os.listdir( 96 | os.path.join( 97 | zoo_path, 98 | model_path)) if '.onnx' in x] 99 | if len(model_name) == 1: 100 | models.append(os.path.join(zoo_path, model_path, model_name[0])) 101 | mapper.add_to_zoo(models[-1]) 102 | print(f"===Add {models[-1]} to zoo...") 103 | 104 | # models = os.listdir(zoo_path) 105 | for idx, model_name in enumerate(models): 106 | child_onnx_path = model_name # os.path.join(zoo_path, model_name) 107 | # child, child_onnx = mapper.load_model_meta(child_onnx_path) 108 | # child.graph['model_id'] = str(idx) 109 | black_list = set(models[:]) 110 | black_list.discard(child_onnx_path) 111 | # find the best mapping from the zoo 112 | weights, meta_data = mapper.map_for_onnx( 113 | child_onnx_path, black_list, model_name.split('/')[-1]) 114 | print(meta_data) 115 | gc.collect() 116 | 117 | print("==============") 118 | print(f"total duration is {time.time()-start_time:.2f} sec") 119 | 120 | 121 | # analyze_zoo_folder() 122 | analyze_zoo() 123 |
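124 | # Sketch of how the C backend above can be invoked (the argument signature of 125 | # get_matching_score is an assumption here -- only its return type is declared 126 | # in this file; see backend/matcher.hpp for the underlying gen_mapping API): 127 | #   payload = json.dumps({'parent': ..., 'child': ...}).encode('utf-8') 128 | #   ans = clib_matcher.get_matching_score(payload, True) 129 | #   score_and_mapping = json.loads(ans) 130 |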
-------------------------------------------------------------------------------- /modelkeeper/backend/Make: -------------------------------------------------------------------------------- 1 | g++ matcher.cpp -fPIC -shared -O2 -o matcher.so -std=c++11 2 | mv matcher.so ./bin/ 3 | -------------------------------------------------------------------------------- /modelkeeper/backend/bin/matcher.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/modelkeeper/backend/bin/matcher.so -------------------------------------------------------------------------------- /modelkeeper/backend/main.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <fstream> 3 | #include <string> 4 | 5 | #include "Network.hpp" 6 | 7 | using namespace std; 8 | 9 | int main(int argc, char * argv[]) 10 | { 11 | int n, m; 12 | 13 | if(argc < 2){ 14 | cout << "USAGE: " << argv[0] << " " << "test_file" << endl; 15 | return 1; 16 | } 17 | ifstream fin(argv[1]); 18 | while(fin >> n >> m){ 19 | Network network(n); 20 | while(m--){ 21 | int x, y; 22 | double dist; 23 | fin >> x >> y >> dist; 24 | network.addEdge(x, y, -dist); 25 | } 26 | cout << network.simulateDistribution(0) << endl; 27 | cout << network.zhuliu(0) << endl; 28 | } 29 | cout << "Done ..." << endl; 30 | return 0; 31 | } 32 |
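33 | // Input format read by the loop above (the values below are made up for 34 | // illustration): 35 | //   <n> <m>          -- number of nodes, number of edges 36 | //   <x> <y> <dist>   -- one weighted edge per line, repeated m times 37 | // e.g. a 3-node graph with two edges out of root node 0: 38 | //   3 2 39 | //   0 1 0.5 40 | //   0 2 1.5 41 |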
-------------------------------------------------------------------------------- /modelkeeper/backend/matcher.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MATCHER__ 2 | #define __MATCHER__ 3 | 4 | #include <string> 5 | #include <vector> 6 | #include <map> 7 | 8 | using namespace std; 9 | 10 | struct node_pair { 11 | double val; 12 | int parentidx; 13 | int childidx; 14 | int opt; 15 | 16 | node_pair(double v, int p, int c, int o) :val(v), parentidx(p), childidx(c), opt(o) {} 17 | }; 18 | 19 | struct Node 20 | { 21 | int idx; 22 | string type; 23 | vector<int> shape; 24 | vector<int> parents; 25 | }; 26 | 27 | class Matcher{ 28 | 29 | private: 30 | int len_parent; 31 | int len_child; 32 | bool dump_mapping; 33 | 34 | double **scores; 35 | map<string, vector<int> > backParentIdx; 36 | map<string, vector<int> > backChildIdx; 37 | 38 | vector<Node> parent_nodes; 39 | vector<Node> child_nodes; 40 | 41 | vector<string> parent_parameters; // element type assumed 42 | vector<string> child_parameters; // element type assumed 43 | 44 | public: 45 | 46 | char* gen_mapping(string json_str, bool dump_mapping); 47 | 48 | // merge k sorted lists 49 | inline double merge_branch_mapping(vector<vector<node_pair> > lists, vector<int> & parent_list, vector<int> & child_list); 50 | inline double cal_score(Node parent_node, Node child_node); 51 | 52 | void read_io(string json_str); 53 | void align_child_parent(); 54 | void init_score(); 55 | 56 | string encode_hash(int i, int j); 57 | }; 58 | 59 | #endif 60 | 61 | -------------------------------------------------------------------------------- /modelkeeper/backend/test.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include "json.hpp" 3 | #include <string> 4 | 5 | using json=nlohmann::json; 6 | using namespace std; 7 | 8 | int main(){ 9 | // json::parse requires valid JSON, i.e. double-quoted keys 10 | string json_str = R"({"nikhil": 1, "akash": 5, "manjeet": 10, "akshat": 15})"; 11 | json second = json::parse(json_str); 12 | 13 | cout << second["nikhil"]; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /modelkeeper/clientservice.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | import shutil 5 | import time 6 | 7 | from paramiko import AutoAddPolicy, SSHClient 8 | from scp import SCPClient 9 | 10 | 11 | class ModelKeeperClient(object): 12 | 13 | """A very simple client service for ModelKeeper""" 14 | 15 | def __init__(self, args): 16 | 17 | self.zoo_server = args.zoo_server 18 | 19 | # TODO: These paths should be informed after querying the zoo host 20 | self.zoo_path = args.zoo_path 21 | self.zoo_query_path = args.zoo_query_path 22 | self.zoo_ans_path = args.zoo_ans_path 23 | self.zoo_register_path = args.zoo_register_path 24 | 25 | self.execution_path = args.execution_path 26 | 27 | self.create_runtime_store() 28 | self.connection = self.create_connection() 29 | self.connection_manager = SCPClient(self.connection.get_transport()) 30 | 31 | def create_runtime_store(self): 32 | os.makedirs(self.execution_path, exist_ok=True) 33 | 34 | def create_connection(self): 35 | connection = SSHClient() 36 | connection.load_system_host_keys() 37 | connection.set_missing_host_key_policy(AutoAddPolicy()) 38 | connection.connect(self.zoo_server) 39 | 40 | return connection 41 | 42 | def query_for_model(self, model_path, timeout=1800): 43 | """ 44 | @ model_path: path to the query model, assumed to be in onnx format 45 | """ 46 | model_name = model_path.split('/')[-1].replace('.onnx', '') 47 | ans_model_name = model_name + '.out' 48 | local_path = os.path.join(self.execution_path, ans_model_name) 49 | 50 | # 1. Upload the model to the modelkeeper pending queue 51 | self.register_model_to_zoo( 52 | model_path, os.path.join( 53 | self.zoo_query_path, model_name)) 54 | 55 | # 2. Ping the host for results 56 | # NOTE: ideally we should use gRPC 57 | waiting_duration, heartbeat = 0, 2 58 | os.system(f'echo > {local_path}') 59 | 60 | while waiting_duration < timeout: 61 | success = self.pull_model_from_zoo(os.path.join( 62 | self.zoo_ans_path, ans_model_name), local_path) 63 | if not success: 64 | time.sleep(heartbeat) 65 | waiting_duration += heartbeat 66 | else: 67 | break 68 | 69 | # 3. Load the result (the remote copy is removed in pull_model_from_zoo) 70 | weights = meta = None 71 | 72 | if waiting_duration < timeout: 73 | # Load model weights and the matching meta data 74 | with open(local_path, 'rb') as fin: 75 | weights = pickle.load(fin) 76 | # {"matching_score", "parent_name", "parent_acc"} 77 | meta = pickle.load(fin) 78 | os.remove(local_path) 79 | else: 80 | logging.info(f"Querying the zoo server timed out after {timeout} sec") 81 | 82 | return weights, meta 83 | 84 | def register_model_to_zoo(self, model_path, zoo_path=None, accuracy=100): 85 | """ 86 | @ model_path: the model to upload and register in the ModelKeeper zoo 87 | """ 88 | 89 | if zoo_path is None: 90 | zoo_path = os.path.join( 91 | self.zoo_register_path, model_path.split('/')[-1].replace('.onnx', '')) 92 | zoo_path = zoo_path + f'@{accuracy}' 93 | try: 94 | self.connection_manager.put(model_path, zoo_path) 95 | _ = self.connection.exec_command( 96 | f"mv {zoo_path} {zoo_path+'.onnx'}") 97 | logging.info( 98 | f"Successfully uploaded model {model_path} to the zoo server") 99 | except Exception as e: 100 | logging.warning( 101 | f"Failed to connect to the zoo host {self.zoo_server}") 102 | 103 | def pull_model_from_zoo(self, model_path, local_path): 104 | """ 105 | @ return whether the result file was successfully pulled from the zoo 106 | """ 107 | success = True 108 | try: 109 | self.connection_manager.get(model_path, local_path) 110 | stdin, stdout, stderr = self.connection.exec_command( 111 | f"rm {model_path}", timeout=30) 112 | stdout.channel.recv_exit_status() 113 | except Exception as e: 114 | success = False 115 | 116 | return success 117 | 118 | def stop(self): 119 | self.connection_manager.close() 120 | self.connection.close() 121 | # shutil.rmtree(self.execution_path) 122 | -------------------------------------------------------------------------------- /modelkeeper/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | parser = argparse.ArgumentParser() 5 | 6 | # Path configuration 7 | parser.add_argument( 8 | '--zoo_path', 9 | type=str, 10 | default=f'{os.environ["HOME"]}/experiment/keeper/model_zoo/', 11 | help='Path of the model zoo') 12 | parser.add_argument( 13 | '--execution_path', 14 | type=str, 15 | default=f'{os.environ["HOME"]}/experiment/keeper/jobs/', 16 | help='Runtime data store of the framework') 17 | parser.add_argument( 18 | '--zoo_query_path', 19 | type=str, 20 | default=f'{os.environ["HOME"]}/experiment/keeper/query_zoo/', 21 | help='Runtime data store of querying models') 22 | parser.add_argument( 23 | '--zoo_ans_path', 24 | type=str, 25 | default=f'{os.environ["HOME"]}/experiment/keeper/ans_zoo/', 26 | help='Runtime
data store of querying results') 27 | parser.add_argument( 28 | '--zoo_register_path', 29 | type=str, 30 | default=f'{os.environ["HOME"]}/experiment/keeper/register_zoo/', 31 | help='Runtime data store of new pending models') 32 | 33 | # Framework configuration 34 | parser.add_argument( 35 | '--num_of_processes', 36 | type=int, 37 | default=20, 38 | help='Number of threads used for mapping (~CPU cores)') 39 | parser.add_argument( 40 | '--zoo_server', 41 | type=str, 42 | default='10.0.0.1', 43 | help='Server of ModelKeeper') 44 | parser.add_argument( 45 | '--user_name', 46 | type=str, 47 | default='', 48 | help='User name in accessing the ModelKeeper server') 49 | 50 | # Parameters 51 | parser.add_argument( 52 | '--neigh_threshold', 53 | type=float, 54 | default=0.1, 55 | help='Threshold of evicting neighbors a if score(a,b)