├── .gitignore ├── README.md ├── environment.yml ├── evals ├── nni │ ├── README.md │ ├── base_model.onnx │ ├── base_model_weight │ ├── env.yml │ ├── keeper_nasbench201_graph.py │ ├── keeper_offline.py │ └── parser.py └── ray_tune │ ├── __init_.py │ ├── keeper_offline.py │ ├── models │ ├── cifarmodels │ │ ├── __init__.py │ │ ├── densenet.py │ │ ├── dla.py │ │ ├── dla_simple.py │ │ ├── dpn.py │ │ ├── efficientnet.py │ │ ├── mobilenet.py │ │ ├── mobilenetv2.py │ │ ├── mobilenetv3.py │ │ ├── model_provider.py │ │ ├── pnasnet.py │ │ ├── preact_resnet.py │ │ ├── preactresnet.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── senet.py │ │ ├── shufflenet.py │ │ ├── shufflenetv2.py │ │ ├── stochasticdepth.py │ │ └── vgg.py │ ├── nasbench │ │ ├── __init__.py │ │ ├── cell_infers │ │ │ ├── __init__.py │ │ │ ├── cells.py │ │ │ ├── nasnet_cifar.py │ │ │ └── tiny_network.py │ │ ├── cell_operations.py │ │ ├── configure_utils.py │ │ └── genotypes.py │ ├── torchcv │ │ ├── __init__.py │ │ ├── model_provider.py │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── airnet.py │ │ │ ├── airnext.py │ │ │ ├── alexnet.py │ │ │ ├── alphapose_coco.py │ │ │ ├── bagnet.py │ │ │ ├── bamresnet.py │ │ │ ├── bisenet.py │ │ │ ├── bninception.py │ │ │ ├── cbamresnet.py │ │ │ ├── centernet.py │ │ │ ├── cgnet.py │ │ │ ├── channelnet.py │ │ │ ├── common.py │ │ │ ├── condensenet.py │ │ │ ├── contextnet.py │ │ │ ├── dabnet.py │ │ │ ├── danet.py │ │ │ ├── darknet.py │ │ │ ├── darknet53.py │ │ │ ├── darts.py │ │ │ ├── deeplabv3.py │ │ │ ├── densenet.py │ │ │ ├── densenet_cifar.py │ │ │ ├── diapreresnet.py │ │ │ ├── diapreresnet_cifar.py │ │ │ ├── diaresnet.py │ │ │ ├── diaresnet_cifar.py │ │ │ ├── dicenet.py │ │ │ ├── diracnetv2.py │ │ │ ├── dla.py │ │ │ ├── dpn.py │ │ │ ├── drn.py │ │ │ ├── edanet.py │ │ │ ├── efficientnet.py │ │ │ ├── efficientnetedge.py │ │ │ ├── enet.py │ │ │ ├── erfnet.py │ │ │ ├── esnet.py │ │ │ ├── espcnet.py │ │ │ ├── espnetv2.py │ │ │ ├── fastscnn.py │ │ │ ├── fastseresnet.py │ │ │ ├── fbnet.py │ │ │ ├── fcn8sd.py │ │ │ ├── fdmobilenet.py │ │ │ ├── fishnet.py │ │ │ ├── fpenet.py │ │ │ ├── fractalnet_cifar.py │ │ │ ├── ghostnet.py │ │ │ ├── hardnet.py │ │ │ ├── hrnet.py │ │ │ ├── ibnbresnet.py │ │ │ ├── ibndensenet.py │ │ │ ├── ibnresnet.py │ │ │ ├── ibnresnext.py │ │ │ ├── ibppose_coco.py │ │ │ ├── icnet.py │ │ │ ├── igcv3.py │ │ │ ├── inceptionresnetv1.py │ │ │ ├── inceptionresnetv2.py │ │ │ ├── inceptionv3.py │ │ │ ├── inceptionv4.py │ │ │ ├── irevnet.py │ │ │ ├── isqrtcovresnet.py │ │ │ ├── jasper.py │ │ │ ├── jasperdr.py │ │ │ ├── lednet.py │ │ │ ├── lffd.py │ │ │ ├── linknet.py │ │ │ ├── lwopenpose_cmupan.py │ │ │ ├── menet.py │ │ │ ├── mixnet.py │ │ │ ├── mnasnet.py │ │ │ ├── mobilenet.py │ │ │ ├── mobilenet_cub.py │ │ │ ├── mobilenetb.py │ │ │ ├── mobilenetv2.py │ │ │ ├── mobilenetv3.py │ │ │ ├── model_store.py │ │ │ ├── msdnet.py │ │ │ ├── msdnet_cifar10.py │ │ │ ├── nasnet.py │ │ │ ├── nin_cifar.py │ │ │ ├── ntsnet_cub.py │ │ │ ├── nvpattexp.py │ │ │ ├── octresnet.py │ │ │ ├── others │ │ │ ├── __init__.py │ │ │ ├── _espnet.py │ │ │ ├── _inceptionresnetv1_.py │ │ │ ├── oth_espnet.py │ │ │ ├── oth_inception_resnet_v1.py │ │ │ ├── oth_quartznet.py │ │ │ └── oth_vit.py │ │ │ ├── peleenet.py │ │ │ ├── pfpcnet.py │ │ │ ├── pnasnet.py │ │ │ ├── polynet.py │ │ │ ├── preresnet.py │ │ │ ├── preresnet_cifar.py │ │ │ ├── prnet.py │ │ │ ├── proxylessnas.py │ │ │ ├── proxylessnas_cub.py │ │ │ ├── pspnet.py │ │ │ ├── pyramidnet.py │ │ │ ├── pyramidnet_cifar.py │ │ │ ├── quartznet.py │ │ │ ├── regnet.py │ │ │ ├── 
resattnet.py │ │ │ ├── resdropresnet_cifar.py │ │ │ ├── resnesta.py │ │ │ ├── resnet.py │ │ │ ├── resnet_cifar.py │ │ │ ├── resnet_cub.py │ │ │ ├── resneta.py │ │ │ ├── resnetd.py │ │ │ ├── resnext.py │ │ │ ├── resnext_cifar.py │ │ │ ├── revnet.py │ │ │ ├── rir_cifar.py │ │ │ ├── ror_cifar.py │ │ │ ├── scnet.py │ │ │ ├── segnet.py │ │ │ ├── selecsls.py │ │ │ ├── senet.py │ │ │ ├── sepreresnet.py │ │ │ ├── sepreresnet_cifar.py │ │ │ ├── seresnet.py │ │ │ ├── seresnet_cifar.py │ │ │ ├── seresnet_cub.py │ │ │ ├── seresnext.py │ │ │ ├── shakedropresnet_cifar.py │ │ │ ├── shakeshakeresnet_cifar.py │ │ │ ├── sharesnet.py │ │ │ ├── shufflenet.py │ │ │ ├── shufflenetv2.py │ │ │ ├── shufflenetv2b.py │ │ │ ├── simplepose_coco.py │ │ │ ├── simpleposemobile_coco.py │ │ │ ├── sinet.py │ │ │ ├── sknet.py │ │ │ ├── sparsenet.py │ │ │ ├── spnasnet.py │ │ │ ├── sqnet.py │ │ │ ├── squeezenet.py │ │ │ ├── squeezenext.py │ │ │ ├── superpointnet.py │ │ │ ├── tresnet.py │ │ │ ├── unet.py │ │ │ ├── vgg.py │ │ │ ├── visemenet.py │ │ │ ├── voca.py │ │ │ ├── vovnet.py │ │ │ ├── wrn.py │ │ │ ├── wrn1bit_cifar.py │ │ │ ├── wrn_cifar.py │ │ │ ├── xception.py │ │ │ ├── xdensenet.py │ │ │ ├── xdensenet_cifar.py │ │ │ └── zfnet.py │ └── vgg.py │ ├── onlinescheduler.py │ ├── ray_tuner.py │ ├── setup │ ├── cluster_manager.py │ └── conf.yml │ ├── thirdparty │ ├── __init__.py │ ├── calculate_ged.py │ ├── custom_rnn.py │ ├── data.py │ ├── embed_regularize.py │ ├── locked_dropout.py │ ├── main_one_model_train.py │ ├── model.py │ ├── multilinear.py │ ├── nas_environment.py │ ├── search_space.py │ ├── splitcross.py │ ├── train.py │ ├── utils.py │ └── weight_drop.py │ └── workloads │ ├── imgclsmob-large.csv │ ├── nlp_list.csv │ ├── nlp_nwp.csv │ ├── torchcv_list │ ├── torchcv_list.csv │ ├── workload-1.csv │ ├── workload-2.csv │ ├── workload-3.csv │ ├── workload-4.csv │ ├── workload-5.csv │ ├── workload-6.csv │ ├── workload-7.csv │ └── workload-8.csv ├── examples ├── __init__.py ├── train_cifar10.py └── train_mnist.py ├── install.sh ├── modelkeeper ├── __init__.py ├── aed_matcher.py ├── analyze_zoo.py ├── backend │ ├── Make │ ├── bin │ │ └── matcher.so │ ├── json.hpp │ ├── main.cpp │ ├── matcher.cpp │ ├── matcher.hpp │ ├── test.cpp │ └── test.py ├── clientservice.py ├── clustering.py ├── config.py ├── dryrun_clustering.py ├── evictor.py ├── generate_zoo.py ├── gml_export.py ├── keeper_start.py ├── mapper.py ├── matcher.py ├── nettransformer.py └── test.py ├── setup.py └── tests ├── test_map.py └── tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | plot/ 4 | *.py[cod] 5 | *$py.class 6 | *.swp 7 | # C extensions 8 | *.png 9 | data/ 10 | /build/ 11 | /dist/ 12 | /modelkeeper_backend.chu_liu_edmonds.egg-info/ 13 | 14 | # Distribution / packaging 15 | .Python 16 | __pycache__/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | .static_storage/ 62 | .media/ 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | 112 | #log 113 | *.e 114 | *.o 115 | _generated_model/ 116 | lightning_logs/ 117 | .DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ModelKeeper 2 | 3 | This repository contains the evaluation artifacts of our NSDI '23 paper "[ModelKeeper: Accelerating DNN Training via Automated Training Warmup](https://symbioticlab.org/publications/files/modelkeeper:nsdi23/modelkeeper-nsdi23.pdf)". 4 | 5 | **ModelKeeper is being merged into [FedScale](https://github.com/SymbioticLab/FedScale) and is actively maintained there. Please try it!** 6 | 7 | # Overview 8 | 9 | * [Getting Started](#getting-started) 10 | * [Run Experiments](#run-experiments) 11 | * [Repo Structure](#repo-structure) 12 | * [Contact](#contact) 13 | 14 | # Getting Started 15 | 16 | Our ```install.sh``` will install the following automatically: 17 | 18 | * Anaconda Package Manager 19 | * CUDA 10.2 20 | 21 | Note: if you prefer different versions of conda and CUDA, please check the comments in `install.sh` for details. 22 | 23 | Run the following commands to install ModelKeeper. 24 | 25 | ``` 26 | source install.sh 27 | pip install -e . 28 | ``` 29 | 30 | # Run Experiments 31 | 32 | # Repo Structure 33 | 34 | ``` 35 | Repo Root 36 | |---- modelkeeper # Core implementation (e.g., Matcher). 37 | |---- evals # MK support for different training backends 38 | |---- ray_tune # Ray experiments 39 | |---- nni # Retiarii experiments 40 | |---- examples # Toy experiments of model transformation 41 | ``` 42 | 43 | # Notes 44 | Please consider citing our paper if you use the code or data in your research project. 45 | ```bibtex 46 | @inproceedings{modelkeeper-nsdi23, 47 | title={ModelKeeper: Accelerating DNN Training via Automated Training Warmup}, 48 | author={Fan Lai and Yinwei Dai and Harsha V. Madhyastha and Mosharaf Chowdhury}, 49 | booktitle={USENIX Symposium on Networked Systems Design and Implementation (NSDI)}, 50 | year={2023} 51 | } 52 | ``` 53 | 54 | # Contact 55 | Fan Lai (fanlai@umich.edu) and Yinwei Dai (yinweid@princeton.edu).
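For quick reference, here is a minimal sketch of registering a trained model to the zoo offline, mirroring the client calls in `evals/ray_tune/keeper_offline.py` (the ONNX file path and accuracy value below are illustrative placeholders):

```python
# Minimal sketch: offline registration of a model to the ModelKeeper zoo,
# mirroring evals/ray_tune/keeper_offline.py. The ONNX path and the
# accuracy value are illustrative placeholders.
from modelkeeper.clientservice import ModelKeeperClient
from modelkeeper.config import modelkeeper_config

client = ModelKeeperClient(modelkeeper_config)  # connect using the default config
client.register_model_to_zoo('my_model.onnx', accuracy=92.5)  # add the model and its accuracy to the zoo
client.stop()
```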
56 | 57 | 58 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: modelkeeper 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - anaconda 6 | - defaults 7 | dependencies: 8 | - python=3.6 9 | - numba=0.49.1 10 | - pip 11 | - pip: 12 | - torchvision 13 | - transformers 14 | - matplotlib==3.1.3 15 | - tensorboardX==2.1 16 | - scipy 17 | - PyYAML 18 | - gdown 19 | - ray==1.8.0 20 | - networkx 21 | - onnx 22 | - paramiko 23 | - scp 24 | - pandas 25 | - ray[tune] 26 | - sentencepiece 27 | - datasets 28 | - ortools 29 | - torch 30 | -------------------------------------------------------------------------------- /evals/nni/README.md: -------------------------------------------------------------------------------- 1 | ## Install the dependency 2 | 3 | You can simply run `install.sh`. 4 | 5 | ``` 6 | conda env create -f environment.yml 7 | ``` 8 | 9 | ## Setting Up GPU Cluster 10 | 11 | **Note:** 12 | Please ensure that these paths are consistent across all nodes so that ModelKeeper can find the right path. 13 | 14 | - ***Coordinator node***: Make sure that the coordinator (master node) has access to other worker nodes via ```ssh```. 15 | 16 | - ***All nodes***: Follow the same dependency setup. 17 | 18 | ## Running the experiment 19 | ``` 20 | python keeper_nasbench201_graph.py --use_keeper --user=xxx --num_nodes=xxx --num_gpu_per_nodes=xxx --max_trial_number_per_gpu=xxx --max_trial_number=xxx 21 | ``` -------------------------------------------------------------------------------- /evals/nni/base_model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/nni/base_model.onnx -------------------------------------------------------------------------------- /evals/nni/base_model_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/nni/base_model_weight -------------------------------------------------------------------------------- /evals/nni/env.yml: -------------------------------------------------------------------------------- 1 | name: nni-mk 2 | channels: 3 | - gurobi 4 | - pytorch 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1 9 | - _openmp_mutex=4.5 10 | - audioread=2.1.9 11 | - bzip2=1.0.8 12 | - ca-certificates=2021.10.8 13 | - certifi=2021.5.30 14 | - cffi=1.14.6 15 | - cycler=0.10.0 16 | - ffmpeg=4.3 17 | - freetype=2.10.4 18 | - gettext=0.19.8.1 19 | - gmp=6.2.1 20 | - gnutls=3.6.13 21 | - gurobi=9.1.0 22 | - jbig=2.1 23 | - joblib=1.1.0 24 | - jpeg=9d 25 | - kiwisolver=1.3.1 26 | - lame=3.100 27 | - lcms2=2.12 28 | - ld_impl_linux-64=2.36.1 29 | - lerc=3.0 30 | - libblas=3.9.0 31 | - libcblas=3.9.0 32 | - libdeflate=1.8 33 | - libffi=3.4.2 34 | - libflac=1.3.3 35 | - libgcc-ng=11.2.0 36 | - libgfortran-ng=11.2.0 37 | - libgfortran5=11.2.0 38 | - libgomp=11.2.0 39 | - libiconv=1.16 40 | - liblapack=3.9.0 41 | - libllvm8=8.0.1 42 | - libogg=1.3.4 43 | - libopenblas=0.3.17 44 | - libopus=1.3.1 45 | - libpng=1.6.37 46 | - librosa=0.7.2 47 | - libsndfile=1.0.31 48 | - libstdcxx-ng=11.2.0 49 | - libtiff=4.3.0 50 | - libvorbis=1.3.7 51 | - libwebp-base=1.2.1 52 | - libzlib=1.2.11 53 | - llvmlite=0.32.1 54 | - lz4-c=1.9.3 55 | - mad=0.15.1b 56 | -
ncurses=6.2 57 | - nettle=3.6 58 | - numba=0.49.1 59 | - numpy=1.17.5 60 | - olefile=0.46 61 | - openh264=2.1.1 62 | - openjpeg=2.4.0 63 | - openssl=1.1.1l 64 | - pillow=8.3.2 65 | - pip=20.0.2 66 | - pycparser=2.20 67 | - pyparsing=2.4.7 68 | - pysoundfile=0.10.3.post1 69 | - python=3.6.13 70 | - python-dateutil=2.8.2 71 | - python_abi=3.6 72 | - readline=8.1 73 | - resampy=0.2.2 74 | - scikit-learn=0.24.2 75 | - setuptools=58.0.4 76 | - sqlite=3.36.0 77 | - threadpoolctl=3.0.0 78 | - tk=8.6.11 79 | - tornado=6.1 80 | - wheel=0.37.0 81 | - xz=5.2.5 82 | - zlib=1.2.11 83 | - zstd=1.5.0 84 | - pip: 85 | - absl-py==0.10.0 86 | - aiohttp==3.7.4.post0 87 | - astor==0.8.1 88 | - astunparse==1.6.3 89 | - async-timeout==3.0.1 90 | - attrs==20.3.0 91 | - bcrypt==4.0.0 92 | - cached-property==1.5.2 93 | - cachetools==4.2.4 94 | - chardet==4.0.0 95 | - charset-normalizer==2.0.7 96 | - click==8.0.3 97 | - colorama==0.4.4 98 | - contextlib2==21.6.0 99 | - cryptography==38.0.1 100 | - dataclasses==0.8 101 | - decorator==4.4.2 102 | - dill==0.3.4 103 | - drill==1.2.0 104 | - filelock==3.3.0 105 | - fsspec==2021.10.1 106 | - future==0.18.2 107 | - gast==0.3.3 108 | - gdown==4.0.2 109 | - google-auth==1.35.0 110 | - google-auth-oauthlib==0.4.6 111 | - google-pasta==0.2.0 112 | - grpcio==1.41.0 113 | - h5py==3.1.0 114 | - huggingface-hub==0.0.19 115 | - hyperopt==0.1.2 116 | - idna==3.3 117 | - idna-ssl==1.1.0 118 | - importlib-metadata==4.8.1 119 | - json-tricks==3.15.5 120 | - keras-preprocessing==1.1.2 121 | - keras-tuner==1.0.2 122 | - markdown==3.3.4 123 | - matplotlib==3.1.3 124 | - ml-metadata==0.26.0 125 | - mock==4.0.3 126 | - multidict==5.2.0 127 | - networkx==2.5.1 128 | - nni==2.4 129 | - oauthlib==3.1.1 130 | - onnx==1.4.1 131 | - opt-einsum==3.3.0 132 | - ortools==9.4.1874 133 | - packaging==21.0 134 | - pandas==1.1.0 135 | - paramiko==2.11.0 136 | - prettytable==2.2.1 137 | - protobuf==3.18.1 138 | - psutil==5.8.0 139 | - pyasn1==0.4.8 140 | - pyasn1-modules==0.2.8 141 | - pybind11==2.8.0 142 | - pydeprecate==0.3.1 143 | - pyemd==0.5.1 144 | - pymongo==3.12.0 145 | - pynacl==1.5.0 146 | - python-levenshtein==0.12.0 147 | - pythonwebhdfs==0.2.3 148 | - pytorch-lightning==1.4.9 149 | - pytz==2021.3 150 | - pyyaml==6.0 151 | - regex==2021.10.8 152 | - requests==2.26.0 153 | - requests-oauthlib==1.3.0 154 | - responses==0.14.0 155 | - rsa==4.7.2 156 | - sacremoses==0.0.46 157 | - schema==0.7.4 158 | - scipy==1.4.1 159 | - scp==0.14.4 160 | - simplejson==3.17.5 161 | - six==1.15.0 162 | - sklearn==0.0 163 | - sox==1.3.7 164 | - tabulate==0.8.9 165 | - tensorboard==2.2.2 166 | - tensorboard-plugin-wit==1.8.0 167 | - tensorboardx==2.1 168 | - tensorflow==2.2.0 169 | - tensorflow-estimator==2.2.0 170 | - termcolor==1.1.0 171 | - terminaltables==3.1.0 172 | - tf-slim==1.1.0 173 | - timm==0.4.12 174 | - tokenizers==0.10.3 175 | - torch==1.9.1 176 | - torch-baidu-ctc==0.3.0 177 | - torchmetrics==0.5.1 178 | - torchvision==0.10.1 179 | - tqdm==4.62.3 180 | - transformers==4.11.3 181 | - typing==3.7.4.3 182 | - typing-extensions==3.10.0.2 183 | - urllib3==1.26.7 184 | - wcwidth==0.2.5 185 | - websockets==9.1 186 | - werkzeug==2.0.2 187 | - wrapt==1.13.2 188 | - yarl==1.7.0 189 | - zipp==3.6.0 -------------------------------------------------------------------------------- /evals/nni/keeper_offline.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Offline register of model keeper client service 3 | ''' 4 | 5 | from modelkeeper.config import modelkeeper_config 
6 | from modelkeeper.clientservice import ModelKeeperClient 7 | 8 | import argparse 9 | import logging 10 | import pickle 11 | import torch 12 | import time 13 | import os 14 | 15 | log_path = './modelkeeper_log' 16 | logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)s %(message)s', 17 | datefmt='%H:%M:%S', 18 | level=logging.INFO, 19 | handlers=[ 20 | logging.FileHandler(log_path, mode='a'), 21 | logging.StreamHandler() 22 | ]) 23 | 24 | parser = argparse.ArgumentParser(description="ModelKeeper offline client APIs") 25 | parser.add_argument('--task', type=str, default='cv') 26 | parser.add_argument('--model_file', type=str, default=None) 27 | parser.add_argument('--export_path', type=str, default=None) 28 | parser.add_argument('--accuracy', type=float, default=-1) 29 | 30 | def register_model(model_file, export_path, accuracy): 31 | # with open(model_file, 'rb') as fin: 32 | # model = pickle.load(fin) 33 | # dummy_input = pickle.load(fin) 34 | 35 | #os.remove(model_file) 36 | # torch.onnx.export(model, dummy_input, export_path, export_params=True, verbose=0, training=1) 37 | 38 | # register model to the zoo 39 | modelkeeper_client = ModelKeeperClient(modelkeeper_config) 40 | modelkeeper_client.register_model_to_zoo(export_path, accuracy=accuracy) 41 | modelkeeper_client.stop() 42 | os.remove(export_path) 43 | 44 | 45 | args, unknown = parser.parse_known_args() 46 | 47 | logging.info(f"Starting to upload {args.model_file}") 48 | register_model(args.model_file, args.model_file, args.accuracy) 49 | logging.info(f"Successfully uploaded model {args.model_file} to the zoo") 50 | -------------------------------------------------------------------------------- /evals/nni/parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | 4 | json_file_path = "./experiment_random.json" 5 | 6 | choices = ["layerchoice_cell__0_1_", "layerchoice_cell__0_2_", "layerchoice_cell__1_2_", "layerchoice_cell__0_3_", "layerchoice_cell__1_3_", "layerchoice_cell__2_3_"] 7 | 8 | trial_count = defaultdict(int) 9 | trial_idx = [] 10 | 11 | with open(json_file_path, 'r') as j: 12 | contents = json.loads(j.read()) 13 | trials = contents.get("trialMessage") 14 | count = 0 15 | for i, trial in enumerate(trials): 16 | if trial["status"] == "SUCCEEDED": 17 | hp = json.loads(trial["hyperParameters"][0]) 18 | if trial_count[hp["parameter_id"]] != 0: 19 | continue 20 | else: 21 | trial_count[hp["parameter_id"]] += 1 22 | script = hp["parameters"]["model_script"] 23 | config = [] 24 | for choice in choices: 25 | result = script.find(choice) 26 | config.append(script[result: result+len(choice)+18].split()[0].split("_", 5)[-1]) 27 | config = '-'.join(config) 28 | print(hp["parameter_id"], config) 29 | # print(trial["finalMetricData"]) 30 | count += 1 31 | # if count > 200: 32 | # break 33 | print(len(trial_count)) 34 | print(count) 35 | -------------------------------------------------------------------------------- /evals/ray_tune/__init_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/__init_.py -------------------------------------------------------------------------------- /evals/ray_tune/keeper_offline.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Offline register of model keeper client service 3 | ''' 4 | 5 | import argparse 6 |
import logging 7 | import os 8 | import pickle 9 | import time 10 | 11 | import torch 12 | 13 | from modelkeeper.clientservice import ModelKeeperClient 14 | from modelkeeper.config import modelkeeper_config 15 | 16 | log_path = './modelkeeper_log' 17 | logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)s %(message)s', 18 | datefmt='%H:%M:%S', 19 | level=logging.INFO, 20 | handlers=[ 21 | logging.FileHandler(log_path, mode='a'), 22 | logging.StreamHandler() 23 | ]) 24 | 25 | parser = argparse.ArgumentParser(description="ModelKeeper offline client APIs") 26 | parser.add_argument('--task', type=str, default='cv') 27 | parser.add_argument('--model_file', type=str, default=None) 28 | parser.add_argument('--export_path', type=str, default=None) 29 | parser.add_argument('--accuracy', type=float, default=-1) 30 | 31 | def register_model(model_file, export_path, accuracy): 32 | with open(model_file, 'rb') as fin: 33 | model = pickle.load(fin) 34 | dummy_input = pickle.load(fin) 35 | 36 | #os.remove(model_file) 37 | torch.onnx.export(model, dummy_input, export_path, export_params=True, verbose=0, training=1, do_constant_folding=False) 38 | 39 | # register model to the zoo 40 | modelkeeper_client = ModelKeeperClient(modelkeeper_config) 41 | modelkeeper_client.register_model_to_zoo(export_path, accuracy=accuracy) 42 | modelkeeper_client.stop() 43 | os.remove(export_path) 44 | 45 | 46 | args, unknown = parser.parse_known_args() 47 | 48 | logging.info(f"Starting to upload {args.model_file}") 49 | register_model(args.model_file, args.model_file, args.accuracy) 50 | logging.info(f"Successfully uploaded model {args.model_file} to the zoo") 51 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/densenet.py: -------------------------------------------------------------------------------- 1 | '''DenseNet in PyTorch.''' 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Bottleneck(nn.Module): 10 | def __init__(self, in_planes, growth_rate): 11 | super(Bottleneck, self).__init__() 12 | self.bn1 = nn.BatchNorm2d(in_planes) 13 | self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) 14 | self.bn2 = nn.BatchNorm2d(4*growth_rate) 15 | self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) 16 | 17 | def forward(self, x): 18 | out = self.conv1(F.relu(self.bn1(x))) 19 | out = self.conv2(F.relu(self.bn2(out))) 20 | out = torch.cat([out,x], 1) 21 | return out 22 | 23 | 24 | class Transition(nn.Module): 25 | def __init__(self, in_planes, out_planes): 26 | super(Transition, self).__init__() 27 | self.bn = nn.BatchNorm2d(in_planes) 28 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) 29 | 30 | def forward(self, x): 31 | out = self.conv(F.relu(self.bn(x))) 32 | out = F.avg_pool2d(out, 2) 33 | return out 34 | 35 | 36 | class DenseNet(nn.Module): 37 | def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): 38 | super(DenseNet, self).__init__() 39 | self.growth_rate = growth_rate 40 | 41 | num_planes = 2*growth_rate 42 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) 43 | 44 | self.dense1 = self._make_dense_layers(block, num_planes,
nblocks[0]) 45 | num_planes += nblocks[0]*growth_rate 46 | out_planes = int(math.floor(num_planes*reduction)) 47 | self.trans1 = Transition(num_planes, out_planes) 48 | num_planes = out_planes 49 | 50 | self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) 51 | num_planes += nblocks[1]*growth_rate 52 | out_planes = int(math.floor(num_planes*reduction)) 53 | self.trans2 = Transition(num_planes, out_planes) 54 | num_planes = out_planes 55 | 56 | self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) 57 | num_planes += nblocks[2]*growth_rate 58 | out_planes = int(math.floor(num_planes*reduction)) 59 | self.trans3 = Transition(num_planes, out_planes) 60 | num_planes = out_planes 61 | 62 | self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) 63 | num_planes += nblocks[3]*growth_rate 64 | 65 | self.bn = nn.BatchNorm2d(num_planes) 66 | self.linear = nn.Linear(num_planes, num_classes) 67 | 68 | def _make_dense_layers(self, block, in_planes, nblock): 69 | layers = [] 70 | for i in range(nblock): 71 | layers.append(block(in_planes, self.growth_rate)) 72 | in_planes += self.growth_rate 73 | return nn.Sequential(*layers) 74 | 75 | def forward(self, x): 76 | out = self.conv1(x) 77 | out = self.trans1(self.dense1(out)) 78 | out = self.trans2(self.dense2(out)) 79 | out = self.trans3(self.dense3(out)) 80 | out = self.dense4(out) 81 | out = F.avg_pool2d(F.relu(self.bn(out)), 4) 82 | out = out.view(out.size(0), -1) 83 | out = self.linear(out) 84 | return out 85 | 86 | def DenseNet121(num_classes=10): 87 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32, num_classes=num_classes) 88 | 89 | def DenseNet169(num_classes=10): 90 | return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32, num_classes=num_classes) 91 | 92 | def DenseNet201(num_classes=10): 93 | return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32, num_classes=num_classes) 94 | 95 | def DenseNet161(num_classes=10): 96 | return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48, num_classes=num_classes) 97 | 98 | def densenet_cifar(): 99 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) 100 | 101 | def test(): 102 | net = densenet_cifar() 103 | x = torch.randn(1,3,32,32) 104 | y = net(x) 105 | print(y) 106 | 107 | # test() 108 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/dla.py: -------------------------------------------------------------------------------- 1 | '''DLA in PyTorch. 2 | 3 | Reference: 4 | Deep Layer Aggregation. 
https://arxiv.org/abs/1707.06484 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class BasicBlock(nn.Module): 12 | expansion = 1 13 | 14 | def __init__(self, in_planes, planes, stride=1): 15 | super(BasicBlock, self).__init__() 16 | self.conv1 = nn.Conv2d( 17 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 20 | stride=1, padding=1, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | 23 | self.shortcut = nn.Sequential() 24 | if stride != 1 or in_planes != self.expansion*planes: 25 | self.shortcut = nn.Sequential( 26 | nn.Conv2d(in_planes, self.expansion*planes, 27 | kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = self.bn2(self.conv2(out)) 34 | out += self.shortcut(x) 35 | out = F.relu(out) 36 | return out 37 | 38 | 39 | class Root(nn.Module): 40 | def __init__(self, in_channels, out_channels, kernel_size=1): 41 | super(Root, self).__init__() 42 | self.conv = nn.Conv2d( 43 | in_channels, out_channels, kernel_size, 44 | stride=1, padding=(kernel_size - 1) // 2, bias=False) 45 | self.bn = nn.BatchNorm2d(out_channels) 46 | 47 | def forward(self, xs): 48 | x = torch.cat(xs, 1) 49 | out = F.relu(self.bn(self.conv(x))) 50 | return out 51 | 52 | 53 | class Tree(nn.Module): 54 | def __init__(self, block, in_channels, out_channels, level=1, stride=1): 55 | super(Tree, self).__init__() 56 | self.level = level 57 | if level == 1: 58 | self.root = Root(2*out_channels, out_channels) 59 | self.left_node = block(in_channels, out_channels, stride=stride) 60 | self.right_node = block(out_channels, out_channels, stride=1) 61 | else: 62 | self.root = Root((level+2)*out_channels, out_channels) 63 | for i in reversed(range(1, level)): 64 | subtree = Tree(block, in_channels, out_channels, 65 | level=i, stride=stride) 66 | self.__setattr__('level_%d' % i, subtree) 67 | self.prev_root = block(in_channels, out_channels, stride=stride) 68 | self.left_node = block(out_channels, out_channels, stride=1) 69 | self.right_node = block(out_channels, out_channels, stride=1) 70 | 71 | def forward(self, x): 72 | xs = [self.prev_root(x)] if self.level > 1 else [] 73 | for i in reversed(range(1, self.level)): 74 | level_i = self.__getattr__('level_%d' % i) 75 | x = level_i(x) 76 | xs.append(x) 77 | x = self.left_node(x) 78 | xs.append(x) 79 | x = self.right_node(x) 80 | xs.append(x) 81 | out = self.root(xs) 82 | return out 83 | 84 | 85 | class DLA(nn.Module): 86 | def __init__(self, block=BasicBlock, num_classes=10): 87 | super(DLA, self).__init__() 88 | self.base = nn.Sequential( 89 | nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False), 90 | nn.BatchNorm2d(16), 91 | nn.ReLU(True) 92 | ) 93 | 94 | self.layer1 = nn.Sequential( 95 | nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False), 96 | nn.BatchNorm2d(16), 97 | nn.ReLU(True) 98 | ) 99 | 100 | self.layer2 = nn.Sequential( 101 | nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False), 102 | nn.BatchNorm2d(32), 103 | nn.ReLU(True) 104 | ) 105 | 106 | self.layer3 = Tree(block, 32, 64, level=1, stride=1) 107 | self.layer4 = Tree(block, 64, 128, level=2, stride=2) 108 | self.layer5 = Tree(block, 128, 256, level=2, stride=2) 109 | self.layer6 = Tree(block, 256, 512, level=1, stride=2) 110 | self.linear = nn.Linear(512, num_classes) 111 | 112 
| def forward(self, x): 113 | out = self.base(x) 114 | out = self.layer1(out) 115 | out = self.layer2(out) 116 | out = self.layer3(out) 117 | out = self.layer4(out) 118 | out = self.layer5(out) 119 | out = self.layer6(out) 120 | out = F.avg_pool2d(out, 4) 121 | out = out.view(out.size(0), -1) 122 | out = self.linear(out) 123 | return out 124 | 125 | 126 | def test(): 127 | net = DLA() 128 | print(net) 129 | x = torch.randn(1, 3, 32, 32) 130 | y = net(x) 131 | print(y.size()) 132 | 133 | 134 | if __name__ == '__main__': 135 | test() 136 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/dla_simple.py: -------------------------------------------------------------------------------- 1 | '''Simplified version of DLA in PyTorch. 2 | 3 | Note that this implementation is not identical to the original paper version, 4 | but it seems to work fine. 5 | 6 | See dla.py for the original paper version. 7 | 8 | Reference: 9 | Deep Layer Aggregation. https://arxiv.org/abs/1707.06484 10 | ''' 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, in_planes, planes, stride=1): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = nn.Conv2d( 22 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 23 | self.bn1 = nn.BatchNorm2d(planes) 24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 25 | stride=1, padding=1, bias=False) 26 | self.bn2 = nn.BatchNorm2d(planes) 27 | 28 | self.shortcut = nn.Sequential() 29 | if stride != 1 or in_planes != self.expansion*planes: 30 | self.shortcut = nn.Sequential( 31 | nn.Conv2d(in_planes, self.expansion*planes, 32 | kernel_size=1, stride=stride, bias=False), 33 | nn.BatchNorm2d(self.expansion*planes) 34 | ) 35 | 36 | def forward(self, x): 37 | out = F.relu(self.bn1(self.conv1(x))) 38 | out = self.bn2(self.conv2(out)) 39 | out += self.shortcut(x) 40 | out = F.relu(out) 41 | return out 42 | 43 | 44 | class Root(nn.Module): 45 | def __init__(self, in_channels, out_channels, kernel_size=1): 46 | super(Root, self).__init__() 47 | self.conv = nn.Conv2d( 48 | in_channels, out_channels, kernel_size, 49 | stride=1, padding=(kernel_size - 1) // 2, bias=False) 50 | self.bn = nn.BatchNorm2d(out_channels) 51 | 52 | def forward(self, xs): 53 | x = torch.cat(xs, 1) 54 | out = F.relu(self.bn(self.conv(x))) 55 | return out 56 | 57 | 58 | class Tree(nn.Module): 59 | def __init__(self, block, in_channels, out_channels, level=1, stride=1): 60 | super(Tree, self).__init__() 61 | self.root = Root(2*out_channels, out_channels) 62 | if level == 1: 63 | self.left_tree = block(in_channels, out_channels, stride=stride) 64 | self.right_tree = block(out_channels, out_channels, stride=1) 65 | else: 66 | self.left_tree = Tree(block, in_channels, 67 | out_channels, level=level-1, stride=stride) 68 | self.right_tree = Tree(block, out_channels, 69 | out_channels, level=level-1, stride=1) 70 | 71 | def forward(self, x): 72 | out1 = self.left_tree(x) 73 | out2 = self.right_tree(out1) 74 | out = self.root([out1, out2]) 75 | return out 76 | 77 | 78 | class SimpleDLA(nn.Module): 79 | def __init__(self, block=BasicBlock, num_classes=10): 80 | super(SimpleDLA, self).__init__() 81 | self.base = nn.Sequential( 82 | nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False), 83 | nn.BatchNorm2d(16), 84 | nn.ReLU(True) 85 | ) 86 | 87 | self.layer1 = nn.Sequential( 88 | nn.Conv2d(16, 16, kernel_size=3,
stride=1, padding=1, bias=False), 89 | nn.BatchNorm2d(16), 90 | nn.ReLU(True) 91 | ) 92 | 93 | self.layer2 = nn.Sequential( 94 | nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False), 95 | nn.BatchNorm2d(32), 96 | nn.ReLU(True) 97 | ) 98 | 99 | self.layer3 = Tree(block, 32, 64, level=1, stride=1) 100 | self.layer4 = Tree(block, 64, 128, level=2, stride=2) 101 | self.layer5 = Tree(block, 128, 256, level=2, stride=2) 102 | self.layer6 = Tree(block, 256, 512, level=1, stride=2) 103 | self.linear = nn.Linear(512, num_classes) 104 | 105 | def forward(self, x): 106 | out = self.base(x) 107 | out = self.layer1(out) 108 | out = self.layer2(out) 109 | out = self.layer3(out) 110 | out = self.layer4(out) 111 | out = self.layer5(out) 112 | out = self.layer6(out) 113 | out = F.avg_pool2d(out, 4) 114 | out = out.view(out.size(0), -1) 115 | out = self.linear(out) 116 | return out 117 | 118 | 119 | def test(): 120 | net = SimpleDLA() 121 | print(net) 122 | x = torch.randn(1, 3, 32, 32) 123 | y = net(x) 124 | print(y.size()) 125 | 126 | 127 | if __name__ == '__main__': 128 | test() 129 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/dpn.py: -------------------------------------------------------------------------------- 1 | '''Dual Path Networks in PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Bottleneck(nn.Module): 8 | def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): 9 | super(Bottleneck, self).__init__() 10 | self.out_planes = out_planes 11 | self.dense_depth = dense_depth 12 | 13 | self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(in_planes) 15 | self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) 16 | self.bn2 = nn.BatchNorm2d(in_planes) 17 | self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False) 18 | self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) 19 | 20 | self.shortcut = nn.Sequential() 21 | if first_layer: 22 | self.shortcut = nn.Sequential( 23 | nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), 24 | nn.BatchNorm2d(out_planes+dense_depth) 25 | ) 26 | 27 | def forward(self, x): 28 | out = F.relu(self.bn1(self.conv1(x))) 29 | out = F.relu(self.bn2(self.conv2(out))) 30 | out = self.bn3(self.conv3(out)) 31 | x = self.shortcut(x) 32 | d = self.out_planes 33 | out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) 34 | out = F.relu(out) 35 | return out 36 | 37 | 38 | class DPN(nn.Module): 39 | def __init__(self, cfg, num_classes=10): 40 | super(DPN, self).__init__() 41 | in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] 42 | num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] 43 | 44 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 45 | self.bn1 = nn.BatchNorm2d(64) 46 | self.last_planes = 64 47 | self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) 48 | self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) 49 | self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) 50 | self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) 51 | self.linear = 
nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], num_classes) 52 | 53 | def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): 54 | strides = [stride] + [1]*(num_blocks-1) 55 | layers = [] 56 | for i,stride in enumerate(strides): 57 | layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) 58 | self.last_planes = out_planes + (i+2) * dense_depth 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = self.layer1(out) 64 | out = self.layer2(out) 65 | out = self.layer3(out) 66 | out = self.layer4(out) 67 | out = F.avg_pool2d(out, 4) 68 | out = out.view(out.size(0), -1) 69 | out = self.linear(out) 70 | return out 71 | 72 | 73 | def DPN26(num_classes=10): 74 | cfg = { 75 | 'in_planes': (96,192,384,768), 76 | 'out_planes': (256,512,1024,2048), 77 | 'num_blocks': (2,2,2,2), 78 | 'dense_depth': (16,32,24,128) 79 | } 80 | return DPN(cfg, num_classes=num_classes) 81 | 82 | def DPN68(num_classes=10): 83 | cfg = { 84 | 'in_planes': (96,192,384,768), 85 | 'out_planes': (256,512,1024,2048), 86 | 'num_blocks': (2,2,12,3), 87 | 'dense_depth': (16,32,32,64) 88 | } 89 | return DPN(cfg, num_classes=num_classes) 90 | 91 | 92 | def DPN92(num_classes=10): 93 | cfg = { 94 | 'in_planes': (96,192,384,768), 95 | 'out_planes': (256,512,1024,2048), 96 | 'num_blocks': (3,4,20,3), 97 | 'dense_depth': (16,32,24,128) 98 | } 99 | return DPN(cfg, num_classes=num_classes) 100 | 101 | 102 | def DPN98(num_classes=10): 103 | cfg = { 104 | 'in_planes': (96,192,384,768), 105 | 'out_planes': (256,512,1024,2048), 106 | 'num_blocks': (3,6,20,3), 107 | 'dense_depth': (16,32,32,128) 108 | } 109 | return DPN(cfg, num_classes=num_classes) 110 | 111 | def DPN107(num_classes=10): 112 | cfg = { 113 | 'in_planes': (96,192,384,768), 114 | 'out_planes': (256,512,1024,2048), 115 | 'num_blocks': (4,8,20,3), 116 | 'dense_depth': (20,64,64,128) 117 | } 118 | return DPN(cfg, num_classes=num_classes) 119 | 120 | def test(): 121 | net = DPN92() 122 | x = torch.randn(1,3,32,32) 123 | y = net(x) 124 | print(y) 125 | 126 | # test() 127 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/mobilenet.py: -------------------------------------------------------------------------------- 1 | '''MobileNet in PyTorch. 2 | 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" 4 | for more details. 
5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class Block(nn.Module): 12 | '''Depthwise conv + Pointwise conv''' 13 | def __init__(self, in_planes, out_planes, stride=1): 14 | super(Block, self).__init__() 15 | self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False) 16 | self.bn1 = nn.BatchNorm2d(in_planes) 17 | self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 18 | self.bn2 = nn.BatchNorm2d(out_planes) 19 | 20 | def forward(self, x): 21 | out = F.relu(self.bn1(self.conv1(x))) 22 | out = F.relu(self.bn2(self.conv2(out))) 23 | return out 24 | 25 | 26 | class MobileNet(nn.Module): 27 | # (128,2) means conv planes=128, conv stride=2, by default conv stride=1 28 | cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] 29 | 30 | def __init__(self, num_classes=10): 31 | super(MobileNet, self).__init__() 32 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) 33 | self.bn1 = nn.BatchNorm2d(32) 34 | self.layers = self._make_layers(in_planes=32) 35 | self.linear = nn.Linear(1024, num_classes) 36 | 37 | def _make_layers(self, in_planes): 38 | layers = [] 39 | for x in self.cfg: 40 | out_planes = x if isinstance(x, int) else x[0] 41 | stride = 1 if isinstance(x, int) else x[1] 42 | layers.append(Block(in_planes, out_planes, stride)) 43 | in_planes = out_planes 44 | return nn.Sequential(*layers) 45 | 46 | def forward(self, x): 47 | out = F.relu(self.bn1(self.conv1(x))) 48 | out = self.layers(out) 49 | out = F.avg_pool2d(out, 2) 50 | out = out.view(out.size(0), -1) 51 | out = self.linear(out) 52 | return out 53 | 54 | 55 | def test(): 56 | net = MobileNet() 57 | x = torch.randn(1,3,32,32) 58 | y = net(x) 59 | print(y.size()) 60 | 61 | # test() 62 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class BaseBlock(nn.Module): 9 | alpha = 1 10 | 11 | def __init__(self, input_channel, output_channel, t = 6, downsample = False): 12 | """ 13 | t: expansion factor, t*input_channel is channel of expansion layer 14 | alpha: width multiplier, to get thinner models 15 | rho: resolution multiplier, to get reduced representation 16 | """ 17 | super(BaseBlock, self).__init__() 18 | self.stride = 2 if downsample else 1 19 | self.downsample = downsample 20 | self.shortcut = (not downsample) and (input_channel == output_channel) 21 | 22 | # apply alpha 23 | input_channel = int(self.alpha * input_channel) 24 | output_channel = int(self.alpha * output_channel) 25 | 26 | # for main path: 27 | c = t * input_channel 28 | # 1x1 point wise conv 29 | self.conv1 = nn.Conv2d(input_channel, c, kernel_size = 1, bias = False) 30 | self.bn1 = nn.BatchNorm2d(c) 31 | # 3x3 depth wise conv 32 | self.conv2 = nn.Conv2d(c, c, kernel_size = 3, stride = self.stride, padding = 1, groups = c, bias = False) 33 | self.bn2 = nn.BatchNorm2d(c) 34 | # 1x1 point wise conv 35 | self.conv3 = nn.Conv2d(c, output_channel, kernel_size = 1, bias = False) 36 | self.bn3 = nn.BatchNorm2d(output_channel) 37 | 38 | 39 | def forward(self, inputs): 40 | # main path 41 | x = F.relu6(self.bn1(self.conv1(inputs)), inplace = True) 42 | x = F.relu6(self.bn2(self.conv2(x)), inplace 
= True) 43 | x = self.bn3(self.conv3(x)) 44 | 45 | # shortcut path 46 | x = x + inputs if self.shortcut else x 47 | 48 | return x 49 | 50 | 51 | 52 | 53 | class MobileNetV2(nn.Module): 54 | def __init__(self, num_classes, alpha = 1): 55 | super(MobileNetV2, self).__init__() 56 | 57 | # first conv layer 58 | self.conv0 = nn.Conv2d(3, int(32*alpha), kernel_size = 3, stride = 1, padding = 1, bias = False) 59 | self.bn0 = nn.BatchNorm2d(int(32*alpha)) 60 | 61 | # build bottlenecks 62 | BaseBlock.alpha = alpha 63 | self.bottlenecks = nn.Sequential( 64 | BaseBlock(32, 16, t = 1, downsample = False), 65 | BaseBlock(16, 24, downsample = False), 66 | BaseBlock(24, 24), 67 | BaseBlock(24, 32, downsample = False), 68 | BaseBlock(32, 32), 69 | BaseBlock(32, 32), 70 | BaseBlock(32, 64, downsample = True), 71 | BaseBlock(64, 64), 72 | BaseBlock(64, 64), 73 | BaseBlock(64, 64), 74 | BaseBlock(64, 96, downsample = False), 75 | BaseBlock(96, 96), 76 | BaseBlock(96, 96), 77 | BaseBlock(96, 160, downsample = True), 78 | BaseBlock(160, 160), 79 | BaseBlock(160, 160), 80 | BaseBlock(160, 320, downsample = False)) 81 | 82 | # last conv layers and fc layer 83 | self.conv1 = nn.Conv2d(int(320*alpha), 1280, kernel_size = 1, bias = False) 84 | self.bn1 = nn.BatchNorm2d(1280) 85 | self.fc = nn.Linear(1280, num_classes) 86 | 87 | # weights init 88 | self.weights_init() 89 | 90 | 91 | def weights_init(self): 92 | for m in self.modules(): 93 | if isinstance(m, nn.Conv2d): 94 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 95 | m.weight.data.normal_(0, math.sqrt(2. / n)) 96 | 97 | elif isinstance(m, nn.BatchNorm2d): 98 | m.weight.data.fill_(1) 99 | m.bias.data.zero_() 100 | 101 | 102 | def forward(self, inputs): 103 | 104 | # first conv layer 105 | x = F.relu6(self.bn0(self.conv0(inputs)), inplace = True) 106 | # assert x.shape[1:] == torch.Size([32, 32, 32]) 107 | 108 | # bottlenecks 109 | x = self.bottlenecks(x) 110 | # assert x.shape[1:] == torch.Size([320, 8, 8]) 111 | 112 | # last conv layer 113 | x = F.relu6(self.bn1(self.conv1(x)), inplace = True) 114 | # assert x.shape[1:] == torch.Size([1280,8,8]) 115 | 116 | # global pooling and fc (in place of conv 1x1 in paper) 117 | x = F.adaptive_avg_pool2d(x, 1) 118 | x = x.view(x.shape[0], -1) 119 | x = self.fc(x) 120 | 121 | return x 122 | 123 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/model_provider.py: -------------------------------------------------------------------------------- 1 | from .densenet import * 2 | from .dla import * 3 | from .dla_simple import * 4 | from .dpn import * 5 | from .efficientnet import * 6 | from .mobilenetv2 import * 7 | from .mobilenetv3 import * 8 | from .pnasnet import * 9 | from .preact_resnet import * 10 | from .preactresnet import * 11 | from .resnet import * 12 | from .resnext import * 13 | from .senet import * 14 | from .shufflenet import * 15 | from .shufflenetv2 import * 16 | from .stochasticdepth import * 17 | from .vgg import * 18 | 19 | __all__ = ['get_cv_model'] 20 | 21 | 22 | _models = { 23 | "DLA": DLA, 24 | "DPN107": DPN107, 25 | "DPN26": DPN26, 26 | "DPN68": DPN68, 27 | "DPN92": DPN92, 28 | "DPN98": DPN98, 29 | "DenseNet121": DenseNet121, 30 | "DenseNet161": DenseNet161, 31 | "DenseNet169": DenseNet169, 32 | "DenseNet201": DenseNet201, 33 | "MobileNetV2": MobileNetV2, 34 | "MobileNetV3": MobileNetV3, 35 | "PreActResNet101": PreActResNet101, 36 | "PreActResNet152": PreActResNet152, 37 | "PreActResNet18": PreActResNet18, 38 |
"PreActResNet34": PreActResNet34, 39 | "PreActResNet50": PreActResNet50, 40 | "ResNeXt29_2x64d": ResNeXt29_2x64d, 41 | "ResNeXt29_32x4d": ResNeXt29_32x4d, 42 | "ResNeXt29_4x64d": ResNeXt29_4x64d, 43 | "ResNeXt29_8x64d": ResNeXt29_8x64d, 44 | "ResNet101": ResNet101, 45 | "ResNet152": ResNet152, 46 | "ResNet18": ResNet18, 47 | "ResNet34": ResNet34, 48 | "ResNet50": ResNet50, 49 | "ShuffleNetG2": ShuffleNetG2, 50 | "ShuffleNetG3": ShuffleNetG3, 51 | "ShuffleNetV2": ShuffleNetV2, 52 | "SimpleDLA": SimpleDLA, 53 | "VGG": VGG, 54 | "preactresnet101": preactresnet101, 55 | "preactresnet152": preactresnet152, 56 | "preactresnet18": preactresnet18, 57 | "preactresnet34": preactresnet34, 58 | "preactresnet50": preactresnet50, 59 | "seresnet101": seresnet101, 60 | "seresnet152": seresnet152, 61 | "seresnet18": seresnet18, 62 | "seresnet34": seresnet34, 63 | "seresnet50": seresnet50, 64 | "stochastic_depth_resnet101": stochastic_depth_resnet101, 65 | "stochastic_depth_resnet152": stochastic_depth_resnet152, 66 | "stochastic_depth_resnet18": stochastic_depth_resnet18, 67 | "stochastic_depth_resnet34": stochastic_depth_resnet34, 68 | "stochastic_depth_resnet50": stochastic_depth_resnet50, 69 | } 70 | 71 | 72 | def get_cv_model(name, **kwargs): 73 | """ 74 | Get supported model. 75 | 76 | Parameters: 77 | ---------- 78 | name : str 79 | Name of model. 80 | 81 | Returns: 82 | ------- 83 | Module 84 | Resulted model. 85 | """ 86 | if name not in _models: 87 | raise ValueError("Unsupported model: {}".format(name)) 88 | net = _models[name](**kwargs) 89 | return net 90 | 91 | 92 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/pnasnet.py: -------------------------------------------------------------------------------- 1 | '''PNASNet in PyTorch. 
2 | 3 | Paper: Progressive Neural Architecture Search 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SepConv(nn.Module): 11 | '''Separable Convolution.''' 12 | def __init__(self, in_planes, out_planes, kernel_size, stride): 13 | super(SepConv, self).__init__() 14 | self.conv1 = nn.Conv2d(in_planes, out_planes, 15 | kernel_size, stride, 16 | padding=(kernel_size-1)//2, 17 | bias=False, groups=in_planes) 18 | self.bn1 = nn.BatchNorm2d(out_planes) 19 | 20 | def forward(self, x): 21 | return self.bn1(self.conv1(x)) 22 | 23 | 24 | class CellA(nn.Module): 25 | def __init__(self, in_planes, out_planes, stride=1): 26 | super(CellA, self).__init__() 27 | self.stride = stride 28 | self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) 29 | if stride==2: 30 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 31 | self.bn1 = nn.BatchNorm2d(out_planes) 32 | 33 | def forward(self, x): 34 | y1 = self.sep_conv1(x) 35 | y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) 36 | if self.stride==2: 37 | y2 = self.bn1(self.conv1(y2)) 38 | return F.relu(y1+y2) 39 | 40 | class CellB(nn.Module): 41 | def __init__(self, in_planes, out_planes, stride=1): 42 | super(CellB, self).__init__() 43 | self.stride = stride 44 | # Left branch 45 | self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) 46 | self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) 47 | # Right branch 48 | self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) 49 | if stride==2: 50 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 51 | self.bn1 = nn.BatchNorm2d(out_planes) 52 | # Reduce channels 53 | self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 54 | self.bn2 = nn.BatchNorm2d(out_planes) 55 | 56 | def forward(self, x): 57 | # Left branch 58 | y1 = self.sep_conv1(x) 59 | y2 = self.sep_conv2(x) 60 | # Right branch 61 | y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) 62 | if self.stride==2: 63 | y3 = self.bn1(self.conv1(y3)) 64 | y4 = self.sep_conv3(x) 65 | # Concat & reduce channels 66 | b1 = F.relu(y1+y2) 67 | b2 = F.relu(y3+y4) 68 | y = torch.cat([b1,b2], 1) 69 | return F.relu(self.bn2(self.conv2(y))) 70 | 71 | class PNASNet(nn.Module): 72 | def __init__(self, cell_type, num_cells, num_planes): 73 | super(PNASNet, self).__init__() 74 | self.in_planes = num_planes 75 | self.cell_type = cell_type 76 | 77 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) 78 | self.bn1 = nn.BatchNorm2d(num_planes) 79 | 80 | self.layer1 = self._make_layer(num_planes, num_cells=6) 81 | self.layer2 = self._downsample(num_planes*2) 82 | self.layer3 = self._make_layer(num_planes*2, num_cells=6) 83 | self.layer4 = self._downsample(num_planes*4) 84 | self.layer5 = self._make_layer(num_planes*4, num_cells=6) 85 | 86 | self.linear = nn.Linear(num_planes*4, 10) 87 | 88 | def _make_layer(self, planes, num_cells): 89 | layers = [] 90 | for _ in range(num_cells): 91 | layers.append(self.cell_type(self.in_planes, planes, stride=1)) 92 | self.in_planes = planes 93 | return nn.Sequential(*layers) 94 | 95 | def _downsample(self, planes): 96 | layer = self.cell_type(self.in_planes, planes, stride=2) 97 | self.in_planes = planes 98 | return layer 99 | 100 | def forward(self, x): 101 | out = F.relu(self.bn1(self.conv1(x))) 102 | 
out = self.layer1(out) 103 | out = self.layer2(out) 104 | out = self.layer3(out) 105 | out = self.layer4(out) 106 | out = self.layer5(out) 107 | out = F.avg_pool2d(out, 8) 108 | out = self.linear(out.view(out.size(0), -1)) 109 | return out 110 | 111 | 112 | def PNASNetA(): 113 | return PNASNet(CellA, num_cells=6, num_planes=44) 114 | 115 | def PNASNetB(): 116 | return PNASNet(CellB, num_cells=6, num_planes=32) 117 | 118 | 119 | def test(): 120 | net = PNASNetB() 121 | x = torch.randn(1,3,32,32) 122 | y = net(x) 123 | print(y) 124 | 125 | # test() 126 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/preact_resnet.py: -------------------------------------------------------------------------------- 1 | '''Pre-activation ResNet in PyTorch. 2 | 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Identity Mappings in Deep Residual Networks. arXiv:1603.05027 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | class PreActBlock(nn.Module): 13 | '''Pre-activation version of the BasicBlock.''' 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(PreActBlock, self).__init__() 18 | self.bn1 = nn.BatchNorm2d(in_planes) 19 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | 23 | if stride != 1 or in_planes != self.expansion*planes: 24 | self.shortcut = nn.Sequential( 25 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 26 | ) 27 | 28 | def forward(self, x): 29 | out = F.relu(self.bn1(x)) 30 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 31 | out = self.conv1(out) 32 | out = self.conv2(F.relu(self.bn2(out))) 33 | out += shortcut 34 | return out 35 | 36 | 37 | class PreActBottleneck(nn.Module): 38 | '''Pre-activation version of the original Bottleneck module.''' 39 | expansion = 4 40 | 41 | def __init__(self, in_planes, planes, stride=1): 42 | super(PreActBottleneck, self).__init__() 43 | self.bn1 = nn.BatchNorm2d(in_planes) 44 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 45 | self.bn2 = nn.BatchNorm2d(planes) 46 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 47 | self.bn3 = nn.BatchNorm2d(planes) 48 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 49 | 50 | if stride != 1 or in_planes != self.expansion*planes: 51 | self.shortcut = nn.Sequential( 52 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 53 | ) 54 | 55 | def forward(self, x): 56 | out = F.relu(self.bn1(x)) 57 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 58 | out = self.conv1(out) 59 | out = self.conv2(F.relu(self.bn2(out))) 60 | out = self.conv3(F.relu(self.bn3(out))) 61 | out += shortcut 62 | return out 63 | 64 | 65 | class PreActResNet(nn.Module): 66 | def __init__(self, block, num_blocks, num_classes=10): 67 | super(PreActResNet, self).__init__() 68 | self.in_planes = 64 69 | 70 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 71 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 72 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 73 | self.layer3 = self._make_layer(block, 256, num_blocks[2], 
stride=2) 74 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 75 | self.linear = nn.Linear(512*block.expansion, num_classes) 76 | 77 | def _make_layer(self, block, planes, num_blocks, stride): 78 | strides = [stride] + [1]*(num_blocks-1) 79 | layers = [] 80 | for stride in strides: 81 | layers.append(block(self.in_planes, planes, stride)) 82 | self.in_planes = planes * block.expansion 83 | return nn.Sequential(*layers) 84 | 85 | def forward(self, x): 86 | out = self.conv1(x) 87 | out = self.layer1(out) 88 | out = self.layer2(out) 89 | out = self.layer3(out) 90 | out = self.layer4(out) 91 | out = F.avg_pool2d(out, 4) 92 | out = out.view(out.size(0), -1) 93 | out = self.linear(out) 94 | return out 95 | 96 | 97 | def PreActResNet18(num_classes=10): 98 | return PreActResNet(PreActBlock, [2,2,2,2], num_classes=num_classes) 99 | 100 | def PreActResNet34(num_classes=10): 101 | return PreActResNet(PreActBlock, [3,4,6,3], num_classes=num_classes) 102 | 103 | def PreActResNet50(num_classes=10): 104 | return PreActResNet(PreActBottleneck, [3,4,6,3], num_classes=num_classes) 105 | 106 | def PreActResNet101(num_classes=10): 107 | return PreActResNet(PreActBottleneck, [3,4,23,3], num_classes=num_classes) 108 | 109 | def PreActResNet152(num_classes=10): 110 | return PreActResNet(PreActBottleneck, [3,8,36,3], num_classes=num_classes) 111 | 112 | 113 | def test(): 114 | net = PreActResNet18() 115 | y = net((torch.randn(1,3,32,32))) 116 | print(y.size()) 117 | 118 | # test() 119 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/preactresnet.py: -------------------------------------------------------------------------------- 1 | """preactresnet in pytorch 2 | 3 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 4 | 5 | Identity Mappings in Deep Residual Networks 6 | https://arxiv.org/abs/1603.05027 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class PreActBasic(nn.Module): 15 | 16 | expansion = 1 17 | def __init__(self, in_channels, out_channels, stride): 18 | super().__init__() 19 | self.residual = nn.Sequential( 20 | nn.BatchNorm2d(in_channels), 21 | nn.ReLU(inplace=True), 22 | nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1), 23 | nn.BatchNorm2d(out_channels), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(out_channels, out_channels * PreActBasic.expansion, kernel_size=3, padding=1) 26 | ) 27 | 28 | self.shortcut = nn.Sequential() 29 | if stride != 1 or in_channels != out_channels * PreActBasic.expansion: 30 | self.shortcut = nn.Conv2d(in_channels, out_channels * PreActBasic.expansion, 1, stride=stride) 31 | 32 | def forward(self, x): 33 | 34 | res = self.residual(x) 35 | shortcut = self.shortcut(x) 36 | 37 | return res + shortcut 38 | 39 | 40 | class PreActBottleNeck(nn.Module): 41 | 42 | expansion = 4 43 | def __init__(self, in_channels, out_channels, stride): 44 | super().__init__() 45 | 46 | self.residual = nn.Sequential( 47 | nn.BatchNorm2d(in_channels), 48 | nn.ReLU(inplace=True), 49 | nn.Conv2d(in_channels, out_channels, 1, stride=stride), 50 | 51 | nn.BatchNorm2d(out_channels), 52 | nn.ReLU(inplace=True), 53 | nn.Conv2d(out_channels, out_channels, 3, padding=1), 54 | 55 | nn.BatchNorm2d(out_channels), 56 | nn.ReLU(inplace=True), 57 | nn.Conv2d(out_channels, out_channels * PreActBottleNeck.expansion, 1) 58 | ) 59 | 60 | self.shortcut = nn.Sequential() 61 | 62 | if stride != 1 or in_channels != out_channels * 
PreActBottleNeck.expansion: 63 | self.shortcut = nn.Conv2d(in_channels, out_channels * PreActBottleNeck.expansion, 1, stride=stride) 64 | 65 | def forward(self, x): 66 | 67 | res = self.residual(x) 68 | shortcut = self.shortcut(x) 69 | 70 | return res + shortcut 71 | 72 | class PreActResNet(nn.Module): 73 | 74 | def __init__(self, block, num_block, num_classes=100): 75 | super().__init__() 76 | self.input_channels = 64 77 | 78 | self.pre = nn.Sequential( 79 | nn.Conv2d(3, 64, 3, padding=1), 80 | nn.BatchNorm2d(64), 81 | nn.ReLU(inplace=True) 82 | ) 83 | 84 | self.stage1 = self._make_layers(block, num_block[0], 64, 1) 85 | self.stage2 = self._make_layers(block, num_block[1], 128, 2) 86 | self.stage3 = self._make_layers(block, num_block[2], 256, 2) 87 | self.stage4 = self._make_layers(block, num_block[3], 512, 2) 88 | 89 | self.linear = nn.Linear(self.input_channels, num_classes) 90 | 91 | def _make_layers(self, block, block_num, out_channels, stride): 92 | layers = [] 93 | 94 | layers.append(block(self.input_channels, out_channels, stride)) 95 | self.input_channels = out_channels * block.expansion 96 | 97 | while block_num - 1: 98 | layers.append(block(self.input_channels, out_channels, 1)) 99 | self.input_channels = out_channels * block.expansion 100 | block_num -= 1 101 | 102 | return nn.Sequential(*layers) 103 | 104 | def forward(self, x): 105 | x = self.pre(x) 106 | 107 | x = self.stage1(x) 108 | x = self.stage2(x) 109 | x = self.stage3(x) 110 | x = self.stage4(x) 111 | 112 | x = F.adaptive_avg_pool2d(x, 1) 113 | x = x.view(x.size(0), -1) 114 | x = self.linear(x) 115 | 116 | return x 117 | 118 | def preactresnet18(num_classes=100): 119 | return PreActResNet(PreActBasic, [2, 2, 2, 2], num_classes=num_classes) 120 | 121 | def preactresnet34(num_classes=100): 122 | return PreActResNet(PreActBasic, [3, 4, 6, 3], num_classes=num_classes) 123 | 124 | def preactresnet50(num_classes=100): 125 | return PreActResNet(PreActBottleNeck, [3, 4, 6, 3], num_classes=num_classes) 126 | 127 | def preactresnet101(num_classes=100): 128 | return PreActResNet(PreActBottleNeck, [3, 4, 23, 3], num_classes=num_classes) 129 | 130 | def preactresnet152(num_classes=100): 131 | return PreActResNet(PreActBottleNeck, [3, 8, 36, 3], num_classes=num_classes) 132 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | 3 | For Pre-activation ResNet, see 'preact_resnet.py'. 4 | 5 | Reference: 6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 7 | Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class BasicBlock(nn.Module): 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, stride=1): 18 | super(BasicBlock, self).__init__() 19 | self.conv1 = nn.Conv2d( 20 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 23 | stride=1, padding=1, bias=False) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | 26 | self.shortcut = nn.Sequential() 27 | if stride != 1 or in_planes != self.expansion*planes: 28 | self.shortcut = nn.Sequential( 29 | nn.Conv2d(in_planes, self.expansion*planes, 30 | kernel_size=1, stride=stride, bias=False), 31 | nn.BatchNorm2d(self.expansion*planes) 32 | ) 33 | 34 | def forward(self, x): 35 | out = F.relu(self.bn1(self.conv1(x))) 36 | out = self.bn2(self.conv2(out)) 37 | out += self.shortcut(x) 38 | out = F.relu(out) 39 | return out 40 | 41 | 42 | class Bottleneck(nn.Module): 43 | expansion = 4 44 | 45 | def __init__(self, in_planes, planes, stride=1): 46 | super(Bottleneck, self).__init__() 47 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 48 | self.bn1 = nn.BatchNorm2d(planes) 49 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 50 | stride=stride, padding=1, bias=False) 51 | self.bn2 = nn.BatchNorm2d(planes) 52 | self.conv3 = nn.Conv2d(planes, self.expansion * 53 | planes, kernel_size=1, bias=False) 54 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 55 | 56 | self.shortcut = nn.Sequential() 57 | if stride != 1 or in_planes != self.expansion*planes: 58 | self.shortcut = nn.Sequential( 59 | nn.Conv2d(in_planes, self.expansion*planes, 60 | kernel_size=1, stride=stride, bias=False), 61 | nn.BatchNorm2d(self.expansion*planes) 62 | ) 63 | 64 | def forward(self, x): 65 | out = F.relu(self.bn1(self.conv1(x))) 66 | out = F.relu(self.bn2(self.conv2(out))) 67 | out = self.bn3(self.conv3(out)) 68 | out += self.shortcut(x) 69 | out = F.relu(out) 70 | return out 71 | 72 | 73 | class ResNet(nn.Module): 74 | def __init__(self, block, num_blocks, num_classes=10): 75 | super(ResNet, self).__init__() 76 | self.in_planes = 64 77 | 78 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, 79 | stride=1, padding=1, bias=False) 80 | self.bn1 = nn.BatchNorm2d(64) 81 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 82 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 83 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 84 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 85 | self.linear = nn.Linear(512*block.expansion, num_classes) 86 | 87 | def _make_layer(self, block, planes, num_blocks, stride): 88 | strides = [stride] + [1]*(num_blocks-1) 89 | layers = [] 90 | for stride in strides: 91 | layers.append(block(self.in_planes, planes, stride)) 92 | self.in_planes = planes * block.expansion 93 | return nn.Sequential(*layers) 94 | 95 | def forward(self, x): 96 | out = F.relu(self.bn1(self.conv1(x))) 97 | out = self.layer1(out) 98 | out = self.layer2(out) 99 | out = self.layer3(out) 100 | out = self.layer4(out) 101 | out = F.avg_pool2d(out, 4) 102 | out = out.view(out.size(0), -1) 103 | out = self.linear(out) 104 | return out 105 | 106 | 107 | def ResNet18(num_classes=10): 108 | return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes) 109 | 110 | 111 | def ResNet34(num_classes=10): 112 | return ResNet(BasicBlock, [3, 4, 6, 3], 
num_classes=num_classes) 113 | 114 | 115 | def ResNet50(num_classes=10): 116 | return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes) 117 | 118 | 119 | def ResNet101(num_classes=10): 120 | return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes) 121 | 122 | 123 | def ResNet152(num_classes=10): 124 | return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes) 125 | 126 | 127 | def test(): 128 | net = ResNet18() 129 | y = net(torch.randn(1, 3, 32, 32)) 130 | print(y.size()) 131 | 132 | # test() 133 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/resnext.py: -------------------------------------------------------------------------------- 1 | '''ResNeXt in PyTorch. 2 | 3 | See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class Block(nn.Module): 11 | '''Grouped convolution block.''' 12 | expansion = 2 13 | 14 | def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): 15 | super(Block, self).__init__() 16 | group_width = cardinality * bottleneck_width 17 | self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(group_width) 19 | self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False) 20 | self.bn2 = nn.BatchNorm2d(group_width) 21 | self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(self.expansion*group_width) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*group_width: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*group_width) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | out = self.bn3(self.conv3(out)) 35 | out += self.shortcut(x) 36 | out = F.relu(out) 37 | return out 38 | 39 | 40 | class ResNeXt(nn.Module): 41 | def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): 42 | super(ResNeXt, self).__init__() 43 | self.cardinality = cardinality 44 | self.bottleneck_width = bottleneck_width 45 | self.in_planes = 64 46 | 47 | self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) 48 | self.bn1 = nn.BatchNorm2d(64) 49 | self.layer1 = self._make_layer(num_blocks[0], 1) 50 | self.layer2 = self._make_layer(num_blocks[1], 2) 51 | self.layer3 = self._make_layer(num_blocks[2], 2) 52 | # self.layer4 = self._make_layer(num_blocks[3], 2) 53 | self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes) 54 | 55 | def _make_layer(self, num_blocks, stride): 56 | strides = [stride] + [1]*(num_blocks-1) 57 | layers = [] 58 | for stride in strides: 59 | layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)) 60 | self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width 61 | # Increase bottleneck_width by 2 after each stage. 
62 | self.bottleneck_width *= 2 63 | return nn.Sequential(*layers) 64 | 65 | def forward(self, x): 66 | out = F.relu(self.bn1(self.conv1(x))) 67 | out = self.layer1(out) 68 | out = self.layer2(out) 69 | out = self.layer3(out) 70 | # out = self.layer4(out) 71 | out = F.avg_pool2d(out, 8) 72 | out = out.view(out.size(0), -1) 73 | out = self.linear(out) 74 | return out 75 | 76 | 77 | def ResNeXt29_2x64d(num_classes=10): 78 | return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64, num_classes=num_classes) 79 | 80 | def ResNeXt29_4x64d(num_classes=10): 81 | return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64, num_classes=num_classes) 82 | 83 | def ResNeXt29_8x64d(num_classes=10): 84 | return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64, num_classes=num_classes) 85 | 86 | def ResNeXt29_32x4d(num_classes=10): 87 | return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4, num_classes=num_classes) 88 | 89 | def test_resnext(): 90 | net = ResNeXt29_2x64d() 91 | x = torch.randn(1,3,32,32) 92 | y = net(x) 93 | print(y.size()) 94 | 95 | # test_resnext() 96 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/shufflenet.py: -------------------------------------------------------------------------------- 1 | '''ShuffleNet in PyTorch. 2 | 3 | See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class ShuffleBlock(nn.Module): 11 | def __init__(self, groups): 12 | super(ShuffleBlock, self).__init__() 13 | self.groups = groups 14 | 15 | def forward(self, x): 16 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]''' 17 | N,C,H,W = x.size() 18 | g = self.groups 19 | return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W) 20 | 21 | 22 | class Bottleneck(nn.Module): 23 | def __init__(self, in_planes, out_planes, stride, groups): 24 | super(Bottleneck, self).__init__() 25 | self.stride = stride 26 | 27 | mid_planes = int(out_planes/4) 28 | g = 1 if in_planes==24 else groups 29 | 30 | self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False) 31 | self.bn1 = nn.BatchNorm2d(mid_planes) 32 | self.shuffle1 = ShuffleBlock(groups=g) 33 | self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False) 34 | self.bn2 = nn.BatchNorm2d(mid_planes) 35 | self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False) 36 | self.bn3 = nn.BatchNorm2d(out_planes) 37 | 38 | self.shortcut = nn.Sequential() 39 | if stride == 2: 40 | self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) 41 | 42 | def forward(self, x): 43 | out = F.relu(self.bn1(self.conv1(x))) 44 | out = self.shuffle1(out) 45 | out = F.relu(self.bn2(self.conv2(out))) 46 | out = self.bn3(self.conv3(out)) 47 | res = self.shortcut(x) 48 | out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res) 49 | return out 50 | 51 | 52 | class ShuffleNet(nn.Module): 53 | def __init__(self, cfg, num_classes=10): 54 | super(ShuffleNet, self).__init__() 55 | out_planes = cfg['out_planes'] 56 | num_blocks = cfg['num_blocks'] 57 | groups = cfg['groups'] 58 | 59 | self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False) 60 | self.bn1 = nn.BatchNorm2d(24) 61 | self.in_planes = 24 62 | self.layer1 = self._make_layer(out_planes[0],
num_blocks[0], groups) 63 | self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups) 64 | self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups) 65 | self.linear = nn.Linear(out_planes[2], num_classes) 66 | 67 | def _make_layer(self, out_planes, num_blocks, groups): 68 | layers = [] 69 | for i in range(num_blocks): 70 | stride = 2 if i == 0 else 1 71 | cat_planes = self.in_planes if i == 0 else 0 72 | layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups)) 73 | self.in_planes = out_planes 74 | return nn.Sequential(*layers) 75 | 76 | def forward(self, x): 77 | out = F.relu(self.bn1(self.conv1(x))) 78 | out = self.layer1(out) 79 | out = self.layer2(out) 80 | out = self.layer3(out) 81 | out = F.avg_pool2d(out, 4) 82 | out = out.view(out.size(0), -1) 83 | out = self.linear(out) 84 | return out 85 | 86 | 87 | def ShuffleNetG2(num_classes=10): 88 | cfg = { 89 | 'out_planes': [200,400,800], 90 | 'num_blocks': [4,8,4], 91 | 'groups': 2 92 | } 93 | return ShuffleNet(cfg, num_classes=num_classes) 94 | 95 | def ShuffleNetG3(num_classes=10): 96 | cfg = { 97 | 'out_planes': [240,480,960], 98 | 'num_blocks': [4,8,4], 99 | 'groups': 3 100 | } 101 | return ShuffleNet(cfg, num_classes=num_classes) 102 | 103 | 104 | def test(): 105 | net = ShuffleNetG2() 106 | x = torch.randn(1,3,32,32) 107 | y = net(x) 108 | print(y) 109 | 110 | # test() 111 | -------------------------------------------------------------------------------- /evals/ray_tune/models/cifarmodels/vgg.py: -------------------------------------------------------------------------------- 1 | """vgg in pytorch 2 | 3 | 4 | [1] Karen Simonyan, Andrew Zisserman 5 | 6 | Very Deep Convolutional Networks for Large-Scale Image Recognition. 7 | https://arxiv.org/abs/1409.1556v6 8 | """ 9 | '''VGG11/13/16/19 in Pytorch.''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | vggcfg = { 15 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 16 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 17 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 18 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 19 | } 20 | 21 | class VGG(nn.Module): 22 | 23 | def __init__(self, vgg_block, use_bn=False, num_classes=10): 24 | super(VGG, self).__init__() 25 | self.use_bn = use_bn 26 | self.features = self._make_layers(vggcfg[f"VGG{vgg_block}"]) 27 | self.classifier = nn.Linear(512, num_classes) 28 | 29 | def forward(self, x): 30 | out = self.features(x) 31 | out = out.view(out.size(0), -1) 32 | out = self.classifier(out) 33 | return out 34 | 35 | def _make_layers(self, cfg): 36 | layers = [] 37 | in_channels = 3 38 | for x in cfg: 39 | if x == 'M': 40 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 41 | else: 42 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1)] 43 | if self.use_bn: 44 | layers += [nn.BatchNorm2d(x)] 45 | layers += [nn.ReLU(inplace=True)] 46 | in_channels = x 47 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 48 | return nn.Sequential(*layers) 49 | -------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/cell_infers/__init__.py: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # 3 | 
##################################################### 4 | from .nasnet_cifar import NASNetonCIFAR 5 | from .tiny_network import TinyNetwork 6 | -------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/cell_infers/cells.py: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # 3 | ##################################################### 4 | 5 | from copy import deepcopy 6 | 7 | import torch 8 | import torch.nn as nn 9 | from models.cell_operations import OPS 10 | 11 | 12 | # Cell for NAS-Bench-201 13 | class InferCell(nn.Module): 14 | 15 | def __init__(self, genotype, C_in, C_out, stride, affine=True, track_running_stats=True): 16 | super(InferCell, self).__init__() 17 | 18 | self.layers = nn.ModuleList() 19 | self.node_IN = [] 20 | self.node_IX = [] 21 | self.genotype = deepcopy(genotype) 22 | for i in range(1, len(genotype)): 23 | node_info = genotype[i-1] 24 | cur_index = [] 25 | cur_innod = [] 26 | for (op_name, op_in) in node_info: 27 | if op_in == 0: 28 | layer = OPS[op_name](C_in , C_out, stride, affine, track_running_stats) 29 | else: 30 | layer = OPS[op_name](C_out, C_out, 1, affine, track_running_stats) 31 | cur_index.append( len(self.layers) ) 32 | cur_innod.append( op_in ) 33 | self.layers.append( layer ) 34 | self.node_IX.append( cur_index ) 35 | self.node_IN.append( cur_innod ) 36 | self.nodes = len(genotype) 37 | self.in_dim = C_in 38 | self.out_dim = C_out 39 | 40 | def extra_repr(self): 41 | string = 'info :: nodes={nodes}, inC={in_dim}, outC={out_dim}'.format(**self.__dict__) 42 | laystr = [] 43 | for i, (node_layers, node_innods) in enumerate(zip(self.node_IX,self.node_IN)): 44 | y = ['I{:}-L{:}'.format(_ii, _il) for _il, _ii in zip(node_layers, node_innods)] 45 | x = '{:}<-({:})'.format(i+1, ','.join(y)) 46 | laystr.append( x ) 47 | return string + ', [{:}]'.format( ' | '.join(laystr) ) + ', {:}'.format(self.genotype.tostr()) 48 | 49 | def forward(self, inputs): 50 | nodes = [inputs] 51 | for i, (node_layers, node_innods) in enumerate(zip(self.node_IX,self.node_IN)): 52 | node_feature = sum( self.layers[_il](nodes[_ii]) for _il, _ii in zip(node_layers, node_innods) ) 53 | nodes.append( node_feature ) 54 | return nodes[-1] 55 | 56 | 57 | 58 | # Learning Transferable Architectures for Scalable Image Recognition, CVPR 2018 59 | class NASNetInferCell(nn.Module): 60 | 61 | def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev, affine, track_running_stats): 62 | super(NASNetInferCell, self).__init__() 63 | self.reduction = reduction 64 | if reduction_prev: self.preprocess0 = OPS['skip_connect'](C_prev_prev, C, 2, affine, track_running_stats) 65 | else : self.preprocess0 = OPS['nor_conv_1x1'](C_prev_prev, C, 1, affine, track_running_stats) 66 | self.preprocess1 = OPS['nor_conv_1x1'](C_prev, C, 1, affine, track_running_stats) 67 | 68 | if not reduction: 69 | nodes, concats = genotype['normal'], genotype['normal_concat'] 70 | else: 71 | nodes, concats = genotype['reduce'], genotype['reduce_concat'] 72 | self._multiplier = len(concats) 73 | self._concats = concats 74 | self._steps = len(nodes) 75 | self._nodes = nodes 76 | self.edges = nn.ModuleDict() 77 | for i, node in enumerate(nodes): 78 | for in_node in node: 79 | name, j = in_node[0], in_node[1] 80 | stride = 2 if reduction and j < 2 else 1 81 | node_str = '{:}<-{:}'.format(i+2, j) 82 | self.edges[node_str] 
= OPS[name](C, C, stride, affine, track_running_stats) 83 | 84 | # [TODO] to support drop_prob in this function.. 85 | def forward(self, s0, s1, unused_drop_prob): 86 | s0 = self.preprocess0(s0) 87 | s1 = self.preprocess1(s1) 88 | 89 | states = [s0, s1] 90 | for i, node in enumerate(self._nodes): 91 | clist = [] 92 | for in_node in node: 93 | name, j = in_node[0], in_node[1] 94 | node_str = '{:}<-{:}'.format(i+2, j) 95 | op = self.edges[ node_str ] 96 | clist.append( op(states[j]) ) 97 | states.append( sum(clist) ) 98 | return torch.cat([states[x] for x in self._concats], dim=1) 99 | 100 | 101 | class AuxiliaryHeadCIFAR(nn.Module): 102 | 103 | def __init__(self, C, num_classes): 104 | """assuming input size 8x8""" 105 | super(AuxiliaryHeadCIFAR, self).__init__() 106 | self.features = nn.Sequential( 107 | nn.ReLU(inplace=True), 108 | nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2 109 | nn.Conv2d(C, 128, 1, bias=False), 110 | nn.BatchNorm2d(128), 111 | nn.ReLU(inplace=True), 112 | nn.Conv2d(128, 768, 2, bias=False), 113 | nn.BatchNorm2d(768), 114 | nn.ReLU(inplace=True) 115 | ) 116 | self.classifier = nn.Linear(768, num_classes) 117 | 118 | def forward(self, x): 119 | x = self.features(x) 120 | x = self.classifier(x.view(x.size(0),-1)) 121 | return x 122 | -------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/cell_infers/nasnet_cifar.py: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # 3 | ##################################################### 4 | from copy import deepcopy 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from .cells import AuxiliaryHeadCIFAR 10 | from .cells import NASNetInferCell as InferCell 11 | 12 | 13 | # The macro structure is based on NASNet 14 | class NASNetonCIFAR(nn.Module): 15 | 16 | def __init__(self, C, N, stem_multiplier, num_classes, genotype, auxiliary, affine=True, track_running_stats=True): 17 | super(NASNetonCIFAR, self).__init__() 18 | self._C = C 19 | self._layerN = N 20 | self.stem = nn.Sequential( 21 | nn.Conv2d(3, C*stem_multiplier, kernel_size=3, padding=1, bias=False), 22 | nn.BatchNorm2d(C*stem_multiplier)) 23 | 24 | # config for each layer 25 | layer_channels = [C ] * N + [C*2 ] + [C*2 ] * (N-1) + [C*4 ] + [C*4 ] * (N-1) 26 | layer_reductions = [False] * N + [True] + [False] * (N-1) + [True] + [False] * (N-1) 27 | 28 | C_prev_prev, C_prev, C_curr, reduction_prev = C*stem_multiplier, C*stem_multiplier, C, False 29 | self.auxiliary_index = None 30 | self.auxiliary_head = None 31 | self.cells = nn.ModuleList() 32 | for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)): 33 | cell = InferCell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev, affine, track_running_stats) 34 | self.cells.append( cell ) 35 | C_prev_prev, C_prev, reduction_prev = C_prev, cell._multiplier*C_curr, reduction 36 | if reduction and C_curr == C*4 and auxiliary: 37 | self.auxiliary_head = AuxiliaryHeadCIFAR(C_prev, num_classes) 38 | self.auxiliary_index = index 39 | self._Layer = len(self.cells) 40 | self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True)) 41 | self.global_pooling = nn.AdaptiveAvgPool2d(1) 42 | self.classifier = nn.Linear(C_prev, num_classes) 43 | self.drop_path_prob = -1 44 | 45 | def update_drop_path(self, drop_path_prob): 46 | self.drop_path_prob = 
drop_path_prob 47 | 48 | def auxiliary_param(self): 49 | if self.auxiliary_head is None: return [] 50 | else: return list( self.auxiliary_head.parameters() ) 51 | 52 | def get_message(self): 53 | string = self.extra_repr() 54 | for i, cell in enumerate(self.cells): 55 | string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr()) 56 | return string 57 | 58 | def extra_repr(self): 59 | return ('{name}(C={_C}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__)) 60 | 61 | def forward(self, inputs): 62 | stem_feature, logits_aux = self.stem(inputs), None 63 | cell_results = [stem_feature, stem_feature] 64 | for i, cell in enumerate(self.cells): 65 | cell_feature = cell(cell_results[-2], cell_results[-1], self.drop_path_prob) 66 | cell_results.append( cell_feature ) 67 | if self.auxiliary_index is not None and i == self.auxiliary_index and self.training: 68 | logits_aux = self.auxiliary_head( cell_results[-1] ) 69 | out = self.lastact(cell_results[-1]) 70 | out = self.global_pooling( out ) 71 | out = out.view(out.size(0), -1) 72 | logits = self.classifier(out) 73 | if logits_aux is None: return out, logits 74 | else: return out, [logits, logits_aux] 75 | -------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/cell_infers/tiny_network.py: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019.01 # 3 | ##################################################### 4 | import torch.nn as nn 5 | 6 | from ..cell_operations import ResNetBasicblock 7 | from .cells import InferCell 8 | 9 | 10 | # The macro structure for architectures in NAS-Bench-201 11 | class TinyNetwork(nn.Module): 12 | 13 | def __init__(self, C, N, genotype, num_classes): 14 | super(TinyNetwork, self).__init__() 15 | self._C = C 16 | self._layerN = N 17 | 18 | self.stem = nn.Sequential( 19 | nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False), 20 | nn.BatchNorm2d(C)) 21 | 22 | layer_channels = [C ] * N + [C*2 ] + [C*2 ] * N + [C*4 ] + [C*4 ] * N 23 | layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N 24 | 25 | C_prev = C 26 | self.cells = nn.ModuleList() 27 | for index, (C_curr, reduction) in enumerate(zip(layer_channels, layer_reductions)): 28 | if reduction: 29 | cell = ResNetBasicblock(C_prev, C_curr, 2, True) 30 | else: 31 | cell = InferCell(genotype, C_prev, C_curr, 1) 32 | self.cells.append( cell ) 33 | C_prev = cell.out_dim 34 | self._Layer= len(self.cells) 35 | 36 | self.lastact = nn.Sequential(nn.BatchNorm2d(C_prev), nn.ReLU(inplace=True)) 37 | self.global_pooling = nn.AdaptiveAvgPool2d(1) 38 | self.classifier = nn.Linear(C_prev, num_classes) 39 | 40 | def get_message(self): 41 | string = self.extra_repr() 42 | for i, cell in enumerate(self.cells): 43 | string += '\n {:02d}/{:02d} :: {:}'.format(i, len(self.cells), cell.extra_repr()) 44 | return string 45 | 46 | def extra_repr(self): 47 | return ('{name}(C={_C}, N={_layerN}, L={_Layer})'.format(name=self.__class__.__name__, **self.__dict__)) 48 | 49 | def forward(self, inputs): 50 | feature = self.stem(inputs) 51 | for i, cell in enumerate(self.cells): 52 | feature = cell(feature) 53 | 54 | out = self.lastact(feature) 55 | out = self.global_pooling( out ) 56 | out = out.view(out.size(0), -1) 57 | logits = self.classifier(out) 58 | 59 | return out, logits 60 | 
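For reference, a minimal smoke-test sketch for TinyNetwork in the spirit of the commented-out test() helpers in the cifarmodels files above; the NAS-Bench-201 defaults (C=16, N=5) and the pre-built genotype are assumptions here, since genotypes are constructed by the sibling genotypes.py module and are not shown in this file:
# Hypothetical usage (not part of the original file):
# genotype = ...  # a cell structure parsed from a NAS-Bench-201 arch string via genotypes.py
# net = TinyNetwork(C=16, N=5, genotype=genotype, num_classes=10)
# features, logits = net(torch.randn(2, 3, 32, 32))  # forward() returns (features, logits)
# assert logits.shape == (2, 10)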
-------------------------------------------------------------------------------- /evals/ray_tune/models/nasbench/configure_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | import json 8 | import os 9 | from collections import namedtuple 10 | from os import path as osp 11 | from pathlib import Path 12 | 13 | support_types = ('str', 'int', 'bool', 'float', 'none') 14 | 15 | 16 | def convert_param(original_lists): 17 | assert isinstance(original_lists, list), 'The type is not right : {:}'.format(original_lists) 18 | ctype, value = original_lists[0], original_lists[1] 19 | assert ctype in support_types, 'Ctype={:}, support={:}'.format(ctype, support_types) 20 | is_list = isinstance(value, list) 21 | if not is_list: value = [value] 22 | outs = [] 23 | for x in value: 24 | if ctype == 'int': 25 | x = int(x) 26 | elif ctype == 'str': 27 | x = str(x) 28 | elif ctype == 'bool': 29 | x = bool(int(x)) 30 | elif ctype == 'float': 31 | x = float(x) 32 | elif ctype == 'none': 33 | if x.lower() != 'none': 34 | raise ValueError('For the none type, the value must be none instead of {:}'.format(x)) 35 | x = None 36 | else: 37 | raise TypeError('Does not know this type : {:}'.format(ctype)) 38 | outs.append(x) 39 | if not is_list: outs = outs[0] 40 | return outs 41 | 42 | 43 | def load_config(path, extra, logger): 44 | path = str(path) 45 | if hasattr(logger, 'log'): logger.log(path) 46 | assert os.path.exists(path), 'Can not find {:}'.format(path) 47 | # Reading data back 48 | with open(path, 'r') as f: 49 | data = json.load(f) 50 | content = { k: convert_param(v) for k,v in data.items()} 51 | assert extra is None or isinstance(extra, dict), 'invalid type of extra : {:}'.format(extra) 52 | if isinstance(extra, dict): content = {**content, **extra} 53 | Arguments = namedtuple('Configure', ' '.join(content.keys())) 54 | content = Arguments(**content) 55 | if hasattr(logger, 'log'): logger.log('{:}'.format(content)) 56 | return content 57 | 58 | 59 | def configure2str(config, xpath=None): 60 | if not isinstance(config, dict): 61 | config = config._asdict() 62 | def cstring(x): 63 | return "\"{:}\"".format(x) 64 | def gtype(x): 65 | if isinstance(x, list): x = x[0] 66 | if isinstance(x, str) : return 'str' 67 | elif isinstance(x, bool) : return 'bool' 68 | elif isinstance(x, int): return 'int' 69 | elif isinstance(x, float): return 'float' 70 | elif x is None : return 'none' 71 | else: raise ValueError('invalid : {:}'.format(x)) 72 | def cvalue(x, xtype): 73 | if isinstance(x, list): is_list = True 74 | else: 75 | is_list, x = False, [x] 76 | temps = [] 77 | for temp in x: 78 | if xtype == 'bool' : temp = cstring(int(temp)) 79 | elif xtype == 'none': temp = cstring('None') 80 | else : temp = cstring(temp) 81 | temps.append( temp ) 82 | if is_list: 83 | return "[{:}]".format( ', '.join( temps ) ) 84 | else: 85 | return temps[0] 86 | 87 | xstrings = [] 88 | for key, value in config.items(): 89 | xtype = gtype(value) 90 | string = ' {:20s} : [{:8s}, {:}]'.format(cstring(key), cstring(xtype), cvalue(value, xtype)) 91 | xstrings.append(string) 92 | Fstring = '{\n' + ',\n'.join(xstrings) + '\n}' 93 | if xpath is not None: 94 | parent = Path(xpath).resolve().parent 95 | parent.mkdir(parents=True, exist_ok=True) 96 | if osp.isfile(xpath): os.remove(xpath) 
97 | with open(xpath, "w") as text_file: 98 | text_file.write('{:}'.format(Fstring)) 99 | return Fstring 100 | 101 | 102 | def dict2config(xdict, logger): 103 | assert isinstance(xdict, dict), 'invalid type : {:}'.format( type(xdict) ) 104 | Arguments = namedtuple('Configure', ' '.join(xdict.keys())) 105 | content = Arguments(**xdict) 106 | if hasattr(logger, 'log'): logger.log('{:}'.format(content)) 107 | return content 108 | -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/models/torchcv/__init__.py -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/models/torchcv/models/__init__.py -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/jasperdr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Jasper DR (Dense Residual) for ASR, implemented in PyTorch. 3 | Original paper: 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' https://arxiv.org/abs/1904.03288. 4 | """ 5 | 6 | __all__ = ['jasperdr10x5_en', 'jasperdr10x5_en_nr'] 7 | 8 | from .jasper import get_jasper 9 | 10 | 11 | def jasperdr10x5_en(num_classes=29, **kwargs): 12 | """ 13 | Jasper DR 10x5 model for English language from 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' 14 | https://arxiv.org/abs/1904.03288. 15 | 16 | Parameters: 17 | ---------- 18 | num_classes : int, default 29 19 | Number of classification classes (number of graphemes). 20 | pretrained : bool, default False 21 | Whether to load the pretrained weights for model. 22 | root : str, default '~/.torch/models' 23 | Location for keeping the model parameters. 24 | """ 25 | return get_jasper(num_classes=num_classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en", 26 | **kwargs) 27 | 28 | 29 | def jasperdr10x5_en_nr(num_classes=29, **kwargs): 30 | """ 31 | Jasper DR 10x5 model for English language (with presence of noise) from 'Jasper: An End-to-End Convolutional Neural 32 | Acoustic Model,' https://arxiv.org/abs/1904.03288. 33 | 34 | Parameters: 35 | ---------- 36 | num_classes : int, default 29 37 | Number of classification classes (number of graphemes). 38 | pretrained : bool, default False 39 | Whether to load the pretrained weights for model. 40 | root : str, default '~/.torch/models' 41 | Location for keeping the model parameters. 
42 | """ 43 | return get_jasper(num_classes=num_classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en_nr", 44 | **kwargs) 45 | 46 | 47 | def _calc_width(net): 48 | import numpy as np 49 | net_params = filter(lambda p: p.requires_grad, net.parameters()) 50 | weight_count = 0 51 | for param in net_params: 52 | weight_count += np.prod(param.size()) 53 | return weight_count 54 | 55 | 56 | def _test(): 57 | import numpy as np 58 | import torch 59 | 60 | pretrained = False 61 | audio_features = 64 62 | 63 | models = [ 64 | jasperdr10x5_en, 65 | jasperdr10x5_en_nr, 66 | ] 67 | 68 | for model in models: 69 | 70 | net = model( 71 | in_channels=audio_features, 72 | pretrained=pretrained) 73 | 74 | # net.train() 75 | net.eval() 76 | weight_count = _calc_width(net) 77 | print("m={}, {}".format(model.__name__, weight_count)) 78 | assert (model != jasperdr10x5_en or weight_count == 332632349) 79 | assert (model != jasperdr10x5_en_nr or weight_count == 332632349) 80 | 81 | batch = 3 82 | seq_len = np.random.randint(60, 150, batch) 83 | seq_len_max = seq_len.max() + 2 84 | x = torch.randn(batch, audio_features, seq_len_max) 85 | x_len = torch.tensor(seq_len, dtype=torch.long, device=x.device) 86 | 87 | y, y_len = net(x, x_len) 88 | # y.sum().backward() 89 | assert (tuple(y.size())[:2] == (batch, net.num_classes)) 90 | assert (y.size()[2] in [seq_len_max // 2, seq_len_max // 2 + 1]) 91 | 92 | 93 | if __name__ == "__main__": 94 | _test() 95 | -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/mobilenetb.py: -------------------------------------------------------------------------------- 1 | """ 2 | MobileNet(B) with simplified depthwise separable convolution block for ImageNet-1K, implemented in Gluon. 3 | Original paper: 'MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,' 4 | https://arxiv.org/abs/1704.04861. 5 | """ 6 | 7 | __all__ = ['mobilenetb_w1', 'mobilenetb_w3d4', 'mobilenetb_wd2', 'mobilenetb_wd4'] 8 | 9 | from .mobilenet import get_mobilenet 10 | 11 | 12 | def mobilenetb_w1(**kwargs): 13 | """ 14 | 1.0 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 15 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 16 | 17 | Parameters: 18 | ---------- 19 | pretrained : bool, default False 20 | Whether to load the pretrained weights for model. 21 | root : str, default '~/.torch/models' 22 | Location for keeping the model parameters. 23 | """ 24 | return get_mobilenet(width_scale=1.0, dws_simplified=True, model_name="mobilenetb_w1", **kwargs) 25 | 26 | 27 | def mobilenetb_w3d4(**kwargs): 28 | """ 29 | 0.75 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 30 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 31 | 32 | Parameters: 33 | ---------- 34 | pretrained : bool, default False 35 | Whether to load the pretrained weights for model. 36 | root : str, default '~/.torch/models' 37 | Location for keeping the model parameters. 
38 | """ 39 | return get_mobilenet(width_scale=0.75, dws_simplified=True, model_name="mobilenetb_w3d4", **kwargs) 40 | 41 | 42 | def mobilenetb_wd2(**kwargs): 43 | """ 44 | 0.5 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 45 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 46 | 47 | Parameters: 48 | ---------- 49 | pretrained : bool, default False 50 | Whether to load the pretrained weights for model. 51 | root : str, default '~/.torch/models' 52 | Location for keeping the model parameters. 53 | """ 54 | return get_mobilenet(width_scale=0.5, dws_simplified=True, model_name="mobilenetb_wd2", **kwargs) 55 | 56 | 57 | def mobilenetb_wd4(**kwargs): 58 | """ 59 | 0.25 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 60 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 61 | 62 | Parameters: 63 | ---------- 64 | pretrained : bool, default False 65 | Whether to load the pretrained weights for model. 66 | root : str, default '~/.torch/models' 67 | Location for keeping the model parameters. 68 | """ 69 | return get_mobilenet(width_scale=0.25, dws_simplified=True, model_name="mobilenetb_wd4", **kwargs) 70 | 71 | 72 | def _calc_width(net): 73 | import numpy as np 74 | net_params = filter(lambda p: p.requires_grad, net.parameters()) 75 | weight_count = 0 76 | for param in net_params: 77 | weight_count += np.prod(param.size()) 78 | return weight_count 79 | 80 | 81 | def _test(): 82 | import torch 83 | 84 | pretrained = False 85 | 86 | models = [ 87 | mobilenetb_w1, 88 | mobilenetb_w3d4, 89 | mobilenetb_wd2, 90 | mobilenetb_wd4, 91 | ] 92 | 93 | for model in models: 94 | 95 | net = model(pretrained=pretrained) 96 | 97 | # net.train() 98 | net.eval() 99 | weight_count = _calc_width(net) 100 | print("m={}, {}".format(model.__name__, weight_count)) 101 | assert (model != mobilenetb_w1 or weight_count == 4222056) 102 | assert (model != mobilenetb_w3d4 or weight_count == 2578120) 103 | assert (model != mobilenetb_wd2 or weight_count == 1326632) 104 | assert (model != mobilenetb_wd4 or weight_count == 467592) 105 | 106 | x = torch.randn(1, 3, 224, 224) 107 | y = net(x) 108 | y.sum().backward() 109 | assert (tuple(y.size()) == (1, 1000)) 110 | 111 | 112 | if __name__ == "__main__": 113 | _test() 114 | -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/others/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/models/torchcv/models/others/__init__.py -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/proxylessnas_cub.py: -------------------------------------------------------------------------------- 1 | """ 2 | ProxylessNAS for CUB-200-2011, implemented in Gluon. 3 | Original paper: 'ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware,' 4 | https://arxiv.org/abs/1812.00332. 
5 | """ 6 | 7 | __all__ = ['proxylessnas_cpu_cub', 'proxylessnas_gpu_cub', 'proxylessnas_mobile_cub', 'proxylessnas_mobile14_cub'] 8 | 9 | from .proxylessnas import get_proxylessnas 10 | 11 | 12 | def proxylessnas_cpu_cub(num_classes=200, **kwargs): 13 | """ 14 | ProxylessNAS (CPU) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task and 15 | Hardware,' https://arxiv.org/abs/1812.00332. 16 | 17 | Parameters: 18 | ---------- 19 | num_classes : int, default 200 20 | Number of classification classes. 21 | pretrained : bool, default False 22 | Whether to load the pretrained weights for model. 23 | root : str, default '~/.torch/models' 24 | Location for keeping the model parameters. 25 | """ 26 | return get_proxylessnas(num_classes=num_classes, version="cpu", model_name="proxylessnas_cpu_cub", **kwargs) 27 | 28 | 29 | def proxylessnas_gpu_cub(num_classes=200, **kwargs): 30 | """ 31 | ProxylessNAS (GPU) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task and 32 | Hardware,' https://arxiv.org/abs/1812.00332. 33 | 34 | Parameters: 35 | ---------- 36 | num_classes : int, default 200 37 | Number of classification classes. 38 | pretrained : bool, default False 39 | Whether to load the pretrained weights for model. 40 | root : str, default '~/.torch/models' 41 | Location for keeping the model parameters. 42 | """ 43 | return get_proxylessnas(num_classes=num_classes, version="gpu", model_name="proxylessnas_gpu_cub", **kwargs) 44 | 45 | 46 | def proxylessnas_mobile_cub(num_classes=200, **kwargs): 47 | """ 48 | ProxylessNAS (Mobile) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task 49 | and Hardware,' https://arxiv.org/abs/1812.00332. 50 | 51 | Parameters: 52 | ---------- 53 | num_classes : int, default 200 54 | Number of classification classes. 55 | pretrained : bool, default False 56 | Whether to load the pretrained weights for model. 57 | root : str, default '~/.torch/models' 58 | Location for keeping the model parameters. 59 | """ 60 | return get_proxylessnas(num_classes=num_classes, version="mobile", model_name="proxylessnas_mobile_cub", **kwargs) 61 | 62 | 63 | def proxylessnas_mobile14_cub(num_classes=200, **kwargs): 64 | """ 65 | ProxylessNAS (Mobile-14) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task 66 | and Hardware,' https://arxiv.org/abs/1812.00332. 67 | 68 | Parameters: 69 | ---------- 70 | num_classes : int, default 200 71 | Number of classification classes. 72 | pretrained : bool, default False 73 | Whether to load the pretrained weights for model. 74 | root : str, default '~/.torch/models' 75 | Location for keeping the model parameters. 
76 | """ 77 | return get_proxylessnas(num_classes=num_classes, version="mobile14", model_name="proxylessnas_mobile14_cub", 78 | **kwargs) 79 | 80 | 81 | def _calc_width(net): 82 | import numpy as np 83 | net_params = filter(lambda p: p.requires_grad, net.parameters()) 84 | weight_count = 0 85 | for param in net_params: 86 | weight_count += np.prod(param.size()) 87 | return weight_count 88 | 89 | 90 | def _test(): 91 | import torch 92 | 93 | pretrained = False 94 | 95 | models = [ 96 | proxylessnas_cpu_cub, 97 | proxylessnas_gpu_cub, 98 | proxylessnas_mobile_cub, 99 | proxylessnas_mobile14_cub, 100 | ] 101 | 102 | for model in models: 103 | 104 | net = model(pretrained=pretrained) 105 | 106 | # net.train() 107 | net.eval() 108 | weight_count = _calc_width(net) 109 | print("m={}, {}".format(model.__name__, weight_count)) 110 | assert (model != proxylessnas_cpu_cub or weight_count == 3215248) 111 | assert (model != proxylessnas_gpu_cub or weight_count == 5736648) 112 | assert (model != proxylessnas_mobile_cub or weight_count == 3055712) 113 | assert (model != proxylessnas_mobile14_cub or weight_count == 5423168) 114 | 115 | x = torch.randn(14, 3, 224, 224) 116 | y = net(x) 117 | y.sum().backward() 118 | assert (tuple(y.size()) == (14, 200)) 119 | 120 | 121 | if __name__ == "__main__": 122 | _test() 123 | -------------------------------------------------------------------------------- /evals/ray_tune/models/torchcv/models/zfnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | ZFNet for ImageNet-1K, implemented in PyTorch. 3 | Original paper: 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 4 | """ 5 | 6 | __all__ = ['zfnet', 'zfnetb'] 7 | 8 | import os 9 | 10 | from .alexnet import AlexNet 11 | 12 | 13 | def get_zfnet(version="a", 14 | model_name=None, 15 | pretrained=False, 16 | root=os.path.join("~", ".torch", "models"), 17 | **kwargs): 18 | """ 19 | Create ZFNet model with specific parameters. 20 | 21 | Parameters: 22 | ---------- 23 | version : str, default 'a' 24 | Version of ZFNet ('a' or 'b'). 25 | model_name : str or None, default None 26 | Model name for loading pretrained model. 27 | pretrained : bool, default False 28 | Whether to load the pretrained weights for model. 29 | root : str, default '~/.torch/models' 30 | Location for keeping the model parameters. 
31 | """ 32 | if version == "a": 33 | channels = [[96], [256], [384, 384, 256]] 34 | kernel_sizes = [[7], [5], [3, 3, 3]] 35 | strides = [[2], [2], [1, 1, 1]] 36 | paddings = [[1], [0], [1, 1, 1]] 37 | use_lrn = True 38 | elif version == "b": 39 | channels = [[96], [256], [512, 1024, 512]] 40 | kernel_sizes = [[7], [5], [3, 3, 3]] 41 | strides = [[2], [2], [1, 1, 1]] 42 | paddings = [[1], [0], [1, 1, 1]] 43 | use_lrn = True 44 | else: 45 | raise ValueError("Unsupported ZFNet version {}".format(version)) 46 | 47 | net = AlexNet( 48 | channels=channels, 49 | kernel_sizes=kernel_sizes, 50 | strides=strides, 51 | paddings=paddings, 52 | use_lrn=use_lrn, 53 | **kwargs) 54 | 55 | if pretrained: 56 | if (model_name is None) or (not model_name): 57 | raise ValueError("Parameter `model_name` should be properly initialized for loading pretrained model.") 58 | from .model_store import download_model 59 | download_model( 60 | net=net, 61 | model_name=model_name, 62 | local_model_store_dir_path=root) 63 | 64 | return net 65 | 66 | 67 | def zfnet(**kwargs): 68 | """ 69 | ZFNet model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 70 | 71 | Parameters: 72 | ---------- 73 | pretrained : bool, default False 74 | Whether to load the pretrained weights for model. 75 | root : str, default '~/.torch/models' 76 | Location for keeping the model parameters. 77 | """ 78 | return get_zfnet(model_name="zfnet", **kwargs) 79 | 80 | 81 | def zfnetb(**kwargs): 82 | """ 83 | ZFNet-b model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 84 | 85 | Parameters: 86 | ---------- 87 | pretrained : bool, default False 88 | Whether to load the pretrained weights for model. 89 | root : str, default '~/.torch/models' 90 | Location for keeping the model parameters. 
91 | """ 92 | return get_zfnet(version="b", model_name="zfnetb", **kwargs) 93 | 94 | 95 | def _calc_width(net): 96 | import numpy as np 97 | net_params = filter(lambda p: p.requires_grad, net.parameters()) 98 | weight_count = 0 99 | for param in net_params: 100 | weight_count += np.prod(param.size()) 101 | return weight_count 102 | 103 | 104 | def _test(): 105 | import torch 106 | 107 | pretrained = False 108 | 109 | models = [ 110 | zfnet, 111 | zfnetb, 112 | ] 113 | 114 | for model in models: 115 | 116 | net = model(pretrained=pretrained) 117 | 118 | # net.train() 119 | net.eval() 120 | weight_count = _calc_width(net) 121 | print("m={}, {}".format(model.__name__, weight_count)) 122 | assert (model != zfnet or weight_count == 62357608) 123 | assert (model != zfnetb or weight_count == 107627624) 124 | 125 | x = torch.randn(1, 3, 224, 224) 126 | y = net(x) 127 | y.sum().backward() 128 | assert (tuple(y.size()) == (1, 1000)) 129 | 130 | 131 | if __name__ == "__main__": 132 | _test() 133 | -------------------------------------------------------------------------------- /evals/ray_tune/models/vgg.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | import torch.nn.init as init 5 | 6 | __all__ = [ 7 | 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 8 | 'vgg19_bn', 'vgg19', 'make_layers', 'vgg_zoo' 9 | ] 10 | 11 | 12 | class VGG(nn.Module): 13 | ''' 14 | VGG model 15 | ''' 16 | def __init__(self, features): 17 | super(VGG, self).__init__() 18 | self.features, self.classifier = features 19 | # Initialize weights 20 | for m in self.modules(): 21 | if isinstance(m, nn.Conv2d): 22 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 23 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 24 | m.bias.data.zero_() 25 | 26 | 27 | def forward(self, x): 28 | x = self.features(x) 29 | x = x.view(x.size(0), -1) 30 | x = self.classifier(x) 31 | return x 32 | 33 | 34 | def make_layers(cfg, batch_norm=False, k = 3, num_of_class = 10): 35 | layers = [] 36 | in_channels = 3 37 | for v in cfg: 38 | if v == 'M': 39 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 40 | else: 41 | conv2d = nn.Conv2d(in_channels, v, kernel_size=k, padding=int((k-1)/2)) 42 | if batch_norm: 43 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 44 | else: 45 | layers += [conv2d, nn.ReLU(inplace=True)] 46 | in_channels = v 47 | classifier = nn.Sequential( 48 | nn.Dropout(), 49 | nn.Linear(in_channels, 512), 50 | nn.ReLU(True), 51 | nn.Dropout(), 52 | nn.Linear(512, 512), 53 | nn.ReLU(True), 54 | nn.Linear(512, num_of_class), 55 | ) 56 | return nn.Sequential(*layers), classifier 57 | 58 | 59 | cfg = { 60 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 61 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 62 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 63 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 64 | 512, 512, 512, 512, 'M'], 65 | } 66 | 67 | 68 | def vgg11(): 69 | """VGG 11-layer model (configuration "A")""" 70 | return VGG(make_layers(cfg['A'])) 71 | 72 | 73 | def vgg11_bn(): 74 | """VGG 11-layer model (configuration "A") with batch normalization""" 75 | return VGG(make_layers(cfg['A'], batch_norm=True)) 76 | 77 | 78 | def vgg13(): 79 | """VGG 13-layer model (configuration "B")""" 80 | return VGG(make_layers(cfg['B'])) 81 | 82 | 83 | def vgg13_bn(): 84 | """VGG 13-layer model (configuration "B") with batch normalization""" 85 | return VGG(make_layers(cfg['B'], batch_norm=True)) 86 | 87 | 88 | def vgg16(): 89 | """VGG 16-layer model (configuration "D")""" 90 | return VGG(make_layers(cfg['D'])) 91 | 92 | 93 | def vgg16_bn(): 94 | """VGG 16-layer model (configuration "D") with batch normalization""" 95 | return VGG(make_layers(cfg['D'], batch_norm=True)) 96 | 97 | 98 | def vgg19(): 99 | """VGG 19-layer model (configuration "E")""" 100 | return VGG(make_layers(cfg['E'])) 101 | 102 | 103 | def vgg19_bn(): 104 | """VGG 19-layer model (configuration 'E') with batch normalization""" 105 | return VGG(make_layers(cfg['E'], batch_norm=True)) 106 | 107 | 108 | def vgg_zoo(): 109 | """VGG 16 model zoo""" 110 | 111 | config_list = [] 112 | init = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'] 113 | filter_num = [1.25, 1.5, 1.75, 2] 114 | k_size = [3, 5] 115 | 116 | counter = 0 117 | for num in filter_num: 118 | temp = [i for i in init] 119 | for j in range(len(init)): 120 | if temp[j] != "M": 121 | temp[j] = int(temp[j]*num) 122 | counter += 1 123 | for k in k_size: 124 | config_list.append(([t for t in temp], k)) 125 | return config_list 126 | 127 | #vggzoo = vgg_zoo() 128 | #print(vggzoo) 129 | #for i in range(len(vggzoo)): 130 | # for j in range(i+1, len(vggzoo)): 131 | # assert (vggzoo[i] != vggzoo[j]) 132 | # 133 | #print(len(vggzoo)) 134 | -------------------------------------------------------------------------------- /evals/ray_tune/onlinescheduler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from typing import Dict, Optional 4 | 5 | from ray.tune import trial_runner 6 | from ray.tune.schedulers import TrialScheduler 7 | from 
ray.tune.trial import Trial 8 | 9 | 10 | class OnlineScheduler(TrialScheduler): 11 | """Wrapper scheduler that holds each trial until its configured arrival time has passed, deferring all other scheduling decisions to the wrapped scheduler.""" 12 | 13 | def __init__(self, scheduler): 14 | 15 | self.scheduler = scheduler 16 | self.start_time = time.time() 17 | 18 | def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", 19 | trial: Trial): 20 | return self.scheduler.on_trial_add(trial_runner, trial) 21 | 22 | def on_trial_error(self, trial_runner: "trial_runner.TrialRunner", 23 | trial: Trial): 24 | return self.scheduler.on_trial_error(trial_runner, trial) 25 | 26 | def on_trial_result(self, trial_runner: "trial_runner.TrialRunner", 27 | trial: Trial, result: Dict) -> str: 28 | return self.scheduler.on_trial_result(trial_runner, trial, result) 29 | 30 | def on_trial_complete(self, trial_runner: "trial_runner.TrialRunner", 31 | trial: Trial, result: Dict): 32 | return self.scheduler.on_trial_complete(trial_runner, trial, result) 33 | 34 | def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner", 35 | trial: Trial): 36 | return self.scheduler.on_trial_remove(trial_runner, trial) 37 | 38 | def choose_trial_to_run( 39 | self, trial_runner: "trial_runner.TrialRunner") -> Optional[Trial]: 40 | 41 | # trial = self.scheduler.choose_trial_to_run(trial_runner) 42 | 43 | # if trial is None: 44 | # return trial 45 | # # get submission time 46 | # arrival_time = trial.config.get('config', {}).get('arrival', 0) 47 | # #job_name = trial.config.get('config', {}).get('name', None) 48 | # pending_time = arrival_time - (time.time() - self.start_time) 49 | 50 | # #logging.info(f"Supposed to submit job {job_name} at {arrival_time}, now is {time.time()-self.start_time}") 51 | # if pending_time > 0: 52 | # time.sleep(pending_time) 53 | 54 | # #logging.info(f"Submit job {job_name} at {time.time()-self.start_time}, Supposed at {arrival_time}") 55 | # return trial 56 | 57 | for trial in trial_runner.get_trials(): 58 | if (trial.status == Trial.PENDING and trial_runner.has_resources_for_trial(trial)): 59 | arrival_time = trial.config.get('config', {}).get('arrival', 0) 60 | #job_name = trial.config.get('config', {}).get('name', None) 61 | pending_time = arrival_time - (time.time() - self.start_time) 62 | if pending_time < 0: 63 | return trial 64 | for trial in trial_runner.get_trials(): 65 | if (trial.status == Trial.PAUSED and trial_runner.has_resources_for_trial(trial)): 66 | arrival_time = trial.config.get('config', {}).get('arrival', 0) 67 | #job_name = trial.config.get('config', {}).get('name', None) 68 | pending_time = arrival_time - (time.time() - self.start_time) 69 | if pending_time < 0: 70 | return trial 71 | return None 72 | 73 | def debug_string(self) -> str: 74 | return self.scheduler.debug_string() 75 | -------------------------------------------------------------------------------- /evals/ray_tune/setup/cluster_manager.py: -------------------------------------------------------------------------------- 1 | # Submit job to the remote cluster 2 | 3 | import datetime 4 | import os 5 | import pickle 6 | import random 7 | import subprocess 8 | import sys 9 | import time 10 | 11 | import yaml 12 | 13 | 14 | def load_yaml_conf(yaml_file): 15 | with open(yaml_file) as fin: 16 | data = yaml.load(fin, Loader=yaml.FullLoader) 17 | return data 18 | 19 | def process_cmd(yaml_file): 20 | 21 | yaml_conf = load_yaml_conf(yaml_file) 22 | 23 | master_ip = yaml_conf['master_ip'] 24 | worker_ips, total_gpus = [], [] 25 | cmd_script_list = [] 26 | 27 | executor_configs = 
";".join(yaml_conf['worker_ips']) 28 | for ip_gpu in yaml_conf['worker_ips']: 29 | ip, num_gpu = ip_gpu.strip().split(':') 30 | worker_ips.append(ip) 31 | total_gpus.append(num_gpu) 32 | 33 | time_stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%m%d_%H%M%S') 34 | running_vms = set() 35 | job_name = 'modelkeeper' 36 | log_path = './logs' 37 | submit_user = f"{yaml_conf['auth']['ssh_user']}@" if len(yaml_conf['auth']['ssh_user']) else "" 38 | 39 | job_conf = {'time_stamp':time_stamp, 40 | 'master_ip':master_ip, 41 | } 42 | 43 | for conf in yaml_conf['job_conf']: 44 | job_conf.update(conf) 45 | 46 | conf_script = '' 47 | setup_cmd = '' 48 | if yaml_conf['setup_commands'] is not None: 49 | setup_cmd += (yaml_conf['setup_commands'][0] + ' && ') 50 | for item in yaml_conf['setup_commands'][1:]: 51 | setup_cmd += (item + ' && ') 52 | 53 | cmd_sufix = f" " 54 | 55 | 56 | for conf_name in job_conf: 57 | conf_script = conf_script + f' --{conf_name}={job_conf[conf_name]}' 58 | if conf_name == "job_name": 59 | job_name = job_conf[conf_name] 60 | if conf_name == "log_path": 61 | log_path = os.path.join(job_conf[conf_name], 'log', job_name, time_stamp) 62 | 63 | # =========== Submit job to parameter server ============ 64 | running_vms.add(master_ip) 65 | ps_cmd = f'ray start --head --address={master_ip}:6379 --redis-password="5241590000000000" --num-cpus=1 --num-gpus=0 && sleep 240h' 66 | 67 | with open(f"{job_name}_logging", 'wb') as fout: 68 | pass 69 | 70 | print(f"Starting master on {master_ip}...") 71 | with open(f"{job_name}_logging", 'a') as fout: 72 | subprocess.Popen(f'ssh {submit_user}{master_ip} "{setup_cmd} {ps_cmd}"', 73 | shell=True, stdout=fout, stderr=fout) 74 | 75 | time.sleep(5) 76 | # =========== Submit job to each worker ============ 77 | rank_id = 1 78 | for worker, n_gpu in zip(worker_ips, total_gpus): 79 | running_vms.add(worker) 80 | print(f"Starting workers on {worker} ...") 81 | 82 | worker_cmd = f'ray start --address={master_ip}:6379 --redis-password="5241590000000000" --num-cpus={10} --num-gpus={n_gpu} && sleep 240h' 83 | 84 | with open(f"{job_name}_logging", 'a') as fout: 85 | time.sleep(0.5) 86 | subprocess.Popen(f'ssh {submit_user}{worker} "{setup_cmd} {worker_cmd}"', 87 | shell=True, stdout=fout, stderr=fout) 88 | 89 | # dump the address of running workers 90 | current_path = os.path.dirname(os.path.abspath(__file__)) 91 | job_name = os.path.join(current_path, job_name) 92 | with open(job_name, 'wb') as fout: 93 | job_meta = {'user':submit_user, 'vms': running_vms} 94 | pickle.dump(job_meta, fout) 95 | 96 | print(f"Submitted job, please check your logs ({log_path}) for status") 97 | 98 | 99 | def terminate(job_name): 100 | 101 | current_path = os.path.dirname(os.path.abspath(__file__)) 102 | job_meta_path = os.path.join(current_path, job_name) 103 | 104 | if not os.path.isfile(job_meta_path): 105 | print(f"Fail to terminate {job_name}, as it does not exist") 106 | 107 | with open(job_meta_path, 'rb') as fin: 108 | job_meta = pickle.load(fin) 109 | 110 | for vm_ip in job_meta['vms']: 111 | # os.system(f'scp shutdown.py {job_meta["user"]}{vm_ip}:~/') 112 | print(f"Shutting down job on {vm_ip}") 113 | with open(f"{job_name}_logging", 'a') as fout: 114 | subprocess.Popen(f'ssh {job_meta["user"]}{vm_ip} "ray stop; killall sleep"', 115 | shell=True, stdout=fout, stderr=fout) 116 | 117 | 118 | if sys.argv[1] == 'init': 119 | process_cmd(sys.argv[2]) 120 | elif sys.argv[1] == 'stop': 121 | terminate(sys.argv[2]) 122 | else: 123 | print("Unknown cmds ...") 124 | 
-------------------------------------------------------------------------------- /evals/ray_tune/setup/conf.yml: --------------------------------------------------------------------------------
1 | # Configuration file of the Ray experiment
2 | 
3 | # ========== Cluster configuration ==========
4 | # ip address of the master
5 | master_ip: 10.0.0.1
6 | 
7 | # each entry is <worker ip>:<number of available GPUs on that node>
8 | worker_ips:
9 |     - 10.0.0.1:4
10 |     - 10.0.0.2:4
11 |     - 10.0.0.3:4
12 | 
13 | auth:
14 |     ssh_user: ""
15 |     ssh_private_key: ~/.ssh/id_rsa
16 | 
17 | # commands to run (in order) before Ray itself can be started
18 | setup_commands:
19 |     - source $HOME/experiment/anaconda3/bin/activate modelkeeper
20 | 
21 | # ========== Additional job configuration ==========
22 | # Default parameters are specified in argParser.py, where each parameter is described in more detail
23 | 
24 | job_conf:
25 |     - job_name: modelkeeper # Generate logs under this folder: log_path/job_name/time_stamp
26 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/evals/ray_tune/thirdparty/__init__.py
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/calculate_ged.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import contextlib
3 | import itertools
4 | import json
5 | import random
6 | import time
7 | from pathlib import Path
8 | 
9 | import joblib
10 | import networkx as nx
11 | from interruptingcow import Quota, timeout
12 | from joblib import Parallel, delayed
13 | from tqdm.auto import tqdm
14 | from utils import make_graph
15 | 
16 | 
17 | @contextlib.contextmanager
18 | def tqdm_joblib(tqdm_object):
19 |     """Context manager to patch joblib to report into tqdm progress bar given as argument"""
20 |     class TqdmBatchCompletionCallback:
21 |         def __init__(self, time, index, parallel):
22 |             self.index = index
23 |             self.parallel = parallel
24 | 
25 |         def __call__(self, index):
26 |             tqdm_object.update()
27 |             if self.parallel._original_iterator is not None:
28 |                 self.parallel.dispatch_next()
29 | 
30 |     old_batch_callback = joblib.parallel.BatchCompletionCallBack
31 |     joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
32 |     try:
33 |         yield tqdm_object
34 |     finally:
35 |         joblib.parallel.BatchCompletionCallBack = old_batch_callback
36 |         tqdm_object.close()
37 | 
38 | def calc_ged(recepie1, recepie2, timeout_val=600):
39 |     start_time = time.time()
40 |     G1 = make_graph(recepie1)
41 |     G2 = make_graph(recepie2)
42 |     ged = None
43 | 
44 |     try:
45 |         status = "OK"
46 |         with timeout(Quota(timeout_val), exception=RuntimeError):
47 |             for ged in nx.optimize_graph_edit_distance(G1, G2, lambda n1, n2: n1['op'] == n2['op']):
48 |                 pass
49 | 
50 |     except RuntimeError as e:
51 |         status = "Timeout"
52 | 
53 |     except Exception as e:
54 |         status = "Exception: " + str(e)
55 | 
56 |     return {
57 |         "recepie_i": recepie1,
58 |         "recepie_j": recepie2,
59 |         "ged": ged,
60 |         "time": time.time() - start_time,
61 |         "status": status
62 |     }
63 | 
64 | if __name__ == "__main__":
65 |     parser = argparse.ArgumentParser(description='Calculate GED')
66 |     parser.add_argument('--recepies', type=str, default="./new_recepies_fix.json",
67 |                         help='path to JSON file with recepies')
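    # The recepies JSON is assumed to be a list of recipe dicts, each mapping a
    # node name to {'op': ..., 'input': [...]} -- the format that make_graph()
    # in thirdparty/utils.py consumes.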
68 |     parser.add_argument('--num', type=int, default=10,
69 |                         help='number of randomly sampled recepies whose GED to all others is computed')
70 |     parser.add_argument('--timeout', type=int, default=600, help="timeout for calculating one GED value in seconds")
71 |     parser.add_argument('--n_jobs', type=int, default=-2,
72 |                         help="n_jobs in scikit-learn style")
73 |     parser.add_argument('--num_parts', type=int, default=10,
74 |                         help="number of parts to split the results into when saving")
75 | 
76 |     args = parser.parse_args()
77 | 
78 |     with open(args.recepies, "r") as f:
79 |         recepies = json.load(f)
80 | 
81 |     key_recepies = random.sample(recepies, args.num)
82 |     part_size = len(recepies)//args.num_parts
83 |     for part in range(1, args.num_parts+1):
84 |         _recepies = recepies[(part-1)*part_size:part*part_size]
85 |         combs = list(itertools.product(key_recepies, _recepies))
86 | 
87 |         with tqdm_joblib(tqdm(desc="GED part {} of {}".format(part, args.num_parts), total=len(combs))) as progress_bar:
88 |             results = Parallel(n_jobs=args.n_jobs, backend='multiprocessing')(delayed(calc_ged)(r1, r2, args.timeout) for r1, r2 in combs)
89 | 
90 |         with open("GED_CALC_RESULTS_part_{}.json".format(part), 'w') as f:
91 |             json.dump(results, f)
92 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/data.py: --------------------------------------------------------------------------------
1 | import os
2 | from collections import Counter
3 | 
4 | import torch
5 | 
6 | 
7 | class Dictionary(object):
8 |     def __init__(self):
9 |         self.word2idx = {}
10 |         self.idx2word = []
11 |         self.counter = Counter()
12 |         self.total = 0
13 | 
14 |     def add_word(self, word):
15 |         if word not in self.word2idx:
16 |             self.idx2word.append(word)
17 |             self.word2idx[word] = len(self.idx2word) - 1
18 |         token_id = self.word2idx[word]
19 |         self.counter[token_id] += 1
20 |         self.total += 1
21 |         return self.word2idx[word]
22 | 
23 |     def __len__(self):
24 |         return len(self.idx2word)
25 | 
26 | 
27 | class Corpus(object):
28 |     def __init__(self, path):
29 |         self.dictionary = Dictionary()
30 |         self.train = self.tokenize(os.path.join(path, 'train.txt'))
31 |         self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
32 |         self.test = self.tokenize(os.path.join(path, 'test.txt'))
33 | 
34 |     def tokenize(self, path):
35 |         """Tokenizes a text file."""
36 |         assert os.path.exists(path)
37 |         # Add words to the dictionary
38 |         with open(path, 'r') as f:
39 |             tokens = 0
40 |             for line in f:
41 |                 words = line.split() + ['<eos>']
42 |                 tokens += len(words)
43 |                 for word in words:
44 |                     self.dictionary.add_word(word)
45 | 
46 |         # Tokenize file content
47 |         with open(path, 'r') as f:
48 |             ids = torch.LongTensor(tokens)
49 |             token = 0
50 |             for line in f:
51 |                 words = line.split() + ['<eos>']
52 |                 for word in words:
53 |                     ids[token] = self.dictionary.word2idx[word]
54 |                     token += 1
55 | 
56 |         return ids
57 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/embed_regularize.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | 
5 | def embedded_dropout(embed, words, dropout=0.1, scale=None):
6 |     if dropout:  # drop whole embedding rows (words), rescaling survivors by 1/(1-dropout)
7 |         mask = embed.weight.data.new().resize_((embed.weight.size(0), 1)).bernoulli_(1 - dropout).expand_as(embed.weight) / (1 - dropout)
8 |         masked_embed_weight = mask * embed.weight
9 |     else:
10 |         masked_embed_weight = embed.weight
11 |     if scale:
12 |         masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight
13 | 
14 |     padding_idx = embed.padding_idx
15 |     if padding_idx is None:
16 |         padding_idx = -1
17 | 
18 |     X = torch.nn.functional.embedding(words, masked_embed_weight,
19 |         padding_idx, embed.max_norm, embed.norm_type,
20 |         embed.scale_grad_by_freq, embed.sparse
21 |     )
22 |     return X
23 | 
24 | if __name__ == '__main__':
25 |     V = 50
26 |     h = 4
27 |     bptt = 10
28 |     batch_size = 2
29 | 
30 |     embed = torch.nn.Embedding(V, h)
31 |     # sample random token ids in [0, V); randint's upper bound is exclusive
32 |     words = np.random.randint(low=0, high=V, size=(batch_size, bptt))
33 |     words = torch.LongTensor(words)
34 | 
35 |     origX = embed(words)
36 |     X = embedded_dropout(embed, words)
37 | 
38 |     print(origX)
39 |     print(X)
40 | 
41 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/locked_dropout.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | 
5 | 
6 | class LockedDropout(nn.Module):
7 |     def __init__(self):
8 |         super().__init__()
9 | 
10 |     def forward(self, x, dropout=0.5):  # variational dropout: one mask per sequence, shared across time steps
11 |         if not self.training or not dropout:
12 |             return x
13 |         m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
14 |         mask = Variable(m, requires_grad=False) / (1 - dropout)
15 |         mask = mask.expand_as(x)
16 |         return mask * x
17 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/multilinear.py: --------------------------------------------------------------------------------
1 | import math
2 | 
3 | import torch
4 | import torch.nn
5 | import torch.nn.functional as F
6 | 
7 | 
8 | class MultiLinear(torch.nn.Module):
9 |     """Linear layer over several inputs: y = sum_i (x_i @ W_i^T) + b."""
10 |     def __init__(self, input_sizes, output_size):
11 |         super(MultiLinear, self).__init__()
12 |         self.input_sizes = input_sizes
13 |         self.output_size = output_size
14 | 
15 |         weights = []
16 |         for input_size in input_sizes:
17 |             weights.append(torch.nn.Parameter(torch.Tensor(output_size, input_size)))
18 |         self.weights = torch.nn.ParameterList(weights)
19 | 
20 |         self.bias = torch.nn.Parameter(torch.Tensor(output_size))
21 | 
22 |         self.reset_parameters()
23 | 
24 |     def reset_parameters(self):
25 |         for i in range(len(self.weights)):
26 |             torch.nn.init.kaiming_uniform_(self.weights[i], a=math.sqrt(5))
27 | 
28 |         fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weights[0])
29 |         bound = 1 / math.sqrt(fan_in)
30 |         torch.nn.init.uniform_(self.bias, -bound, bound)
31 | 
32 |     def forward(self, *inputs):
33 |         result = F.linear(inputs[0], self.weights[0], self.bias)
34 |         for i in range(1, len(self.weights)):
35 |             result = result + F.linear(inputs[i], self.weights[i])
36 |         return result
37 | 
38 |     def extra_repr(self):
39 |         return 'input_sizes={}, output_size={}'.format(
40 |             self.input_sizes, self.output_size
41 |         )
42 | 
-------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/nas_environment.py: --------------------------------------------------------------------------------
1 | import json
2 | import os
3 | 
4 | import numpy as np
5 | 
6 | 
7 | class Environment:
8 |     '''
9 |     Simulates a NAS environment. Architectures can be trained for a specified number of epochs.
10 |     Training results are cached, so training the same model for more epochs
11 |     is timed as a continuation from the model's checkpoint.
12 | ''' 13 | def __init__(self, logs_dir): 14 | self._logs = [] 15 | self._arch_to_id = {} 16 | 17 | arch_id = 0 18 | for i, filename in enumerate(os.listdir(logs_dir)): 19 | if filename.endswith('.json'): 20 | log_path = os.path.join(logs_dir, filename) 21 | x = json.load(open(log_path, 'r')) 22 | self._logs.append(x) 23 | assert x['recepie'] not in self._arch_to_id 24 | self._arch_to_id[x['recepie']] = arch_id 25 | arch_id += 1 26 | 27 | self._training_states = {} 28 | 29 | def get_total_time(self): 30 | return sum([x['wall_time'] for x in self._training_states.values()]) 31 | 32 | 33 | def get_best_possible_test_loss(self): 34 | min_loss = np.inf 35 | for log in self._logs: 36 | if len(log['test_losses']) > 0: 37 | cur_loss = np.nanmin(log['test_losses']) 38 | if cur_loss < min_loss: 39 | min_loss = cur_loss 40 | return min_loss 41 | 42 | def get_test_loss_of_the_best_validated_architecture(self): 43 | return self._logs[self.best_arch_id]['test_losses'][self.best_arch_epoch] 44 | 45 | def get_precomputed_recepies(self): 46 | return [json.loads(x['recepie']) for x in self._logs] 47 | 48 | def get_recepie_ids(self): 49 | return [x['recepie_id'] for x in self._logs] 50 | 51 | def reset(self): 52 | self.best_arch_id = -1 53 | self.best_arch_epoch = -1 54 | self._training_states = {} 55 | 56 | def _make_state_dict(self, arch_id, epoch): 57 | state_dict = {f'{phase}_loss':self._logs[arch_id][f'{phase}_losses'][epoch] if epoch >= 0 else np.nan 58 | for phase in ['train', 'val', 'test']} 59 | state_dict['wall_time'] = np.sum(self._logs[arch_id]['wall_times'][:epoch]) 60 | state_dict['cur_epoch'] = epoch 61 | state_dict['status'] = 'OK' if epoch < len(self._logs[arch_id]['train_losses']) - 1 else self._logs[arch_id]['status'] 62 | return state_dict 63 | 64 | def simulated_train(self, arch, max_epoch): 65 | arch_id = self._arch_to_id[json.dumps(arch)] 66 | if (arch_id not in self._training_states) or (max_epoch > self._training_states[arch_id]['cur_epoch']): 67 | max_epoch = min([max_epoch, len(self._logs[arch_id]['train_losses']) - 1]) 68 | self._training_states[arch_id] = self._make_state_dict(arch_id, max_epoch) 69 | 70 | # update best result 71 | val_losses = self._logs[arch_id]['val_losses'][:self._training_states[arch_id]['cur_epoch'] + 1] 72 | if np.sum(~np.isnan(val_losses)) > 0: 73 | cur_best_epoch = np.nanargmin(val_losses) 74 | if (self.best_arch_id == -1) or\ 75 | (self._logs[self.best_arch_id]['val_losses'][self.best_arch_epoch] > val_losses[cur_best_epoch]): 76 | self.best_arch_id = arch_id 77 | self.best_arch_epoch = cur_best_epoch 78 | 79 | def get_model_status(self, arch): 80 | arch_id = self._arch_to_id[json.dumps(arch)] 81 | return self._training_states[arch_id]['status'] 82 | 83 | def get_model_stats(self, arch, epoch): 84 | arch_id = self._arch_to_id[json.dumps(arch)] 85 | if self._training_states[arch_id]['cur_epoch'] < epoch: 86 | raise Exception('Required epoch exceeds current training epochs.') 87 | 88 | return self._make_state_dict(arch_id, epoch) -------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/train.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn 7 | from thirdparty.utils import get_batch, repackage_hidden 8 | 9 | 10 | def eval_nlp(model, criterion, data_source, batch_size, args, device=torch.device("cpu")): 11 | # Turn on evaluation mode which disables dropout. 
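    # Note: this evaluation loop could also run under torch.no_grad() to avoid
    # building autograd graphs; results are identical either way because only
    # detached `.data` values are accumulated below.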
    model.to(device).eval()
13 |     total_loss = 0
14 |     hidden = model.init_hidden(batch_size)
15 |     for i in range(0, data_source.size(0) - 1, args.bptt):
16 |         data, targets = get_batch(data_source, i, args, evaluation=True)
17 |         output, hidden = model(data, hidden)
18 |         total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
19 |         hidden = repackage_hidden(hidden)
20 |     return total_loss.item() / len(data_source)
21 | 
22 | 
23 | def train_nlp(model, optimizer, params, criterion, train_data, args, epoch, device=torch.device("cpu")):
24 |     # Turn on training mode which enables dropout.
25 |     total_loss = 0
26 |     start_time = time.time()
27 |     model.to(device).train()
28 |     hidden = model.init_hidden(args.batch_size)
29 |     batch, i = 0, 0
30 |     while i < train_data.size(0) - 1 - 1:
31 |         bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
32 |         # Prevent excessively small or negative sequence lengths
33 |         seq_len = max(5, int(np.random.normal(bptt, 5)))
34 |         # There's a very small chance that it could select a very long sequence length resulting in OOM
35 |         # seq_len = min(seq_len, args.bptt + 10)
36 | 
37 |         lr2 = optimizer.param_groups[0]['lr']
38 |         optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
39 |         model.train()
40 |         data, targets = get_batch(train_data, i, args, seq_len=seq_len)
41 | 
42 |         # Starting each batch, we detach the hidden state from how it was previously produced.
43 |         # If we didn't, the model would try backpropagating all the way to start of the dataset.
44 |         hidden = repackage_hidden(hidden)
45 |         optimizer.zero_grad()
46 | 
47 |         output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
48 |         raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)
49 | 
50 |         loss = raw_loss
51 |         # Activation Regularization
52 |         if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
53 |         # Temporal Activation Regularization (slowness)
54 |         if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
55 |         loss.backward()
56 | 
57 |         # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
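        # (torch.nn.utils.clip_grad_norm_ below is the in-place variant; the
        # older clip_grad_norm spelling is deprecated in PyTorch.)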
58 | if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip) 59 | optimizer.step() 60 | 61 | total_loss += raw_loss.data 62 | optimizer.param_groups[0]['lr'] = lr2 63 | if batch % args.log_interval == 0 and batch > 0: 64 | cur_loss = total_loss.item() / args.log_interval 65 | elapsed = time.time() - start_time 66 | print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | ' 67 | 'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format( 68 | epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'], 69 | elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2))) 70 | total_loss = 0 71 | start_time = time.time() 72 | ### 73 | batch += 1 74 | i += seq_len -------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/utils.py: -------------------------------------------------------------------------------- 1 | from itertools import permutations 2 | 3 | import networkx as nx 4 | import numpy as np 5 | import torch 6 | 7 | 8 | def repackage_hidden(h): 9 | """Wraps hidden states in new Tensors, 10 | to detach them from their history.""" 11 | if isinstance(h, torch.Tensor): 12 | return h.detach() 13 | else: 14 | return tuple(repackage_hidden(v) for v in h) 15 | 16 | 17 | def batchify(data, bsz, args, cuda='cuda'): 18 | # Work out how cleanly we can divide the dataset into bsz parts. 19 | nbatch = data.size(0) // bsz 20 | # Trim off any extra elements that wouldn't cleanly fit (remainders). 21 | data = data.narrow(0, 0, nbatch * bsz) 22 | # Evenly divide the data across the bsz batches. 23 | data = data.view(bsz, -1).t().contiguous() 24 | if args.cuda: 25 | data = data.to(cuda) 26 | return data 27 | 28 | 29 | def get_batch(source, i, args, seq_len=None, evaluation=False): 30 | seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i) 31 | data = source[i:i+seq_len] 32 | target = source[i+1:i+1+seq_len].view(-1) 33 | return data, target 34 | 35 | 36 | def make_graph(recepie): 37 | G = nx.DiGraph() 38 | 39 | for key in recepie.keys(): 40 | op = recepie[key]['op'] 41 | if key.startswith("h_new_"): 42 | op = key+":"+op 43 | G.add_node(key, name=key, op=op) 44 | for inp in recepie[key]['input']: 45 | if "h_prev" in inp or inp == "x": 46 | G.add_node(inp, name=inp, op=inp) 47 | else: 48 | G.add_node(inp, name=inp) 49 | G.add_edge(inp, key) 50 | return G 51 | 52 | 53 | def recepie2matrixops(recepie): 54 | G = make_graph(recepie) 55 | labels = nx.get_node_attributes(G, "op") 56 | nodelist_with_ops = np.array(list(labels.items())) 57 | 58 | matrix = nx.to_numpy_array(G, nodelist=nodelist_with_ops[:, 0]) 59 | ops = nodelist_with_ops[:, 1] 60 | 61 | return matrix, ops 62 | 63 | 64 | 65 | def graph_edit_distance(matrixops1, matrixops2): 66 | m1, l1 = matrixops1 67 | m2, l2 = matrixops2 68 | 69 | # Pad 70 | n1, n2 = m1.shape[0], m2.shape[0] 71 | max_n = max(n1, n2) 72 | m1 = np.pad(m1, ((0, max_n - m1.shape[0]), (0, max_n - m1.shape[0]))) 73 | m2 = np.pad(m2, ((0, max_n - m2.shape[0]), (0, max_n - m2.shape[0]))) 74 | l1 = np.pad(l1, (0, max_n - l1.shape[0]), constant_values=None) 75 | l2 = np.pad(l2, (0, max_n - l2.shape[0]), constant_values=None) 76 | 77 | 78 | d = 100000000 79 | for p in permutations(range(len(m1))): 80 | p = list(p) 81 | d_p = (m1 != m2[p][:, p]).sum() + (l1 != l2[p]).sum() 82 | d = min(d, d_p) 83 | return d 84 | -------------------------------------------------------------------------------- /evals/ray_tune/thirdparty/weight_drop.py: 
-------------------------------------------------------------------------------- 1 | import functools 2 | from functools import wraps 3 | 4 | import torch 5 | from torch.nn import Parameter 6 | 7 | 8 | class WeightDrop(torch.nn.Module): 9 | def __init__(self, module, weights, dropout=0, variational=False): 10 | super(WeightDrop, self).__init__() 11 | self.module = module 12 | self.weights = weights 13 | self.dropout = dropout 14 | self.variational = variational 15 | self._setup() 16 | 17 | def widget_demagnetizer_y2k_edition(*args, **kwargs): 18 | # We need to replace flatten_parameters with a nothing function 19 | # It must be a function rather than a lambda as otherwise pickling explodes 20 | # We can't write boring code though, so ... WIDGET DEMAGNETIZER Y2K EDITION! 21 | # (╯°□°)╯︵ ┻━┻ 22 | return 23 | 24 | def _setup(self): 25 | # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN 26 | if issubclass(type(self.module), torch.nn.RNNBase): 27 | self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition 28 | 29 | for name_w in self.weights: 30 | #print('Applying weight drop of {} to {}'.format(self.dropout, name_w)) 31 | w = getattr(self.module, name_w) 32 | del self.module._parameters[name_w] 33 | self.module.register_parameter(name_w + '_raw', Parameter(w.data)) 34 | 35 | def _setweights(self): 36 | for name_w in self.weights: 37 | raw_w = getattr(self.module, name_w + '_raw') 38 | w = None 39 | if self.variational: 40 | mask = torch.autograd.Variable(torch.ones(raw_w.size(0), 1)) 41 | if raw_w.is_cuda: mask = mask.cuda() 42 | mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True) 43 | w = torch.nn.Parameter(mask.expand_as(raw_w) * raw_w) 44 | else: 45 | w = torch.nn.Parameter(torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training)) 46 | setattr(self.module, name_w, w) 47 | 48 | def forward(self, *args): 49 | self._setweights() 50 | return self.module.forward(*args) 51 | 52 | def rsetattr(obj, attr, val): 53 | pre, _, post = attr.rpartition('.') 54 | return setattr(rgetattr(obj, pre) if pre else obj, post, val) 55 | 56 | def rgetattr(obj, attr, *args): 57 | def _getattr(obj, attr): 58 | return getattr(obj, attr, *args) 59 | return functools.reduce(_getattr, [obj] + attr.split('.')) 60 | 61 | class ParameterListWeightDrop(torch.nn.Module): 62 | def __init__(self, module, weights, dropout=0, variational=False): 63 | super(ParameterListWeightDrop, self).__init__() 64 | self.module = module 65 | self.weights = weights 66 | self.parents = {} 67 | for w in self.weights: 68 | p = '.'.join(w.split('.')[:-1]) 69 | i = int(w.split('.')[-1]) 70 | if p not in self.parents: 71 | self.parents[p] = [] 72 | self.parents[p].append(i) 73 | self.dropout = dropout 74 | self.variational = variational 75 | self._setup() 76 | 77 | 78 | def _setup(self): 79 | for name_w in self.parents: 80 | #print('Applying weight drop of {} to {}'.format(self.dropout, name_w)) 81 | ws = rgetattr(self.module, name_w) 82 | rsetattr(self.module, name_w, None) 83 | rsetattr(self.module, name_w + '_raw', torch.nn.ParameterList(ws)) 84 | 85 | def _setweights(self): 86 | for name_w in self.parents: 87 | raw_ws = rgetattr(self.module, name_w + '_raw') 88 | ws = [] 89 | for i, raw_w in enumerate(raw_ws): 90 | if i in self.parents[name_w]: 91 | if self.variational: 92 | mask = torch.autograd.Variable(torch.ones(raw_w.size(0), 1)) 93 | if raw_w.is_cuda: mask = mask.cuda() 94 | mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True) 95 | w = 
torch.nn.Parameter(mask.expand_as(raw_w) * raw_w) 96 | else: 97 | w = torch.nn.Parameter(torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training)) 98 | else: 99 | w = raw_w 100 | ws.append(w) 101 | rsetattr(self.module, name_w, torch.nn.ParameterList(ws)) 102 | 103 | def forward(self, *args): 104 | self._setweights() 105 | return self.module.forward(*args) -------------------------------------------------------------------------------- /evals/ray_tune/workloads/nlp_list.csv: -------------------------------------------------------------------------------- 1 | name,arrival 2 | albert-base-v1,424 3 | YituTech/conv-bert-small,1232 4 | funnel-transformer/small,2608 5 | xlm-mlm-en-2048,2944 6 | roberta-base,5880 7 | microsoft/mpnet-base,10520 8 | junnyu/roformer_small_discriminator,12680 9 | google/electra-small-discriminator,14536 10 | xlm-roberta-large,15328 11 | albert-large-v1,15392 12 | bert-base-multilingual-cased,19504 13 | bert-base-cased,20144 14 | roberta-large,20280 15 | distilroberta-base,21880 16 | flaubert/flaubert_small_cased,22800 17 | funnel-transformer/intermediate,24584 18 | google/rembert,24856 19 | squeezebert/squeezebert-mnli,27224 20 | bert-large-cased,27776 21 | bert-large-uncased,29496 22 | albert-base-v2,29864 23 | microsoft/deberta-v2-xxlarge,32024 24 | google/mobilebert-uncased,32056 25 | flaubert/flaubert_large_cased,32624 26 | distilbert-base-uncased,32808 27 | flaubert/flaubert_base_cased,39584 28 | microsoft/deberta-large,39592 29 | bert-base-uncased,39640 30 | distilbert-base-cased,41376 31 | distilbert-base-multilingual-cased,42216 32 | microsoft/deberta-v2-xlarge,42280 33 | YituTech/conv-bert-medium-small,49088 34 | albert-xlarge-v2,50592 35 | google/electra-base-generator,51248 36 | facebook/bart-large,53848 37 | funnel-transformer/xlarge,55176 38 | funnel-transformer/large,62016 39 | microsoft/layoutlm-base-uncased,66296 40 | albert-xxlarge-v1,67552 41 | microsoft/deberta-xlarge,68256 42 | google/electra-large-generator,68912 43 | microsoft/layoutlm-large-uncased,73808 44 | google/bigbird-roberta-base,76400 45 | albert-xlarge-v1,76896 46 | xlm-mlm-100-1280,77856 47 | YituTech/conv-bert-base,79008 48 | google/bigbird-roberta-large,81208 49 | flaubert/flaubert_base_uncased,82544 50 | albert-xxlarge-v2,82552 51 | google/bigbird-base-trivia-itc,88264 52 | xlm-clm-enfr-1024,90048 53 | xlm-mlm-tlm-xnli15-1024,90304 54 | xlm-roberta-base,90336 55 | camembert-base,91568 56 | albert-large-v2,91928 57 | bert-base-multilingual-uncased,92120 58 | microsoft/deberta-base,94816 59 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/nlp_nwp.csv: -------------------------------------------------------------------------------- 1 | name,arrival 2 | albert-base-v1,0 3 | YituTech/conv-bert-small,0 4 | funnel-transformer/small,0 5 | xlm-mlm-en-2048,0 6 | roberta-base,0 7 | microsoft/mpnet-base,0 8 | junnyu/roformer_small_discriminator,0 9 | google/electra-small-discriminator,0 10 | bert-base-cased,0 11 | flaubert/flaubert_base_uncased,11544 12 | bert-base-multilingual-cased,14628 13 | distilbert-base-uncased,15108 14 | camembert-base,15210 15 | microsoft/deberta-v2-xxlarge,16410 16 | microsoft/layoutlm-base-uncased,17100 17 | funnel-transformer/intermediate,18438 18 | google/rembert,18642 19 | squeezebert/squeezebert-mnli,20418 20 | YituTech/conv-bert-base,20832 21 | bert-large-uncased,22122 22 | albert-base-v2,22398 23 | distilroberta-base,24018 24 | google/mobilebert-uncased,24042 25 | 
flaubert/flaubert_large_cased,24468 26 | funnel-transformer/large,24606 27 | google/bigbird-base-trivia-itc,29688 28 | microsoft/deberta-large,29694 29 | bert-base-uncased,29730 30 | distilbert-base-cased,31032 31 | distilbert-base-multilingual-cased,31662 32 | microsoft/deberta-v2-xlarge,31710 33 | roberta-large,36816 34 | albert-xlarge-v2,37944 35 | xlm-clm-enfr-1024,38436 36 | facebook/bart-large,40386 37 | funnel-transformer/xlarge,41382 38 | xlm-roberta-large,46512 39 | flaubert/flaubert_small_cased,49722 40 | albert-xxlarge-v1,50664 41 | microsoft/deberta-xlarge,51192 42 | google/electra-large-generator,51684 43 | bert-large-cased,55356 44 | google/bigbird-roberta-base,57300 45 | albert-xlarge-v1,57672 46 | xlm-mlm-100-1280,58392 47 | google/electra-base-generator,59256 48 | microsoft/deberta-base,60906 49 | albert-large-v1,61908 50 | albert-xxlarge-v2,61914 51 | flaubert/flaubert_base_cased,66198 52 | microsoft/layoutlm-large-uncased,67536 53 | xlm-mlm-tlm-xnli15-1024,67728 54 | xlm-roberta-base,67752 55 | YituTech/conv-bert-medium-small,68676 56 | albert-large-v2,68946 57 | bert-base-multilingual-uncased,69090 58 | google/bigbird-roberta-large,71112 59 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/torchcv_list: -------------------------------------------------------------------------------- 1 | DPN26() 2 | ResNet18() 3 | MobileNetV2(alpha=0.5) 4 | VGG(vgg_block=11) 5 | regnety004 6 | rir_cifar10 7 | DLA() 8 | wrn16_10_cifar10 9 | xdensenet40_2_k36_bc_cifar10 10 | ShuffleNetV2(net_size=0.5) 11 | efficientnet_b0 12 | msdnet22_cifar10 13 | DenseNet121() 14 | DenseNet161() 15 | DenseNet169() 16 | DenseNet201() 17 | DPN68() 18 | DPN92() 19 | DPN98() 20 | DPN107() 21 | SimpleDLA() 22 | ResNet34() 23 | ResNet50() 24 | ResNet101() 25 | ResNet152() 26 | stochastic_depth_resnet18() 27 | stochastic_depth_resnet34() 28 | stochastic_depth_resnet50() 29 | stochastic_depth_resnet101() 30 | stochastic_depth_resnet152() 31 | ResNeXt29_2x64d() 32 | ResNeXt29_4x64d() 33 | ResNeXt29_8x64d() 34 | seresnet18() 35 | seresnet34() 36 | seresnet50() 37 | seresnet101() 38 | seresnet152() 39 | preactresnet18() 40 | preactresnet34() 41 | preactresnet50() 42 | preactresnet101() 43 | preactresnet152() 44 | ShuffleNetG2() 45 | ShuffleNetG3() 46 | ShuffleNetV2(net_size=1) 47 | ShuffleNetV2(net_size=1.5) 48 | ShuffleNetV2(net_size=2) 49 | MobileNetV2(alpha=0.75) 50 | MobileNetV2(alpha=1) 51 | MobileNetV2(alpha=1.5) 52 | MobileNetV2(alpha=2) 53 | MobileNetV3(is_large=0, multiplier=0.5) 54 | MobileNetV3(is_large=0, multiplier=0.75) 55 | MobileNetV3(is_large=0, multiplier=1) 56 | MobileNetV3(is_large=0, multiplier=1.25) 57 | MobileNetV3(is_large=0, multiplier=1.5) 58 | MobileNetV3(is_large=0, multiplier=2) 59 | MobileNetV3(is_large=1, multiplier=0.5) 60 | MobileNetV3(is_large=1, multiplier=0.75) 61 | MobileNetV3(is_large=1, multiplier=1) 62 | MobileNetV3(is_large=1, multiplier=1.25) 63 | MobileNetV3(is_large=1, multiplier=1.5) 64 | MobileNetV3(is_large=1, multiplier=2) 65 | VGG(vgg_block=13) 66 | VGG(vgg_block=16) 67 | VGG(vgg_block=19) 68 | VGG(vgg_block=11, use_bn=True) 69 | VGG(vgg_block=13, use_bn=True) 70 | VGG(vgg_block=16, use_bn=True) 71 | VGG(vgg_block=19, use_bn=True) 72 | ror3_110_cifar10 73 | resnesta18 74 | efficientnet_b4 75 | resnet164bn_cifar10 76 | preresnet20_cifar10 77 | resnext272_1x64d_cifar10 78 | diaresnet20_cifar10 79 | regnetx320 80 | pyramidnet272_a200_bn_cifar10 81 | regnety002 82 | wrn40_8_cifar10 83 | regnety160 84 | 
xdensenet40_2_k24_bc_cifar10 85 | diapreresnet20_cifar10 86 | resnext20_32x4d_cifar10 87 | pyramidnet200_a240_bn_cifar10 88 | regnety016 89 | regnety008 90 | resnext29_16x64d_cifar10 91 | seresnet20_cifar10 92 | regnetx016 93 | resneta18 94 | densenet100_k24_cifar10 95 | sepreresnet20_cifar10 96 | regnetx080 97 | regnety080 98 | resnetd50b 99 | densenet100_k12_bc_cifar10 100 | resnesta152 101 | densenet40_k12_bc_cifar10 102 | resnext20_16x4d_cifar10 103 | diapreresnet1001_cifar10 104 | diapreresnet164bn_cifar10 105 | seresnet542bn_cifar10 106 | resnesta50 107 | regnetx160 108 | resnet110_cifar10 109 | resnesta269 110 | nin_cifar10 111 | regnetx008 112 | seresnet110_cifar10 113 | seresnet1202_cifar10 114 | wrn28_10_cifar10 115 | sepreresnet272bn_cifar10 116 | regnety006 117 | regnetx040 118 | regnetx006 119 | efficientnet_b3 120 | sepreresnet542bn_cifar10 121 | regnety040 122 | resneta101b 123 | preresnet1202_cifar10 124 | efficientnet_b8 125 | seresnet272bn_cifar10 126 | diaresnet110_cifar10 127 | regnetx002 128 | regnetx064 129 | diaresnet164bn_cifar10 130 | densenet40_k36_bc_cifar10 131 | regnety120 132 | prnet 133 | efficientnet_b6 134 | preresnet110_cifar10 135 | sepreresnet1001_cifar10 136 | resnesta101 137 | diaresnet1202_cifar10 138 | resnetd152b 139 | resnet272bn_cifar10 140 | regnetx004 141 | resneta10 142 | resnext29_32x4d_cifar10 143 | pyramidnet110_a84_cifar10 144 | efficientnet_b7 145 | diapreresnet1202_cifar10 146 | diapreresnet110_cifar10 147 | densenet190_k40_bc_cifar10 148 | resnext20_32x2d_cifar10 149 | densenet40_k24_bc_cifar10 150 | sepreresnet1202_cifar10 151 | pyramidnet236_a220_bn_cifar10 152 | resneta50b 153 | resnetabc14b 154 | resnesta200 155 | resnet20_cifar10 156 | resnet14_cifar10 157 | regnety032 158 | efficientnet_b5 159 | regnetx120 160 | resnet56_cifar10 161 | resnext272_2x32d_cifar10 162 | diaresnet1001_cifar10 163 | sepreresnet110_cifar10 164 | diaresnet56_cifar10 165 | preresnet56_cifar10 166 | seresnet1001_cifar10 167 | resnestabc26 168 | resnetd101b 169 | resnet1001_cifar10 170 | preresnet1001_cifar10 171 | resnestabc14 172 | fractalnet_cifar10 173 | resneta152b 174 | pyramidnet110_a48_cifar10 175 | seresnet56_cifar10 176 | sepreresnet164bn_cifar10 177 | densenet250_k24_bc_cifar10 178 | diapreresnet56_cifar10 179 | wrn20_10_32bit_cifar10 180 | sepreresnet56_cifar10 181 | ror3_56_cifar10 182 | pyramidnet164_a270_bn_cifar10 183 | densenet40_k12_cifar10 184 | resnet1202_cifar10 185 | efficientnet_b2 186 | pyramidnet110_a270_cifar10 187 | ror3_164_cifar10 188 | preresnet542bn_cifar10 189 | seresnet164bn_cifar10 190 | regnety320 191 | regnetx032 192 | resnet542bn_cifar10 193 | regnety064 194 | preresnet164bn_cifar10 195 | densenet100_k12_cifar10 196 | preresnet272bn_cifar10 197 | efficientnet_b1 198 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-1.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,107,cifar10,1,128 3 | deepspeech2-1,110,deepspeech2,1,20 4 | ncf-2,135,ncf,1,256 5 | deepspeech2-3,262,deepspeech2,1,20 6 | cifar10-4,356,cifar10,4,512 7 | ncf-5,839,ncf,1,256 8 | ncf-6,1131,ncf,1,256 9 | ncf-7,1282,ncf,1,256 10 | cifar10-8,1332,cifar10,1,128 11 | ncf-9,1453,ncf,1,256 12 | cifar10-10,1496,cifar10,1,128 13 | imagenet-11,1604,imagenet,8,1600 14 | cifar10-12,1611,cifar10,1,128 15 | cifar10-13,1740,cifar10,1,128 16 | cifar10-14,1778,cifar10,1,128 17 | cifar10-15,1932,cifar10,1,128 18 
| ncf-16,2308,ncf,1,256 19 | cifar10-17,2354,cifar10,1,128 20 | ncf-18,2420,ncf,1,256 21 | ncf-19,2615,ncf,1,256 22 | ncf-20,2862,ncf,1,256 23 | ncf-21,3488,ncf,1,256 24 | deepspeech2-22,3917,deepspeech2,1,20 25 | ncf-23,4277,ncf,1,256 26 | cifar10-24,4326,cifar10,1,128 27 | cifar10-25,4407,cifar10,1,128 28 | ncf-26,4757,ncf,1,256 29 | bert-27,5813,bert,1,12 30 | cifar10-28,5841,cifar10,1,128 31 | ncf-29,5880,ncf,8,2048 32 | ncf-30,6103,ncf,1,256 33 | yolov3-31,6357,yolov3,4,32 34 | ncf-32,6487,ncf,1,256 35 | cifar10-33,6701,cifar10,1,128 36 | cifar10-34,6732,cifar10,1,128 37 | cifar10-35,6899,cifar10,1,128 38 | cifar10-36,7025,cifar10,1,128 39 | ncf-37,7332,ncf,1,256 40 | ncf-38,7333,ncf,1,256 41 | ncf-39,7744,ncf,1,256 42 | cifar10-40,8223,cifar10,1,128 43 | cifar10-41,8817,cifar10,1,128 44 | ncf-42,8825,ncf,1,256 45 | cifar10-43,9202,cifar10,1,128 46 | cifar10-44,10069,cifar10,1,128 47 | ncf-45,10477,ncf,1,256 48 | cifar10-46,10648,cifar10,1,128 49 | cifar10-47,10819,cifar10,1,128 50 | cifar10-48,10932,cifar10,1,128 51 | cifar10-49,11207,cifar10,1,128 52 | ncf-50,11331,ncf,1,256 53 | yolov3-51,11382,yolov3,1,8 54 | yolov3-52,11528,yolov3,1,8 55 | cifar10-53,12608,cifar10,1,128 56 | cifar10-54,12683,cifar10,1,128 57 | ncf-55,13223,ncf,1,256 58 | cifar10-56,13262,cifar10,1,128 59 | cifar10-57,13276,cifar10,1,128 60 | ncf-58,13307,ncf,1,256 61 | ncf-59,13608,ncf,1,256 62 | deepspeech2-60,13759,deepspeech2,1,20 63 | ncf-61,13806,ncf,1,256 64 | ncf-62,13891,ncf,1,256 65 | cifar10-63,13927,cifar10,1,128 66 | ncf-64,14065,ncf,1,256 67 | cifar10-65,14220,cifar10,1,128 68 | ncf-66,14242,ncf,1,256 69 | cifar10-67,14265,cifar10,1,128 70 | ncf-68,14439,ncf,1,256 71 | cifar10-69,14448,cifar10,1,128 72 | cifar10-70,14524,cifar10,1,128 73 | ncf-71,14533,ncf,1,256 74 | ncf-72,14735,ncf,1,256 75 | deepspeech2-73,14773,deepspeech2,1,20 76 | ncf-74,14878,ncf,1,256 77 | cifar10-75,14906,cifar10,1,128 78 | ncf-76,15010,ncf,1,256 79 | cifar10-77,15146,cifar10,1,128 80 | bert-78,15172,bert,1,12 81 | cifar10-79,15262,cifar10,1,128 82 | cifar10-80,15602,cifar10,1,128 83 | ncf-81,15603,ncf,1,256 84 | ncf-82,15715,ncf,1,256 85 | cifar10-83,15831,cifar10,1,128 86 | cifar10-84,15944,cifar10,1,128 87 | bert-85,15947,bert,1,12 88 | cifar10-86,16331,cifar10,1,128 89 | ncf-87,16336,ncf,1,256 90 | ncf-88,16834,ncf,1,256 91 | ncf-89,17168,ncf,1,256 92 | cifar10-90,17553,cifar10,1,128 93 | ncf-91,17636,ncf,1,256 94 | cifar10-92,17739,cifar10,1,128 95 | ncf-93,17911,ncf,1,256 96 | deepspeech2-94,17987,deepspeech2,1,20 97 | ncf-95,18115,ncf,1,256 98 | deepspeech2-96,18242,deepspeech2,4,80 99 | deepspeech2-97,18488,deepspeech2,1,20 100 | cifar10-98,18573,cifar10,1,128 101 | ncf-99,18660,ncf,1,256 102 | ncf-100,18719,ncf,1,256 103 | cifar10-101,18955,cifar10,1,128 104 | bert-102,18978,bert,1,12 105 | ncf-103,19331,ncf,1,256 106 | deepspeech2-104,19353,deepspeech2,1,20 107 | cifar10-105,19362,cifar10,1,128 108 | cifar10-106,19395,cifar10,1,128 109 | ncf-107,19436,ncf,1,256 110 | ncf-108,19464,ncf,1,256 111 | ncf-109,19658,ncf,1,256 112 | deepspeech2-110,19750,deepspeech2,1,20 113 | cifar10-111,19760,cifar10,1,128 114 | ncf-112,19916,ncf,1,256 115 | ncf-113,20014,ncf,1,256 116 | ncf-114,20104,ncf,1,256 117 | cifar10-115,20156,cifar10,1,128 118 | cifar10-116,20310,cifar10,1,128 119 | deepspeech2-117,20440,deepspeech2,1,20 120 | cifar10-118,20517,cifar10,1,128 121 | bert-119,20529,bert,1,12 122 | ncf-120,20602,ncf,1,256 123 | ncf-121,20701,ncf,1,256 124 | cifar10-122,21069,cifar10,1,128 125 | ncf-123,21084,ncf,1,256 126 | 
ncf-124,21132,ncf,1,256 127 | cifar10-125,21170,cifar10,1,128 128 | imagenet-126,21308,imagenet,8,1600 129 | ncf-127,21472,ncf,1,256 130 | cifar10-128,21664,cifar10,1,128 131 | cifar10-129,21733,cifar10,1,128 132 | bert-130,21744,bert,1,12 133 | cifar10-131,21928,cifar10,1,128 134 | cifar10-132,22185,cifar10,1,128 135 | ncf-133,22311,ncf,1,256 136 | cifar10-134,23533,cifar10,1,128 137 | ncf-135,23642,ncf,1,256 138 | cifar10-136,23646,cifar10,1,128 139 | ncf-137,24116,ncf,1,256 140 | cifar10-138,24129,cifar10,1,128 141 | ncf-139,24480,ncf,1,256 142 | cifar10-140,24554,cifar10,1,128 143 | ncf-141,24823,ncf,1,256 144 | cifar10-142,25003,cifar10,1,128 145 | ncf-143,25117,ncf,1,256 146 | ncf-144,25240,ncf,1,256 147 | cifar10-145,25555,cifar10,4,512 148 | cifar10-146,25682,cifar10,1,128 149 | cifar10-147,25691,cifar10,1,128 150 | ncf-148,25716,ncf,1,256 151 | ncf-149,25727,ncf,1,256 152 | ncf-150,25915,ncf,1,256 153 | cifar10-151,25931,cifar10,1,128 154 | bert-152,25971,bert,1,12 155 | ncf-153,26532,ncf,1,256 156 | cifar10-154,26599,cifar10,1,128 157 | deepspeech2-155,26752,deepspeech2,1,20 158 | imagenet-156,26786,imagenet,8,1600 159 | imagenet-157,27630,imagenet,16,3200 160 | ncf-158,27694,ncf,1,256 161 | ncf-159,27695,ncf,4,1024 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-2.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,498,cifar10,1,128 3 | ncf-1,1427,ncf,1,256 4 | ncf-2,1559,ncf,1,256 5 | cifar10-3,1597,cifar10,1,128 6 | cifar10-4,1642,cifar10,1,128 7 | ncf-5,1832,ncf,1,256 8 | ncf-6,1915,ncf,1,256 9 | imagenet-7,2173,imagenet,8,1600 10 | ncf-8,2420,ncf,1,256 11 | ncf-9,2576,ncf,1,256 12 | imagenet-10,3075,imagenet,8,1600 13 | cifar10-11,3117,cifar10,1,128 14 | ncf-12,3118,ncf,1,256 15 | cifar10-13,3341,cifar10,1,128 16 | ncf-14,3434,ncf,1,256 17 | ncf-15,3565,ncf,1,256 18 | cifar10-16,3730,cifar10,1,128 19 | cifar10-17,3756,cifar10,1,128 20 | ncf-18,3786,ncf,1,256 21 | cifar10-19,4101,cifar10,1,128 22 | cifar10-20,4221,cifar10,1,128 23 | ncf-21,4265,ncf,1,256 24 | ncf-22,4276,ncf,1,256 25 | ncf-23,4559,ncf,1,256 26 | cifar10-24,4732,cifar10,1,128 27 | cifar10-25,4843,cifar10,1,128 28 | ncf-26,4931,ncf,1,256 29 | ncf-27,4960,ncf,1,256 30 | yolov3-28,4970,yolov3,4,32 31 | cifar10-29,5533,cifar10,1,128 32 | yolov3-30,5676,yolov3,1,8 33 | bert-31,5750,bert,1,12 34 | ncf-32,5776,ncf,1,256 35 | cifar10-33,5799,cifar10,1,128 36 | cifar10-34,7331,cifar10,1,128 37 | cifar10-35,8448,cifar10,1,128 38 | cifar10-36,8917,cifar10,1,128 39 | cifar10-37,9075,cifar10,1,128 40 | ncf-38,9635,ncf,4,1024 41 | cifar10-39,10875,cifar10,1,128 42 | ncf-40,10881,ncf,1,256 43 | ncf-41,10923,ncf,4,1024 44 | deepspeech2-42,11268,deepspeech2,1,20 45 | ncf-43,11418,ncf,1,256 46 | ncf-44,12114,ncf,1,256 47 | cifar10-45,12294,cifar10,1,128 48 | ncf-46,12731,ncf,1,256 49 | cifar10-47,13331,cifar10,1,128 50 | cifar10-48,13332,cifar10,1,128 51 | ncf-49,13414,ncf,1,256 52 | ncf-50,13526,ncf,1,256 53 | cifar10-51,13746,cifar10,1,128 54 | yolov3-52,13875,yolov3,4,32 55 | cifar10-53,14230,cifar10,1,128 56 | cifar10-54,14305,cifar10,1,128 57 | cifar10-55,14372,cifar10,1,128 58 | ncf-56,14554,ncf,1,256 59 | ncf-57,14559,ncf,1,256 60 | cifar10-58,14681,cifar10,1,128 61 | yolov3-59,15025,yolov3,8,64 62 | ncf-60,15101,ncf,1,256 63 | deepspeech2-61,15168,deepspeech2,1,20 64 | cifar10-62,15196,cifar10,1,128 65 | ncf-63,15543,ncf,1,256 66 | 
yolov3-64,15698,yolov3,1,8 67 | ncf-65,15795,ncf,1,256 68 | ncf-66,15835,ncf,1,256 69 | cifar10-67,16005,cifar10,1,128 70 | ncf-68,16064,ncf,1,256 71 | deepspeech2-69,16120,deepspeech2,1,20 72 | deepspeech2-70,16254,deepspeech2,1,20 73 | ncf-71,16283,ncf,1,256 74 | bert-72,16418,bert,1,12 75 | ncf-73,16519,ncf,1,256 76 | yolov3-74,16696,yolov3,1,8 77 | cifar10-75,16699,cifar10,1,128 78 | ncf-76,16710,ncf,1,256 79 | ncf-77,16917,ncf,1,256 80 | ncf-78,17038,ncf,1,256 81 | imagenet-79,17294,imagenet,8,1600 82 | cifar10-80,17503,cifar10,1,128 83 | ncf-81,17532,ncf,1,256 84 | deepspeech2-82,17630,deepspeech2,1,20 85 | cifar10-83,17758,cifar10,1,128 86 | deepspeech2-84,17839,deepspeech2,1,20 87 | cifar10-85,17841,cifar10,1,128 88 | cifar10-86,17932,cifar10,1,128 89 | bert-87,17980,bert,1,12 90 | deepspeech2-88,18028,deepspeech2,1,20 91 | cifar10-89,18037,cifar10,1,128 92 | cifar10-90,18041,cifar10,1,128 93 | ncf-91,18133,ncf,1,256 94 | deepspeech2-92,18152,deepspeech2,1,20 95 | cifar10-93,18381,cifar10,1,128 96 | cifar10-94,18401,cifar10,1,128 97 | cifar10-95,18624,cifar10,1,128 98 | deepspeech2-96,18696,deepspeech2,1,20 99 | ncf-97,18732,ncf,1,256 100 | ncf-98,18836,ncf,1,256 101 | cifar10-99,18877,cifar10,1,128 102 | cifar10-100,18925,cifar10,1,128 103 | bert-101,18983,bert,1,12 104 | deepspeech2-102,19055,deepspeech2,1,20 105 | cifar10-103,19239,cifar10,1,128 106 | cifar10-104,19284,cifar10,1,128 107 | ncf-105,19340,ncf,1,256 108 | ncf-106,19382,ncf,1,256 109 | cifar10-107,19537,cifar10,1,128 110 | deepspeech2-108,19590,deepspeech2,1,20 111 | cifar10-109,19615,cifar10,1,128 112 | cifar10-110,20086,cifar10,1,128 113 | cifar10-111,20129,cifar10,1,128 114 | deepspeech2-112,20481,deepspeech2,1,20 115 | bert-113,20652,bert,1,12 116 | ncf-114,20803,ncf,1,256 117 | ncf-115,20804,ncf,1,256 118 | cifar10-116,20823,cifar10,1,128 119 | deepspeech2-117,20879,deepspeech2,4,80 120 | bert-118,20907,bert,1,12 121 | cifar10-119,20932,cifar10,1,128 122 | cifar10-120,21016,cifar10,1,128 123 | cifar10-121,21034,cifar10,1,128 124 | deepspeech2-122,21135,deepspeech2,1,20 125 | cifar10-123,21212,cifar10,1,128 126 | deepspeech2-124,21303,deepspeech2,1,20 127 | ncf-125,21332,ncf,1,256 128 | ncf-126,21512,ncf,1,256 129 | cifar10-127,21715,cifar10,1,128 130 | ncf-128,21720,ncf,1,256 131 | cifar10-129,21777,cifar10,1,128 132 | ncf-130,21787,ncf,1,256 133 | bert-131,22492,bert,1,12 134 | cifar10-132,22611,cifar10,1,128 135 | ncf-133,22713,ncf,1,256 136 | bert-134,23230,bert,1,12 137 | cifar10-135,23548,cifar10,1,128 138 | cifar10-136,23929,cifar10,1,128 139 | cifar10-137,23967,cifar10,1,128 140 | ncf-138,23981,ncf,1,256 141 | ncf-139,24135,ncf,1,256 142 | deepspeech2-140,24156,deepspeech2,1,20 143 | ncf-141,24250,ncf,1,256 144 | cifar10-142,24744,cifar10,1,128 145 | ncf-143,24835,ncf,1,256 146 | yolov3-144,25001,yolov3,2,16 147 | cifar10-145,25041,cifar10,1,128 148 | cifar10-146,25079,cifar10,1,128 149 | cifar10-147,25166,cifar10,1,128 150 | cifar10-148,25262,cifar10,1,128 151 | ncf-149,25573,ncf,1,256 152 | cifar10-150,25587,cifar10,1,128 153 | ncf-151,25591,ncf,1,256 154 | cifar10-152,25876,cifar10,1,128 155 | ncf-153,25901,ncf,1,256 156 | deepspeech2-154,25952,deepspeech2,1,20 157 | ncf-155,26190,ncf,1,256 158 | ncf-156,26311,ncf,1,256 159 | deepspeech2-157,26533,deepspeech2,1,20 160 | cifar10-158,26873,cifar10,1,128 161 | ncf-159,27131,ncf,1,256 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-3.csv: 
-------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | ncf-0,791,ncf,1,256 3 | cifar10-1,1151,cifar10,1,128 4 | cifar10-2,1315,cifar10,1,128 5 | ncf-3,1335,ncf,1,256 6 | deepspeech2-4,1386,deepspeech2,1,20 7 | ncf-5,1437,ncf,1,256 8 | ncf-6,1753,ncf,1,256 9 | cifar10-7,1855,cifar10,1,128 10 | cifar10-8,1937,cifar10,1,128 11 | ncf-9,1955,ncf,1,256 12 | bert-10,2957,bert,1,12 13 | ncf-11,3114,ncf,1,256 14 | cifar10-12,3143,cifar10,1,128 15 | cifar10-13,3159,cifar10,1,128 16 | cifar10-14,3189,cifar10,1,128 17 | cifar10-15,3701,cifar10,8,1024 18 | ncf-16,3789,ncf,1,256 19 | ncf-17,4232,ncf,1,256 20 | ncf-18,4252,ncf,1,256 21 | ncf-19,4305,ncf,1,256 22 | cifar10-20,4966,cifar10,1,128 23 | cifar10-21,4993,cifar10,1,128 24 | ncf-22,5125,ncf,1,256 25 | yolov3-23,5439,yolov3,1,8 26 | cifar10-24,5533,cifar10,1,128 27 | cifar10-25,5693,cifar10,1,128 28 | cifar10-26,6029,cifar10,1,128 29 | ncf-27,6117,ncf,1,256 30 | deepspeech2-28,6211,deepspeech2,1,20 31 | deepspeech2-29,6570,deepspeech2,8,160 32 | cifar10-30,6585,cifar10,1,128 33 | ncf-31,6732,ncf,1,256 34 | ncf-32,7188,ncf,1,256 35 | ncf-33,7331,ncf,1,256 36 | ncf-34,7332,ncf,1,256 37 | ncf-35,7736,ncf,1,256 38 | deepspeech2-36,8268,deepspeech2,1,20 39 | cifar10-37,8635,cifar10,1,128 40 | ncf-38,9115,ncf,1,256 41 | ncf-39,10315,ncf,1,256 42 | cifar10-40,10666,cifar10,1,128 43 | yolov3-41,10785,yolov3,1,8 44 | cifar10-42,10917,cifar10,1,128 45 | ncf-43,11281,ncf,1,256 46 | yolov3-44,11314,yolov3,1,8 47 | yolov3-45,11487,yolov3,4,32 48 | yolov3-46,11554,yolov3,8,64 49 | ncf-47,11582,ncf,1,256 50 | cifar10-48,11929,cifar10,1,128 51 | cifar10-49,12117,cifar10,1,128 52 | yolov3-50,12282,yolov3,4,32 53 | cifar10-51,12660,cifar10,1,128 54 | ncf-52,13071,ncf,1,256 55 | ncf-53,13118,ncf,1,256 56 | deepspeech2-54,13449,deepspeech2,1,20 57 | ncf-55,13458,ncf,1,256 58 | ncf-56,13497,ncf,1,256 59 | ncf-57,13632,ncf,1,256 60 | ncf-58,13674,ncf,1,256 61 | cifar10-59,13712,cifar10,1,128 62 | ncf-60,13958,ncf,1,256 63 | cifar10-61,14020,cifar10,1,128 64 | cifar10-62,14059,cifar10,1,128 65 | cifar10-63,14141,cifar10,1,128 66 | cifar10-64,14172,cifar10,1,128 67 | ncf-65,14322,ncf,1,256 68 | ncf-66,14477,ncf,1,256 69 | cifar10-67,14478,cifar10,1,128 70 | ncf-68,14515,ncf,1,256 71 | ncf-69,14536,ncf,1,256 72 | bert-70,14711,bert,1,12 73 | cifar10-71,14926,cifar10,1,128 74 | yolov3-72,15569,yolov3,1,8 75 | ncf-73,16581,ncf,1,256 76 | ncf-74,16935,ncf,1,256 77 | ncf-75,17062,ncf,1,256 78 | bert-76,17166,bert,1,12 79 | ncf-77,17552,ncf,1,256 80 | bert-78,17646,bert,1,12 81 | ncf-79,17804,ncf,1,256 82 | deepspeech2-80,17817,deepspeech2,1,20 83 | ncf-81,17941,ncf,1,256 84 | deepspeech2-82,17968,deepspeech2,1,20 85 | cifar10-83,18116,cifar10,1,128 86 | bert-84,18180,bert,1,12 87 | cifar10-85,18227,cifar10,1,128 88 | yolov3-86,18756,yolov3,1,8 89 | bert-87,18922,bert,1,12 90 | deepspeech2-88,18929,deepspeech2,1,20 91 | deepspeech2-89,18985,deepspeech2,1,20 92 | bert-90,19037,bert,1,12 93 | ncf-91,19074,ncf,1,256 94 | deepspeech2-92,19298,deepspeech2,1,20 95 | ncf-93,19420,ncf,1,256 96 | ncf-94,19434,ncf,1,256 97 | deepspeech2-95,19448,deepspeech2,1,20 98 | cifar10-96,19559,cifar10,1,128 99 | cifar10-97,19594,cifar10,1,128 100 | ncf-98,19847,ncf,1,256 101 | ncf-99,20105,ncf,1,256 102 | yolov3-100,20545,yolov3,16,128 103 | ncf-101,20704,ncf,1,256 104 | cifar10-102,20728,cifar10,1,128 105 | cifar10-103,20855,cifar10,1,128 106 | ncf-104,20870,ncf,1,256 107 | ncf-105,20873,ncf,1,256 108 | 
cifar10-106,20900,cifar10,1,128 109 | cifar10-107,20924,cifar10,1,128 110 | cifar10-108,20992,cifar10,1,128 111 | yolov3-109,21071,yolov3,1,8 112 | ncf-110,21115,ncf,1,256 113 | ncf-111,21177,ncf,1,256 114 | cifar10-112,21206,cifar10,1,128 115 | ncf-113,21208,ncf,1,256 116 | ncf-114,21550,ncf,1,256 117 | cifar10-115,21667,cifar10,1,128 118 | bert-116,21723,bert,1,12 119 | yolov3-117,21919,yolov3,1,8 120 | cifar10-118,21920,cifar10,1,128 121 | ncf-119,22012,ncf,1,256 122 | cifar10-120,22468,cifar10,1,128 123 | cifar10-121,22932,cifar10,1,128 124 | ncf-122,23312,ncf,1,256 125 | cifar10-123,23531,cifar10,1,128 126 | cifar10-124,23660,cifar10,1,128 127 | cifar10-125,23781,cifar10,1,128 128 | cifar10-126,23816,cifar10,1,128 129 | ncf-127,23882,ncf,1,256 130 | ncf-128,24250,ncf,1,256 131 | cifar10-129,24300,cifar10,1,128 132 | ncf-130,24435,ncf,1,256 133 | ncf-131,24607,ncf,1,256 134 | cifar10-132,24732,cifar10,1,128 135 | ncf-133,24803,ncf,1,256 136 | ncf-134,24808,ncf,1,256 137 | cifar10-135,25188,cifar10,1,128 138 | ncf-136,25216,ncf,1,256 139 | ncf-137,25329,ncf,1,256 140 | ncf-138,25558,ncf,1,256 141 | ncf-139,25683,ncf,1,256 142 | bert-140,25741,bert,1,12 143 | cifar10-141,25748,cifar10,1,128 144 | deepspeech2-142,25856,deepspeech2,1,20 145 | yolov3-143,25896,yolov3,4,32 146 | yolov3-144,25960,yolov3,1,8 147 | ncf-145,26037,ncf,1,256 148 | deepspeech2-146,26188,deepspeech2,1,20 149 | bert-147,26311,bert,1,12 150 | yolov3-148,26322,yolov3,4,32 151 | bert-149,26381,bert,1,12 152 | ncf-150,26406,ncf,1,256 153 | ncf-151,26436,ncf,1,256 154 | deepspeech2-152,26464,deepspeech2,1,20 155 | bert-153,26644,bert,1,12 156 | bert-154,26871,bert,1,12 157 | bert-155,27106,bert,1,12 158 | cifar10-156,27115,cifar10,1,128 159 | ncf-157,28335,ncf,1,256 160 | ncf-158,28388,ncf,1,256 161 | cifar10-159,28739,cifar10,1,128 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-4.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | bert-0,259,bert,1,12 3 | cifar10-1,289,cifar10,1,128 4 | cifar10-2,447,cifar10,1,128 5 | deepspeech2-3,543,deepspeech2,1,20 6 | ncf-4,1107,ncf,1,256 7 | cifar10-5,1314,cifar10,1,128 8 | ncf-6,1403,ncf,1,256 9 | ncf-7,1543,ncf,1,256 10 | ncf-8,1760,ncf,1,256 11 | ncf-9,1873,ncf,1,256 12 | cifar10-10,1915,cifar10,1,128 13 | ncf-11,1916,ncf,1,256 14 | cifar10-12,1917,cifar10,1,128 15 | imagenet-13,2245,imagenet,16,3200 16 | cifar10-14,2353,cifar10,1,128 17 | ncf-15,3050,ncf,1,256 18 | cifar10-16,3087,cifar10,1,128 19 | cifar10-17,3131,cifar10,1,128 20 | cifar10-18,3178,cifar10,1,128 21 | cifar10-19,3261,cifar10,1,128 22 | cifar10-20,3692,cifar10,1,128 23 | ncf-21,4192,ncf,1,256 24 | cifar10-22,4252,cifar10,1,128 25 | cifar10-23,4276,cifar10,1,128 26 | cifar10-24,4501,cifar10,1,128 27 | ncf-25,4815,ncf,1,256 28 | cifar10-26,5334,cifar10,1,128 29 | ncf-27,5515,ncf,1,256 30 | cifar10-28,5531,cifar10,1,128 31 | ncf-29,5729,ncf,1,256 32 | cifar10-30,5801,cifar10,1,128 33 | ncf-31,6029,ncf,1,256 34 | cifar10-32,6117,cifar10,1,128 35 | ncf-33,6296,ncf,1,256 36 | deepspeech2-34,6379,deepspeech2,1,20 37 | cifar10-35,6668,cifar10,1,128 38 | cifar10-36,6739,cifar10,1,128 39 | ncf-37,6747,ncf,1,256 40 | cifar10-38,6784,cifar10,1,128 41 | cifar10-39,7062,cifar10,1,128 42 | imagenet-40,7161,imagenet,8,1600 43 | ncf-41,7623,ncf,1,256 44 | cifar10-42,7922,cifar10,1,128 45 | bert-43,8293,bert,1,12 46 | cifar10-44,8444,cifar10,1,128 47 | 
ncf-45,8916,ncf,1,256 48 | cifar10-46,9152,cifar10,1,128 49 | ncf-47,10073,ncf,1,256 50 | cifar10-48,10578,cifar10,1,128 51 | ncf-49,10916,ncf,1,256 52 | yolov3-50,10930,yolov3,1,8 53 | cifar10-51,10932,cifar10,1,128 54 | ncf-52,10989,ncf,1,256 55 | ncf-53,11078,ncf,1,256 56 | ncf-54,11188,ncf,1,256 57 | cifar10-55,11333,cifar10,1,128 58 | ncf-56,11354,ncf,1,256 59 | imagenet-57,11592,imagenet,16,3200 60 | deepspeech2-58,11769,deepspeech2,1,20 61 | ncf-59,12180,ncf,1,256 62 | bert-60,12291,bert,1,12 63 | ncf-61,13354,ncf,1,256 64 | ncf-62,13360,ncf,1,256 65 | cifar10-63,13468,cifar10,1,128 66 | cifar10-64,13515,cifar10,1,128 67 | ncf-65,13568,ncf,1,256 68 | bert-66,13616,bert,1,12 69 | cifar10-67,14156,cifar10,1,128 70 | ncf-68,14213,ncf,1,256 71 | cifar10-69,14228,cifar10,1,128 72 | ncf-70,14372,ncf,1,256 73 | ncf-71,14426,ncf,1,256 74 | cifar10-72,14459,cifar10,1,128 75 | cifar10-73,14550,cifar10,1,128 76 | ncf-74,14855,ncf,1,256 77 | cifar10-75,15460,cifar10,1,128 78 | ncf-76,15550,ncf,1,256 79 | ncf-77,15799,ncf,1,256 80 | deepspeech2-78,15807,deepspeech2,4,80 81 | ncf-79,16245,ncf,1,256 82 | ncf-80,16346,ncf,1,256 83 | cifar10-81,16430,cifar10,1,128 84 | ncf-82,17026,ncf,1,256 85 | ncf-83,17035,ncf,1,256 86 | ncf-84,17515,ncf,1,256 87 | ncf-85,17565,ncf,1,256 88 | bert-86,17758,bert,1,12 89 | ncf-87,17801,ncf,1,256 90 | deepspeech2-88,17829,deepspeech2,1,20 91 | deepspeech2-89,17904,deepspeech2,1,20 92 | ncf-90,18105,ncf,1,256 93 | cifar10-91,18202,cifar10,1,128 94 | ncf-92,18391,ncf,1,256 95 | deepspeech2-93,18616,deepspeech2,1,20 96 | bert-94,18618,bert,1,12 97 | ncf-95,18719,ncf,1,256 98 | cifar10-96,18771,cifar10,1,128 99 | cifar10-97,18810,cifar10,1,128 100 | ncf-98,18870,ncf,1,256 101 | ncf-99,18870,ncf,1,256 102 | cifar10-100,18942,cifar10,1,128 103 | ncf-101,19029,ncf,1,256 104 | ncf-102,19052,ncf,1,256 105 | ncf-103,19160,ncf,1,256 106 | ncf-104,19213,ncf,1,256 107 | deepspeech2-105,19267,deepspeech2,1,20 108 | imagenet-106,19368,imagenet,8,1600 109 | ncf-107,19382,ncf,1,256 110 | cifar10-108,19472,cifar10,1,128 111 | ncf-109,19698,ncf,1,256 112 | cifar10-110,19887,cifar10,1,128 113 | cifar10-111,19932,cifar10,1,128 114 | cifar10-112,19946,cifar10,1,128 115 | ncf-113,20043,ncf,1,256 116 | deepspeech2-114,20103,deepspeech2,1,20 117 | cifar10-115,20197,cifar10,1,128 118 | cifar10-116,20379,cifar10,1,128 119 | ncf-117,20454,ncf,1,256 120 | cifar10-118,20557,cifar10,1,128 121 | cifar10-119,20613,cifar10,1,128 122 | cifar10-120,20712,cifar10,1,128 123 | cifar10-121,21177,cifar10,1,128 124 | cifar10-122,21193,cifar10,1,128 125 | cifar10-123,21481,cifar10,1,128 126 | deepspeech2-124,21758,deepspeech2,1,20 127 | ncf-125,21810,ncf,1,256 128 | ncf-126,21815,ncf,1,256 129 | cifar10-127,22048,cifar10,1,128 130 | cifar10-128,22122,cifar10,1,128 131 | deepspeech2-129,22288,deepspeech2,1,20 132 | ncf-130,22535,ncf,1,256 133 | yolov3-131,22647,yolov3,2,16 134 | ncf-132,22721,ncf,1,256 135 | ncf-133,22915,ncf,1,256 136 | ncf-134,23332,ncf,1,256 137 | yolov3-135,23498,yolov3,4,32 138 | ncf-136,23660,ncf,1,256 139 | bert-137,24110,bert,1,12 140 | ncf-138,24816,ncf,1,256 141 | ncf-139,24884,ncf,1,256 142 | cifar10-140,24934,cifar10,1,128 143 | ncf-141,25054,ncf,1,256 144 | deepspeech2-142,25145,deepspeech2,1,20 145 | ncf-143,25254,ncf,1,256 146 | ncf-144,25726,ncf,1,256 147 | cifar10-145,25750,cifar10,1,128 148 | ncf-146,25762,ncf,1,256 149 | ncf-147,25782,ncf,1,256 150 | ncf-148,25915,ncf,1,256 151 | ncf-149,26014,ncf,1,256 152 | cifar10-150,26412,cifar10,1,128 153 | ncf-151,26525,ncf,1,256 154 
| cifar10-152,26530,cifar10,1,128 155 | deepspeech2-153,26599,deepspeech2,1,20 156 | yolov3-154,26756,yolov3,4,32 157 | deepspeech2-155,27019,deepspeech2,1,20 158 | yolov3-156,27077,yolov3,4,32 159 | yolov3-157,27132,yolov3,1,8 160 | cifar10-158,27403,cifar10,1,128 161 | cifar10-159,27731,cifar10,1,128 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-5.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,16,cifar10,1,128 3 | yolov3-1,167,yolov3,4,32 4 | cifar10-2,181,cifar10,1,128 5 | cifar10-3,582,cifar10,1,128 6 | bert-4,1133,bert,1,12 7 | ncf-5,1286,ncf,1,256 8 | cifar10-6,1296,cifar10,1,128 9 | cifar10-7,1433,cifar10,1,128 10 | ncf-8,1506,ncf,1,256 11 | ncf-9,1621,ncf,1,256 12 | ncf-10,1812,ncf,1,256 13 | ncf-11,2041,ncf,1,256 14 | ncf-12,2070,ncf,1,256 15 | imagenet-13,2260,imagenet,8,1600 16 | cifar10-14,2590,cifar10,1,128 17 | cifar10-15,2856,cifar10,1,128 18 | cifar10-16,3317,cifar10,1,128 19 | ncf-17,3464,ncf,1,256 20 | cifar10-18,3659,cifar10,1,128 21 | imagenet-19,3942,imagenet,32,6400 22 | cifar10-20,4021,cifar10,1,128 23 | cifar10-21,4050,cifar10,1,128 24 | cifar10-22,4170,cifar10,1,128 25 | cifar10-23,4199,cifar10,1,128 26 | cifar10-24,4263,cifar10,1,128 27 | ncf-25,4292,ncf,1,256 28 | cifar10-26,4384,cifar10,1,128 29 | ncf-27,4404,ncf,1,256 30 | ncf-28,4513,ncf,1,256 31 | ncf-29,4646,ncf,1,256 32 | ncf-30,4892,ncf,1,256 33 | cifar10-31,4930,cifar10,1,128 34 | yolov3-32,5169,yolov3,1,8 35 | cifar10-33,5202,cifar10,1,128 36 | ncf-34,5367,ncf,1,256 37 | cifar10-35,5533,cifar10,1,128 38 | deepspeech2-36,5892,deepspeech2,1,20 39 | cifar10-37,7070,cifar10,1,128 40 | bert-38,7152,bert,1,12 41 | cifar10-39,7271,cifar10,1,128 42 | cifar10-40,7713,cifar10,1,128 43 | ncf-41,8183,ncf,1,256 44 | cifar10-42,8526,cifar10,1,128 45 | cifar10-43,8880,cifar10,1,128 46 | cifar10-44,8943,cifar10,1,128 47 | cifar10-45,9115,cifar10,1,128 48 | ncf-46,9164,ncf,1,256 49 | ncf-47,9303,ncf,1,256 50 | ncf-48,9309,ncf,1,256 51 | yolov3-49,9442,yolov3,4,32 52 | cifar10-50,9733,cifar10,1,128 53 | ncf-51,10158,ncf,1,256 54 | ncf-52,10333,ncf,1,256 55 | yolov3-53,10408,yolov3,4,32 56 | bert-54,10465,bert,1,12 57 | ncf-55,10603,ncf,1,256 58 | ncf-56,10996,ncf,1,256 59 | ncf-57,11061,ncf,1,256 60 | ncf-58,11161,ncf,1,256 61 | cifar10-59,11346,cifar10,1,128 62 | ncf-60,12252,ncf,1,256 63 | cifar10-61,12518,cifar10,1,128 64 | cifar10-62,12543,cifar10,1,128 65 | ncf-63,12717,ncf,1,256 66 | ncf-64,12732,ncf,1,256 67 | deepspeech2-65,12849,deepspeech2,1,20 68 | ncf-66,13413,ncf,1,256 69 | cifar10-67,13666,cifar10,1,128 70 | ncf-68,13742,ncf,1,256 71 | cifar10-69,13915,cifar10,1,128 72 | cifar10-70,14230,cifar10,1,128 73 | ncf-71,14464,ncf,1,256 74 | ncf-72,14531,ncf,1,256 75 | ncf-73,14773,ncf,1,256 76 | cifar10-74,14812,cifar10,1,128 77 | ncf-75,14925,ncf,1,256 78 | cifar10-76,15016,cifar10,1,128 79 | yolov3-77,15148,yolov3,1,8 80 | imagenet-78,15159,imagenet,8,1600 81 | cifar10-79,15242,cifar10,1,128 82 | ncf-80,15629,ncf,1,256 83 | ncf-81,15674,ncf,1,256 84 | ncf-82,15814,ncf,1,256 85 | ncf-83,15881,ncf,1,256 86 | ncf-84,15890,ncf,1,256 87 | cifar10-85,15914,cifar10,1,128 88 | cifar10-86,16149,cifar10,1,128 89 | ncf-87,16247,ncf,1,256 90 | ncf-88,16709,ncf,1,256 91 | bert-89,16918,bert,1,12 92 | deepspeech2-90,16967,deepspeech2,1,20 93 | ncf-91,17251,ncf,1,256 94 | ncf-92,17409,ncf,1,256 95 | ncf-93,17532,ncf,1,256 96 | 
deepspeech2-94,17535,deepspeech2,1,20 97 | ncf-95,17622,ncf,1,256 98 | cifar10-96,17804,cifar10,1,128 99 | cifar10-97,17921,cifar10,1,128 100 | deepspeech2-98,17977,deepspeech2,1,20 101 | bert-99,18008,bert,1,12 102 | ncf-100,18435,ncf,1,256 103 | cifar10-101,18721,cifar10,1,128 104 | cifar10-102,18753,cifar10,1,128 105 | deepspeech2-103,18773,deepspeech2,1,20 106 | ncf-104,18805,ncf,1,256 107 | cifar10-105,18931,cifar10,1,128 108 | ncf-106,18996,ncf,1,256 109 | ncf-107,19206,ncf,1,256 110 | cifar10-108,19237,cifar10,1,128 111 | ncf-109,19380,ncf,1,256 112 | cifar10-110,19396,cifar10,1,128 113 | ncf-111,19401,ncf,1,256 114 | cifar10-112,19550,cifar10,1,128 115 | cifar10-113,19626,cifar10,1,128 116 | ncf-114,19749,ncf,1,256 117 | bert-115,19839,bert,1,12 118 | cifar10-116,19918,cifar10,1,128 119 | bert-117,19991,bert,1,12 120 | ncf-118,20057,ncf,1,256 121 | deepspeech2-119,20143,deepspeech2,1,20 122 | bert-120,20274,bert,1,12 123 | ncf-121,20299,ncf,1,256 124 | cifar10-122,20342,cifar10,1,128 125 | deepspeech2-123,20386,deepspeech2,1,20 126 | yolov3-124,20478,yolov3,4,32 127 | deepspeech2-125,20515,deepspeech2,1,20 128 | ncf-126,20578,ncf,1,256 129 | ncf-127,20602,ncf,1,256 130 | ncf-128,20645,ncf,1,256 131 | cifar10-129,20696,cifar10,1,128 132 | bert-130,21016,bert,1,12 133 | ncf-131,21243,ncf,1,256 134 | ncf-132,21549,ncf,1,256 135 | ncf-133,21667,ncf,1,256 136 | ncf-134,21719,ncf,1,256 137 | ncf-135,22191,ncf,1,256 138 | cifar10-136,22555,cifar10,1,128 139 | ncf-137,22930,ncf,1,256 140 | ncf-138,22932,ncf,1,256 141 | deepspeech2-139,22968,deepspeech2,1,20 142 | bert-140,23424,bert,1,12 143 | ncf-141,24102,ncf,1,256 144 | ncf-142,24300,ncf,1,256 145 | deepspeech2-143,24524,deepspeech2,1,20 146 | ncf-144,24932,ncf,1,256 147 | cifar10-145,24993,cifar10,1,128 148 | cifar10-146,25014,cifar10,1,128 149 | bert-147,25095,bert,1,12 150 | imagenet-148,25511,imagenet,16,3200 151 | ncf-149,25555,ncf,1,256 152 | ncf-150,25638,ncf,1,256 153 | cifar10-151,25866,cifar10,1,128 154 | ncf-152,26311,ncf,1,256 155 | ncf-153,26639,ncf,1,256 156 | ncf-154,27012,ncf,1,256 157 | ncf-155,27101,ncf,1,256 158 | cifar10-156,27286,cifar10,1,128 159 | ncf-157,27578,ncf,1,256 160 | ncf-158,27759,ncf,1,256 161 | ncf-159,28755,ncf,1,256 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-6.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,53,cifar10,1,128 3 | cifar10-1,154,cifar10,1,128 4 | deepspeech2-2,326,deepspeech2,1,20 5 | cifar10-3,368,cifar10,1,128 6 | bert-4,735,bert,1,12 7 | cifar10-5,1315,cifar10,1,128 8 | ncf-6,1585,ncf,1,256 9 | ncf-7,1817,ncf,1,256 10 | cifar10-8,1916,cifar10,1,128 11 | ncf-9,1924,ncf,1,256 12 | bert-10,2438,bert,1,12 13 | cifar10-11,2518,cifar10,1,128 14 | ncf-12,2535,ncf,1,256 15 | bert-13,2735,bert,1,12 16 | ncf-14,2850,ncf,1,256 17 | cifar10-15,3073,cifar10,1,128 18 | cifar10-16,3107,cifar10,1,128 19 | ncf-17,3403,ncf,1,256 20 | imagenet-18,3472,imagenet,8,1600 21 | cifar10-19,3687,cifar10,1,128 22 | cifar10-20,3733,cifar10,1,128 23 | cifar10-21,4003,cifar10,1,128 24 | cifar10-22,4007,cifar10,1,128 25 | ncf-23,4078,ncf,1,256 26 | cifar10-24,4101,cifar10,1,128 27 | ncf-25,4948,ncf,1,256 28 | ncf-26,4949,ncf,1,256 29 | cifar10-27,4955,cifar10,1,128 30 | cifar10-28,5172,cifar10,1,128 31 | cifar10-29,5277,cifar10,1,128 32 | cifar10-30,5285,cifar10,1,128 33 | cifar10-31,6136,cifar10,1,128 34 | ncf-32,6324,ncf,1,256 35 | 
deepspeech2-33,6406,deepspeech2,1,20 36 | ncf-34,6731,ncf,1,256 37 | ncf-35,6897,ncf,1,256 38 | cifar10-36,7752,cifar10,1,128 39 | cifar10-37,8287,cifar10,1,128 40 | cifar10-38,8444,cifar10,1,128 41 | ncf-39,8532,ncf,1,256 42 | cifar10-40,8614,cifar10,1,128 43 | yolov3-41,9226,yolov3,4,32 44 | yolov3-42,9550,yolov3,1,8 45 | yolov3-43,9612,yolov3,8,64 46 | ncf-44,9732,ncf,1,256 47 | ncf-45,9876,ncf,1,256 48 | bert-46,10151,bert,1,12 49 | ncf-47,10318,ncf,1,256 50 | ncf-48,10319,ncf,1,256 51 | ncf-49,11033,ncf,1,256 52 | bert-50,11256,bert,1,12 53 | deepspeech2-51,11288,deepspeech2,1,20 54 | bert-52,11292,bert,1,12 55 | cifar10-53,11446,cifar10,1,128 56 | deepspeech2-54,11491,deepspeech2,1,20 57 | cifar10-55,11515,cifar10,1,128 58 | deepspeech2-56,11852,deepspeech2,1,20 59 | ncf-57,12204,ncf,1,256 60 | ncf-58,12558,ncf,1,256 61 | cifar10-59,12866,cifar10,1,128 62 | cifar10-60,12938,cifar10,1,128 63 | cifar10-61,14331,cifar10,1,128 64 | cifar10-62,14377,cifar10,1,128 65 | cifar10-63,14377,cifar10,1,128 66 | bert-64,14708,bert,4,48 67 | cifar10-65,14817,cifar10,1,128 68 | ncf-66,15115,ncf,1,256 69 | ncf-67,15563,ncf,1,256 70 | cifar10-68,15879,cifar10,1,128 71 | cifar10-69,16110,cifar10,1,128 72 | ncf-70,16319,ncf,1,256 73 | cifar10-71,16331,cifar10,1,128 74 | ncf-72,16437,ncf,1,256 75 | ncf-73,16560,ncf,1,256 76 | ncf-74,16585,ncf,1,256 77 | cifar10-75,16925,cifar10,1,128 78 | bert-76,16946,bert,1,12 79 | cifar10-77,16957,cifar10,1,128 80 | bert-78,17490,bert,1,12 81 | ncf-79,17515,ncf,1,256 82 | deepspeech2-80,17628,deepspeech2,1,20 83 | imagenet-81,18010,imagenet,8,1600 84 | cifar10-82,18159,cifar10,1,128 85 | ncf-83,18289,ncf,1,256 86 | ncf-84,18391,ncf,1,256 87 | cifar10-85,18510,cifar10,4,512 88 | deepspeech2-86,18699,deepspeech2,1,20 89 | ncf-87,18780,ncf,1,256 90 | ncf-88,18785,ncf,4,1024 91 | ncf-89,18806,ncf,1,256 92 | cifar10-90,18877,cifar10,1,128 93 | deepspeech2-91,19165,deepspeech2,1,20 94 | ncf-92,19202,ncf,1,256 95 | deepspeech2-93,19239,deepspeech2,1,20 96 | cifar10-94,19306,cifar10,1,128 97 | cifar10-95,19316,cifar10,1,128 98 | ncf-96,19332,ncf,1,256 99 | ncf-97,19335,ncf,1,256 100 | cifar10-98,19393,cifar10,1,128 101 | bert-99,19414,bert,1,12 102 | ncf-100,19450,ncf,1,256 103 | bert-101,19689,bert,1,12 104 | cifar10-102,20051,cifar10,1,128 105 | deepspeech2-103,20103,deepspeech2,1,20 106 | cifar10-104,20139,cifar10,1,128 107 | cifar10-105,20269,cifar10,4,512 108 | cifar10-106,20313,cifar10,1,128 109 | cifar10-107,20463,cifar10,1,128 110 | deepspeech2-108,20574,deepspeech2,1,20 111 | bert-109,20613,bert,4,48 112 | cifar10-110,20643,cifar10,1,128 113 | cifar10-111,20665,cifar10,1,128 114 | ncf-112,20669,ncf,1,256 115 | ncf-113,20703,ncf,1,256 116 | cifar10-114,20726,cifar10,1,128 117 | ncf-115,20851,ncf,1,256 118 | ncf-116,21008,ncf,1,256 119 | cifar10-117,21017,cifar10,1,128 120 | cifar10-118,21061,cifar10,1,128 121 | deepspeech2-119,21157,deepspeech2,8,160 122 | cifar10-120,21190,cifar10,1,128 123 | cifar10-121,21209,cifar10,1,128 124 | ncf-122,21433,ncf,1,256 125 | ncf-123,21686,ncf,1,256 126 | cifar10-124,21706,cifar10,1,128 127 | cifar10-125,21738,cifar10,1,128 128 | cifar10-126,21776,cifar10,1,128 129 | yolov3-127,21835,yolov3,8,64 130 | cifar10-128,21886,cifar10,1,128 131 | cifar10-129,22332,cifar10,1,128 132 | bert-130,22769,bert,1,12 133 | deepspeech2-131,22783,deepspeech2,1,20 134 | cifar10-132,23032,cifar10,1,128 135 | ncf-133,23040,ncf,1,256 136 | cifar10-134,23136,cifar10,1,128 137 | deepspeech2-135,23293,deepspeech2,1,20 138 | ncf-136,23478,ncf,1,256 139 | 
imagenet-137,23900,imagenet,8,1600 140 | ncf-138,24040,ncf,1,256 141 | cifar10-139,24132,cifar10,1,128 142 | ncf-140,24362,ncf,1,256 143 | ncf-141,24755,ncf,1,256 144 | ncf-142,24780,ncf,1,256 145 | cifar10-143,24855,cifar10,1,128 146 | ncf-144,24869,ncf,1,256 147 | cifar10-145,25125,cifar10,1,128 148 | cifar10-146,25129,cifar10,1,128 149 | cifar10-147,25679,cifar10,1,128 150 | cifar10-148,25875,cifar10,1,128 151 | ncf-149,25897,ncf,1,256 152 | ncf-150,25898,ncf,1,256 153 | bert-151,25952,bert,1,12 154 | deepspeech2-152,26080,deepspeech2,1,20 155 | deepspeech2-153,26243,deepspeech2,1,20 156 | ncf-154,26853,ncf,1,256 157 | yolov3-155,26876,yolov3,4,32 158 | ncf-156,27132,ncf,1,256 159 | bert-157,27142,bert,1,12 160 | ncf-158,27229,ncf,1,256 161 | cifar10-159,28189,cifar10,1,128 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-7.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | cifar10-0,131,cifar10,1,128 3 | cifar10-1,132,cifar10,1,128 4 | cifar10-2,397,cifar10,1,128 5 | cifar10-3,505,cifar10,1,128 6 | cifar10-4,642,cifar10,1,128 7 | bert-5,812,bert,1,12 8 | ncf-6,848,ncf,4,1024 9 | ncf-7,916,ncf,4,1024 10 | cifar10-8,1397,cifar10,1,128 11 | cifar10-9,1606,cifar10,1,128 12 | cifar10-10,1728,cifar10,1,128 13 | cifar10-11,1908,cifar10,1,128 14 | bert-12,2126,bert,1,12 15 | cifar10-13,2285,cifar10,1,128 16 | cifar10-14,2440,cifar10,1,128 17 | cifar10-15,2712,cifar10,1,128 18 | deepspeech2-16,2931,deepspeech2,1,20 19 | ncf-17,3659,ncf,1,256 20 | cifar10-18,3714,cifar10,1,128 21 | cifar10-19,3788,cifar10,1,128 22 | yolov3-20,4001,yolov3,1,8 23 | cifar10-21,4081,cifar10,1,128 24 | ncf-22,4316,ncf,1,256 25 | cifar10-23,4317,cifar10,1,128 26 | ncf-24,4353,ncf,1,256 27 | imagenet-25,5021,imagenet,16,3200 28 | cifar10-26,5207,cifar10,1,128 29 | cifar10-27,5495,cifar10,1,128 30 | ncf-28,5627,ncf,1,256 31 | ncf-29,6014,ncf,1,256 32 | yolov3-30,6114,yolov3,8,64 33 | cifar10-31,6131,cifar10,1,128 34 | ncf-32,6842,ncf,1,256 35 | ncf-33,6963,ncf,1,256 36 | bert-34,7153,bert,1,12 37 | ncf-35,7392,ncf,1,256 38 | yolov3-36,7516,yolov3,1,8 39 | cifar10-37,7899,cifar10,1,128 40 | cifar10-38,7933,cifar10,1,128 41 | cifar10-39,8387,cifar10,1,128 42 | ncf-40,8537,ncf,1,256 43 | ncf-41,8617,ncf,1,256 44 | ncf-42,9037,ncf,1,256 45 | ncf-43,9727,ncf,1,256 46 | ncf-44,10529,ncf,1,256 47 | cifar10-45,10550,cifar10,1,128 48 | cifar10-46,10599,cifar10,1,128 49 | ncf-47,11332,ncf,1,256 50 | bert-48,11449,bert,1,12 51 | ncf-49,11695,ncf,1,256 52 | cifar10-50,12185,cifar10,1,128 53 | cifar10-51,12239,cifar10,1,128 54 | ncf-52,12718,ncf,1,256 55 | cifar10-53,12942,cifar10,1,128 56 | cifar10-54,12991,cifar10,1,128 57 | ncf-55,13332,ncf,1,256 58 | ncf-56,13486,ncf,1,256 59 | bert-57,13740,bert,1,12 60 | cifar10-58,13807,cifar10,1,128 61 | imagenet-59,13924,imagenet,16,3200 62 | deepspeech2-60,14199,deepspeech2,1,20 63 | ncf-61,14242,ncf,1,256 64 | ncf-62,14572,ncf,1,256 65 | ncf-63,14973,ncf,1,256 66 | ncf-64,14978,ncf,1,256 67 | ncf-65,15051,ncf,1,256 68 | cifar10-66,15226,cifar10,1,128 69 | ncf-67,15357,ncf,1,256 70 | yolov3-68,15814,yolov3,1,8 71 | cifar10-69,15841,cifar10,1,128 72 | bert-70,15876,bert,1,12 73 | ncf-71,16151,ncf,1,256 74 | deepspeech2-72,16303,deepspeech2,1,20 75 | ncf-73,16411,ncf,1,256 76 | yolov3-74,16852,yolov3,1,8 77 | cifar10-75,16932,cifar10,1,128 78 | ncf-76,17035,ncf,1,256 79 | ncf-77,17069,ncf,1,256 80 | bert-78,17174,bert,1,12 81 | 
cifar10-79,17263,cifar10,1,128 82 | cifar10-80,17452,cifar10,1,128 83 | cifar10-81,17515,cifar10,1,128 84 | cifar10-82,17517,cifar10,1,128 85 | ncf-83,17587,ncf,1,256 86 | cifar10-84,17626,cifar10,1,128 87 | cifar10-85,17677,cifar10,1,128 88 | cifar10-86,17830,cifar10,1,128 89 | imagenet-87,17991,imagenet,8,1600 90 | cifar10-88,18132,cifar10,1,128 91 | cifar10-89,18159,cifar10,1,128 92 | ncf-90,18238,ncf,1,256 93 | ncf-91,18288,ncf,1,256 94 | yolov3-92,18343,yolov3,8,64 95 | ncf-93,18607,ncf,1,256 96 | ncf-94,18634,ncf,1,256 97 | cifar10-95,18852,cifar10,1,128 98 | cifar10-96,18912,cifar10,1,128 99 | cifar10-97,18914,cifar10,1,128 100 | cifar10-98,18935,cifar10,1,128 101 | ncf-99,18983,ncf,1,256 102 | deepspeech2-100,19063,deepspeech2,1,20 103 | bert-101,19067,bert,1,12 104 | bert-102,19212,bert,1,12 105 | cifar10-103,19221,cifar10,1,128 106 | cifar10-104,19312,cifar10,1,128 107 | cifar10-105,19332,cifar10,1,128 108 | cifar10-106,19414,cifar10,1,128 109 | yolov3-107,19467,yolov3,4,32 110 | cifar10-108,19571,cifar10,1,128 111 | cifar10-109,19697,cifar10,1,128 112 | ncf-110,19775,ncf,1,256 113 | ncf-111,19999,ncf,1,256 114 | cifar10-112,20163,cifar10,1,128 115 | cifar10-113,20428,cifar10,1,128 116 | ncf-114,20466,ncf,1,256 117 | ncf-115,20519,ncf,1,256 118 | ncf-116,20519,ncf,1,256 119 | cifar10-117,20535,cifar10,1,128 120 | cifar10-118,20546,cifar10,1,128 121 | ncf-119,20766,ncf,1,256 122 | ncf-120,20919,ncf,1,256 123 | deepspeech2-121,20926,deepspeech2,1,20 124 | cifar10-122,21131,cifar10,1,128 125 | ncf-123,21365,ncf,1,256 126 | ncf-124,21561,ncf,1,256 127 | bert-125,21838,bert,1,12 128 | yolov3-126,21874,yolov3,8,64 129 | deepspeech2-127,21965,deepspeech2,1,20 130 | ncf-128,22034,ncf,1,256 131 | cifar10-129,22084,cifar10,1,128 132 | deepspeech2-130,22152,deepspeech2,1,20 133 | ncf-131,22657,ncf,1,256 134 | imagenet-132,22848,imagenet,8,1600 135 | yolov3-133,23886,yolov3,1,8 136 | bert-134,23932,bert,1,12 137 | cifar10-135,24504,cifar10,4,512 138 | ncf-136,24663,ncf,1,256 139 | yolov3-137,24663,yolov3,4,32 140 | cifar10-138,24677,cifar10,1,128 141 | ncf-139,24832,ncf,1,256 142 | cifar10-140,25116,cifar10,1,128 143 | ncf-141,25473,ncf,1,256 144 | ncf-142,25530,ncf,1,256 145 | cifar10-143,25598,cifar10,1,128 146 | ncf-144,25626,ncf,1,256 147 | ncf-145,25675,ncf,1,256 148 | cifar10-146,25749,cifar10,4,512 149 | ncf-147,25800,ncf,1,256 150 | cifar10-148,25857,cifar10,1,128 151 | deepspeech2-149,25884,deepspeech2,1,20 152 | imagenet-150,25903,imagenet,8,1600 153 | deepspeech2-151,26206,deepspeech2,1,20 154 | cifar10-152,26269,cifar10,1,128 155 | cifar10-153,26375,cifar10,8,1024 156 | bert-154,26403,bert,1,12 157 | ncf-155,26617,ncf,1,256 158 | cifar10-156,27359,cifar10,1,128 159 | ncf-157,27715,ncf,1,256 160 | cifar10-158,28671,cifar10,1,128 161 | ncf-159,28775,ncf,1,256 162 | -------------------------------------------------------------------------------- /evals/ray_tune/workloads/workload-8.csv: -------------------------------------------------------------------------------- 1 | name,time,application,num_replicas,batch_size 2 | ncf-0,35,ncf,1,256 3 | yolov3-1,520,yolov3,1,8 4 | ncf-2,1198,ncf,1,256 5 | bert-3,1270,bert,1,12 6 | deepspeech2-4,1303,deepspeech2,1,20 7 | ncf-5,1497,ncf,1,256 8 | cifar10-6,1519,cifar10,1,128 9 | ncf-7,1619,ncf,1,256 10 | ncf-8,1858,ncf,1,256 11 | ncf-9,2017,ncf,1,256 12 | cifar10-10,2254,cifar10,1,128 13 | ncf-11,2749,ncf,1,256 14 | cifar10-12,2911,cifar10,1,128 15 | ncf-13,3134,ncf,1,256 16 | cifar10-14,3208,cifar10,1,128 17 | ncf-15,3421,ncf,1,256 18 | 
ncf-16,3565,ncf,1,256 19 | cifar10-17,3732,cifar10,1,128 20 | cifar10-18,4316,cifar10,1,128 21 | cifar10-19,4692,cifar10,1,128 22 | cifar10-20,4702,cifar10,1,128 23 | ncf-21,5256,ncf,1,256 24 | ncf-22,5430,ncf,1,256 25 | cifar10-23,5500,cifar10,1,128 26 | ncf-24,5611,ncf,1,256 27 | cifar10-25,5707,cifar10,1,128 28 | imagenet-26,5774,imagenet,16,3200 29 | cifar10-27,5801,cifar10,1,128 30 | ncf-28,5854,ncf,1,256 31 | cifar10-29,5884,cifar10,1,128 32 | ncf-30,7110,ncf,1,256 33 | cifar10-31,7347,cifar10,1,128 34 | yolov3-32,7562,yolov3,2,16 35 | ncf-33,7626,ncf,1,256 36 | deepspeech2-34,7858,deepspeech2,4,80 37 | ncf-35,7906,ncf,1,256 38 | yolov3-36,8037,yolov3,2,16 39 | cifar10-37,8313,cifar10,1,128 40 | ncf-38,9268,ncf,1,256 41 | ncf-39,9311,ncf,1,256 42 | yolov3-40,9391,yolov3,1,8 43 | ncf-41,9513,ncf,2,512 44 | ncf-42,9582,ncf,1,256 45 | ncf-43,9612,ncf,1,256 46 | ncf-44,9715,ncf,1,256 47 | cifar10-45,10317,cifar10,1,128 48 | cifar10-46,10392,cifar10,1,128 49 | cifar10-47,10660,cifar10,1,128 50 | cifar10-48,10737,cifar10,1,128 51 | cifar10-49,10859,cifar10,1,128 52 | ncf-50,11060,ncf,1,256 53 | cifar10-51,11782,cifar10,1,128 54 | deepspeech2-52,12205,deepspeech2,1,20 55 | ncf-53,12379,ncf,1,256 56 | cifar10-54,12421,cifar10,1,128 57 | ncf-55,12712,ncf,1,256 58 | ncf-56,13106,ncf,1,256 59 | ncf-57,13457,ncf,1,256 60 | ncf-58,13554,ncf,1,256 61 | cifar10-59,13734,cifar10,1,128 62 | ncf-60,13797,ncf,1,256 63 | cifar10-61,13938,cifar10,1,128 64 | yolov3-62,14090,yolov3,1,8 65 | ncf-63,14186,ncf,1,256 66 | cifar10-64,14205,cifar10,1,128 67 | ncf-65,14245,ncf,1,256 68 | bert-66,14499,bert,1,12 69 | cifar10-67,14623,cifar10,1,128 70 | cifar10-68,14715,cifar10,1,128 71 | deepspeech2-69,14737,deepspeech2,1,20 72 | ncf-70,14871,ncf,1,256 73 | ncf-71,14932,ncf,1,256 74 | deepspeech2-72,14937,deepspeech2,1,20 75 | deepspeech2-73,14992,deepspeech2,1,20 76 | cifar10-74,15132,cifar10,1,128 77 | yolov3-75,15245,yolov3,1,8 78 | cifar10-76,15340,cifar10,1,128 79 | ncf-77,15735,ncf,1,256 80 | cifar10-78,15779,cifar10,1,128 81 | cifar10-79,15835,cifar10,1,128 82 | bert-80,15863,bert,1,12 83 | cifar10-81,16026,cifar10,1,128 84 | ncf-82,16067,ncf,1,256 85 | bert-83,16077,bert,1,12 86 | bert-84,16142,bert,1,12 87 | deepspeech2-85,16332,deepspeech2,1,20 88 | cifar10-86,16381,cifar10,1,128 89 | cifar10-87,16588,cifar10,1,128 90 | ncf-88,16610,ncf,1,256 91 | bert-89,17011,bert,1,12 92 | bert-90,17109,bert,1,12 93 | deepspeech2-91,17238,deepspeech2,1,20 94 | deepspeech2-92,17277,deepspeech2,1,20 95 | cifar10-93,17414,cifar10,1,128 96 | bert-94,17521,bert,1,12 97 | ncf-95,17749,ncf,1,256 98 | cifar10-96,17866,cifar10,1,128 99 | bert-97,18060,bert,1,12 100 | cifar10-98,18115,cifar10,1,128 101 | cifar10-99,18139,cifar10,1,128 102 | cifar10-100,18211,cifar10,1,128 103 | ncf-101,18244,ncf,1,256 104 | ncf-102,18284,ncf,1,256 105 | yolov3-103,18385,yolov3,1,8 106 | cifar10-104,18415,cifar10,1,128 107 | cifar10-105,18549,cifar10,1,128 108 | cifar10-106,18711,cifar10,1,128 109 | cifar10-107,18744,cifar10,1,128 110 | ncf-108,18800,ncf,1,256 111 | deepspeech2-109,18851,deepspeech2,1,20 112 | ncf-110,18853,ncf,1,256 113 | bert-111,18925,bert,1,12 114 | ncf-112,19084,ncf,1,256 115 | cifar10-113,19141,cifar10,1,128 116 | cifar10-114,19260,cifar10,1,128 117 | ncf-115,19331,ncf,1,256 118 | cifar10-116,19492,cifar10,1,128 119 | deepspeech2-117,19595,deepspeech2,1,20 120 | ncf-118,19915,ncf,1,256 121 | cifar10-119,20057,cifar10,1,128 122 | cifar10-120,20362,cifar10,1,128 123 | cifar10-121,20530,cifar10,1,128 124 | 
ncf-122,20531,ncf,1,256 125 | ncf-123,20531,ncf,1,256 126 | cifar10-124,20548,cifar10,1,128 127 | cifar10-125,20673,cifar10,1,128 128 | cifar10-126,20694,cifar10,1,128 129 | cifar10-127,20742,cifar10,1,128 130 | cifar10-128,20884,cifar10,1,128 131 | ncf-129,20937,ncf,1,256 132 | cifar10-130,21132,cifar10,1,128 133 | ncf-131,21335,ncf,1,256 134 | ncf-132,21487,ncf,1,256 135 | yolov3-133,21615,yolov3,4,32 136 | ncf-134,21680,ncf,1,256 137 | ncf-135,21732,ncf,1,256 138 | cifar10-136,21813,cifar10,1,128 139 | ncf-137,21894,ncf,1,256 140 | cifar10-138,22042,cifar10,1,128 141 | imagenet-139,22196,imagenet,8,1600 142 | bert-140,22968,bert,1,12 143 | cifar10-141,23480,cifar10,1,128 144 | cifar10-142,23609,cifar10,1,128 145 | cifar10-143,23641,cifar10,1,128 146 | deepspeech2-144,24063,deepspeech2,1,20 147 | cifar10-145,24115,cifar10,1,128 148 | deepspeech2-146,24140,deepspeech2,1,20 149 | deepspeech2-147,24263,deepspeech2,1,20 150 | cifar10-148,24918,cifar10,1,128 151 | cifar10-149,25240,cifar10,1,128 152 | ncf-150,25553,ncf,1,256 153 | ncf-151,25640,ncf,1,256 154 | cifar10-152,25661,cifar10,1,128 155 | cifar10-153,25690,cifar10,1,128 156 | deepspeech2-154,26246,deepspeech2,1,20 157 | cifar10-155,26565,cifar10,1,128 158 | bert-156,27082,bert,1,12 159 | imagenet-157,27176,imagenet,32,6400 160 | deepspeech2-158,27744,deepspeech2,1,20 161 | deepspeech2-159,28253,deepspeech2,1,20 162 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/examples/__init__.py -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | isPackageNotInstalled() { 3 | $1 --version &> /dev/null 4 | if [ $? -eq 0 ]; then 5 | echo "$1: Already installed" 6 | else 7 | install_dir=$HOME/anaconda3 8 | wget https://repo.anaconda.com/archive/Anaconda3-2020.11-Linux-x86_64.sh 9 | bash Anaconda3-2020.11-Linux-x86_64.sh -b -p $install_dir 10 | export PATH=$install_dir/bin:$PATH 11 | fi 12 | } 13 | 14 | # un-comment to install anaconda 15 | isPackageNotInstalled conda 16 | 17 | 18 | # create conda env 19 | conda init bash 20 | . 
~/.bashrc 21 | conda env create -f environment.yml # Install dependencies 22 | conda activate modelkeeper 23 | 24 | 25 | if [ "$1" == "--cuda" ]; then 26 | wget https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run 27 | sudo apt-get purge nvidia-* -y 28 | sudo sh -c "echo 'blacklist nouveau\noptions nouveau modeset=0' > /etc/modprobe.d/blacklist-nouveau.conf" 29 | sudo update-initramfs -u 30 | sudo sh cuda_10.2.89_440.33.01_linux.run --override --driver --toolkit --samples --silent 31 | export PATH=$PATH:/usr/local/cuda-10.2/bin 32 | conda install cudatoolkit=10.2 -y 33 | fi -------------------------------------------------------------------------------- /modelkeeper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/modelkeeper/__init__.py -------------------------------------------------------------------------------- /modelkeeper/aed_matcher.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | 4 | class AEDMatcher(object): 5 | """Serves precomputed graph edit distance (GED) mappings from a pickle store.""" 6 | 7 | def __init__(self, path, name): 8 | self.name = name 9 | self.mappings = self.load_mapping(path) 10 | 11 | def load_mapping(self, path): 12 | with open(path, 'rb') as fin: 13 | stores = pickle.load(fin) 14 | 15 | return stores.get(self.name, None) 16 | 17 | def query_child(self, child): 18 | child = child.split('.onnx')[0] 19 | if self.mappings is None or child not in self.mappings: 20 | return -float('inf'), [] 21 | 22 | dist = self.mappings[child]['GED'] 23 | mappings = list(zip(self.mappings[child]['Path'][0], self.mappings[child]['Path'][1])) 24 | # distance is negated so that a larger score means a closer match 25 | return -dist, mappings 26 |
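27 | # Example usage (hypothetical: the pickle path and store name below are 28 | # placeholders, not files shipped with this repo): 29 | #   matcher = AEDMatcher('/tmp/aed_mappings.pkl', 'my_zoo') 30 | #   score, mapping = matcher.query_child('resnet18.onnx') 31 | #   # score is the negated GED (higher is better); mapping is a list of 32 | #   # (parent_node_id, child_node_id) pairs. 33 |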
-------------------------------------------------------------------------------- /modelkeeper/analyze_zoo.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import ctypes 3 | import functools 4 | import gc 5 | import heapq 6 | import json 7 | import logging 8 | import multiprocessing 9 | import os 10 | import sys 11 | import time 12 | from multiprocessing import Manager 13 | 14 | import networkx as nx 15 | import numpy 16 | import onnx 17 | import torch 18 | from matcher import ModelKeeper 19 | from onnx import numpy_helper 20 | 21 | # Call C backend 22 | clib_matcher = ctypes.cdll.LoadLibrary('./backend/bin/matcher.so') 23 | clib_matcher.get_matching_score.restype = ctypes.c_char_p 24 | 25 | sys.setrecursionlimit(10000) 26 | logging.basicConfig(filename='logging', level=logging.INFO) 27 | 28 | 29 | def get_mapped(file): 30 | black_list = set() 31 | with open(file) as fin: 32 | lines = fin.readlines() 33 | for line in lines: 34 | if 'Find best mappings' in line: 35 | model_name = line.split('/')[3].split()[0] 36 | black_list.add(model_name) 37 | return black_list 38 | 39 | 40 | def analyze_zoo(): 41 | from config import modelkeeper_config 42 | 43 | start_time = time.time() 44 | zoo_path = "/users/fanlai/experiment/keeper/model_zoo" # '/mnt/zoo' 45 | 46 | modelkeeper_config.zoo_path = zoo_path 47 | mapper = ModelKeeper(modelkeeper_config) 48 | 49 | # ["/users/fanlai/model_zoo/ShuffleNetV2_net_size_2_@0.7142.onnx"] 50 | models = [x for x in os.listdir(zoo_path) if '.onnx' in x] 51 | 52 | #black_list = get_mapped('/users/fanlai/torchcv_scores') 53 | #models = [x for x in os.listdir(zoo_path) if x not in black_list] 54 | # print(models) 55 | # print(len(models)) 56 | all_models = [os.path.join(zoo_path, x) for x in models] 57 | for idx, model_name in enumerate(models): 58 | try: 59 | child_onnx_path = os.path.join(zoo_path, model_name) 60 | # child, child_onnx = mapper.load_model_meta(child_onnx_path) 61 | # child.graph['model_id'] = str(idx) 62 | 63 | black_list = set(all_models[:]) 64 | black_list.discard(child_onnx_path) 65 | black_list = set()  # NOTE: overrides the exclusion above, so each model is matched against the full zoo 66 | # find the best mapping from the zoo 67 | weights, meta_data = mapper.map_for_onnx( 68 | child_onnx_path, black_list, model_name) 69 | print(meta_data) 70 | gc.collect() 71 | except Exception as e: 72 | print(e) 73 | 74 | print("==============") 75 | print(f"total duration is {time.time()-start_time:.2f} sec") 76 | 77 | 78 | def analyze_zoo_folder(): 79 | from config import modelkeeper_config 80 | 81 | start_time = time.time() 82 | zoo_path = '/users/fanlai/experiment/data/my_zoo' 83 | 84 | modelkeeper_config.zoo_path = zoo_path 85 | mapper = ModelKeeper(modelkeeper_config) 86 | 87 | model_folders = [ 88 | x for x in os.listdir(zoo_path) if os.path.isdir( 89 | os.path.join( 90 | zoo_path, x))] 91 | models = [] # model_folders#["/users/fanlai/experiment/data/my_zoo/funnel_transformer_small/funnel_transformer_small.onnx"] 92 | #model_folders = models 93 | for idx, model_path in enumerate(model_folders): 94 | model_name = [ 95 | x for x in os.listdir( 96 | os.path.join( 97 | zoo_path, 98 | model_path)) if '.onnx' in x] 99 | if len(model_name) == 1: 100 | models.append(os.path.join(zoo_path, model_path, model_name[0])) 101 | mapper.add_to_zoo(models[-1]) 102 | print(f"===Add {models[-1]} to zoo...") 103 | 104 | # models = os.listdir(zoo_path) 105 | for idx, model_name in enumerate(models): 106 | child_onnx_path = model_name # os.path.join(zoo_path, model_name) 107 | # child, child_onnx = mapper.load_model_meta(child_onnx_path) 108 | # child.graph['model_id'] = str(idx) 109 | black_list = set(models[:]) 110 | black_list.discard(child_onnx_path) 111 | # find the best mapping from the zoo 112 | weights, meta_data = mapper.map_for_onnx( 113 | child_onnx_path, black_list, model_name.split('/')[-1]) 114 | print(meta_data) 115 | gc.collect() 116 | 117 | print("==============") 118 | print(f"total duration is {time.time()-start_time:.2f} sec") 119 | 120 | 121 | # analyze_zoo_folder() 122 | analyze_zoo() 123 |
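124 | # Sketch of how the C backend above can be invoked (the argument signature of 125 | # get_matching_score is an assumption here -- only its return type is declared 126 | # in this file; see backend/matcher.hpp for the underlying gen_mapping API): 127 | #   payload = json.dumps({'parent': ..., 'child': ...}).encode('utf-8') 128 | #   ans = clib_matcher.get_matching_score(payload, True) 129 | #   score_and_mapping = json.loads(ans) 130 |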
-------------------------------------------------------------------------------- /modelkeeper/backend/Make: -------------------------------------------------------------------------------- 1 | g++ matcher.cpp -fPIC -shared -O2 -o matcher.so -std=c++11 2 | mv matcher.so ./bin/ 3 | -------------------------------------------------------------------------------- /modelkeeper/backend/bin/matcher.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SymbioticLab/ModelKeeper/9212bc79bfc4a271e6120c410bb9fb89cb151486/modelkeeper/backend/bin/matcher.so -------------------------------------------------------------------------------- /modelkeeper/backend/main.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <fstream> 3 | #include <string> 4 | 5 | #include "Network.hpp" 6 | 7 | using namespace std; 8 | 9 | int main(int argc, char * argv[]) 10 | { 11 | int n, m; 12 | 13 | if(argc < 2){ 14 | cout << "USAGE: " << argv[0] << " " << "test_file" << endl; 15 | return 1; 16 | } 17 | ifstream fin(argv[1]); 18 | while(fin >> n >> m){ 19 | Network network(n); 20 | while(m--){ 21 | int x, y; 22 | double dist; 23 | fin >> x >> y >> dist; 24 | network.addEdge(x, y, -dist); 25 | } 26 | cout << network.simulateDistribution(0) << endl; 27 | cout << network.zhuliu(0) << endl; 28 | } 29 | cout << "Done ..." << endl; 30 | return 0; 31 | } 32 |
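33 | // Input format read by the loop above (the values below are made up for 34 | // illustration): 35 | //   <n> <m>          -- number of nodes, number of edges 36 | //   <x> <y> <dist>   -- one weighted edge per line, repeated m times 37 | // e.g. a 3-node graph with two edges out of root node 0: 38 | //   3 2 39 | //   0 1 0.5 40 | //   0 2 1.5 41 |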
-------------------------------------------------------------------------------- /modelkeeper/backend/matcher.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MATCHER__ 2 | #define __MATCHER__ 3 | 4 | #include <string> 5 | #include <vector> 6 | #include <map> 7 | 8 | using namespace std; 9 | 10 | struct node_pair { 11 | double val; 12 | int parentidx; 13 | int childidx; 14 | int opt; 15 | 16 | node_pair(double v, int p, int c, int o) :val(v), parentidx(p), childidx(c), opt(o) {} 17 | }; 18 | 19 | struct Node 20 | { 21 | int idx; 22 | string type; 23 | vector<int> shape; 24 | vector<int> parents; 25 | }; 26 | 27 | class Matcher{ 28 | 29 | private: 30 | int len_parent; 31 | int len_child; 32 | bool dump_mapping; 33 | 34 | double **scores; 35 | map<string, vector<int> > backParentIdx; 36 | map<string, vector<int> > backChildIdx; 37 | 38 | vector<Node> parent_nodes; 39 | vector<Node> child_nodes; 40 | 41 | vector<string> parent_parameters; // element type assumed 42 | vector<string> child_parameters; // element type assumed 43 | 44 | public: 45 | 46 | char* gen_mapping(string json_str, bool dump_mapping); 47 | 48 | // merge k sorted lists 49 | inline double merge_branch_mapping(vector<vector<node_pair> > lists, vector<int> & parent_list, vector<int> & child_list); 50 | inline double cal_score(Node parent_node, Node child_node); 51 | 52 | void read_io(string json_str); 53 | void align_child_parent(); 54 | void init_score(); 55 | 56 | string encode_hash(int i, int j); 57 | }; 58 | 59 | #endif 60 | 61 | -------------------------------------------------------------------------------- /modelkeeper/backend/test.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include "json.hpp" 3 | #include <string> 4 | 5 | using json=nlohmann::json; 6 | using namespace std; 7 | 8 | int main(){ 9 | // json::parse requires valid JSON, i.e. double-quoted keys 10 | string json_str = R"({"nikhil": 1, "akash": 5, "manjeet": 10, "akshat": 15})"; 11 | json second = json::parse(json_str); 12 | 13 | cout << second["nikhil"]; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /modelkeeper/clientservice.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | import shutil 5 | import time 6 | 7 | from paramiko import AutoAddPolicy, SSHClient 8 | from scp import SCPClient 9 | 10 | 11 | class ModelKeeperClient(object): 12 | 13 | """A very simple client service for ModelKeeper""" 14 | 15 | def __init__(self, args): 16 | 17 | self.zoo_server = args.zoo_server 18 | 19 | # TODO: These paths should be informed after querying the zoo host 20 | self.zoo_path = args.zoo_path 21 | self.zoo_query_path = args.zoo_query_path 22 | self.zoo_ans_path = args.zoo_ans_path 23 | self.zoo_register_path = args.zoo_register_path 24 | 25 | self.execution_path = args.execution_path 26 | 27 | self.create_runtime_store() 28 | self.connection = self.create_connection() 29 | self.connection_manager = SCPClient(self.connection.get_transport()) 30 | 31 | def create_runtime_store(self): 32 | os.makedirs(self.execution_path, exist_ok=True) 33 | 34 | def create_connection(self): 35 | connection = SSHClient() 36 | connection.load_system_host_keys() 37 | connection.set_missing_host_key_policy(AutoAddPolicy()) 38 | connection.connect(self.zoo_server) 39 | 40 | return connection 41 | 42 | def query_for_model(self, model_path, timeout=1800): 43 | """ 44 | @ model_path: path to the query model, assumed to be in onnx format 45 | """ 46 | model_name = model_path.split('/')[-1].replace('.onnx', '') 47 | ans_model_name = model_name + '.out' 48 | local_path = os.path.join(self.execution_path, ans_model_name) 49 | 50 | # 1. Upload the model to the modelkeeper pending queue 51 | self.register_model_to_zoo( 52 | model_path, os.path.join( 53 | self.zoo_query_path, model_name)) 54 | 55 | # 2. Ping the host for results 56 | # NOTE: ideally we should use gRPC 57 | waiting_duration, heartbeat = 0, 2 58 | os.system(f'echo > {local_path}') 59 | 60 | while waiting_duration < timeout: 61 | success = self.pull_model_from_zoo(os.path.join( 62 | self.zoo_ans_path, ans_model_name), local_path) 63 | if not success: 64 | time.sleep(heartbeat) 65 | waiting_duration += heartbeat 66 | else: 67 | break 68 | 69 | # 3. Load the result (the remote copy is removed in pull_model_from_zoo) 70 | weights = meta = None 71 | 72 | if waiting_duration < timeout: 73 | # Load model weights and the matching meta data 74 | with open(local_path, 'rb') as fin: 75 | weights = pickle.load(fin) 76 | # {"matching_score", "parent_name", "parent_acc"} 77 | meta = pickle.load(fin) 78 | os.remove(local_path) 79 | else: 80 | logging.info(f"Querying the zoo server timed out after {timeout} sec") 81 | 82 | return weights, meta 83 | 84 | def register_model_to_zoo(self, model_path, zoo_path=None, accuracy=100): 85 | """ 86 | @ model_path: the model to upload and register in the ModelKeeper zoo 87 | """ 88 | 89 | if zoo_path is None: 90 | zoo_path = os.path.join( 91 | self.zoo_register_path, model_path.split('/')[-1].replace('.onnx', '')) 92 | zoo_path = zoo_path + f'@{accuracy}' 93 | try: 94 | self.connection_manager.put(model_path, zoo_path) 95 | _ = self.connection.exec_command( 96 | f"mv {zoo_path} {zoo_path+'.onnx'}") 97 | logging.info( 98 | f"Successfully uploaded model {model_path} to the zoo server") 99 | except Exception as e: 100 | logging.warning( 101 | f"Failed to connect to the zoo host {self.zoo_server}") 102 | 103 | def pull_model_from_zoo(self, model_path, local_path): 104 | """ 105 | @ return whether the result file was successfully pulled from the zoo 106 | """ 107 | success = True 108 | try: 109 | self.connection_manager.get(model_path, local_path) 110 | stdin, stdout, stderr = self.connection.exec_command( 111 | f"rm {model_path}", timeout=30) 112 | stdout.channel.recv_exit_status() 113 | except Exception as e: 114 | success = False 115 | 116 | return success 117 | 118 | def stop(self): 119 | self.connection_manager.close() 120 | self.connection.close() 121 | # shutil.rmtree(self.execution_path) 122 | -------------------------------------------------------------------------------- /modelkeeper/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | parser = argparse.ArgumentParser() 5 | 6 | # Path configuration 7 | parser.add_argument( 8 | '--zoo_path', 9 | type=str, 10 | default=f'{os.environ["HOME"]}/experiment/keeper/model_zoo/', 11 | help='Path of the model zoo') 12 | parser.add_argument( 13 | '--execution_path', 14 | type=str, 15 | default=f'{os.environ["HOME"]}/experiment/keeper/jobs/', 16 | help='Runtime data store of the framework') 17 | parser.add_argument( 18 | '--zoo_query_path', 19 | type=str, 20 | default=f'{os.environ["HOME"]}/experiment/keeper/query_zoo/', 21 | help='Runtime data store of querying models') 22 | parser.add_argument( 23 | '--zoo_ans_path', 24 | type=str, 25 | default=f'{os.environ["HOME"]}/experiment/keeper/ans_zoo/', 26 | help='Runtime
data store of querying results') 27 | parser.add_argument( 28 | '--zoo_register_path', 29 | type=str, 30 | default=f'{os.environ["HOME"]}/experiment/keeper/register_zoo/', 31 | help='Runtime data store of new pending models') 32 | 33 | # Framework configuration 34 | parser.add_argument( 35 | '--num_of_processes', 36 | type=int, 37 | default=20, 38 | help='Number of threads used for mapping (~CPU cores)') 39 | parser.add_argument( 40 | '--zoo_server', 41 | type=str, 42 | default='10.0.0.1', 43 | help='Server of ModelKeeper') 44 | parser.add_argument( 45 | '--user_name', 46 | type=str, 47 | default='', 48 | help='User name in accessing the ModelKeeper server') 49 | 50 | # Parameters 51 | parser.add_argument( 52 | '--neigh_threshold', 53 | type=float, 54 | default=0.1, 55 | help='Threshold of evicting neighbors a if score(a,b)