├── .gitignore
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── configs
│ ├── archive
│ │ ├── cifar
│ │ │ ├── anynet
│ │ │ │ ├── R-110_nds_1gpu.yaml
│ │ │ │ ├── R-56_nds_1gpu.yaml
│ │ │ │ └── V-56_nds_1gpu.yaml
│ │ │ └── resnet
│ │ │   ├── R-110_nds_1gpu.yaml
│ │ │   └── R-56_nds_1gpu.yaml
│ │ └── imagenet
│ │   ├── anynet
│ │   │ ├── R-101-1x64d_step_1gpu.yaml
│ │   │ ├── R-101-1x64d_step_2gpu.yaml
│ │   │ ├── R-101-1x64d_step_8gpu.yaml
│ │   │ ├── R-50-1x64d_step_1gpu.yaml
│ │   │ ├── R-50-1x64d_step_2gpu.yaml
│ │   │ ├── R-50-1x64d_step_8gpu.yaml
│ │   │ ├── X-101-32x4d_step_1gpu.yaml
│ │   │ ├── X-101-32x4d_step_2gpu.yaml
│ │   │ ├── X-101-32x4d_step_8gpu.yaml
│ │   │ ├── X-50-32x4d_step_1gpu.yaml
│ │   │ ├── X-50-32x4d_step_2gpu.yaml
│ │   │ └── X-50-32x4d_step_8gpu.yaml
│ │   └── resnet
│ │     ├── R-101-1x64d_step_1gpu.yaml
│ │     ├── R-101-1x64d_step_2gpu.yaml
│ │     ├── R-101-1x64d_step_8gpu.yaml
│ │     ├── R-50-1x64d_step_1gpu.yaml
│ │     ├── R-50-1x64d_step_2gpu.yaml
│ │     ├── R-50-1x64d_step_8gpu.yaml
│ │     ├── X-101-32x4d_step_1gpu.yaml
│ │     ├── X-101-32x4d_step_2gpu.yaml
│ │     ├── X-101-32x4d_step_8gpu.yaml
│ │     ├── X-50-32x4d_step_1gpu.yaml
│ │     ├── X-50-32x4d_step_2gpu.yaml
│ │     └── X-50-32x4d_step_8gpu.yaml
│ ├── dds_baselines
│ │ ├── effnet
│ │ │ ├── EN-B0_dds_8gpu.yaml
│ │ │ ├── EN-B1_dds_8gpu.yaml
│ │ │ ├── EN-B2_dds_8gpu.yaml
│ │ │ ├── EN-B3_dds_8gpu.yaml
│ │ │ ├── EN-B4_dds_8gpu.yaml
│ │ │ └── EN-B5_dds_8gpu.yaml
│ │ ├── regnetx
│ │ │ ├── RegNetX-1.6GF_dds_8gpu.yaml
│ │ │ ├── RegNetX-12GF_dds_8gpu.yaml
│ │ │ ├── RegNetX-16GF_dds_8gpu.yaml
│ │ │ ├── RegNetX-200MF_dds_8gpu.yaml
│ │ │ ├── RegNetX-3.2GF_dds_8gpu.yaml
│ │ │ ├── RegNetX-32GF_dds_8gpu.yaml
│ │ │ ├── RegNetX-4.0GF_dds_8gpu.yaml
│ │ │ ├── RegNetX-400MF_dds_8gpu.yaml
│ │ │ ├── RegNetX-6.4GF_dds_8gpu.yaml
│ │ │ ├── RegNetX-600MF_dds_8gpu.yaml
│ │ │ ├── RegNetX-8.0GF_dds_8gpu.yaml
│ │ │ └── RegNetX-800MF_dds_8gpu.yaml
│ │ ├── regnety
│ │ │ ├── RegNetY-1.6GF_dds_8gpu.yaml
│ │ │ ├── RegNetY-12GF_dds_8gpu.yaml
│ │ │ ├── RegNetY-16GF_dds_8gpu.yaml
│ │ │ ├── RegNetY-200MF_dds_8gpu.yaml
│ │ │ ├── RegNetY-3.2GF_dds_8gpu.yaml
│ │ │ ├── RegNetY-32GF_dds_8gpu.yaml
│ │ │ ├── RegNetY-4.0GF_dds_8gpu.yaml
│ │ │ ├── RegNetY-400MF_dds_8gpu.yaml
│ │ │ ├── RegNetY-6.4GF_dds_8gpu.yaml
│ │ │ ├── RegNetY-600MF_dds_8gpu.yaml
│ │ │ ├── RegNetY-8.0GF_dds_8gpu.yaml
│ │ │ └── RegNetY-800MF_dds_8gpu.yaml
│ │ ├── resnet
│ │ │ ├── R-101-1x64d_dds_8gpu.yaml
│ │ │ ├── R-152-1x64d_dds_8gpu.yaml
│ │ │ └── R-50-1x64d_dds_8gpu.yaml
│ │ └── resnext
│ │   ├── X-101-32x4d_dds_8gpu.yaml
│ │   ├── X-152-32x4d_dds_8gpu.yaml
│ │   └── X-50-32x4d_dds_8gpu.yaml
│ ├── examples
│ │ └── ffcv.yaml
│ ├── fsdp
│ │ └── example.yaml
│ └── sweeps
│   └── cifar
│     ├── cifar_best.yaml
│     ├── cifar_optim.yaml
│     └── cifar_regnet.yaml
├── dev
│ ├── .flake8
│ ├── .isort.cfg
│ ├── linter.sh
│ ├── model_complexity.json
│ ├── model_error.json
│ ├── model_timing.json
│ ├── model_zoo_tables.py
│ └── test_models.py
├── docs
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── DATA.md
│ ├── GETTING_STARTED.md
│ ├── SWEEP_INFO.md
│ └── regnetx_nets.png
├── pycls
│ ├── __init__.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── benchmark.py
│ │ ├── builders.py
│ │ ├── checkpoint.py
│ │ ├── config.py
│ │ ├── distributed.py
│ │ ├── io.py
│ │ ├── logging.py
│ │ ├── meters.py
│ │ ├── net.py
│ │ ├── optimizer.py
│ │ ├── plotting.py
│ │ ├── timer.py
│ │ └── trainer.py
│ ├── datasets
│ │ ├── __init__.py
│ │ ├── augment.py
│ │ ├── cifar10.py
│ │ ├── imagenet.py
│ │ ├── loader.py
│ │ └── transforms.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── anynet.py
│ │ ├── blocks.py
│ │ ├── effnet.py
│ │ ├── model_zoo.py
│ │ ├── regnet.py
│ │ ├── resnet.py
│ │ ├── scaler.py
│ │ └── vit.py
│ └── sweep
│   ├── __init__.py
│   ├── analysis.py
│   ├── config.py
│   ├── htmlbook.py
│   ├── plotting.py
│   ├── random.py
│   └── samplers.py
├── requirements.txt
├── setup.py
└── tools
  ├── run_net.py
  ├── sweep_analyze.py
  ├── sweep_collect.py
  ├── sweep_launch.py
  ├── sweep_launch_job.py
  └── sweep_setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # Shared objects
7 | *.so
8 |
9 | # Distribution / packaging
10 | build/
11 | *.egg-info/
12 | *.egg
13 |
14 | # Temporary files
15 | *.swn
16 | *.swo
17 | *.swp
18 |
19 | # PyCharm
20 | .idea/
21 |
22 | # Mac
23 | .DS_STORE
24 |
25 | # Data symlinks
26 | pycls/datasets/data/
27 |
28 | # Other
29 | logs/
30 | scratch*
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Facebook, Inc. and its affiliates.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pycls
2 |
3 | [Support Ukraine](https://opensource.fb.com/support-ukraine)
4 |
5 | **pycls** is an image classification codebase, written in [PyTorch](https://pytorch.org/). It was originally developed for the [On Network Design Spaces for Visual Recognition](https://arxiv.org/abs/1905.13214) project. **pycls** has since matured and been adopted by a number of [projects](#projects) at Facebook AI Research.
6 |
7 |
8 | <div align="center">
9 |   <img src="docs/regnetx_nets.png" alt="pycls provides a large set of baseline models across a wide range of flop regimes."/>
10 | </div>
11 |
12 | ## Introduction
13 |
14 | The goal of **pycls** is to provide a simple and flexible codebase for image classification. It is designed to support rapid implementation and evaluation of research ideas. **pycls** also provides a large collection of baseline results ([Model Zoo](MODEL_ZOO.md)). The codebase supports efficient single-machine multi-gpu training, powered by the PyTorch distributed package, and provides implementations of standard models including [ResNet](https://arxiv.org/abs/1512.03385), [ResNeXt](https://arxiv.org/abs/1611.05431), [EfficientNet](https://arxiv.org/abs/1905.11946), and [RegNet](https://arxiv.org/abs/2003.13678).
15 |
16 | ## Using pycls
17 |
18 | Please see [`GETTING_STARTED`](docs/GETTING_STARTED.md) for brief installation instructions and basic usage examples.
19 |
20 | ## Model Zoo
21 |
22 | We provide a large set of baseline results and pretrained models available for download in the **pycls** [Model Zoo](MODEL_ZOO.md), including the simple, fast, and effective [RegNet](https://arxiv.org/abs/2003.13678) models that we hope can serve as solid baselines across a wide range of flop regimes.
23 |
24 | ## Sweep Code
25 |
26 | The pycls codebase now provides powerful support for studying *design spaces* and more generally *population statistics* of models as introduced in [On Network Design Spaces for Visual Recognition](https://arxiv.org/abs/1905.13214) and [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678). The idea is that instead of planning a single pycls job (e.g., testing a specific model configuration), one can study the behavior of an entire population of models. This allows for quite powerful and succinct experimental design, and elevates the study of individual model behavior to the study of the behavior of model populations. Please see [`SWEEP_INFO`](docs/SWEEP_INFO.md) for details.
27 |
28 | ## Projects
29 |
30 | A number of projects at FAIR have been built on top of **pycls**:
31 |
32 | - [On Network Design Spaces for Visual Recognition](https://arxiv.org/abs/1905.13214)
33 | - [Exploring Randomly Wired Neural Networks for Image Recognition](https://arxiv.org/abs/1904.01569)
34 | - [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678)
35 | - [Fast and Accurate Model Scaling](https://arxiv.org/abs/2103.06877)
36 | - [Are Labels Necessary for Neural Architecture Search?](https://arxiv.org/abs/2003.12056)
37 | - [PySlowFast Video Understanding Codebase](https://github.com/facebookresearch/SlowFast)
38 |
39 | If you are using **pycls** in your research and would like to include your project here, please let us know or send a PR.
40 |
41 | ## Citing pycls
42 |
43 | If you find **pycls** helpful in your research or refer to the baseline results in the [Model Zoo](MODEL_ZOO.md), please consider citing an appropriate subset of the following papers:
44 |
45 | ```
46 | @InProceedings{Radosavovic2019,
47 | title = {On Network Design Spaces for Visual Recognition},
48 | author = {Ilija Radosavovic and Justin Johnson and Saining Xie and Wan-Yen Lo and Piotr Doll{\'a}r},
49 | booktitle = {ICCV},
50 | year = {2019}
51 | }
52 |
53 | @InProceedings{Radosavovic2020,
54 | title = {Designing Network Design Spaces},
55 | author = {Ilija Radosavovic and Raj Prateek Kosaraju and Ross Girshick and Kaiming He and Piotr Doll{\'a}r},
56 | booktitle = {CVPR},
57 | year = {2020}
58 | }
59 |
60 | @InProceedings{Dollar2021,
61 | title = {Fast and Accurate Model Scaling},
62 | author = {Piotr Doll{\'a}r and Mannat Singh and Ross Girshick},
63 | booktitle = {CVPR},
64 | year = {2021}
65 | }
66 | ```
67 |
68 | ## License
69 |
70 | **pycls** is released under the MIT license. Please see the [`LICENSE`](LICENSE) file for more information.
71 |
72 | ## Contributing
73 |
74 | We actively welcome your pull requests! Please see [`CONTRIBUTING.md`](docs/CONTRIBUTING.md) and [`CODE_OF_CONDUCT.md`](docs/CODE_OF_CONDUCT.md) for more info.
75 |
--------------------------------------------------------------------------------
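The README points to GETTING_STARTED.md and MODEL_ZOO.md for usage, but neither file is included in this snapshot. As a minimal sketch, assuming `pycls.models` re-exports the constructors defined in `pycls/models/model_zoo.py` (visible in the tree above; check MODEL_ZOO.md for the canonical API), loading a pretrained Model Zoo network looks roughly like:

```python
# Sketch only -- assumes pycls.models exposes the model_zoo constructors
# (pycls/models/model_zoo.py in the tree above); names may differ slightly.
import torch
import pycls.models as models

# Build a RegNetY-400MF and fetch its pretrained Model Zoo weights.
model = models.regnety("400MF", pretrained=True)
model.eval()

# Forward a dummy batch at the training resolution the configs use (224).
with torch.no_grad():
    logits = model(torch.zeros(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000]) -- matches NUM_CLASSES: 1000
```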
/configs/archive/cifar/anynet/R-110_nds_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 10
4 | ANYNET:
5 | STEM_TYPE: res_stem_cifar
6 | STEM_W: 16
7 | BLOCK_TYPE: res_basic_block
8 | DEPTHS: [18, 18, 18]
9 | WIDTHS: [16, 32, 64]
10 | STRIDES: [1, 2, 2]
11 | BN:
12 | USE_PRECISE_STATS: True
13 | NUM_SAMPLES_PRECISE: 1024
14 | OPTIM:
15 | BASE_LR: 0.1
16 | LR_POLICY: cos
17 | MAX_EPOCH: 200
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0005
21 | TRAIN:
22 | DATASET: cifar10
23 | SPLIT: train
24 | BATCH_SIZE: 128
25 | IM_SIZE: 32
26 | TEST:
27 | DATASET: cifar10
28 | SPLIT: test
29 | BATCH_SIZE: 200
30 | IM_SIZE: 32
31 | NUM_GPUS: 1
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: False
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
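The YAML files that follow are partial configs: each one overrides the defaults defined in `pycls/core/config.py`, which builds on yacs. A minimal sketch of the merge, assuming that module exposes its defaults as a global yacs `CfgNode` named `cfg` (normally `tools/run_net.py` does this for you):

```python
# Sketch, assuming pycls/core/config.py exposes a global yacs CfgNode `cfg`.
from pycls.core.config import cfg

# Overlay the YAML on the defaults, apply CLI-style overrides, then freeze.
cfg.merge_from_file("configs/archive/cifar/anynet/R-110_nds_1gpu.yaml")
cfg.merge_from_list(["OUT_DIR", "/tmp/r110"])  # same KEY VALUE form as the CLI
cfg.freeze()

print(cfg.MODEL.TYPE, cfg.OPTIM.MAX_EPOCH)  # -> anynet 200
```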
/configs/archive/cifar/anynet/R-56_nds_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 10
4 | ANYNET:
5 | STEM_TYPE: res_stem_cifar
6 | STEM_W: 16
7 | BLOCK_TYPE: res_basic_block
8 | DEPTHS: [9, 9, 9]
9 | WIDTHS: [16, 32, 64]
10 | STRIDES: [1, 2, 2]
11 | BN:
12 | USE_PRECISE_STATS: True
13 | NUM_SAMPLES_PRECISE: 1024
14 | OPTIM:
15 | BASE_LR: 0.1
16 | LR_POLICY: cos
17 | MAX_EPOCH: 200
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0005
21 | TRAIN:
22 | DATASET: cifar10
23 | SPLIT: train
24 | BATCH_SIZE: 128
25 | IM_SIZE: 32
26 | TEST:
27 | DATASET: cifar10
28 | SPLIT: test
29 | BATCH_SIZE: 200
30 | IM_SIZE: 32
31 | NUM_GPUS: 1
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: False
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/cifar/anynet/V-56_nds_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 10
4 | ANYNET:
5 | STEM_TYPE: res_stem_cifar
6 | STEM_W: 16
7 | BLOCK_TYPE: vanilla_block
8 | DEPTHS: [9, 9, 9]
9 | WIDTHS: [16, 32, 64]
10 | STRIDES: [1, 2, 2]
11 | BN:
12 | USE_PRECISE_STATS: True
13 | NUM_SAMPLES_PRECISE: 1024
14 | OPTIM:
15 | BASE_LR: 0.1
16 | LR_POLICY: cos
17 | MAX_EPOCH: 200
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0005
21 | TRAIN:
22 | DATASET: cifar10
23 | SPLIT: train
24 | BATCH_SIZE: 128
25 | IM_SIZE: 32
26 | TEST:
27 | DATASET: cifar10
28 | SPLIT: test
29 | BATCH_SIZE: 200
30 | IM_SIZE: 32
31 | NUM_GPUS: 1
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: False
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/cifar/resnet/R-110_nds_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 110
4 | NUM_CLASSES: 10
5 | RESNET:
6 | TRANS_FUN: basic_transform
7 | BN:
8 | USE_PRECISE_STATS: True
9 | NUM_SAMPLES_PRECISE: 1024
10 | OPTIM:
11 | BASE_LR: 0.1
12 | LR_POLICY: cos
13 | MAX_EPOCH: 200
14 | MOMENTUM: 0.9
15 | NESTEROV: True
16 | WEIGHT_DECAY: 0.0005
17 | TRAIN:
18 | DATASET: cifar10
19 | SPLIT: train
20 | BATCH_SIZE: 128
21 | IM_SIZE: 32
22 | TEST:
23 | DATASET: cifar10
24 | SPLIT: test
25 | BATCH_SIZE: 200
26 | IM_SIZE: 32
27 | NUM_GPUS: 1
28 | DATA_LOADER:
29 | NUM_WORKERS: 4
30 | CUDNN:
31 | BENCHMARK: False
32 | OUT_DIR: .
33 |
--------------------------------------------------------------------------------
/configs/archive/cifar/resnet/R-56_nds_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 56
4 | NUM_CLASSES: 10
5 | RESNET:
6 | TRANS_FUN: basic_transform
7 | BN:
8 | USE_PRECISE_STATS: True
9 | NUM_SAMPLES_PRECISE: 1024
10 | OPTIM:
11 | BASE_LR: 0.1
12 | LR_POLICY: cos
13 | MAX_EPOCH: 200
14 | MOMENTUM: 0.9
15 | NESTEROV: True
16 | WEIGHT_DECAY: 0.0005
17 | TRAIN:
18 | DATASET: cifar10
19 | SPLIT: train
20 | BATCH_SIZE: 128
21 | IM_SIZE: 32
22 | TEST:
23 | DATASET: cifar10
24 | SPLIT: test
25 | BATCH_SIZE: 200
26 | IM_SIZE: 32
27 | NUM_GPUS: 1
28 | DATA_LOADER:
29 | NUM_WORKERS: 4
30 | CUDNN:
31 | BENCHMARK: False
32 | OUT_DIR: .
33 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/R-101-1x64d_step_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 23, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.0125
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 32
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 25
33 | IM_SIZE: 256
34 | NUM_GPUS: 1
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/R-101-1x64d_step_2gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 23, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.025
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 64
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 50
33 | IM_SIZE: 256
34 | NUM_GPUS: 2
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/R-101-1x64d_step_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 23, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.1
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 256
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 200
33 | IM_SIZE: 256
34 | NUM_GPUS: 8
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/R-50-1x64d_step_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 6, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.0125
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 32
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 25
33 | IM_SIZE: 256
34 | NUM_GPUS: 1
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/R-50-1x64d_step_2gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 6, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.025
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 64
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 50
33 | IM_SIZE: 256
34 | NUM_GPUS: 2
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/R-50-1x64d_step_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 6, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.1
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 256
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 200
33 | IM_SIZE: 256
34 | NUM_GPUS: 8
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
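Across each 1/2/8-GPU trio of configs above, only NUM_GPUS, the batch sizes, and BASE_LR change, with BASE_LR following the linear scaling rule (0.0125 at batch 32, 0.025 at 64, 0.1 at 256). A small sketch of that rule plus the `steps` LR policy these configs share (STEPS: [0, 30, 60, 90], LR_MULT: 0.1); pycls computes this in `pycls/core/optimizer.py`, and the function names here are illustrative:

```python
def scaled_base_lr(batch_size, ref_lr=0.1, ref_batch=256):
    """Linear scaling rule: batch 32 -> 0.0125, 64 -> 0.025, 256 -> 0.1."""
    return ref_lr * batch_size / ref_batch

def step_lr(epoch, base_lr, steps=(0, 30, 60, 90), lr_mult=0.1):
    """LR at `epoch`: multiplied by lr_mult at each step boundary passed."""
    num_decays = sum(1 for s in steps if epoch >= s) - 1
    return base_lr * lr_mult ** num_decays

assert scaled_base_lr(32) == 0.0125 and scaled_base_lr(64) == 0.025
print([step_lr(e, 0.1) for e in (0, 29, 30, 60, 90)])
# [0.1, 0.1, 0.01, 0.001, 0.0001] (up to float rounding)
```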
/configs/archive/imagenet/anynet/X-101-32x4d_step_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 23, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.0125
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 32
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 25
33 | IM_SIZE: 256
34 | NUM_GPUS: 1
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/X-101-32x4d_step_2gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 23, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.025
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 64
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 50
33 | IM_SIZE: 256
34 | NUM_GPUS: 2
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/X-101-32x4d_step_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 23, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.1
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 256
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 200
33 | IM_SIZE: 256
34 | NUM_GPUS: 8
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/X-50-32x4d_step_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 6, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.0125
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 32
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 25
33 | IM_SIZE: 256
34 | NUM_GPUS: 1
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/X-50-32x4d_step_2gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 6, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.025
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 64
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 50
33 | IM_SIZE: 256
34 | NUM_GPUS: 2
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/anynet/X-50-32x4d_step_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | DEPTHS: [3, 4, 6, 3]
9 | WIDTHS: [256, 512, 1024, 2048]
10 | STRIDES: [1, 2, 2, 2]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | BN:
14 | ZERO_INIT_FINAL_GAMMA: True
15 | OPTIM:
16 | BASE_LR: 0.1
17 | LR_POLICY: steps
18 | STEPS: [0, 30, 60, 90]
19 | LR_MULT: 0.1
20 | MAX_EPOCH: 100
21 | MOMENTUM: 0.9
22 | NESTEROV: True
23 | WEIGHT_DECAY: 0.0001
24 | TRAIN:
25 | DATASET: imagenet
26 | SPLIT: train
27 | BATCH_SIZE: 256
28 | IM_SIZE: 224
29 | TEST:
30 | DATASET: imagenet
31 | SPLIT: val
32 | BATCH_SIZE: 200
33 | IM_SIZE: 256
34 | NUM_GPUS: 8
35 | DATA_LOADER:
36 | NUM_WORKERS: 4
37 | CUDNN:
38 | BENCHMARK: True
39 | OUT_DIR: .
40 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/R-101-1x64d_step_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 101
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 1
8 | WIDTH_PER_GROUP: 64
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.0125
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 32
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 25
30 | IM_SIZE: 256
31 | NUM_GPUS: 1
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/R-101-1x64d_step_2gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 101
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 1
8 | WIDTH_PER_GROUP: 64
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.025
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 64
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 50
30 | IM_SIZE: 256
31 | NUM_GPUS: 2
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/R-101-1x64d_step_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 101
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 1
8 | WIDTH_PER_GROUP: 64
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.1
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 256
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 200
30 | IM_SIZE: 256
31 | NUM_GPUS: 8
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/R-50-1x64d_step_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 50
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 1
8 | WIDTH_PER_GROUP: 64
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.0125
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 32
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 25
30 | IM_SIZE: 256
31 | NUM_GPUS: 1
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/R-50-1x64d_step_2gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 50
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 1
8 | WIDTH_PER_GROUP: 64
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.025
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 64
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 50
30 | IM_SIZE: 256
31 | NUM_GPUS: 2
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/R-50-1x64d_step_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 50
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 1
8 | WIDTH_PER_GROUP: 64
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.1
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 256
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 200
30 | IM_SIZE: 256
31 | NUM_GPUS: 8
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/X-101-32x4d_step_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 101
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 32
8 | WIDTH_PER_GROUP: 4
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.0125
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 32
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 25
30 | IM_SIZE: 256
31 | NUM_GPUS: 1
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/X-101-32x4d_step_2gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 101
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 32
8 | WIDTH_PER_GROUP: 4
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.025
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 64
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 50
30 | IM_SIZE: 256
31 | NUM_GPUS: 2
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/X-101-32x4d_step_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 101
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 32
8 | WIDTH_PER_GROUP: 4
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.1
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 256
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 200
30 | IM_SIZE: 256
31 | NUM_GPUS: 8
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/X-50-32x4d_step_1gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 50
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 32
8 | WIDTH_PER_GROUP: 4
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.0125
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 32
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 25
30 | IM_SIZE: 256
31 | NUM_GPUS: 1
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/X-50-32x4d_step_2gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 50
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 32
8 | WIDTH_PER_GROUP: 4
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.025
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 64
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 50
30 | IM_SIZE: 256
31 | NUM_GPUS: 2
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/archive/imagenet/resnet/X-50-32x4d_step_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: resnet
3 | DEPTH: 50
4 | NUM_CLASSES: 1000
5 | RESNET:
6 | TRANS_FUN: bottleneck_transform
7 | NUM_GROUPS: 32
8 | WIDTH_PER_GROUP: 4
9 | STRIDE_1X1: False
10 | BN:
11 | ZERO_INIT_FINAL_GAMMA: True
12 | OPTIM:
13 | BASE_LR: 0.1
14 | LR_POLICY: steps
15 | STEPS: [0, 30, 60, 90]
16 | LR_MULT: 0.1
17 | MAX_EPOCH: 100
18 | MOMENTUM: 0.9
19 | NESTEROV: True
20 | WEIGHT_DECAY: 0.0001
21 | TRAIN:
22 | DATASET: imagenet
23 | SPLIT: train
24 | BATCH_SIZE: 256
25 | IM_SIZE: 224
26 | TEST:
27 | DATASET: imagenet
28 | SPLIT: val
29 | BATCH_SIZE: 200
30 | IM_SIZE: 256
31 | NUM_GPUS: 8
32 | DATA_LOADER:
33 | NUM_WORKERS: 4
34 | CUDNN:
35 | BENCHMARK: True
36 | OUT_DIR: .
37 |
--------------------------------------------------------------------------------
/configs/dds_baselines/effnet/EN-B0_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: effnet
3 | NUM_CLASSES: 1000
4 | ACTIVATION_FUN: silu
5 | EN:
6 | STEM_W: 32
7 | STRIDES: [1, 2, 2, 2, 1, 2, 1]
8 | DEPTHS: [1, 2, 2, 3, 3, 4, 1]
9 | WIDTHS: [16, 24, 40, 80, 112, 192, 320]
10 | EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
11 | KERNELS: [3, 3, 5, 3, 5, 5, 3]
12 | HEAD_W: 1280
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.4
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 1e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 224
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 256
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/effnet/EN-B1_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: effnet
3 | NUM_CLASSES: 1000
4 | ACTIVATION_FUN: silu
5 | EN:
6 | STEM_W: 32
7 | STRIDES: [1, 2, 2, 2, 1, 2, 1]
8 | DEPTHS: [2, 3, 3, 4, 4, 5, 2]
9 | WIDTHS: [16, 24, 40, 80, 112, 192, 320]
10 | EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
11 | KERNELS: [3, 3, 5, 3, 5, 5, 3]
12 | HEAD_W: 1280
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.4
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 1e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 240
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 274
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/effnet/EN-B2_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: effnet
3 | NUM_CLASSES: 1000
4 | ACTIVATION_FUN: silu
5 | EN:
6 | STEM_W: 32
7 | STRIDES: [1, 2, 2, 2, 1, 2, 1]
8 | DEPTHS: [2, 3, 3, 4, 4, 5, 2]
9 | WIDTHS: [16, 24, 48, 88, 120, 208, 352]
10 | EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
11 | KERNELS: [3, 3, 5, 3, 5, 5, 3]
12 | HEAD_W: 1408
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.4
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 1e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 260
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 298
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/effnet/EN-B3_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: effnet
3 | NUM_CLASSES: 1000
4 | ACTIVATION_FUN: silu
5 | EN:
6 | STEM_W: 40
7 | STRIDES: [1, 2, 2, 2, 1, 2, 1]
8 | DEPTHS: [2, 3, 3, 5, 5, 6, 2]
9 | WIDTHS: [24, 32, 48, 96, 136, 232, 384]
10 | EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
11 | KERNELS: [3, 3, 5, 3, 5, 5, 3]
12 | HEAD_W: 1536
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.4
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 1e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 300
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 342
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/effnet/EN-B4_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: effnet
3 | NUM_CLASSES: 1000
4 | ACTIVATION_FUN: silu
5 | EN:
6 | STEM_W: 48
7 | STRIDES: [1, 2, 2, 2, 1, 2, 1]
8 | DEPTHS: [2, 4, 4, 6, 6, 8, 2]
9 | WIDTHS: [24, 32, 56, 112, 160, 272, 448]
10 | EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
11 | KERNELS: [3, 3, 5, 3, 5, 5, 3]
12 | HEAD_W: 1792
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.2
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 1e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 380
22 | BATCH_SIZE: 128
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 434
26 | BATCH_SIZE: 104
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/effnet/EN-B5_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: effnet
3 | NUM_CLASSES: 1000
4 | ACTIVATION_FUN: silu
5 | EN:
6 | STEM_W: 48
7 | STRIDES: [1, 2, 2, 2, 1, 2, 1]
8 | DEPTHS: [3, 5, 5, 7, 7, 9, 3]
9 | WIDTHS: [24, 40, 64, 128, 176, 304, 512]
10 | EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
11 | KERNELS: [3, 3, 5, 3, 5, 5, 3]
12 | HEAD_W: 2048
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.1
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 1e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 456
22 | BATCH_SIZE: 64
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 522
26 | BATCH_SIZE: 48
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
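One pattern worth noting across EN-B0 through EN-B5: TEST.IM_SIZE is TRAIN.IM_SIZE scaled by 8/7 and rounded to the nearest even integer, i.e., the train crop is 87.5% of the test image. (BASE_LR also tracks batch size linearly here: 0.4 at 256, 0.2 at 128, 0.1 at 64.) A quick check of the size pattern against the six configs above; this is an observation about these files, not an official EfficientNet formula:

```python
def en_test_size(train_size):
    """Nearest even integer to train_size * 8 / 7."""
    return round(train_size * 8 / 14) * 2

pairs = [(224, 256), (240, 274), (260, 298), (300, 342), (380, 434), (456, 522)]
assert all(en_test_size(tr) == te for tr, te in pairs)
```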
/configs/dds_baselines/regnetx/RegNetX-1.6GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 18
6 | W0: 80
7 | WA: 34.01
8 | WM: 2.25
9 | GROUP_W: 24
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.8
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 1024
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 800
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-12GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 19
6 | W0: 168
7 | WA: 73.36
8 | WM: 2.37
9 | GROUP_W: 112
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.4
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 512
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 400
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-16GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 22
6 | W0: 216
7 | WA: 55.59
8 | WM: 2.1
9 | GROUP_W: 128
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.4
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 512
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 400
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-200MF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 13
6 | W0: 24
7 | WA: 36.44
8 | WM: 2.49
9 | GROUP_W: 8
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.8
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 1024
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 800
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-3.2GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 25
6 | W0: 88
7 | WA: 26.31
8 | WM: 2.25
9 | GROUP_W: 48
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.4
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 512
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 400
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-32GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 23
6 | W0: 320
7 | WA: 69.86
8 | WM: 2.0
9 | GROUP_W: 168
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.2
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 256
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 200
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 23
6 | W0: 96
7 | WA: 38.65
8 | WM: 2.43
9 | GROUP_W: 40
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.4
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 512
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 400
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 22
6 | W0: 24
7 | WA: 24.48
8 | WM: 2.54
9 | GROUP_W: 16
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.8
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 1024
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 800
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-6.4GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 17
6 | W0: 184
7 | WA: 60.83
8 | WM: 2.07
9 | GROUP_W: 56
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.4
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 512
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 400
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-600MF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 16
6 | W0: 48
7 | WA: 36.97
8 | WM: 2.24
9 | GROUP_W: 24
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.8
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 1024
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 800
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-8.0GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 23
6 | W0: 80
7 | WA: 49.56
8 | WM: 2.88
9 | GROUP_W: 120
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.4
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 512
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 400
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnetx/RegNetX-800MF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | DEPTH: 16
6 | W0: 56
7 | WA: 35.73
8 | WM: 2.28
9 | GROUP_W: 16
10 | OPTIM:
11 | LR_POLICY: cos
12 | BASE_LR: 0.8
13 | MAX_EPOCH: 100
14 | MOMENTUM: 0.9
15 | WEIGHT_DECAY: 5e-5
16 | WARMUP_EPOCHS: 5
17 | TRAIN:
18 | DATASET: imagenet
19 | IM_SIZE: 224
20 | BATCH_SIZE: 1024
21 | TEST:
22 | DATASET: imagenet
23 | IM_SIZE: 256
24 | BATCH_SIZE: 800
25 | NUM_GPUS: 8
26 | OUT_DIR: .
27 |
--------------------------------------------------------------------------------
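The REGNET block in each config above is the entire model specification: DEPTH, W0 (initial width), WA (width slope), WM (width multiplier), and GROUP_W generate all per-block widths via the quantized linear rule from Designing Network Design Spaces. A sketch of that rule; pycls implements it in `pycls/models/regnet.py`, and the group-width compatibility and stage rounding adjustments are omitted here:

```python
import numpy as np

def regnet_widths(w_a, w_0, w_m, depth, q=8):
    """Quantized linear widths: u_j = w_0 + w_a * j, snapped to powers of w_m."""
    u = w_0 + w_a * np.arange(depth)             # continuous widths per block
    s = np.round(np.log(u / w_0) / np.log(w_m))  # quantized exponent per block
    w = w_0 * np.power(w_m, s)                   # piecewise-constant widths
    return (np.round(w / q) * q).astype(int)     # snap to multiples of q

# RegNetX-800MF from the config above: DEPTH: 16, W0: 56, WA: 35.73, WM: 2.28
print(regnet_widths(35.73, 56, 2.28, 16))
```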
/configs/dds_baselines/regnety/RegNetY-1.6GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 27
7 | W0: 48
8 | WA: 20.71
9 | WM: 2.65
10 | GROUP_W: 24
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.8
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 1024
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 800
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-12GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 19
7 | W0: 168
8 | WA: 73.36
9 | WM: 2.37
10 | GROUP_W: 112
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.4
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 512
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 400
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-16GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 18
7 | W0: 200
8 | WA: 106.23
9 | WM: 2.48
10 | GROUP_W: 112
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.2
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 256
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 200
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-200MF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 13
7 | W0: 24
8 | WA: 36.44
9 | WM: 2.49
10 | GROUP_W: 8
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.8
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 1024
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 800
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-3.2GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 21
7 | W0: 80
8 | WA: 42.63
9 | WM: 2.66
10 | GROUP_W: 24
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.4
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 512
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 400
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-32GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 20
7 | W0: 232
8 | WA: 115.89
9 | WM: 2.53
10 | GROUP_W: 232
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.2
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 256
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 200
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 22
7 | W0: 96
8 | WA: 31.41
9 | WM: 2.24
10 | GROUP_W: 64
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.4
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 512
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 400
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-400MF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 16
7 | W0: 48
8 | WA: 27.89
9 | WM: 2.09
10 | GROUP_W: 8
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.8
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 1024
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 800
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-6.4GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 25
7 | W0: 112
8 | WA: 33.22
9 | WM: 2.27
10 | GROUP_W: 72
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.4
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 512
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 400
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-600MF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 15
7 | W0: 48
8 | WA: 32.54
9 | WM: 2.32
10 | GROUP_W: 16
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.8
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 1024
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 800
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-8.0GF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 17
7 | W0: 192
8 | WA: 76.82
9 | WM: 2.19
10 | GROUP_W: 56
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.4
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 512
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 400
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
--------------------------------------------------------------------------------
/configs/dds_baselines/regnety/RegNetY-800MF_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: regnet
3 | NUM_CLASSES: 1000
4 | REGNET:
5 | SE_ON: True
6 | DEPTH: 14
7 | W0: 56
8 | WA: 38.84
9 | WM: 2.4
10 | GROUP_W: 16
11 | OPTIM:
12 | LR_POLICY: cos
13 | BASE_LR: 0.8
14 | MAX_EPOCH: 100
15 | MOMENTUM: 0.9
16 | WEIGHT_DECAY: 5e-5
17 | WARMUP_EPOCHS: 5
18 | TRAIN:
19 | DATASET: imagenet
20 | IM_SIZE: 224
21 | BATCH_SIZE: 1024
22 | TEST:
23 | DATASET: imagenet
24 | IM_SIZE: 256
25 | BATCH_SIZE: 800
26 | NUM_GPUS: 8
27 | OUT_DIR: .
28 |
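Across the RegNetY configs above, BASE_LR tracks TRAIN.BATCH_SIZE linearly: 0.8 at batch 1024, 0.4 at 512, 0.2 at 256. That is the standard linear LR scaling rule, which keeps the ratio lr/batch_size fixed when memory forces a smaller batch. A small illustrative helper (not part of pycls):

```
def scale_lr(ref_lr, ref_batch_size, new_batch_size):
    """Linear scaling rule: keep lr / batch_size constant."""
    return ref_lr * new_batch_size / ref_batch_size

# RegNetY-800MF trains at lr=0.8 with batch size 1024; to run it at
# batch size 256 one would scale the learning rate accordingly:
print(scale_lr(0.8, 1024, 256))  # -> 0.2
```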
--------------------------------------------------------------------------------
/configs/dds_baselines/resnet/R-101-1x64d_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | STRIDES: [1, 2, 2, 2]
9 | DEPTHS: [3, 4, 23, 3]
10 | WIDTHS: [256, 512, 1024, 2048]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.2
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 5e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 224
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 256
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/resnet/R-152-1x64d_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | STRIDES: [1, 2, 2, 2]
9 | DEPTHS: [3, 8, 36, 3]
10 | WIDTHS: [256, 512, 1024, 2048]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.2
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 5e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 224
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 256
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/resnet/R-50-1x64d_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | STRIDES: [1, 2, 2, 2]
9 | DEPTHS: [3, 4, 6, 3]
10 | WIDTHS: [256, 512, 1024, 2048]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.2
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 5e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 224
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 256
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/resnext/X-101-32x4d_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | STRIDES: [1, 2, 2, 2]
9 | DEPTHS: [3, 4, 23, 3]
10 | WIDTHS: [256, 512, 1024, 2048]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.2
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 5e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 224
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 256
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/resnext/X-152-32x4d_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | STRIDES: [1, 2, 2, 2]
9 | DEPTHS: [3, 8, 36, 3]
10 | WIDTHS: [256, 512, 1024, 2048]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.2
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 5e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 224
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 256
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
--------------------------------------------------------------------------------
/configs/dds_baselines/resnext/X-50-32x4d_dds_8gpu.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | STRIDES: [1, 2, 2, 2]
9 | DEPTHS: [3, 4, 6, 3]
10 | WIDTHS: [256, 512, 1024, 2048]
11 | BOT_MULS: [0.5, 0.5, 0.5, 0.5]
12 | GROUP_WS: [4, 8, 16, 32]
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.2
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 5e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 224
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 256
26 | BATCH_SIZE: 200
27 | NUM_GPUS: 8
28 | OUT_DIR: .
29 |
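The ResNet and ResNeXt baselines above are both expressed as ANYNET configs; the difference is entirely in BOT_MULS and GROUP_WS. Each stage's bottleneck width is WIDTHS times BOT_MULS, and the number of convolution groups is that width divided by GROUP_WS. A quick check of the arithmetic (plain Python, values copied from the two configs above):

```
widths = [256, 512, 1024, 2048]

# ResNet-50: bottleneck mult 0.25, group width == bottleneck width -> 1 group
bot_muls, group_ws = [0.25] * 4, [64, 128, 256, 512]
print([int(w * b) // g for w, b, g in zip(widths, bot_muls, group_ws)])
# -> [1, 1, 1, 1]  (ordinary convolutions)

# ResNeXt-50 (32x4d): bottleneck mult 0.5, group widths 4, 8, 16, 32
bot_muls, group_ws = [0.5] * 4, [4, 8, 16, 32]
print([int(w * b) // g for w, b, g in zip(widths, bot_muls, group_ws)])
# -> [32, 32, 32, 32]  (the "32x" in 32x4d)
```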
--------------------------------------------------------------------------------
/configs/examples/ffcv.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: anynet
3 | NUM_CLASSES: 1000
4 | ANYNET:
5 | STEM_TYPE: res_stem_in
6 | STEM_W: 64
7 | BLOCK_TYPE: res_bottleneck_block
8 | STRIDES: [1, 2, 2, 2]
9 | DEPTHS: [3, 4, 6, 3]
10 | WIDTHS: [256, 512, 1024, 2048]
11 | BOT_MULS: [0.25, 0.25, 0.25, 0.25]
12 | GROUP_WS: [64, 128, 256, 512]
13 | OPTIM:
14 | LR_POLICY: cos
15 | BASE_LR: 0.2
16 | MAX_EPOCH: 100
17 | MOMENTUM: 0.9
18 | WEIGHT_DECAY: 5e-5
19 | TRAIN:
20 | DATASET: imagenet
21 | IM_SIZE: 224
22 | BATCH_SIZE: 256
23 | TEST:
24 | DATASET: imagenet
25 | IM_SIZE: 256
26 | BATCH_SIZE: 200
27 | DATA_LOADER:
28 | MODE: ffcv
29 | NUM_GPUS: 8
30 | OUT_DIR: .
31 | LAUNCH:
32 | GPU_TYPE: "volta32gb"
33 | MODE: "local"
34 |
--------------------------------------------------------------------------------
/configs/fsdp/example.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: vit
3 | NUM_CLASSES: 1000
4 | VIT:
5 | PATCH_SIZE: 16
6 | STEM_TYPE: "patchify"
7 | NUM_LAYERS: 32
8 | NUM_HEADS: 16
9 | HIDDEN_DIM: 1280
10 | MLP_DIM: 5120
11 | CLASSIFIER_TYPE: "pooled"
12 | BN:
13 | USE_PRECISE_STATS: False
14 | OPTIM:
15 | OPTIMIZER: adamw
16 | LR_POLICY: cos
17 | BASE_LR: 0.001
18 | MIN_LR: 0.005
19 | MAX_EPOCH: 100
20 | WEIGHT_DECAY: 0.24
21 | WARMUP_EPOCHS: 5
22 | EMA_ALPHA: 1.0e-5
23 | EMA_UPDATE_PERIOD: 32
24 | BIAS_USE_CUSTOM_WEIGHT_DECAY: True
25 | BIAS_CUSTOM_WEIGHT_DECAY: 0.
26 | MTA: True
27 | LN:
28 | EPS: 1e-6
29 | USE_CUSTOM_WEIGHT_DECAY: True
30 | CUSTOM_WEIGHT_DECAY: 0.
31 | TRAIN:
32 | DATASET: imagenet
33 | IM_SIZE: 224
34 | BATCH_SIZE: 256
35 | MIXUP_ALPHA: 0.8
36 | CUTMIX_ALPHA: 1.0
37 | LABEL_SMOOTHING: 0.1
38 | AUGMENT: AutoAugment
39 | MIXED_PRECISION: True
40 | TEST:
41 | DATASET: imagenet
42 | IM_SIZE: 224
43 | BATCH_SIZE: 256
44 | DATA_LOADER:
45 | NUM_WORKERS: 10
46 | LOG_PERIOD: 100
47 | NUM_GPUS: 8
48 | LAUNCH:
49 | GPU_TYPE: "volta32gb"
50 | MODE: "local"
51 | FSDP:
52 | ENABLED: True
53 | RESHARD_AFTER_FW: False
54 | LAYER_NORM_FP32: True
55 |
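The FSDP block above toggles fully sharded data parallelism. A minimal sketch of what these options could correspond to when wrapping a model, assuming fairscale's `FullyShardedDataParallel` (the wrapper function here is illustrative, not pycls's actual code path):

```
import torch.nn as nn
from fairscale.nn import FullyShardedDataParallel as FSDP

def wrap_fsdp(model: nn.Module) -> nn.Module:
    # RESHARD_AFTER_FW: False keeps full parameters resident between the
    # forward and backward passes, trading memory for less communication.
    return FSDP(model, reshard_after_forward=False, mixed_precision=True)
```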
--------------------------------------------------------------------------------
/configs/sweeps/cifar/cifar_best.yaml:
--------------------------------------------------------------------------------
1 | DESC:
2 | Example CIFAR sweep 3 of 3 (trains the best model from cifar_regnet sweep).
3 | Trains the best RegNet-125MF from the cifar_regnet sweep for variable epoch lengths.
4 | Trains 3 copies of every model (to obtain mean and std of the error).
5 | The purpose of this sweep is to show how to train the FINAL version of a model.
6 | NAME: cifar/cifar_best
7 | SETUP:
8 | # Number of configs to sample
9 | NUM_CONFIGS: 12
10 | # SAMPLERS for optimization parameters
11 | SAMPLERS:
12 | OPTIM.MAX_EPOCH:
13 | TYPE: value_sampler
14 | VALUES: [50, 100, 200, 400]
15 | RNG_SEED:
16 | TYPE: int_sampler
17 | RAND_TYPE: uniform
18 | RANGE: [1, 3]
19 | QUANTIZE: 1
20 | CONSTRAINTS:
21 | REGNET:
22 | NUM_STAGES: [2, 2]
23 | # BASE_CFG is RegNet-125MF (best model from cifar_regnet sweep)
24 | BASE_CFG:
25 | MODEL:
26 | TYPE: regnet
27 | NUM_CLASSES: 10
28 | REGNET:
29 | STEM_TYPE: res_stem_cifar
30 | SE_ON: True
31 | STEM_W: 16
32 | DEPTH: 12
33 | W0: 96
34 | WA: 19.5
35 | WM: 2.942
36 | GROUP_W: 8
37 | OPTIM:
38 | BASE_LR: 1.0
39 | LR_POLICY: cos
40 | MAX_EPOCH: 50
41 | MOMENTUM: 0.9
42 | NESTEROV: True
43 | WARMUP_EPOCHS: 5
44 | WEIGHT_DECAY: 0.0005
45 | EMA_ALPHA: 0.00025
46 | EMA_UPDATE_PERIOD: 32
47 | BN:
48 | USE_CUSTOM_WEIGHT_DECAY: True
49 | TRAIN:
50 | DATASET: cifar10
51 | SPLIT: train
52 | BATCH_SIZE: 1024
53 | IM_SIZE: 32
54 | MIXED_PRECISION: True
55 | LABEL_SMOOTHING: 0.1
56 | MIXUP_ALPHA: 0.5
57 | TEST:
58 | DATASET: cifar10
59 | SPLIT: test
60 | BATCH_SIZE: 1000
61 | IM_SIZE: 32
62 | NUM_GPUS: 1
63 | DATA_LOADER:
64 | NUM_WORKERS: 4
65 | LOG_PERIOD: 25
66 | VERBOSE: False
67 | # Launch config options
68 | LAUNCH:
69 | PARTITION: devlab
70 | NUM_GPUS: 1
71 | PARALLEL_JOBS: 12
72 | TIME_LIMIT: 180
73 | # Analyze config options
74 | ANALYZE:
75 | PLOT_METRIC_VALUES: False
76 | PLOT_COMPLEXITY_VALUES: False
77 | PLOT_CURVES_BEST: 3
78 | PLOT_CURVES_WORST: 0
79 | PLOT_MODELS_BEST: 1
80 | METRICS: []
81 | COMPLEXITY: [flops, params, acts, memory, epoch_fw_bw, epoch_time]
82 | PRE_FILTERS: {done: [0, 1, 1]}
83 | SPLIT_FILTERS:
84 | epochs=050: {cfg.OPTIM.MAX_EPOCH: [ 50, 50, 50]}
85 | epochs=100: {cfg.OPTIM.MAX_EPOCH: [100, 100, 100]}
86 | epochs=200: {cfg.OPTIM.MAX_EPOCH: [200, 200, 200]}
87 | epochs=400: {cfg.OPTIM.MAX_EPOCH: [400, 400, 400]}
88 |
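Since NUM_CONFIGS is 12 = 4 MAX_EPOCH values x 3 seeds (RNG_SEED in [1, 3]), each epoch setting is trained three times, and the SPLIT_FILTERS group runs by epoch length. Aggregating the three copies into a mean and std of top-1 error might look like this (the `results` pairs are hypothetical; the sweep's real log format is defined in pycls/sweep/analysis.py):

```
import statistics
from collections import defaultdict

# Hypothetical per-run results: (max_epoch, top1_err) pairs
results = [(50, 7.1), (50, 7.3), (50, 7.0), (100, 6.2), (100, 6.4), (100, 6.1)]

by_epochs = defaultdict(list)
for max_epoch, err in results:
    by_epochs[max_epoch].append(err)

for max_epoch, errs in sorted(by_epochs.items()):
    print(max_epoch, statistics.mean(errs), statistics.stdev(errs))
```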
--------------------------------------------------------------------------------
/configs/sweeps/cifar/cifar_optim.yaml:
--------------------------------------------------------------------------------
1 | DESC:
2 | Example CIFAR sweep 1 of 3 (finds lr and wd for the cifar_regnet and cifar_best sweeps).
3 | Tunes the learning rate (lr) and weight decay (wd) for ResNet-56 at 50 epochs.
4 | The purpose of this sweep is to show how to optimize OPTIM parameters.
5 | NAME: cifar/cifar_optim
6 | SETUP:
7 | # Number of configs to sample
8 | NUM_CONFIGS: 64
9 | # SAMPLERS for optimization parameters
10 | SAMPLERS:
11 | OPTIM.BASE_LR:
12 | TYPE: float_sampler
13 | RAND_TYPE: log_uniform
14 | RANGE: [0.25, 5.0]
15 | QUANTIZE: 1.0e-10
16 | OPTIM.WEIGHT_DECAY:
17 | TYPE: float_sampler
18 | RAND_TYPE: log_uniform
19 | RANGE: [5.0e-5, 1.0e-3]
20 | QUANTIZE: 1.0e-10
21 | # BASE_CFG is R-56 with large batch size and stronger augmentation
22 | BASE_CFG:
23 | MODEL:
24 | TYPE: anynet
25 | NUM_CLASSES: 10
26 | ANYNET:
27 | STEM_TYPE: res_stem_cifar
28 | STEM_W: 16
29 | BLOCK_TYPE: res_basic_block
30 | DEPTHS: [9, 9, 9]
31 | WIDTHS: [16, 32, 64]
32 | STRIDES: [1, 2, 2]
33 | OPTIM:
34 | BASE_LR: 1.0
35 | LR_POLICY: cos
36 | MAX_EPOCH: 50
37 | MOMENTUM: 0.9
38 | NESTEROV: True
39 | WARMUP_EPOCHS: 5
40 | WEIGHT_DECAY: 0.0005
41 | EMA_ALPHA: 0.00025
42 | EMA_UPDATE_PERIOD: 32
43 | BN:
44 | USE_CUSTOM_WEIGHT_DECAY: True
45 | TRAIN:
46 | DATASET: cifar10
47 | SPLIT: train
48 | BATCH_SIZE: 1024
49 | IM_SIZE: 32
50 | MIXED_PRECISION: True
51 | LABEL_SMOOTHING: 0.1
52 | MIXUP_ALPHA: 0.5
53 | TEST:
54 | DATASET: cifar10
55 | SPLIT: test
56 | BATCH_SIZE: 1000
57 | IM_SIZE: 32
58 | NUM_GPUS: 1
59 | DATA_LOADER:
60 | NUM_WORKERS: 4
61 | LOG_PERIOD: 25
62 | VERBOSE: False
63 | # Launch config options
64 | LAUNCH:
65 | PARTITION: devlab
66 | NUM_GPUS: 1
67 | PARALLEL_JOBS: 32
68 | TIME_LIMIT: 60
69 | # Analyze config options
70 | ANALYZE:
71 | PLOT_CURVES_BEST: 3
72 | PLOT_METRIC_VALUES: True
73 | PLOT_COMPLEXITY_VALUES: True
74 | METRICS: [lr, wd, lr_wd]
75 | COMPLEXITY: [flops, params, acts, memory, epoch_fw_bw, epoch_time]
76 | PRE_FILTERS: {done: [1, 1, 1]}
77 |
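The float_sampler with RAND_TYPE log_uniform draws values uniformly in log space over RANGE and snaps them to multiples of QUANTIZE, which is the usual choice for learning rate and weight decay since they vary over orders of magnitude. A sketch of that sampling scheme (illustrative only; the real implementation lives in pycls/sweep/random.py and may differ in detail):

```
import math
import random

def log_uniform(low, high, quantize):
    """Sample uniformly in log space over [low, high], then quantize."""
    value = math.exp(random.uniform(math.log(low), math.log(high)))
    return round(value / quantize) * quantize

# e.g. the OPTIM.BASE_LR sampler above: RANGE [0.25, 5.0], QUANTIZE 1e-10
print(log_uniform(0.25, 5.0, 1.0e-10))
```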
--------------------------------------------------------------------------------
/configs/sweeps/cifar/cifar_regnet.yaml:
--------------------------------------------------------------------------------
1 | DESC:
2 | Example CIFAR sweep 2 of 3 (uses lr and wd found by cifar_optim sweep).
3 | This sweep searches for a good RegNet-125MF model on CIFAR (same flops as R-56).
4 | The purpose of this sweep is to show how to optimize REGNET parameters.
5 | NAME: cifar/cifar_regnet
6 | SETUP:
7 | # Number of configs to sample
8 | NUM_CONFIGS: 32
9 | # SAMPLER for RegNet
10 | SAMPLERS:
11 | REGNET:
12 | TYPE: regnet_sampler
13 | DEPTH: [6, 16]
14 | GROUP_W: [1, 32]
15 | # CONSTRAINTS for complexity (roughly based on R-56)
16 | CONSTRAINTS:
17 | CX:
18 | FLOPS: [0.12e+9, 0.13e+9]
19 | PARAMS: [0, 2.0e+6]
20 | ACTS: [0, 1.0e+6]
21 | REGNET:
22 | NUM_STAGES: [2, 2]
23 | # BASE_CFG is R-56 with large batch size and stronger augmentation
24 | BASE_CFG:
25 | MODEL:
26 | TYPE: regnet
27 | NUM_CLASSES: 10
28 | REGNET:
29 | STEM_TYPE: res_stem_cifar
30 | SE_ON: True
31 | STEM_W: 16
32 | OPTIM:
33 | BASE_LR: 1.0
34 | LR_POLICY: cos
35 | MAX_EPOCH: 50
36 | MOMENTUM: 0.9
37 | NESTEROV: True
38 | WARMUP_EPOCHS: 5
39 | WEIGHT_DECAY: 0.0005
40 | EMA_ALPHA: 0.00025
41 | EMA_UPDATE_PERIOD: 32
42 | BN:
43 | USE_CUSTOM_WEIGHT_DECAY: True
44 | TRAIN:
45 | DATASET: cifar10
46 | SPLIT: train
47 | BATCH_SIZE: 1024
48 | IM_SIZE: 32
49 | MIXED_PRECISION: True
50 | LABEL_SMOOTHING: 0.1
51 | MIXUP_ALPHA: 0.5
52 | TEST:
53 | DATASET: cifar10
54 | SPLIT: test
55 | BATCH_SIZE: 1000
56 | IM_SIZE: 32
57 | NUM_GPUS: 1
58 | DATA_LOADER:
59 | NUM_WORKERS: 4
60 | LOG_PERIOD: 25
61 | VERBOSE: False
62 | # Launch config options
63 | LAUNCH:
64 | PARTITION: devlab
65 | NUM_GPUS: 1
66 | PARALLEL_JOBS: 32
67 | TIME_LIMIT: 60
68 | # Analyze config options
69 | ANALYZE:
70 | PLOT_METRIC_VALUES: True
71 | PLOT_COMPLEXITY_VALUES: True
72 | PLOT_CURVES_BEST: 3
73 | PLOT_CURVES_WORST: 0
74 | PLOT_MODELS_BEST: 8
75 | PLOT_MODELS_WORST: 0
76 | METRICS: [regnet_depth, regnet_w0, regnet_wa, regnet_wm, regnet_gw]
77 | COMPLEXITY: [flops, params, acts, memory, epoch_fw_bw, epoch_time]
78 | PRE_FILTERS: {done: [0, 1, 1]}
79 |
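The CONSTRAINTS block filters sampled RegNets by complexity, so the sweep is effectively rejection sampling: draw a candidate from the REGNET sampler, compute its complexity, and keep it only if flops, params, and acts fall inside the ranges. A schematic of that loop (the `sample_regnet` and `complexity` callables are stand-ins for the real pycls sweep machinery):

```
def sample_constrained_configs(sample_regnet, complexity, num_configs):
    """Rejection-sample RegNet configs meeting the complexity constraints."""
    kept = []
    while len(kept) < num_configs:
        candidate = sample_regnet()    # e.g. DEPTH in [6, 16], GROUP_W in [1, 32]
        cx = complexity(candidate)     # {"flops": ..., "params": ..., "acts": ...}
        if (0.12e9 <= cx["flops"] <= 0.13e9
                and cx["params"] <= 2.0e6
                and cx["acts"] <= 1.0e6):
            kept.append(candidate)
    return kept
```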
--------------------------------------------------------------------------------
/dev/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E203, E266, E501, W503, E221
3 | max-line-length = 88
4 | max-complexity = 18
5 | select = B,C,E,F,W,T4,B9
6 | exclude = build,__init__.py,pycls/datasets/data
7 |
--------------------------------------------------------------------------------
/dev/.isort.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | line_length = 88
3 | multi_line_output = 3
4 | include_trailing_comma = True
5 | force_grid_wrap = 0
6 | default_section = THIRDPARTY
7 | lines_after_imports = 2
8 | combine_as_imports = True
9 | skip=pycls/datasets/data
10 | # Using force_alphabetical_sort_within_sections to match the convention of
11 | # other Meta codebases.
12 | force_alphabetical_sort_within_sections = True
13 |
--------------------------------------------------------------------------------
/dev/linter.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ev
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 |
4 | # Run this script at project root by "./dev/linter.sh" before you commit.
5 |
6 | {
7 | black --version | grep "19.3b0" > /dev/null
8 | } || {
9 | echo "Linter requires black==19.3b0 !"
10 | exit 1
11 | }
12 |
13 | echo "Running isort..."
14 | isort -y -sp ./dev
15 |
16 | echo "Running black..."
17 | black . --exclude pycls/datasets/data/
18 |
19 | echo "Running flake8..."
20 | if [ -x "$(command -v flake8)" ]; then
21 | flake8 . --config ./dev/.flake8
22 | else
23 | python3 -m flake8 . --config ./dev/.flake8
24 | fi
25 |
26 | command -v arc > /dev/null && {
27 | echo "Running arc lint ..."
28 | arc lint
29 | }
30 |
--------------------------------------------------------------------------------
/dev/model_error.json:
--------------------------------------------------------------------------------
1 | {
2 | "EfficientNet-B0": {
3 | "top1_err": 24.92,
4 | "top5_err": 7.814
5 | },
6 | "EfficientNet-B1": {
7 | "top1_err": 24.068,
8 | "top5_err": 7.228
9 | },
10 | "EfficientNet-B2": {
11 | "top1_err": 23.518,
12 | "top5_err": 6.968
13 | },
14 | "EfficientNet-B3": {
15 | "top1_err": 22.47,
16 | "top5_err": 6.514
17 | },
18 | "EfficientNet-B4": {
19 | "top1_err": 21.374,
20 | "top5_err": 5.808
21 | },
22 | "EfficientNet-B5": {
23 | "top1_err": 21.674,
24 | "top5_err": 6.238
25 | },
26 | "RegNetX-1.6GF": {
27 | "top1_err": 23.038,
28 | "top5_err": 6.484
29 | },
30 | "RegNetX-12GF": {
31 | "top1_err": 20.288,
32 | "top5_err": 5.284
33 | },
34 | "RegNetX-16GF": {
35 | "top1_err": 19.956,
36 | "top5_err": 5.022
37 | },
38 | "RegNetX-200MF": {
39 | "top1_err": 31.088,
40 | "top5_err": 11.342
41 | },
42 | "RegNetX-3.2GF": {
43 | "top1_err": 21.736,
44 | "top5_err": 5.862
45 | },
46 | "RegNetX-32GF": {
47 | "top1_err": 19.498,
48 | "top5_err": 4.878
49 | },
50 | "RegNetX-4.0GF": {
51 | "top1_err": 21.356,
52 | "top5_err": 5.736
53 | },
54 | "RegNetX-400MF": {
55 | "top1_err": 27.382,
56 | "top5_err": 9.076
57 | },
58 | "RegNetX-6.4GF": {
59 | "top1_err": 20.844,
60 | "top5_err": 5.434
61 | },
62 | "RegNetX-600MF": {
63 | "top1_err": 25.864,
64 | "top5_err": 8.198
65 | },
66 | "RegNetX-8.0GF": {
67 | "top1_err": 20.728,
68 | "top5_err": 5.42
69 | },
70 | "RegNetX-800MF": {
71 | "top1_err": 24.846,
72 | "top5_err": 7.592
73 | },
74 | "RegNetY-1.6GF": {
75 | "top1_err": 22.066,
76 | "top5_err": 6.134
77 | },
78 | "RegNetY-12GF": {
79 | "top1_err": 19.67,
80 | "top5_err": 4.856
81 | },
82 | "RegNetY-16GF": {
83 | "top1_err": 19.608,
84 | "top5_err": 4.944
85 | },
86 | "RegNetY-200MF": {
87 | "top1_err": 29.658,
88 | "top5_err": 10.378
89 | },
90 | "RegNetY-3.2GF": {
91 | "top1_err": 21.074,
92 | "top5_err": 5.454
93 | },
94 | "RegNetY-32GF": {
95 | "top1_err": 19.066,
96 | "top5_err": 4.568
97 | },
98 | "RegNetY-4.0GF": {
99 | "top1_err": 20.64,
100 | "top5_err": 5.336
101 | },
102 | "RegNetY-400MF": {
103 | "top1_err": 25.898,
104 | "top5_err": 8.07
105 | },
106 | "RegNetY-6.4GF": {
107 | "top1_err": 20.11,
108 | "top5_err": 5.1
109 | },
110 | "RegNetY-600MF": {
111 | "top1_err": 24.54,
112 | "top5_err": 7.28
113 | },
114 | "RegNetY-8.0GF": {
115 | "top1_err": 20.052,
116 | "top5_err": 5.06
117 | },
118 | "RegNetY-800MF": {
119 | "top1_err": 23.684,
120 | "top5_err": 6.846
121 | },
122 | "ResNeXt-101": {
123 | "top1_err": 20.664,
124 | "top5_err": 5.392
125 | },
126 | "ResNeXt-152": {
127 | "top1_err": 20.386,
128 | "top5_err": 5.318
129 | },
130 | "ResNeXt-50": {
131 | "top1_err": 21.884,
132 | "top5_err": 6.062
133 | },
134 | "ResNet-101": {
135 | "top1_err": 21.414,
136 | "top5_err": 5.792
137 | },
138 | "ResNet-152": {
139 | "top1_err": 20.87,
140 | "top5_err": 5.44
141 | },
142 | "ResNet-50": {
143 | "top1_err": 23.19,
144 | "top5_err": 6.63
145 | },
146 | "date-created": "2020-09-04 08:38:35.880459"
147 | }
148 |
--------------------------------------------------------------------------------
/dev/model_timing.json:
--------------------------------------------------------------------------------
1 | {
2 | "EfficientNet-B0": {
3 | "test_fw_time": 0.0121,
4 | "train_bw_time": 0.0442,
5 | "train_fw_bw_time": 0.0633,
6 | "train_fw_time": 0.0192
7 | },
8 | "EfficientNet-B1": {
9 | "test_fw_time": 0.0183,
10 | "train_bw_time": 0.0678,
11 | "train_fw_bw_time": 0.0952,
12 | "train_fw_time": 0.0274
13 | },
14 | "EfficientNet-B2": {
15 | "test_fw_time": 0.0226,
16 | "train_bw_time": 0.0833,
17 | "train_fw_bw_time": 0.1178,
18 | "train_fw_time": 0.0345
19 | },
20 | "EfficientNet-B3": {
21 | "test_fw_time": 0.0361,
22 | "train_bw_time": 0.136,
23 | "train_fw_bw_time": 0.19,
24 | "train_fw_time": 0.054
25 | },
26 | "EfficientNet-B4": {
27 | "test_fw_time": 0.0409,
28 | "train_bw_time": 0.1473,
29 | "train_fw_bw_time": 0.2053,
30 | "train_fw_time": 0.058
31 | },
32 | "EfficientNet-B5": {
33 | "test_fw_time": 0.0418,
34 | "train_bw_time": 0.1559,
35 | "train_fw_bw_time": 0.2204,
36 | "train_fw_time": 0.0645
37 | },
38 | "RegNetX-1.6GF": {
39 | "test_fw_time": 0.0543,
40 | "train_bw_time": 0.1786,
41 | "train_fw_bw_time": 0.2565,
42 | "train_fw_time": 0.0779
43 | },
44 | "RegNetX-12GF": {
45 | "test_fw_time": 0.1107,
46 | "train_bw_time": 0.3454,
47 | "train_fw_bw_time": 0.4985,
48 | "train_fw_time": 0.1531
49 | },
50 | "RegNetX-16GF": {
51 | "test_fw_time": 0.1333,
52 | "train_bw_time": 0.4086,
53 | "train_fw_bw_time": 0.5947,
54 | "train_fw_time": 0.1861
55 | },
56 | "RegNetX-200MF": {
57 | "test_fw_time": 0.0165,
58 | "train_bw_time": 0.0619,
59 | "train_fw_bw_time": 0.0859,
60 | "train_fw_time": 0.024
61 | },
62 | "RegNetX-3.2GF": {
63 | "test_fw_time": 0.0463,
64 | "train_bw_time": 0.1499,
65 | "train_fw_bw_time": 0.2167,
66 | "train_fw_time": 0.0669
67 | },
68 | "RegNetX-32GF": {
69 | "test_fw_time": 0.1316,
70 | "train_bw_time": 0.4117,
71 | "train_fw_bw_time": 0.5883,
72 | "train_fw_time": 0.1765
73 | },
74 | "RegNetX-4.0GF": {
75 | "test_fw_time": 0.0577,
76 | "train_bw_time": 0.1781,
77 | "train_fw_bw_time": 0.257,
78 | "train_fw_time": 0.0789
79 | },
80 | "RegNetX-400MF": {
81 | "test_fw_time": 0.0241,
82 | "train_bw_time": 0.0858,
83 | "train_fw_bw_time": 0.1206,
84 | "train_fw_time": 0.0348
85 | },
86 | "RegNetX-6.4GF": {
87 | "test_fw_time": 0.0743,
88 | "train_bw_time": 0.2455,
89 | "train_fw_bw_time": 0.3489,
90 | "train_fw_time": 0.1034
91 | },
92 | "RegNetX-600MF": {
93 | "test_fw_time": 0.0276,
94 | "train_bw_time": 0.092,
95 | "train_fw_bw_time": 0.1307,
96 | "train_fw_time": 0.0387
97 | },
98 | "RegNetX-8.0GF": {
99 | "test_fw_time": 0.0754,
100 | "train_bw_time": 0.2352,
101 | "train_fw_bw_time": 0.3414,
102 | "train_fw_time": 0.1062
103 | },
104 | "RegNetX-800MF": {
105 | "test_fw_time": 0.0359,
106 | "train_bw_time": 0.1213,
107 | "train_fw_bw_time": 0.1698,
108 | "train_fw_time": 0.0484
109 | },
110 | "RegNetY-1.6GF": {
111 | "test_fw_time": 0.0641,
112 | "train_bw_time": 0.2058,
113 | "train_fw_bw_time": 0.2963,
114 | "train_fw_time": 0.0905
115 | },
116 | "RegNetY-12GF": {
117 | "test_fw_time": 0.1186,
118 | "train_bw_time": 0.3697,
119 | "train_fw_bw_time": 0.5311,
120 | "train_fw_time": 0.1614
121 | },
122 | "RegNetY-16GF": {
123 | "test_fw_time": 0.0814,
124 | "train_bw_time": 0.2582,
125 | "train_fw_bw_time": 0.3652,
126 | "train_fw_time": 0.107
127 | },
128 | "RegNetY-200MF": {
129 | "test_fw_time": 0.0194,
130 | "train_bw_time": 0.0683,
131 | "train_fw_bw_time": 0.0943,
132 | "train_fw_time": 0.026
133 | },
134 | "RegNetY-3.2GF": {
135 | "test_fw_time": 0.0539,
136 | "train_bw_time": 0.1713,
137 | "train_fw_bw_time": 0.2443,
138 | "train_fw_time": 0.073
139 | },
140 | "RegNetY-32GF": {
141 | "test_fw_time": 0.1293,
142 | "train_bw_time": 0.402,
143 | "train_fw_bw_time": 0.5715,
144 | "train_fw_time": 0.1695
145 | },
146 | "RegNetY-4.0GF": {
147 | "test_fw_time": 0.0538,
148 | "train_bw_time": 0.1758,
149 | "train_fw_bw_time": 0.2515,
150 | "train_fw_time": 0.0757
151 | },
152 | "RegNetY-400MF": {
153 | "test_fw_time": 0.0301,
154 | "train_bw_time": 0.1109,
155 | "train_fw_bw_time": 0.1524,
156 | "train_fw_time": 0.0415
157 | },
158 | "RegNetY-6.4GF": {
159 | "test_fw_time": 0.0845,
160 | "train_bw_time": 0.2774,
161 | "train_fw_bw_time": 0.3915,
162 | "train_fw_time": 0.1141
163 | },
164 | "RegNetY-600MF": {
165 | "test_fw_time": 0.0315,
166 | "train_bw_time": 0.1083,
167 | "train_fw_bw_time": 0.1519,
168 | "train_fw_time": 0.0436
169 | },
170 | "RegNetY-8.0GF": {
171 | "test_fw_time": 0.0897,
172 | "train_bw_time": 0.2956,
173 | "train_fw_bw_time": 0.4216,
174 | "train_fw_time": 0.126
175 | },
176 | "RegNetY-800MF": {
177 | "test_fw_time": 0.0365,
178 | "train_bw_time": 0.1255,
179 | "train_fw_bw_time": 0.1763,
180 | "train_fw_time": 0.0508
181 | },
182 | "ResNeXt-101": {
183 | "test_fw_time": 0.059,
184 | "train_bw_time": 0.1986,
185 | "train_fw_bw_time": 0.2867,
186 | "train_fw_time": 0.0882
187 | },
188 | "ResNeXt-152": {
189 | "test_fw_time": 0.0901,
190 | "train_bw_time": 0.2892,
191 | "train_fw_bw_time": 0.4143,
192 | "train_fw_time": 0.1251
193 | },
194 | "ResNeXt-50": {
195 | "test_fw_time": 0.0331,
196 | "train_bw_time": 0.1107,
197 | "train_fw_bw_time": 0.1592,
198 | "train_fw_time": 0.0485
199 | },
200 | "ResNet-101": {
201 | "test_fw_time": 0.0378,
202 | "train_bw_time": 0.1187,
203 | "train_fw_bw_time": 0.1724,
204 | "train_fw_time": 0.0537
205 | },
206 | "ResNet-152": {
207 | "test_fw_time": 0.0548,
208 | "train_bw_time": 0.1661,
209 | "train_fw_bw_time": 0.2426,
210 | "train_fw_time": 0.0764
211 | },
212 | "ResNet-50": {
213 | "test_fw_time": 0.0223,
214 | "train_bw_time": 0.0741,
215 | "train_fw_bw_time": 0.1066,
216 | "train_fw_time": 0.0325
217 | },
218 | "date-created": "2020-09-28 13:59:58.235577"
219 | }
220 |
--------------------------------------------------------------------------------
/dev/model_zoo_tables.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Generate all MODEL_ZOO.md tables."""
9 |
10 | import json
11 | import os
12 |
13 | import pycls.core.builders as builders
14 | import pycls.core.net as net
15 | import pycls.models.model_zoo as model_zoo
16 | from pycls.core.config import cfg, load_cfg, reset_cfg
17 |
18 |
19 | # Location of pycls directory
20 | _PYCLS_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
21 |
22 | # Template for tables for each model family
23 | _TABLE_TEMPLATE = [
24 | "### {model_family} Models",
25 | "",
26 | "<table><tbody>",
27 | "<!-- START {model_family} TABLE -->",
28 | "<!-- TABLE HEADER -->",
29 | '<th valign="bottom">model</th>',
30 | '<th valign="bottom">flops<br/>(B)</th>',
31 | '<th valign="bottom">params<br/>(M)</th>',
32 | '<th valign="bottom">acts<br/>(M)</th>',
33 | '<th valign="bottom">batch<br/>size</th>',
34 | '<th valign="bottom">infer<br/>(ms)</th>',
35 | '<th valign="bottom">train<br/>(hr)</th>',
36 | '<th valign="bottom">error<br/>(top-1)</th>',
37 | '<th valign="bottom">model id</th>',
38 | '<th valign="bottom">download</th>',
39 | "<!-- TABLE BODY -->",
40 | "{table_rows}",
41 | "<!-- END {model_family} TABLE -->",
42 | "</tbody></table>\n",
43 | ]
44 |
45 |
46 | def get_model_data(name, timings, errors):
47 | """Get model data for a single model."""
48 | # Load model config
49 | reset_cfg()
50 | load_cfg(model_zoo.get_config_file(name))
51 | config_url, _, model_id, _, weight_url_full = model_zoo.get_model_info(name)
52 | # Get model complexity
53 | cx = net.complexity(builders.get_model())
54 | # Inference time is measured in ms with a reference batch_size and num_gpus
55 | batch_size, num_gpus = 64, 1
56 | reference = batch_size / cfg.TEST.BATCH_SIZE * cfg.NUM_GPUS / num_gpus
57 | infer_time = timings[name]["test_fw_time"] * reference * 1000
58 | # Training time is measured in hours for 100 epochs over the ImageNet train set
59 | iterations = 1281167 / cfg.TRAIN.BATCH_SIZE * 100
60 | train_time = timings[name]["train_fw_bw_time"] * iterations / 3600
61 | # Gather all data about the model
62 | return {
63 | "config_url": "configs/" + config_url,
64 | "flops": round(cx["flops"] / 1e9, 1),
65 | "params": round(cx["params"] / 1e6, 1),
66 | "acts": round(cx["acts"] / 1e6, 1),
67 | "batch_size": cfg.TRAIN.BATCH_SIZE,
68 | "infer_time": round(infer_time),
69 | "train_time": round(train_time, 1),
70 | "error": round(errors[name]["top1_err"], 1),
71 | "model_id": model_id,
72 | "weight_url": weight_url_full,
73 | }
74 |
75 |
76 | def model_zoo_table_row(name, timings, errors):
77 | """Make a single row for the MODEL_ZOO.md table."""
78 | data = get_model_data(name, timings, errors)
79 | out = "<!-- ROW {} -->\n<tr>\n".format(name)
80 | template = '<td align="left"><a href="{}">{}</a></td>\n'
81 | out += template.format(data["config_url"], name)
82 | template = '<td align="center">{}</td>\n'
83 | for key in list(data.keys())[1:-1]:
84 | out += template.format(data[key])
85 | template = '<td align="left"><a href="{}">model</a></td>\n</tr>'
86 | out += template.format(data["weight_url"], name)
87 | return out
88 |
89 |
90 | def model_zoo_table(model_family):
91 | """Make MODEL_ZOO.md table for a given model family."""
92 | filename = _PYCLS_DIR + "/dev/model_{}.json"
93 | with open(filename.format("timing"), "r") as f:
94 | timings = json.load(f)
95 | with open(filename.format("error"), "r") as f:
96 | errors = json.load(f)
97 | names = [n for n in model_zoo.get_model_list() if model_family in n]
98 | table_rows = "\n".join(model_zoo_table_row(n, timings, errors) for n in names)
99 | table_template = "\n".join(_TABLE_TEMPLATE)
100 | return table_template.format(model_family=model_family, table_rows=table_rows)
101 |
102 |
103 | def model_zoo_tables():
104 | """Make MODEL_ZOO.md tables for all model family."""
105 | model_families = ["RegNetX", "RegNetY", "ResNet", "ResNeXt", "EfficientNet"]
106 | out = [model_zoo_table(model_family) for model_family in model_families]
107 | return "\n".join(out)
108 |
109 |
110 | if __name__ == "__main__":
111 | print(model_zoo_tables())
112 |
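As a sanity check of `get_model_data` above, here is the same arithmetic applied to the ResNet-50 entries: `test_fw_time` 0.0223 s and `train_fw_bw_time` 0.1066 s from dev/model_timing.json, with TEST.BATCH_SIZE 200, TRAIN.BATCH_SIZE 256, and NUM_GPUS 8 from its config:

```
# Inference time rescaled to a reference batch size of 64 on 1 GPU
test_fw_time, test_batch, num_gpus = 0.0223, 200, 8
reference = 64 / test_batch * num_gpus / 1
print(round(test_fw_time * reference * 1000))          # -> 57 (ms)

# Training time for 100 epochs over the 1,281,167 ImageNet train images
train_fw_bw_time, train_batch = 0.1066, 256
iterations = 1281167 / train_batch * 100
print(round(train_fw_bw_time * iterations / 3600, 1))  # -> 14.8 (hr)
```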
--------------------------------------------------------------------------------
/dev/test_models.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Units test for all models in model zoo."""
9 |
10 | import datetime
11 | import json
12 | import os
13 | import shutil
14 | import tempfile
15 | import unittest
16 |
17 | import pycls.core.builders as builders
18 | import pycls.core.distributed as dist
19 | import pycls.core.logging as logging
20 | import pycls.core.net as net
21 | import pycls.core.trainer as trainer
22 | import pycls.models.model_zoo as model_zoo
23 | from parameterized import parameterized
24 | from pycls.core.config import cfg, load_cfg, reset_cfg
25 |
26 |
27 | # Location of pycls directory
28 | _PYCLS_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
29 |
30 | # If True, run the selected tests
31 | _RUN_COMPLEXITY_TESTS = True
32 | _RUN_ERROR_TESTS = False
33 | _RUN_TIMING_TESTS = False
34 |
35 |
36 | def test_complexity(key):
37 | """Measure the complexity of a single model."""
38 | reset_cfg()
39 | cfg_file = os.path.join(_PYCLS_DIR, key)
40 | load_cfg(cfg_file)
41 | return net.complexity(builders.get_model())
42 |
43 |
44 | def test_timing(key):
45 | """Measure the timing of a single model."""
46 | reset_cfg()
47 | load_cfg(model_zoo.get_config_file(key))
48 | cfg.PREC_TIME.WARMUP_ITER, cfg.PREC_TIME.NUM_ITER = 5, 50
49 | cfg.OUT_DIR, cfg.LOG_DEST = tempfile.mkdtemp(), "file"
50 | dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.time_model)
51 | log_file = os.path.join(cfg.OUT_DIR, "stdout.log")
52 | data = logging.sort_log_data(logging.load_log_data(log_file))["iter_times"]
53 | shutil.rmtree(cfg.OUT_DIR)
54 | return data
55 |
56 |
57 | def test_error(key):
58 | """Measure the error of a single model."""
59 | reset_cfg()
60 | load_cfg(model_zoo.get_config_file(key))
61 | cfg.TEST.WEIGHTS = model_zoo.get_weights_file(key)
62 | cfg.OUT_DIR, cfg.LOG_DEST = tempfile.mkdtemp(), "file"
63 | dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.test_model)
64 | log_file = os.path.join(cfg.OUT_DIR, "stdout.log")
65 | data = logging.sort_log_data(logging.load_log_data(log_file))["test_epoch"]
66 | data = {"top1_err": data["top1_err"][-1], "top5_err": data["top5_err"][-1]}
67 | shutil.rmtree(cfg.OUT_DIR)
68 | return data
69 |
70 |
71 | def generate_complexity_tests():
72 | """Generate complexity tests for every model in the configs directory."""
73 | configs_dir = os.path.join(_PYCLS_DIR, "configs")
74 | keys = [os.path.join(r, f) for r, _, fs in os.walk(configs_dir) for f in fs]
75 | keys = [os.path.relpath(k, _PYCLS_DIR) for k in keys if ".yaml" in k]
76 | generate_tests("complexity", test_complexity, keys)
77 |
78 |
79 | def generate_timing_tests():
80 | """Generate timing tests for every model in the model zoo."""
81 | keys = model_zoo.get_model_list()
82 | generate_tests("timing", test_timing, keys)
83 |
84 |
85 | def generate_error_tests():
86 | """Generate error tests for every model in the model zoo."""
87 | keys = model_zoo.get_model_list()
88 | generate_tests("error", test_error, keys)
89 |
90 |
91 | def generate_tests(test_name, test_fun, keys):
92 | """Generate and store all the unit tests."""
93 | data = load_test_data(test_name)
94 | keys = sorted(k for k in keys if k not in data)
95 | for key in keys:
96 | data[key] = test_fun(key)
97 | print("data['{}'] = {}".format(key, data[key]))
98 | save_test_data(data, test_name)
99 |
100 |
101 | def save_test_data(data, test_name):
102 | """Save the data file for a given set of tests."""
103 | filename = os.path.join(_PYCLS_DIR, "dev/model_{}.json".format(test_name))
104 | with open(filename, "w") as file:
105 | data["date-created"] = str(datetime.datetime.now())
106 | json.dump(data, file, sort_keys=True, indent=4)
107 |
108 |
109 | def load_test_data(test_name):
110 | """Load the data file for a given set of tests."""
111 | filename = os.path.join(_PYCLS_DIR, "dev/model_{}.json".format(test_name))
112 | if not os.path.exists(filename):
113 | return {}
114 | with open(filename, "r") as f:
115 | return json.load(f)
116 |
117 |
118 | def parse_tests(data):
119 | """Parse the data file in a format useful for the unit tests."""
120 | return [[f, data[f]] for f in data if f != "date-created"]
121 |
122 |
123 | class TestComplexity(unittest.TestCase):
124 | """Generates unit tests for complexity."""
125 |
126 | @parameterized.expand(parse_tests(load_test_data("complexity")), skip_on_empty=True)
127 | @unittest.skipIf(not _RUN_COMPLEXITY_TESTS, "Skipping complexity tests")
128 | def test(self, key, out_expected):
129 | print("Testing complexity of: {}".format(key))
130 | out = test_complexity(key)
131 | self.assertEqual(out, out_expected)
132 |
133 |
134 | class TestError(unittest.TestCase):
135 | """Generates unit tests for error."""
136 |
137 | @parameterized.expand(parse_tests(load_test_data("error")), skip_on_empty=True)
138 | @unittest.skipIf(not _RUN_ERROR_TESTS, "Skipping error tests")
139 | def test(self, key, out_expected):
140 | print("\nTesting error of: {}".format(key))
141 | out = test_error(key)
142 | print("expected = {}".format(out_expected))
143 | print("measured = {}".format(out))
144 | for k in out.keys():
145 | self.assertAlmostEqual(out[k], out_expected[k], 2)
146 |
147 |
148 | class TestTiming(unittest.TestCase):
149 | """Generates unit tests for timing."""
150 |
151 | @parameterized.expand(parse_tests(load_test_data("timing")), skip_on_empty=True)
152 | @unittest.skipIf(not _RUN_TIMING_TESTS, "Skipping timing tests")
153 | def test(self, key, out_expected):
154 | print("\nTesting timing of: {}".format(key))
155 | out = test_timing(key)
156 | print("expected = {}".format(out_expected))
157 | print("measured = {}".format(out))
158 | for k in out.keys():
159 | self.assertLessEqual(out[k] / out_expected[k], 1.10)
160 | self.assertLessEqual(out_expected[k] / out[k], 1.10)
161 |
162 |
163 | if __name__ == "__main__":
164 | # generate_complexity_tests()
165 | # generate_timing_tests()
166 | # generate_error_tests()
167 | unittest.main()
168 |
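The stored JSON files are refreshed by the commented-out `generate_*` calls in `__main__`; `generate_tests` only measures keys that are missing, so regeneration is incremental. Since `dev/` is not a package, one way to drive it programmatically is to load the file by path (a sketch, assuming it runs from the repo root with pycls installed):

```
import importlib.util
import pathlib

# Load dev/test_models.py by path (dev/ has no __init__.py)
spec = importlib.util.spec_from_file_location(
    "test_models", str(pathlib.Path("dev") / "test_models.py")
)
tm = importlib.util.module_from_spec(spec)
spec.loader.exec_module(tm)

tm.generate_complexity_tests()  # measures only configs not yet stored
```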
--------------------------------------------------------------------------------
/docs/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to make participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at <opensource-conduct@fb.com>. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to pycls
2 | We want to make contributing to this project as easy and transparent as
3 | possible.
4 |
5 | ## Pull Requests
6 | We actively welcome your pull requests.
7 |
8 | 1. Fork the repo and create your branch from `main`.
9 | 2. If you've added code that should be tested, add tests.
10 | 3. If you've changed APIs, update the documentation.
11 | 4. Ensure the test suite passes.
12 | 5. Make sure your code lints (run ```./dev/linter.sh```).
13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
14 |
15 | ## Contributor License Agreement ("CLA")
16 | In order to accept your pull request, we need you to submit a CLA. You only need
17 | to do this once to work on any of Facebook's open source projects.
18 |
19 | Complete your CLA here: <https://code.facebook.com/cla>
20 |
21 | ## Issues
22 | We use GitHub issues to track public bugs. Please ensure your description is
23 | clear and has sufficient instructions to be able to reproduce the issue.
24 |
25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
26 | disclosure of security bugs. In those cases, please go through the process
27 | outlined on that page and do not file a public issue.
28 |
29 | ## License
30 | By contributing to pycls, you agree that your contributions will be licensed
31 | under the LICENSE file in the root directory of this source tree.
--------------------------------------------------------------------------------
/docs/DATA.md:
--------------------------------------------------------------------------------
1 | # Setting Up Data Paths
2 |
3 | **pycls** finds datasets via symlinks from `pycls/datasets/data` to the actual locations where the dataset images and labels are stored. The instructions on how to create symlinks for ImageNet and CIFAR are given below.
4 |
5 | Expected datasets structure for ImageNet:
6 |
7 | ```
8 | imagenet
9 | |_ train
10 | | |_ n01440764
11 | | |_ ...
12 | | |_ n15075141
13 | |_ val
14 | | |_ n01440764
15 | | |_ ...
16 | | |_ n15075141
17 | |_ ...
18 | ```
19 |
20 | Expected datasets structure for CIFAR-10:
21 |
22 | ```
23 | cifar10
24 | |_ data_batch_1
25 | |_ data_batch_2
26 | |_ data_batch_3
27 | |_ data_batch_4
28 | |_ data_batch_5
29 | |_ test_batch
30 | |_ ...
31 | ```
32 |
33 | Create a directory containing symlinks:
34 |
35 | ```
36 | mkdir -p /path/pycls/pycls/datasets/data
37 | ```
38 |
39 | Symlink ImageNet (`/datasets01/imagenet_full_size/061417/` on FAIR cluster):
40 |
41 | ```
42 | ln -sv /path/imagenet /path/pycls/pycls/datasets/data/imagenet
43 | ```
44 |
45 | Symlink CIFAR-10 (`/datasets01/cifar-10-batches-py/060817/` on FAIR cluster):
46 |
47 | ```
48 | ln -sv /path/cifar10 /path/pycls/pycls/datasets/data/cifar10
49 | ```
50 |
--------------------------------------------------------------------------------
/docs/GETTING_STARTED.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | **pycls** can be used as a [library](#library-usage) (e.g. import pretrained models) or as a [framework](#framework-usage) (e.g. modify for your needs). This document provides brief installation instructions and basic usage examples for both use cases.
4 |
5 | **Notes:**
6 |
7 | - **pycls** has been tested with PyTorch 1.6, CUDA 9.2 and cuDNN 7.1
8 | - **pycls** currently does not support running on CPU; a GPU system is required
9 |
10 | ## Library Usage
11 |
12 | Install the package:
13 |
14 | ```
15 | pip install pycls
16 | ```
17 |
18 | Load a pretrained model:
19 |
20 | ```
21 | model = pycls.models.regnetx("400MF", pretrained=True)
22 | ```
23 |
24 | Create a model with the number of classes altered:
25 |
26 | ```
27 | model = pycls.models.regnety("4.0GF", pretrained=False, cfg_list=("MODEL.NUM_CLASSES", 100))
28 | ```
29 |
30 | Please see the [`MODEL_ZOO.md`](../MODEL_ZOO.md) for the available pretrained models.
31 |
32 | ## Framework Usage
33 |
34 | Clone the repository:
35 |
36 | ```
37 | git clone https://github.com/facebookresearch/pycls
38 | ```
39 |
40 | Install dependencies:
41 |
42 | ```
43 | pip install -r requirements.txt
44 | ```
45 |
46 | Set all the files in ./tools to be executable by the user:
47 |
48 | ```
49 | chmod 744 ./tools/*.py
50 | ```
51 |
52 | Set up modules:
53 |
54 | ```
55 | python setup.py develop --user
56 | ```
57 |
58 | Please see [`DATA.md`](DATA.md) for the instructions on setting up datasets.
59 |
60 | The examples below use a config for RegNetX-400MF on ImageNet with 8 GPUs.
61 |
62 | ### Model Info
63 |
64 | ```
65 | ./tools/run_net.py --mode info \
66 | --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml
67 | ```
68 |
69 | ### Model Evaluation
70 |
71 | ```
72 | ./tools/run_net.py --mode test \
73 | --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
74 | TEST.WEIGHTS https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth \
75 | OUT_DIR /tmp
76 | ```
77 |
78 | ### Model Evaluation (multi-node)
79 |
80 | ```
81 | ./tools/run_net.py --mode test \
82 | --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
83 | TEST.WEIGHTS https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth \
84 | OUT_DIR test/ LOG_DEST file LAUNCH.MODE slurm LAUNCH.PARTITION devlab NUM_GPUS 16 LAUNCH.NAME pycls_eval_test
85 | ```
86 |
87 | ### Model Training
88 |
89 | ```
90 | ./tools/run_net.py --mode train \
91 | --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
92 | OUT_DIR /tmp
93 | ```
94 |
95 | ### Model Finetuning
96 |
97 | ```
98 | ./tools/run_net.py --mode train \
99 | --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
100 | TRAIN.WEIGHTS https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth \
101 | OUT_DIR /tmp
102 | ```
103 |
104 | ### Model Timing
105 |
106 | ```
107 | ./tools/run_net.py --mode time \
108 | --cfg configs/dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml \
109 | NUM_GPUS 1 \
110 | TRAIN.BATCH_SIZE 64 \
111 | TEST.BATCH_SIZE 64 \
112 | PREC_TIME.WARMUP_ITER 5 \
113 | PREC_TIME.NUM_ITER 50
114 | ```
115 |
116 | ### Model Scaling
117 |
118 | Scale a RegNetY-4GF by 4x using fast compound scaling (see https://arxiv.org/abs/2103.06877):
119 |
120 | ```
121 | ./tools/run_net.py --mode scale \
122 | --cfg configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml \
123 | OUT_DIR ./ \
124 | CFG_DEST "RegNetY-4.0GF_dds_8gpu_scaled.yaml" \
125 | MODEL.SCALING_FACTOR 4.0 \
126 | MODEL.SCALING_TYPE "d1_w8_g8_r1"
127 | ```
128 |
--------------------------------------------------------------------------------
/docs/regnetx_nets.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/pycls/5719de641657c251f807c20a1fcf8ef6f4b60144/docs/regnetx_nets.png
--------------------------------------------------------------------------------
/pycls/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/pycls/5719de641657c251f807c20a1fcf8ef6f4b60144/pycls/__init__.py
--------------------------------------------------------------------------------
/pycls/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/pycls/5719de641657c251f807c20a1fcf8ef6f4b60144/pycls/core/__init__.py
--------------------------------------------------------------------------------
/pycls/core/benchmark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Benchmarking functions."""
9 |
10 | import pycls.core.logging as logging
11 | import pycls.core.net as net
12 | import pycls.datasets.loader as loader
13 | import torch
14 | import torch.cuda.amp as amp
15 | from pycls.core.config import cfg
16 | from pycls.core.timer import Timer
17 |
18 |
19 | logger = logging.get_logger(__name__)
20 |
21 |
22 | @torch.no_grad()
23 | def compute_time_eval(model):
24 | """Computes precise model forward test time using dummy data."""
25 | # Use eval mode
26 | model.eval()
27 | # Generate a dummy mini-batch and copy data to GPU
28 | im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS)
29 | inputs = torch.zeros(batch_size, 3, im_size, im_size).cuda(non_blocking=False)
30 | # Compute precise forward pass time
31 | timer = Timer()
32 | total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
33 | for cur_iter in range(total_iter):
34 | # Reset the timers after the warmup phase
35 | if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
36 | timer.reset()
37 | # Forward
38 | timer.tic()
39 | with amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
40 | model(inputs)
41 | torch.cuda.synchronize()
42 | timer.toc()
43 | return timer.average_time
44 |
45 |
46 | def compute_time_train(model, loss_fun):
47 | """Computes precise model forward + backward time using dummy data."""
48 | # Use train mode
49 | model.train()
50 | # Generate a dummy mini-batch and copy data to GPU
51 | im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS)
52 | inputs = torch.rand(batch_size, 3, im_size, im_size).cuda(non_blocking=False)
53 | labels = torch.zeros(batch_size, dtype=torch.int64).cuda(non_blocking=False)
54 | labels_one_hot = net.smooth_one_hot_labels(labels)
55 | # Cache BatchNorm2D running stats
56 | bns = [m for m in model.modules() if isinstance(m, torch.nn.BatchNorm2d)]
57 | bn_stats = [[bn.running_mean.clone(), bn.running_var.clone()] for bn in bns]
58 | # Create a GradScaler for mixed precision training
59 | scaler = amp.GradScaler(enabled=cfg.TRAIN.MIXED_PRECISION)
60 | # Compute precise forward backward pass time
61 | fw_timer, bw_timer = Timer(), Timer()
62 | total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
63 | for cur_iter in range(total_iter):
64 | # Reset the timers after the warmup phase
65 | if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
66 | fw_timer.reset()
67 | bw_timer.reset()
68 | # Forward
69 | fw_timer.tic()
70 | with amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
71 | preds = model(inputs)
72 | loss = loss_fun(preds, labels_one_hot)
73 | torch.cuda.synchronize()
74 | fw_timer.toc()
75 | # Backward
76 | bw_timer.tic()
77 | scaler.scale(loss).backward()
78 | torch.cuda.synchronize()
79 | bw_timer.toc()
80 | # Restore BatchNorm2D running stats
81 | for bn, (mean, var) in zip(bns, bn_stats):
82 | bn.running_mean, bn.running_var = mean, var
83 | return fw_timer.average_time, bw_timer.average_time
84 |
85 |
86 | def compute_time_loader(data_loader):
87 | """Computes loader time."""
88 | timer = Timer()
89 | if cfg.DATA_LOADER.MODE != loader.FFCV:
90 | loader.shuffle(data_loader, 0)
91 | data_loader_iterator = iter(data_loader)
92 | total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
93 | total_iter = min(total_iter, len(data_loader))
94 | for cur_iter in range(total_iter):
95 | if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
96 | timer.reset()
97 | timer.tic()
98 | next(data_loader_iterator)
99 | timer.toc()
100 | return timer.average_time
101 |
102 |
103 | def compute_time_model(model, loss_fun):
104 | """Times model."""
105 | logger.info("Computing model timings only...")
106 | # Compute timings
107 | test_fw_time = compute_time_eval(model)
108 | train_fw_time, train_bw_time = compute_time_train(model, loss_fun)
109 | train_fw_bw_time = train_fw_time + train_bw_time
110 | # Output iter timing
111 | iter_times = {
112 | "test_fw_time": test_fw_time,
113 | "train_fw_time": train_fw_time,
114 | "train_bw_time": train_bw_time,
115 | "train_fw_bw_time": train_fw_bw_time,
116 | }
117 | logger.info(logging.dump_log_data(iter_times, "iter_times"))
118 |
119 |
120 | def compute_time_full(model, loss_fun, train_loader, test_loader):
121 | """Times model and data loader."""
122 | logger.info("Computing model and loader timings...")
123 | # Compute timings
124 | test_fw_time = compute_time_eval(model)
125 | train_fw_time, train_bw_time = compute_time_train(model, loss_fun)
126 | train_fw_bw_time = train_fw_time + train_bw_time
127 | train_loader_time = compute_time_loader(train_loader)
128 | # Output iter timing
129 | iter_times = {
130 | "test_fw_time": test_fw_time,
131 | "train_fw_time": train_fw_time,
132 | "train_bw_time": train_bw_time,
133 | "train_fw_bw_time": train_fw_bw_time,
134 | "train_loader_time": train_loader_time,
135 | }
136 | logger.info(logging.dump_log_data(iter_times, "iter_times"))
137 | # Output epoch timing
138 | epoch_times = {
139 | "test_fw_time": test_fw_time * len(test_loader),
140 | "train_fw_time": train_fw_time * len(train_loader),
141 | "train_bw_time": train_bw_time * len(train_loader),
142 | "train_fw_bw_time": train_fw_bw_time * len(train_loader),
143 | "train_loader_time": train_loader_time * len(train_loader),
144 | }
145 | logger.info(logging.dump_log_data(epoch_times, "epoch_times"))
146 | # Compute data loader overhead (assuming DATA_LOADER.NUM_WORKERS>1)
147 | overhead = max(0, train_loader_time - train_fw_bw_time) / train_fw_bw_time
148 | logger.info("Overhead of data loader is {:.2f}%".format(overhead * 100))
149 |
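The final overhead number in `compute_time_full` reports how much slower one loader iteration is than one forward+backward iteration, clamped at zero when the loader keeps up. The same formula on two example timings:

```
def loader_overhead(train_loader_time, train_fw_bw_time):
    """Fraction by which loading outpaces compute (0 when the loader keeps up)."""
    return max(0, train_loader_time - train_fw_bw_time) / train_fw_bw_time

print(loader_overhead(0.12, 0.10))  # loader 20 ms slower per iter -> 0.2 (20%)
print(loader_overhead(0.08, 0.10))  # loader faster than compute   -> 0.0
```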
--------------------------------------------------------------------------------
/pycls/core/builders.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Model and loss construction functions."""
9 |
10 | from pycls.core.config import cfg
11 | from pycls.core.net import SoftCrossEntropyLoss
12 | from pycls.models.anynet import AnyNet
13 | from pycls.models.effnet import EffNet
14 | from pycls.models.regnet import RegNet
15 | from pycls.models.resnet import ResNet
16 | from pycls.models.vit import ViT
17 |
18 |
19 | # Supported models
20 | _models = {
21 | "anynet": AnyNet,
22 | "effnet": EffNet,
23 | "resnet": ResNet,
24 | "regnet": RegNet,
25 | "vit": ViT,
26 | }
27 |
28 |
29 | # Supported loss functions
30 | _loss_funs = {"cross_entropy": SoftCrossEntropyLoss}
31 |
32 |
33 | def get_model():
34 | """Gets the model class specified in the config."""
35 | err_str = "Model type '{}' not supported"
36 | assert cfg.MODEL.TYPE in _models.keys(), err_str.format(cfg.MODEL.TYPE)
37 | return _models[cfg.MODEL.TYPE]
38 |
39 |
40 | def get_loss_fun():
41 | """Gets the loss function class specified in the config."""
42 | err_str = "Loss function type '{}' not supported"
43 | assert cfg.MODEL.LOSS_FUN in _loss_funs.keys(), err_str.format(cfg.MODEL.LOSS_FUN)
44 | return _loss_funs[cfg.MODEL.LOSS_FUN]
45 |
46 |
47 | def build_model():
48 | """Builds the model."""
49 | return get_model()()
50 |
51 |
52 | def build_loss_fun():
53 | """Build the loss function."""
54 | return get_loss_fun()()
55 |
56 |
57 | def register_model(name, ctor):
58 | """Registers a model dynamically."""
59 | _models[name] = ctor
60 |
61 |
62 | def register_loss_fun(name, ctor):
63 | """Registers a loss function dynamically."""
64 | _loss_funs[name] = ctor
65 |
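A minimal standalone sketch of the same registry pattern (no pycls imports;
`TinyNet` is a hypothetical stand-in model): registering a constructor under a
name is all register_model does, and build_model() then amounts to a dict
lookup plus a call.

    import torch

    _models = {}  # mirrors the module-level registry above

    def register_model(name, ctor):
        _models[name] = ctor

    class TinyNet(torch.nn.Module):  # hypothetical model for illustration
        def __init__(self):
            super().__init__()
            self.fc = torch.nn.Linear(8, 2)

        def forward(self, x):
            return self.fc(x)

    register_model("tinynet", TinyNet)
    model = _models["tinynet"]()           # what build_model() does for cfg.MODEL.TYPE
    print(model(torch.randn(4, 8)).shape)  # torch.Size([4, 2])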
--------------------------------------------------------------------------------
/pycls/core/checkpoint.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Functions that handle saving and loading of checkpoints."""
9 |
10 | import os
11 |
12 | import pycls.core.distributed as dist
13 | import torch
14 | from pycls.core.config import cfg
15 | from pycls.core.io import pathmgr
16 | from pycls.core.net import unwrap_model
17 |
18 |
19 | # Common prefix for checkpoint file names
20 | _NAME_PREFIX = "model_epoch_"
21 |
22 | # Checkpoints directory name
23 | _DIR_NAME = "checkpoints"
24 |
25 |
26 | def get_checkpoint_dir():
27 | """Retrieves the location for storing checkpoints."""
28 | return os.path.join(cfg.OUT_DIR, _DIR_NAME)
29 |
30 |
31 | def get_checkpoint(epoch):
32 | """Retrieves the path to a checkpoint file."""
33 | name = "{}{:04d}.pyth".format(_NAME_PREFIX, epoch)
34 | return os.path.join(get_checkpoint_dir(), name)
35 |
36 |
37 | def get_checkpoint_best():
38 | """Retrieves the path to the best checkpoint file."""
39 | return os.path.join(cfg.OUT_DIR, "model.pyth")
40 |
41 |
42 | def get_last_checkpoint():
43 | """Retrieves the most recent checkpoint (highest epoch number)."""
44 | checkpoint_dir = get_checkpoint_dir()
45 | checkpoints = [f for f in pathmgr.ls(checkpoint_dir) if _NAME_PREFIX in f]
46 | last_checkpoint_name = sorted(checkpoints)[-1]
47 | return os.path.join(checkpoint_dir, last_checkpoint_name)
48 |
49 |
50 | def has_checkpoint():
51 | """Determines if there are checkpoints available."""
52 | checkpoint_dir = get_checkpoint_dir()
53 | if not pathmgr.exists(checkpoint_dir):
54 | return False
55 | return any(_NAME_PREFIX in f for f in pathmgr.ls(checkpoint_dir))
56 |
57 |
58 | def save_checkpoint(model, model_ema, optimizer, epoch, test_err, ema_err):
59 | """Saves a checkpoint and also the best weights so far in a best checkpoint."""
60 |
61 | # Ensure that the checkpoint dir exists
62 | pathmgr.mkdirs(get_checkpoint_dir())
63 | # Record the state
64 | checkpoint = {
65 | "epoch": epoch,
66 | "test_err": test_err,
67 | "ema_err": ema_err,
68 | "model_state": unwrap_model(model).state_dict(),
69 | "ema_state": unwrap_model(model_ema).state_dict(),
70 | "optimizer_state": optimizer.state_dict(),
71 | "cfg": cfg.dump(),
72 | }
73 |
74 | # Write the checkpoint
75 | checkpoint_file = get_checkpoint(epoch + 1)
76 | # Save checkpoints only from the main process
77 | if not dist.is_main_proc():
78 | return
79 |
80 | with pathmgr.open(checkpoint_file, "wb") as f:
81 | torch.save(checkpoint, f)
82 | # Store the best model and model_ema weights so far
83 | if not pathmgr.exists(get_checkpoint_best()):
84 | with pathmgr.open(get_checkpoint_best(), "wb") as f:
85 | torch.save(checkpoint, f)
86 | else:
87 | with pathmgr.open(get_checkpoint_best(), "rb") as f:
88 | best = torch.load(f, map_location="cpu")
89 | # Select the best model weights and the best model_ema weights
90 | if test_err < best["test_err"] or ema_err < best["ema_err"]:
91 | if test_err < best["test_err"]:
92 | best["model_state"] = checkpoint["model_state"]
93 | best["test_err"] = test_err
94 | if ema_err < best["ema_err"]:
95 | best["ema_state"] = checkpoint["ema_state"]
96 | best["ema_err"] = ema_err
97 | with pathmgr.open(get_checkpoint_best(), "wb") as f:
98 | torch.save(best, f)
99 | return checkpoint_file
100 |
101 |
102 | def load_checkpoint(checkpoint_file, model, model_ema=None, optimizer=None):
103 | """
104 | Loads a checkpoint selectively based on the input options.
105 |
106 | Each checkpoint contains both the model and model_ema weights (except checkpoints
107 | created by old versions of the code). If both the model and model_weights are
108 | requested, both sets of weights are loaded. If only the model weights are requested
109 | (that is if model_ema=None), the *better* set of weights is selected to be loaded
110 | (according to the lesser of test_err and ema_err, also stored in the checkpoint).
111 |
112 | The code is backward compatible with checkpoints that do not store the ema weights.
113 | """
114 | err_str = "Checkpoint '{}' not found"
115 | assert pathmgr.exists(checkpoint_file), err_str.format(checkpoint_file)
116 | with pathmgr.open(checkpoint_file, "rb") as f:
117 | checkpoint = torch.load(f, map_location="cpu")
118 | # Get test_err and ema_err (with backward compatibility)
119 | test_err = checkpoint["test_err"] if "test_err" in checkpoint else 100
120 | ema_err = checkpoint["ema_err"] if "ema_err" in checkpoint else 100
121 | # Load model and optionally model_ema weights (with backward compatibility)
122 | ema_state = "ema_state" if "ema_state" in checkpoint else "model_state"
123 | if model_ema:
124 | unwrap_model(model).load_state_dict(checkpoint["model_state"])
125 | unwrap_model(model_ema).load_state_dict(checkpoint[ema_state])
126 | else:
127 | best_state = "model_state" if test_err <= ema_err else ema_state
128 | unwrap_model(model).load_state_dict(checkpoint[best_state])
129 | # Load optimizer if requested
130 | if optimizer:
131 | optimizer.load_state_dict(checkpoint["optimizer_state"])
132 | return checkpoint["epoch"], test_err, ema_err
133 |
134 |
135 | def delete_checkpoints(checkpoint_dir=None, keep="all"):
136 | """Deletes unneeded checkpoints, keep can be "all", "last", or "none"."""
137 | assert keep in ["all", "last", "none"], "Invalid keep setting: {}".format(keep)
138 | checkpoint_dir = checkpoint_dir if checkpoint_dir else get_checkpoint_dir()
139 | if keep == "all" or not pathmgr.exists(checkpoint_dir):
140 | return 0
141 | checkpoints = [f for f in pathmgr.ls(checkpoint_dir) if _NAME_PREFIX in f]
142 | checkpoints = sorted(checkpoints)[:-1] if keep == "last" else checkpoints
143 | for checkpoint in checkpoints:
144 | pathmgr.rm(os.path.join(checkpoint_dir, checkpoint))
145 | return len(checkpoints)
146 |
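A hypothetical resume sketch (assumes pycls is importable, cfg has been loaded
to match the previous run so OUT_DIR and the model config line up, and a plain
SGD optimizer stands in for the real one):

    import torch
    import pycls.core.checkpoint as cp
    from pycls.core.builders import build_model

    model = build_model()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    start_epoch = 0
    if cp.has_checkpoint():
        last = cp.get_last_checkpoint()
        # model_ema=None -> the better of the model/ema weights is loaded into model
        epoch, test_err, ema_err = cp.load_checkpoint(last, model, optimizer=optimizer)
        start_epoch = epoch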
--------------------------------------------------------------------------------
/pycls/core/distributed.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Distributed helpers."""
9 |
10 | import os
11 | import random
12 |
13 | import submitit
14 | import torch
15 | from pycls.core.config import cfg
16 |
17 |
18 | # Make this work with recent PyTorch versions (https://github.com/pytorch/pytorch/issues/37377)
19 | os.environ["MKL_THREADING_LAYER"] = "GNU"
20 |
21 |
22 | class SubmititRunner(submitit.helpers.Checkpointable):
23 | """A callable which is passed to submitit to launch the jobs."""
24 |
25 | def __init__(self, port, fun, cfg_state):
26 | self.cfg_state = cfg_state
27 | self.port = port
28 | self.fun = fun
29 |
30 | def __call__(self):
31 | job_env = submitit.JobEnvironment()
32 | os.environ["MASTER_ADDR"] = job_env.hostnames[0]
33 | os.environ["MASTER_PORT"] = str(self.port)
34 | os.environ["RANK"] = str(job_env.global_rank)
35 | os.environ["LOCAL_RANK"] = str(job_env.local_rank)
36 | os.environ["WORLD_SIZE"] = str(job_env.num_tasks)
37 | setup_distributed(self.cfg_state)
38 | self.fun()
39 |
40 |
41 | def is_main_proc(local=False):
42 | """
43 | Determines if the current process is the main process.
44 |
45 | Main process is responsible for logging, writing and loading checkpoints. In
46 | the multi GPU setting, we assign the main role to the rank 0 process. When
47 | training using a single GPU, there is a single process which is considered main.
48 |
49 | If local==True, then check if the current process is the main on the current node.
50 | """
51 | m = cfg.MAX_GPUS_PER_NODE if local else cfg.NUM_GPUS
52 | return cfg.NUM_GPUS == 1 or torch.distributed.get_rank() % m == 0
53 |
54 |
55 | def scaled_all_reduce(tensors):
56 | """
57 | Performs the scaled all_reduce operation on the provided tensors.
58 |
59 | The input tensors are modified in-place. Currently supports only the sum
60 | reduction operator. The reduced values are scaled by the inverse size of the
61 | process group (equivalent to cfg.NUM_GPUS).
62 | """
63 | # There is no need for reduction in the single-proc case
64 | if cfg.NUM_GPUS == 1:
65 | return tensors
66 | # Queue the reductions
67 | reductions = []
68 | for tensor in tensors:
69 | reduction = torch.distributed.all_reduce(tensor, async_op=True)
70 | reductions.append(reduction)
71 | # Wait for reductions to finish
72 | for reduction in reductions:
73 | reduction.wait()
74 | # Scale the results
75 | for tensor in tensors:
76 | tensor.mul_(1.0 / cfg.NUM_GPUS)
77 | return tensors
78 |
79 |
80 | def setup_distributed(cfg_state):
81 | """
82 | Initialize torch.distributed and set the CUDA device.
83 |
84 | Expects environment variables to be set as per
85 | https://pytorch.org/docs/stable/distributed.html#environment-variable-initialization
86 | along with the environ variable "LOCAL_RANK" which is used to set the CUDA device.
87 |
88 | This is run inside a new process, so the cfg is reset and must be set explicitly.
89 | """
90 | cfg.defrost()
91 | cfg.update(**cfg_state)
92 | cfg.freeze()
93 | local_rank = int(os.environ["LOCAL_RANK"])
94 | torch.distributed.init_process_group(backend=cfg.DIST_BACKEND)
95 | torch.cuda.set_device(local_rank)
96 |
97 |
98 | def single_proc_run(local_rank, fun, main_port, cfg_state, world_size):
99 | """Executes fun() on a single GPU in a multi-GPU setup."""
100 | os.environ["MASTER_ADDR"] = "localhost"
101 | os.environ["MASTER_PORT"] = str(main_port)
102 | os.environ["RANK"] = str(local_rank)
103 | os.environ["LOCAL_RANK"] = str(local_rank)
104 | os.environ["WORLD_SIZE"] = str(world_size)
105 | setup_distributed(cfg_state)
106 | fun()
107 |
108 |
109 | def multi_proc_run(num_proc, fun):
110 | """Run a single or multi GPU job locally on the current node."""
111 | launch = cfg.LAUNCH
112 | if launch.MODE in ["submitit_local", "slurm"]:
113 | # Launch fun() using submitit either locally or on SLURM
114 | use_slurm = launch.MODE == "slurm"
115 | executor = submitit.AutoExecutor if use_slurm else submitit.LocalExecutor
116 | kwargs = {"slurm_max_num_timeout": launch.MAX_RETRY} if use_slurm else {}
117 | executor = executor(folder=cfg.OUT_DIR, **kwargs)
118 | num_gpus_per_node = min(cfg.NUM_GPUS, cfg.MAX_GPUS_PER_NODE)
119 | executor.update_parameters(
120 | mem_gb=launch.MEM_PER_GPU * num_gpus_per_node,
121 | gpus_per_node=num_gpus_per_node,
122 | tasks_per_node=num_gpus_per_node,
123 | cpus_per_task=launch.CPUS_PER_GPU,
124 | nodes=max(1, cfg.NUM_GPUS // cfg.MAX_GPUS_PER_NODE),
125 | timeout_min=launch.TIME_LIMIT,
126 | name=launch.NAME,
127 | slurm_partition=launch.PARTITION,
128 | slurm_comment=launch.COMMENT,
129 | slurm_constraint=launch.GPU_TYPE,
130 | slurm_additional_parameters={"mail-user": launch.EMAIL, "mail-type": "END"},
131 | )
132 | main_port = random.randint(cfg.PORT_RANGE[0], cfg.PORT_RANGE[1])
133 | job = executor.submit(SubmititRunner(main_port, fun, cfg))
134 | print("Submitted job_id {} with out_dir: {}".format(job.job_id, cfg.OUT_DIR))
135 | if not use_slurm:
136 | job.wait()
137 | elif num_proc > 1:
138 | main_port = random.randint(cfg.PORT_RANGE[0], cfg.PORT_RANGE[1])
139 | mp_runner = torch.multiprocessing.start_processes
140 | args = (fun, main_port, cfg, num_proc)
141 | # Note: we use "fork" below since "spawn" causes time and error regressions.
142 | # Spawn changes the default multiprocessing context to spawn, which doesn't
143 | # interact well with the data loaders (likely due to the use of OpenCV).
144 | mp_runner(single_proc_run, args=args, nprocs=num_proc, start_method="fork")
145 | else:
146 | fun()
147 |
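What scaled_all_reduce computes, illustrated without a process group (four
hypothetical ranks): an all-reduce sum followed by scaling with 1/NUM_GPUS,
i.e. the element-wise mean of each tensor across processes.

    import torch

    world_size = 4
    per_rank = [torch.tensor([1.0, 2.0]) * (r + 1) for r in range(world_size)]
    reduced = sum(per_rank) * (1.0 / world_size)  # sum, then scale by 1/world_size
    print(reduced)  # tensor([2.5000, 5.0000])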
--------------------------------------------------------------------------------
/pycls/core/io.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """IO utilities (adapted from Detectron)"""
9 |
10 | import logging
11 | import os
12 | import re
13 | import sys
14 | from urllib import request as urlrequest
15 |
16 | from iopath.common.file_io import PathManagerFactory
17 |
18 |
19 | # instantiate global path manager for pycls
20 | pathmgr = PathManagerFactory.get()
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 | _PYCLS_BASE_URL = "https://dl.fbaipublicfiles.com/pycls"
25 |
26 |
27 | def cache_url(url_or_file, cache_dir, base_url=_PYCLS_BASE_URL, download=True):
28 | """Download the file specified by the URL to the cache_dir and return the path to
29 | the cached file. If the argument is not a URL, simply return it as is.
30 | """
31 | is_url = re.match(r"^(?:http)s?://", url_or_file, re.IGNORECASE) is not None
32 | if not is_url:
33 | return url_or_file
34 | url = url_or_file
35 | assert url.startswith(base_url), "url must start with: {}".format(base_url)
36 | cache_file_path = url.replace(base_url, cache_dir)
37 | if pathmgr.exists(cache_file_path):
38 | return cache_file_path
39 | cache_file_dir = os.path.dirname(cache_file_path)
40 | if not pathmgr.exists(cache_file_dir):
41 | pathmgr.mkdirs(cache_file_dir)
42 | if download:
43 | logger.info("Downloading remote file {} to {}".format(url, cache_file_path))
44 | download_url(url, cache_file_path)
45 | return cache_file_path
46 |
47 |
48 | def _progress_bar(count, total):
49 | """Report download progress. Credit:
50 | https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113
51 | """
52 | bar_len = 60
53 | filled_len = int(round(bar_len * count / float(total)))
54 | percents = round(100.0 * count / float(total), 1)
55 | bar = "=" * filled_len + "-" * (bar_len - filled_len)
56 | sys.stdout.write(
57 | " [{}] {}% of {:.1f}MB file \r".format(bar, percents, total / 1024 / 1024)
58 | )
59 | sys.stdout.flush()
60 | if count >= total:
61 | sys.stdout.write("\n")
62 |
63 |
64 | def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar):
65 | """Download url and write it to dst_file_path. Credit:
66 | https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
67 | """
68 | req = urlrequest.Request(url)
69 | response = urlrequest.urlopen(req)
70 | total_size = response.info().get("Content-Length").strip()
71 | total_size = int(total_size)
72 | bytes_so_far = 0
73 | with pathmgr.open(dst_file_path, "wb") as f:
74 | while True:
75 | chunk = response.read(chunk_size)
76 | bytes_so_far += len(chunk)
77 | if not chunk:
78 | break
79 | if progress_hook:
80 | progress_hook(bytes_so_far, total_size)
81 | f.write(chunk)
82 | return bytes_so_far
83 |
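A usage sketch for cache_url (the URL suffix and cache directory are
hypothetical; with download=False the mapped cache path is returned without
fetching anything):

    from pycls.core.io import cache_url

    url = "https://dl.fbaipublicfiles.com/pycls/some/model.pyth"  # hypothetical file
    local = cache_url(url, "/tmp/pycls-cache", download=False)
    print(local)  # /tmp/pycls-cache/some/model.pyth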
--------------------------------------------------------------------------------
/pycls/core/logging.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Logging."""
9 |
10 | import builtins
11 | import decimal
12 | import logging
13 | import os
14 | import sys
15 |
16 | import pycls.core.distributed as dist
17 | import simplejson
18 | from pycls.core.config import cfg
19 | from pycls.core.io import pathmgr
20 |
21 |
22 | # Show filename and line number in logs
23 | _FORMAT = "[%(filename)s: %(lineno)3d]: %(message)s"
24 |
25 | # Log file name (for cfg.LOG_DEST = 'file')
26 | _LOG_FILE = "stdout.log"
27 |
28 | # Data output with dump_log_data(data, data_type) will be tagged w/ this
29 | _TAG = "json_stats: "
30 |
31 | # Data output with dump_log_data(data, data_type) will have data[_TYPE]=data_type
32 | _TYPE = "_type"
33 |
34 |
35 | def _suppress_print():
36 | """Suppresses printing from the current process."""
37 |
38 | def ignore(*_objects, _sep=" ", _end="\n", _file=sys.stdout, _flush=False):
39 | pass
40 |
41 | builtins.print = ignore
42 |
43 |
44 | def setup_logging():
45 | """Sets up the logging."""
46 | # Enable logging only for the main process
47 | if dist.is_main_proc():
48 | # Clear the root logger to prevent any existing logging config
49 | # (e.g. set by another module) from messing with our setup
50 | logging.root.handlers = []
51 | # Construct logging configuration
52 | logging_config = {"level": logging.INFO, "format": _FORMAT}
53 | # Log either to stdout or to a file
54 | if cfg.LOG_DEST == "stdout":
55 | logging_config["stream"] = sys.stdout
56 | else:
57 | logging_config["filename"] = os.path.join(cfg.OUT_DIR, _LOG_FILE)
58 | # Configure logging
59 | logging.basicConfig(**logging_config)
60 | else:
61 | _suppress_print()
62 |
63 |
64 | def get_logger(name):
65 | """Retrieves the logger."""
66 | return logging.getLogger(name)
67 |
68 |
69 | def dump_log_data(data, data_type, prec=4):
70 | """Covert data (a dictionary) into tagged json string for logging."""
71 | data[_TYPE] = data_type
72 | data = float_to_decimal(data, prec)
73 | data_json = simplejson.dumps(data, sort_keys=True, use_decimal=True)
74 | return "{:s}{:s}".format(_TAG, data_json)
75 |
76 |
77 | def float_to_decimal(data, prec=4):
78 | """Convert floats to decimals which allows for fixed width json."""
79 | if prec and isinstance(data, dict):
80 | return {k: float_to_decimal(v, prec) for k, v in data.items()}
81 | if prec and isinstance(data, float):
82 | return decimal.Decimal(("{:." + str(prec) + "f}").format(data))
83 | else:
84 | return data
85 |
86 |
87 | def get_log_files(log_dir, name_filter="", log_file=_LOG_FILE):
88 | """Get all log files in directory containing subdirs of trained models."""
89 | names = [n for n in sorted(pathmgr.ls(log_dir)) if name_filter in n]
90 | files = [os.path.join(log_dir, n, log_file) for n in names]
91 | f_n_ps = [(f, n) for (f, n) in zip(files, names) if pathmgr.exists(f)]
92 | files, names = zip(*f_n_ps) if f_n_ps else ([], [])
93 | return files, names
94 |
95 |
96 | def load_log_data(log_file, data_types_to_skip=()):
97 | """Loads log data into a dictionary of the form data[data_type][metric][index]."""
98 | # Load log_file
99 | assert pathmgr.exists(log_file), "Log file not found: {}".format(log_file)
100 | with pathmgr.open(log_file, "r") as f:
101 | lines = f.readlines()
102 | # Extract and parse lines that start with _TAG and have a type specified
103 | lines = [l[l.find(_TAG) + len(_TAG) :] for l in lines if _TAG in l]
104 | lines = [simplejson.loads(l) for l in lines]
105 | lines = [l for l in lines if _TYPE in l and not l[_TYPE] in data_types_to_skip]
106 | # Generate data structure accessed by data[data_type][index][metric]
107 | data_types = [l[_TYPE] for l in lines]
108 | data = {t: [] for t in data_types}
109 | for t, line in zip(data_types, lines):
110 | del line[_TYPE]
111 | data[t].append(line)
112 | # Generate data structure accessed by data[data_type][metric][index]
113 | for t in data:
114 | metrics = sorted(data[t][0].keys())
115 | err_str = "Inconsistent metrics in log for _type={}: {}".format(t, metrics)
116 | assert all(sorted(d.keys()) == metrics for d in data[t]), err_str
117 | data[t] = {m: [d[m] for d in data[t]] for m in metrics}
118 | return data
119 |
120 |
121 | def sort_log_data(data):
122 | """Sort each data[data_type][metric] by epoch or keep only first instance."""
123 | for t in data:
124 | if "epoch" in data[t]:
125 | assert "epoch_ind" not in data[t] and "epoch_max" not in data[t]
126 | data[t]["epoch_ind"] = [int(e.split("/")[0]) for e in data[t]["epoch"]]
127 | data[t]["epoch_max"] = [int(e.split("/")[1]) for e in data[t]["epoch"]]
128 | epoch = data[t]["epoch_ind"]
129 | if "iter" in data[t]:
130 | assert "iter_ind" not in data[t] and "iter_max" not in data[t]
131 | data[t]["iter_ind"] = [int(i.split("/")[0]) for i in data[t]["iter"]]
132 | data[t]["iter_max"] = [int(i.split("/")[1]) for i in data[t]["iter"]]
133 | itr = zip(epoch, data[t]["iter_ind"], data[t]["iter_max"])
134 | epoch = [e + (i_ind - 1) / i_max for e, i_ind, i_max in itr]
135 | for m in data[t]:
136 | data[t][m] = [v for _, v in sorted(zip(epoch, data[t][m]))]
137 | else:
138 | data[t] = {m: d[0] for m, d in data[t].items()}
139 | return data
140 |
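A round-trip sketch of the json_stats format (a plain function call, no logger
setup; the values are made up): floats are fixed to prec decimal places and
keys are sorted, so log lines stay stable and greppable.

    from pycls.core.logging import dump_log_data

    line = dump_log_data({"epoch": "1/10", "top1_err": 12.3456789}, "test_epoch")
    print(line)
    # json_stats: {"_type": "test_epoch", "epoch": "1/10", "top1_err": 12.3457}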
--------------------------------------------------------------------------------
/pycls/core/net.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Functions for manipulating networks."""
9 |
10 | import itertools
11 |
12 | import numpy as np
13 | import pycls.core.distributed as dist
14 | import torch
15 | from pycls.core.config import cfg
16 |
17 |
18 | def unwrap_model(model):
19 | """Remove the DistributedDataParallel wrapper if present."""
20 | wrapped = isinstance(model, torch.nn.parallel.distributed.DistributedDataParallel)
21 | return model.module if wrapped else model
22 |
23 |
24 | @torch.no_grad()
25 | def compute_precise_bn_stats(model, loader):
26 | """Computes precise BN stats on training data."""
27 | # Compute the number of minibatches to use
28 | num_iter = int(cfg.BN.NUM_SAMPLES_PRECISE / loader.batch_size / cfg.NUM_GPUS)
29 | num_iter = min(num_iter, len(loader))
30 | # Retrieve the BN layers
31 | bns = [m for m in model.modules() if isinstance(m, torch.nn.BatchNorm2d)]
32 | # Initialize BN stats storage for computing mean(mean(batch)) and mean(var(batch))
33 | running_means = [torch.zeros_like(bn.running_mean) for bn in bns]
34 | running_vars = [torch.zeros_like(bn.running_var) for bn in bns]
35 | # Remember momentum values
36 | momentums = [bn.momentum for bn in bns]
37 | # Set momentum to 1.0 to compute BN stats that only reflect the current batch
38 | for bn in bns:
39 | bn.momentum = 1.0
40 | # Average the BN stats for each BN layer over the batches
41 | for inputs, _labels in itertools.islice(loader, num_iter):
42 | model(inputs.cuda())
43 | for i, bn in enumerate(bns):
44 | running_means[i] += bn.running_mean / num_iter
45 | running_vars[i] += bn.running_var / num_iter
46 | # Sync BN stats across GPUs (no reduction if 1 GPU used)
47 | running_means = dist.scaled_all_reduce(running_means)
48 | running_vars = dist.scaled_all_reduce(running_vars)
49 | # Set BN stats and restore original momentum values
50 | for i, bn in enumerate(bns):
51 | bn.running_mean = running_means[i]
52 | bn.running_var = running_vars[i]
53 | bn.momentum = momentums[i]
54 |
55 |
56 | def complexity(model):
57 | """Compute model complexity (model can be model instance or model class)."""
58 | size = cfg.TRAIN.IM_SIZE
59 | cx = {"h": size, "w": size, "flops": 0, "params": 0, "acts": 0}
60 | cx = unwrap_model(model).complexity(cx)
61 | return {"flops": cx["flops"], "params": cx["params"], "acts": cx["acts"]}
62 |
63 |
64 | def smooth_one_hot_labels(labels):
65 | """Convert each label to a one-hot vector."""
66 | n_classes, label_smooth = cfg.MODEL.NUM_CLASSES, cfg.TRAIN.LABEL_SMOOTHING
67 | err_str = "Invalid input to one_hot_vector()"
68 | assert labels.ndim == 1 and labels.max() < n_classes, err_str
69 | shape = (labels.shape[0], n_classes)
70 | neg_val = label_smooth / n_classes
71 | pos_val = 1.0 - label_smooth + neg_val
72 | labels_one_hot = torch.full(shape, neg_val, dtype=torch.float, device=labels.device)
73 | labels_one_hot.scatter_(1, labels.long().view(-1, 1), pos_val)
74 | return labels_one_hot
75 |
76 |
77 | class SoftCrossEntropyLoss(torch.nn.Module):
78 | """SoftCrossEntropyLoss (useful for label smoothing and mixup).
79 | Identical to torch.nn.CrossEntropyLoss if used with one-hot labels."""
80 |
81 | def __init__(self):
82 | super(SoftCrossEntropyLoss, self).__init__()
83 |
84 | def forward(self, x, y):
85 | loss = -y * torch.nn.functional.log_softmax(x, -1)
86 | return torch.sum(loss) / x.shape[0]
87 |
88 |
89 | def mixup(inputs, labels):
90 | """
91 | Applies mixup or cutmix to a minibatch depending on MIXUP_ALPHA and CUTMIX_ALPHA.
92 | If MIXUP_ALPHA > 0, applies mixup (https://arxiv.org/abs/1710.09412).
93 | If CUTMIX_ALPHA > 0, applies cutmix (https://arxiv.org/abs/1905.04899).
94 | If both MIXUP_ALPHA > 0 and CUTMIX_ALPHA > 0, one is applied with a 50-50 chance.
95 | """
96 | assert labels.shape[1] == cfg.MODEL.NUM_CLASSES, "mixup labels must be one-hot"
97 | mixup_alpha, cutmix_alpha = cfg.TRAIN.MIXUP_ALPHA, cfg.TRAIN.CUTMIX_ALPHA
98 | mixup_alpha = mixup_alpha if (cutmix_alpha == 0 or np.random.rand() < 0.5) else 0
99 | if mixup_alpha > 0:
100 | m = np.random.beta(mixup_alpha, mixup_alpha)
101 | permutation = torch.randperm(labels.shape[0])
102 | inputs = m * inputs + (1.0 - m) * inputs[permutation, :]
103 | labels = m * labels + (1.0 - m) * labels[permutation, :]
104 | elif cutmix_alpha > 0:
105 | m = np.random.beta(cutmix_alpha, cutmix_alpha)
106 | permutation = torch.randperm(labels.shape[0])
107 | h, w = inputs.shape[2], inputs.shape[3]
108 | w_b, h_b = int(w * np.sqrt(1.0 - m)), int(h * np.sqrt(1.0 - m))
109 | x_c, y_c = np.random.randint(w), np.random.randint(h)
110 | x_0, y_0 = np.clip(x_c - w_b // 2, 0, w), np.clip(y_c - h_b // 2, 0, h)
111 | x_1, y_1 = np.clip(x_c + w_b // 2, 0, w), np.clip(y_c + h_b // 2, 0, h)
112 | m = 1.0 - ((x_1 - x_0) * (y_1 - y_0) / (h * w))
113 | inputs[:, :, y_0:y_1, x_0:x_1] = inputs[permutation, :, y_0:y_1, x_0:x_1]
114 | labels = m * labels + (1.0 - m) * labels[permutation, :]
115 | return inputs, labels, labels.argmax(1)
116 |
117 |
118 | def update_model_ema(model, model_ema, cur_epoch, cur_iter):
119 | """Update exponential moving average (ema) of model weights."""
120 | update_period = cfg.OPTIM.EMA_UPDATE_PERIOD
121 | if update_period == 0 or cur_iter % update_period != 0:
122 | return
123 | # Adjust alpha to be fairly independent of other parameters
124 | adjust = cfg.TRAIN.BATCH_SIZE / cfg.OPTIM.MAX_EPOCH * update_period
125 | alpha = min(1.0, cfg.OPTIM.EMA_ALPHA * adjust)
126 | # During warmup simply copy over weights instead of using ema
127 | alpha = 1.0 if cur_epoch < cfg.OPTIM.WARMUP_EPOCHS else alpha
128 | # Take ema of all parameters (not just named parameters)
129 | params = unwrap_model(model).state_dict()
130 | for name, param in unwrap_model(model_ema).state_dict().items():
131 | param.copy_(param * (1.0 - alpha) + params[name] * alpha)
132 |
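A standalone numeric sketch of the smoothing above (hypothetical n_classes=5
and label_smooth=0.1; no cfg needed): off-target entries get 0.1/5 = 0.02 and
the target gets 1 - 0.1 + 0.02 = 0.92, so each row still sums to 1.

    import torch

    labels = torch.tensor([3])
    n_classes, label_smooth = 5, 0.1
    neg_val = label_smooth / n_classes
    pos_val = 1.0 - label_smooth + neg_val
    one_hot = torch.full((1, n_classes), neg_val)
    one_hot.scatter_(1, labels.view(-1, 1), pos_val)
    print(one_hot)  # tensor([[0.0200, 0.0200, 0.0200, 0.9200, 0.0200]])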
--------------------------------------------------------------------------------
/pycls/core/optimizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Optimizer."""
9 |
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | import torch
13 | from pycls.core.config import cfg
14 |
15 |
16 | def construct_optimizer(model):
17 | """Constructs the optimizer.
18 |
19 | Note that the momentum update in PyTorch differs from the one in Caffe2.
20 | In particular,
21 |
22 | Caffe2:
23 | V := mu * V + lr * g
24 | p := p - V
25 |
26 | PyTorch:
27 | V := mu * V + g
28 | p := p - lr * V
29 |
30 | where V is the velocity, mu is the momentum factor, lr is the learning rate,
31 | g is the gradient and p are the parameters.
32 |
33 | Since V is defined independently of the learning rate in PyTorch,
34 | when the learning rate is changed there is no need to perform the
35 | momentum correction by scaling V (unlike in the Caffe2 case).
36 | """
37 | # Split parameters into types and get weight decay for each type
38 | optim, wd, params = cfg.OPTIM, cfg.OPTIM.WEIGHT_DECAY, [[], [], [], []]
39 | for n, p in model.named_parameters():
40 | ks = [k for (k, x) in enumerate(["bn", "ln", "bias", ""]) if x in n]
41 | params[ks[0]].append(p)
42 | wds = [
43 | cfg.BN.CUSTOM_WEIGHT_DECAY if cfg.BN.USE_CUSTOM_WEIGHT_DECAY else wd,
44 | cfg.LN.CUSTOM_WEIGHT_DECAY if cfg.LN.USE_CUSTOM_WEIGHT_DECAY else wd,
45 | optim.BIAS_CUSTOM_WEIGHT_DECAY if optim.BIAS_USE_CUSTOM_WEIGHT_DECAY else wd,
46 | wd,
47 | ]
48 | param_wds = [{"params": p, "weight_decay": w} for (p, w) in zip(params, wds) if p]
49 | # Set up optimizer
50 | if optim.OPTIMIZER == "sgd":
51 | if cfg.OPTIM.MTA:
52 | optimizer_fn = torch.optim._multi_tensor.SGD
53 | else:
54 | optimizer_fn = torch.optim.SGD
55 | return optimizer_fn(
56 | param_wds,
57 | lr=optim.BASE_LR,
58 | momentum=optim.MOMENTUM,
59 | weight_decay=wd,
60 | dampening=optim.DAMPENING,
61 | nesterov=optim.NESTEROV,
62 | )
63 | elif optim.OPTIMIZER == "adam":
64 | if cfg.OPTIM.MTA:
65 | optimizer_fn = torch.optim._multi_tensor.Adam
66 | else:
67 | optimizer_fn = torch.optim.Adam
68 | return optimizer_fn(
69 | param_wds,
70 | lr=optim.BASE_LR,
71 | betas=(optim.BETA1, optim.BETA2),
72 | weight_decay=wd,
73 | )
74 | elif optim.OPTIMIZER == "adamw":
75 | if cfg.OPTIM.MTA:
76 | optimizer_fn = torch.optim._multi_tensor.AdamW
77 | else:
78 | optimizer_fn = torch.optim.AdamW
79 | return optimizer_fn(
80 | param_wds,
81 | lr=optim.BASE_LR,
82 | betas=(optim.BETA1, optim.BETA2),
83 | weight_decay=wd,
84 | )
85 | else:
86 | raise NotImplementedError
87 |
88 |
89 | def lr_fun_steps(cur_epoch):
90 | """Steps schedule (cfg.OPTIM.LR_POLICY = 'steps')."""
91 | ind = [i for i, s in enumerate(cfg.OPTIM.STEPS) if cur_epoch >= s][-1]
92 | return cfg.OPTIM.LR_MULT**ind
93 |
94 |
95 | def lr_fun_exp(cur_epoch):
96 | """Exponential schedule (cfg.OPTIM.LR_POLICY = 'exp')."""
97 | return cfg.OPTIM.MIN_LR ** (cur_epoch / cfg.OPTIM.MAX_EPOCH)
98 |
99 |
100 | def lr_fun_cos(cur_epoch):
101 | """Cosine schedule (cfg.OPTIM.LR_POLICY = 'cos')."""
102 | lr = 0.5 * (1.0 + np.cos(np.pi * cur_epoch / cfg.OPTIM.MAX_EPOCH))
103 | return (1.0 - cfg.OPTIM.MIN_LR) * lr + cfg.OPTIM.MIN_LR
104 |
105 |
106 | def lr_fun_lin(cur_epoch):
107 | """Linear schedule (cfg.OPTIM.LR_POLICY = 'lin')."""
108 | lr = 1.0 - cur_epoch / cfg.OPTIM.MAX_EPOCH
109 | return (1.0 - cfg.OPTIM.MIN_LR) * lr + cfg.OPTIM.MIN_LR
110 |
111 |
112 | def get_lr_fun():
113 | """Retrieves the specified lr policy function"""
114 | lr_fun = "lr_fun_" + cfg.OPTIM.LR_POLICY
115 | assert lr_fun in globals(), "Unknown LR policy: " + cfg.OPTIM.LR_POLICY
116 | err_str = "exp lr policy requires OPTIM.MIN_LR to be greater than 0."
117 | assert cfg.OPTIM.LR_POLICY != "exp" or cfg.OPTIM.MIN_LR > 0, err_str
118 | return globals()[lr_fun]
119 |
120 |
121 | def get_epoch_lr(cur_epoch):
122 | """Retrieves the lr for the given epoch according to the policy."""
123 | # Get lr and scale by BASE_LR
124 | lr = get_lr_fun()(cur_epoch) * cfg.OPTIM.BASE_LR
125 | # Linear warmup
126 | if cur_epoch < cfg.OPTIM.WARMUP_EPOCHS:
127 | alpha = cur_epoch / cfg.OPTIM.WARMUP_EPOCHS
128 | warmup_factor = cfg.OPTIM.WARMUP_FACTOR * (1.0 - alpha) + alpha
129 | lr *= warmup_factor
130 | return lr
131 |
132 |
133 | def set_lr(optimizer, new_lr):
134 | """Sets the optimizer lr to the specified value."""
135 | for param_group in optimizer.param_groups:
136 | param_group["lr"] = new_lr
137 |
138 |
139 | def plot_lr_fun():
140 | """Visualizes lr function."""
141 | epochs = list(range(cfg.OPTIM.MAX_EPOCH))
142 | lrs = [get_epoch_lr(epoch) for epoch in epochs]
143 | plt.plot(epochs, lrs, ".-")
144 | plt.title("lr_policy: {}".format(cfg.OPTIM.LR_POLICY))
145 | plt.xlabel("epochs")
146 | plt.ylabel("learning rate")
147 | plt.ylim(bottom=0)
148 | plt.show()
149 |
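A standalone sketch of the cosine policy with linear warmup, mirroring
lr_fun_cos and get_epoch_lr above (all values hypothetical: BASE_LR=0.4,
MIN_LR=0, MAX_EPOCH=100, WARMUP_EPOCHS=5, WARMUP_FACTOR=0.1):

    import numpy as np

    base_lr, min_lr, max_epoch = 0.4, 0.0, 100
    warmup_epochs, warmup_factor = 5, 0.1

    def epoch_lr(cur_epoch):
        lr = 0.5 * (1.0 + np.cos(np.pi * cur_epoch / max_epoch))  # lr_fun_cos
        lr = ((1.0 - min_lr) * lr + min_lr) * base_lr
        if cur_epoch < warmup_epochs:  # linear warmup
            alpha = cur_epoch / warmup_epochs
            lr *= warmup_factor * (1.0 - alpha) + alpha
        return lr

    print(epoch_lr(0))   # 0.04 (warmup start: BASE_LR * WARMUP_FACTOR)
    print(epoch_lr(50))  # ~0.2 (cosine midpoint: half of BASE_LR)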
--------------------------------------------------------------------------------
/pycls/core/plotting.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Plotting functions."""
9 |
10 | import colorlover as cl
11 | import matplotlib.pyplot as plt
12 | import plotly.graph_objs as go
13 | import plotly.offline as offline
14 | import pycls.core.logging as logging
15 |
16 |
17 | def get_plot_colors(max_colors, color_format="pyplot"):
18 | """Generate colors for plotting."""
19 | colors = cl.scales["11"]["qual"]["Paired"]
20 | if max_colors > len(colors):
21 | colors = cl.to_rgb(cl.interp(colors, max_colors))
22 | if color_format == "pyplot":
23 | return [[j / 255.0 for j in c] for c in cl.to_numeric(colors)]
24 | return colors
25 |
26 |
27 | def prepare_plot_data(log_files, names, metric="top1_err"):
28 | """Load logs and extract data for plotting error curves."""
29 | plot_data = []
30 | for file, name in zip(log_files, names):
31 | d, data = {}, logging.sort_log_data(logging.load_log_data(file))
32 | for phase in ["train", "test"]:
33 | x = data[phase + "_epoch"]["epoch_ind"]
34 | y = data[phase + "_epoch"][metric]
35 | d["x_" + phase], d["y_" + phase] = x, y
36 | d[phase + "_label"] = "[{:5.2f}] ".format(min(y) if y else 0) + name
37 | plot_data.append(d)
38 | assert len(plot_data) > 0, "No data to plot"
39 | return plot_data
40 |
41 |
42 | def plot_error_curves_plotly(log_files, names, filename, metric="top1_err"):
43 | """Plot error curves using plotly and save to file."""
44 | plot_data = prepare_plot_data(log_files, names, metric)
45 | colors = get_plot_colors(len(plot_data), "plotly")
46 | # Prepare data for plots (3 sets, train duplicated w and w/o legend)
47 | data = []
48 | for i, d in enumerate(plot_data):
49 | s = str(i)
50 | line_train = {"color": colors[i], "dash": "dashdot", "width": 1.5}
51 | line_test = {"color": colors[i], "dash": "solid", "width": 1.5}
52 | data.append(
53 | go.Scatter(
54 | x=d["x_train"],
55 | y=d["y_train"],
56 | mode="lines",
57 | name=d["train_label"],
58 | line=line_train,
59 | legendgroup=s,
60 | visible=True,
61 | showlegend=False,
62 | )
63 | )
64 | data.append(
65 | go.Scatter(
66 | x=d["x_test"],
67 | y=d["y_test"],
68 | mode="lines",
69 | name=d["test_label"],
70 | line=line_test,
71 | legendgroup=s,
72 | visible=True,
73 | showlegend=True,
74 | )
75 | )
76 | data.append(
77 | go.Scatter(
78 | x=d["x_train"],
79 | y=d["y_train"],
80 | mode="lines",
81 | name=d["train_label"],
82 | line=line_train,
83 | legendgroup=s,
84 | visible=False,
85 | showlegend=True,
86 | )
87 | )
88 | # Prepare layout w ability to toggle 'all', 'train', 'test'
89 | titlefont = {"size": 18, "color": "#7f7f7f"}
90 | vis = [[True, True, False], [False, False, True], [False, True, False]]
91 | buttons = zip(["all", "train", "test"], [[{"visible": v}] for v in vis])
92 | buttons = [{"label": b, "args": v, "method": "update"} for b, v in buttons]
93 | layout = go.Layout(
94 | title=metric + " vs. epoch<br>[dash=train, solid=test]",
95 | xaxis={"title": "epoch", "titlefont": titlefont},
96 | yaxis={"title": metric, "titlefont": titlefont},
97 | showlegend=True,
98 | hoverlabel={"namelength": -1},
99 | updatemenus=[
100 | {
101 | "buttons": buttons,
102 | "direction": "down",
103 | "showactive": True,
104 | "x": 1.02,
105 | "xanchor": "left",
106 | "y": 1.08,
107 | "yanchor": "top",
108 | }
109 | ],
110 | )
111 | # Create plotly plot
112 | offline.plot({"data": data, "layout": layout}, filename=filename)
113 |
114 |
115 | def plot_error_curves_pyplot(log_files, names, filename=None, metric="top1_err"):
116 | """Plot error curves using matplotlib.pyplot and save to file."""
117 | plot_data = prepare_plot_data(log_files, names, metric)
118 | colors = get_plot_colors(len(names))
119 | for ind, d in enumerate(plot_data):
120 | c, lbl = colors[ind], d["test_label"]
121 | plt.plot(d["x_train"], d["y_train"], "--", c=c, alpha=0.8)
122 | plt.plot(d["x_test"], d["y_test"], "-", c=c, alpha=0.8, label=lbl)
123 | plt.title(metric + " vs. epoch\n[dash=train, solid=test]", fontsize=14)
124 | plt.xlabel("epoch", fontsize=14)
125 | plt.ylabel(metric, fontsize=14)
126 | plt.grid(alpha=0.4)
127 | plt.legend()
128 | if filename:
129 | plt.savefig(filename)
130 | plt.clf()
131 | else:
132 | plt.show()
133 |
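A small sketch of the color helper (requires colorlover; with the "pyplot"
format each entry is an RGB triple scaled to [0, 1], and the returned palette
may contain more entries than requested):

    from pycls.core.plotting import get_plot_colors

    colors = get_plot_colors(4)  # enough distinct colors for 4 curves
    print(colors[0])             # an [r, g, b] triple with components in [0, 1]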
--------------------------------------------------------------------------------
/pycls/core/timer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Timer."""
9 |
10 | import time
11 |
12 |
13 | class Timer:
14 | """A simple timer (adapted from Detectron)."""
15 |
16 | def __init__(self):
17 | self.total_time = None
18 | self.calls = None
19 | self.start_time = None
20 | self.diff = None
21 | self.average_time = None
22 | self.reset()
23 |
24 | def tic(self):
25 | # using time.time as time.clock does not normalize for multithreading
26 | self.start_time = time.time()
27 |
28 | def toc(self):
29 | self.diff = time.time() - self.start_time
30 | self.total_time += self.diff
31 | self.calls += 1
32 | self.average_time = self.total_time / self.calls
33 |
34 | def reset(self):
35 | self.total_time = 0.0
36 | self.calls = 0
37 | self.start_time = 0.0
38 | self.diff = 0.0
39 | self.average_time = 0.0
40 |
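Usage sketch: tic/toc around a repeated operation, then read the running
average (time.sleep stands in for the timed work).

    import time

    from pycls.core.timer import Timer

    timer = Timer()
    for _ in range(3):
        timer.tic()
        time.sleep(0.01)  # stand-in for the timed work
        timer.toc()
    print("avg: {:.3f}s over {} calls".format(timer.average_time, timer.calls))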
--------------------------------------------------------------------------------
/pycls/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/pycls/5719de641657c251f807c20a1fcf8ef6f4b60144/pycls/datasets/__init__.py
--------------------------------------------------------------------------------
/pycls/datasets/cifar10.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """CIFAR10 dataset."""
9 |
10 | import os
11 | import pickle
12 |
13 | import numpy as np
14 | import pycls.core.logging as logging
15 | import torch.utils.data
16 | from pycls.core.config import cfg
17 | from pycls.core.io import pathmgr
18 |
19 |
20 | logger = logging.get_logger(__name__)
21 |
22 | # Per-channel mean and standard deviation values on CIFAR
23 | _MEAN = [125.3, 123.0, 113.9]
24 | _STD = [63.0, 62.1, 66.7]
25 |
26 |
27 | class Cifar10(torch.utils.data.Dataset):
28 | """CIFAR-10 dataset."""
29 |
30 | def __init__(self, data_path, split):
31 | assert pathmgr.exists(data_path), "Data path '{}' not found".format(data_path)
32 | splits = ["train", "test"]
33 | assert split in splits, "Split '{}' not supported for cifar".format(split)
34 | logger.info("Constructing CIFAR-10 {}...".format(split))
35 | self._data_path, self._split = data_path, split
36 | self._inputs, self._labels = self._load_data()
37 |
38 | def _load_data(self):
39 | """Loads data into memory."""
40 | logger.info("{} data path: {}".format(self._split, self._data_path))
41 | # Compute data batch names
42 | if self._split == "train":
43 | batch_names = ["data_batch_{}".format(i) for i in range(1, 6)]
44 | else:
45 | batch_names = ["test_batch"]
46 | # Load data batches
47 | inputs, labels = [], []
48 | for batch_name in batch_names:
49 | batch_path = os.path.join(self._data_path, batch_name)
50 | with pathmgr.open(batch_path, "rb") as f:
51 | data = pickle.load(f, encoding="bytes")
52 | inputs.append(data[b"data"])
53 | labels += data[b"labels"]
54 | # Combine and reshape the inputs
55 | assert cfg.TRAIN.IM_SIZE == 32, "CIFAR-10 images are 32x32"
56 | inputs = np.vstack(inputs).astype(np.float32)
57 | inputs = inputs.reshape((-1, 3, cfg.TRAIN.IM_SIZE, cfg.TRAIN.IM_SIZE))
58 | return inputs, labels
59 |
60 | def _prepare_im(self, im):
61 | """Prepares the image for network input."""
62 | for i in range(3):
63 | # Perform per-channel normalization on CHW image
64 | im[i] = (im[i] - _MEAN[i]) / _STD[i]
65 | if self._split == "train":
66 | # Randomly flip and crop center patch from CHW image
67 | size = cfg.TRAIN.IM_SIZE
68 | im = im[:, :, ::-1] if np.random.uniform() < 0.5 else im
69 | im = np.pad(im, ((0, 0), (4, 4), (4, 4)), mode="constant")
70 | y = np.random.randint(0, im.shape[1] - size)
71 | x = np.random.randint(0, im.shape[2] - size)
72 | im = im[:, y : (y + size), x : (x + size)]
73 | return im
74 |
75 | def __getitem__(self, index):
76 | im, label = self._inputs[index, ...].copy(), self._labels[index]
77 | im = self._prepare_im(im)
78 | return im, label
79 |
80 | def __len__(self):
81 | return self._inputs.shape[0]
82 |
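A usage sketch (the path is hypothetical; assumes the CIFAR-10 "python
version" batches are unpacked there and a CIFAR config is loaded so that
cfg.TRAIN.IM_SIZE == 32):

    from pycls.datasets.cifar10 import Cifar10

    train_set = Cifar10("/path/to/cifar-10-batches-py", "train")  # hypothetical path
    im, label = train_set[0]
    print(im.shape, label)  # (3, 32, 32) CHW float image and an int label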
--------------------------------------------------------------------------------
/pycls/datasets/loader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Data loader."""
9 |
10 | import os
11 |
12 | import torch
13 | from pycls.core.config import cfg
14 | from pycls.datasets.cifar10 import Cifar10
15 | from pycls.datasets.imagenet import ImageNet, ImageNetFFCV
16 | from torch.utils.data.distributed import DistributedSampler
17 | from torch.utils.data.sampler import RandomSampler
18 |
19 |
20 | try:
21 | from ffcv.loader import Loader, OrderOption
22 | except ImportError:
23 | import pycls.core.logging as logging
24 |
25 | logger = logging.get_logger(__name__)
26 | logger.info("ffcv.loader failed to import")
27 |
28 | # Supported data loaders
29 | FFCV = "ffcv"
30 |
31 | # Supported datasets
32 | _DATASETS = {"cifar10": Cifar10, "imagenet": ImageNet}
33 | _FFCV_DATASETS = {"imagenet": ImageNetFFCV}
34 |
35 | # Default data directory (/path/pycls/pycls/datasets/data)
36 | _DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
37 |
38 | # Relative data paths to default data directory
39 | _PATHS = {"cifar10": "cifar10", "imagenet": "imagenet"}
40 |
41 |
42 | def _construct_loader(dataset_name, split, batch_size, shuffle, drop_last):
43 | """Constructs the data loader for the given dataset."""
44 | err_str = "Dataset '{}' not supported".format(dataset_name)
45 | assert dataset_name in _DATASETS and dataset_name in _PATHS, err_str
46 | # Retrieve the data path for the dataset
47 | data_path = os.path.join(_DATA_DIR, _PATHS[dataset_name])
48 | # Construct the dataset
49 | dataset = _DATASETS[dataset_name](data_path, split)
50 | # Create a sampler for multi-process training
51 | sampler = DistributedSampler(dataset) if cfg.NUM_GPUS > 1 else None
52 | # Create a loader
53 | loader = torch.utils.data.DataLoader(
54 | dataset,
55 | batch_size=batch_size,
56 | shuffle=(False if sampler else shuffle),
57 | sampler=sampler,
58 | num_workers=cfg.DATA_LOADER.NUM_WORKERS,
59 | pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
60 | drop_last=drop_last,
61 | )
62 | return loader
63 |
64 |
65 | def _construct_loader_ffcv(dataset_name, split, batch_size, shuffle, drop_last):
66 | """Constructs the data loader via ffcv for the given dataset."""
67 | err_str = "Dataset '{}' not supported".format(dataset_name)
68 | assert dataset_name in _DATASETS and dataset_name in _PATHS, err_str
69 | # Retrieve the data path for the dataset
70 | data_path = os.path.join(_DATA_DIR, "..", "ffcv", _PATHS[dataset_name])
71 | # Construct the dataset
72 | dataset = _FFCV_DATASETS[dataset_name](data_path, split)
73 | # Create a loader
74 | dataset.construct_ffcv()
75 | loader = Loader(
76 | dataset.split_path,
77 | batch_size=batch_size,
78 | num_workers=cfg.DATA_LOADER.NUM_WORKERS,
79 | order=OrderOption.RANDOM if shuffle else OrderOption.SEQUENTIAL,
80 | os_cache=dataset.os_cache,
81 | drop_last=drop_last,
82 | pipelines=dataset.pipelines,
83 | distributed=dataset.distributed,
84 | )
85 | return loader
86 |
87 |
88 | def construct_train_loader():
89 | """Train loader wrapper."""
90 | if cfg.DATA_LOADER.MODE == "ffcv":
91 | return _construct_loader_ffcv(
92 | dataset_name=cfg.TRAIN.DATASET,
93 | split=cfg.TRAIN.SPLIT,
94 | batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS),
95 | shuffle=True,
96 | drop_last=True,
97 | )
98 | return _construct_loader(
99 | dataset_name=cfg.TRAIN.DATASET,
100 | split=cfg.TRAIN.SPLIT,
101 | batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS),
102 | shuffle=True,
103 | drop_last=True,
104 | )
105 |
106 |
107 | def construct_test_loader():
108 | """Test loader wrapper."""
109 | if cfg.DATA_LOADER.MODE == "ffcv":
110 | return _construct_loader_ffcv(
111 | dataset_name=cfg.TEST.DATASET,
112 | split=cfg.TEST.SPLIT,
113 | batch_size=int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS),
114 | shuffle=False,
115 | drop_last=False,
116 | )
117 | return _construct_loader(
118 | dataset_name=cfg.TEST.DATASET,
119 | split=cfg.TEST.SPLIT,
120 | batch_size=int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS),
121 | shuffle=False,
122 | drop_last=False,
123 | )
124 |
125 |
126 | def shuffle(loader, cur_epoch):
127 | """ "Shuffles the data."""
128 | err_str = "Sampler type '{}' not supported".format(type(loader.sampler))
129 | assert isinstance(loader.sampler, (RandomSampler, DistributedSampler)), err_str
130 | # RandomSampler handles shuffling automatically
131 | if isinstance(loader.sampler, DistributedSampler):
132 | # DistributedSampler shuffles data based on epoch
133 | loader.sampler.set_epoch(cur_epoch)
134 |
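A driver sketch (assumes the datasets are set up under pycls/datasets/data as
described in docs/DATA.md and that cfg is fully loaded; note that
cfg.TRAIN.BATCH_SIZE is the global batch size and is divided by NUM_GPUS
inside the constructors):

    from pycls.datasets import loader

    train_loader = loader.construct_train_loader()
    test_loader = loader.construct_test_loader()
    for cur_epoch in range(2):
        loader.shuffle(train_loader, cur_epoch)  # advances DistributedSampler epoch
        for inputs, labels in train_loader:
            pass  # training step goes here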
--------------------------------------------------------------------------------
/pycls/datasets/transforms.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Image transformations on HWC float images with RGB channel order."""
9 |
10 | from math import ceil, sqrt
11 |
12 | import cv2
13 | import numpy as np
14 | from PIL import Image
15 | from pycls.datasets.augment import make_augment
16 |
17 |
18 | def scale_and_center_crop(im, scale_size, crop_size):
19 | """Performs scaling and center cropping (used for testing)."""
20 | h, w = im.shape[:2]
21 | if w < h and w != scale_size:
22 | w, h = scale_size, int(h / w * scale_size)
23 | im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
24 | elif h <= w and h != scale_size:
25 | w, h = int(w / h * scale_size), scale_size
26 | im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
27 | x = ceil((w - crop_size) / 2)
28 | y = ceil((h - crop_size) / 2)
29 | return im[y : (y + crop_size), x : (x + crop_size), :]
30 |
31 |
32 | def random_sized_crop(im, size, area_frac=0.08, max_iter=10):
33 | """Performs Inception-style cropping (used for training)."""
34 | h, w = im.shape[:2]
35 | area = h * w
36 | for _ in range(max_iter):
37 | target_area = np.random.uniform(area_frac, 1.0) * area
38 | aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0)
39 | w_crop = round(sqrt(target_area * aspect_ratio))
40 | h_crop = round(sqrt(target_area / aspect_ratio))
41 | if np.random.uniform() < 0.5:
42 | w_crop, h_crop = h_crop, w_crop
43 | if h_crop <= h and w_crop <= w:
44 | y = 0 if h_crop == h else np.random.randint(0, h - h_crop)
45 | x = 0 if w_crop == w else np.random.randint(0, w - w_crop)
46 | im = im[y : (y + h_crop), x : (x + w_crop), :]
47 | return cv2.resize(im, (size, size), interpolation=cv2.INTER_LINEAR)
48 | return scale_and_center_crop(im, size, size)
49 |
50 |
51 | def horizontal_flip(im, prob=0.5):
52 | """Performs horizontal flip (used for training)."""
53 | return im[:, ::-1, :] if np.random.uniform() < prob else im
54 |
55 |
56 | def augment(im, augment_str):
57 | """Augments image (used for training)."""
58 | if augment_str:
59 | im = Image.fromarray((im * 255).astype(np.uint8))
60 | im = make_augment(augment_str)(im)
61 | im = np.asarray(im).astype(np.float32) / 255
62 | return im
63 |
64 |
65 | def lighting(im, alpha_std, eig_val, eig_vec):
66 | """Performs AlexNet-style PCA jitter (used for training)."""
67 | alpha = np.random.normal(0, alpha_std, size=(1, 3))
68 | alpha = np.repeat(alpha, 3, axis=0)
69 | eig_val = np.repeat(np.array(eig_val), 3, axis=0)
70 | rgb = np.sum(np.array(eig_vec) * alpha * eig_val, axis=1)
71 | for i in range(3):
72 | im[:, :, i] = im[:, :, i] + rgb[i]
73 | return im
74 |
75 |
76 | def color_norm(im, mean, std):
77 | """Performs per-channel normalization (used for training and testing)."""
78 | for i in range(3):
79 | im[:, :, i] = (im[:, :, i] - mean[i]) / std[i]
80 | return im
81 |
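A numeric sketch of the test-time protocol above (the array shape is made up):
a 256x512 HWC image is scaled so its short side is 232, then center-cropped
to 224.

    import numpy as np

    from pycls.datasets.transforms import scale_and_center_crop

    im = np.random.rand(256, 512, 3).astype(np.float32)  # HWC float image
    out = scale_and_center_crop(im, scale_size=232, crop_size=224)
    print(out.shape)  # (224, 224, 3)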
--------------------------------------------------------------------------------
/pycls/models/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Expose model zoo constructors."""
9 |
10 | from pycls.models.model_zoo import effnet, regnetx, regnety, resnet, resnext
11 |
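A usage sketch for the zoo constructors (assumes each constructor accepts a
model name and a pretrained flag; with pretrained=True the weights are
downloaded from the pycls base URL):

    from pycls.models import regnetx

    model = regnetx("400MF", pretrained=False)
    model.eval()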
--------------------------------------------------------------------------------
/pycls/models/effnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """EfficientNet models."""
9 |
10 | from pycls.core.config import cfg
11 | from pycls.models.blocks import (
12 | activation,
13 | conv2d,
14 | conv2d_cx,
15 | drop_connect,
16 | gap2d,
17 | gap2d_cx,
18 | init_weights,
19 | linear,
20 | linear_cx,
21 | norm2d,
22 | norm2d_cx,
23 | SE,
24 | )
25 | from torch.nn import Dropout, Module
26 |
27 |
28 | class EffHead(Module):
29 | """EfficientNet head: 1x1, BN, AF, AvgPool, Dropout, FC."""
30 |
31 | def __init__(self, w_in, w_out, num_classes):
32 | super(EffHead, self).__init__()
33 | dropout_ratio = cfg.EN.DROPOUT_RATIO
34 | self.conv = conv2d(w_in, w_out, 1)
35 | self.conv_bn = norm2d(w_out)
36 | self.conv_af = activation()
37 | self.avg_pool = gap2d(w_out)
38 | self.dropout = Dropout(p=dropout_ratio) if dropout_ratio > 0 else None
39 | self.fc = linear(w_out, num_classes, bias=True)
40 |
41 | def forward(self, x):
42 | x = self.conv_af(self.conv_bn(self.conv(x)))
43 | x = self.avg_pool(x)
44 | x = x.view(x.size(0), -1)
45 | x = self.dropout(x) if self.dropout else x
46 | x = self.fc(x)
47 | return x
48 |
49 | @staticmethod
50 | def complexity(cx, w_in, w_out, num_classes):
51 | cx = conv2d_cx(cx, w_in, w_out, 1)
52 | cx = norm2d_cx(cx, w_out)
53 | cx = gap2d_cx(cx, w_out)
54 | cx = linear_cx(cx, w_out, num_classes, bias=True)
55 | return cx
56 |
57 |
58 | class MBConv(Module):
59 | """Mobile inverted bottleneck block with SE."""
60 |
61 | def __init__(self, w_in, exp_r, k, stride, se_r, w_out):
62 | # Expansion, kxk dwise, BN, AF, SE, 1x1, BN, skip_connection
63 | super(MBConv, self).__init__()
64 | self.exp = None
65 | w_exp = int(w_in * exp_r)
66 | if w_exp != w_in:
67 | self.exp = conv2d(w_in, w_exp, 1)
68 | self.exp_bn = norm2d(w_exp)
69 | self.exp_af = activation()
70 | self.dwise = conv2d(w_exp, w_exp, k, stride=stride, groups=w_exp)
71 | self.dwise_bn = norm2d(w_exp)
72 | self.dwise_af = activation()
73 | self.se = SE(w_exp, int(w_in * se_r))
74 | self.lin_proj = conv2d(w_exp, w_out, 1)
75 | self.lin_proj_bn = norm2d(w_out)
76 | self.has_skip = stride == 1 and w_in == w_out
77 |
78 | def forward(self, x):
79 | f_x = self.exp_af(self.exp_bn(self.exp(x))) if self.exp else x
80 | f_x = self.dwise_af(self.dwise_bn(self.dwise(f_x)))
81 | f_x = self.se(f_x)
82 | f_x = self.lin_proj_bn(self.lin_proj(f_x))
83 | if self.has_skip:
84 | if self.training and cfg.EN.DC_RATIO > 0.0:
85 | f_x = drop_connect(f_x, cfg.EN.DC_RATIO)
86 | f_x = x + f_x
87 | return f_x
88 |
89 | @staticmethod
90 | def complexity(cx, w_in, exp_r, k, stride, se_r, w_out):
91 | w_exp = int(w_in * exp_r)
92 | if w_exp != w_in:
93 | cx = conv2d_cx(cx, w_in, w_exp, 1)
94 | cx = norm2d_cx(cx, w_exp)
95 | cx = conv2d_cx(cx, w_exp, w_exp, k, stride=stride, groups=w_exp)
96 | cx = norm2d_cx(cx, w_exp)
97 | cx = SE.complexity(cx, w_exp, int(w_in * se_r))
98 | cx = conv2d_cx(cx, w_exp, w_out, 1)
99 | cx = norm2d_cx(cx, w_out)
100 | return cx
101 |
102 |
103 | class EffStage(Module):
104 | """EfficientNet stage."""
105 |
106 | def __init__(self, w_in, exp_r, k, stride, se_r, w_out, d):
107 | super(EffStage, self).__init__()
108 | for i in range(d):
109 | block = MBConv(w_in, exp_r, k, stride, se_r, w_out)
110 | self.add_module("b{}".format(i + 1), block)
111 | stride, w_in = 1, w_out
112 |
113 | def forward(self, x):
114 | for block in self.children():
115 | x = block(x)
116 | return x
117 |
118 | @staticmethod
119 | def complexity(cx, w_in, exp_r, k, stride, se_r, w_out, d):
120 | for _ in range(d):
121 | cx = MBConv.complexity(cx, w_in, exp_r, k, stride, se_r, w_out)
122 | stride, w_in = 1, w_out
123 | return cx
124 |
125 |
126 | class StemIN(Module):
127 | """EfficientNet stem for ImageNet: 3x3, BN, AF."""
128 |
129 | def __init__(self, w_in, w_out):
130 | super(StemIN, self).__init__()
131 | self.conv = conv2d(w_in, w_out, 3, stride=2)
132 | self.bn = norm2d(w_out)
133 | self.af = activation()
134 |
135 | def forward(self, x):
136 | for layer in self.children():
137 | x = layer(x)
138 | return x
139 |
140 | @staticmethod
141 | def complexity(cx, w_in, w_out):
142 | cx = conv2d_cx(cx, w_in, w_out, 3, stride=2)
143 | cx = norm2d_cx(cx, w_out)
144 | return cx
145 |
146 |
147 | class EffNet(Module):
148 | """EfficientNet model."""
149 |
150 | @staticmethod
151 | def get_params():
152 | return {
153 | "sw": cfg.EN.STEM_W,
154 | "ds": cfg.EN.DEPTHS,
155 | "ws": cfg.EN.WIDTHS,
156 | "exp_rs": cfg.EN.EXP_RATIOS,
157 | "se_r": cfg.EN.SE_R,
158 | "ss": cfg.EN.STRIDES,
159 | "ks": cfg.EN.KERNELS,
160 | "hw": cfg.EN.HEAD_W,
161 | "nc": cfg.MODEL.NUM_CLASSES,
162 | }
163 |
164 | def __init__(self, params=None):
165 | super(EffNet, self).__init__()
166 | p = EffNet.get_params() if not params else params
167 | vs = ["sw", "ds", "ws", "exp_rs", "se_r", "ss", "ks", "hw", "nc"]
168 | sw, ds, ws, exp_rs, se_r, ss, ks, hw, nc = [p[v] for v in vs]
169 | stage_params = list(zip(ds, ws, exp_rs, ss, ks))
170 | self.stem = StemIN(3, sw)
171 | prev_w = sw
172 | for i, (d, w, exp_r, stride, k) in enumerate(stage_params):
173 | stage = EffStage(prev_w, exp_r, k, stride, se_r, w, d)
174 | self.add_module("s{}".format(i + 1), stage)
175 | prev_w = w
176 | self.head = EffHead(prev_w, hw, nc)
177 | self.apply(init_weights)
178 |
179 | def forward(self, x):
180 | for module in self.children():
181 | x = module(x)
182 | return x
183 |
184 | @staticmethod
185 | def complexity(cx, params=None):
186 | """Computes model complexity (if you alter the model, make sure to update)."""
187 | p = EffNet.get_params() if not params else params
188 | vs = ["sw", "ds", "ws", "exp_rs", "se_r", "ss", "ks", "hw", "nc"]
189 | sw, ds, ws, exp_rs, se_r, ss, ks, hw, nc = [p[v] for v in vs]
190 | stage_params = list(zip(ds, ws, exp_rs, ss, ks))
191 | cx = StemIN.complexity(cx, 3, sw)
192 | prev_w = sw
193 | for d, w, exp_r, stride, k in stage_params:
194 | cx = EffStage.complexity(cx, prev_w, exp_r, k, stride, se_r, w, d)
195 | prev_w = w
196 | cx = EffHead.complexity(cx, prev_w, hw, nc)
197 | return cx
198 |
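A complexity sketch: seed the cx dict the way pycls.core.net.complexity does
and call the static method with explicit params, bypassing cfg (the values
below are EN-B0-style and purely illustrative):

    from pycls.models.effnet import EffNet

    params = {  # EN-B0-style parameters (illustrative)
        "sw": 32,
        "ds": [1, 2, 2, 3, 3, 4, 1],
        "ws": [16, 24, 40, 80, 112, 192, 320],
        "exp_rs": [1, 6, 6, 6, 6, 6, 6],
        "se_r": 0.25,
        "ss": [1, 2, 2, 2, 1, 2, 1],
        "ks": [3, 3, 5, 3, 5, 5, 3],
        "hw": 1280,
        "nc": 1000,
    }
    cx = {"h": 224, "w": 224, "flops": 0, "params": 0, "acts": 0}
    cx = EffNet.complexity(cx, params)
    print(cx["flops"], cx["params"], cx["acts"])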
--------------------------------------------------------------------------------
/pycls/models/regnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """RegNet models."""
9 |
10 | import numpy as np
11 | import pycls.models.blocks as bk
12 | from pycls.core.config import cfg
13 | from pycls.models.anynet import AnyNet
14 |
15 |
16 | def generate_regnet(w_a, w_0, w_m, d, q=8):
17 | """Generates per stage widths and depths from RegNet parameters."""
18 | assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
19 | # Generate continuous per-block ws
20 | ws_cont = np.arange(d) * w_a + w_0
21 | # Generate quantized per-block ws
22 | ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
23 | ws_all = w_0 * np.power(w_m, ks)
24 | ws_all = np.round(np.divide(ws_all, q)).astype(int) * q
25 | # Generate per stage ws and ds (assumes ws_all are sorted)
26 | ws, ds = np.unique(ws_all, return_counts=True)
27 | # Compute number of actual stages and total possible stages
28 | num_stages, total_stages = len(ws), ks.max() + 1
29 | # Convert numpy arrays to lists and return
30 | ws, ds, ws_all, ws_cont = (x.tolist() for x in (ws, ds, ws_all, ws_cont))
31 | return ws, ds, num_stages, total_stages, ws_all, ws_cont
32 |
33 |
34 | def generate_regnet_full():
35 | """Generates per stage ws, ds, gs, bs, and ss from RegNet cfg."""
36 | w_a, w_0, w_m, d = cfg.REGNET.WA, cfg.REGNET.W0, cfg.REGNET.WM, cfg.REGNET.DEPTH
37 | ws, ds = generate_regnet(w_a, w_0, w_m, d)[0:2]
38 | ss = [cfg.REGNET.STRIDE for _ in ws]
39 | bs = [cfg.REGNET.BOT_MUL for _ in ws]
40 | gs = [cfg.REGNET.GROUP_W for _ in ws]
41 | ws, bs, gs = bk.adjust_block_compatibility(ws, bs, gs)
42 | return ws, ds, ss, bs, gs
43 |
44 |
45 | def regnet_cfg_to_anynet_cfg():
46 | """Convert RegNet cfg to AnyNet cfg format (note: alters global cfg)."""
47 | assert cfg.MODEL.TYPE == "regnet"
48 | ws, ds, ss, bs, gs = generate_regnet_full()
49 | cfg.MODEL.TYPE = "anynet"
50 | cfg.ANYNET.STEM_TYPE = cfg.REGNET.STEM_TYPE
51 | cfg.ANYNET.STEM_W = cfg.REGNET.STEM_W
52 | cfg.ANYNET.BLOCK_TYPE = cfg.REGNET.BLOCK_TYPE
53 | cfg.ANYNET.DEPTHS = ds
54 | cfg.ANYNET.WIDTHS = ws
55 | cfg.ANYNET.STRIDES = ss
56 | cfg.ANYNET.BOT_MULS = bs
57 | cfg.ANYNET.GROUP_WS = gs
58 | cfg.ANYNET.HEAD_W = cfg.REGNET.HEAD_W
59 | cfg.ANYNET.SE_ON = cfg.REGNET.SE_ON
60 | cfg.ANYNET.SE_R = cfg.REGNET.SE_R
61 |
62 |
63 | class RegNet(AnyNet):
64 | """RegNet model."""
65 |
66 | @staticmethod
67 | def get_params():
68 | """Get AnyNet parameters that correspond to the RegNet."""
69 | ws, ds, ss, bs, gs = generate_regnet_full()
70 | return {
71 | "stem_type": cfg.REGNET.STEM_TYPE,
72 | "stem_w": cfg.REGNET.STEM_W,
73 | "block_type": cfg.REGNET.BLOCK_TYPE,
74 | "depths": ds,
75 | "widths": ws,
76 | "strides": ss,
77 | "bot_muls": bs,
78 | "group_ws": gs,
79 | "head_w": cfg.REGNET.HEAD_W,
80 | "se_r": cfg.REGNET.SE_R if cfg.REGNET.SE_ON else 0,
81 | "num_classes": cfg.MODEL.NUM_CLASSES,
82 | }
83 |
84 | def __init__(self):
85 | params = RegNet.get_params()
86 | super(RegNet, self).__init__(params)
87 |
88 | @staticmethod
89 | def complexity(cx, params=None):
90 | """Computes model complexity (if you alter the model, make sure to update)."""
91 | params = RegNet.get_params() if not params else params
92 | return AnyNet.complexity(cx, params)
93 |
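Usage sketch (not part of the source): generate_regnet can be called standalone to inspect how the continuous linear width function is quantized into stages. The parameter values here are purely illustrative.

# Illustrative parameters: slope w_a, initial width w_0 (divisible by q=8),
# width multiplier w_m > 1, and total depth d.
from pycls.models.regnet import generate_regnet

ws, ds, num_stages, total_stages, ws_all, ws_cont = generate_regnet(
    w_a=36.0, w_0=24, w_m=2.5, d=13
)
print("per-stage widths:", ws)   # quantized, strictly increasing
print("per-stage depths:", ds)   # blocks per stage, summing to d
print("stages used:", num_stages, "of", total_stages)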
--------------------------------------------------------------------------------
/pycls/models/scaler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Model scaler for scaling strategies in https://arxiv.org/abs/2103.06877."""
9 |
10 | from math import isclose
11 |
12 | import pycls.models.regnet as regnet
13 | from pycls.core.config import cfg
14 | from pycls.models.blocks import adjust_block_compatibility
15 |
16 |
17 | def scaling_factors(scale_type, scale_factor):
18 | """
19 | Computes model scaling factors to allow for scaling along d, w, g, r.
20 |
21 | Compute scaling factors such that d * w * w * r * r == scale_factor.
22 | Here d is depth, w is width, g is groups, and r is resolution.
23 | Note that scaling along g is handled in a special manner (see paper or code).
24 |
25 | Examples of scale_type include "d", "dw", "d1_w2", and "d1_w2_g2_r0".
26 | A scale_type of the form "dw" is equivalent to "d1_w1_g0_r0". The scalar value
27 |     after each scaling dimension gives the relative scaling along that dimension.
28 |     For example, "d1_w2" indicates scaling twice as much along width as along depth.
29 | Finally, scale_factor indicates the absolute amount of scaling.
30 |
31 | The "fast compound scaling" strategy from the paper is specified via "d1_w8_g8_r1".
32 | """
33 | if all(s in "dwgr" for s in scale_type):
34 | weights = {s: 1.0 if s in scale_type else 0.0 for s in "dwgr"}
35 | else:
36 | weights = {sw[0]: float(sw[1::]) for sw in scale_type.split("_")}
37 | weights = {**{s: 0.0 for s in "dwgr"}, **weights}
38 | assert all(s in "dwgr" for s in weights.keys()), scale_type
39 | sum_weights = weights["d"] + weights["w"] + weights["r"] or weights["g"] / 2 or 1.0
40 | d = scale_factor ** (weights["d"] / sum_weights)
41 | w = scale_factor ** (weights["w"] / sum_weights / 2.0)
42 | g = scale_factor ** (weights["g"] / sum_weights / 2.0)
43 | r = scale_factor ** (weights["r"] / sum_weights / 2.0)
44 | s_actual = d * w * w * r * r
45 | assert d == w == r == 1.0 or isclose(s_actual, scale_factor, rel_tol=0.01)
46 | return d, w, g, r
47 |
48 |
49 | def scale_model():
50 | """
51 | Scale model blocks by the specified type and amount (note: alters global cfg).
52 |
53 | Scale a model using scaling strategies from "Fast and Accurate Model Scaling".
54 | For reference on scaling strategies, see: https://arxiv.org/abs/2103.06877.
55 | For example usage, see GETTING_STARTED, MODEL SCALING section.
56 |
57 | The actual scaling is specified by MODEL.SCALING_TYPE and MODEL.SCALING_FACTOR.
58 | For example, SCALING_TYPE of "d1_w8_g8_r1" is fast compound scaling and is the
59 | likely best default option, and SCALING_FACTOR indicates the scaling amount.
60 | For further details on controlling the scaling, see comments for scaling_factors().
61 |
62 | Note that the scaler must be employed on a standalone config outside of the main
63 | training loop. This is because it alters the global config, which is typically
64 | frozen during training. So one should use this function to generate a new config and
65 |     save it to a file, and then invoke training separately on the new config.
66 | """
67 | assert cfg.MODEL.TYPE in ["anynet", "effnet", "regnet"]
68 | # Get scaling factors
69 | scale_type, scale = cfg.MODEL.SCALING_TYPE, cfg.MODEL.SCALING_FACTOR
70 | d_scale, w_scale, g_scale, r_scale = scaling_factors(scale_type, scale)
71 | if cfg.MODEL.TYPE == "regnet":
72 | # Convert a RegNet to an AnyNet prior to scaling
73 | regnet.regnet_cfg_to_anynet_cfg()
74 | if cfg.MODEL.TYPE == "anynet":
75 | # Scale AnyNet
76 | an = cfg.ANYNET
77 | ds, ws, bs, gs = an.DEPTHS, an.WIDTHS, an.BOT_MULS, an.GROUP_WS
78 | bs = bs if bs else [1] * len(ds)
79 | gs = gs if gs else [1] * len(ds)
80 | ds = [max(1, round(d * d_scale)) for d in ds]
81 | ws = [max(1, round(w * w_scale / 8)) * 8 for w in ws]
82 | gs = [max(1, round(g * g_scale)) for g in gs]
83 | gs = [g if g <= 2 else 4 if g <= 5 else round(g / 8) * 8 for g in gs]
84 | ws, bs, gs = adjust_block_compatibility(ws, bs, gs)
85 | an.DEPTHS, an.WIDTHS, an.BOT_MULS, an.GROUP_WS = ds, ws, bs, gs
86 | elif cfg.MODEL.TYPE == "effnet":
87 | # Scale EfficientNet
88 | en = cfg.EN
89 | ds, ws, bs, sw, hw = en.DEPTHS, en.WIDTHS, en.EXP_RATIOS, en.STEM_W, en.HEAD_W
90 | ds = [max(1, round(d * d_scale)) for d in ds]
91 | ws = [max(1, round(w * w_scale / 8)) * 8 for w in ws]
92 | sw = max(1, round(sw * w_scale / 8)) * 8
93 | hw = max(1, round(hw * w_scale / 8)) * 8
94 | ws, bs, _ = adjust_block_compatibility(ws, bs, [1] * len(ds))
95 | en.DEPTHS, en.WIDTHS, en.EXP_RATIOS, en.STEM_W, en.HEAD_W = ds, ws, bs, sw, hw
96 | # Scale image resolution
97 | cfg.TRAIN.IM_SIZE = round(cfg.TRAIN.IM_SIZE * r_scale / 4) * 4
98 | cfg.TEST.IM_SIZE = round(cfg.TEST.IM_SIZE * r_scale / 4) * 4
99 |
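A small check (not part of the source) of the invariant documented above, d * w * w * r * r == scale_factor; the scale types and factor are illustrative only.

from math import isclose
from pycls.models.scaler import scaling_factors

# Fast compound scaling: width and groups absorb most of the factor
d, w, g, r = scaling_factors("d1_w8_g8_r1", 4.0)
assert isclose(d * w * w * r * r, 4.0, rel_tol=0.01)

# Pure depth scaling: the entire factor goes into depth
d, w, g, r = scaling_factors("d", 4.0)
assert isclose(d, 4.0) and w == g == r == 1.0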
--------------------------------------------------------------------------------
/pycls/sweep/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/pycls/5719de641657c251f807c20a1fcf8ef6f4b60144/pycls/sweep/__init__.py
--------------------------------------------------------------------------------
/pycls/sweep/analysis.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 |
9 | """Sweep analysis functions."""
10 |
11 | import json
12 | from functools import reduce
13 | from operator import getitem
14 |
15 | import numpy as np
16 |
17 |
18 | def load_sweep(sweep_file):
19 | """Loads sweep data from a file."""
20 | with open(sweep_file, "r") as f:
21 | sweep = json.load(f)
22 | sweep = [data for data in sweep if "test_ema_epoch" in data]
23 | augment_sweep(sweep)
24 | return sweep
25 |
26 |
27 | def augment_sweep(sweep):
28 | """Augments sweep data with fields useful for analysis."""
29 | # Augment data with "aug" field
30 | for data in sweep:
31 | data["aug"] = {}
32 | # Augment with "aug.lr_wd" field = log(lr) + log(wd) = log(lr * wd)
33 | lrs, wds = get_vals(sweep, "lr"), get_vals(sweep, "wd")
34 | for data, lr, wd in zip(sweep, lrs, wds):
35 | data["aug"]["lr_wd"] = lr + wd
36 | # Augment with "aug.done" field
37 | epoch_ind = get_vals(sweep, "test_epoch.epoch_ind")
38 | epoch_max = get_vals(sweep, "test_epoch.epoch_max")
39 | for data, i, m in zip(sweep, epoch_ind, epoch_max):
40 | data["aug"]["done"] = i[-1] / m[-1]
41 | # Augment with "ema_gain"
42 | errors = get_vals(sweep, "test_epoch.min_top1_err")
43 | errors_ema = get_vals(sweep, "test_ema_epoch.min_top1_err")
44 | for data, error, error_ema in zip(sweep, errors, errors_ema):
45 | data["aug"]["ema_gain"] = max(0, min(error) - min(error_ema))
46 |
47 |
48 | def sort_sweep(sweep, metric, reverse=False):
49 | """Sorts sweep by any metric (including non scalar metrics)."""
50 | keys = get_vals(sweep, metric)
51 | keys = [k if np.isscalar(k) else json.dumps(k, sort_keys=True) for k in keys]
52 | keys, sweep = zip(*sorted(zip(keys, sweep), key=lambda k: k[0], reverse=reverse))
53 | return sweep, keys
54 |
55 |
56 | def describe_sweep(sweep, reverse=False):
57 | """Generate a string description of sweep."""
58 | keys = ["error_ema", "error_tst", "done", "log_file", "cfg.DESC"]
59 | formats = ["ema={:.2f}", "err={:.2f}", "done={:.2f}", "{}", "{}"]
60 | vals = [get_vals(sweep, key) for key in keys]
61 | vals[3] = [v.split("/")[-2] for v in vals[3]]
62 | desc = [" | ".join(formats).format(*val) for val in zip(*vals)]
63 | desc = [s for _, s in sorted(zip(vals[0], desc), reverse=reverse)]
64 | return "\n".join(desc)
65 |
66 |
67 | metrics_info = {
68 | # Each metric has the form [compound_key, label, transform]
69 | "error": ["test_ema_epoch.min_top1_err", "", min],
70 | "error_ema": ["test_ema_epoch.min_top1_err", "", min],
71 | "error_tst": ["test_epoch.min_top1_err", "", min],
72 | "done": ["aug.done", "fraction done", None],
73 | "epochs": ["cfg.OPTIM.MAX_EPOCH", "epochs", None],
74 | # Complexity metrics
75 | "flops": ["complexity.flops", "flops (B)", lambda v: v / 1e9],
76 | "params": ["complexity.params", "params (M)", lambda v: v / 1e6],
77 | "acts": ["complexity.acts", "activations (M)", lambda v: v / 1e6],
78 | "memory": ["train_epoch.mem", "memory (GB)", lambda v: max(v) / 1e3],
79 | "resolution": ["cfg.TRAIN.IM_SIZE", "resolution", None],
80 | "epoch_fw_bw": ["epoch_times.train_fw_bw_time", "epoch fw_bw time (s)", None],
81 | "epoch_time": ["train_epoch.time_epoch", "epoch total time (s)", np.mean],
82 | "batch_size": ["cfg.TRAIN.BATCH_SIZE", "batch size", None],
83 | # Regnet metrics
84 | "regnet_depth": ["cfg.REGNET.DEPTH", "depth", None],
85 | "regnet_w0": ["cfg.REGNET.W0", "w0", None],
86 | "regnet_wa": ["cfg.REGNET.WA", "wa", None],
87 | "regnet_wm": ["cfg.REGNET.WM", "wm", None],
88 | "regnet_gw": ["cfg.REGNET.GROUP_W", "gw", None],
89 | "regnet_bm": ["cfg.REGNET.BOT_MUL", "bm", None],
90 | # Anynet metrics
91 | "anynet_ds": ["cfg.ANYNET.DEPTHS", "ds", None],
92 | "anynet_ws": ["cfg.ANYNET.WIDTHS", "ws", None],
93 | "anynet_gs": ["cfg.ANYNET.GROUP_WS", "gs", None],
94 | "anynet_bs": ["cfg.ANYNET.BOT_MULS", "bs", None],
95 | "anynet_d": ["cfg.ANYNET.DEPTHS", "d", sum],
96 | "anynet_w": ["cfg.ANYNET.WIDTHS", "w", max],
97 | "anynet_g": ["cfg.ANYNET.GROUP_WS", "g", max],
98 | "anynet_b": ["cfg.ANYNET.BOT_MULS", "b", max],
99 | # Effnet metrics
100 | "effnet_ds": ["cfg.EN.DEPTHS", "ds", None],
101 | "effnet_ws": ["cfg.EN.WIDTHS", "ws", None],
102 | "effnet_ss": ["cfg.EN.STRIDES", "ss", None],
103 | "effnet_bs": ["cfg.EN.EXP_RATIOS", "bs", None],
104 | "effnet_d": ["cfg.EN.DEPTHS", "d", sum],
105 | "effnet_w": ["cfg.EN.WIDTHS", "w", max],
106 | # Optimization metrics
107 | "lr": ["cfg.OPTIM.BASE_LR", r"log$_{10}(lr)$", np.log10],
108 | "min_lr": ["cfg.OPTIM.MIN_LR", r"min_lr", None],
109 | "wd": ["cfg.OPTIM.WEIGHT_DECAY", r"log$_{10}(wd)$", np.log10],
110 | "lr_wd": ["aug.lr_wd", r"log$_{10}(lr \cdot wd)$", None],
111 | "bn_wd": ["cfg.BN.CUSTOM_WEIGHT_DECAY", r"log$_{10}$(bn_wd)", np.log10],
112 | "momentum": ["cfg.OPTIM.MOMENTUM", "", None],
113 | "ema_alpha": ["cfg.OPTIM.EMA_ALPHA", r"log$_{10}$(ema_alpha)", np.log10],
114 | "ema_beta": ["cfg.OPTIM.EMA_BETA", r"log$_{10}$(ema_beta)", np.log10],
115 |     "ema_update": ["cfg.OPTIM.EMA_UPDATE_PERIOD", r"log$_{2}$(ema_update)", np.log2],
116 | }
117 |
118 |
119 | def get_info(metric):
120 | """Returns [compound_key, label, transform] for metric."""
121 | info = metrics_info[metric] if metric in metrics_info else [metric, metric, None]
122 | info[1] = info[1] if info[1] else metric
123 | return info
124 |
125 |
126 | def get_vals(sweep, metric):
127 | """Gets values for given metric (transformed if metric transform is specified)."""
128 | compound_key, _, transform = get_info(metric)
129 | metric_keys = compound_key.split(".")
130 | vals = [reduce(getitem, metric_keys, data) for data in sweep]
131 | vals = [transform(v) for v in vals] if transform else vals
132 | return vals
133 |
134 |
135 | def get_filters(sweep, metrics, alpha=5, sample=0.25, b=2500):
136 | """Use empirical bootstrap to estimate filter ranges per metric for good errors."""
137 | assert len(sweep), "Sweep cannot be empty."
138 | errs = np.array(get_vals(sweep, "error"))
139 | n, b, filters = len(errs), int(b), {}
140 | percentiles = [alpha / 2, 50, 100 - alpha / 2]
141 | n_sample = int(sample) if sample > 1 else max(1, int(n * sample))
142 | samples = [np.random.choice(n, n_sample) for _ in range(b)]
143 | samples = [s[np.argmin(errs[s])] for s in samples]
144 | for metric in metrics:
145 | vals = np.array(get_vals(sweep, metric))
146 | vals = [vals[s] for s in samples]
147 | v_min, v_med, v_max = tuple(np.percentile(vals, percentiles))
148 | filters[metric] = [v_min, v_med, v_max]
149 | return filters
150 |
151 |
152 | def apply_filters(sweep, filters):
153 | """Filter sweep according to dict of filters of form {metric: [min, med, max]}."""
154 | filters = filters if filters else {}
155 | for metric, (v_min, _, v_max) in filters.items():
156 | keep = [v_min <= v <= v_max for v in get_vals(sweep, metric)]
157 | sweep = [data for k, data in zip(keep, sweep) if k]
158 | return sweep
159 |
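Usage sketch (not part of the source): how get_vals resolves a compound key and applies the metric transform. The two-entry sweep below is hand-built with just enough nesting for the "lr" metric.

import pycls.sweep.analysis as analysis

# "lr" maps to compound key "cfg.OPTIM.BASE_LR" with an np.log10 transform
sweep = [
    {"cfg": {"OPTIM": {"BASE_LR": 0.1}}},
    {"cfg": {"OPTIM": {"BASE_LR": 1.0}}},
]
print(analysis.get_vals(sweep, "lr"))  # approximately [-1.0, 0.0]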
--------------------------------------------------------------------------------
/pycls/sweep/htmlbook.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Htmlbook - Piotr's lightweight alternative to notebooks."""
9 |
10 | import base64
11 | from io import BytesIO
12 |
13 | import matplotlib.pyplot as plt
14 | from yattag import Doc, indent
15 |
16 |
17 | class Htmlbook:
18 | """An Htmlbook is used to generate an html page from text and matplotlib figures."""
19 |
20 | def __init__(self, title):
21 | """Initializes Htmlbook with a given title."""
22 | # The doc is used for the body of the document
23 | self.doc, self.tag, self.text, self.line = Doc().ttl()
24 | # The top_doc is used for the title and table of contents
25 | self.top_doc, self.top_tag, self.top_text, self.top_line = Doc().ttl()
26 | # Add link anchor and title to the top_doc
27 | self.top_line("a", "", name="top")
28 | self.top_line("h1", title)
29 | self.section_counter = 1
30 |
31 | def add_section(self, name):
32 | """Adds a section to the Htmlbook (also updates table of contents)."""
33 | anchor = "section{:03d}".format(self.section_counter)
34 | name = str(self.section_counter) + " " + name
35 | anchor_style = "text-decoration: none;"
36 | self.section_counter += 1
37 | # Add section to main text
38 | self.doc.stag("br")
39 | self.doc.stag("hr", style="border: 2px solid")
40 | with self.tag("h3"):
41 | self.line("a", "", name=anchor)
42 | self.text(name + " ")
43 | self.line("a", "[top]", href="#top", style=anchor_style)
44 | # Add section to table of contents
45 | self.top_line("a", name, href="#" + anchor, style=anchor_style)
46 | self.top_doc.stag("br")
47 |
48 | def add_plot(self, matplotlib_figure, ext="svg", **kwargs):
49 | """Adds a matplotlib figure embedded directly into the html."""
50 | out = BytesIO()
51 | matplotlib_figure.savefig(out, format=ext, bbox_inches="tight", **kwargs)
52 | plt.close(matplotlib_figure)
53 |         if ext == "svg":
54 |             self.doc.asis(out.getvalue().decode("utf-8"))
55 |         else:
56 |             out = base64.b64encode(out.getvalue()).decode("utf-8")
57 |             self.doc.stag("img", src="data:image/{};base64,{}".format(ext, out))
58 |         self.doc.stag("br")
59 |
60 | def add_details(self, summary, details):
61 | """Adds a collapsible details section to Htmlbook."""
62 | with self.tag("details"):
63 | self.line("summary", summary)
64 | self.line("pre", details)
65 |
66 | def to_text(self):
67 | """Generates a string representing the Htmlbook (including figures)."""
68 | return indent(self.top_doc.getvalue() + self.doc.getvalue())
69 |
70 | def to_file(self, out_file):
71 | """Saves Htmlbook to a file (typically should have .html extension)."""
72 | with open(out_file, "w") as file:
73 | file.write(self.to_text())
74 |
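Usage sketch (not part of the source): building a minimal page with one section, one embedded figure, and a collapsible details block.

import matplotlib.pyplot as plt
from pycls.sweep.htmlbook import Htmlbook

h = Htmlbook("demo")
h.add_section("A plot")
fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
h.add_plot(fig)  # embeds the figure inline and closes it
h.add_details("raw data", "x = [0, 1]\ny = [0, 1]")
h.to_file("demo.html")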
--------------------------------------------------------------------------------
/pycls/sweep/random.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Functions for sampling in the closed interval [low, high] quantized by q."""
9 |
10 | from decimal import Decimal
11 |
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 |
15 |
16 | def quantize(f, q, op=np.floor):
17 | """Quantizes f to be divisible by q and have q's type."""
18 | quantized = Decimal(op(f / q)) * Decimal(str(q))
19 | return type(q)(quantized)
20 |
21 |
22 | def uniform(low, high, q):
23 | """Samples uniform value from [low, high] quantized to q."""
24 | # Samples f in [l, h+q) then quantizes f to [l, h] via floor()
25 | # This is equivalent to sampling f in (l-q, h] then quantizing via ceil()
26 | f = np.random.uniform(low, high + q)
27 | return quantize(f, q, np.floor)
28 |
29 |
30 | def log_uniform(low, high, q):
31 | """Samples log uniform value from [low, high] quantized to q."""
32 | # Samples f in (l-q*, h] then quantizes f to [l, h] via ceil(), where q*=min(q,l/2)
33 | # This is NOT equivalent to sampling f in [l, h-q) then quantizing via floor()
34 | f = np.exp(-np.random.uniform(-(np.log(high)), -(np.log(low - min(q, low / 2)))))
35 | return quantize(f, q, np.ceil)
36 |
37 |
38 | def power2_uniform(low, high, q):
39 | """Samples uniform powers of 2 from [low, high] quantized to q."""
40 | # Samples f2 in [l2, h2+1) then quantizes f2 to [l2, h2] via floor()
41 | f2 = np.floor(np.random.uniform(np.log2(low), np.log2(high) + 1))
42 | return quantize(2**f2, q)
43 |
44 |
45 | def power2_or_log_uniform(low, high, q):
46 | """Samples uniform powers of 2 or values divisible by q from [low, high]."""
47 | # The overall CDF is log-linear because range in log_uniform is (q/2, high]
48 | f = type(q)(power2_uniform(low, high, low))
49 | f = log_uniform(max(low, q), high, min(high, q)) if f >= q else f
50 | return f
51 |
52 |
53 | def normal(low, high, q):
54 | """Samples values from a clipped normal (Gaussian) distribution quantized to q."""
55 | # mu/sigma are computed from low/high such that ~99.7% of samples are in range
56 | f, mu, sigma = np.inf, (low + high) / 2, (high - low) / 6
57 | while not low <= f <= high:
58 | f = np.random.normal(mu, sigma)
59 | return quantize(f, q, np.round)
60 |
61 |
62 | def log_normal(low, high, q):
63 | """Samples values from a clipped log-normal distribution quantized to q."""
64 | # mu/sigma are computed from low/high such that ~99.7% of samples are in range
65 | log_low, log_high = np.log(low), np.log(high)
66 | f, mu, sigma = np.inf, (log_low + log_high) / 2, (log_high - log_low) / 6
67 | while not low <= f <= high:
68 | f = np.random.lognormal(mu, sigma)
69 | return quantize(f, q, np.round)
70 |
71 |
72 | rand_types = {
73 | "uniform": uniform,
74 | "log_uniform": log_uniform,
75 | "power2_uniform": power2_uniform,
76 | "power2_or_log_uniform": power2_or_log_uniform,
77 | "normal": normal,
78 | "log_normal": log_normal,
79 | }
80 |
81 |
82 | def validate_rand(err_str, rand_type, low, high, q):
83 | """Validate parameters to random number generators."""
84 | err_msg = "{}: {}(low={}, high={}, q={}) is invalid."
85 | err_msg = err_msg.format(err_str, rand_type, low, high, q)
86 | low_q = Decimal(str(low)) % Decimal(str(q)) == 0
87 | high_q = Decimal(str(high)) % Decimal(str(q)) == 0
88 | assert type(q) == type(low) == type(high), err_msg
89 | assert rand_type in rand_types, err_msg
90 | assert q > 0 and low <= high, err_msg
91 | assert low > 0 or rand_type in ["uniform", "normal"], err_msg
92 | assert low_q and high_q or rand_type == "power2_or_log_uniform", err_msg
93 | if rand_type in ["power2_uniform", "power2_or_log_uniform"]:
94 | assert all(np.log2(v).is_integer() for v in [low, high, q]), err_msg
95 |
96 |
97 | def plot_rand_cdf(rand_type, low, high, q, n=10000):
98 | """Visualizes CDF of rand_fun, resulting CDF should be linear (or log-linear)."""
99 | validate_rand("plot_rand_cdf", rand_type, low, high, q)
100 | samples = [rand_types[rand_type](low, high, q) for _ in range(n)]
101 | unique = list(np.unique(samples))
102 | assert min(unique) >= low and max(unique) <= high, "Sampled value out of range."
103 | cdf = np.cumsum(np.histogram(samples, unique + [np.inf])[0]) / len(samples)
104 | plot_fun = plt.plot if rand_type in ["uniform", "normal"] else plt.semilogx
105 | plot_fun(unique, cdf, "o-", [low, low], [0, 1], "-k", [high, high], [0, 1], "-k")
106 | plot_fun([low, high], [cdf[0], cdf[-1]]) if "normal" not in rand_type else ()
107 | plt.title("{}(low={}, high={}, q={})".format(rand_type, low, high, q))
108 | plt.show()
109 |
110 |
111 | def plot_rand_cdfs():
112 | """Visualize CDFs of selected distributions, for visualization/debugging only."""
113 | plot_rand_cdf("uniform", -0.5, 0.5, 0.1)
114 | plot_rand_cdf("power2_uniform", 2, 512, 1)
115 | plot_rand_cdf("power2_uniform", 0.25, 8.0, 0.25)
116 | plot_rand_cdf("log_uniform", 1, 32, 1)
117 | plot_rand_cdf("log_uniform", 0.5, 16.0, 0.5)
118 | plot_rand_cdf("power2_or_log_uniform", 1.0, 16.0, 1.0)
119 | plot_rand_cdf("power2_or_log_uniform", 0.25, 4.0, 4.0)
120 | plot_rand_cdf("power2_or_log_uniform", 1, 128, 4)
121 |
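Usage sketch (not part of the source): all samplers share the (low, high, q) convention, include both endpoints, and return a value of q's type.

import pycls.sweep.random as rand

v = rand.uniform(0.25, 0.75, 0.25)  # one of 0.25, 0.5, 0.75 (a float, like q)
w = rand.log_uniform(1, 32, 1)      # int in [1, 32] with a log-linear CDF
x = rand.power2_uniform(2, 512, 1)  # a power of two in {2, 4, ..., 512}
print(v, w, x)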
--------------------------------------------------------------------------------
/pycls/sweep/samplers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Sweep sampling functions."""
9 |
10 | import numpy as np
11 | import pycls.core.builders as builders
12 | import pycls.core.net as net
13 | import pycls.models.regnet as regnet
14 | import pycls.sweep.random as rand
15 | from pycls.core.config import cfg
16 | from pycls.sweep.random import validate_rand
17 |
18 |
19 | def scalar_sampler(sampler):
20 | """Sampler for scalars in RANGE quantized to QUANTIZE."""
21 | low, high = sampler.RANGE[0], sampler.RANGE[1]
22 | rand_fun, q = rand.rand_types[sampler.RAND_TYPE], sampler.QUANTIZE
23 | return rand_fun(low, high, q)
24 |
25 |
26 | def value_sampler(sampler):
27 | """Sampler for uniform sampling from a list of values."""
28 | rand_index = np.random.randint(len(sampler.VALUES))
29 | return sampler.VALUES[rand_index]
30 |
31 |
32 | def list_sampler(sampler):
33 | """Sampler for a list of n items sampled independently by the item_sampler."""
34 | item_sampler, n = sampler.ITEM_SAMPLER, sampler.LENGTH
35 | sampler_function = sampler_types[item_sampler.TYPE]
36 | return [sampler_function(item_sampler) for _ in range(n)]
37 |
38 |
39 | def regnet_sampler(sampler):
40 | """Sampler for main RegNet parameters."""
41 | d = rand.uniform(*sampler.DEPTH, 1)
42 | w0 = rand.log_uniform(*sampler.W0, 8)
43 | wa = rand.log_uniform(*sampler.WA, 0.1)
44 | wm = rand.log_uniform(*sampler.WM, 0.001)
45 | gw = rand.power2_or_log_uniform(*sampler.GROUP_W, 8)
46 | bm = rand.power2_uniform(*sampler.BOT_MUL, 1 / 128)
47 | params = ["DEPTH", d, "W0", w0, "WA", wa, "WM", wm, "GROUP_W", gw, "BOT_MUL", bm]
48 | return ["REGNET." + p if i % 2 == 0 else p for i, p in enumerate(params)]
49 |
50 |
51 | sampler_types = {
52 | "float_sampler": scalar_sampler,
53 | "int_sampler": scalar_sampler,
54 | "value_sampler": value_sampler,
55 | "list_sampler": list_sampler,
56 | "regnet_sampler": regnet_sampler,
57 | }
58 |
59 |
60 | def validate_sampler(param, sampler):
61 | """Performs various checks on sampler to see if it is valid."""
62 | if sampler.TYPE in ["int_sampler", "float_sampler"]:
63 | validate_rand(param, sampler.RAND_TYPE, *sampler.RANGE, sampler.QUANTIZE)
64 | elif sampler.TYPE == "regnet_sampler":
65 | assert param == "REGNET", "regnet_sampler can only be used for REGNET"
66 | validate_rand("REGNET.DEPTH", "uniform", *sampler.DEPTH, 1)
67 | validate_rand("REGNET.W0", "log_uniform", *sampler.W0, 8)
68 | validate_rand("REGNET.WA", "log_uniform", *sampler.WA, 0.1)
69 | validate_rand("REGNET.WM", "log_uniform", *sampler.WM, 0.001)
70 | validate_rand("REGNET.GROUP_W", "power2_or_log_uniform", *sampler.GROUP_W, 8)
71 | validate_rand("REGNET.BOT_MUL", "power2_uniform", *sampler.BOT_MUL, 1 / 128)
72 |
73 |
74 | def is_composite_sampler(sampler_type):
75 | """Composite samplers return a [key, val, ...] list as opposed to just a val."""
76 | composite_samplers = ["regnet_sampler"]
77 | return sampler_type in composite_samplers
78 |
79 |
80 | def sample_parameters(samplers):
81 | """Samples params [key, val, ...] list based on the samplers."""
82 | params = []
83 | for param, sampler in samplers.items():
84 | val = sampler_types[sampler.TYPE](sampler)
85 | is_composite = is_composite_sampler(sampler.TYPE)
86 | params.extend(val if is_composite else [param, val])
87 | return params
88 |
89 |
90 | def check_regnet_constraints(constraints):
91 | """Checks RegNet specific constraints."""
92 | if cfg.MODEL.TYPE == "regnet":
93 | wa, w0, wm, d = cfg.REGNET.WA, cfg.REGNET.W0, cfg.REGNET.WM, cfg.REGNET.DEPTH
94 | _, _, num_s, max_s, _, _ = regnet.generate_regnet(wa, w0, wm, d, 8)
95 | num_stages = constraints.REGNET.NUM_STAGES
96 | if num_s != max_s or not num_stages[0] <= num_s <= num_stages[1]:
97 | return False
98 | return True
99 |
100 |
101 | def check_complexity_constraints(constraints):
102 | """Checks complexity constraints."""
103 | cx, valid = None, True
104 | for p, v in constraints.CX.items():
105 | p, min_v, max_v = p.lower(), v[0], v[1]
106 | if min_v != 0 or max_v != 0:
107 | cx = cx if cx else net.complexity(builders.get_model())
108 | min_v = cx[p] if min_v == 0 else min_v
109 | max_v = cx[p] if max_v == 0 else max_v
110 | valid = valid and (min_v <= cx[p] <= max_v)
111 | return valid
112 |
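Usage sketch (not part of the source): sampling a single non-composite parameter. The OPTIM.BASE_LR key and the candidate values are hypothetical; the resulting [key, val] list is in the format cfg.merge_from_list expects.

from yacs.config import CfgNode
from pycls.sweep.samplers import sample_parameters

# Hypothetical value sampler: draw the base learning rate from a fixed list
lr_sampler = CfgNode({"TYPE": "value_sampler", "VALUES": [0.1, 0.2, 0.4]})
params = sample_parameters({"OPTIM.BASE_LR": lr_sampler})
print(params)  # e.g. ["OPTIM.BASE_LR", 0.2]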
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | black==19.3b0
2 | isort==4.3.21
3 | iopath
4 | fairscale
5 | ffcv
6 | flake8
7 | pyyaml
8 | matplotlib
9 | numpy
10 | opencv-python
11 | parameterized
12 | setuptools
13 | simplejson
14 | submitit
15 | yacs
16 | yattag
17 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Setup pycls."""
9 |
10 | from setuptools import find_packages, setup
11 |
12 |
13 | def readme():
14 | """Retrieves the readme content."""
15 | with open("README.md", "r") as f:
16 | content = f.read()
17 | return content
18 |
19 |
20 | setup(
21 | name="pycls",
22 | version="0.1.1",
23 | description="A codebase for image classification",
24 | long_description=readme(),
25 | long_description_content_type="text/markdown",
26 | url="https://github.com/facebookresearch/pycls",
27 | packages=find_packages(),
28 | classifiers=[
29 | "Programming Language :: Python :: 3",
30 | "License :: OSI Approved :: MIT License",
31 | ],
32 | install_requires=["numpy", "opencv-python", "simplejson", "yacs"],
33 | )
34 |
--------------------------------------------------------------------------------
/tools/run_net.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Execute various operations (train, test, time, etc.) on a classification model."""
9 |
10 | import argparse
11 | import sys
12 |
13 | import pycls.core.builders as builders
14 | import pycls.core.config as config
15 | import pycls.core.distributed as dist
16 | import pycls.core.net as net
17 | import pycls.core.trainer as trainer
18 | import pycls.models.scaler as scaler
19 | from pycls.core.config import cfg
20 |
21 |
22 | def parse_args():
23 | """Parse command line options (mode and config)."""
24 | parser = argparse.ArgumentParser(description="Run a model.")
25 | help_s, choices = "Run mode", ["info", "train", "test", "time", "scale"]
26 | parser.add_argument("--mode", help=help_s, choices=choices, required=True, type=str)
27 | help_s = "Config file location"
28 | parser.add_argument("--cfg", help=help_s, required=True, type=str)
29 | help_s = "See pycls/core/config.py for all options"
30 | parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER)
31 | if len(sys.argv) == 1:
32 | parser.print_help()
33 | sys.exit(1)
34 | return parser.parse_args()
35 |
36 |
37 | def main():
38 | """Execute operation (train, test, time, etc.)."""
39 | args = parse_args()
40 | mode = args.mode
41 | config.load_cfg(args.cfg)
42 | cfg.merge_from_list(args.opts)
43 | config.assert_cfg()
44 | cfg.freeze()
45 | if mode == "info":
46 | print(builders.get_model()())
47 | print("complexity:", net.complexity(builders.get_model()))
48 | elif mode == "train":
49 | dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.train_model)
50 | elif mode == "test":
51 | dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.test_model)
52 | elif mode == "time":
53 | dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.time_model)
54 | elif mode == "scale":
55 | cfg.defrost()
56 | cx_orig = net.complexity(builders.get_model())
57 | scaler.scale_model()
58 | cx_scaled = net.complexity(builders.get_model())
59 | cfg_file = config.dump_cfg()
60 | print("Scaled config dumped to:", cfg_file)
61 | print("Original model complexity:", cx_orig)
62 | print("Scaled model complexity:", cx_scaled)
63 |
64 |
65 | if __name__ == "__main__":
66 | main()
67 |
--------------------------------------------------------------------------------
/tools/sweep_analyze.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Analyze results of a sweep."""
9 |
10 | import os
11 | import time
12 |
13 | import matplotlib.pyplot as plt
14 | import pycls.sweep.analysis as analysis
15 | import pycls.sweep.config as sweep_config
16 | import pycls.sweep.plotting as plotting
17 | from pycls.sweep.config import sweep_cfg
18 | from pycls.sweep.htmlbook import Htmlbook
19 |
20 |
21 | def sweep_analyze():
22 | """Analyzes results of a sweep."""
23 | start_time = time.time()
24 | analyze_cfg = sweep_cfg.ANALYZE
25 | sweep_dir = os.path.join(sweep_cfg.ROOT_DIR, sweep_cfg.NAME)
26 | print("Generating sweepbook for {:s}... ".format(sweep_dir), end="", flush=True)
27 | # Initialize Htmlbook for results
28 | h = Htmlbook(sweep_cfg.NAME)
29 | # Output sweep config
30 | h.add_section("Config")
31 | with open(sweep_cfg.SWEEP_CFG_FILE, "r") as f:
32 | sweep_cfg_raw = f.read()
33 | h.add_details("sweep_cfg", sweep_cfg_raw)
34 | h.add_details("sweep_cfg_full", str(sweep_cfg))
35 | # Load sweep and plot EDF
36 | names = [sweep_cfg.NAME] + analyze_cfg.EXTRA_SWEEP_NAMES
37 | files = [os.path.join(sweep_cfg.ROOT_DIR, name, "sweep.json") for name in names]
38 | sweeps = [analysis.load_sweep(file) for file in files]
39 | names = [os.path.basename(name) for name in names]
40 | assert all(len(sweep) for sweep in sweeps), "Loaded sweep cannot be empty."
41 | h.add_section("EDF")
42 | h.add_plot(plotting.plot_edf(sweeps, names))
43 | for sweep, name in zip(sweeps, names):
44 | h.add_details(name, analysis.describe_sweep(sweep))
45 | # Pre filter sweep according to pre_filters and plot EDF
46 | pre_filters = analyze_cfg.PRE_FILTERS
47 | if pre_filters:
48 | sweeps = [analysis.apply_filters(sweep, pre_filters) for sweep in sweeps]
49 | assert all(len(sweep) for sweep in sweeps), "Filtered sweep cannot be empty."
50 | h.add_section("EDF Filtered")
51 | h.add_plot(plotting.plot_edf(sweeps, names))
52 | for sweep, name in zip(sweeps, names):
53 | h.add_details(name, analysis.describe_sweep(sweep))
54 | # Split sweep according to split_filters and plot EDF
55 | split_filters = analyze_cfg.SPLIT_FILTERS
56 | if split_filters and len(names) == 1:
57 | names = list(split_filters.keys())
58 | sweeps = [analysis.apply_filters(sweeps[0], f) for f in split_filters.values()]
59 | assert all(len(sweep) for sweep in sweeps), "Split sweep cannot be empty."
60 | h.add_section("EDF Split")
61 | h.add_plot(plotting.plot_edf(sweeps, names))
62 | for sweep, name in zip(sweeps, names):
63 | h.add_details(name, analysis.describe_sweep(sweep))
64 | # Plot metric scatter plots
65 | metrics = analyze_cfg.METRICS
66 | plot_metric_trends = analyze_cfg.PLOT_METRIC_TRENDS and len(sweeps) > 1
67 | if metrics and (analyze_cfg.PLOT_METRIC_VALUES or plot_metric_trends):
68 | h.add_section("Metrics")
69 | filters = [analysis.get_filters(sweep, metrics) for sweep in sweeps]
70 | if analyze_cfg.PLOT_METRIC_VALUES:
71 | h.add_plot(plotting.plot_values(sweeps, names, metrics, filters))
72 | if plot_metric_trends:
73 | h.add_plot(plotting.plot_trends(sweeps, names, metrics, filters))
74 | # Plot complexity scatter plots
75 | complexity = analyze_cfg.COMPLEXITY
76 | plot_complexity_trends = analyze_cfg.PLOT_COMPLEXITY_TRENDS and len(sweeps) > 1
77 | if complexity and (analyze_cfg.PLOT_COMPLEXITY_VALUES or plot_complexity_trends):
78 | h.add_section("Complexity")
79 | filters = [analysis.get_filters(sweep, complexity) for sweep in sweeps]
80 | if analyze_cfg.PLOT_COMPLEXITY_VALUES:
81 | h.add_plot(plotting.plot_values(sweeps, names, complexity, filters))
82 | if plot_complexity_trends:
83 | h.add_plot(plotting.plot_trends(sweeps, names, complexity, filters))
84 | # Plot best/worst error curves
85 | n = analyze_cfg.PLOT_CURVES_BEST
86 | if n > 0:
87 | h.add_section("Best Errors")
88 | h.add_plot(plotting.plot_curves(sweeps, names, "top1_err", n, False))
89 | n = analyze_cfg.PLOT_CURVES_WORST
90 | if n > 0:
91 | h.add_section("Worst Errors")
92 | h.add_plot(plotting.plot_curves(sweeps, names, "top1_err", n, True))
93 | # Plot best/worst models
94 | n = analyze_cfg.PLOT_MODELS_BEST
95 | if n > 0:
96 | h.add_section("Best Models")
97 | h.add_plot(plotting.plot_models(sweeps, names, n, False))
98 | n = analyze_cfg.PLOT_MODELS_WORST
99 | if n > 0:
100 | h.add_section("Worst Models")
101 | h.add_plot(plotting.plot_models(sweeps, names, n, True))
102 | # Output Htmlbook and finalize analysis
103 | h.to_file(os.path.join(sweep_dir, "analysis.html"))
104 | plt.close("all")
105 | print("Done [t={:.1f}s]".format(time.time() - start_time))
106 |
107 |
108 | def main():
109 | desc = "Analyze results of a sweep."
110 | sweep_config.load_cfg_fom_args(desc)
111 | sweep_cfg.freeze()
112 | sweep_analyze()
113 |
114 |
115 | if __name__ == "__main__":
116 | main()
117 |
--------------------------------------------------------------------------------
/tools/sweep_collect.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 |
9 | """Collect results of a sweep."""
10 |
11 | import functools
12 | import json
13 | import multiprocessing
14 | import os
15 |
16 | import pycls.core.checkpoint as cp
17 | import pycls.core.logging as logging
18 | import pycls.sweep.config as sweep_config
19 | from pycls.sweep.config import sweep_cfg
20 |
21 |
22 | # Skip over these data types as they make sweep logs too large
23 | _DATA_TYPES_TO_SKIP = ["train_iter", "test_iter"]
24 |
25 |
26 | def load_data(log_file):
27 |     """Loads and sorts log data for a single job."""
28 | data = logging.load_log_data(log_file, _DATA_TYPES_TO_SKIP)
29 | data = logging.sort_log_data(data)
30 | err_file = log_file.replace("stdout.log", "stderr.log")
31 | data["log_file"] = log_file
32 | data["err_file"] = err_file
33 | with open(err_file, "r") as f:
34 | data["err"] = f.read()
35 | return data
36 |
37 |
38 | def sweep_collect():
39 | """Collects results of a sweep."""
40 | # Get cfg and log files
41 | sweep_dir = os.path.join(sweep_cfg.ROOT_DIR, sweep_cfg.NAME)
42 | print("Collecting jobs for {:s}... ".format(sweep_dir))
43 | cfgs_dir = os.path.join(sweep_dir, "cfgs")
44 | logs_dir = os.path.join(sweep_dir, "logs")
45 | assert os.path.exists(cfgs_dir), "Cfgs dir {} not found".format(cfgs_dir)
46 | assert os.path.exists(logs_dir), "Logs dir {} not found".format(logs_dir)
47 | cfg_files = [c for c in os.listdir(cfgs_dir) if c.endswith(".yaml")]
48 | log_files = logging.get_log_files(logs_dir)[0]
49 | # Create worker pool for collecting jobs
50 | process_pool = multiprocessing.Pool(sweep_cfg.NUM_PROC)
51 |     # Load the sweep data for every job
52 | print("Collecting jobs...")
53 | sweep = list(process_pool.map(load_data, log_files))
54 | # Print basic stats for sweep status
55 | key = "test_epoch"
56 | epoch_ind = [d[key]["epoch_ind"][-1] if key in d else 0 for d in sweep]
57 | epoch_max = [d[key]["epoch_max"][-1] if key in d else 1 for d in sweep]
58 | epoch = ["{}/{}".format(i, m) for i, m in zip(epoch_ind, epoch_max)]
59 | epoch = [e.ljust(len(max(epoch, key=len))) for e in epoch]
60 | job_done = sum(i == m for i, m in zip(epoch_ind, epoch_max))
61 | for d, e, i, m in zip(sweep, epoch, epoch_ind, epoch_max):
62 | out_str = " {} [{:3d}%] [{:}]" + (" [stderr]" if d["err"] else "")
63 | print(out_str.format(d["log_file"], int(i / m * 100), e))
64 | jobs_start = "jobs_started={}/{}".format(len(sweep), len(cfg_files))
65 | jobs_done = "jobs_done={}/{}".format(job_done, len(cfg_files))
66 | ep_done = "epochs_done={}/{}".format(sum(epoch_ind), sum(epoch_max))
67 | print("Status: {}, {}, {}".format(jobs_start, jobs_done, ep_done))
68 | # Save the sweep data
69 | sweep_file = os.path.join(sweep_dir, "sweep.json")
70 | print("Writing sweep data to: {}".format(sweep_file))
71 | with open(sweep_file, "w") as f:
72 | json.dump(sweep, f, sort_keys=True)
73 | # Clean up checkpoints after saving sweep data, if needed
74 | keep = sweep_cfg.COLLECT.CHECKPOINTS_KEEP
75 | cp_dirs = [f.replace("stdout.log", "checkpoints/") for f in log_files]
76 | delete_cps = functools.partial(cp.delete_checkpoints, keep=keep)
77 | num_cleaned = sum(process_pool.map(delete_cps, cp_dirs))
78 | print("Deleted {} total checkpoints".format(num_cleaned))
79 |
80 |
81 | def main():
82 | desc = "Collect results of a sweep."
83 | sweep_config.load_cfg_fom_args(desc)
84 | sweep_cfg.freeze()
85 | sweep_collect()
86 |
87 |
88 | if __name__ == "__main__":
89 | main()
90 |
--------------------------------------------------------------------------------
/tools/sweep_launch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Launch a sweep on a SLURM-managed cluster."""
9 |
10 | import os
11 |
12 | import pycls.sweep.config as sweep_config
13 | from pycls.sweep.config import sweep_cfg
14 |
15 |
16 | _SBATCH_CMD = (
17 | "sbatch"
18 | " --job-name={name}"
19 | " --partition={partition}"
20 | " --gpus={num_gpus}"
21 | " --constraint={gpu_type}"
22 | " --mem={mem}GB"
23 | " --cpus-per-task={cpus}"
24 | " --array=0-{last_job}%{parallel_jobs}"
25 | " --output={sweep_dir}/logs/sbatch/%A_%a.out"
26 | " --error={sweep_dir}/logs/sbatch/%A_%a.out"
27 | " --time={time_limit}"
28 | ' --comment="{comment}"'
29 | " --signal=B:USR1@300"
30 | " --nodes=1"
31 | " --open-mode=append"
32 | " --ntasks-per-node=1"
33 | " {current_dir}/sweep_launch_job.py"
34 | " --conda-env {conda_env}"
35 | " --script-path {script_path}"
36 | " --script-mode {script_mode}"
37 | " --cfgs-dir {cfgs_dir}"
38 | " --pycls-dir {pycls_dir}"
39 | " --logs-dir {logs_dir}"
40 | " --max-retry {max_retry}"
41 | )
42 |
43 |
44 | def sweep_launch():
45 |     """Launch a sweep on a SLURM-managed cluster."""
46 | launch_cfg = sweep_cfg.LAUNCH
47 | # Get and check directory and script locations
48 | current_dir = os.path.dirname(os.path.abspath(__file__))
49 | sweep_dir = os.path.abspath(os.path.join(sweep_cfg.ROOT_DIR, sweep_cfg.NAME))
50 | cfgs_dir = os.path.join(sweep_dir, "cfgs")
51 | logs_dir = os.path.join(sweep_dir, "logs")
52 | sbatch_dir = os.path.join(logs_dir, "sbatch")
53 | script_path = os.path.abspath("tools/run_net.py")
54 | assert os.path.exists(sweep_dir), "Sweep dir {} invalid".format(sweep_dir)
55 | assert os.path.exists(script_path), "Script path {} invalid".format(script_path)
56 | n_cfgs = len([c for c in os.listdir(cfgs_dir) if c.endswith(".yaml")])
57 | # Replace path to be relative to copy of pycls
58 | pycls_copy_dir = os.path.join(sweep_dir, "pycls")
59 | pycls_dir = os.path.abspath(os.path.join(current_dir, ".."))
60 | script_path = script_path.replace(pycls_dir, pycls_copy_dir)
61 | current_dir = current_dir.replace(pycls_dir, pycls_copy_dir)
62 | # Prepare command to copy pycls to sweep_dir/pycls
63 | cmd_to_copy_pycls = "cp -R {}/ {}".format(pycls_dir, pycls_copy_dir)
64 | print("Cmd to copy pycls:", cmd_to_copy_pycls)
65 | # Prepare launch command
66 | cmd_to_launch_sweep = _SBATCH_CMD.format(
67 | name=sweep_cfg.NAME,
68 | partition=launch_cfg.PARTITION,
69 | num_gpus=launch_cfg.NUM_GPUS,
70 | gpu_type=launch_cfg.GPU_TYPE,
71 | mem=launch_cfg.MEM_PER_GPU * launch_cfg.NUM_GPUS,
72 | cpus=launch_cfg.CPUS_PER_GPU * launch_cfg.NUM_GPUS,
73 | last_job=n_cfgs - 1,
74 | parallel_jobs=launch_cfg.PARALLEL_JOBS,
75 | time_limit=launch_cfg.TIME_LIMIT,
76 | comment=launch_cfg.COMMENT,
77 | sweep_dir=sweep_dir,
78 | current_dir=current_dir,
79 | conda_env=launch_cfg.CONDA_ENV,
80 | script_path=script_path,
81 | script_mode=launch_cfg.MODE,
82 | cfgs_dir=cfgs_dir,
83 | pycls_dir=pycls_copy_dir,
84 | logs_dir=logs_dir,
85 | max_retry=launch_cfg.MAX_RETRY,
86 | )
87 | print("Cmd to launch sweep:", cmd_to_launch_sweep.replace(" ", "\n "), sep="\n\n")
88 | # Prompt user to resume or launch sweep
89 | if os.path.exists(sbatch_dir):
90 | print("\nSweep exists! Relaunch ONLY if no jobs are running!")
91 | print("\nRelaunch sweep? [relaunch/n]")
92 | if input().lower() == "relaunch":
93 | os.system(cmd_to_launch_sweep)
94 | else:
95 | print("\nLaunch sweep? [y/n]")
96 | if input().lower() == "y":
97 | os.makedirs(sbatch_dir, exist_ok=False)
98 | os.system(cmd_to_copy_pycls)
99 | os.system(cmd_to_launch_sweep)
100 |
101 |
102 | def main():
103 | desc = "Launch a sweep on the cluster."
104 | sweep_config.load_cfg_fom_args(desc)
105 | sweep_cfg.freeze()
106 | sweep_launch()
107 |
108 |
109 | if __name__ == "__main__":
110 | main()
111 |
--------------------------------------------------------------------------------
/tools/sweep_launch_job.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Launch a job on a SLURM-managed cluster. Should only be called from sweep_launch.py."""
9 |
10 | import argparse
11 | import json
12 | import os
13 | import signal
14 | import subprocess
15 | import sys
16 | from datetime import datetime
17 |
18 |
19 | def prt(*args, **kwargs):
20 | """Wrapper for print that prepends a timestamp and flushes output."""
21 | print("[{}]".format(str(datetime.now())), *args, flush=True, **kwargs)
22 |
23 |
24 | def run_os_cmd(cmd):
25 | """Runs commands in bash environment in foreground."""
26 | os.system('bash -c "{}"'.format(cmd))
27 |
28 |
29 | def requeue_job():
30 | job_id = os.environ["SLURM_ARRAY_JOB_ID"]
31 | task_id = os.environ["SLURM_ARRAY_TASK_ID"]
32 | cmd_to_req = "scontrol requeue {}_{}".format(job_id, task_id)
33 | prt("Requeuing job using cmd: {}".format(cmd_to_req))
34 | os.system(cmd_to_req)
35 | prt("Requeued job {}. Exiting.\n\n".format(job_id))
36 | sys.exit(0)
37 |
38 |
39 | def sigusr1_handler(signum, _):
40 | """Handles SIGUSR1 that is sent before a job is killed by requeuing it."""
41 | prt("Caught SIGUSR1 with code {}".format(signum))
42 | requeue_job()
43 |
44 |
45 | def sigterm_handler(signum, _):
46 | """Handles SIGTERM that is sent before a job is preempted by bypassing it."""
47 | prt("Caught SIGTERM with code {}".format(signum))
48 | prt("Bypassing SIGTERM")
49 |
50 |
51 | def main():
52 | # Parse arguments
53 |     desc = "Launch a job on a SLURM cluster. Should only be called from sweep_launch.py"
54 | parser = argparse.ArgumentParser(description=desc)
55 | parser.add_argument("--conda-env", required=True)
56 | parser.add_argument("--script-path", required=True)
57 | parser.add_argument("--script-mode", required=True)
58 | parser.add_argument("--cfgs-dir", required=True)
59 | parser.add_argument("--pycls-dir", required=True)
60 | parser.add_argument("--logs-dir", required=True)
61 | parser.add_argument("--max-retry", required=True, type=int)
62 | args = parser.parse_args()
63 | prt("Called with args: {}".format(args))
64 | # Attach signal handlers for SIGUSR1 and SIGTERM
65 | signal.signal(signal.SIGUSR1, sigusr1_handler)
66 | signal.signal(signal.SIGTERM, sigterm_handler)
67 | # Print info about run
68 | job_id = os.environ["SLURM_ARRAY_JOB_ID"]
69 | task_id = os.environ["SLURM_ARRAY_TASK_ID"]
70 | prt("Job array main job ID: {}".format(job_id))
71 | prt("Job array task ID (index): {}".format(task_id))
72 | prt("Running job on: {}".format(str(os.uname())))
73 | # Load what we need
74 | run_os_cmd("module purge")
75 | run_os_cmd("module load anaconda3")
76 | run_os_cmd("source deactivate")
77 | run_os_cmd("source activate {}".format(args.conda_env))
78 | # Get cfg_file to use
79 | cfg_files = sorted(f for f in os.listdir(args.cfgs_dir) if f.endswith(".yaml"))
80 | cfg_file = os.path.join(args.cfgs_dir, cfg_files[int(task_id)])
81 | prt("Using cfg_file: {}".format(cfg_file))
82 | # Create out_dir
83 | out_dir = os.path.join(args.logs_dir, "{:06}".format(int(task_id)))
84 | os.makedirs(out_dir, exist_ok=True)
85 | prt("Using out_dir: {}".format(out_dir))
86 | # Create slurm_file with SLURM info
87 | slurm_file = os.path.join(out_dir, "SLURM.txt")
88 | with open(slurm_file, "a") as f:
89 | f.write("SLURM env variables for the job writing to this directory:\n")
90 | slurm_info = {k: os.environ[k] for k in os.environ if k.startswith("SLURM_")}
91 | f.write(json.dumps(slurm_info, indent=4))
92 | prt("Dumped SLURM job info to {}".format(slurm_file))
93 | # Set PYTHONPATH to pycls copy for sweep
94 | os.environ["PYTHONPATH"] = args.pycls_dir
95 | prt("Using PYTHONPATH={}".format(args.pycls_dir))
96 | # Generate srun command to launch
97 | cmd_to_run = (
98 | "srun"
99 | " --output {out_dir}/stdout.log"
100 | " --error {out_dir}/stderr.log"
101 | " python {script}"
102 | " --mode {mode}"
103 | " --cfg {cfg}"
104 | " OUT_DIR {out_dir}"
105 | ).format(
106 | out_dir=out_dir, script=args.script_path, mode=args.script_mode, cfg=cfg_file
107 | )
108 | prt("Running cmd:\n", cmd_to_run.replace(" ", "\n "))
109 | # Run command in background using subprocess and wait so that signals can be caught
110 | p = subprocess.Popen(cmd_to_run, shell=True)
111 | prt("Waiting for job to complete")
112 | p.wait()
113 | prt("Completed waiting. Return code for job: {}".format(p.returncode))
114 | if p.returncode != 0:
115 | retry_file = os.path.join(out_dir, "RETRY.txt")
116 | with open(retry_file, "a") as f:
117 | f.write("Encountered non-zero exit code\n")
118 | with open(retry_file, "r") as f:
119 | retry_count = len(f.readlines()) - 1
120 | prt("Retry count for job: {}".format(retry_count))
121 | if retry_count < args.max_retry:
122 | requeue_job()
123 |
124 |
125 | if __name__ == "__main__":
126 | main()
127 |
--------------------------------------------------------------------------------
/tools/sweep_setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright (c) Facebook, Inc. and its affiliates.
4 | #
5 | # This source code is licensed under the MIT license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | """Sample cfgs for a sweep using a sweep_cfg."""
9 |
10 | import multiprocessing
11 | import os
12 |
13 | import numpy as np
14 | import pycls.models.scaler as scaler
15 | import pycls.sweep.config as sweep_config
16 | import pycls.sweep.samplers as samplers
17 | import yaml
18 | from pycls.core.config import cfg, reset_cfg
19 | from pycls.core.timer import Timer
20 | from pycls.sweep.config import sweep_cfg
21 |
22 |
23 | def sample_cfgs(seed):
24 |     """Samples a chunk of configs and returns those that are unique and valid."""
25 | # Fix RNG seed (every call to this function should use a unique seed)
26 | np.random.seed(seed)
27 | setup_cfg = sweep_cfg.SETUP
28 | cfgs = {}
29 | for _ in range(setup_cfg.CHUNK_SIZE):
30 | # Sample parameters [key, val, ...] list based on the samplers
31 | params = samplers.sample_parameters(setup_cfg.SAMPLERS)
32 | # Check if config is unique, if not continue
33 | key = zip(params[0::2], params[1::2])
34 | key = " ".join(["{} {}".format(k, v) for k, v in key])
35 | if key in cfgs:
36 | continue
37 | # Generate config from parameters
38 | reset_cfg()
39 | cfg.merge_from_other_cfg(setup_cfg.BASE_CFG)
40 | cfg.merge_from_list(params)
41 | # Check if config is valid, if not continue
42 | is_valid = samplers.check_regnet_constraints(setup_cfg.CONSTRAINTS)
43 | if not is_valid:
44 | continue
45 |         # Special logic for dealing with model scaling (side effect is to standardize cfg)
46 | if cfg.MODEL.TYPE in ["anynet", "effnet", "regnet"]:
47 | scaler.scale_model()
48 | # Check if config is valid, if not continue
49 | is_valid = samplers.check_complexity_constraints(setup_cfg.CONSTRAINTS)
50 | if not is_valid:
51 | continue
52 | # Set config description to key
53 | cfg.DESC = key
54 | # Store copy of config if unique and valid
55 | cfgs[key] = cfg.clone()
56 | # Stop sampling if already reached quota
57 | if len(cfgs) == setup_cfg.NUM_CONFIGS:
58 | break
59 | return cfgs
60 |
61 |
62 | def dump_cfg(cfg_file, cfg):
63 | """Dumps the config to the specified location."""
64 | with open(cfg_file, "w") as f:
65 | cfg.dump(stream=f)
66 |
67 |
68 | def sweep_setup():
69 | """Samples cfgs for the sweep."""
70 | setup_cfg = sweep_cfg.SETUP
71 | # Create output directories
72 | sweep_dir = os.path.join(sweep_cfg.ROOT_DIR, sweep_cfg.NAME)
73 | cfgs_dir = os.path.join(sweep_dir, "cfgs")
74 | logs_dir = os.path.join(sweep_dir, "logs")
75 | print("Sweep directory is: {}".format(sweep_dir))
76 | assert not os.path.exists(logs_dir), "Sweep already started: " + sweep_dir
77 |     if os.path.exists(sweep_dir) or os.path.exists(cfgs_dir):
78 | print("Overwriting sweep which has not yet launched")
79 | os.makedirs(sweep_dir, exist_ok=True)
80 | os.makedirs(cfgs_dir, exist_ok=True)
81 | # Dump the original sweep_cfg
82 | sweep_cfg_file = os.path.join(sweep_dir, "sweep_cfg.yaml")
83 | os.system("cp {} {}".format(sweep_cfg.SWEEP_CFG_FILE, sweep_cfg_file))
84 | # Create worker pool for sampling and saving configs
85 | n_proc, chunk = sweep_cfg.NUM_PROC, setup_cfg.CHUNK_SIZE
86 | process_pool = multiprocessing.Pool(n_proc)
87 | # Fix random number generator seed and generate per chunk seeds
88 | np.random.seed(setup_cfg.RNG_SEED)
89 | n_chunks = int(np.ceil(setup_cfg.NUM_SAMPLES / chunk))
90 | chunk_seeds = np.random.choice(1000000, size=n_chunks, replace=False)
91 | # Sample configs in chunks using multiple workers each with a unique seed
92 |     info_str = "Number of configs sampled: {}, configs kept: {} [t={:.2f}s]"
93 | n_samples, n_cfgs, i, cfgs, timer = 0, 0, 0, {}, Timer()
94 | while n_samples < setup_cfg.NUM_SAMPLES and n_cfgs < setup_cfg.NUM_CONFIGS:
95 | timer.tic()
96 | seeds = chunk_seeds[i * n_proc : i * n_proc + n_proc]
97 | cfgs_all = process_pool.map(sample_cfgs, seeds)
98 | cfgs = dict(cfgs, **{k: v for d in cfgs_all for k, v in d.items()})
99 | n_samples, n_cfgs, i = n_samples + chunk * n_proc, len(cfgs), i + 1
100 | timer.toc()
101 | print(info_str.format(n_samples, n_cfgs, timer.total_time))
102 | # Randomize cfgs order and subsample if oversampled
103 | keys, cfgs = list(cfgs.keys()), list(cfgs.values())
104 | n_cfgs = min(n_cfgs, setup_cfg.NUM_CONFIGS)
105 | ids = np.random.choice(len(cfgs), n_cfgs, replace=False)
106 | keys, cfgs = [keys[i] for i in ids], [cfgs[i] for i in ids]
107 | # Save the cfgs and a cfgs_summary
108 | timer.tic()
109 | cfg_names = ["{:06}.yaml".format(i) for i in range(n_cfgs)]
110 | cfgs_summary = {cfg_name: key for cfg_name, key in zip(cfg_names, keys)}
111 | with open(os.path.join(sweep_dir, "cfgs_summary.yaml"), "w") as f:
112 | yaml.dump(cfgs_summary, f, width=float("inf"))
113 | cfg_files = [os.path.join(cfgs_dir, cfg_name) for cfg_name in cfg_names]
114 | process_pool.starmap(dump_cfg, zip(cfg_files, cfgs))
115 | timer.toc()
116 | print(info_str.format(n_samples, n_cfgs, timer.total_time))
117 |
118 |
119 | def main():
120 | desc = "Set up sweep by generating job configs."
121 | sweep_config.load_cfg_fom_args(desc)
122 | sweep_cfg.freeze()
123 | sweep_setup()
124 |
125 |
126 | if __name__ == "__main__":
127 | main()
128 |
--------------------------------------------------------------------------------