├── AutoFormer ├── model │ ├── module │ │ ├── __init__.py │ │ ├── layernorm_super.py │ │ ├── embedding_super.py │ │ ├── Linear_super.py │ │ └── qkv_super.py │ └── utils.py ├── .figure │ ├── ofa.png │ ├── overview.png │ └── performance.png ├── attn_map.xlsx ├── generate_subImagenet.sh ├── result │ ├── img.png │ ├── attn_map.png │ ├── cls_weight.png │ ├── img_resized.png │ └── img_resized_overlay.png ├── cls_attn_map.xlsx ├── requirements.txt ├── training_free │ ├── __init__.py │ ├── indicators │ │ ├── __init__.py │ │ ├── snip.py │ │ ├── NASWOT.py │ │ └── grasp.py │ └── compute_indicators.py ├── experiments │ ├── supernet │ │ ├── supernet-T.yaml │ │ ├── supernet-B.yaml │ │ └── supernet-S.yaml │ └── subnet │ │ ├── AutoFormer-T.yaml │ │ ├── AutoFormer-S.yaml │ │ └── AutoFormer-B.yaml ├── observe_supernet.sh ├── evolution_search.sh ├── lib │ ├── config.py │ ├── subImageNet.py │ ├── samplers.py │ ├── imagenet_withhold.py │ └── utils.py ├── config.yaml ├── top_k_parser.py ├── train_supernet.sh ├── tmp.py ├── performance_parser.py └── supernet_engine(save).py ├── AutoFormer_original ├── model │ ├── module │ │ ├── __init__.py │ │ ├── layernorm_super.py │ │ ├── embedding_super.py │ │ ├── Linear_super.py │ │ └── qkv_super.py │ └── utils.py ├── .figure │ ├── ofa.png │ ├── overview.png │ └── performance.png ├── requirements.txt ├── train_supernet.sh ├── experiments │ ├── supernet │ │ ├── supernet-T.yaml │ │ ├── supernet-B.yaml │ │ └── supernet-S.yaml │ └── subnet │ │ ├── AutoFormer-T.yaml │ │ ├── AutoFormer-S.yaml │ │ └── AutoFormer-B.yaml ├── lib │ ├── config.py │ ├── subImageNet.py │ ├── samplers.py │ ├── imagenet_withhold.py │ └── utils.py └── README.md ├── AutoFormer_original_greedy ├── model │ ├── module │ │ ├── __init__.py │ │ ├── layernorm_super.py │ │ ├── embedding_super.py │ │ ├── Linear_super.py │ │ └── qkv_super.py │ └── utils.py ├── .figure │ ├── ofa.png │ ├── overview.png │ └── performance.png ├── requirements.txt ├── experiments │ ├── supernet │ │ ├── supernet-T.yaml │ │ ├── supernet-B.yaml │ │ └── supernet-S.yaml │ └── subnet │ │ ├── AutoFormer-T.yaml │ │ ├── AutoFormer-S.yaml │ │ └── AutoFormer-B.yaml ├── train_supernet_base.sh ├── train_supernet_observation.sh ├── z_train_supernet.sh ├── lib │ ├── config.py │ ├── subImageNet.py │ ├── samplers.py │ └── imagenet_withhold.py ├── config.yaml ├── train_supernet_small.sh ├── config_prenas.yaml ├── train_supernet_only_supernet.sh ├── evolution_search copy.sh ├── evolution_search.sh ├── performance_parser.py ├── README.md ├── z_supernet_engine.py ├── supernet_engine_base.py ├── supernet_engine_real_original.py └── supernet_engine_only_supernet.py ├── .gitignore └── README.md /AutoFormer/model/module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AutoFormer/.figure/ofa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/.figure/ofa.png 
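The model/module files listed in the tree above (Linear_super.py, qkv_super.py, layernorm_super.py, embedding_super.py) all follow one weight-slicing pattern: each module allocates parameters at the largest ("super") dimension and runs its forward pass on a slice of those parameters matching the currently sampled subnet. Linear_super.py itself is not reproduced in this dump, so the sketch below is only an illustrative reconstruction of that shared pattern, mirroring the LayerNormSuper and PatchembedSuper implementations shown later; it is not the repository's actual code.

import torch.nn as nn
import torch.nn.functional as F

class LinearSuper(nn.Linear):
    # Illustrative sketch only; the real Linear_super.py is not shown in this dump.
    def __init__(self, super_in_dim, super_out_dim, bias=True):
        super().__init__(super_in_dim, super_out_dim, bias=bias)
        self.sample_in_dim = super_in_dim
        self.sample_out_dim = super_out_dim

    def set_sample_config(self, sample_in_dim, sample_out_dim):
        # Record the dimensions of the currently sampled subnet.
        self.sample_in_dim = sample_in_dim
        self.sample_out_dim = sample_out_dim

    def forward(self, x):
        # Slice the leading rows/columns of the super weight, as
        # LayerNormSuper._sample_parameters and PatchembedSuper.set_sample_config do.
        weight = self.weight[:self.sample_out_dim, :self.sample_in_dim]
        bias = self.bias[:self.sample_out_dim] if self.bias is not None else None
        return F.linear(x, weight, bias)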
-------------------------------------------------------------------------------- /AutoFormer/attn_map.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/attn_map.xlsx -------------------------------------------------------------------------------- /AutoFormer/generate_subImagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 ./lib/subImageNet.py --data-path '/data' 3 | 4 | 5 | -------------------------------------------------------------------------------- /AutoFormer/result/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/img.png -------------------------------------------------------------------------------- /AutoFormer/cls_attn_map.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/cls_attn_map.xlsx -------------------------------------------------------------------------------- /AutoFormer/.figure/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/.figure/overview.png -------------------------------------------------------------------------------- /AutoFormer/result/attn_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/attn_map.png -------------------------------------------------------------------------------- /AutoFormer/result/cls_weight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/cls_weight.png -------------------------------------------------------------------------------- /AutoFormer/result/img_resized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/img_resized.png -------------------------------------------------------------------------------- /AutoFormer/.figure/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/.figure/performance.png -------------------------------------------------------------------------------- /AutoFormer_original/.figure/ofa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original/.figure/ofa.png -------------------------------------------------------------------------------- /AutoFormer/result/img_resized_overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/img_resized_overlay.png -------------------------------------------------------------------------------- /AutoFormer_original/.figure/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original/.figure/overview.png 
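The experiments/supernet YAML files further below define, for each supernet size (T/S/B), a SUPERNET section with the maximal dimensions and a SEARCH_SPACE section listing the allowed choices for MLP_RATIO, NUM_HEADS, DEPTH, and EMBED_DIM. A subnet is then a depth, one embedding dimension, and per-layer choices from these lists; the RETRAIN sections of the subnet YAMLs show exactly such per-layer assignments. A minimal sketch of drawing one random subnet config from a search space, assuming SEARCH_SPACE is a top-level key as in the AutoFormer configs; the helper name is hypothetical, since the repository's own sampling code is not part of this dump:

import random
import yaml

def sample_subnet_config(yaml_path):
    # Hypothetical helper: draws one random subnet from the SEARCH_SPACE lists.
    with open(yaml_path) as f:
        space = yaml.safe_load(f)['SEARCH_SPACE']
    depth = random.choice(space['DEPTH'])
    return {
        'layer_num': depth,                              # number of transformer blocks
        'embed_dim': random.choice(space['EMBED_DIM']),  # shared across all blocks
        'mlp_ratio': [random.choice(space['MLP_RATIO']) for _ in range(depth)],  # per block
        'num_heads': [random.choice(space['NUM_HEADS']) for _ in range(depth)],  # per block
    }

# e.g. sample_subnet_config('./experiments/supernet/supernet-T.yaml')

The dict keys match the fields that top_k_parser.py (shown later in this dump) reads back out of the search logs.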
-------------------------------------------------------------------------------- /AutoFormer_original/.figure/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original/.figure/performance.png -------------------------------------------------------------------------------- /AutoFormer_original_greedy/.figure/ofa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original_greedy/.figure/ofa.png -------------------------------------------------------------------------------- /AutoFormer_original_greedy/.figure/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original_greedy/.figure/overview.png -------------------------------------------------------------------------------- /AutoFormer_original_greedy/.figure/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original_greedy/.figure/performance.png -------------------------------------------------------------------------------- /AutoFormer/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.7.0 2 | timm==0.3.2 3 | scikit-image 4 | ptflops 5 | easydict 6 | PyYAML 7 | pillow 8 | torchvision==0.2.1 9 | opencv-python 10 | -------------------------------------------------------------------------------- /AutoFormer_original/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.7.0 2 | timm==0.3.2 3 | scikit-image 4 | ptflops 5 | easydict 6 | PyYAML 7 | pillow 8 | torchvision==0.2.1 9 | opencv-python 10 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.7.0 2 | timm==0.3.2 3 | scikit-image 4 | ptflops 5 | easydict 6 | PyYAML 7 | pillow 8 | torchvision==0.2.1 9 | opencv-python 10 | -------------------------------------------------------------------------------- /AutoFormer/training_free/__init__.py: -------------------------------------------------------------------------------- 1 | from .compute_indicators import * 2 | from os.path import dirname, basename, isfile, join 3 | import glob 4 | modules = glob.glob(join(dirname(__file__), "*.py")) 5 | __all__ = [ basename(f)[:-3] for f in modules if isfile(f) and not f.endswith('__init__.py')] -------------------------------------------------------------------------------- /AutoFormer_original/train_supernet.sh: -------------------------------------------------------------------------------- 1 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 2 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 3 | --output /OUTPUT_PATH --batch-size 128 -------------------------------------------------------------------------------- /AutoFormer/experiments/supernet/supernet-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | 
EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 -------------------------------------------------------------------------------- /AutoFormer_original/experiments/supernet/supernet-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | -------------------------------------------------------------------------------- /AutoFormer/observe_supernet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env observe_supernet.py --data-path '/data' --gp \ 3 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/supernet-tiny.pth' \ 4 | --min-param-limits 1 --param-limits 7 5 | # --data-set EVO_IMNET 6 | 7 | 8 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/supernet/supernet-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | -------------------------------------------------------------------------------- /AutoFormer/experiments/supernet/supernet-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 9 13 | - 10 14 | DEPTH: 15 | - 14 16 | - 15 17 | - 16 18 | EMBED_DIM: 19 | - 528 20 | - 576 21 | - 624 22 | -------------------------------------------------------------------------------- /AutoFormer/experiments/supernet/supernet-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | -------------------------------------------------------------------------------- /AutoFormer_original/experiments/supernet/supernet-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 9 13 | - 10 14 | DEPTH: 15 | - 14 16 | - 15 17 | - 16 18 | EMBED_DIM: 19 | - 528 20 | - 576 21 | - 624 22 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/supernet/supernet-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | 
SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 9 13 | - 10 14 | DEPTH: 15 | - 14 16 | - 15 17 | - 16 18 | EMBED_DIM: 19 | - 528 20 | - 576 21 | - 624 22 | -------------------------------------------------------------------------------- /AutoFormer_original/experiments/supernet/supernet-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/supernet/supernet-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/train_supernet_base.sh: -------------------------------------------------------------------------------- 1 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_base.py --data-path '/data' --gp \ 2 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --epochs 500 --warmup-epochs 20 \ 3 | --output /OUTPUT_PATH --batch-size 128 \ 4 | --save_checkpoint_path 'checkpoint-original-base-' --save_log_path './log/supernet_original_base.log' --interval 1 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoint/ 2 | *.pyc 3 | /*.log 4 | *ptm 5 | .vscode 6 | *visualize_images*/ 7 | *_tsne_* 8 | *hard_triplet_* 9 | *_feature.pkl 10 | Visualize_Network_* 11 | hard_mining/ 12 | backup.py 13 | *pdf 14 | *.bak* 15 | # *.sh 16 | *.log 17 | *.pkl 18 | *.tar 19 | *.pth 20 | AutoFormer/tftasenv 21 | AutoFormer/Python-3.6.15 22 | AutoFormer/autoformer3.6 23 | *.tgz 24 | config.txt 25 | AutoFormer/greedyTAS/autoformer-greedyTAS-09121607-greedy.log 26 | AutoFormer/greedyTAS/autoformer-greedyTAS(09121607)-greedy.log -------------------------------------------------------------------------------- /AutoFormer_original_greedy/train_supernet_observation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set UTF-8 environment variables (to prevent UnicodeEncodeError) 4 | export PYTHONIOENCODING=utf-8 5 | export LC_ALL=C.UTF-8 6 | export LANG=C.UTF-8 7 | 8 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_observation.py --data-path '/data' --gp \ 9 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 10 | --resume '/OUTPUT_PATH/checkpoint-original-24.pth' --output /OUTPUT_PATH --batch-size 128 \ 11 | --save_checkpoint_path 'checkpoint-tiny-observation2-' --save_log_path './log/supernet_tiny_observation2.log' --interval 1 12 | -------------------------------------------------------------------------------- /AutoFormer/experiments/subnet/AutoFormer-T.yaml:
-------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | RETRAIN: 22 | MLP_RATIO: 23 | - 3.5 24 | - 3.5 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.0 29 | - 4.0 30 | - 4.0 31 | - 3.5 32 | - 4.0 33 | - 3.5 34 | - 4.0 35 | - 3.5 36 | NUM_HEADS: 37 | - 3 38 | - 3 39 | - 3 40 | - 3 41 | - 3 42 | - 3 43 | - 3 44 | - 3 45 | - 3 46 | - 3 47 | - 4 48 | - 3 49 | - 3 50 | DEPTH: 13 51 | EMBED_DIM: 192 -------------------------------------------------------------------------------- /AutoFormer_original/experiments/subnet/AutoFormer-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | RETRAIN: 22 | MLP_RATIO: 23 | - 3.5 24 | - 3.5 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.0 29 | - 4.0 30 | - 4.0 31 | - 3.5 32 | - 4.0 33 | - 3.5 34 | - 4.0 35 | - 3.5 36 | NUM_HEADS: 37 | - 3 38 | - 3 39 | - 3 40 | - 3 41 | - 3 42 | - 3 43 | - 3 44 | - 3 45 | - 3 46 | - 3 47 | - 4 48 | - 3 49 | - 3 50 | DEPTH: 13 51 | EMBED_DIM: 192 -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/subnet/AutoFormer-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | RETRAIN: 22 | MLP_RATIO: 23 | - 3.5 24 | - 3.5 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.0 29 | - 4.0 30 | - 4.0 31 | - 3.5 32 | - 4.0 33 | - 3.5 34 | - 4.0 35 | - 3.5 36 | NUM_HEADS: 37 | - 3 38 | - 3 39 | - 3 40 | - 3 41 | - 3 42 | - 3 43 | - 3 44 | - 3 45 | - 3 46 | - 3 47 | - 4 48 | - 3 49 | - 3 50 | DEPTH: 13 51 | EMBED_DIM: 192 -------------------------------------------------------------------------------- /AutoFormer/experiments/subnet/AutoFormer-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.5 29 | - 4.0 30 | - 4.0 31 | - 4.0 32 | - 4.0 33 | - 4.0 34 | - 4.0 35 | - 4.0 36 | - 3.5 37 | - 4.0 38 | NUM_HEADS: 39 | - 6 40 | - 6 41 | - 5 42 | - 7 43 | - 5 44 | - 5 45 | - 5 46 | - 6 47 | - 6 48 | - 7 49 | - 7 50 | - 6 51 | - 7 52 | DEPTH: 13 53 | EMBED_DIM: 384 54 | 55 | -------------------------------------------------------------------------------- /AutoFormer_original/experiments/subnet/AutoFormer-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | 
NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.5 29 | - 4.0 30 | - 4.0 31 | - 4.0 32 | - 4.0 33 | - 4.0 34 | - 4.0 35 | - 4.0 36 | - 3.5 37 | - 4.0 38 | NUM_HEADS: 39 | - 6 40 | - 6 41 | - 5 42 | - 7 43 | - 5 44 | - 5 45 | - 5 46 | - 6 47 | - 6 48 | - 7 49 | - 7 50 | - 6 51 | - 7 52 | DEPTH: 13 53 | EMBED_DIM: 384 54 | 55 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/subnet/AutoFormer-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.5 29 | - 4.0 30 | - 4.0 31 | - 4.0 32 | - 4.0 33 | - 4.0 34 | - 4.0 35 | - 4.0 36 | - 3.5 37 | - 4.0 38 | NUM_HEADS: 39 | - 6 40 | - 6 41 | - 5 42 | - 7 43 | - 5 44 | - 5 45 | - 5 46 | - 6 47 | - 6 48 | - 7 49 | - 7 50 | - 6 51 | - 7 52 | DEPTH: 13 53 | EMBED_DIM: 384 54 | 55 | -------------------------------------------------------------------------------- /AutoFormer/evolution_search.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run the first job 4 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 5 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 6 | --min-param-limits 1 --param-limits 100 --config-list-path './greedyTAS/m(2500)_path_epoch100.pkl' \ 7 | --log-file-path './greedyTAS/m(2500)_path_epoch100-subnet.log' 8 | 9 | 10 | # #!/bin/bash 11 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 12 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 13 | # --min-param-limits 1 --param-limits 100 14 | # # --data-set EVO_IMNET 15 | 16 | 17 | -------------------------------------------------------------------------------- /AutoFormer/experiments/subnet/AutoFormer-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 8 13 | - 9 14 | - 10 15 | DEPTH: 16 | - 14 17 | - 15 18 | - 16 19 | EMBED_DIM: 20 | - 528 21 | - 576 22 | - 624 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.5 26 | - 3.5 27 | - 4.0 28 | - 3.5 29 | - 4.0 30 | - 3.5 31 | - 3.5 32 | - 3.0 33 | - 4.0 34 | - 4.0 35 | - 3.0 36 | - 4.0 37 | - 3.0 38 | - 3.5 39 | NUM_HEADS: 40 | - 9 41 | - 9 42 | - 9 43 | - 9 44 | - 9 45 | - 10 46 | - 9 47 | - 9 48 | - 10 49 | - 9 50 | - 10 51 | - 9 52 | - 9 53 | - 10 54 | DEPTH: 14 55 | EMBED_DIM: 576 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /AutoFormer_original/experiments/subnet/AutoFormer-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM:
640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 8 13 | - 9 14 | - 10 15 | DEPTH: 16 | - 14 17 | - 15 18 | - 16 19 | EMBED_DIM: 20 | - 528 21 | - 576 22 | - 624 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.5 26 | - 3.5 27 | - 4.0 28 | - 3.5 29 | - 4.0 30 | - 3.5 31 | - 3.5 32 | - 3.0 33 | - 4.0 34 | - 4.0 35 | - 3.0 36 | - 4.0 37 | - 3.0 38 | - 3.5 39 | NUM_HEADS: 40 | - 9 41 | - 9 42 | - 9 43 | - 9 44 | - 9 45 | - 10 46 | - 9 47 | - 9 48 | - 10 49 | - 9 50 | - 10 51 | - 9 52 | - 9 53 | - 10 54 | DEPTH: 14 55 | EMBED_DIM: 576 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/subnet/AutoFormer-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 8 13 | - 9 14 | - 10 15 | DEPTH: 16 | - 14 17 | - 15 18 | - 16 19 | EMBED_DIM: 20 | - 528 21 | - 576 22 | - 624 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.5 26 | - 3.5 27 | - 4.0 28 | - 3.5 29 | - 4.0 30 | - 3.5 31 | - 3.5 32 | - 3.0 33 | - 4.0 34 | - 4.0 35 | - 3.0 36 | - 4.0 37 | - 3.0 38 | - 3.5 39 | NUM_HEADS: 40 | - 9 41 | - 9 42 | - 9 43 | - 9 44 | - 9 45 | - 10 46 | - 9 47 | - 9 48 | - 10 49 | - 9 50 | - 10 51 | - 9 52 | - 9 53 | - 10 54 | DEPTH: 14 55 | EMBED_DIM: 576 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/z_train_supernet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set UTF-8 environment variables (to prevent UnicodeEncodeError) 4 | export PYTHONIOENCODING=utf-8 5 | export LC_ALL=C.UTF-8 6 | export LANG=C.UTF-8 7 | 8 | python -m torch.distributed.launch --nproc_per_node=8 --use_env z_supernet_train.py --data-path '/data' --gp \ 9 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --epochs 500 --warmup-epochs 20 \ 10 | --output /OUTPUT_PATH --batch-size 128 \ 11 | --save_checkpoint_path 'checkpoint-z_original_auto_s_prenassmallaug' --save_log_path './log/supernet_z_original_auto_s_prenassmallaug.log' --interval 1 12 | 13 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env z_evolution.py --data-path '/data' --gp \ 14 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --resume '/OUTPUT_PATH/checkpoint-z_original_auto_s_prenassmallaug-25.pth' \ 15 | --min-param-limits 5 --param-limits 23 \ 16 | --log-file-path './log/search_z_original_auto_s_prenassmallaug_23M.log' 17 | -------------------------------------------------------------------------------- /AutoFormer/lib/config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | import yaml 3 | 4 | cfg = edict() 5 | 6 | 7 | def _edict2dict(dest_dict, src_edict): 8 | if isinstance(dest_dict, dict) and isinstance(src_edict, dict): 9 | for k, v in src_edict.items(): 10 | if not isinstance(v, edict): 11 | dest_dict[k] = v 12 | else: 13 | dest_dict[k] = {} 14 | _edict2dict(dest_dict[k], v) 15 | else: 16 | return 17 | 18 | def gen_config(config_file): 19 | cfg_dict = {} 20 | _edict2dict(cfg_dict, cfg) 21 | with open(config_file, 'w') as f: 22 | yaml.dump(cfg_dict, f, default_flow_style=False) 23 | 24 | 25 | def _update_config(base_cfg, exp_cfg):
26 | if isinstance(base_cfg, edict) and isinstance(exp_cfg, edict): 27 | for k, v in exp_cfg.items(): 28 | base_cfg[k] = v 29 | else: 30 | return 31 | 32 | 33 | def update_config_from_file(filename): 34 | exp_config = None 35 | with open(filename) as f: 36 | exp_config = edict(yaml.safe_load(f)) 37 | _update_config(cfg, exp_config) 38 | 39 | 40 | -------------------------------------------------------------------------------- /AutoFormer_original/lib/config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | import yaml 3 | 4 | cfg = edict() 5 | 6 | 7 | def _edict2dict(dest_dict, src_edict): 8 | if isinstance(dest_dict, dict) and isinstance(src_edict, dict): 9 | for k, v in src_edict.items(): 10 | if not isinstance(v, edict): 11 | dest_dict[k] = v 12 | else: 13 | dest_dict[k] = {} 14 | _edict2dict(dest_dict[k], v) 15 | else: 16 | return 17 | 18 | def gen_config(config_file): 19 | cfg_dict = {} 20 | _edict2dict(cfg_dict, cfg) 21 | with open(config_file, 'w') as f: 22 | yaml.dump(cfg_dict, f, default_flow_style=False) 23 | 24 | 25 | def _update_config(base_cfg, exp_cfg): 26 | if isinstance(base_cfg, edict) and isinstance(exp_cfg, edict): 27 | for k, v in exp_cfg.items(): 28 | base_cfg[k] = v 29 | else: 30 | return 31 | 32 | 33 | def update_config_from_file(filename): 34 | exp_config = None 35 | with open(filename) as f: 36 | exp_config = edict(yaml.safe_load(f)) 37 | _update_config(cfg, exp_config) 38 | 39 | 40 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/lib/config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | import yaml 3 | 4 | cfg = edict() 5 | 6 | 7 | def _edict2dict(dest_dict, src_edict): 8 | if isinstance(dest_dict, dict) and isinstance(src_edict, dict): 9 | for k, v in src_edict.items(): 10 | if not isinstance(v, edict): 11 | dest_dict[k] = v 12 | else: 13 | dest_dict[k] = {} 14 | _edict2dict(dest_dict[k], v) 15 | else: 16 | return 17 | 18 | def gen_config(config_file): 19 | cfg_dict = {} 20 | _edict2dict(cfg_dict, cfg) 21 | with open(config_file, 'w') as f: 22 | yaml.dump(cfg_dict, f, default_flow_style=False) 23 | 24 | 25 | def _update_config(base_cfg, exp_cfg): 26 | if isinstance(base_cfg, edict) and isinstance(exp_cfg, edict): 27 | for k, v in exp_cfg.items(): 28 | base_cfg[k] = v 29 | else: 30 | return 31 | 32 | 33 | def update_config_from_file(filename): 34 | exp_config = None 35 | with open(filename) as f: 36 | exp_config = edict(yaml.safe_load(f)) 37 | _update_config(cfg, exp_config) 38 | 39 | 40 | -------------------------------------------------------------------------------- /AutoFormer/training_free/indicators/__init__.py: -------------------------------------------------------------------------------- 1 | available_indicators = [] 2 | _indicator_impls = {} 3 | 4 | 5 | def indicator(name, bn=True, copy_net=True, force_clean=True, **impl_args): 6 | def make_impl(func): 7 | def indicator_impl(net_orig, device, *args, **kwargs): 8 | if copy_net: 9 | net = net_orig.get_copy(bn=bn).to(device) 10 | else: 11 | net = net_orig 12 | if name =='NASWOT': 13 | ret = func(net, device) 14 | elif name =='te_nas': 15 | ret = func(net) 16 | else: 17 | ret = func(net, *args, **kwargs, **impl_args) 18 | if copy_net and force_clean: 19 | import gc 20 | import torch 21 | del net 22 | torch.cuda.empty_cache() 23 | gc.collect() 24 | return ret 25 | 26 | global 
_indicator_impls 27 | if name in _indicator_impls: 28 | raise KeyError(f'Duplicated indicator! {name}') 29 | available_indicators.append(name) 30 | _indicator_impls[name] = indicator_impl 31 | return func 32 | return make_impl 33 | 34 | 35 | def calc_indicator(name, net, device, *args, **kwargs): 36 | return _indicator_impls[name](net, device, *args, **kwargs) 37 | 38 | 39 | def load_all(): 40 | # from . import snip 41 | # from . import grasp 42 | # from . import NASWOT 43 | # from . import te_nas 44 | from . import dss 45 | 46 | load_all() 47 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/layernorm_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class LayerNormSuper(torch.nn.LayerNorm): 6 | def __init__(self, super_embed_dim): 7 | super().__init__(super_embed_dim) 8 | 9 | # the largest embed dim 10 | self.super_embed_dim = super_embed_dim 11 | 12 | # the current sampled embed dim 13 | self.sample_embed_dim = None 14 | 15 | self.samples = {} 16 | self.profiling = False 17 | 18 | def profile(self, mode=True): 19 | self.profiling = mode 20 | 21 | def sample_parameters(self, resample=False): 22 | if self.profiling or resample: 23 | return self._sample_parameters() 24 | return self.samples 25 | 26 | def _sample_parameters(self): 27 | self.samples['weight'] = self.weight[:self.sample_embed_dim] 28 | self.samples['bias'] = self.bias[:self.sample_embed_dim] 29 | return self.samples 30 | 31 | def set_sample_config(self, sample_embed_dim): 32 | self.sample_embed_dim = sample_embed_dim 33 | self._sample_parameters() 34 | 35 | def forward(self, x): 36 | self.sample_parameters() 37 | return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) 38 | 39 | def calc_sampled_param_num(self): 40 | assert 'weight' in self.samples.keys() 41 | assert 'bias' in self.samples.keys() 42 | return self.samples['weight'].numel() + self.samples['bias'].numel() 43 | 44 | def get_complexity(self, sequence_length): 45 | return sequence_length * self.sample_embed_dim 46 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/layernorm_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class LayerNormSuper(torch.nn.LayerNorm): 6 | def __init__(self, super_embed_dim): 7 | super().__init__(super_embed_dim) 8 | 9 | # the largest embed dim 10 | self.super_embed_dim = super_embed_dim 11 | 12 | # the current sampled embed dim 13 | self.sample_embed_dim = None 14 | 15 | self.samples = {} 16 | self.profiling = False 17 | 18 | def profile(self, mode=True): 19 | self.profiling = mode 20 | 21 | def sample_parameters(self, resample=False): 22 | if self.profiling or resample: 23 | return self._sample_parameters() 24 | return self.samples 25 | 26 | def _sample_parameters(self): 27 | self.samples['weight'] = self.weight[:self.sample_embed_dim] 28 | self.samples['bias'] = self.bias[:self.sample_embed_dim] 29 | return self.samples 30 | 31 | def set_sample_config(self, sample_embed_dim): 32 | self.sample_embed_dim = sample_embed_dim 33 | self._sample_parameters() 34 | 35 | def forward(self, x): 36 | self.sample_parameters() 37 | return F.layer_norm(x, (self.sample_embed_dim,), 
weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) 38 | 39 | def calc_sampled_param_num(self): 40 | assert 'weight' in self.samples.keys() 41 | assert 'bias' in self.samples.keys() 42 | return self.samples['weight'].numel() + self.samples['bias'].numel() 43 | 44 | def get_complexity(self, sequence_length): 45 | return sequence_length * self.sample_embed_dim 46 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/config.yaml: -------------------------------------------------------------------------------- 1 | aa: rand-m9-mstd0.5-inc1 2 | amp: true 3 | batch_size: 64 4 | cfg: ./experiments/supernet/supernet-T.yaml 5 | change_qkv: true 6 | clip_grad: null 7 | color_jitter: 0.4 8 | cooldown_epochs: 10 9 | crossover_num: 25 10 | cutmix: 1.0 11 | cutmix_minmax: null 12 | data_path: /data 13 | data_set: IMNET 14 | decay_epochs: 30 15 | decay_rate: 0.1 16 | device: cuda 17 | dist_backend: nccl 18 | dist_eval: true 19 | dist_url: env:// 20 | distributed: true 21 | drop: 0.0 22 | drop_block: null 23 | drop_path: 0.1 24 | epochs: 30 25 | eval: false 26 | gp: true 27 | gpu: 0 28 | inat_category: name 29 | input_size: 224 30 | log_file_path: ./log/search_tiny-only-supernet192-minimum_pop1050_10M.log 31 | lr: 0.0005 32 | lr_noise: null 33 | lr_noise_pct: 0.67 34 | lr_noise_std: 1.0 35 | lr_power: 1.0 36 | m_prob: 0.2 37 | max_epochs: 20 38 | max_relative_position: 14 39 | min_lr: 1.0e-05 40 | min_param_limits: 9.0 41 | mixup: 0.8 42 | mixup_mode: batch 43 | mixup_prob: 1.0 44 | mixup_switch_prob: 0.5 45 | model: '' 46 | model_ema: false 47 | model_ema_decay: 0.99996 48 | model_ema_force_cpu: false 49 | momentum: 0.9 50 | mutation_num: 25 51 | no_abs_pos: false 52 | no_prefetcher: false 53 | num_workers: 10 54 | opt: adamw 55 | opt_betas: null 56 | opt_eps: 1.0e-08 57 | output_dir: '' 58 | param_limits: 10.0 59 | patch_size: 16 60 | patience_epochs: 10 61 | pin_mem: true 62 | platform: pai 63 | population_num: 50 64 | post_norm: false 65 | rank: 0 66 | recount: 1 67 | relative_position: true 68 | remode: pixel 69 | repeated_aug: true 70 | reprob: 0.25 71 | resplit: false 72 | resume: /OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth 73 | rpe_type: bias 74 | s_prob: 0.4 75 | scale: false 76 | sched: cosine 77 | seed: 0 78 | select_num: 10 79 | smoothing: 0.1 80 | start_epoch: 0 81 | teacher_model: '' 82 | train_interpolation: bicubic 83 | warmup_epochs: 5 84 | warmup_lr: 1.0e-06 85 | weight_decay: 0.05 86 | world_size: 8 87 | -------------------------------------------------------------------------------- /AutoFormer/config.yaml: -------------------------------------------------------------------------------- 1 | aa: rand-m9-mstd0.5-inc1 2 | amp: true 3 | batch_size: 64 4 | cfg: ./experiments/supernet/supernet-T.yaml 5 | change_qkv: true 6 | clip_grad: null 7 | color_jitter: 0.4 8 | config_list_path: ./greedyTAS/m(2500)_path_epoch100.pkl 9 | cooldown_epochs: 10 10 | crossover_num: 25 11 | cutmix: 1.0 12 | cutmix_minmax: null 13 | data_path: /data 14 | data_set: IMNET 15 | decay_epochs: 30 16 | decay_rate: 0.1 17 | device: cuda 18 | dist_backend: nccl 19 | dist_eval: true 20 | dist_url: tcp://localhost:2042 21 | distributed: true 22 | drop: 0.0 23 | drop_block: null 24 | drop_path: 0.1 25 | epochs: 30 26 | eval: false 27 | gp: true 28 | gpu: 5 29 | inat_category: name 30 | input_size: 224 31 | log_file_path: ./greedyTAS/m(2500)_path_epoch100-subnet.log 32 | lr: 0.0005 33 | lr_noise: null 34 | lr_noise_pct: 0.67 35 | 
lr_noise_std: 1.0 36 | lr_power: 1.0 37 | m_prob: 0.2 38 | max_epochs: 20 39 | max_relative_position: 14 40 | min_lr: 1.0e-05 41 | min_param_limits: 1.0 42 | mixup: 0.8 43 | mixup_mode: batch 44 | mixup_prob: 1.0 45 | mixup_switch_prob: 0.5 46 | model: '' 47 | model_ema: false 48 | model_ema_decay: 0.99996 49 | model_ema_force_cpu: false 50 | momentum: 0.9 51 | mutation_num: 25 52 | no_abs_pos: false 53 | no_prefetcher: false 54 | num_workers: 10 55 | opt: adamw 56 | opt_betas: null 57 | opt_eps: 1.0e-08 58 | output_dir: '' 59 | param_limits: 100.0 60 | patch_size: 16 61 | patience_epochs: 10 62 | pin_mem: true 63 | platform: pai 64 | population_num: 200 65 | post_norm: false 66 | rank: 5 67 | recount: 1 68 | relative_position: true 69 | remode: pixel 70 | repeated_aug: true 71 | reprob: 0.25 72 | resplit: false 73 | resume: ./experiments/supernet/checkpoint-25.pth 74 | rpe_type: bias 75 | s_prob: 0.4 76 | scale: false 77 | sched: cosine 78 | seed: 0 79 | select_num: 10 80 | smoothing: 0.1 81 | start_epoch: 0 82 | teacher_model: '' 83 | train_interpolation: bicubic 84 | warmup_epochs: 5 85 | warmup_lr: 1.0e-06 86 | weight_decay: 0.05 87 | world_size: 8 88 | -------------------------------------------------------------------------------- /AutoFormer/lib/subImageNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import argparse 4 | random.seed(0) 5 | parser = argparse.ArgumentParser('Generate SubImageNet', add_help=False) 6 | parser.add_argument('--data-path', default='../data/imagenet', type=str, 7 | help='dataset path') 8 | args = parser.parse_args() 9 | 10 | data_path = args.data_path 11 | ImageNet_train_path = os.path.join(data_path, 'train') 12 | subImageNet_name = 'subImageNet' 13 | class_idx_txt_path = os.path.join(data_path, subImageNet_name) 14 | 15 | # train 16 | classes = sorted(os.listdir(ImageNet_train_path)) 17 | if not os.path.exists(os.path.join(data_path, subImageNet_name)): 18 | os.mkdir(os.path.join(data_path, subImageNet_name)) 19 | 20 | subImageNet = dict() 21 | with open(os.path.join(class_idx_txt_path, 'subimages_list.txt'), 'w') as f: 22 | subImageNet_class = classes 23 | for iclass in subImageNet_class: 24 | class_path = os.path.join(ImageNet_train_path, iclass) 25 | if not os.path.exists( 26 | os.path.join( 27 | data_path, 28 | subImageNet_name, 29 | iclass)): 30 | os.mkdir(os.path.join(data_path, subImageNet_name, iclass)) 31 | subImages = random.sample(sorted(os.listdir(class_path)), 100) 32 | # print("{}\n".format(subImages)) 33 | f.write("{}\n".format(subImages)) 34 | subImageNet[iclass] = subImages 35 | for image in subImages: 36 | raw_path = os.path.join(ImageNet_train_path, iclass, image) 37 | new_ipath = os.path.join( 38 | data_path, subImageNet_name, iclass, image) 39 | os.system('cp {} {}'.format(raw_path, new_ipath)) 40 | 41 | sub_classes = sorted(subImageNet.keys()) 42 | with open(os.path.join(class_idx_txt_path, 'info.txt'), 'w') as f: 43 | class_idx = 0 44 | for key in sub_classes: 45 | images = sorted((subImageNet[key])) 46 | # print(len(images)) 47 | f.write("{}\n".format(key)) 48 | class_idx = class_idx + 1 -------------------------------------------------------------------------------- /AutoFormer_original/lib/subImageNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import argparse 4 | random.seed(0) 5 | parser = argparse.ArgumentParser('Generate SubImageNet', add_help=False) 6 | 
parser.add_argument('--data-path', default='../data/imagenet', type=str, 7 | help='dataset path') 8 | args = parser.parse_args() 9 | 10 | data_path = args.data_path 11 | ImageNet_train_path = os.path.join(data_path, 'train') 12 | subImageNet_name = 'subImageNet' 13 | class_idx_txt_path = os.path.join(data_path, subImageNet_name) 14 | 15 | # train 16 | classes = sorted(os.listdir(ImageNet_train_path)) 17 | if not os.path.exists(os.path.join(data_path, subImageNet_name)): 18 | os.mkdir(os.path.join(data_path, subImageNet_name)) 19 | 20 | subImageNet = dict() 21 | with open(os.path.join(class_idx_txt_path, 'subimages_list.txt'), 'w') as f: 22 | subImageNet_class = classes 23 | for iclass in subImageNet_class: 24 | class_path = os.path.join(ImageNet_train_path, iclass) 25 | if not os.path.exists( 26 | os.path.join( 27 | data_path, 28 | subImageNet_name, 29 | iclass)): 30 | os.mkdir(os.path.join(data_path, subImageNet_name, iclass)) 31 | subImages = random.sample(sorted(os.listdir(class_path)), 100) 32 | # print("{}\n".format(subImages)) 33 | f.write("{}\n".format(subImages)) 34 | subImageNet[iclass] = subImages 35 | for image in subImages: 36 | raw_path = os.path.join(ImageNet_train_path, iclass, image) 37 | new_ipath = os.path.join( 38 | data_path, subImageNet_name, iclass, image) 39 | os.system('cp {} {}'.format(raw_path, new_ipath)) 40 | 41 | sub_classes = sorted(subImageNet.keys()) 42 | with open(os.path.join(class_idx_txt_path, 'info.txt'), 'w') as f: 43 | class_idx = 0 44 | for key in sub_classes: 45 | images = sorted((subImageNet[key])) 46 | # print(len(images)) 47 | f.write("{}\n".format(key)) 48 | class_idx = class_idx + 1 -------------------------------------------------------------------------------- /AutoFormer_original_greedy/lib/subImageNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import argparse 4 | random.seed(0) 5 | parser = argparse.ArgumentParser('Generate SubImageNet', add_help=False) 6 | parser.add_argument('--data-path', default='../data/imagenet', type=str, 7 | help='dataset path') 8 | args = parser.parse_args() 9 | 10 | data_path = args.data_path 11 | ImageNet_train_path = os.path.join(data_path, 'train') 12 | subImageNet_name = 'subImageNet' 13 | class_idx_txt_path = os.path.join(data_path, subImageNet_name) 14 | 15 | # train 16 | classes = sorted(os.listdir(ImageNet_train_path)) 17 | if not os.path.exists(os.path.join(data_path, subImageNet_name)): 18 | os.mkdir(os.path.join(data_path, subImageNet_name)) 19 | 20 | subImageNet = dict() 21 | with open(os.path.join(class_idx_txt_path, 'subimages_list.txt'), 'w') as f: 22 | subImageNet_class = classes 23 | for iclass in subImageNet_class: 24 | class_path = os.path.join(ImageNet_train_path, iclass) 25 | if not os.path.exists( 26 | os.path.join( 27 | data_path, 28 | subImageNet_name, 29 | iclass)): 30 | os.mkdir(os.path.join(data_path, subImageNet_name, iclass)) 31 | subImages = random.sample(sorted(os.listdir(class_path)), 100) 32 | # print("{}\n".format(subImages)) 33 | f.write("{}\n".format(subImages)) 34 | subImageNet[iclass] = subImages 35 | for image in subImages: 36 | raw_path = os.path.join(ImageNet_train_path, iclass, image) 37 | new_ipath = os.path.join( 38 | data_path, subImageNet_name, iclass, image) 39 | os.system('cp {} {}'.format(raw_path, new_ipath)) 40 | 41 | sub_classes = sorted(subImageNet.keys()) 42 | with open(os.path.join(class_idx_txt_path, 'info.txt'), 'w') as f: 43 | class_idx = 0 44 | for key in sub_classes: 45 | images 
= sorted((subImageNet[key])) 46 | # print(len(images)) 47 | f.write("{}\n".format(key)) 48 | class_idx = class_idx + 1 -------------------------------------------------------------------------------- /AutoFormer_original_greedy/train_supernet_small.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set UTF-8 environment variables (to prevent UnicodeEncodeError) 4 | export PYTHONIOENCODING=utf-8 5 | export LC_ALL=C.UTF-8 6 | export LANG=C.UTF-8 7 | 8 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_small_sn.py --data-path '/data' --gp \ 9 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --epochs 500 --warmup-epochs 20 \ 10 | # --resume '/OUTPUT_PATH/checkpoint_small_original_450.pth' --output /OUTPUT_PATH --batch-size 128 \ 11 | # --save_checkpoint_path 'checkpoint-sn-small-450ep-' --save_log_path './log/supernet_sn_small_450.log' --interval 1 12 | 13 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_small_sn.py --data-path '/data' --gp \ 14 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --epochs 500 --warmup-epochs 20 \ 15 | # --resume '/OUTPUT_PATH/checkpoint-sn-small-450ep-23.pth' --output /OUTPUT_PATH --batch-size 128 \ 16 | # --save_checkpoint_path 'checkpoint-sn-small-450ep-ing-' --save_log_path './log/supernet_sn_small_450_ing.log' --interval 1 17 | 18 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_small_sn.py --data-path '/data' --gp \ 19 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --epochs 500 --warmup-epochs 20 \ 20 | # --resume '/OUTPUT_PATH/checkpoint_small_original_450.pth' --output /OUTPUT_PATH --batch-size 128 \ 21 | # --save_checkpoint_path 'checkpoint-sn-small-450ep-droppath01-' --save_log_path './log/supernet_sn_small_450_droppath01.log' --interval 1 22 | 23 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 24 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --resume '/OUTPUT_PATH/checkpoint-sn-small-450ep-droppath01-25.pth' \ 25 | --min-param-limits 22 --param-limits 23 \ 26 | --log-file-path './log/search_sn-small-450ep-droppath01_6M.log' 27 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/config_prenas.yaml: -------------------------------------------------------------------------------- 1 | aa: rand-n3-m10-mstd0.5-inc1 #rand-m9-mstd0.5-inc1 #m9-n2-mstd0.5-inc1 (random augment) (autoaug x) 2 | amp: true 3 | batch_size: 128 4 | candfile: ./interval_cands/tiny.json 5 | cfg: ./experiments/supernet/tiny.yaml 6 | change_qkv: true 7 | clip_grad: null 8 | color_jitter: 0.4 9 | cooldown_epochs: 10 10 | cutmix: 1.0 #0.0 11 | cutmix_minmax: null 12 | data_path: /data 13 | data_set: IMNET 14 | decay_epochs: 30 15 | decay_rate: 0.1 16 | device: cuda 17 | dist_backend: nccl 18 | dist_eval: true 19 | dist_url: tcp://localhost:2042 20 | distributed: true 21 | drop: 0.0 22 | drop_block: null 23 | drop_path: 0.1 24 | epochs: 500 25 | eval: false 26 | eval_crops: 1 27 | gp: true 28 | gpu: 0 29 | group_by_depth: true 30 | group_by_dim: true 31 | inat_category: name 32 | input_size: 224 33 | lr: 0.0005 34 | lr_noise: null 35 | lr_noise_pct: 0.67 36 | lr_noise_std: 1.0 37 | lr_power: 1.0 38 | max_relative_position: 14 39 |
min_lr: 1.0e-07 40 | mixup: 0.8 #0.0 41 | mixup_mode: elem #x 42 | mixup_prob: 1.0 #0.0 ??? 43 | mixup_switch_prob: 0.5 #0.0 44 | mode: super 45 | model: '' 46 | model_ema: false 47 | model_ema_decay: 0.99996 48 | model_ema_force_cpu: false 49 | momentum: 0.9 50 | no_abs_pos: false 51 | no_sandwich_base: false 52 | no_sandwich_top: false 53 | num_workers: 10 54 | opt: adamw 55 | opt_betas: null 56 | opt_eps: 1.0e-08 57 | output_dir: ./output/tiny/train/2024_07_23-19_48_51 58 | patch_size: 16 59 | patience_epochs: 10 60 | pin_mem: true 61 | platform: pai 62 | post_norm: false 63 | print2file: false 64 | rank: 0 65 | recount: 2 # 1 66 | relative_position: true 67 | remode: pixel 68 | repeated_aug: true 69 | reprob: 0.25 70 | resplit: false 71 | resume: output/tiny/train/2024_07_18-12_40_28/checkpoint-460.pth 72 | rpe_type: bias 73 | sandwich: 0 74 | scale_attn: false 75 | scale_embed: false 76 | scale_mlp: false 77 | sched: cosine 78 | seed: 0 79 | shuffle: false 80 | smoothing: 0.1 81 | start_epoch: 0 82 | switch_ln: false 83 | task: '' 84 | teacher_model: '' 85 | train_interpolation: bicubic 86 | warmup_epochs: 20 87 | warmup_lr: 1.0e-06 88 | weight_decay: 0.05 #0.02 89 | world_size: 8 90 | -------------------------------------------------------------------------------- /AutoFormer/top_k_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pickle 3 | 4 | # Function that reads the log file and extracts the top_k_paths entries 5 | def parse_log_file(log_file_path): 6 | # Extract top_k_paths with a regular expression 7 | top_k_pattern = re.compile(r"top_k_paths\s*:\s*(\[\(.*?\)\])") 8 | 9 | config_list = [] 10 | id = 0 11 | 12 | with open(log_file_path, 'r') as log_file: 13 | for line in log_file: 14 | match = top_k_pattern.search(line) 15 | if match: 16 | top_k_str = match.group(1) 17 | # Convert the string into an actual list using eval 18 | top_k_paths = eval(top_k_str) 19 | 20 | for item in top_k_paths: 21 | loss = item[0] 22 | config = item[1] 23 | mlp_ratio = config['mlp_ratio'] 24 | num_heads = config['num_heads'] 25 | embed_dim = config['embed_dim'] 26 | layer_num = config['layer_num'] 27 | 28 | # Append each item's info (loss, mlp_ratio, num_heads, embed_dim, layer_num) 29 | config_list.append({ 30 | 'loss': loss, 31 | 'mlp_ratio': mlp_ratio, 32 | 'num_heads': num_heads, 33 | 'embed_dim': embed_dim, 34 | 'layer_num': layer_num, 35 | 'id': id 36 | }) 37 | id += 1 38 | 39 | return config_list 40 | 41 | # Function that saves the parsed config list as a pkl file 42 | def save_config_list_to_pkl(config_list, output_pkl_path): 43 | with open(output_pkl_path, 'wb') as f: 44 | pickle.dump(config_list, f) 45 | print(f"Config list saved to {output_pkl_path}") 46 | 47 | # Entry point 48 | if __name__ == "__main__": 49 | log_file_path = './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.log' # log file path 50 | output_pkl_path = './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl' # output pkl file path 51 | 52 | config_list = parse_log_file(log_file_path) 53 | save_config_list_to_pkl(config_list, output_pkl_path) 54 | print(len(config_list)) 55 | print(config_list[100]) 56 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/embedding_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from model.utils import to_2tuple 5 | import numpy as np 6 | 7 | class PatchembedSuper(nn.Module): 8 | def __init__(self, img_size=224, patch_size=16,
in_chans=3, embed_dim=768, scale=False): 9 | super(PatchembedSuper, self).__init__() 10 | 11 | img_size = to_2tuple(img_size) 12 | patch_size = to_2tuple(patch_size) 13 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 14 | self.img_size = img_size 15 | self.patch_size = patch_size 16 | self.num_patches = num_patches 17 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 18 | self.super_embed_dim = embed_dim 19 | self.scale = scale 20 | 21 | # sampled_ 22 | self.sample_embed_dim = None 23 | self.sampled_weight = None 24 | self.sampled_bias = None 25 | self.sampled_scale = None 26 | 27 | def set_sample_config(self, sample_embed_dim): 28 | self.sample_embed_dim = sample_embed_dim 29 | self.sampled_weight = self.proj.weight[:sample_embed_dim, ...] 30 | self.sampled_bias = self.proj.bias[:self.sample_embed_dim, ...] 31 | if self.scale: 32 | self.sampled_scale = self.super_embed_dim / sample_embed_dim 33 | def forward(self, x): 34 | B, C, H, W = x.shape 35 | assert H == self.img_size[0] and W == self.img_size[1], \ 36 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 37 | x = F.conv2d(x, self.sampled_weight, self.sampled_bias, stride=self.patch_size, padding=self.proj.padding, dilation=self.proj.dilation).flatten(2).transpose(1,2) 38 | if self.scale: 39 | return x * self.sampled_scale 40 | return x 41 | def calc_sampled_param_num(self): 42 | return self.sampled_weight.numel() + self.sampled_bias.numel() 43 | 44 | def get_complexity(self, sequence_length): 45 | total_flops = 0 46 | if self.sampled_bias is not None: 47 | total_flops += self.sampled_bias.size(0) 48 | total_flops += sequence_length * np.prod(self.sampled_weight.size()) 49 | return total_flops -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/embedding_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from model.utils import to_2tuple 5 | import numpy as np 6 | 7 | class PatchembedSuper(nn.Module): 8 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, scale=False): 9 | super(PatchembedSuper, self).__init__() 10 | 11 | img_size = to_2tuple(img_size) 12 | patch_size = to_2tuple(patch_size) 13 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 14 | self.img_size = img_size 15 | self.patch_size = patch_size 16 | self.num_patches = num_patches 17 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 18 | self.super_embed_dim = embed_dim 19 | self.scale = scale 20 | 21 | # sampled_ 22 | self.sample_embed_dim = None 23 | self.sampled_weight = None 24 | self.sampled_bias = None 25 | self.sampled_scale = None 26 | 27 | def set_sample_config(self, sample_embed_dim): 28 | self.sample_embed_dim = sample_embed_dim 29 | self.sampled_weight = self.proj.weight[:sample_embed_dim, ...] 30 | self.sampled_bias = self.proj.bias[:self.sample_embed_dim, ...] 31 | if self.scale: 32 | self.sampled_scale = self.super_embed_dim / sample_embed_dim 33 | def forward(self, x): 34 | B, C, H, W = x.shape 35 | assert H == self.img_size[0] and W == self.img_size[1], \ 36 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
37 | x = F.conv2d(x, self.sampled_weight, self.sampled_bias, stride=self.patch_size, padding=self.proj.padding, dilation=self.proj.dilation).flatten(2).transpose(1,2) 38 | if self.scale: 39 | return x * self.sampled_scale 40 | return x 41 | def calc_sampled_param_num(self): 42 | return self.sampled_weight.numel() + self.sampled_bias.numel() 43 | 44 | def get_complexity(self, sequence_length): 45 | total_flops = 0 46 | if self.sampled_bias is not None: 47 | total_flops += self.sampled_bias.size(0) 48 | total_flops += sequence_length * np.prod(self.sampled_weight.size()) 49 | return total_flops -------------------------------------------------------------------------------- /AutoFormer/model/module/layernorm_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def uniform_element_selection(tensor, target_dim): 6 | """ 7 | Uniformly selects elements from the tensor along the specified dimension. 8 | 9 | Parameters: 10 | tensor (torch.Tensor): The input tensor. 11 | target_dim (int): The target dimension size. 12 | 13 | Returns: 14 | torch.Tensor: A tensor with the selected elements. 15 | """ 16 | original_dim = tensor.size(0) 17 | indices = torch.linspace(0, original_dim - 1, target_dim).long().to(tensor.device) 18 | return tensor[indices] 19 | 20 | class LayerNormSuper(torch.nn.LayerNorm): 21 | def __init__(self, super_embed_dim): 22 | super().__init__(super_embed_dim) 23 | 24 | # the largest embed dim 25 | self.super_embed_dim = super_embed_dim 26 | 27 | # the current sampled embed dim 28 | self.sample_embed_dim = None 29 | 30 | self.samples = {} 31 | self.profiling = False 32 | 33 | def profile(self, mode=True): 34 | self.profiling = mode 35 | 36 | def sample_parameters(self, resample=False): 37 | if self.profiling or resample: 38 | return self._sample_parameters() 39 | return self.samples 40 | 41 | def _sample_parameters(self): 42 | # self.samples['weight'] = self.weight[:self.sample_embed_dim] 43 | # self.samples['bias'] = self.bias[:self.sample_embed_dim] 44 | self.samples['weight'] = uniform_element_selection(self.weight, self.sample_embed_dim) 45 | self.samples['bias'] = uniform_element_selection(self.bias, self.sample_embed_dim) 46 | return self.samples 47 | 48 | def set_sample_config(self, sample_embed_dim): 49 | self.sample_embed_dim = sample_embed_dim 50 | self._sample_parameters() 51 | 52 | def forward(self, x): 53 | self.sample_parameters() 54 | return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) 55 | 56 | def calc_sampled_param_num(self): 57 | assert 'weight' in self.samples.keys() 58 | assert 'bias' in self.samples.keys() 59 | return self.samples['weight'].numel() + self.samples['bias'].numel() 60 | 61 | def get_complexity(self, sequence_length): 62 | return sequence_length * self.sample_embed_dim 63 | -------------------------------------------------------------------------------- /AutoFormer/lib/samplers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import math 4 | 5 | 6 | class RASampler(torch.utils.data.Sampler): 7 | """Sampler that restricts data loading to a subset of the dataset for distributed, 8 | with repeated augmentation. 
It ensures that each augmented version of a sample will be visible to a 10 | different process (GPU) 11 | Heavily based on torch.utils.data.DistributedSampler 12 | """ 13 | 14 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 15 | if num_replicas is None: 16 | if not dist.is_available(): 17 | raise RuntimeError("Requires distributed package to be available") 18 | num_replicas = dist.get_world_size() 19 | if rank is None: 20 | if not dist.is_available(): 21 | raise RuntimeError("Requires distributed package to be available") 22 | rank = dist.get_rank() 23 | self.dataset = dataset 24 | self.num_replicas = num_replicas 25 | self.rank = rank 26 | self.epoch = 0 27 | self.num_samples = int(math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) 28 | self.total_size = self.num_samples * self.num_replicas 29 | # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) 30 | self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) 31 | self.shuffle = shuffle 32 | 33 | def __iter__(self): 34 | # deterministically shuffle based on epoch 35 | g = torch.Generator() 36 | g.manual_seed(self.epoch) 37 | if self.shuffle: 38 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 39 | else: 40 | indices = list(range(len(self.dataset))) 41 | 42 | # add extra samples to make it evenly divisible 43 | indices = [ele for ele in indices for i in range(3)] 44 | indices += indices[:(self.total_size - len(indices))] 45 | assert len(indices) == self.total_size 46 | 47 | # subsample 48 | indices = indices[self.rank:self.total_size:self.num_replicas] 49 | 50 | assert len(indices) == self.num_samples 51 | return iter(indices[:self.num_selected_samples]) 52 | 53 | def __len__(self): 54 | return self.num_selected_samples 55 | 56 | def set_epoch(self, epoch): 57 | self.epoch = epoch 58 | -------------------------------------------------------------------------------- /AutoFormer_original/lib/samplers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import math 4 | 5 | 6 | class RASampler(torch.utils.data.Sampler): 7 | """Sampler that restricts data loading to a subset of the dataset for distributed, 8 | with repeated augmentation.
It ensures that each augmented version of a sample will be visible to a 10 | different process (GPU) 11 | Heavily based on torch.utils.data.DistributedSampler 12 | """ 13 | 14 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 15 | if num_replicas is None: 16 | if not dist.is_available(): 17 | raise RuntimeError("Requires distributed package to be available") 18 | num_replicas = dist.get_world_size() 19 | if rank is None: 20 | if not dist.is_available(): 21 | raise RuntimeError("Requires distributed package to be available") 22 | rank = dist.get_rank() 23 | self.dataset = dataset 24 | self.num_replicas = num_replicas 25 | self.rank = rank 26 | self.epoch = 0 27 | self.num_samples = int(math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) 28 | self.total_size = self.num_samples * self.num_replicas 29 | # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) 30 | self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) 31 | self.shuffle = shuffle 32 | 33 | def __iter__(self): 34 | # deterministically shuffle based on epoch 35 | g = torch.Generator() 36 | g.manual_seed(self.epoch) 37 | if self.shuffle: 38 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 39 | else: 40 | indices = list(range(len(self.dataset))) 41 | 42 | # add extra samples to make it evenly divisible 43 | indices = [ele for ele in indices for i in range(3)] 44 | indices += indices[:(self.total_size - len(indices))] 45 | assert len(indices) == self.total_size 46 | 47 | # subsample 48 | indices = indices[self.rank:self.total_size:self.num_replicas] 49 | 50 | assert len(indices) == self.num_samples 51 | return iter(indices[:self.num_selected_samples]) 52 | 53 | def __len__(self): 54 | return self.num_selected_samples 55 | 56 | def set_epoch(self, epoch): 57 | self.epoch = epoch 58 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/lib/samplers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import math 4 | 5 | 6 | class RASampler(torch.utils.data.Sampler): 7 | """Sampler that restricts data loading to a subset of the dataset for distributed, 8 | with repeated augmentation.
It ensures that each augmented version of a sample will be visible to a 10 | different process (GPU) 11 | Heavily based on torch.utils.data.DistributedSampler 12 | """ 13 | 14 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 15 | if num_replicas is None: 16 | if not dist.is_available(): 17 | raise RuntimeError("Requires distributed package to be available") 18 | num_replicas = dist.get_world_size() 19 | if rank is None: 20 | if not dist.is_available(): 21 | raise RuntimeError("Requires distributed package to be available") 22 | rank = dist.get_rank() 23 | self.dataset = dataset 24 | self.num_replicas = num_replicas 25 | self.rank = rank 26 | self.epoch = 0 27 | self.num_samples = int(math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) 28 | self.total_size = self.num_samples * self.num_replicas 29 | # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) 30 | self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) 31 | self.shuffle = shuffle 32 | 33 | def __iter__(self): 34 | # deterministically shuffle based on epoch 35 | g = torch.Generator() 36 | g.manual_seed(self.epoch) 37 | if self.shuffle: 38 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 39 | else: 40 | indices = list(range(len(self.dataset))) 41 | 42 | # add extra samples to make it evenly divisible 43 | indices = [ele for ele in indices for i in range(3)] 44 | indices += indices[:(self.total_size - len(indices))] 45 | assert len(indices) == self.total_size 46 | 47 | # subsample 48 | indices = indices[self.rank:self.total_size:self.num_replicas] 49 | 50 | assert len(indices) == self.num_samples 51 | return iter(indices[:self.num_selected_samples]) 52 | 53 | def __len__(self): 54 | return self.num_selected_samples 55 | 56 | def set_epoch(self, epoch): 57 | self.epoch = epoch 58 | -------------------------------------------------------------------------------- /AutoFormer/model/module/embedding_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from model.utils import to_2tuple 5 | import numpy as np 6 | 7 | def uniform_element_selection(tensor, target_dim): 8 | """ 9 | Uniformly selects elements from the tensor along the specified dimension. 10 | 11 | Parameters: 12 | tensor (torch.Tensor): The input tensor. 13 | target_dim (int): The target dimension size. 14 | 15 | Returns: 16 | torch.Tensor: A tensor with the selected elements.
17 | """ 18 | original_dim = tensor.size(0) 19 | indices = torch.linspace(0, original_dim - 1, target_dim).long().to(tensor.device) 20 | return tensor[indices] 21 | 22 | class PatchembedSuper(nn.Module): 23 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, scale=False): 24 | super(PatchembedSuper, self).__init__() 25 | 26 | img_size = to_2tuple(img_size) 27 | patch_size = to_2tuple(patch_size) 28 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 29 | self.img_size = img_size 30 | self.patch_size = patch_size 31 | self.num_patches = num_patches 32 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 33 | self.super_embed_dim = embed_dim 34 | self.scale = scale 35 | 36 | # sampled_ 37 | self.sample_embed_dim = None 38 | self.sampled_weight = None 39 | self.sampled_bias = None 40 | self.sampled_scale = None 41 | 42 | def set_sample_config(self, sample_embed_dim): 43 | self.sample_embed_dim = sample_embed_dim 44 | # self.sampled_weight = self.proj.weight[:sample_embed_dim, ...] 45 | # self.sampled_bias = self.proj.bias[:self.sample_embed_dim, ...] 46 | self.sampled_weight = uniform_element_selection(self.proj.weight, sample_embed_dim) 47 | self.sampled_bias = uniform_element_selection(self.proj.bias, sample_embed_dim) 48 | if self.scale: 49 | self.sampled_scale = self.super_embed_dim / sample_embed_dim 50 | 51 | def forward(self, x): 52 | B, C, H, W = x.shape 53 | assert H == self.img_size[0] and W == self.img_size[1], \ 54 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 55 | x = F.conv2d(x, self.sampled_weight, self.sampled_bias, stride=self.patch_size, padding=self.proj.padding, dilation=self.proj.dilation).flatten(2).transpose(1,2) 56 | if self.scale: 57 | return x * self.sampled_scale 58 | return x 59 | def calc_sampled_param_num(self): 60 | return self.sampled_weight.numel() + self.sampled_bias.numel() 61 | 62 | def get_complexity(self, sequence_length): 63 | total_flops = 0 64 | if self.sampled_bias is not None: 65 | total_flops += self.sampled_bias.size(0) 66 | total_flops += sequence_length * np.prod(self.sampled_weight.size()) 67 | return total_flops -------------------------------------------------------------------------------- /AutoFormer/lib/imagenet_withhold.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image 3 | import io 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | import torchvision.transforms as transforms 8 | 9 | 10 | class ImageNet_Withhold(Dataset): 11 | def __init__(self, data_root, ann_file='', transform=None, train=True, task ='train'): 12 | super(ImageNet_Withhold, self).__init__() 13 | ann_file = ann_file + '/' + 'val_true.txt' 14 | train_split = (task == 'train' or task == 'val') 15 | self.data_root = data_root + '/'+ ('train' if train_split else 'val') 16 | 17 | self.data = [] 18 | self.nb_classes = 0 19 | folders = {} 20 | cnt = 0 21 | self.z = ZipReader() 22 | # if train: 23 | # for member in self.tarfile.getmembers(): 24 | # print(member) 25 | # self.tarfile = tarfile.open(self.data_root) 26 | 27 | f = open(ann_file) 28 | prefix = 'data/sdb/imagenet'+'/'+ ('train' if train_split else 'val') + '/' 29 | for line in f: 30 | tmp = line.strip().split('\t')[0] 31 | class_pic = tmp.split('/') 32 | class_tmp = class_pic[0] 33 | pic = class_pic[1] 34 | 35 | if class_tmp in folders: 36 | # print(self.tarfile.getmember(('train/' if train else 'val/') + 
tmp[0] + '.JPEG')) 37 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG', folders[class_tmp])) 38 | else: 39 | folders[class_tmp] = cnt 40 | cnt += 1 41 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG',folders[class_tmp])) 42 | 43 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 44 | std=[0.229, 0.224, 0.225]) 45 | if transform is not None: 46 | self.transforms = transform 47 | else: 48 | if train: 49 | self.transforms = transforms.Compose([ 50 | transforms.RandomSizedCrop(224), 51 | transforms.RandomHorizontalFlip(), 52 | transforms.ToTensor(), 53 | normalize, 54 | ]) 55 | else: 56 | self.transforms = transforms.Compose([ 57 | transforms.Scale(256), 58 | transforms.CenterCrop(224), 59 | transforms.ToTensor(), 60 | normalize, 61 | ]) 62 | 63 | 64 | self.nb_classes = cnt 65 | def __len__(self): 66 | return len(self.data) 67 | 68 | def __getitem__(self, idx): 69 | 70 | # print('extract_file', time.time()-start_time) 71 | iob = self.z.read(self.data_root + '/' + self.data[idx][0], self.data[idx][1]) 72 | iob = io.BytesIO(iob) 73 | img = Image.open(iob).convert('RGB') 74 | target = self.data[idx][2] 75 | if self.transforms is not None: 76 | img = self.transforms(img) 77 | # print('open', time.time()-start_time) 78 | return img, target 79 | -------------------------------------------------------------------------------- /AutoFormer_original/lib/imagenet_withhold.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image 3 | import io 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | import torchvision.transforms as transforms 8 | 9 | 10 | class ImageNet_Withhold(Dataset): 11 | def __init__(self, data_root, ann_file='', transform=None, train=True, task ='train'): 12 | super(ImageNet_Withhold, self).__init__() 13 | ann_file = ann_file + '/' + 'val_true.txt' 14 | train_split = (task == 'train' or task == 'val') 15 | self.data_root = data_root + '/'+ ('train' if train_split else 'val') 16 | 17 | self.data = [] 18 | self.nb_classes = 0 19 | folders = {} 20 | cnt = 0 21 | self.z = ZipReader() 22 | # if train: 23 | # for member in self.tarfile.getmembers(): 24 | # print(member) 25 | # self.tarfile = tarfile.open(self.data_root) 26 | 27 | f = open(ann_file) 28 | prefix = 'data/sdb/imagenet'+'/'+ ('train' if train_split else 'val') + '/' 29 | for line in f: 30 | tmp = line.strip().split('\t')[0] 31 | class_pic = tmp.split('/') 32 | class_tmp = class_pic[0] 33 | pic = class_pic[1] 34 | 35 | if class_tmp in folders: 36 | # print(self.tarfile.getmember(('train/' if train else 'val/') + tmp[0] + '.JPEG')) 37 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG', folders[class_tmp])) 38 | else: 39 | folders[class_tmp] = cnt 40 | cnt += 1 41 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG',folders[class_tmp])) 42 | 43 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 44 | std=[0.229, 0.224, 0.225]) 45 | if transform is not None: 46 | self.transforms = transform 47 | else: 48 | if train: 49 | self.transforms = transforms.Compose([ 50 | transforms.RandomSizedCrop(224), 51 | transforms.RandomHorizontalFlip(), 52 | transforms.ToTensor(), 53 | normalize, 54 | ]) 55 | else: 56 | self.transforms = transforms.Compose([ 57 | transforms.Scale(256), 58 | transforms.CenterCrop(224), 59 | transforms.ToTensor(), 60 | normalize, 61 | ]) 62 | 63 | 64 | self.nb_classes = cnt 65 | def __len__(self): 66 | return len(self.data) 67 | 68 | def __getitem__(self, idx): 69 | 70 | # 
print('extract_file', time.time()-start_time) 71 | iob = self.z.read(self.data_root + '/' + self.data[idx][0], self.data[idx][1]) 72 | iob = io.BytesIO(iob) 73 | img = Image.open(iob).convert('RGB') 74 | target = self.data[idx][2] 75 | if self.transforms is not None: 76 | img = self.transforms(img) 77 | # print('open', time.time()-start_time) 78 | return img, target 79 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/Linear_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | class LinearSuper(nn.Linear): 7 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 8 | super().__init__(super_in_dim, super_out_dim, bias=bias) 9 | 10 | # super_in_dim and super_out_dim indicate the largest network! 11 | self.super_in_dim = super_in_dim 12 | self.super_out_dim = super_out_dim 13 | 14 | # input_dim and output_dim indicate the current sampled size 15 | self.sample_in_dim = None 16 | self.sample_out_dim = None 17 | 18 | self.samples = {} 19 | 20 | self.scale = scale 21 | self._reset_parameters(bias, uniform_, non_linear) 22 | self.profiling = False 23 | 24 | def profile(self, mode=True): 25 | self.profiling = mode 26 | 27 | def sample_parameters(self, resample=False): 28 | if self.profiling or resample: 29 | return self._sample_parameters() 30 | return self.samples 31 | 32 | def _reset_parameters(self, bias, uniform_, non_linear): 33 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 34 | self.weight, non_linear=non_linear) 35 | if bias: 36 | nn.init.constant_(self.bias, 0.) 
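    # NOTE: a minimal usage sketch with hypothetical dimensions (not taken from the
    # repo's configs): for fc = LinearSuper(super_in_dim=640, super_out_dim=2560),
    # calling fc.set_sample_config(sample_in_dim=320, sample_out_dim=1280) makes
    # forward() use only the top-left 1280x320 block of the full 2560x640 weight
    # (see sample_weight/sample_bias at the bottom of this file), so every sampled
    # subnet shares the same underlying supernet parameters.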
37 | 38 | def set_sample_config(self, sample_in_dim, sample_out_dim): 39 | self.sample_in_dim = sample_in_dim 40 | self.sample_out_dim = sample_out_dim 41 | 42 | self._sample_parameters() 43 | 44 | def _sample_parameters(self): 45 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 46 | self.samples['bias'] = self.bias 47 | self.sample_scale = self.super_out_dim/self.sample_out_dim 48 | if self.bias is not None: 49 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 50 | return self.samples 51 | 52 | def forward(self, x): 53 | self.sample_parameters() 54 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 55 | 56 | def calc_sampled_param_num(self): 57 | assert 'weight' in self.samples.keys() 58 | weight_numel = self.samples['weight'].numel() 59 | 60 | if self.samples['bias'] is not None: 61 | bias_numel = self.samples['bias'].numel() 62 | else: 63 | bias_numel = 0 64 | 65 | return weight_numel + bias_numel 66 | def get_complexity(self, sequence_length): 67 | total_flops = 0 68 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 69 | return total_flops 70 | 71 | def sample_weight(weight, sample_in_dim, sample_out_dim): 72 | sample_weight = weight[:, :sample_in_dim] 73 | sample_weight = sample_weight[:sample_out_dim, :] 74 | 75 | return sample_weight 76 | 77 | 78 | def sample_bias(bias, sample_out_dim): 79 | sample_bias = bias[:sample_out_dim] 80 | 81 | return sample_bias 82 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/lib/imagenet_withhold.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image 3 | import io 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | import torchvision.transforms as transforms 8 | 9 | 10 | class ImageNet_Withhold(Dataset): 11 | def __init__(self, data_root, ann_file='', transform=None, train=True, task ='train'): 12 | super(ImageNet_Withhold, self).__init__() 13 | ann_file = ann_file + '/' + 'val_true.txt' 14 | train_split = (task == 'train' or task == 'val') 15 | self.data_root = data_root + '/'+ ('train' if train_split else 'val') 16 | 17 | self.data = [] 18 | self.nb_classes = 0 19 | folders = {} 20 | cnt = 0 21 | self.z = ZipReader() 22 | # if train: 23 | # for member in self.tarfile.getmembers(): 24 | # print(member) 25 | # self.tarfile = tarfile.open(self.data_root) 26 | 27 | f = open(ann_file) 28 | prefix = 'data/sdb/imagenet'+'/'+ ('train' if train_split else 'val') + '/' 29 | for line in f: 30 | tmp = line.strip().split('\t')[0] 31 | class_pic = tmp.split('/') 32 | class_tmp = class_pic[0] 33 | pic = class_pic[1] 34 | 35 | if class_tmp in folders: 36 | # print(self.tarfile.getmember(('train/' if train else 'val/') + tmp[0] + '.JPEG')) 37 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG', folders[class_tmp])) 38 | else: 39 | folders[class_tmp] = cnt 40 | cnt += 1 41 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG',folders[class_tmp])) 42 | 43 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 44 | std=[0.229, 0.224, 0.225]) 45 | if transform is not None: 46 | self.transforms = transform 47 | else: 48 | if train: 49 | self.transforms = transforms.Compose([ 50 | transforms.RandomSizedCrop(224), 51 | transforms.RandomHorizontalFlip(), 52 | transforms.ToTensor(), 53 | normalize, 54 | ]) 55 | else: 56 | self.transforms = transforms.Compose([ 57 | 
transforms.Scale(256), 58 | transforms.CenterCrop(224), 59 | transforms.ToTensor(), 60 | normalize, 61 | ]) 62 | 63 | 64 | self.nb_classes = cnt 65 | def __len__(self): 66 | return len(self.data) 67 | 68 | def __getitem__(self, idx): 69 | 70 | # print('extract_file', time.time()-start_time) 71 | iob = self.z.read(self.data_root + '/' + self.data[idx][0], self.data[idx][1]) 72 | iob = io.BytesIO(iob) 73 | img = Image.open(iob).convert('RGB') 74 | target = self.data[idx][2] 75 | if self.transforms is not None: 76 | img = self.transforms(img) 77 | # print('open', time.time()-start_time) 78 | return img, target 79 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/Linear_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | class LinearSuper(nn.Linear): 7 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 8 | super().__init__(super_in_dim, super_out_dim, bias=bias) 9 | 10 | # super_in_dim and super_out_dim indicate the largest network! 11 | self.super_in_dim = super_in_dim 12 | self.super_out_dim = super_out_dim 13 | 14 | # input_dim and output_dim indicate the current sampled size 15 | self.sample_in_dim = None 16 | self.sample_out_dim = None 17 | 18 | self.samples = {} 19 | 20 | self.scale = scale 21 | self._reset_parameters(bias, uniform_, non_linear) 22 | self.profiling = False 23 | 24 | def profile(self, mode=True): 25 | self.profiling = mode 26 | 27 | def sample_parameters(self, resample=False): 28 | if self.profiling or resample: 29 | return self._sample_parameters() 30 | return self.samples 31 | 32 | def _reset_parameters(self, bias, uniform_, non_linear): 33 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 34 | self.weight, non_linear=non_linear) 35 | if bias: 36 | nn.init.constant_(self.bias, 0.) 
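    # NOTE: when scale=True, forward() multiplies the output by
    # sample_scale = super_out_dim / sample_out_dim (set in _sample_parameters below).
    # With hypothetical dims super_out_dim=2560 and sample_out_dim=1280, activations
    # are scaled by 2.0, keeping their magnitude roughly comparable across sampled widths.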
37 | 38 | def set_sample_config(self, sample_in_dim, sample_out_dim): 39 | self.sample_in_dim = sample_in_dim 40 | self.sample_out_dim = sample_out_dim 41 | 42 | self._sample_parameters() 43 | 44 | def _sample_parameters(self): 45 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 46 | self.samples['bias'] = self.bias 47 | self.sample_scale = self.super_out_dim/self.sample_out_dim 48 | if self.bias is not None: 49 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 50 | return self.samples 51 | 52 | def forward(self, x): 53 | self.sample_parameters() 54 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 55 | 56 | def calc_sampled_param_num(self): 57 | assert 'weight' in self.samples.keys() 58 | weight_numel = self.samples['weight'].numel() 59 | 60 | if self.samples['bias'] is not None: 61 | bias_numel = self.samples['bias'].numel() 62 | else: 63 | bias_numel = 0 64 | 65 | return weight_numel + bias_numel 66 | def get_complexity(self, sequence_length): 67 | total_flops = 0 68 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 69 | return total_flops 70 | 71 | def sample_weight(weight, sample_in_dim, sample_out_dim): 72 | sample_weight = weight[:, :sample_in_dim] 73 | sample_weight = sample_weight[:sample_out_dim, :] 74 | 75 | return sample_weight 76 | 77 | 78 | def sample_bias(bias, sample_out_dim): 79 | sample_bias = bias[:sample_out_dim] 80 | 81 | return sample_bias 82 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/qkv_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class qkv_super(nn.Linear): 8 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 9 | super().__init__(super_in_dim, super_out_dim, bias=bias) 10 | 11 | # super_in_dim and super_out_dim indicate the largest network! 12 | self.super_in_dim = super_in_dim 13 | self.super_out_dim = super_out_dim 14 | 15 | # input_dim and output_dim indicate the current sampled size 16 | self.sample_in_dim = None 17 | self.sample_out_dim = None 18 | 19 | self.samples = {} 20 | 21 | self.scale = scale 22 | # self._reset_parameters(bias, uniform_, non_linear) 23 | self.profiling = False 24 | 25 | def profile(self, mode=True): 26 | self.profiling = mode 27 | 28 | def sample_parameters(self, resample=False): 29 | if self.profiling or resample: 30 | return self._sample_parameters() 31 | return self.samples 32 | 33 | def _reset_parameters(self, bias, uniform_, non_linear): 34 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 35 | self.weight, non_linear=non_linear) 36 | if bias: 37 | nn.init.constant_(self.bias, 0.) 
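    # NOTE: unlike LinearSuper, sample_weight at the bottom of this file samples the
    # packed q/k/v output rows with stride 3: for a toy weight with rows [0..8] and
    # sample_out_dim=6, the slices i:6:3 for i in (0, 1, 2) pick rows [0, 3], [1, 4]
    # and [2, 5], and the concatenation yields row order [0, 3, 1, 4, 2, 5]. Rows are
    # thus regrouped by index modulo 3, so sample_out_dim should be a multiple of 3
    # for the three groups to stay the same size.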
38 | 39 | def set_sample_config(self, sample_in_dim, sample_out_dim): 40 | self.sample_in_dim = sample_in_dim 41 | self.sample_out_dim = sample_out_dim 42 | 43 | self._sample_parameters() 44 | 45 | def _sample_parameters(self): 46 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 47 | self.samples['bias'] = self.bias 48 | self.sample_scale = self.super_out_dim/self.sample_out_dim 49 | if self.bias is not None: 50 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 51 | return self.samples 52 | 53 | def forward(self, x): 54 | self.sample_parameters() 55 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 56 | 57 | def calc_sampled_param_num(self): 58 | assert 'weight' in self.samples.keys() 59 | weight_numel = self.samples['weight'].numel() 60 | 61 | if self.samples['bias'] is not None: 62 | bias_numel = self.samples['bias'].numel() 63 | else: 64 | bias_numel = 0 65 | 66 | return weight_numel + bias_numel 67 | def get_complexity(self, sequence_length): 68 | total_flops = 0 69 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 70 | return total_flops 71 | 72 | def sample_weight(weight, sample_in_dim, sample_out_dim): 73 | 74 | sample_weight = weight[:, :sample_in_dim] 75 | sample_weight = torch.cat([sample_weight[i:sample_out_dim:3, :] for i in range(3)], dim =0) 76 | 77 | return sample_weight 78 | 79 | 80 | def sample_bias(bias, sample_out_dim): 81 | sample_bias = bias[:sample_out_dim] 82 | 83 | return sample_bias 84 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/qkv_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class qkv_super(nn.Linear): 8 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 9 | super().__init__(super_in_dim, super_out_dim, bias=bias) 10 | 11 | # super_in_dim and super_out_dim indicate the largest network! 12 | self.super_in_dim = super_in_dim 13 | self.super_out_dim = super_out_dim 14 | 15 | # input_dim and output_dim indicate the current sampled size 16 | self.sample_in_dim = None 17 | self.sample_out_dim = None 18 | 19 | self.samples = {} 20 | 21 | self.scale = scale 22 | # self._reset_parameters(bias, uniform_, non_linear) 23 | self.profiling = False 24 | 25 | def profile(self, mode=True): 26 | self.profiling = mode 27 | 28 | def sample_parameters(self, resample=False): 29 | if self.profiling or resample: 30 | return self._sample_parameters() 31 | return self.samples 32 | 33 | def _reset_parameters(self, bias, uniform_, non_linear): 34 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 35 | self.weight, non_linear=non_linear) 36 | if bias: 37 | nn.init.constant_(self.bias, 0.) 
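    # NOTE: a hedged usage sketch with hypothetical numbers: an attention block with
    # a sampled embedding dim of 192 and 3 sampled heads of size 64 would call
    #   qkv.set_sample_config(sample_in_dim=192, sample_out_dim=3 * 64 * 3)
    # so that forward() projects to the packed (q, k, v) tensor of the subnet. Also
    # note that the _reset_parameters call is commented out in __init__ above, so
    # the qkv weights keep nn.Linear's default initialization.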
38 | 39 | def set_sample_config(self, sample_in_dim, sample_out_dim): 40 | self.sample_in_dim = sample_in_dim 41 | self.sample_out_dim = sample_out_dim 42 | 43 | self._sample_parameters() 44 | 45 | def _sample_parameters(self): 46 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 47 | self.samples['bias'] = self.bias 48 | self.sample_scale = self.super_out_dim/self.sample_out_dim 49 | if self.bias is not None: 50 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 51 | return self.samples 52 | 53 | def forward(self, x): 54 | self.sample_parameters() 55 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 56 | 57 | def calc_sampled_param_num(self): 58 | assert 'weight' in self.samples.keys() 59 | weight_numel = self.samples['weight'].numel() 60 | 61 | if self.samples['bias'] is not None: 62 | bias_numel = self.samples['bias'].numel() 63 | else: 64 | bias_numel = 0 65 | 66 | return weight_numel + bias_numel 67 | def get_complexity(self, sequence_length): 68 | total_flops = 0 69 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 70 | return total_flops 71 | 72 | def sample_weight(weight, sample_in_dim, sample_out_dim): 73 | 74 | sample_weight = weight[:, :sample_in_dim] 75 | sample_weight = torch.cat([sample_weight[i:sample_out_dim:3, :] for i in range(3)], dim =0) 76 | 77 | return sample_weight 78 | 79 | 80 | def sample_bias(bias, sample_out_dim): 81 | sample_bias = bias[:sample_out_dim] 82 | 83 | return sample_bias 84 | -------------------------------------------------------------------------------- /AutoFormer/train_supernet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 4 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 5 | --log-file-path './greedyTAS/greedyTAS-epoch100-top-k(full).log' \ 6 | --resume './greedyTAS/checkpoint-4.pth' --output /OUTPUT_PATH --batch-size 128 7 | 8 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 9 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 10 | --log-file-path './greedyTAS/greedyTAS-epoch200-top-k(full).log' \ 11 | --resume './greedyTAS/checkpoint-9.pth' --output /OUTPUT_PATH --batch-size 128 12 | 13 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 14 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 15 | --log-file-path './greedyTAS/greedyTAS-epoch300-top-k(full).log' \ 16 | --resume './greedyTAS/checkpoint-14.pth' --output /OUTPUT_PATH --batch-size 128 17 | 18 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 19 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 20 | --log-file-path './greedyTAS/greedyTAS-epoch400-top-k(full).log' \ 21 | --resume './greedyTAS/checkpoint-19.pth' --output /OUTPUT_PATH --batch-size 128 22 | 23 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py 
--data-path '/data' --gp \ 24 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 25 | --log-file-path './greedyTAS/greedyTAS-epoch500-top-k(full).log' \ 26 | --resume './greedyTAS/checkpoint-24.pth' --output /OUTPUT_PATH --batch-size 128 27 | 28 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 29 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 30 | --log-file-path './greedyTAS/greedyTAS-epoch0-top-k(full).log' \ 31 | --output /OUTPUT_PATH --batch-size 128 32 | 33 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 34 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 35 | --log-file-path './greedyTAS/greedyTAS-epoch20-top-k(full).log' \ 36 | --resume './greedyTAS/checkpoint-0.pth' --output /OUTPUT_PATH --batch-size 128 37 | 38 | 39 | # --resume './greedyTAS/greedyTAS-epoch100-test/checkpoint-4.pth' 40 | # --resume './greedyTAS/checkpoint-09121607.pth' 41 | # --resume './experiments/supernet/autoformer_t_500ep.pth' 42 | # --resume './greedyTAS/greedyTAS-epoch20-test/checkpoint-0.pth' 43 | # --resume './greedyTAS/greedyTAS-epoch59/checkpoint.pth' 44 | # --resume './greedyTAS/checkpoint-24.pth' -------------------------------------------------------------------------------- /AutoFormer/training_free/indicators/snip.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | import copy 7 | import types 8 | 9 | from . 
import indicator 10 | from ..p_utils import get_layer_metric_array 11 | 12 | 13 | def snip_forward_conv2d(self, x): 14 | return F.conv2d(x, self.sampled_weight * self.weight_mask, self.sampled_bias, 15 | stride=self.patch_size, padding=self.proj.padding, dilation=self.proj.dilation).flatten(2).transpose(1,2) 16 | 17 | def snip_forward_linear(self, x): 18 | return F.linear(x, self.samples['weight'] * self.weight_mask, self.samples['bias']) 19 | 20 | 21 | def snip_forward_linear_(self, x): 22 | return F.linear(x, self.weight * self.weight_mask, self.bias) 23 | 24 | @indicator('snip', bn=True, mode='param') 25 | def compute_snip_per_weight(net, inputs, targets, mode, loss_fn, split_data=1): 26 | for layer in net.modules(): 27 | if layer._get_name() == 'PatchembedSuper': 28 | layer.weight_mask = nn.Parameter(torch.ones_like(layer.sampled_weight)) 29 | layer.sampled_weight = layer.sampled_weight.detach() 30 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 31 | layer.weight_mask = nn.Parameter(torch.ones_like(layer.samples['weight'])) 32 | layer.samples['weight'] = layer.samples['weight'].detach() 33 | if isinstance(layer, nn.Linear) and layer.out_features == 1000: 34 | layer.weight_mask = nn.Parameter(torch.ones_like(layer.samples['weight'])) 35 | layer.samples['weight'] = layer.samples['weight'].detach() 36 | 37 | # Override the forward methods: 38 | if layer._get_name() == 'PatchembedSuper': 39 | layer.forward = types.MethodType(snip_forward_conv2d, layer) 40 | 41 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 42 | layer.forward = types.MethodType(snip_forward_linear, layer) 43 | if isinstance(layer, nn.Linear) and layer.out_features == 1000: 44 | layer.forward = types.MethodType(snip_forward_linear, layer) 45 | 46 | # Compute gradients (but don't apply them) 47 | net.zero_grad() 48 | N = inputs.shape[0] 49 | for sp in range(split_data): 50 | st=sp*N//split_data 51 | en=(sp+1)*N//split_data 52 | 53 | outputs = net.forward(inputs[st:en]) 54 | loss = loss_fn(outputs, targets[st:en]) 55 | loss.backward() 56 | 57 | # select the gradients that we want to use for search/prune 58 | def snip(layer): 59 | if layer._get_name() == 'PatchembedSuper': 60 | if layer.weight_mask.grad is not None: 61 | return torch.abs(layer.weight_mask.grad) 62 | else: 63 | return torch.zeros_like(layer.weight) 64 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 65 | if layer.weight_mask.grad is not None: 66 | return torch.abs(layer.weight_mask.grad) 67 | else: 68 | return torch.zeros_like(layer.weight) 69 | if isinstance(layer, nn.Linear) and layer.out_features == 1000: 70 | if layer.weight_mask.grad is not None: 71 | return torch.abs(layer.weight_mask.grad) 72 | else: 73 | return torch.zeros_like(layer.weight) 74 | 75 | grads_abs = get_layer_metric_array(net, snip, mode) 76 | 77 | return grads_abs 78 | -------------------------------------------------------------------------------- /AutoFormer/training_free/compute_indicators.py: -------------------------------------------------------------------------------- 1 | from .p_utils import * 2 | from . 
import indicators 3 | 4 | import types 5 | import copy 6 | 7 | def no_op(self,x): 8 | return x 9 | 10 | def copynet(self, bn): 11 | net = copy.deepcopy(self) 12 | if bn==False: 13 | for l in net.modules(): 14 | if isinstance(l,nn.BatchNorm2d) or isinstance(l,nn.BatchNorm1d) : 15 | l.forward = types.MethodType(no_op, l) 16 | return net 17 | 18 | def find_indicators_arrays(net_orig, trainloader, dataload_info, device, indicator_names=None, loss_fn=F.cross_entropy): 19 | if indicator_names is None: 20 | indicator_names = indicators.available_indicators 21 | 22 | dataload, num_imgs_or_batches, num_classes = dataload_info 23 | 24 | net_orig.to(device) 25 | if not hasattr(net_orig,'get_copy'): 26 | net_orig.get_copy = types.MethodType(copynet, net_orig) 27 | 28 | # move to cpu to free up mem 29 | torch.cuda.empty_cache() 30 | net_orig = net_orig.cpu() 31 | torch.cuda.empty_cache() 32 | 33 | # given 1 minibatch of data 34 | if dataload == 'random': 35 | inputs, targets = get_some_data(trainloader, num_batches=num_imgs_or_batches, device=device) 36 | elif dataload == 'grasp': 37 | inputs, targets = get_some_data_grasp(trainloader, num_classes, samples_per_class=num_imgs_or_batches, device=device) 38 | else: 39 | raise NotImplementedError(f'dataload {dataload} is not supported') 40 | 41 | done, ds = False, 10 42 | indicator_values = {} 43 | 44 | while not done: 45 | try: 46 | for indicator_name in indicator_names: 47 | if indicator_name not in indicator_values: 48 | if indicator_name == 'NASWOT' or indicator_name=='te_nas': 49 | val = indicators.calc_indicator(indicator_name, net_orig, device) 50 | else: 51 | val = indicators.calc_indicator(indicator_name, net_orig, device, inputs, targets, loss_fn=loss_fn, split_data=ds) 52 | indicator_values[indicator_name] = val 53 | 54 | done = True 55 | except RuntimeError as e: 56 | if 'out of memory' in str(e): 57 | done = False # keep retrying with a finer data split 58 | if ds == inputs.shape[0]//2: 59 | raise ValueError(f'Can\'t split data anymore, but still unable to run. Something is wrong') 60 | ds += 1 61 | while inputs.shape[0] % ds != 0: 62 | ds += 1 63 | torch.cuda.empty_cache() 64 | print(f'Caught CUDA OOM, retrying with data split into {ds} parts') 65 | else: 66 | raise e 67 | 68 | net_orig = net_orig.to(device).train() 69 | return indicator_values 70 | 71 | def find_indicators(net_orig, 72 | dataloader, 73 | dataload_info, 74 | device, 75 | loss_fn=F.cross_entropy, 76 | indicator_names=None, 77 | indicators_arr=None): 78 | 79 | 80 | def sum_arr(arr): 81 | sum = 0. 82 | for i in range(len(arr)): 83 | sum += torch.sum(arr[i]) 84 | return sum.item() 85 | 86 | if indicators_arr is None: 87 | indicators_arr = find_indicators_arrays(net_orig, dataloader, dataload_info, device, loss_fn=loss_fn, indicator_names=indicator_names) 88 | 89 | indicators = {} 90 | for k,v in indicators_arr.items(): 91 | if k == 'NASWOT' or k=='te_nas': 92 | indicators[k] = v 93 | else: 94 | indicators[k] = sum_arr(v) 95 | 96 | return indicators 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # One-Shot-TAS 2 | 3 | ## How to Start 4 | 5 | ``` 6 | apt install python3.8-venv 7 | 8 | cd ./AutoFormer 9 | 10 | python3 -m venv {your_venv_name} 11 | 12 | source {your_venv_name}/bin/activate 13 | 14 | pip install -r requirements.txt 15 | 16 | 17 | # if a 'Pillow' error occurs...
18 | 19 | sudo apt-get install python3-dev 20 | 21 | pip install wheel 22 | 23 | apt-get update 24 | 25 | apt-get install build-essential 26 | 27 | apt-get install libjpeg-dev 28 | 29 | apt-get install libpng-dev libtiff-dev 30 | 31 | pip install pillow==6.1.0 32 | 33 | `pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html` 34 | 35 | ``` 36 | 37 | 38 | ## Data Preparation 39 | You first need to download [ImageNet-2012](http://www.image-net.org/) to the folder `./data/imagenet` and move the validation set to the subfolder `./data/imagenet/val`. To move the validation set into per-class folders, you could use the standard ImageNet validation preparation script. 40 | 41 | The directory structure is the standard layout, as follows. 42 | ``` 43 | /path/to/imagenet/ 44 | train/ 45 | class1/ 46 | img1.jpeg 47 | class2/ 48 | img2.jpeg 49 | val/ 50 | class1/ 51 | img3.jpeg 52 | class2/ 53 | img4.jpeg 54 | ``` 55 | 56 | ## Quick Start 57 | We provide the *Supernet Train, Search, Test* code of AutoFormer as follows. 58 | 59 | ### Supernet Train 60 | 61 | To train the supernet-T/S/B, we provide the corresponding supernet configuration files in `/experiments/supernet/`. For example, to train the supernet-B, you can run the following command. The default output path is `./`; you can specify a different path with the `--output` argument. 62 | 63 | ```buildoutcfg 64 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENET --gp \ 65 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --epochs 500 --warmup-epochs 20 \ 66 | --output /OUTPUT_PATH --batch-size 128 67 | ``` 68 | 69 | ### Search 70 | We run our evolution search on part of the ImageNet training dataset and use the validation set of ImageNet as the test set for a fair comparison. To generate the subImageNet in `/PATH/TO/IMAGENET`, you can simply run: 71 | ```buildoutcfg 72 | python ./lib/subImageNet.py --data-path /PATH/TO/IMAGENET 73 | ``` 74 | 75 | 76 | After obtaining the subImageNet and training the supernet, we can perform the evolution search using the command below. Please remember to configure the parameter constraints for the evolution search using `--min-param-limits` and `--param-limits` (a fully concrete example is given at the end of this README): 77 | ```buildoutcfg 78 | python -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path /PATH/TO/IMAGENET --gp \ 79 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --resume /PATH/TO/CHECKPOINT \ 80 | --min-param-limits YOUR/CONFIG --param-limits YOUR/CONFIG --data-set EVO_IMNET 81 | ``` 82 | 83 | ### Test 84 | To test our trained models, you need to put the downloaded model in `/PATH/TO/CHECKPOINT`. After that, you can use the following command to test the model (please change the config file and model checkpoint according to the model; here we use AutoFormer-B as an example). 85 | ```buildoutcfg 86 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENET --gp \ 87 | --change_qk --relative_position --mode retrain --dist-eval --cfg ./experiments/subnet/AutoFormer-B.yaml --resume /PATH/TO/CHECKPOINT --eval 88 | ``` 89 | 90 | ## Acknowledgements 91 | 92 | The code is inspired by [Autoformer](https://github.com/microsoft/Cream/tree/main/AutoFormer) and [tf-tas](https://github.com/decemberzhou/TF_TAS).
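For reference, here is a fully concrete instance of the evolution search above, as a minimal sketch: the supernet-T configuration and the 5-13 M parameter window are taken from this repository's search scripts, while the data and checkpoint paths remain placeholders to adapt:

```buildoutcfg
python -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path /PATH/TO/IMAGENET --gp \
--change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume /PATH/TO/CHECKPOINT \
--min-param-limits 5 --param-limits 13 --data-set EVO_IMNET
```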
93 | 94 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/train_supernet_only_supernet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set UTF-8 environment variables (to avoid UnicodeEncodeError) 4 | export PYTHONIOENCODING=utf-8 5 | export LC_ALL=C.UTF-8 6 | export LANG=C.UTF-8 7 | 8 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_only_supernet.py --data-path '/data' --gp \ 9 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 10 | # --output /OUTPUT_PATH --batch-size 128 \ 11 | # --save_checkpoint_path 'checkpoint-tiny-only-supernet-maximum240-' --save_log_path './log/supernet_tiny-only-supernet-maximum240.log' --interval 1 12 | 13 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 14 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-maximum240-21.pth' \ 15 | # --min-param-limits 5 --param-limits 13 \ 16 | # --log-file-path './log/search_tiny-only-supernet240-minimum_pop1050.log' 17 | 18 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 19 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 20 | # --min-param-limits 5 --param-limits 13 \ 21 | # --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050.log' 22 | 23 | # --min-param-limits 5 --param-limits 6 \ 24 | 25 | 26 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 27 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 28 | --min-param-limits 6 --param-limits 7 \ 29 | --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050_7M.log' 30 | 31 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 32 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 33 | --min-param-limits 7 --param-limits 8 \ 34 | --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050_8M.log' 35 | 36 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 37 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 38 | --min-param-limits 8 --param-limits 9 \ 39 | --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050_9M.log' 40 | 41 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 42 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 43 | --min-param-limits 9 --param-limits 10 \ 44 | --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050_10M.log' 45 | 46 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env
supernet_train_only_supernet.py --data-path '/data' --gp \ 47 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 48 | # --output /OUTPUT_PATH --batch-size 128 \ 49 | # --save_checkpoint_path 'checkpoint-tiny-only-supernet-minimum-' --save_log_path './log/supernet_tiny-only-supernet-minimum.log' --interval 1 50 | 51 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 52 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 53 | # --min-param-limits 5 --param-limits 6 \ 54 | # --log-file-path './log/search_tiny-only-supernet-minimum_6M.log' 55 | -------------------------------------------------------------------------------- /AutoFormer/model/module/Linear_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | def uniform_element_selection(tensor, target_dim, dim): 7 | """ 8 | Uniformly selects elements from the tensor along the specified dimension. 9 | 10 | Parameters: 11 | tensor (torch.Tensor): The input tensor. 12 | target_dim (int): The target dimension size. 13 | dim (int): The dimension along which to select elements. 14 | 15 | Returns: 16 | torch.Tensor: A tensor with the selected elements. 17 | """ 18 | original_dim = tensor.size(dim) 19 | indices = torch.linspace(0, original_dim - 1, target_dim).long().to(tensor.device) 20 | return tensor.index_select(dim, indices) 21 | 22 | class LinearSuper(nn.Linear): 23 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 24 | super().__init__(super_in_dim, super_out_dim, bias=bias) 25 | 26 | # super_in_dim and super_out_dim indicate the largest network! 27 | self.super_in_dim = super_in_dim 28 | self.super_out_dim = super_out_dim 29 | 30 | # input_dim and output_dim indicate the current sampled size 31 | self.sample_in_dim = None 32 | self.sample_out_dim = None 33 | 34 | self.samples = {} 35 | 36 | self.scale = scale 37 | self._reset_parameters(bias, uniform_, non_linear) 38 | self.profiling = False 39 | 40 | def profile(self, mode=True): 41 | self.profiling = mode 42 | 43 | def sample_parameters(self, resample=False): 44 | if self.profiling or resample: 45 | return self._sample_parameters() 46 | return self.samples 47 | 48 | def _reset_parameters(self, bias, uniform_, non_linear): 49 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 50 | self.weight, non_linear=non_linear) 51 | if bias: 52 | nn.init.constant_(self.bias, 0.) 
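    # NOTE: worked example of uniform_element_selection (defined at the top of this
    # file): for original_dim=10 and target_dim=4, torch.linspace(0, 9, 4) gives
    # [0., 3., 6., 9.], so elements 0, 3, 6 and 9 are kept. Unlike the prefix slicing
    # in the *_original variants of this file, this spreads the sampled rows and
    # columns evenly across the full super-dimension.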
53 | 54 | def set_sample_config(self, sample_in_dim, sample_out_dim): 55 | self.sample_in_dim = sample_in_dim 56 | self.sample_out_dim = sample_out_dim 57 | 58 | self._sample_parameters() 59 | 60 | def _sample_parameters(self): 61 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 62 | self.samples['bias'] = self.bias 63 | self.sample_scale = self.super_out_dim/self.sample_out_dim 64 | if self.bias is not None: 65 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 66 | return self.samples 67 | 68 | def forward(self, x): 69 | self.sample_parameters() 70 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 71 | 72 | def calc_sampled_param_num(self): 73 | assert 'weight' in self.samples.keys() 74 | weight_numel = self.samples['weight'].numel() 75 | 76 | if self.samples['bias'] is not None: 77 | bias_numel = self.samples['bias'].numel() 78 | else: 79 | bias_numel = 0 80 | 81 | return weight_numel + bias_numel 82 | def get_complexity(self, sequence_length): 83 | total_flops = 0 84 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 85 | return total_flops 86 | 87 | def sample_weight(weight, sample_in_dim, sample_out_dim): 88 | # sample_weight = weight[:, :sample_in_dim] 89 | # sample_weight = sample_weight[:sample_out_dim, :] 90 | sample_weight = uniform_element_selection(weight, sample_in_dim, dim=1) 91 | sample_weight = uniform_element_selection(sample_weight, sample_out_dim, dim=0) 92 | 93 | return sample_weight 94 | 95 | 96 | def sample_bias(bias, sample_out_dim): 97 | # sample_bias = bias[:sample_out_dim] 98 | sample_bias = uniform_element_selection(bias, sample_out_dim, dim=0) 99 | 100 | return sample_bias 101 | -------------------------------------------------------------------------------- /AutoFormer/model/module/qkv_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | def uniform_element_selection(tensor, target_dim, dim): 7 | """ 8 | Uniformly selects elements from the tensor along the specified dimension. 9 | 10 | Parameters: 11 | tensor (torch.Tensor): The input tensor. 12 | target_dim (int): The target dimension size. 13 | dim (int): The dimension along which to select elements. 14 | 15 | Returns: 16 | torch.Tensor: A tensor with the selected elements. 17 | """ 18 | original_dim = tensor.size(dim) 19 | indices = torch.linspace(0, original_dim - 1, target_dim).long().to(tensor.device) 20 | return tensor.index_select(dim, indices) 21 | 22 | class qkv_super(nn.Linear): 23 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 24 | super().__init__(super_in_dim, super_out_dim, bias=bias) 25 | 26 | # super_in_dim and super_out_dim indicate the largest network! 
27 | self.super_in_dim = super_in_dim 28 | self.super_out_dim = super_out_dim 29 | 30 | # input_dim and output_dim indicate the current sampled size 31 | self.sample_in_dim = None 32 | self.sample_out_dim = None 33 | 34 | self.samples = {} 35 | 36 | self.scale = scale 37 | # self._reset_parameters(bias, uniform_, non_linear) 38 | self.profiling = False 39 | 40 | def profile(self, mode=True): 41 | self.profiling = mode 42 | 43 | def sample_parameters(self, resample=False): 44 | if self.profiling or resample: 45 | return self._sample_parameters() 46 | return self.samples 47 | 48 | def _reset_parameters(self, bias, uniform_, non_linear): 49 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 50 | self.weight, non_linear=non_linear) 51 | if bias: 52 | nn.init.constant_(self.bias, 0.) 53 | 54 | def set_sample_config(self, sample_in_dim, sample_out_dim): 55 | self.sample_in_dim = sample_in_dim 56 | self.sample_out_dim = sample_out_dim 57 | 58 | self._sample_parameters() 59 | 60 | def _sample_parameters(self): 61 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 62 | self.samples['bias'] = self.bias 63 | self.sample_scale = self.super_out_dim/self.sample_out_dim 64 | if self.bias is not None: 65 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 66 | return self.samples 67 | 68 | def forward(self, x): 69 | self.sample_parameters() 70 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 71 | 72 | def calc_sampled_param_num(self): 73 | assert 'weight' in self.samples.keys() 74 | weight_numel = self.samples['weight'].numel() 75 | 76 | if self.samples['bias'] is not None: 77 | bias_numel = self.samples['bias'].numel() 78 | else: 79 | bias_numel = 0 80 | 81 | return weight_numel + bias_numel 82 | def get_complexity(self, sequence_length): 83 | total_flops = 0 84 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 85 | return total_flops 86 | 87 | # def sample_weight(weight, sample_in_dim, sample_out_dim): 88 | # sample_weight = uniform_element_selection(weight, sample_in_dim, dim=1) 89 | # sample_weight = uniform_element_selection(sample_weight, sample_out_dim, dim=0) 90 | # return sample_weight 91 | 92 | # def sample_bias(bias, sample_out_dim): 93 | # sample_bias = uniform_element_selection(bias, sample_out_dim, dim=0) 94 | # return sample_bias 95 | 96 | 97 | def sample_weight(weight, sample_in_dim, sample_out_dim): 98 | 99 | sample_weight = weight[:, :sample_in_dim] 100 | sample_weight = torch.cat([sample_weight[i:sample_out_dim:3, :] for i in range(3)], dim =0) 101 | sample_weight.requires_grad_(weight.requires_grad) # preserve the requires_grad attribute 102 | return sample_weight 103 | 104 | 105 | def sample_bias(bias, sample_out_dim): 106 | sample_bias = bias[:sample_out_dim] 107 | sample_bias.requires_grad_(bias.requires_grad) # preserve the requires_grad attribute 108 | return sample_bias 109 | -------------------------------------------------------------------------------- /AutoFormer/training_free/indicators/NASWOT.py: -------------------------------------------------------------------------------- 1 | import os, sys, time 2 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 3 | import torch 4 | from torch import nn 5 | import numpy as np 6 | from .
import indicator 7 | 8 | def network_weight_gaussian_init(net: nn.Module): 9 | with torch.no_grad(): 10 | for m in net.modules(): 11 | if isinstance(m, nn.Conv2d): 12 | nn.init.normal_(m.weight) 13 | if hasattr(m, 'bias') and m.bias is not None: 14 | nn.init.zeros_(m.bias) 15 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 16 | nn.init.ones_(m.weight) 17 | nn.init.zeros_(m.bias) 18 | elif isinstance(m, nn.Linear): 19 | nn.init.normal_(m.weight) 20 | if hasattr(m, 'bias') and m.bias is not None: 21 | nn.init.zeros_(m.bias) 22 | else: 23 | continue 24 | 25 | return net 26 | 27 | def logdet(K): 28 | s, ld = np.linalg.slogdet(K) 29 | return ld 30 | 31 | def get_batch_jacobian(net, x): 32 | net.zero_grad() 33 | x.requires_grad_(True) 34 | y = net(x) 35 | y.backward(torch.ones_like(y)) 36 | jacob = x.grad.detach() 37 | # return jacob, target.detach(), y.detach() 38 | return jacob, y.detach() 39 | 40 | @indicator('NASWOT', bn=False, mode='param') 41 | def compute_nas_score(model, device, resolution=224, batch_size=64): 42 | gpu=0 43 | if gpu is not None: 44 | torch.cuda.set_device(gpu) 45 | model = model.cuda(gpu) 46 | 47 | network_weight_gaussian_init(model) 48 | input = torch.randn(size=[batch_size, 3, resolution, resolution]) 49 | if gpu is not None: 50 | input = input.cuda(gpu) 51 | 52 | model.K = np.zeros((batch_size, batch_size)) 53 | 54 | def counting_forward_hook(module, inp, out): 55 | try: 56 | if not module.visited_backwards: 57 | return 58 | if isinstance(inp, tuple): 59 | inp = inp[0] 60 | inp = inp.view(inp.size(0), -1) 61 | x = (inp > 0).float() 62 | K = x @ x.t() 63 | K2 = (1. - x) @ (1. - x.t()) 64 | model.K = model.K + K.cpu().numpy() + K2.cpu().numpy() 65 | except Exception as err: 66 | print('---- error on model : ') 67 | print(model) 68 | raise err 69 | 70 | 71 | def counting_backward_hook(module, inp, out): 72 | module.visited_backwards = True 73 | 74 | for name, module in model.named_modules(): 75 | # if 'ReLU' in str(type(module)): 76 | if isinstance(module, torch.nn.GELU): 77 | # hooks[name] = module.register_forward_hook(counting_hook) 78 | module.visited_backwards = True 79 | module.register_forward_hook(counting_forward_hook) 80 | module.register_backward_hook(counting_backward_hook) 81 | 82 | x = input 83 | jacobs, y = get_batch_jacobian(model, x) 84 | 85 | score = logdet(model.K) 86 | 87 | return float(score) 88 | 89 | 90 | 91 | def parse_cmd_options(argv): 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument('--batch_size', type=int, default=16, help='number of instances in one mini-batch.') 94 | parser.add_argument('--input_image_size', type=int, default=None, 95 | help='resolution of input image, usually 32 for CIFAR and 224 for ImageNet.') 96 | parser.add_argument('--repeat_times', type=int, default=32) 97 | parser.add_argument('--gpu', type=int, default=None) 98 | module_opt, _ = parser.parse_known_args(argv) 99 | return module_opt 100 | 101 | if __name__ == "__main__": 102 | opt = global_utils.parse_cmd_options(sys.argv) 103 | args = parse_cmd_options(sys.argv) 104 | the_model = ModelLoader.get_model(opt, sys.argv) 105 | if args.gpu is not None: 106 | the_model = the_model.cuda(args.gpu) 107 | 108 | 109 | start_timer = time.time() 110 | 111 | for repeat_count in range(args.repeat_times): 112 | the_score = compute_nas_score(gpu=args.gpu, model=the_model, 113 | resolution=args.input_image_size, batch_size=args.batch_size) 114 | 115 | time_cost = (time.time() - start_timer) / args.repeat_times 116 | 117 | print(f'NASWOT={the_score:.4g}, time 
cost={time_cost:.4g} second(s)') 118 | -------------------------------------------------------------------------------- /AutoFormer/model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | from itertools import repeat 5 | from torch._six import container_abcs 6 | import torch.nn as nn 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | def _ntuple(n): 65 | def parse(x): 66 | if isinstance(x, container_abcs.Iterable): 67 | return x 68 | return tuple(repeat(x, n)) 69 | return parse 70 | 71 | def drop_path(x, drop_prob: float = 0., training: bool = False): 72 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 73 | 74 | This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, 75 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 76 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for 77 | changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 78 | 'survival rate' as the argument. 79 | 80 | """ 81 | if drop_prob == 0. 
or not training: 82 | return x 83 | keep_prob = 1 - drop_prob 84 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 85 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 86 | random_tensor.floor_() # binarize 87 | output = x.div(keep_prob) * random_tensor 88 | return output 89 | 90 | 91 | class DropPath(nn.Module): 92 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 93 | """ 94 | def __init__(self, drop_prob=None): 95 | super(DropPath, self).__init__() 96 | self.drop_prob = drop_prob 97 | 98 | def forward(self, x): 99 | return drop_path(x, self.drop_prob, self.training) 100 | 101 | 102 | to_1tuple = _ntuple(1) 103 | to_2tuple = _ntuple(2) 104 | to_3tuple = _ntuple(3) 105 | to_4tuple = _ntuple(4) 106 | to_ntuple = _ntuple -------------------------------------------------------------------------------- /AutoFormer_original/model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | from itertools import repeat 5 | from torch._six import container_abcs 6 | import torch.nn as nn 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 
52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | def _ntuple(n): 65 | def parse(x): 66 | if isinstance(x, container_abcs.Iterable): 67 | return x 68 | return tuple(repeat(x, n)) 69 | return parse 70 | 71 | def drop_path(x, drop_prob: float = 0., training: bool = False): 72 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 73 | 74 | This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, 75 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 76 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for 77 | changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 78 | 'survival rate' as the argument. 79 | 80 | """ 81 | if drop_prob == 0. or not training: 82 | return x 83 | keep_prob = 1 - drop_prob 84 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 85 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 86 | random_tensor.floor_() # binarize 87 | output = x.div(keep_prob) * random_tensor 88 | return output 89 | 90 | 91 | class DropPath(nn.Module): 92 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 93 | """ 94 | def __init__(self, drop_prob=None): 95 | super(DropPath, self).__init__() 96 | self.drop_prob = drop_prob 97 | 98 | def forward(self, x): 99 | return drop_path(x, self.drop_prob, self.training) 100 | 101 | 102 | to_1tuple = _ntuple(1) 103 | to_2tuple = _ntuple(2) 104 | to_3tuple = _ntuple(3) 105 | to_4tuple = _ntuple(4) 106 | to_ntuple = _ntuple -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | from itertools import repeat 5 | from torch._six import container_abcs 6 | import torch.nn as nn 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 
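#    Since the normal CDF is Phi(x) = (1 + erf(x / sqrt(2))) / 2, a uniform
#    draw v in [2l-1, 2u-1] satisfies erfinv(v) = x / sqrt(2) for some x in
#    [(a - mean) / std, (b - mean) / std]; the mul_(std * sqrt(2)) and
#    add_(mean) steps below therefore map every sample into [a, b].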
29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | def _ntuple(n): 65 | def parse(x): 66 | if isinstance(x, container_abcs.Iterable): 67 | return x 68 | return tuple(repeat(x, n)) 69 | return parse 70 | 71 | def drop_path(x, drop_prob: float = 0., training: bool = False): 72 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 73 | 74 | This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, 75 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 76 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for 77 | changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 78 | 'survival rate' as the argument. 79 | 80 | """ 81 | if drop_prob == 0. or not training: 82 | return x 83 | keep_prob = 1 - drop_prob 84 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 85 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 86 | random_tensor.floor_() # binarize 87 | output = x.div(keep_prob) * random_tensor 88 | return output 89 | 90 | 91 | class DropPath(nn.Module): 92 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
93 | """ 94 | def __init__(self, drop_prob=None): 95 | super(DropPath, self).__init__() 96 | self.drop_prob = drop_prob 97 | 98 | def forward(self, x): 99 | return drop_path(x, self.drop_prob, self.training) 100 | 101 | 102 | to_1tuple = _ntuple(1) 103 | to_2tuple = _ntuple(2) 104 | to_3tuple = _ntuple(3) 105 | to_4tuple = _ntuple(4) 106 | to_ntuple = _ntuple -------------------------------------------------------------------------------- /AutoFormer_original_greedy/evolution_search copy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 첫 번째 작업 실행 4 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 5 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 6 | --min-param-limits 5 --param-limits 6 \ 7 | --log-file-path './log/search_original_tiny_6M.log' 8 | 9 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 10 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 11 | --min-param-limits 5 --param-limits 6 \ 12 | --log-file-path './log/search_sn_tiny_6M.log' 13 | 14 | # 첫 번째 작업 실행 15 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 16 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 17 | --min-param-limits 6 --param-limits 7 \ 18 | --log-file-path './log/search_original_tiny_7M.log' 19 | 20 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 21 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 22 | --min-param-limits 6 --param-limits 7 \ 23 | --log-file-path './log/search_sn_tiny_7M.log' 24 | 25 | # 첫 번째 작업 실행 26 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 27 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 28 | --min-param-limits 7 --param-limits 8 \ 29 | --log-file-path './log/search_original_tiny_8M.log' 30 | 31 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 32 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 33 | --min-param-limits 7 --param-limits 8 \ 34 | --log-file-path './log/search_sn_tiny_8M.log' 35 | 36 | # 첫 번째 작업 실행 37 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 38 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 39 | --min-param-limits 8 --param-limits 9 \ 40 | --log-file-path './log/search_original_tiny_9M.log' 41 | 42 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 43 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 44 | --min-param-limits 8 --param-limits 9 \ 45 | --log-file-path './log/search_sn_tiny_9M.log' 46 | 47 | # 첫 번째 작업 실행 48 | 
47 | # Run the first job 48 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 49 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 50 | --min-param-limits 9 --param-limits 10 \ 51 | --log-file-path './log/search_original_tiny_10M.log' 52 | 53 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 54 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 55 | --min-param-limits 9 --param-limits 10 \ 56 | --log-file-path './log/search_sn_tiny_10M.log' 57 | 58 | 59 | 60 | # # Run the second job if the first one completed successfully 61 | # if [ $? -eq 0 ]; then 62 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 63 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 64 | # --min-param-limits 1 --param-limits 100 --config-list-path './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl' \ 65 | # --log-file-path './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch-subnet.log' 66 | # else 67 | # echo "The first job failed. Skipping the second job." 68 | # fi 69 | # # --data-set EVO_IMNET 70 | 71 | 72 | # #!/bin/bash 73 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 74 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 75 | # --min-param-limits 1 --param-limits 100 76 | # # --data-set EVO_IMNET 77 | 78 | 79 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/evolution_search.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run the first job 4 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 5 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 6 | --min-param-limits 5 --param-limits 6 \ 7 | --log-file-path './log/search_sn_not_original_400_6M.log' 8 | 9 | # Run the first job 10 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 11 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 12 | --min-param-limits 6 --param-limits 7 \ 13 | --log-file-path './log/search_sn_not_original_400_7M.log' 14 | 15 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 16 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 17 | --min-param-limits 5 --param-limits 6 \ 18 | --log-file-path './log/search_sn_400_bottom_6M.log' 19 | 20 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 21 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 22 | --min-param-limits 6 --param-limits 7 \ 23 | --log-file-path './log/search_sn_400_bottom_7M.log' 24 |
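# The stanzas above and below sweep one parameter bracket at a time (5-6M up
# to 9-10M) for the two supernet checkpoints, writing one log file per
# (checkpoint, bracket) pair under ./log/ for side-by-side comparison.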
25 | # Run the first job 26 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 27 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 28 | --min-param-limits 7 --param-limits 8 \ 29 | --log-file-path './log/search_sn_not_original_400_8M.log' 30 | 31 | # Run the first job 32 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 33 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 34 | --min-param-limits 8 --param-limits 9 \ 35 | --log-file-path './log/search_sn__not_original_400_9M.log' 36 | 37 | 38 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 39 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 40 | --min-param-limits 7 --param-limits 8 \ 41 | --log-file-path './log/search_sn_400_bottom_8M.log' 42 | 43 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 44 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 45 | --min-param-limits 8 --param-limits 9 \ 46 | --log-file-path './log/search_sn_400_bottom_9M.log' 47 | # Run the first job 48 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 49 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 50 | --min-param-limits 9 --param-limits 10 \ 51 | --log-file-path './log/search_sn__not_original_400_10M.log' 52 | 53 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 54 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 55 | --min-param-limits 9 --param-limits 10 \ 56 | --log-file-path './log/search_sn_400_bottom_10M.log' 57 | 58 | 59 | 60 | # # Run the second job if the first one completed successfully 61 | # if [ $? -eq 0 ]; then 62 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 63 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 64 | # --min-param-limits 1 --param-limits 100 --config-list-path './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl' \ 65 | # --log-file-path './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch-subnet.log' 66 | # else 67 | # echo "The first job failed. Skipping the second job."
68 | # fi 69 | # # --data-set EVO_IMNET 70 | 71 | 72 | # #!/bin/bash 73 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 74 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 75 | # --min-param-limits 1 --param-limits 100 76 | # # --data-set EVO_IMNET 77 | 78 | 79 | -------------------------------------------------------------------------------- /AutoFormer/tmp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | import concurrent.futures 7 | from torch.nn.parallel import DataParallel 8 | from timm.data import Mixup 9 | from timm.utils import accuracy, ModelEma 10 | from lib import utils 11 | import random 12 | import time 13 | 14 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 15 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 16 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 17 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 18 | amp: bool = True, teacher_model: torch.nn.Module = None, 19 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None, 20 | candidate_pool=None, validation_data_loader=None, pool_sampling_prob=0, m=10, k=5): 21 | model.train() 22 | criterion.train() 23 | 24 | # Set random seed 25 | random.seed(epoch) 26 | 27 | metric_logger = utils.MetricLogger(delimiter=" ") 28 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 29 | header = 'Epoch: [{}]'.format(epoch) 30 | print_freq = 10 31 | 32 | # Calculate T from data_loader total size and batch size 33 | T = len(data_loader) # Total number of iterations (total data / batch size) 34 | # print("pool_sampling_prob : ", pool_sampling_prob) 35 | 36 | if mode == 'super': 37 | model_module = unwrap_model(model) 38 | total_iters = T // k # set up so the loop runs T/k times 39 | 40 | data_iter = iter(metric_logger.log_every(data_loader, print_freq, header)) 41 | 42 | sampled_paths = [{'mlp_ratio': [4, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 4, 4, 3.5, 4, 3.5, 4], 'num_heads': [3, 4, 4, 3, 3, 4, 3, 4, 3, 3, 4, 4, 4], 'embed_dim': [192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192], 'layer_num': 13}, {'mlp_ratio': [3.5, 4, 3.5, 4, 3.5, 4, 3.5, 4, 3.5, 4, 3.5, 3.5, 3.5], 'num_heads': [4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 3, 3], 'embed_dim': [216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216], 'layer_num': 13}, {'mlp_ratio': [3.5, 4, 4, 3.5, 4, 3.5, 3.5, 3.5, 4, 4, 4, 3.5, 4, 4], 'num_heads': [3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 4, 3, 3], 'embed_dim': [240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240], 'layer_num': 14}, {'mlp_ratio': [4, 3.5, 4, 3.5, 4, 3.5, 3.5, 4, 3.5, 3.5, 4, 3.5, 4], 'num_heads': [4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 4, 4, 3], 'embed_dim': [216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216], 'layer_num': 13}] 43 | 44 | losses = [] 45 | with torch.no_grad(): # save memory by disabling gradient tracking for the loss computation 46 | for config in sampled_paths: 47 | model_module = unwrap_model(model) 48 | model_module.set_sample_config(config=config) 49 | 50 | # Evaluate the model on the entire validation dataset 51 | val_loss_total = 0 52 | num_batches = 0 53 |
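# The loop below scores one candidate path at a time: the subnet described by
# `config` is activated in the supernet and evaluated on the full validation
# loader, and its mean loss is recorded; the k lowest-loss paths are kept as
# the candidate pool further down.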
54 | for val_samples, val_targets in validation_data_loader: 55 | val_samples = val_samples.to(device, non_blocking=True) 56 | val_targets = val_targets.to(device, non_blocking=True) 57 | 58 | if mixup_fn is not None: 59 | val_samples, val_targets = mixup_fn(val_samples, val_targets) 60 | 61 | if amp: 62 | with torch.cuda.amp.autocast(): 63 | val_outputs = model(val_samples) 64 | val_loss = criterion(val_outputs, val_targets) 65 | else: 66 | val_outputs = model(val_samples) 67 | val_loss = criterion(val_outputs, val_targets) 68 | 69 | # add each batch's loss 70 | val_loss_total += val_loss.item() 71 | num_batches += 1 72 | 73 | # store the average loss over all batches 74 | val_loss_avg = val_loss_total / num_batches 75 | losses.append((val_loss_avg, config)) 76 | 77 | losses.sort(key=lambda x: x[0]) # Sort by loss value (lower is better) 78 | top_k_paths = losses[:k] 79 | 80 | # bottom_k_paths: sort the remaining paths in descending order of loss 81 | bottom_k_paths = sorted(losses[k:], key=lambda x: x[0], reverse=True) 82 | 83 | ######### 84 | 85 | # top_k_paths = sampled_paths 86 | 87 | # After the evaluation finishes, add top_k_paths to candidate_pool 88 | if candidate_pool is not None: 89 | candidate_pool[:] = [config for _, config in top_k_paths] 90 | # candidate_pool[:] = top_k_paths # replace the contents of candidate_pool with top_k_paths 91 | 92 | # Avoid running out of CUDA memory: print top_k_paths 93 | print("top_k_paths : ", top_k_paths) 94 | print("bottom_k_paths : ", bottom_k_paths) -------------------------------------------------------------------------------- /AutoFormer/performance_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pickle 3 | 4 | # function that loads the a.pkl file 5 | def load_pkl(file_path): 6 | with open(file_path, 'rb') as file: 7 | data = pickle.load(file) 8 | return data 9 | 10 | # function that adds loss values 11 | def add_inter_loss(results, a_pkl_data): 12 | a_pkl_dict = {} 13 | 14 | # index the a.pkl data by key (layer_num, mlp_ratio, num_heads, embed_dim) 15 | for item in a_pkl_data: 16 | key = (item['layer_num'], tuple(item['mlp_ratio']), tuple(item['num_heads']), tuple(item['embed_dim'])) 17 | a_pkl_dict[key] = item['loss'] 18 | 19 | # add inter_loss to results_no_duplicates 20 | for config in results: 21 | key = (config['layer_num'], tuple(config['mlp_ratio']), tuple(config['num_heads']), tuple(config['embed_dim'])) 22 | if key in a_pkl_dict: 23 | config['inter_loss'] = a_pkl_dict[key] 24 | 25 | return results 26 | 27 | def find_non_matching_pairs(results): 28 | non_matching_indices = [] 29 | 30 | # check that results has an even length 31 | if len(results) % 2 != 0: 32 | print("Warning: The number of items in results should be even.") 33 | return non_matching_indices 34 | 35 | # check each (n, n+1) pair 36 | for i in range(0, len(results) - 1, 2): # step through the indices two at a time 37 | if results[i] != results[i + 1]: 38 | non_matching_indices.append(i) # record index n (the (n, n+1) pair does not match) 39 | 40 | return non_matching_indices 41 | 42 | def remove_duplicates(results): 43 | unique_results = [] 44 | seen = set() 45 | 46 | for config in results: 47 | # build the deduplication key (parameters excluded) 48 | key = (config['layer_num'], tuple(config['mlp_ratio']), tuple(config['num_heads']), tuple(config['embed_dim'])) 49 | 50 | if key not in seen: 51 | # keep the first occurrence of each key 52 | seen.add(key) 53 | unique_results.append(config) 54 | 55 | return unique_results 56 | 57 | 58 | def parse_evolution_log(file_path): 59 | results = [] 60 | current_config = {} 61 | i = 0 62 | 63 | with open(file_path, 'r') as file: 64 | for line in file: 65 | line = line.strip() 66 |
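# The parser below is a small state machine over the log: a "sampled model
# config:" line opens a record, "sampled model parameters:" adds the size,
# and the "* Acc@1" line completes the record and appends it to results.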
67 | # extract the model config 68 | if line.startswith("sampled model config:"): 69 | config_dict_str = line.split("sampled model config: ")[1] 70 | current_config = eval(config_dict_str) # convert the string into a dict 71 | 72 | # extract the model parameter count 73 | elif line.startswith("sampled model parameters:"): 74 | parameters = int(line.split("sampled model parameters: ")[1]) 75 | current_config['parameters'] = parameters 76 | 77 | # extract the performance metrics 78 | elif line.startswith("* Acc@1"): 79 | acc1 = float(re.search(r"Acc@1\s(\d+\.\d+)", line).group(1)) 80 | acc5 = float(re.search(r"Acc@5\s(\d+\.\d+)", line).group(1)) 81 | loss = float(re.search(r"loss\s(\d+\.\d+)", line).group(1)) 82 | current_config['acc1'] = acc1 83 | current_config['acc5'] = acc5 84 | current_config['loss'] = loss 85 | # current_config['id'] = int(i/2) 86 | current_config['id'] = int(i) 87 | 88 | # append the fully populated dict to the results list 89 | results.append(current_config) 90 | current_config = {} # reset for the next entry 91 | i = i + 1 92 | 93 | return results 94 | 95 | # example file path 96 | # file_path = "./greedyTAS/greedyTAS-epoch60/autoformer-greedyTAS(09131747).log" 97 | 98 | # file_path = "./greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch-subnet.log" 99 | file_path = "./greedyTAS/m(2500)_path_epoch100-subnet.log" 100 | 101 | results = parse_evolution_log(file_path) 102 | print(len(results)) # print the number of results 103 | 104 | # non_matching_indices = find_non_matching_pairs(results) 105 | # print("Non-matching indices:", non_matching_indices) 106 | 107 | # results_no_duplicates = remove_duplicates(results) 108 | results_no_duplicates = results 109 | print(len(results_no_duplicates)) # print the deduplicated count 110 | print(results_no_duplicates[0]) 111 | print(results_no_duplicates[1]) 112 | print(results_no_duplicates[2]) 113 | print(results_no_duplicates[-1]) 114 | 115 | # path to the a.pkl file 116 | # a_pkl_path = "./greedyTAS/greedyTAS-epoch60/autoformer-greedyTAS(09131747).log" 117 | # a_pkl_path = "./greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl" # change this to the actual file path 118 | 119 | # # load the a.pkl file 120 | # a_pkl_data = load_pkl(a_pkl_path) 121 | 122 | # # add the inter_loss values 123 | # results_with_inter_loss = add_inter_loss(results_no_duplicates, a_pkl_data) 124 | 125 | # # print results (example) 126 | # print(results_with_inter_loss[0]) 127 | # print(results_with_inter_loss[1]) 128 | # print(results_with_inter_loss[2]) 129 | # print(results_with_inter_loss[-1]) 130 | 131 | # Save the transformed data to a new pickle file 132 | with open('./greedyTAS/m(2500)_path_epoch100-subnet.pkl', 'wb') as file: 133 | pickle.dump(results_no_duplicates, file) 134 | 135 | print("Data saved successfully.") -------------------------------------------------------------------------------- /AutoFormer_original_greedy/performance_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pickle 3 | 4 | # function that loads the a.pkl file 5 | def load_pkl(file_path): 6 | with open(file_path, 'rb') as file: 7 | data = pickle.load(file) 8 | return data 9 | 10 | # function that adds loss values 11 | def add_inter_loss(results, a_pkl_data): 12 | a_pkl_dict = {} 13 | 14 | # index the a.pkl data by key (layer_num, mlp_ratio, num_heads, embed_dim) 15 | for item in a_pkl_data: 16 | key = (item['layer_num'], tuple(item['mlp_ratio']), tuple(item['num_heads']), tuple(item['embed_dim'])) 17 | a_pkl_dict[key] = item['loss'] 18 | 19 | # add inter_loss to results_no_duplicates 20 | for config in results: 21 | key = (config['layer_num'], tuple(config['mlp_ratio']), tuple(config['num_heads']), tuple(config['embed_dim'])) 22 | if key in a_pkl_dict: 23 | config['inter_loss'] = a_pkl_dict[key] 24 | 25 | return results 26 |
27 | def find_non_matching_pairs(results): 28 | non_matching_indices = [] 29 | 30 | # check that results has an even length 31 | if len(results) % 2 != 0: 32 | print("Warning: The number of items in results should be even.") 33 | return non_matching_indices 34 | 35 | # check each (n, n+1) pair 36 | for i in range(0, len(results) - 1, 2): # step through the indices two at a time 37 | if results[i] != results[i + 1]: 38 | non_matching_indices.append(i) # record index n (the (n, n+1) pair does not match) 39 | 40 | return non_matching_indices 41 | 42 | def remove_duplicates(results): 43 | unique_results = [] 44 | seen = set() 45 | 46 | for config in results: 47 | # build the deduplication key (parameters excluded) 48 | key = (config['layer_num'], tuple(config['mlp_ratio']), tuple(config['num_heads']), tuple(config['embed_dim'])) 49 | 50 | if key not in seen: 51 | # keep the first occurrence of each key 52 | seen.add(key) 53 | unique_results.append(config) 54 | 55 | return unique_results 56 | 57 | 58 | def parse_evolution_log(file_path): 59 | results = [] 60 | current_config = {} 61 | i = 0 62 | 63 | with open(file_path, 'r') as file: 64 | for line in file: 65 | line = line.strip() 66 | 67 | # extract the model config 68 | if line.startswith("sampled model config:"): 69 | config_dict_str = line.split("sampled model config: ")[1] 70 | current_config = eval(config_dict_str) # convert the string into a dict 71 | 72 | # extract the model parameter count 73 | elif line.startswith("sampled model parameters:"): 74 | parameters = int(line.split("sampled model parameters: ")[1]) 75 | current_config['parameters'] = parameters 76 | 77 | # extract the performance metrics 78 | elif line.startswith("* Acc@1"): 79 | acc1 = float(re.search(r"Acc@1\s(\d+\.\d+)", line).group(1)) 80 | acc5 = float(re.search(r"Acc@5\s(\d+\.\d+)", line).group(1)) 81 | loss = float(re.search(r"loss\s(\d+\.\d+)", line).group(1)) 82 | current_config['acc1'] = acc1 83 | current_config['acc5'] = acc5 84 | current_config['loss'] = loss 85 | # current_config['id'] = int(i/2) 86 | current_config['id'] = int(i) 87 | 88 | # append the fully populated dict to the results list 89 | results.append(current_config) 90 | current_config = {} # reset for the next entry 91 | i = i + 1 92 | 93 | return results 94 | 95 | # example file path 96 | # file_path = "./greedyTAS/greedyTAS-epoch60/autoformer-greedyTAS(09131747).log" 97 | 98 | # file_path = "./greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch-subnet.log" 99 | file_path = "./log/search_tiny-only-supernet192-minimum_pop1050.log" 100 | 101 | results = parse_evolution_log(file_path) 102 | print(len(results)) # print the number of results 103 | 104 | # non_matching_indices = find_non_matching_pairs(results) 105 | # print("Non-matching indices:", non_matching_indices) 106 | 107 | results_no_duplicates = remove_duplicates(results) 108 | # results_no_duplicates = results 109 | print(len(results_no_duplicates)) # print the deduplicated count 110 | print(results_no_duplicates[0]) 111 | print(results_no_duplicates[1]) 112 | print(results_no_duplicates[2]) 113 | print(results_no_duplicates[-1]) 114 | 115 | # path to the a.pkl file 116 | # a_pkl_path = "./greedyTAS/greedyTAS-epoch60/autoformer-greedyTAS(09131747).log" 117 | # a_pkl_path = "./greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl" # change this to the actual file path 118 | 119 | # # load the a.pkl file 120 | # a_pkl_data = load_pkl(a_pkl_path) 121 | 122 | # # add the inter_loss values 123 | # results_with_inter_loss = add_inter_loss(results_no_duplicates, a_pkl_data) 124 | 125 | # # print results (example) 126 | # print(results_with_inter_loss[0]) 127 | # print(results_with_inter_loss[1]) 128 | # print(results_with_inter_loss[2]) 129 | # print(results_with_inter_loss[-1]) 130 | 131 | # Save the transformed data to a new pickle file 132 |
with open('./log/search_tiny-only-supernet192-minimum_pop1050.pkl', 'wb') as file: 133 | pickle.dump(results_no_duplicates, file) 134 | 135 | print("Data saved successfully.") -------------------------------------------------------------------------------- /AutoFormer/training_free/indicators/grasp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.autograd as autograd 5 | 6 | from . import indicator 7 | from ..p_utils import get_layer_metric_array 8 | 9 | 10 | @indicator('grasp', bn=True, mode='param') 11 | def compute_grasp_per_weight(net, inputs, targets, mode, loss_fn, T=1, num_iters=1, split_data=1): 12 | # get all applicable weights 13 | weights = [] 14 | for layer in net.modules(): 15 | if layer._get_name() == 'PatchembedSuper': 16 | weights.append(layer.sampled_weight) 17 | layer.sampled_weight.requires_grad_(True) # TODO isn't this already true? 18 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 19 | weights.append(layer.samples['weight']) 20 | layer.samples['weight'].requires_grad_(True) # TODO isn't this already true? 21 | if isinstance(layer, torch.nn.Linear) and layer.out_features == 1000: 22 | weights.append(layer.samples['weight']) 23 | layer.samples['weight'].requires_grad_(True) # TODO isn't this already true? 24 | 25 | # NOTE original code had some input/target splitting into 2 26 | # I am guessing this was because of GPU mem limit 27 | net.zero_grad() 28 | N = inputs.shape[0] 29 | for sp in range(split_data): 30 | st = sp * N // split_data 31 | en = (sp + 1) * N // split_data 32 | 33 | # forward/grad pass #1 34 | grad_w = None 35 | for _ in range(num_iters): 36 | # TODO get new data, otherwise num_iters is useless!
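# The two passes below implement the GraSP inner product: pass 1 accumulates
# grad_w = dL/dw with plain gradients, pass 2 recomputes gradients with
# create_graph=True, and backward() on z = sum(grad_w * grad_f) leaves the
# Hessian-gradient product Hg in each collected weight's .grad field.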
37 | outputs = net.forward(inputs[st:en]) / T 38 | loss = loss_fn(outputs, targets[st:en]) 39 | grad_w_p = autograd.grad(loss, weights, allow_unused=True) 40 | if grad_w is None: 41 | grad_w = list(grad_w_p) 42 | else: 43 | for idx in range(len(grad_w)): 44 | grad_w[idx] += grad_w_p[idx] 45 | 46 | for sp in range(split_data): 47 | st = sp * N // split_data 48 | en = (sp + 1) * N // split_data 49 | 50 | # forward/grad pass #2 51 | outputs = net.forward(inputs[st:en]) / T 52 | loss = loss_fn(outputs, targets[st:en]) 53 | grad_f = autograd.grad(loss, weights, create_graph=True, allow_unused=True) 54 | 55 | # accumulate gradients computed in previous step and call backwards 56 | z, count = 0, 0 57 | for layer in net.modules(): 58 | if layer._get_name() == 'PatchembedSuper': 59 | if grad_w[count] is not None: 60 | z += (grad_w[count].data * grad_f[count]).sum() 61 | count += 1 62 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 63 | if grad_w[count] is not None: 64 | z += (grad_w[count].data * grad_f[count]).sum() 65 | count += 1 66 | if isinstance(layer, nn.Linear) and layer.out_features == 1000: 67 | if grad_w[count] is not None: 68 | z += (grad_w[count].data * grad_f[count]).sum() 69 | count += 1 70 | z.backward() 71 | 72 | # compute final sensitivity metric and put in grads 73 | def grasp(layer): 74 | if layer._get_name() == 'PatchembedSuper': 75 | if layer.sampled_weight.grad is not None: 76 | return -layer.sampled_weight.data * layer.sampled_weight.grad # -theta_q Hg 77 | # NOTE in the grasp code they take the *bottom* (1-p)% of values 78 | # but we take the *top* (1-p)%, therefore we remove the -ve sign 79 | # EDIT accuracy seems to be negatively correlated with this metric, so we add -ve sign here! 80 | else: 81 | return torch.zeros_like(layer.sampled_weight) 82 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 83 | if layer.samples['weight'].grad is not None: 84 | return -layer.samples['weight'].data * layer.samples['weight'].grad # -theta_q Hg 85 | # NOTE in the grasp code they take the *bottom* (1-p)% of values 86 | # but we take the *top* (1-p)%, therefore we remove the -ve sign 87 | # EDIT accuracy seems to be negatively correlated with this metric, so we add -ve sign here! 88 | else: 89 | return torch.zeros_like(layer.samples['weight']) 90 | if isinstance(layer, torch.nn.Linear) and layer.out_features == 1000: 91 | if layer.samples['weight'].grad is not None: 92 | return -layer.samples['weight'].data * layer.samples['weight'].grad # -theta_q Hg 93 | # NOTE in the grasp code they take the *bottom* (1-p)% of values 94 | # but we take the *top* (1-p)%, therefore we remove the -ve sign 95 | # EDIT accuracy seems to be negatively correlated with this metric, so we add -ve sign here! 96 | else: 97 | return torch.zeros_like(layer.samples['weight']) 98 | 99 | grads = get_layer_metric_array(net, grasp, mode) 100 | 101 | return grads -------------------------------------------------------------------------------- /AutoFormer_original/README.md: -------------------------------------------------------------------------------- 1 | # AutoFormer: Searching Transformers for Visual Recognition 2 | 3 | **This is an official implementation of AutoFormer.** 4 | 5 | AutoFormer is a new one-shot architecture search framework dedicated to vision transformer search. It entangles the weights of different vision transformer blocks in the same layers during supernet training.
6 | Benefiting from the strategy, the trained supernet allows thousands of subnets to be very well-trained. Specifically, the performance of these subnets with weights inherited from the supernet is comparable to those retrained from scratch. 7 | 8 |
9 | AutoFormer overview 10 | AutoFormer detail 11 |
12 | 13 | 14 | ## Highlights 15 | - Once-for-all 16 | 17 | AutoFormer is a simple yet effective method to train a once-for-all vision transformer supernet. 18 | 19 | - Competitive performance 20 | 21 | AutoFormers consistently outperform DeiTs. 22 | 23 | ## Environment Setup 24 | 25 | To set up the environment you can easily run the following command: 26 | ```buildoutcfg 27 | conda create -n Autoformer python=3.6 28 | conda activate Autoformer 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | ## Data Preparation 33 | You need to first download the [ImageNet-2012](http://www.image-net.org/) to the folder `./data/imagenet` and move the validation set to the subfolder `./data/imagenet/val`. To move the validation set into per-class folders, you could use a short relocation script; a minimal example is sketched in the Search section below. 34 | 35 | The directory structure is the standard layout, as follows. 36 | ``` 37 | /path/to/imagenet/ 38 | train/ 39 | class1/ 40 | img1.jpeg 41 | class2/ 42 | img2.jpeg 43 | val/ 44 | class1/ 45 | img3.jpeg 46 | class2/ 47 | img4.jpeg 48 | ``` 49 | 50 | 51 | ## Model Zoo 52 | For evaluation, we provide the checkpoints of our models in [Google Drive](https://drive.google.com/drive/folders/1HqzY3afqQUMI6pJ5_BgR2RquJU_b_3eg?usp=sharing) and [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo). 53 | 54 | After downloading the models, you can do the evaluation following the description in *Quick Start - Test*. 55 | 56 | Model download links: 57 | 58 | Model | Params. | Top-1 Acc. % | Top-5 Acc. % | Download link 59 | --- |:---:|:---:|:---:|:---: 60 | AutoFormer-T | 5.8M | 75.3 | 92.7 | [Google Drive](https://drive.google.com/file/d/1uRCW3doQHgn2H-LjyalYEZ4CvmnQtr6Q/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-tiny.pth) 61 | AutoFormer-S | 22.9M | 81.7 | 95.7 | [Google Drive](https://drive.google.com/file/d/1JTBmLR_nW7-ZbTKafWFvSl8J2orJXiNa/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-small.pth) 62 | AutoFormer-B | 53.7M | 82.4 | 95.7 | [Google Drive](https://drive.google.com/file/d/1KPjUshk0SbqkaTzlirjPHM9pu19N5w0e/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-base.pth) 63 | 64 | 65 | ## Quick Start 66 | We provide *Supernet Train, Search, Test* code of AutoFormer as follows. 67 | 68 | ### Supernet Train 69 | 70 | To train the supernet-T/S/B, we provide the corresponding supernet configuration files in `/experiments/supernet/`. For example, to train the supernet-B, you can run the following command. The default output path is `./`; you can specify the path with the argument `--output`. 71 | 72 | ```buildoutcfg 73 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENT --gp \ 74 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --epochs 500 --warmup-epochs 20 \ 75 | --output /OUTPUT_PATH --batch-size 128 76 | ``` 77 | 78 | ### Search 79 | We run our evolution search on part of the ImageNet training dataset and use the validation set of ImageNet as the test set for fair comparison. To generate the subImageNet in `/PATH/TO/IMAGENET`, you could simply run: 80 | ```buildoutcfg 81 | python ./lib/subImageNet.py --data-path /PATH/TO/IMAGENT 82 | ``` 83 | 84 | 85 | After obtaining the subImageNet and training the supernet, we can perform the evolution search using the command below.
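For the validation-set move mentioned under Data Preparation above, the following is a minimal sketch rather than a script shipped with this repository: it assumes a mapping file (here called `val_map.txt`, which you must prepare yourself) whose lines pair each validation image name with its class ID, e.g. `ILSVRC2012_val_00000001.JPEG n01751748`.

```python
import os
import shutil

def relocate_val_images(val_dir, mapping_file):
    # Move flat validation images into one subfolder per class.
    with open(mapping_file) as f:
        for line in f:
            filename, class_id = line.split()
            class_dir = os.path.join(val_dir, class_id)
            os.makedirs(class_dir, exist_ok=True)
            shutil.move(os.path.join(val_dir, filename),
                        os.path.join(class_dir, filename))

if __name__ == '__main__':
    # Paths follow the Data Preparation layout above; adjust as needed.
    relocate_val_images('./data/imagenet/val', 'val_map.txt')
```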
Please remember to config the specific constraint in this evolution search using `--min-param-limits` and `--param-limits`: 86 | ```buildoutcfg 87 | python -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path /PATH/TO/IMAGENT --gp \ 88 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --resume /PATH/TO/CHECKPOINT \ 89 | --min-param-limits YOUR/CONFIG --param-limits YOUR/CONFIG --data-set EVO_IMNET 90 | ``` 91 | 92 | ### Test 93 | To test our trained models, you need to put the downloaded model in `/PATH/TO/CHECKPOINT`. After that you could use the following command to test the model (Please change your config file and model checkpoint according to different models. Here we use the AutoFormer-B as an example). 94 | ```buildoutcfg 95 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENT --gp \ 96 | --change_qk --relative_position --mode retrain --dist-eval --cfg ./experiments/subnet/AutoFormer-B.yaml --resume /PATH/TO/CHECKPOINT --eval 97 | ``` 98 | 99 | ## Performance 100 | 101 | **Left:** Top-1 accuracy on ImageNet. Our method achieves very competitive performance, being superior to the recent DeiT and ViT. **Right:** 1000 random sampled good architectures in the supernet-S. The supernet trained under our strategy allows subnets to be well optimized. 102 | 103 |
104 | 105 | 106 |
107 | 108 | ## Bibtex 109 | 110 | If this repo is helpful for you, please consider to cite it. Thank you! :) 111 | ```bibtex 112 | @InProceedings{AutoFormer, 113 | title = {AutoFormer: Searching Transformers for Visual Recognition}, 114 | author = {Chen, Minghao and Peng, Houwen and Fu, Jianlong and Ling, Haibin}, 115 | booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, 116 | month = {October}, 117 | year = {2021}, 118 | pages = {12270-12280} 119 | } 120 | ``` 121 | 122 | ## Acknowledgements 123 | 124 | The codes are inspired by [HAT](https://github.com/mit-han-lab/hardware-aware-transformers), [timm](https://github.com/rwightman/pytorch-image-models), [DeiT](https://github.com/facebookresearch/deit), [SPOS](https://github.com/megvii-model/SinglePathOneShot). 125 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/README.md: -------------------------------------------------------------------------------- 1 | # AutoFormer: Searching Transformers for Visual Recognition 2 | 3 | **This is an official implementation of AutoFormer.** 4 | 5 | AutoFormer is new one-shot architecture search framework dedicated to vision transformer search. It entangles the weights of different vision transformer blocks in the same layers during supernet training. 6 | Benefiting from the strategy, the trained supernet allows thousands of subnets to be very well-trained. Specifically, the performance of these subnets with weights inherited from the supernet is comparable to those retrained from scratch. 7 | 8 |
9 | AutoFormer overview 10 | AutoFormer detail 11 |
12 | 13 | 14 | ## Highlights 15 | - Once-for-all 16 | 17 | AutoFormer is a simple yet effective method to train a once-for-all vision transformer supernet. 18 | 19 | - Competitive performance 20 | 21 | AutoFormers consistently outperform DeiTs. 22 | 23 | ## Environment Setup 24 | 25 | To set up the environment you can easily run the following command: 26 | ```buildoutcfg 27 | conda create -n Autoformer python=3.6 28 | conda activate Autoformer 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | ## Data Preparation 33 | You need to first download the [ImageNet-2012](http://www.image-net.org/) to the folder `./data/imagenet` and move the validation set to the subfolder `./data/imagenet/val`. To move the validation set into per-class folders, you could use a short relocation script; a minimal example is sketched in `AutoFormer_original/README.md` above. 34 | 35 | The directory structure is the standard layout, as follows. 36 | ``` 37 | /path/to/imagenet/ 38 | train/ 39 | class1/ 40 | img1.jpeg 41 | class2/ 42 | img2.jpeg 43 | val/ 44 | class1/ 45 | img3.jpeg 46 | class2/ 47 | img4.jpeg 48 | ``` 49 | 50 | 51 | ## Model Zoo 52 | For evaluation, we provide the checkpoints of our models in [Google Drive](https://drive.google.com/drive/folders/1HqzY3afqQUMI6pJ5_BgR2RquJU_b_3eg?usp=sharing) and [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo). 53 | 54 | After downloading the models, you can do the evaluation following the description in *Quick Start - Test*. 55 | 56 | Model download links: 57 | 58 | Model | Params. | Top-1 Acc. % | Top-5 Acc. % | Download link 59 | --- |:---:|:---:|:---:|:---: 60 | AutoFormer-T | 5.8M | 75.3 | 92.7 | [Google Drive](https://drive.google.com/file/d/1uRCW3doQHgn2H-LjyalYEZ4CvmnQtr6Q/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-tiny.pth) 61 | AutoFormer-S | 22.9M | 81.7 | 95.7 | [Google Drive](https://drive.google.com/file/d/1JTBmLR_nW7-ZbTKafWFvSl8J2orJXiNa/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-small.pth) 62 | AutoFormer-B | 53.7M | 82.4 | 95.7 | [Google Drive](https://drive.google.com/file/d/1KPjUshk0SbqkaTzlirjPHM9pu19N5w0e/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-base.pth) 63 | 64 | 65 | ## Quick Start 66 | We provide *Supernet Train, Search, Test* code of AutoFormer as follows. 67 | 68 | ### Supernet Train 69 | 70 | To train the supernet-T/S/B, we provide the corresponding supernet configuration files in `/experiments/supernet/`. For example, to train the supernet-B, you can run the following command. The default output path is `./`; you can specify the path with the argument `--output`. 71 | 72 | ```buildoutcfg 73 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENT --gp \ 74 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --epochs 500 --warmup-epochs 20 \ 75 | --output /OUTPUT_PATH --batch-size 128 76 | ``` 77 | 78 | ### Search 79 | We run our evolution search on part of the ImageNet training dataset and use the validation set of ImageNet as the test set for fair comparison. To generate the subImageNet in `/PATH/TO/IMAGENET`, you could simply run: 80 | ```buildoutcfg 81 | python ./lib/subImageNet.py --data-path /PATH/TO/IMAGENT 82 | ``` 83 | 84 | 85 | After obtaining the subImageNet and training the supernet, we can perform the evolution search using the command below.
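The evolution-search shell scripts in this repository (for example `evolution_search.sh`) repeat this command once per parameter bracket. The sketch below shows one way to drive such a sweep from Python; the checkpoint path, the `tag` naming scheme, and the `./log/` layout are assumptions that mirror those scripts rather than a fixed interface.

```python
import shlex
import subprocess

BASE = (
    "python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py"
    " --data-path /data --gp --change_qk --relative_position --dist-eval"
    " --cfg ./experiments/supernet/supernet-T.yaml"
)

def sweep(checkpoint, tag, brackets=((5, 6), (6, 7), (7, 8), (8, 9), (9, 10))):
    # Run one evolution search per parameter bracket, one log file per run.
    for lo, hi in brackets:
        cmd = (
            f"{BASE} --resume {checkpoint}"
            f" --min-param-limits {lo} --param-limits {hi}"
            f" --log-file-path ./log/search_{tag}_{hi}M.log"
        )
        subprocess.run(shlex.split(cmd), check=True)  # stop the sweep on failure

if __name__ == "__main__":
    sweep("/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth", "sn_400_bottom")
```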
Please remember to config the specific constraint in this evolution search using `--min-param-limits` and `--param-limits`: 86 | ```buildoutcfg 87 | python -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path /PATH/TO/IMAGENT --gp \ 88 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --resume /PATH/TO/CHECKPOINT \ 89 | --min-param-limits YOUR/CONFIG --param-limits YOUR/CONFIG --data-set EVO_IMNET 90 | ``` 91 | 92 | ### Test 93 | To test our trained models, you need to put the downloaded model in `/PATH/TO/CHECKPOINT`. After that you could use the following command to test the model (Please change your config file and model checkpoint according to different models. Here we use the AutoFormer-B as an example). 94 | ```buildoutcfg 95 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENT --gp \ 96 | --change_qk --relative_position --mode retrain --dist-eval --cfg ./experiments/subnet/AutoFormer-B.yaml --resume /PATH/TO/CHECKPOINT --eval 97 | ``` 98 | 99 | ## Performance 100 | 101 | **Left:** Top-1 accuracy on ImageNet. Our method achieves very competitive performance, being superior to the recent DeiT and ViT. **Right:** 1000 random sampled good architectures in the supernet-S. The supernet trained under our strategy allows subnets to be well optimized. 102 | 103 |
104 | 105 | 106 |
107 | 108 | ## Bibtex 109 | 110 | If this repo is helpful for you, please consider to cite it. Thank you! :) 111 | ```bibtex 112 | @InProceedings{AutoFormer, 113 | title = {AutoFormer: Searching Transformers for Visual Recognition}, 114 | author = {Chen, Minghao and Peng, Houwen and Fu, Jianlong and Ling, Haibin}, 115 | booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, 116 | month = {October}, 117 | year = {2021}, 118 | pages = {12270-12280} 119 | } 120 | ``` 121 | 122 | ## Acknowledgements 123 | 124 | The codes are inspired by [HAT](https://github.com/mit-han-lab/hardware-aware-transformers), [timm](https://github.com/rwightman/pytorch-image-models), [DeiT](https://github.com/facebookresearch/deit), [SPOS](https://github.com/megvii-model/SinglePathOneShot). 125 | -------------------------------------------------------------------------------- /AutoFormer/supernet_engine(save).py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = sample_configs(choices=choices) 56 | model_module = unwrap_model(model) 57 | model_module.set_sample_config(config=config) 58 | elif mode == 'retrain': 59 | config = retrain_config 60 | model_module = unwrap_model(model) 61 | model_module.set_sample_config(config=config) 62 | if mixup_fn is not None: 63 | samples, targets = mixup_fn(samples, targets) 64 | if amp: 65 | with torch.cuda.amp.autocast(): 66 | if teacher_model: 67 | with torch.no_grad(): 68 | teach_output = teacher_model(samples) 69 | _, teacher_label = teach_output.topk(1, 1, True, True) 70 | outputs = model(samples) 71 | loss = 1/2 * 
criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 72 | else: 73 | outputs = model(samples) 74 | loss = criterion(outputs, targets) 75 | else: 76 | outputs = model(samples) 77 | if teacher_model: 78 | with torch.no_grad(): 79 | teach_output = teacher_model(samples) 80 | _, teacher_label = teach_output.topk(1, 1, True, True) 81 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 82 | else: 83 | loss = criterion(outputs, targets) 84 | 85 | loss_value = loss.item() 86 | 87 | if not math.isfinite(loss_value): 88 | print("Loss is {}, stopping training".format(loss_value)) 89 | sys.exit(1) 90 | 91 | optimizer.zero_grad() 92 | 93 | # this attribute is added by timm on one optimizer (adahessian) 94 | if amp: 95 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 96 | loss_scaler(loss, optimizer, clip_grad=max_norm, 97 | parameters=model.parameters(), create_graph=is_second_order) 98 | else: 99 | loss.backward() 100 | optimizer.step() 101 | 102 | torch.cuda.synchronize() 103 | if model_ema is not None: 104 | model_ema.update(model) 105 | 106 | metric_logger.update(loss=loss_value) 107 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 108 | 109 | # gather the stats from all processes 110 | metric_logger.synchronize_between_processes() 111 | print("Averaged stats:", metric_logger) 112 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 113 | 114 | @torch.no_grad() 115 | def evaluate(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 116 | criterion = torch.nn.CrossEntropyLoss() 117 | 118 | metric_logger = utils.MetricLogger(delimiter=" ") 119 | header = 'Test:' 120 | 121 | # switch to evaluation mode 122 | model.eval() 123 | if mode == 'super': 124 | config = sample_configs(choices=choices) 125 | model_module = unwrap_model(model) 126 | model_module.set_sample_config(config=config) 127 | else: 128 | config = retrain_config 129 | model_module = unwrap_model(model) 130 | model_module.set_sample_config(config=config) 131 | 132 | 133 | print("sampled model config: {}".format(config)) 134 | parameters = model_module.get_sampled_params_numel(config) 135 | print("sampled model parameters: {}".format(parameters)) 136 | 137 | for images, target in metric_logger.log_every(data_loader, 10, header): 138 | images = images.to(device, non_blocking=True) 139 | target = target.to(device, non_blocking=True) 140 | # compute output 141 | if amp: 142 | with torch.cuda.amp.autocast(): 143 | output = model(images) 144 | loss = criterion(output, target) 145 | else: 146 | output = model(images) 147 | loss = criterion(output, target) 148 | 149 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 150 | 151 | batch_size = images.shape[0] 152 | metric_logger.update(loss=loss.item()) 153 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 154 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 155 | # gather the stats from all processes 156 | metric_logger.synchronize_between_processes() 157 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 158 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 159 | 160 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer_original_greedy/z_supernet_engine.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = sample_configs(choices=choices) 56 | model_module = unwrap_model(model) 57 | model_module.set_sample_config(config=config) 58 | elif mode == 'retrain': 59 | config = retrain_config 60 | model_module = unwrap_model(model) 61 | model_module.set_sample_config(config=config) 62 | if mixup_fn is not None: 63 | samples, targets = mixup_fn(samples, targets) 64 | if amp: 65 | with torch.cuda.amp.autocast(): 66 | if teacher_model: 67 | with torch.no_grad(): 68 | teach_output = teacher_model(samples) 69 | _, teacher_label = teach_output.topk(1, 1, True, True) 70 | outputs = model(samples) 71 | loss = 1/2 * criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 72 | else: 73 | outputs = model(samples) 74 | loss = criterion(outputs, targets) 75 | else: 76 | outputs = model(samples) 77 | if teacher_model: 78 | with torch.no_grad(): 79 | teach_output = teacher_model(samples) 80 | _, teacher_label = teach_output.topk(1, 1, True, True) 81 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 82 | else: 83 | loss = criterion(outputs, targets) 84 | 85 | loss_value = loss.item() 86 | 87 | if not math.isfinite(loss_value): 88 | print("Loss is {}, stopping training".format(loss_value)) 89 | sys.exit(1) 90 | 91 | optimizer.zero_grad() 92 | 93 | # this attribute is added by timm on one optimizer (adahessian) 94 | if amp: 95 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 96 | loss_scaler(loss, optimizer, clip_grad=max_norm, 97 | 
parameters=model.parameters(), create_graph=is_second_order) 98 | else: 99 | loss.backward() 100 | optimizer.step() 101 | 102 | torch.cuda.synchronize() 103 | if model_ema is not None: 104 | model_ema.update(model) 105 | 106 | metric_logger.update(loss=loss_value) 107 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 108 | 109 | # gather the stats from all processes 110 | metric_logger.synchronize_between_processes() 111 | print("Averaged stats:", metric_logger) 112 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 113 | 114 | @torch.no_grad() 115 | def evaluate(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 116 | criterion = torch.nn.CrossEntropyLoss() 117 | 118 | metric_logger = utils.MetricLogger(delimiter=" ") 119 | header = 'Test:' 120 | 121 | # switch to evaluation mode 122 | model.eval() 123 | if mode == 'super': 124 | config = sample_configs(choices=choices) 125 | model_module = unwrap_model(model) 126 | model_module.set_sample_config(config=config) 127 | else: 128 | config = retrain_config 129 | model_module = unwrap_model(model) 130 | model_module.set_sample_config(config=config) 131 | 132 | 133 | print("sampled model config: {}".format(config)) 134 | parameters = model_module.get_sampled_params_numel(config) 135 | print("sampled model parameters: {}".format(parameters)) 136 | 137 | for images, target in metric_logger.log_every(data_loader, 10, header): 138 | images = images.to(device, non_blocking=True) 139 | target = target.to(device, non_blocking=True) 140 | # compute output 141 | if amp: 142 | with torch.cuda.amp.autocast(): 143 | output = model(images) 144 | loss = criterion(output, target) 145 | else: 146 | output = model(images) 147 | loss = criterion(output, target) 148 | 149 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 150 | 151 | batch_size = images.shape[0] 152 | metric_logger.update(loss=loss.item()) 153 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 154 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 155 | # gather the stats from all processes 156 | metric_logger.synchronize_between_processes() 157 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 158 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 159 | 160 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer_original_greedy/supernet_engine_base.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: 
float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = sample_configs(choices=choices) 56 | model_module = unwrap_model(model) 57 | model_module.set_sample_config(config=config) 58 | elif mode == 'retrain': 59 | config = retrain_config 60 | model_module = unwrap_model(model) 61 | model_module.set_sample_config(config=config) 62 | if mixup_fn is not None: 63 | samples, targets = mixup_fn(samples, targets) 64 | if amp: 65 | with torch.cuda.amp.autocast(): 66 | if teacher_model: 67 | with torch.no_grad(): 68 | teach_output = teacher_model(samples) 69 | _, teacher_label = teach_output.topk(1, 1, True, True) 70 | outputs = model(samples) 71 | loss = 1/2 * criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 72 | else: 73 | outputs = model(samples) 74 | loss = criterion(outputs, targets) 75 | else: 76 | outputs = model(samples) 77 | if teacher_model: 78 | with torch.no_grad(): 79 | teach_output = teacher_model(samples) 80 | _, teacher_label = teach_output.topk(1, 1, True, True) 81 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 82 | else: 83 | loss = criterion(outputs, targets) 84 | 85 | loss_value = loss.item() 86 | 87 | if not math.isfinite(loss_value): 88 | print("Loss is {}, stopping training".format(loss_value)) 89 | sys.exit(1) 90 | 91 | optimizer.zero_grad() 92 | 93 | # this attribute is added by timm on one optimizer (adahessian) 94 | if amp: 95 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 96 | loss_scaler(loss, optimizer, clip_grad=max_norm, 97 | parameters=model.parameters(), create_graph=is_second_order) 98 | else: 99 | loss.backward() 100 | optimizer.step() 101 | 102 | torch.cuda.synchronize() 103 | if model_ema is not None: 104 | model_ema.update(model) 105 | 106 | metric_logger.update(loss=loss_value) 107 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 108 | 109 | # gather the stats from all processes 110 | metric_logger.synchronize_between_processes() 111 | print("Averaged stats:", metric_logger) 112 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 113 | 114 | @torch.no_grad() 115 | def evaluate(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 116 | criterion = torch.nn.CrossEntropyLoss() 117 | 118 | metric_logger = utils.MetricLogger(delimiter=" ") 119 | header = 'Test:' 120 | 121 | # switch to evaluation mode 122 | model.eval() 123 | if mode == 'super': 124 | config = sample_configs(choices=choices) 125 | 
model_module = unwrap_model(model) 126 | model_module.set_sample_config(config=config) 127 | else: 128 | config = retrain_config 129 | model_module = unwrap_model(model) 130 | model_module.set_sample_config(config=config) 131 | 132 | 133 | print("sampled model config: {}".format(config)) 134 | parameters = model_module.get_sampled_params_numel(config) 135 | print("sampled model parameters: {}".format(parameters)) 136 | 137 | for images, target in metric_logger.log_every(data_loader, 10, header): 138 | images = images.to(device, non_blocking=True) 139 | target = target.to(device, non_blocking=True) 140 | # compute output 141 | if amp: 142 | with torch.cuda.amp.autocast(): 143 | output = model(images) 144 | loss = criterion(output, target) 145 | else: 146 | output = model(images) 147 | loss = criterion(output, target) 148 | 149 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 150 | 151 | batch_size = images.shape[0] 152 | metric_logger.update(loss=loss.item()) 153 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 154 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 155 | # gather the stats from all processes 156 | metric_logger.synchronize_between_processes() 157 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 158 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 159 | 160 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer_original_greedy/supernet_engine_real_original.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch_original(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = 
targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = sample_configs(choices=choices) 56 | model_module = unwrap_model(model) 57 | model_module.set_sample_config(config=config) 58 | elif mode == 'retrain': 59 | config = retrain_config 60 | model_module = unwrap_model(model) 61 | model_module.set_sample_config(config=config) 62 | if mixup_fn is not None: 63 | samples, targets = mixup_fn(samples, targets) 64 | if amp: 65 | with torch.cuda.amp.autocast(): 66 | if teacher_model: 67 | with torch.no_grad(): 68 | teach_output = teacher_model(samples) 69 | _, teacher_label = teach_output.topk(1, 1, True, True) 70 | outputs = model(samples) 71 | loss = 1/2 * criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 72 | else: 73 | outputs = model(samples) 74 | loss = criterion(outputs, targets) 75 | else: 76 | outputs = model(samples) 77 | if teacher_model: 78 | with torch.no_grad(): 79 | teach_output = teacher_model(samples) 80 | _, teacher_label = teach_output.topk(1, 1, True, True) 81 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 82 | else: 83 | loss = criterion(outputs, targets) 84 | 85 | loss_value = loss.item() 86 | 87 | if not math.isfinite(loss_value): 88 | print("Loss is {}, stopping training".format(loss_value)) 89 | sys.exit(1) 90 | 91 | optimizer.zero_grad() 92 | 93 | # this attribute is added by timm on one optimizer (adahessian) 94 | if amp: 95 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 96 | loss_scaler(loss, optimizer, clip_grad=max_norm, 97 | parameters=model.parameters(), create_graph=is_second_order) 98 | else: 99 | loss.backward() 100 | optimizer.step() 101 | 102 | torch.cuda.synchronize() 103 | if model_ema is not None: 104 | model_ema.update(model) 105 | 106 | metric_logger.update(loss=loss_value) 107 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 108 | 109 | # gather the stats from all processes 110 | metric_logger.synchronize_between_processes() 111 | print("Averaged stats:", metric_logger) 112 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 113 | 114 | @torch.no_grad() 115 | def evaluate_original(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 116 | criterion = torch.nn.CrossEntropyLoss() 117 | 118 | metric_logger = utils.MetricLogger(delimiter=" ") 119 | header = 'Test:' 120 | 121 | # switch to evaluation mode 122 | model.eval() 123 | if mode == 'super': 124 | config = sample_configs(choices=choices) 125 | model_module = unwrap_model(model) 126 | model_module.set_sample_config(config=config) 127 | else: 128 | config = retrain_config 129 | model_module = unwrap_model(model) 130 | model_module.set_sample_config(config=config) 131 | 132 | 133 | print("sampled model config: {}".format(config)) 134 | parameters = model_module.get_sampled_params_numel(config) 135 | print("sampled model parameters: {}".format(parameters)) 136 | 137 | for images, target in metric_logger.log_every(data_loader, 10, header): 138 | images = images.to(device, non_blocking=True) 139 | target = target.to(device, non_blocking=True) 140 | # compute output 141 | if amp: 142 | with torch.cuda.amp.autocast(): 143 | output = model(images) 144 | loss = criterion(output, target) 145 | else: 146 | output = model(images) 147 | loss = criterion(output, target) 148 | 149 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 150 | 151 | batch_size = images.shape[0] 
152 | metric_logger.update(loss=loss.item()) 153 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 154 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 155 | # gather the stats from all processes 156 | metric_logger.synchronize_between_processes() 157 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 158 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 159 | 160 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer_original_greedy/supernet_engine_only_supernet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch_original(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = { 56 | 'layer_num': 14, 57 | 'mlp_ratio': [4.0] * 14, 58 | 'num_heads': [4] * 14, 59 | 'embed_dim': [240] * 14 60 | } 61 | # config = sample_configs(choices=choices) 62 | model_module = unwrap_model(model) 63 | model_module.set_sample_config(config=config) 64 | elif mode == 'retrain': 65 | config = retrain_config 66 | model_module = unwrap_model(model) 67 | model_module.set_sample_config(config=config) 68 | if mixup_fn is not None: 69 | samples, targets = mixup_fn(samples, targets) 70 | if amp: 71 | with torch.cuda.amp.autocast(): 72 | if teacher_model: 73 | with torch.no_grad(): 74 | teach_output = teacher_model(samples) 75 | _, teacher_label = teach_output.topk(1, 1, True, True) 76 | outputs = model(samples) 77 | loss = 1/2 * criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 78 | else: 79 | 
outputs = model(samples) 80 | loss = criterion(outputs, targets) 81 | else: 82 | outputs = model(samples) 83 | if teacher_model: 84 | with torch.no_grad(): 85 | teach_output = teacher_model(samples) 86 | _, teacher_label = teach_output.topk(1, 1, True, True) 87 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 88 | else: 89 | loss = criterion(outputs, targets) 90 | 91 | loss_value = loss.item() 92 | 93 | if not math.isfinite(loss_value): 94 | print("Loss is {}, stopping training".format(loss_value)) 95 | sys.exit(1) 96 | 97 | optimizer.zero_grad() 98 | 99 | # this attribute is added by timm on one optimizer (adahessian) 100 | if amp: 101 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 102 | loss_scaler(loss, optimizer, clip_grad=max_norm, 103 | parameters=model.parameters(), create_graph=is_second_order) 104 | else: 105 | loss.backward() 106 | optimizer.step() 107 | 108 | torch.cuda.synchronize() 109 | if model_ema is not None: 110 | model_ema.update(model) 111 | 112 | metric_logger.update(loss=loss_value) 113 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 114 | 115 | # gather the stats from all processes 116 | metric_logger.synchronize_between_processes() 117 | print("Averaged stats:", metric_logger) 118 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 119 | 120 | @torch.no_grad() 121 | def evaluate_original(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 122 | criterion = torch.nn.CrossEntropyLoss() 123 | 124 | metric_logger = utils.MetricLogger(delimiter=" ") 125 | header = 'Test:' 126 | 127 | # switch to evaluation mode 128 | model.eval() 129 | if mode == 'super': 130 | config = sample_configs(choices=choices) 131 | model_module = unwrap_model(model) 132 | model_module.set_sample_config(config=config) 133 | else: 134 | config = retrain_config 135 | model_module = unwrap_model(model) 136 | model_module.set_sample_config(config=config) 137 | 138 | 139 | print("sampled model config: {}".format(config)) 140 | parameters = model_module.get_sampled_params_numel(config) 141 | print("sampled model parameters: {}".format(parameters)) 142 | 143 | for images, target in metric_logger.log_every(data_loader, 10, header): 144 | images = images.to(device, non_blocking=True) 145 | target = target.to(device, non_blocking=True) 146 | # compute output 147 | if amp: 148 | with torch.cuda.amp.autocast(): 149 | output = model(images) 150 | loss = criterion(output, target) 151 | else: 152 | output = model(images) 153 | loss = criterion(output, target) 154 | 155 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 156 | 157 | batch_size = images.shape[0] 158 | metric_logger.update(loss=loss.item()) 159 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 160 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 161 | # gather the stats from all processes 162 | metric_logger.synchronize_between_processes() 163 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 164 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 165 | 166 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer/lib/utils.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import time 4 | from 
collections import defaultdict, deque 5 | import datetime 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | class SmoothedValue(object): 12 | """Track a series of values and provide access to smoothed values over a 13 | window or the global series average. 14 | """ 15 | 16 | def __init__(self, window_size=20, fmt=None): 17 | if fmt is None: 18 | fmt = "{median:.4f} ({global_avg:.4f})" 19 | self.deque = deque(maxlen=window_size) 20 | self.total = 0.0 21 | self.count = 0 22 | self.fmt = fmt 23 | 24 | def update(self, value, n=1): 25 | self.deque.append(value) 26 | self.count += n 27 | self.total += value * n 28 | 29 | def synchronize_between_processes(self): 30 | """ 31 | Warning: does not synchronize the deque! 32 | """ 33 | if not is_dist_avail_and_initialized(): 34 | return 35 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') 36 | dist.barrier() 37 | dist.all_reduce(t) 38 | t = t.tolist() 39 | self.count = int(t[0]) 40 | self.total = t[1] 41 | 42 | @property 43 | def median(self): 44 | d = torch.tensor(list(self.deque)) 45 | return d.median().item() 46 | 47 | @property 48 | def avg(self): 49 | d = torch.tensor(list(self.deque), dtype=torch.float32) 50 | return d.mean().item() 51 | 52 | @property 53 | def global_avg(self): 54 | return self.total / self.count 55 | 56 | @property 57 | def max(self): 58 | return max(self.deque) 59 | 60 | @property 61 | def value(self): 62 | return self.deque[-1] 63 | 64 | def __str__(self): 65 | return self.fmt.format( 66 | median=self.median, 67 | avg=self.avg, 68 | global_avg=self.global_avg, 69 | max=self.max, 70 | value=self.value) 71 | 72 | 73 | class MetricLogger(object): 74 | def __init__(self, delimiter="\t"): 75 | self.meters = defaultdict(SmoothedValue) 76 | self.delimiter = delimiter 77 | 78 | def update(self, **kwargs): 79 | for k, v in kwargs.items(): 80 | if isinstance(v, torch.Tensor): 81 | v = v.item() 82 | assert isinstance(v, (float, int)) 83 | self.meters[k].update(v) 84 | 85 | def __getattr__(self, attr): 86 | if attr in self.meters: 87 | return self.meters[attr] 88 | if attr in self.__dict__: 89 | return self.__dict__[attr] 90 | raise AttributeError("'{}' object has no attribute '{}'".format( 91 | type(self).__name__, attr)) 92 | 93 | def __str__(self): 94 | loss_str = [] 95 | for name, meter in self.meters.items(): 96 | loss_str.append( 97 | "{}: {}".format(name, str(meter)) 98 | ) 99 | return self.delimiter.join(loss_str) 100 | 101 | def synchronize_between_processes(self): 102 | for meter in self.meters.values(): 103 | meter.synchronize_between_processes() 104 | 105 | def add_meter(self, name, meter): 106 | self.meters[name] = meter 107 | 108 | def log_every(self, iterable, print_freq, header=None): 109 | i = 0 110 | if not header: 111 | header = '' 112 | start_time = time.time() 113 | end = time.time() 114 | iter_time = SmoothedValue(fmt='{avg:.4f}') 115 | data_time = SmoothedValue(fmt='{avg:.4f}') 116 | space_fmt = ':' + str(len(str(len(iterable)))) + 'd' 117 | log_msg = [ 118 | header, 119 | '[{0' + space_fmt + '}/{1}]', 120 | 'eta: {eta}', 121 | '{meters}', 122 | 'time: {time}', 123 | 'data: {data}' 124 | ] 125 | if torch.cuda.is_available(): 126 | log_msg.append('max mem: {memory:.0f}') 127 | log_msg = self.delimiter.join(log_msg) 128 | MB = 1024.0 * 1024.0 129 | for obj in iterable: 130 | data_time.update(time.time() - end) 131 | yield obj 132 | iter_time.update(time.time() - end) 133 | if i % print_freq == 0 or i == len(iterable) - 1: 134 | eta_seconds = iter_time.global_avg * 
(len(iterable) - i) 135 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 136 | if torch.cuda.is_available(): 137 | print(log_msg.format( 138 | i, len(iterable), eta=eta_string, 139 | meters=str(self), 140 | time=str(iter_time), data=str(data_time), 141 | memory=torch.cuda.max_memory_allocated() / MB)) 142 | else: 143 | print(log_msg.format( 144 | i, len(iterable), eta=eta_string, 145 | meters=str(self), 146 | time=str(iter_time), data=str(data_time))) 147 | i += 1 148 | end = time.time() 149 | total_time = time.time() - start_time 150 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 151 | print('{} Total time: {} ({:.4f} s / it)'.format( 152 | header, total_time_str, total_time / len(iterable))) 153 | 154 | 155 | def _load_checkpoint_for_ema(model_ema, checkpoint): 156 | """ 157 | Workaround for ModelEma._load_checkpoint to accept an already-loaded object 158 | """ 159 | mem_file = io.BytesIO() 160 | torch.save(checkpoint, mem_file) 161 | mem_file.seek(0) 162 | model_ema._load_checkpoint(mem_file) 163 | 164 | 165 | def setup_for_distributed(is_master): 166 | """ 167 | This function disables printing when not in master process 168 | """ 169 | import builtins as __builtin__ 170 | builtin_print = __builtin__.print 171 | 172 | def print(*args, **kwargs): 173 | force = kwargs.pop('force', False) 174 | if is_master or force: 175 | builtin_print(*args, **kwargs) 176 | 177 | __builtin__.print = print 178 | 179 | 180 | def is_dist_avail_and_initialized(): 181 | if not dist.is_available(): 182 | return False 183 | if not dist.is_initialized(): 184 | return False 185 | return True 186 | 187 | 188 | def get_world_size(): 189 | if not is_dist_avail_and_initialized(): 190 | return 1 191 | return dist.get_world_size() 192 | 193 | 194 | def get_rank(): 195 | if not is_dist_avail_and_initialized(): 196 | return 0 197 | return dist.get_rank() 198 | 199 | 200 | def is_main_process(): 201 | return get_rank() == 0 202 | 203 | 204 | def save_on_master(*args, **kwargs): 205 | if is_main_process(): 206 | torch.save(*args, **kwargs) 207 | 208 | 209 | def init_distributed_mode(args): 210 | if 'OMPI_COMM_WORLD_RANK' in os.environ: 211 | args.rank = int(os.environ.get('OMPI_COMM_WORLD_RANK')) 212 | args.world_size = int(os.environ.get('OMPI_COMM_WORLD_SIZE')) 213 | args.gpu = args.rank % torch.cuda.device_count() 214 | elif 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 215 | args.rank = int(os.environ["RANK"]) 216 | args.world_size = int(os.environ['WORLD_SIZE']) 217 | args.gpu = int(os.environ['LOCAL_RANK']) 218 | elif 'SLURM_PROCID' in os.environ: 219 | args.rank = int(os.environ['SLURM_PROCID']) 220 | args.gpu = args.rank % torch.cuda.device_count() 221 | else: 222 | print('Not using distributed mode') 223 | args.distributed = False 224 | return 225 | 226 | args.distributed = True 227 | 228 | torch.cuda.set_device(args.gpu) 229 | args.dist_backend = 'nccl' 230 | print('| distributed init (rank {}): {}'.format( 231 | args.rank, args.dist_url), flush=True) 232 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 233 | world_size=args.world_size, rank=args.rank) 234 | torch.distributed.barrier() 235 | setup_for_distributed(args.rank == 0) 236 | -------------------------------------------------------------------------------- /AutoFormer_original/lib/utils.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import time 4 | from collections import defaultdict, deque 5 | 
import datetime 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | class SmoothedValue(object): 12 | """Track a series of values and provide access to smoothed values over a 13 | window or the global series average. 14 | """ 15 | 16 | def __init__(self, window_size=20, fmt=None): 17 | if fmt is None: 18 | fmt = "{median:.4f} ({global_avg:.4f})" 19 | self.deque = deque(maxlen=window_size) 20 | self.total = 0.0 21 | self.count = 0 22 | self.fmt = fmt 23 | 24 | def update(self, value, n=1): 25 | self.deque.append(value) 26 | self.count += n 27 | self.total += value * n 28 | 29 | def synchronize_between_processes(self): 30 | """ 31 | Warning: does not synchronize the deque! 32 | """ 33 | if not is_dist_avail_and_initialized(): 34 | return 35 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') 36 | dist.barrier() 37 | dist.all_reduce(t) 38 | t = t.tolist() 39 | self.count = int(t[0]) 40 | self.total = t[1] 41 | 42 | @property 43 | def median(self): 44 | d = torch.tensor(list(self.deque)) 45 | return d.median().item() 46 | 47 | @property 48 | def avg(self): 49 | d = torch.tensor(list(self.deque), dtype=torch.float32) 50 | return d.mean().item() 51 | 52 | @property 53 | def global_avg(self): 54 | return self.total / self.count 55 | 56 | @property 57 | def max(self): 58 | return max(self.deque) 59 | 60 | @property 61 | def value(self): 62 | return self.deque[-1] 63 | 64 | def __str__(self): 65 | return self.fmt.format( 66 | median=self.median, 67 | avg=self.avg, 68 | global_avg=self.global_avg, 69 | max=self.max, 70 | value=self.value) 71 | 72 | 73 | class MetricLogger(object): 74 | def __init__(self, delimiter="\t"): 75 | self.meters = defaultdict(SmoothedValue) 76 | self.delimiter = delimiter 77 | 78 | def update(self, **kwargs): 79 | for k, v in kwargs.items(): 80 | if isinstance(v, torch.Tensor): 81 | v = v.item() 82 | assert isinstance(v, (float, int)) 83 | self.meters[k].update(v) 84 | 85 | def __getattr__(self, attr): 86 | if attr in self.meters: 87 | return self.meters[attr] 88 | if attr in self.__dict__: 89 | return self.__dict__[attr] 90 | raise AttributeError("'{}' object has no attribute '{}'".format( 91 | type(self).__name__, attr)) 92 | 93 | def __str__(self): 94 | loss_str = [] 95 | for name, meter in self.meters.items(): 96 | loss_str.append( 97 | "{}: {}".format(name, str(meter)) 98 | ) 99 | return self.delimiter.join(loss_str) 100 | 101 | def synchronize_between_processes(self): 102 | for meter in self.meters.values(): 103 | meter.synchronize_between_processes() 104 | 105 | def add_meter(self, name, meter): 106 | self.meters[name] = meter 107 | 108 | def log_every(self, iterable, print_freq, header=None): 109 | i = 0 110 | if not header: 111 | header = '' 112 | start_time = time.time() 113 | end = time.time() 114 | iter_time = SmoothedValue(fmt='{avg:.4f}') 115 | data_time = SmoothedValue(fmt='{avg:.4f}') 116 | space_fmt = ':' + str(len(str(len(iterable)))) + 'd' 117 | log_msg = [ 118 | header, 119 | '[{0' + space_fmt + '}/{1}]', 120 | 'eta: {eta}', 121 | '{meters}', 122 | 'time: {time}', 123 | 'data: {data}' 124 | ] 125 | if torch.cuda.is_available(): 126 | log_msg.append('max mem: {memory:.0f}') 127 | log_msg = self.delimiter.join(log_msg) 128 | MB = 1024.0 * 1024.0 129 | for obj in iterable: 130 | data_time.update(time.time() - end) 131 | yield obj 132 | iter_time.update(time.time() - end) 133 | if i % print_freq == 0 or i == len(iterable) - 1: 134 | eta_seconds = iter_time.global_avg * (len(iterable) - i) 135 | eta_string = 
str(datetime.timedelta(seconds=int(eta_seconds))) 136 | if torch.cuda.is_available(): 137 | print(log_msg.format( 138 | i, len(iterable), eta=eta_string, 139 | meters=str(self), 140 | time=str(iter_time), data=str(data_time), 141 | memory=torch.cuda.max_memory_allocated() / MB)) 142 | else: 143 | print(log_msg.format( 144 | i, len(iterable), eta=eta_string, 145 | meters=str(self), 146 | time=str(iter_time), data=str(data_time))) 147 | i += 1 148 | end = time.time() 149 | total_time = time.time() - start_time 150 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 151 | print('{} Total time: {} ({:.4f} s / it)'.format( 152 | header, total_time_str, total_time / len(iterable))) 153 | 154 | 155 | def _load_checkpoint_for_ema(model_ema, checkpoint): 156 | """ 157 | Workaround for ModelEma._load_checkpoint to accept an already-loaded object 158 | """ 159 | mem_file = io.BytesIO() 160 | torch.save(checkpoint, mem_file) 161 | mem_file.seek(0) 162 | model_ema._load_checkpoint(mem_file) 163 | 164 | 165 | def setup_for_distributed(is_master): 166 | """ 167 | This function disables printing when not in master process 168 | """ 169 | import builtins as __builtin__ 170 | builtin_print = __builtin__.print 171 | 172 | def print(*args, **kwargs): 173 | force = kwargs.pop('force', False) 174 | if is_master or force: 175 | builtin_print(*args, **kwargs) 176 | 177 | __builtin__.print = print 178 | 179 | 180 | def is_dist_avail_and_initialized(): 181 | if not dist.is_available(): 182 | return False 183 | if not dist.is_initialized(): 184 | return False 185 | return True 186 | 187 | 188 | def get_world_size(): 189 | if not is_dist_avail_and_initialized(): 190 | return 1 191 | return dist.get_world_size() 192 | 193 | 194 | def get_rank(): 195 | if not is_dist_avail_and_initialized(): 196 | return 0 197 | return dist.get_rank() 198 | 199 | 200 | def is_main_process(): 201 | return get_rank() == 0 202 | 203 | 204 | def save_on_master(*args, **kwargs): 205 | if is_main_process(): 206 | torch.save(*args, **kwargs) 207 | 208 | 209 | def init_distributed_mode(args): 210 | if 'OMPI_COMM_WORLD_RANK' in os.environ: 211 | args.rank = int(os.environ.get('OMPI_COMM_WORLD_RANK')) 212 | args.world_size = int(os.environ.get('OMPI_COMM_WORLD_SIZE')) 213 | args.gpu = args.rank % torch.cuda.device_count() 214 | elif 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 215 | args.rank = int(os.environ["RANK"]) 216 | args.world_size = int(os.environ['WORLD_SIZE']) 217 | args.gpu = int(os.environ['LOCAL_RANK']) 218 | elif 'SLURM_PROCID' in os.environ: 219 | args.rank = int(os.environ['SLURM_PROCID']) 220 | args.gpu = args.rank % torch.cuda.device_count() 221 | else: 222 | print('Not using distributed mode') 223 | args.distributed = False 224 | return 225 | 226 | args.distributed = True 227 | 228 | torch.cuda.set_device(args.gpu) 229 | args.dist_backend = 'nccl' 230 | print('| distributed init (rank {}): {}'.format( 231 | args.rank, args.dist_url), flush=True) 232 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 233 | world_size=args.world_size, rank=args.rank) 234 | torch.distributed.barrier() 235 | setup_for_distributed(args.rank == 0) 236 | --------------------------------------------------------------------------------
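All of the `supernet_engine*` files above share one training pattern: each step draws a random subnet configuration from the search-space `choices` dict via `sample_configs`, activates it on the weight-sharing supernet with `set_sample_config`, and then runs an ordinary forward/backward pass (optionally distilling from a teacher). The sketch below is a minimal, self-contained illustration of that loop; `ToySupernet` and the `choices` values here are hypothetical stand-ins for illustration, not the repo's actual model class or YAML search spaces.

```python
# Minimal sketch of the weight-sharing sampling loop used by the engine files.
# ToySupernet and the `choices` values are hypothetical stand-ins; the real
# repo activates configs on its AutoFormer supernet via set_sample_config().
import random
import torch

choices = {                       # hypothetical search space
    'depth': [12, 13, 14],
    'mlp_ratio': [3.0, 3.5, 4.0],
    'num_heads': [3, 4],
    'embed_dim': [192, 216, 240],
}

def sample_configs(choices):
    # Same scheme as the engines above: per-layer mlp_ratio/num_heads,
    # one embed_dim shared by every layer, and a randomly sampled depth.
    config = {}
    depth = random.choice(choices['depth'])
    for dimension in ['mlp_ratio', 'num_heads']:
        config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)]
    config['embed_dim'] = [random.choice(choices['embed_dim'])] * depth
    config['layer_num'] = depth
    return config

class ToySupernet(torch.nn.Module):
    """Stand-in supernet: one over-parameterized linear layer whose active
    slice is selected by the sampled embed_dim (weight sharing in miniature)."""
    def __init__(self, max_dim=240, num_classes=10):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.randn(num_classes, max_dim) * 0.01)
        self.active_dim = max_dim

    def set_sample_config(self, config):
        self.active_dim = config['embed_dim'][0]

    def forward(self, x):
        # Use only the first `active_dim` input features of the shared weight,
        # so different sampled subnets train overlapping parameter slices.
        return x[:, :self.active_dim] @ self.weight[:, :self.active_dim].t()

model = ToySupernet()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

for step in range(3):                 # one random subnet per step, as in 'super' mode
    config = sample_configs(choices)
    model.set_sample_config(config)
    x = torch.randn(8, 240)           # dummy batch
    y = torch.randint(0, 10, (8,))
    loss = criterion(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(step, config['layer_num'], config['embed_dim'][0], round(loss.item(), 4))
```

For contrast, `supernet_engine_only_supernet.py` replaces the `sample_configs` call in 'super' mode with one hard-coded config (depth 14, embed_dim 240, 4 heads, mlp_ratio 4.0), so every step trains the same fixed architecture rather than a randomly drawn one.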