├── AutoFormer ├── model │ ├── module │ │ ├── __init__.py │ │ ├── layernorm_super.py │ │ ├── embedding_super.py │ │ ├── Linear_super.py │ │ └── qkv_super.py │ └── utils.py ├── .figure │ ├── ofa.png │ ├── overview.png │ └── performance.png ├── attn_map.xlsx ├── generate_subImagenet.sh ├── result │ ├── img.png │ ├── attn_map.png │ ├── cls_weight.png │ ├── img_resized.png │ └── img_resized_overlay.png ├── cls_attn_map.xlsx ├── requirements.txt ├── training_free │ ├── __init__.py │ ├── indicators │ │ ├── __init__.py │ │ ├── snip.py │ │ ├── NASWOT.py │ │ └── grasp.py │ └── compute_indicators.py ├── experiments │ ├── supernet │ │ ├── supernet-T.yaml │ │ ├── supernet-B.yaml │ │ └── supernet-S.yaml │ └── subnet │ │ ├── AutoFormer-T.yaml │ │ ├── AutoFormer-S.yaml │ │ └── AutoFormer-B.yaml ├── observe_supernet.sh ├── evolution_search.sh ├── lib │ ├── config.py │ ├── subImageNet.py │ ├── samplers.py │ ├── imagenet_withhold.py │ └── utils.py ├── config.yaml ├── top_k_parser.py ├── train_supernet.sh ├── tmp.py ├── performance_parser.py └── supernet_engine(save).py ├── AutoFormer_original ├── model │ ├── module │ │ ├── __init__.py │ │ ├── layernorm_super.py │ │ ├── embedding_super.py │ │ ├── Linear_super.py │ │ └── qkv_super.py │ └── utils.py ├── .figure │ ├── ofa.png │ ├── overview.png │ └── performance.png ├── requirements.txt ├── train_supernet.sh ├── experiments │ ├── supernet │ │ ├── supernet-T.yaml │ │ ├── supernet-B.yaml │ │ └── supernet-S.yaml │ └── subnet │ │ ├── AutoFormer-T.yaml │ │ ├── AutoFormer-S.yaml │ │ └── AutoFormer-B.yaml ├── lib │ ├── config.py │ ├── subImageNet.py │ ├── samplers.py │ ├── imagenet_withhold.py │ └── utils.py └── README.md ├── AutoFormer_original_greedy ├── model │ ├── module │ │ ├── __init__.py │ │ ├── layernorm_super.py │ │ ├── embedding_super.py │ │ ├── Linear_super.py │ │ └── qkv_super.py │ └── utils.py ├── .figure │ ├── ofa.png │ ├── overview.png │ └── performance.png ├── requirements.txt ├── experiments │ ├── supernet │ │ ├── supernet-T.yaml │ │ ├── supernet-B.yaml │ │ └── supernet-S.yaml │ └── subnet │ │ ├── AutoFormer-T.yaml │ │ ├── AutoFormer-S.yaml │ │ └── AutoFormer-B.yaml ├── train_supernet_base.sh ├── train_supernet_observation.sh ├── z_train_supernet.sh ├── lib │ ├── config.py │ ├── subImageNet.py │ ├── samplers.py │ └── imagenet_withhold.py ├── config.yaml ├── train_supernet_small.sh ├── config_prenas.yaml ├── train_supernet_only_supernet.sh ├── evolution_search copy.sh ├── evolution_search.sh ├── performance_parser.py ├── README.md ├── z_supernet_engine.py ├── supernet_engine_base.py ├── supernet_engine_real_original.py └── supernet_engine_only_supernet.py ├── .gitignore └── README.md /AutoFormer/model/module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AutoFormer/.figure/ofa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/.figure/ofa.png 
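The model/module files listed in the tree above (Linear_super.py, qkv_super.py, layernorm_super.py, embedding_super.py) all follow one weight-slicing pattern: each module allocates parameters at the largest ("super") dimension and runs its forward pass on a slice of those parameters matching the currently sampled subnet. Linear_super.py itself is not reproduced in this dump, so the sketch below is only an illustrative reconstruction of that shared pattern, mirroring the LayerNormSuper and PatchembedSuper implementations shown later; it is not the repository's actual code.

import torch.nn as nn
import torch.nn.functional as F

class LinearSuper(nn.Linear):
    # Illustrative sketch only; the real Linear_super.py is not shown in this dump.
    def __init__(self, super_in_dim, super_out_dim, bias=True):
        super().__init__(super_in_dim, super_out_dim, bias=bias)
        self.sample_in_dim = super_in_dim
        self.sample_out_dim = super_out_dim

    def set_sample_config(self, sample_in_dim, sample_out_dim):
        # Record the dimensions of the currently sampled subnet.
        self.sample_in_dim = sample_in_dim
        self.sample_out_dim = sample_out_dim

    def forward(self, x):
        # Slice the leading rows/columns of the super weight, as
        # LayerNormSuper._sample_parameters and PatchembedSuper.set_sample_config do.
        weight = self.weight[:self.sample_out_dim, :self.sample_in_dim]
        bias = self.bias[:self.sample_out_dim] if self.bias is not None else None
        return F.linear(x, weight, bias)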
-------------------------------------------------------------------------------- /AutoFormer/attn_map.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/attn_map.xlsx -------------------------------------------------------------------------------- /AutoFormer/generate_subImagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 ./lib/subImageNet.py --data-path '/data' 3 | 4 | 5 | -------------------------------------------------------------------------------- /AutoFormer/result/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/img.png -------------------------------------------------------------------------------- /AutoFormer/cls_attn_map.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/cls_attn_map.xlsx -------------------------------------------------------------------------------- /AutoFormer/.figure/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/.figure/overview.png -------------------------------------------------------------------------------- /AutoFormer/result/attn_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/attn_map.png -------------------------------------------------------------------------------- /AutoFormer/result/cls_weight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/cls_weight.png -------------------------------------------------------------------------------- /AutoFormer/result/img_resized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/img_resized.png -------------------------------------------------------------------------------- /AutoFormer/.figure/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/.figure/performance.png -------------------------------------------------------------------------------- /AutoFormer_original/.figure/ofa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original/.figure/ofa.png -------------------------------------------------------------------------------- /AutoFormer/result/img_resized_overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer/result/img_resized_overlay.png -------------------------------------------------------------------------------- /AutoFormer_original/.figure/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original/.figure/overview.png 
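The experiments/supernet YAML files further below define, for each supernet size (T/S/B), a SUPERNET section with the maximal dimensions and a SEARCH_SPACE section listing the allowed choices for MLP_RATIO, NUM_HEADS, DEPTH, and EMBED_DIM. A subnet is then a depth, one embedding dimension, and per-layer choices from these lists; the RETRAIN sections of the subnet YAMLs show exactly such per-layer assignments. A minimal sketch of drawing one random subnet config from a search space, assuming SEARCH_SPACE is a top-level key as in the AutoFormer configs; the helper name is hypothetical, since the repository's own sampling code is not part of this dump:

import random
import yaml

def sample_subnet_config(yaml_path):
    # Hypothetical helper: draws one random subnet from the SEARCH_SPACE lists.
    with open(yaml_path) as f:
        space = yaml.safe_load(f)['SEARCH_SPACE']
    depth = random.choice(space['DEPTH'])
    return {
        'layer_num': depth,                              # number of transformer blocks
        'embed_dim': random.choice(space['EMBED_DIM']),  # shared across all blocks
        'mlp_ratio': [random.choice(space['MLP_RATIO']) for _ in range(depth)],  # per block
        'num_heads': [random.choice(space['NUM_HEADS']) for _ in range(depth)],  # per block
    }

# e.g. sample_subnet_config('./experiments/supernet/supernet-T.yaml')

The dict keys match the fields that top_k_parser.py (shown later in this dump) reads back out of the search logs.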
-------------------------------------------------------------------------------- /AutoFormer_original/.figure/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original/.figure/performance.png -------------------------------------------------------------------------------- /AutoFormer_original_greedy/.figure/ofa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original_greedy/.figure/ofa.png -------------------------------------------------------------------------------- /AutoFormer_original_greedy/.figure/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original_greedy/.figure/overview.png -------------------------------------------------------------------------------- /AutoFormer_original_greedy/.figure/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvlab-yonsei/One-Shot-TAS/HEAD/AutoFormer_original_greedy/.figure/performance.png -------------------------------------------------------------------------------- /AutoFormer/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.7.0 2 | timm==0.3.2 3 | scikit-image 4 | ptflops 5 | easydict 6 | PyYAML 7 | pillow 8 | torchvision==0.2.1 9 | opencv-python 10 | -------------------------------------------------------------------------------- /AutoFormer_original/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.7.0 2 | timm==0.3.2 3 | scikit-image 4 | ptflops 5 | easydict 6 | PyYAML 7 | pillow 8 | torchvision==0.2.1 9 | opencv-python 10 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.7.0 2 | timm==0.3.2 3 | scikit-image 4 | ptflops 5 | easydict 6 | PyYAML 7 | pillow 8 | torchvision==0.2.1 9 | opencv-python 10 | -------------------------------------------------------------------------------- /AutoFormer/training_free/__init__.py: -------------------------------------------------------------------------------- 1 | from .compute_indicators import * 2 | from os.path import dirname, basename, isfile, join 3 | import glob 4 | modules = glob.glob(join(dirname(__file__), "*.py")) 5 | __all__ = [ basename(f)[:-3] for f in modules if isfile(f) and not f.endswith('__init__.py')] -------------------------------------------------------------------------------- /AutoFormer_original/train_supernet.sh: -------------------------------------------------------------------------------- 1 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 2 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 3 | --output /OUTPUT_PATH --batch-size 128 -------------------------------------------------------------------------------- /AutoFormer/experiments/supernet/supernet-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | 
EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 -------------------------------------------------------------------------------- /AutoFormer_original/experiments/supernet/supernet-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | -------------------------------------------------------------------------------- /AutoFormer/observe_supernet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env observe_supernet.py --data-path '/data' --gp \ 3 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/supernet-tiny.pth' \ 4 | --min-param-limits 1 --param-limits 7 5 | # --data-set EVO_IMNET 6 | 7 | 8 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/supernet/supernet-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | -------------------------------------------------------------------------------- /AutoFormer/experiments/supernet/supernet-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 9 13 | - 10 14 | DEPTH: 15 | - 14 16 | - 15 17 | - 16 18 | EMBED_DIM: 19 | - 528 20 | - 576 21 | - 624 22 | -------------------------------------------------------------------------------- /AutoFormer/experiments/supernet/supernet-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | -------------------------------------------------------------------------------- /AutoFormer_original/experiments/supernet/supernet-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 9 13 | - 10 14 | DEPTH: 15 | - 14 16 | - 15 17 | - 16 18 | EMBED_DIM: 19 | - 528 20 | - 576 21 | - 624 22 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/supernet/supernet-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | 
SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 9 13 | - 10 14 | DEPTH: 15 | - 14 16 | - 15 17 | - 16 18 | EMBED_DIM: 19 | - 528 20 | - 576 21 | - 624 22 | -------------------------------------------------------------------------------- /AutoFormer_original/experiments/supernet/supernet-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/supernet/supernet-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/train_supernet_base.sh: -------------------------------------------------------------------------------- 1 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_base.py --data-path '/data' --gp \ 2 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --epochs 500 --warmup-epochs 20 \ 3 | --output /OUTPUT_PATH --batch-size 128 \ 4 | --save_checkpoint_path 'checkpoint-original-base-' --save_log_path './log/supernet_original_base.log' --interval 1 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoint/ 2 | *.pyc 3 | /*.log 4 | *ptm 5 | .vscode 6 | *visualize_images*/ 7 | *_tsne_* 8 | *hard_triplet_* 9 | *_feature.pkl 10 | Visualize_Network_* 11 | hard_mining/ 12 | backup.py 13 | *pdf 14 | *.bak* 15 | # *.sh 16 | *.log 17 | *.pkl 18 | *.tar 19 | *.pth 20 | AutoFormer/tftasenv 21 | AutoFormer/Python-3.6.15 22 | AutoFormer/autoformer3.6 23 | *.tgz 24 | config.txt 25 | AutoFormer/greedyTAS/autoformer-greedyTAS-09121607-greedy.log 26 | AutoFormer/greedyTAS/autoformer-greedyTAS(09121607)-greedy.log -------------------------------------------------------------------------------- /AutoFormer_original_greedy/train_supernet_observation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set UTF-8 environment variables (to prevent UnicodeEncodeError) 4 | export PYTHONIOENCODING=utf-8 5 | export LC_ALL=C.UTF-8 6 | export LANG=C.UTF-8 7 | 8 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_observation.py --data-path '/data' --gp \ 9 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 10 | --resume '/OUTPUT_PATH/checkpoint-original-24.pth' --output /OUTPUT_PATH --batch-size 128 \ 11 | --save_checkpoint_path 'checkpoint-tiny-observation2-' --save_log_path './log/supernet_tiny_observation2.log' --interval 1 12 | -------------------------------------------------------------------------------- /AutoFormer/experiments/subnet/AutoFormer-T.yaml:
-------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | RETRAIN: 22 | MLP_RATIO: 23 | - 3.5 24 | - 3.5 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.0 29 | - 4.0 30 | - 4.0 31 | - 3.5 32 | - 4.0 33 | - 3.5 34 | - 4.0 35 | - 3.5 36 | NUM_HEADS: 37 | - 3 38 | - 3 39 | - 3 40 | - 3 41 | - 3 42 | - 3 43 | - 3 44 | - 3 45 | - 3 46 | - 3 47 | - 4 48 | - 3 49 | - 3 50 | DEPTH: 13 51 | EMBED_DIM: 192 -------------------------------------------------------------------------------- /AutoFormer_original/experiments/subnet/AutoFormer-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | RETRAIN: 22 | MLP_RATIO: 23 | - 3.5 24 | - 3.5 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.0 29 | - 4.0 30 | - 4.0 31 | - 3.5 32 | - 4.0 33 | - 3.5 34 | - 4.0 35 | - 3.5 36 | NUM_HEADS: 37 | - 3 38 | - 3 39 | - 3 40 | - 3 41 | - 3 42 | - 3 43 | - 3 44 | - 3 45 | - 3 46 | - 3 47 | - 4 48 | - 3 49 | - 3 50 | DEPTH: 13 51 | EMBED_DIM: 192 -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/subnet/AutoFormer-T.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 4 4 | EMBED_DIM: 256 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.5 9 | - 4 10 | NUM_HEADS: 11 | - 3 12 | - 4 13 | DEPTH: 14 | - 12 15 | - 13 16 | - 14 17 | EMBED_DIM: 18 | - 192 19 | - 216 20 | - 240 21 | RETRAIN: 22 | MLP_RATIO: 23 | - 3.5 24 | - 3.5 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.0 29 | - 4.0 30 | - 4.0 31 | - 3.5 32 | - 4.0 33 | - 3.5 34 | - 4.0 35 | - 3.5 36 | NUM_HEADS: 37 | - 3 38 | - 3 39 | - 3 40 | - 3 41 | - 3 42 | - 3 43 | - 3 44 | - 3 45 | - 3 46 | - 3 47 | - 4 48 | - 3 49 | - 3 50 | DEPTH: 13 51 | EMBED_DIM: 192 -------------------------------------------------------------------------------- /AutoFormer/experiments/subnet/AutoFormer-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.5 29 | - 4.0 30 | - 4.0 31 | - 4.0 32 | - 4.0 33 | - 4.0 34 | - 4.0 35 | - 4.0 36 | - 3.5 37 | - 4.0 38 | NUM_HEADS: 39 | - 6 40 | - 6 41 | - 5 42 | - 7 43 | - 5 44 | - 5 45 | - 5 46 | - 6 47 | - 6 48 | - 7 49 | - 7 50 | - 6 51 | - 7 52 | DEPTH: 13 53 | EMBED_DIM: 384 54 | 55 | -------------------------------------------------------------------------------- /AutoFormer_original/experiments/subnet/AutoFormer-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | 
NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.5 29 | - 4.0 30 | - 4.0 31 | - 4.0 32 | - 4.0 33 | - 4.0 34 | - 4.0 35 | - 4.0 36 | - 3.5 37 | - 4.0 38 | NUM_HEADS: 39 | - 6 40 | - 6 41 | - 5 42 | - 7 43 | - 5 44 | - 5 45 | - 5 46 | - 6 47 | - 6 48 | - 7 49 | - 7 50 | - 6 51 | - 7 52 | DEPTH: 13 53 | EMBED_DIM: 384 54 | 55 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/subnet/AutoFormer-S.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 7 4 | EMBED_DIM: 448 5 | DEPTH: 14 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 5 13 | - 6 14 | - 7 15 | DEPTH: 16 | - 12 17 | - 13 18 | - 14 19 | EMBED_DIM: 20 | - 320 21 | - 384 22 | - 448 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.0 26 | - 3.5 27 | - 3.0 28 | - 3.5 29 | - 4.0 30 | - 4.0 31 | - 4.0 32 | - 4.0 33 | - 4.0 34 | - 4.0 35 | - 4.0 36 | - 3.5 37 | - 4.0 38 | NUM_HEADS: 39 | - 6 40 | - 6 41 | - 5 42 | - 7 43 | - 5 44 | - 5 45 | - 5 46 | - 6 47 | - 6 48 | - 7 49 | - 7 50 | - 6 51 | - 7 52 | DEPTH: 13 53 | EMBED_DIM: 384 54 | 55 | -------------------------------------------------------------------------------- /AutoFormer/evolution_search.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run the first job 4 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 5 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 6 | --min-param-limits 1 --param-limits 100 --config-list-path './greedyTAS/m(2500)_path_epoch100.pkl' \ 7 | --log-file-path './greedyTAS/m(2500)_path_epoch100-subnet.log' 8 | 9 | 10 | # #!/bin/bash 11 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 12 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 13 | # --min-param-limits 1 --param-limits 100 14 | # # --data-set EVO_IMNET 15 | 16 | 17 | -------------------------------------------------------------------------------- /AutoFormer/experiments/subnet/AutoFormer-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 8 13 | - 9 14 | - 10 15 | DEPTH: 16 | - 14 17 | - 15 18 | - 16 19 | EMBED_DIM: 20 | - 528 21 | - 576 22 | - 624 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.5 26 | - 3.5 27 | - 4.0 28 | - 3.5 29 | - 4.0 30 | - 3.5 31 | - 3.5 32 | - 3.0 33 | - 4.0 34 | - 4.0 35 | - 3.0 36 | - 4.0 37 | - 3.0 38 | - 3.5 39 | NUM_HEADS: 40 | - 9 41 | - 9 42 | - 9 43 | - 9 44 | - 9 45 | - 10 46 | - 9 47 | - 9 48 | - 10 49 | - 9 50 | - 10 51 | - 9 52 | - 9 53 | - 10 54 | DEPTH: 14 55 | EMBED_DIM: 576 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /AutoFormer_original/experiments/subnet/AutoFormer-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM:
640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 8 13 | - 9 14 | - 10 15 | DEPTH: 16 | - 14 17 | - 15 18 | - 16 19 | EMBED_DIM: 20 | - 528 21 | - 576 22 | - 624 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.5 26 | - 3.5 27 | - 4.0 28 | - 3.5 29 | - 4.0 30 | - 3.5 31 | - 3.5 32 | - 3.0 33 | - 4.0 34 | - 4.0 35 | - 3.0 36 | - 4.0 37 | - 3.0 38 | - 3.5 39 | NUM_HEADS: 40 | - 9 41 | - 9 42 | - 9 43 | - 9 44 | - 9 45 | - 10 46 | - 9 47 | - 9 48 | - 10 49 | - 9 50 | - 10 51 | - 9 52 | - 9 53 | - 10 54 | DEPTH: 14 55 | EMBED_DIM: 576 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/experiments/subnet/AutoFormer-B.yaml: -------------------------------------------------------------------------------- 1 | SUPERNET: 2 | MLP_RATIO: 4.0 3 | NUM_HEADS: 10 4 | EMBED_DIM: 640 5 | DEPTH: 16 6 | SEARCH_SPACE: 7 | MLP_RATIO: 8 | - 3.0 9 | - 3.5 10 | - 4.0 11 | NUM_HEADS: 12 | - 8 13 | - 9 14 | - 10 15 | DEPTH: 16 | - 14 17 | - 15 18 | - 16 19 | EMBED_DIM: 20 | - 528 21 | - 576 22 | - 624 23 | RETRAIN: 24 | MLP_RATIO: 25 | - 3.5 26 | - 3.5 27 | - 4.0 28 | - 3.5 29 | - 4.0 30 | - 3.5 31 | - 3.5 32 | - 3.0 33 | - 4.0 34 | - 4.0 35 | - 3.0 36 | - 4.0 37 | - 3.0 38 | - 3.5 39 | NUM_HEADS: 40 | - 9 41 | - 9 42 | - 9 43 | - 9 44 | - 9 45 | - 10 46 | - 9 47 | - 9 48 | - 10 49 | - 9 50 | - 10 51 | - 9 52 | - 9 53 | - 10 54 | DEPTH: 14 55 | EMBED_DIM: 576 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/z_train_supernet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set UTF-8 environment variables (to prevent UnicodeEncodeError) 4 | export PYTHONIOENCODING=utf-8 5 | export LC_ALL=C.UTF-8 6 | export LANG=C.UTF-8 7 | 8 | python -m torch.distributed.launch --nproc_per_node=8 --use_env z_supernet_train.py --data-path '/data' --gp \ 9 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --epochs 500 --warmup-epochs 20 \ 10 | --output /OUTPUT_PATH --batch-size 128 \ 11 | --save_checkpoint_path 'checkpoint-z_original_auto_s_prenassmallaug' --save_log_path './log/supernet_z_original_auto_s_prenassmallaug.log' --interval 1 12 | 13 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env z_evolution.py --data-path '/data' --gp \ 14 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --resume '/OUTPUT_PATH/checkpoint-z_original_auto_s_prenassmallaug-25.pth' \ 15 | --min-param-limits 5 --param-limits 23 \ 16 | --log-file-path './log/search_z_original_auto_s_prenassmallaug_23M.log' 17 | -------------------------------------------------------------------------------- /AutoFormer/lib/config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | import yaml 3 | 4 | cfg = edict() 5 | 6 | 7 | def _edict2dict(dest_dict, src_edict): 8 | if isinstance(dest_dict, dict) and isinstance(src_edict, dict): 9 | for k, v in src_edict.items(): 10 | if not isinstance(v, edict): 11 | dest_dict[k] = v 12 | else: 13 | dest_dict[k] = {} 14 | _edict2dict(dest_dict[k], v) 15 | else: 16 | return 17 | 18 | def gen_config(config_file): 19 | cfg_dict = {} 20 | _edict2dict(cfg_dict, cfg) 21 | with open(config_file, 'w') as f: 22 | yaml.dump(cfg_dict, f, default_flow_style=False) 23 | 24 | 25 | def _update_config(base_cfg, exp_cfg):
26 | if isinstance(base_cfg, edict) and isinstance(exp_cfg, edict): 27 | for k, v in exp_cfg.items(): 28 | base_cfg[k] = v 29 | else: 30 | return 31 | 32 | 33 | def update_config_from_file(filename): 34 | exp_config = None 35 | with open(filename) as f: 36 | exp_config = edict(yaml.safe_load(f)) 37 | _update_config(cfg, exp_config) 38 | 39 | 40 | -------------------------------------------------------------------------------- /AutoFormer_original/lib/config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | import yaml 3 | 4 | cfg = edict() 5 | 6 | 7 | def _edict2dict(dest_dict, src_edict): 8 | if isinstance(dest_dict, dict) and isinstance(src_edict, dict): 9 | for k, v in src_edict.items(): 10 | if not isinstance(v, edict): 11 | dest_dict[k] = v 12 | else: 13 | dest_dict[k] = {} 14 | _edict2dict(dest_dict[k], v) 15 | else: 16 | return 17 | 18 | def gen_config(config_file): 19 | cfg_dict = {} 20 | _edict2dict(cfg_dict, cfg) 21 | with open(config_file, 'w') as f: 22 | yaml.dump(cfg_dict, f, default_flow_style=False) 23 | 24 | 25 | def _update_config(base_cfg, exp_cfg): 26 | if isinstance(base_cfg, edict) and isinstance(exp_cfg, edict): 27 | for k, v in exp_cfg.items(): 28 | base_cfg[k] = v 29 | else: 30 | return 31 | 32 | 33 | def update_config_from_file(filename): 34 | exp_config = None 35 | with open(filename) as f: 36 | exp_config = edict(yaml.safe_load(f)) 37 | _update_config(cfg, exp_config) 38 | 39 | 40 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/lib/config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | import yaml 3 | 4 | cfg = edict() 5 | 6 | 7 | def _edict2dict(dest_dict, src_edict): 8 | if isinstance(dest_dict, dict) and isinstance(src_edict, dict): 9 | for k, v in src_edict.items(): 10 | if not isinstance(v, edict): 11 | dest_dict[k] = v 12 | else: 13 | dest_dict[k] = {} 14 | _edict2dict(dest_dict[k], v) 15 | else: 16 | return 17 | 18 | def gen_config(config_file): 19 | cfg_dict = {} 20 | _edict2dict(cfg_dict, cfg) 21 | with open(config_file, 'w') as f: 22 | yaml.dump(cfg_dict, f, default_flow_style=False) 23 | 24 | 25 | def _update_config(base_cfg, exp_cfg): 26 | if isinstance(base_cfg, edict) and isinstance(exp_cfg, edict): 27 | for k, v in exp_cfg.items(): 28 | base_cfg[k] = v 29 | else: 30 | return 31 | 32 | 33 | def update_config_from_file(filename): 34 | exp_config = None 35 | with open(filename) as f: 36 | exp_config = edict(yaml.safe_load(f)) 37 | _update_config(cfg, exp_config) 38 | 39 | 40 | -------------------------------------------------------------------------------- /AutoFormer/training_free/indicators/__init__.py: -------------------------------------------------------------------------------- 1 | available_indicators = [] 2 | _indicator_impls = {} 3 | 4 | 5 | def indicator(name, bn=True, copy_net=True, force_clean=True, **impl_args): 6 | def make_impl(func): 7 | def indicator_impl(net_orig, device, *args, **kwargs): 8 | if copy_net: 9 | net = net_orig.get_copy(bn=bn).to(device) 10 | else: 11 | net = net_orig 12 | if name =='NASWOT': 13 | ret = func(net, device) 14 | elif name =='te_nas': 15 | ret = func(net) 16 | else: 17 | ret = func(net, *args, **kwargs, **impl_args) 18 | if copy_net and force_clean: 19 | import gc 20 | import torch 21 | del net 22 | torch.cuda.empty_cache() 23 | gc.collect() 24 | return ret 25 | 26 | global 
_indicator_impls 27 | if name in _indicator_impls: 28 | raise KeyError(f'Duplicated indicator! {name}') 29 | available_indicators.append(name) 30 | _indicator_impls[name] = indicator_impl 31 | return func 32 | return make_impl 33 | 34 | 35 | def calc_indicator(name, net, device, *args, **kwargs): 36 | return _indicator_impls[name](net, device, *args, **kwargs) 37 | 38 | 39 | def load_all(): 40 | # from . import snip 41 | # from . import grasp 42 | # from . import NASWOT 43 | # from . import te_nas 44 | from . import dss 45 | 46 | load_all() 47 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/layernorm_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class LayerNormSuper(torch.nn.LayerNorm): 6 | def __init__(self, super_embed_dim): 7 | super().__init__(super_embed_dim) 8 | 9 | # the largest embed dim 10 | self.super_embed_dim = super_embed_dim 11 | 12 | # the current sampled embed dim 13 | self.sample_embed_dim = None 14 | 15 | self.samples = {} 16 | self.profiling = False 17 | 18 | def profile(self, mode=True): 19 | self.profiling = mode 20 | 21 | def sample_parameters(self, resample=False): 22 | if self.profiling or resample: 23 | return self._sample_parameters() 24 | return self.samples 25 | 26 | def _sample_parameters(self): 27 | self.samples['weight'] = self.weight[:self.sample_embed_dim] 28 | self.samples['bias'] = self.bias[:self.sample_embed_dim] 29 | return self.samples 30 | 31 | def set_sample_config(self, sample_embed_dim): 32 | self.sample_embed_dim = sample_embed_dim 33 | self._sample_parameters() 34 | 35 | def forward(self, x): 36 | self.sample_parameters() 37 | return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) 38 | 39 | def calc_sampled_param_num(self): 40 | assert 'weight' in self.samples.keys() 41 | assert 'bias' in self.samples.keys() 42 | return self.samples['weight'].numel() + self.samples['bias'].numel() 43 | 44 | def get_complexity(self, sequence_length): 45 | return sequence_length * self.sample_embed_dim 46 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/layernorm_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class LayerNormSuper(torch.nn.LayerNorm): 6 | def __init__(self, super_embed_dim): 7 | super().__init__(super_embed_dim) 8 | 9 | # the largest embed dim 10 | self.super_embed_dim = super_embed_dim 11 | 12 | # the current sampled embed dim 13 | self.sample_embed_dim = None 14 | 15 | self.samples = {} 16 | self.profiling = False 17 | 18 | def profile(self, mode=True): 19 | self.profiling = mode 20 | 21 | def sample_parameters(self, resample=False): 22 | if self.profiling or resample: 23 | return self._sample_parameters() 24 | return self.samples 25 | 26 | def _sample_parameters(self): 27 | self.samples['weight'] = self.weight[:self.sample_embed_dim] 28 | self.samples['bias'] = self.bias[:self.sample_embed_dim] 29 | return self.samples 30 | 31 | def set_sample_config(self, sample_embed_dim): 32 | self.sample_embed_dim = sample_embed_dim 33 | self._sample_parameters() 34 | 35 | def forward(self, x): 36 | self.sample_parameters() 37 | return F.layer_norm(x, (self.sample_embed_dim,), 
weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) 38 | 39 | def calc_sampled_param_num(self): 40 | assert 'weight' in self.samples.keys() 41 | assert 'bias' in self.samples.keys() 42 | return self.samples['weight'].numel() + self.samples['bias'].numel() 43 | 44 | def get_complexity(self, sequence_length): 45 | return sequence_length * self.sample_embed_dim 46 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/config.yaml: -------------------------------------------------------------------------------- 1 | aa: rand-m9-mstd0.5-inc1 2 | amp: true 3 | batch_size: 64 4 | cfg: ./experiments/supernet/supernet-T.yaml 5 | change_qkv: true 6 | clip_grad: null 7 | color_jitter: 0.4 8 | cooldown_epochs: 10 9 | crossover_num: 25 10 | cutmix: 1.0 11 | cutmix_minmax: null 12 | data_path: /data 13 | data_set: IMNET 14 | decay_epochs: 30 15 | decay_rate: 0.1 16 | device: cuda 17 | dist_backend: nccl 18 | dist_eval: true 19 | dist_url: env:// 20 | distributed: true 21 | drop: 0.0 22 | drop_block: null 23 | drop_path: 0.1 24 | epochs: 30 25 | eval: false 26 | gp: true 27 | gpu: 0 28 | inat_category: name 29 | input_size: 224 30 | log_file_path: ./log/search_tiny-only-supernet192-minimum_pop1050_10M.log 31 | lr: 0.0005 32 | lr_noise: null 33 | lr_noise_pct: 0.67 34 | lr_noise_std: 1.0 35 | lr_power: 1.0 36 | m_prob: 0.2 37 | max_epochs: 20 38 | max_relative_position: 14 39 | min_lr: 1.0e-05 40 | min_param_limits: 9.0 41 | mixup: 0.8 42 | mixup_mode: batch 43 | mixup_prob: 1.0 44 | mixup_switch_prob: 0.5 45 | model: '' 46 | model_ema: false 47 | model_ema_decay: 0.99996 48 | model_ema_force_cpu: false 49 | momentum: 0.9 50 | mutation_num: 25 51 | no_abs_pos: false 52 | no_prefetcher: false 53 | num_workers: 10 54 | opt: adamw 55 | opt_betas: null 56 | opt_eps: 1.0e-08 57 | output_dir: '' 58 | param_limits: 10.0 59 | patch_size: 16 60 | patience_epochs: 10 61 | pin_mem: true 62 | platform: pai 63 | population_num: 50 64 | post_norm: false 65 | rank: 0 66 | recount: 1 67 | relative_position: true 68 | remode: pixel 69 | repeated_aug: true 70 | reprob: 0.25 71 | resplit: false 72 | resume: /OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth 73 | rpe_type: bias 74 | s_prob: 0.4 75 | scale: false 76 | sched: cosine 77 | seed: 0 78 | select_num: 10 79 | smoothing: 0.1 80 | start_epoch: 0 81 | teacher_model: '' 82 | train_interpolation: bicubic 83 | warmup_epochs: 5 84 | warmup_lr: 1.0e-06 85 | weight_decay: 0.05 86 | world_size: 8 87 | -------------------------------------------------------------------------------- /AutoFormer/config.yaml: -------------------------------------------------------------------------------- 1 | aa: rand-m9-mstd0.5-inc1 2 | amp: true 3 | batch_size: 64 4 | cfg: ./experiments/supernet/supernet-T.yaml 5 | change_qkv: true 6 | clip_grad: null 7 | color_jitter: 0.4 8 | config_list_path: ./greedyTAS/m(2500)_path_epoch100.pkl 9 | cooldown_epochs: 10 10 | crossover_num: 25 11 | cutmix: 1.0 12 | cutmix_minmax: null 13 | data_path: /data 14 | data_set: IMNET 15 | decay_epochs: 30 16 | decay_rate: 0.1 17 | device: cuda 18 | dist_backend: nccl 19 | dist_eval: true 20 | dist_url: tcp://localhost:2042 21 | distributed: true 22 | drop: 0.0 23 | drop_block: null 24 | drop_path: 0.1 25 | epochs: 30 26 | eval: false 27 | gp: true 28 | gpu: 5 29 | inat_category: name 30 | input_size: 224 31 | log_file_path: ./greedyTAS/m(2500)_path_epoch100-subnet.log 32 | lr: 0.0005 33 | lr_noise: null 34 | lr_noise_pct: 0.67 35 | 
lr_noise_std: 1.0 36 | lr_power: 1.0 37 | m_prob: 0.2 38 | max_epochs: 20 39 | max_relative_position: 14 40 | min_lr: 1.0e-05 41 | min_param_limits: 1.0 42 | mixup: 0.8 43 | mixup_mode: batch 44 | mixup_prob: 1.0 45 | mixup_switch_prob: 0.5 46 | model: '' 47 | model_ema: false 48 | model_ema_decay: 0.99996 49 | model_ema_force_cpu: false 50 | momentum: 0.9 51 | mutation_num: 25 52 | no_abs_pos: false 53 | no_prefetcher: false 54 | num_workers: 10 55 | opt: adamw 56 | opt_betas: null 57 | opt_eps: 1.0e-08 58 | output_dir: '' 59 | param_limits: 100.0 60 | patch_size: 16 61 | patience_epochs: 10 62 | pin_mem: true 63 | platform: pai 64 | population_num: 200 65 | post_norm: false 66 | rank: 5 67 | recount: 1 68 | relative_position: true 69 | remode: pixel 70 | repeated_aug: true 71 | reprob: 0.25 72 | resplit: false 73 | resume: ./experiments/supernet/checkpoint-25.pth 74 | rpe_type: bias 75 | s_prob: 0.4 76 | scale: false 77 | sched: cosine 78 | seed: 0 79 | select_num: 10 80 | smoothing: 0.1 81 | start_epoch: 0 82 | teacher_model: '' 83 | train_interpolation: bicubic 84 | warmup_epochs: 5 85 | warmup_lr: 1.0e-06 86 | weight_decay: 0.05 87 | world_size: 8 88 | -------------------------------------------------------------------------------- /AutoFormer/lib/subImageNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import argparse 4 | random.seed(0) 5 | parser = argparse.ArgumentParser('Generate SubImageNet', add_help=False) 6 | parser.add_argument('--data-path', default='../data/imagenet', type=str, 7 | help='dataset path') 8 | args = parser.parse_args() 9 | 10 | data_path = args.data_path 11 | ImageNet_train_path = os.path.join(data_path, 'train') 12 | subImageNet_name = 'subImageNet' 13 | class_idx_txt_path = os.path.join(data_path, subImageNet_name) 14 | 15 | # train 16 | classes = sorted(os.listdir(ImageNet_train_path)) 17 | if not os.path.exists(os.path.join(data_path, subImageNet_name)): 18 | os.mkdir(os.path.join(data_path, subImageNet_name)) 19 | 20 | subImageNet = dict() 21 | with open(os.path.join(class_idx_txt_path, 'subimages_list.txt'), 'w') as f: 22 | subImageNet_class = classes 23 | for iclass in subImageNet_class: 24 | class_path = os.path.join(ImageNet_train_path, iclass) 25 | if not os.path.exists( 26 | os.path.join( 27 | data_path, 28 | subImageNet_name, 29 | iclass)): 30 | os.mkdir(os.path.join(data_path, subImageNet_name, iclass)) 31 | subImages = random.sample(sorted(os.listdir(class_path)), 100) 32 | # print("{}\n".format(subImages)) 33 | f.write("{}\n".format(subImages)) 34 | subImageNet[iclass] = subImages 35 | for image in subImages: 36 | raw_path = os.path.join(ImageNet_train_path, iclass, image) 37 | new_ipath = os.path.join( 38 | data_path, subImageNet_name, iclass, image) 39 | os.system('cp {} {}'.format(raw_path, new_ipath)) 40 | 41 | sub_classes = sorted(subImageNet.keys()) 42 | with open(os.path.join(class_idx_txt_path, 'info.txt'), 'w') as f: 43 | class_idx = 0 44 | for key in sub_classes: 45 | images = sorted((subImageNet[key])) 46 | # print(len(images)) 47 | f.write("{}\n".format(key)) 48 | class_idx = class_idx + 1 -------------------------------------------------------------------------------- /AutoFormer_original/lib/subImageNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import argparse 4 | random.seed(0) 5 | parser = argparse.ArgumentParser('Generate SubImageNet', add_help=False) 6 | 
parser.add_argument('--data-path', default='../data/imagenet', type=str, 7 | help='dataset path') 8 | args = parser.parse_args() 9 | 10 | data_path = args.data_path 11 | ImageNet_train_path = os.path.join(data_path, 'train') 12 | subImageNet_name = 'subImageNet' 13 | class_idx_txt_path = os.path.join(data_path, subImageNet_name) 14 | 15 | # train 16 | classes = sorted(os.listdir(ImageNet_train_path)) 17 | if not os.path.exists(os.path.join(data_path, subImageNet_name)): 18 | os.mkdir(os.path.join(data_path, subImageNet_name)) 19 | 20 | subImageNet = dict() 21 | with open(os.path.join(class_idx_txt_path, 'subimages_list.txt'), 'w') as f: 22 | subImageNet_class = classes 23 | for iclass in subImageNet_class: 24 | class_path = os.path.join(ImageNet_train_path, iclass) 25 | if not os.path.exists( 26 | os.path.join( 27 | data_path, 28 | subImageNet_name, 29 | iclass)): 30 | os.mkdir(os.path.join(data_path, subImageNet_name, iclass)) 31 | subImages = random.sample(sorted(os.listdir(class_path)), 100) 32 | # print("{}\n".format(subImages)) 33 | f.write("{}\n".format(subImages)) 34 | subImageNet[iclass] = subImages 35 | for image in subImages: 36 | raw_path = os.path.join(ImageNet_train_path, iclass, image) 37 | new_ipath = os.path.join( 38 | data_path, subImageNet_name, iclass, image) 39 | os.system('cp {} {}'.format(raw_path, new_ipath)) 40 | 41 | sub_classes = sorted(subImageNet.keys()) 42 | with open(os.path.join(class_idx_txt_path, 'info.txt'), 'w') as f: 43 | class_idx = 0 44 | for key in sub_classes: 45 | images = sorted((subImageNet[key])) 46 | # print(len(images)) 47 | f.write("{}\n".format(key)) 48 | class_idx = class_idx + 1 -------------------------------------------------------------------------------- /AutoFormer_original_greedy/lib/subImageNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import argparse 4 | random.seed(0) 5 | parser = argparse.ArgumentParser('Generate SubImageNet', add_help=False) 6 | parser.add_argument('--data-path', default='../data/imagenet', type=str, 7 | help='dataset path') 8 | args = parser.parse_args() 9 | 10 | data_path = args.data_path 11 | ImageNet_train_path = os.path.join(data_path, 'train') 12 | subImageNet_name = 'subImageNet' 13 | class_idx_txt_path = os.path.join(data_path, subImageNet_name) 14 | 15 | # train 16 | classes = sorted(os.listdir(ImageNet_train_path)) 17 | if not os.path.exists(os.path.join(data_path, subImageNet_name)): 18 | os.mkdir(os.path.join(data_path, subImageNet_name)) 19 | 20 | subImageNet = dict() 21 | with open(os.path.join(class_idx_txt_path, 'subimages_list.txt'), 'w') as f: 22 | subImageNet_class = classes 23 | for iclass in subImageNet_class: 24 | class_path = os.path.join(ImageNet_train_path, iclass) 25 | if not os.path.exists( 26 | os.path.join( 27 | data_path, 28 | subImageNet_name, 29 | iclass)): 30 | os.mkdir(os.path.join(data_path, subImageNet_name, iclass)) 31 | subImages = random.sample(sorted(os.listdir(class_path)), 100) 32 | # print("{}\n".format(subImages)) 33 | f.write("{}\n".format(subImages)) 34 | subImageNet[iclass] = subImages 35 | for image in subImages: 36 | raw_path = os.path.join(ImageNet_train_path, iclass, image) 37 | new_ipath = os.path.join( 38 | data_path, subImageNet_name, iclass, image) 39 | os.system('cp {} {}'.format(raw_path, new_ipath)) 40 | 41 | sub_classes = sorted(subImageNet.keys()) 42 | with open(os.path.join(class_idx_txt_path, 'info.txt'), 'w') as f: 43 | class_idx = 0 44 | for key in sub_classes: 45 | images 
= sorted((subImageNet[key])) 46 | # print(len(images)) 47 | f.write("{}\n".format(key)) 48 | class_idx = class_idx + 1 -------------------------------------------------------------------------------- /AutoFormer_original_greedy/train_supernet_small.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set UTF-8 environment variables (to prevent UnicodeEncodeError) 4 | export PYTHONIOENCODING=utf-8 5 | export LC_ALL=C.UTF-8 6 | export LANG=C.UTF-8 7 | 8 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_small_sn.py --data-path '/data' --gp \ 9 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --epochs 500 --warmup-epochs 20 \ 10 | # --resume '/OUTPUT_PATH/checkpoint_small_original_450.pth' --output /OUTPUT_PATH --batch-size 128 \ 11 | # --save_checkpoint_path 'checkpoint-sn-small-450ep-' --save_log_path './log/supernet_sn_small_450.log' --interval 1 12 | 13 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_small_sn.py --data-path '/data' --gp \ 14 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --epochs 500 --warmup-epochs 20 \ 15 | # --resume '/OUTPUT_PATH/checkpoint-sn-small-450ep-23.pth' --output /OUTPUT_PATH --batch-size 128 \ 16 | # --save_checkpoint_path 'checkpoint-sn-small-450ep-ing-' --save_log_path './log/supernet_sn_small_450_ing.log' --interval 1 17 | 18 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_small_sn.py --data-path '/data' --gp \ 19 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --epochs 500 --warmup-epochs 20 \ 20 | # --resume '/OUTPUT_PATH/checkpoint_small_original_450.pth' --output /OUTPUT_PATH --batch-size 128 \ 21 | # --save_checkpoint_path 'checkpoint-sn-small-450ep-droppath01-' --save_log_path './log/supernet_sn_small_450_droppath01.log' --interval 1 22 | 23 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 24 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-S.yaml --resume '/OUTPUT_PATH/checkpoint-sn-small-450ep-droppath01-25.pth' \ 25 | --min-param-limits 22 --param-limits 23 \ 26 | --log-file-path './log/search_sn-small-450ep-droppath01_6M.log' 27 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/config_prenas.yaml: -------------------------------------------------------------------------------- 1 | aa: rand-n3-m10-mstd0.5-inc1 #rand-m9-mstd0.5-inc1 #m9-n2-mstd0.5-inc1 (random augment) (autoaug x) 2 | amp: true 3 | batch_size: 128 4 | candfile: ./interval_cands/tiny.json 5 | cfg: ./experiments/supernet/tiny.yaml 6 | change_qkv: true 7 | clip_grad: null 8 | color_jitter: 0.4 9 | cooldown_epochs: 10 10 | cutmix: 1.0 #0.0 11 | cutmix_minmax: null 12 | data_path: /data 13 | data_set: IMNET 14 | decay_epochs: 30 15 | decay_rate: 0.1 16 | device: cuda 17 | dist_backend: nccl 18 | dist_eval: true 19 | dist_url: tcp://localhost:2042 20 | distributed: true 21 | drop: 0.0 22 | drop_block: null 23 | drop_path: 0.1 24 | epochs: 500 25 | eval: false 26 | eval_crops: 1 27 | gp: true 28 | gpu: 0 29 | group_by_depth: true 30 | group_by_dim: true 31 | inat_category: name 32 | input_size: 224 33 | lr: 0.0005 34 | lr_noise: null 35 | lr_noise_pct: 0.67 36 | lr_noise_std: 1.0 37 | lr_power: 1.0 38 | max_relative_position: 14 39 |
min_lr: 1.0e-07 40 | mixup: 0.8 #0.0 41 | mixup_mode: elem #x 42 | mixup_prob: 1.0 #0.0 ??? 43 | mixup_switch_prob: 0.5 #0.0 44 | mode: super 45 | model: '' 46 | model_ema: false 47 | model_ema_decay: 0.99996 48 | model_ema_force_cpu: false 49 | momentum: 0.9 50 | no_abs_pos: false 51 | no_sandwich_base: false 52 | no_sandwich_top: false 53 | num_workers: 10 54 | opt: adamw 55 | opt_betas: null 56 | opt_eps: 1.0e-08 57 | output_dir: ./output/tiny/train/2024_07_23-19_48_51 58 | patch_size: 16 59 | patience_epochs: 10 60 | pin_mem: true 61 | platform: pai 62 | post_norm: false 63 | print2file: false 64 | rank: 0 65 | recount: 2 # 1 66 | relative_position: true 67 | remode: pixel 68 | repeated_aug: true 69 | reprob: 0.25 70 | resplit: false 71 | resume: output/tiny/train/2024_07_18-12_40_28/checkpoint-460.pth 72 | rpe_type: bias 73 | sandwich: 0 74 | scale_attn: false 75 | scale_embed: false 76 | scale_mlp: false 77 | sched: cosine 78 | seed: 0 79 | shuffle: false 80 | smoothing: 0.1 81 | start_epoch: 0 82 | switch_ln: false 83 | task: '' 84 | teacher_model: '' 85 | train_interpolation: bicubic 86 | warmup_epochs: 20 87 | warmup_lr: 1.0e-06 88 | weight_decay: 0.05 #0.02 89 | world_size: 8 90 | -------------------------------------------------------------------------------- /AutoFormer/top_k_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pickle 3 | 4 | # Function that reads the log file and extracts the top_k_paths entries 5 | def parse_log_file(log_file_path): 6 | # Extract top_k_paths with a regular expression 7 | top_k_pattern = re.compile(r"top_k_paths\s*:\s*(\[\(.*?\)\])") 8 | 9 | config_list = [] 10 | id = 0 11 | 12 | with open(log_file_path, 'r') as log_file: 13 | for line in log_file: 14 | match = top_k_pattern.search(line) 15 | if match: 16 | top_k_str = match.group(1) 17 | # Convert the string into an actual list using eval 18 | top_k_paths = eval(top_k_str) 19 | 20 | for item in top_k_paths: 21 | loss = item[0] 22 | config = item[1] 23 | mlp_ratio = config['mlp_ratio'] 24 | num_heads = config['num_heads'] 25 | embed_dim = config['embed_dim'] 26 | layer_num = config['layer_num'] 27 | 28 | # Append each item's info (loss, mlp_ratio, num_heads, embed_dim, layer_num) 29 | config_list.append({ 30 | 'loss': loss, 31 | 'mlp_ratio': mlp_ratio, 32 | 'num_heads': num_heads, 33 | 'embed_dim': embed_dim, 34 | 'layer_num': layer_num, 35 | 'id': id 36 | }) 37 | id += 1 38 | 39 | return config_list 40 | 41 | # Function that saves the parsed config list as a pkl file 42 | def save_config_list_to_pkl(config_list, output_pkl_path): 43 | with open(output_pkl_path, 'wb') as f: 44 | pickle.dump(config_list, f) 45 | print(f"Config list saved to {output_pkl_path}") 46 | 47 | # Entry point 48 | if __name__ == "__main__": 49 | log_file_path = './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.log' # log file path 50 | output_pkl_path = './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl' # output pkl file path 51 | 52 | config_list = parse_log_file(log_file_path) 53 | save_config_list_to_pkl(config_list, output_pkl_path) 54 | print(len(config_list)) 55 | print(config_list[100]) 56 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/embedding_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from model.utils import to_2tuple 5 | import numpy as np 6 | 7 | class PatchembedSuper(nn.Module): 8 | def __init__(self, img_size=224, patch_size=16,
in_chans=3, embed_dim=768, scale=False): 9 | super(PatchembedSuper, self).__init__() 10 | 11 | img_size = to_2tuple(img_size) 12 | patch_size = to_2tuple(patch_size) 13 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 14 | self.img_size = img_size 15 | self.patch_size = patch_size 16 | self.num_patches = num_patches 17 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 18 | self.super_embed_dim = embed_dim 19 | self.scale = scale 20 | 21 | # sampled_ 22 | self.sample_embed_dim = None 23 | self.sampled_weight = None 24 | self.sampled_bias = None 25 | self.sampled_scale = None 26 | 27 | def set_sample_config(self, sample_embed_dim): 28 | self.sample_embed_dim = sample_embed_dim 29 | self.sampled_weight = self.proj.weight[:sample_embed_dim, ...] 30 | self.sampled_bias = self.proj.bias[:self.sample_embed_dim, ...] 31 | if self.scale: 32 | self.sampled_scale = self.super_embed_dim / sample_embed_dim 33 | def forward(self, x): 34 | B, C, H, W = x.shape 35 | assert H == self.img_size[0] and W == self.img_size[1], \ 36 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 37 | x = F.conv2d(x, self.sampled_weight, self.sampled_bias, stride=self.patch_size, padding=self.proj.padding, dilation=self.proj.dilation).flatten(2).transpose(1,2) 38 | if self.scale: 39 | return x * self.sampled_scale 40 | return x 41 | def calc_sampled_param_num(self): 42 | return self.sampled_weight.numel() + self.sampled_bias.numel() 43 | 44 | def get_complexity(self, sequence_length): 45 | total_flops = 0 46 | if self.sampled_bias is not None: 47 | total_flops += self.sampled_bias.size(0) 48 | total_flops += sequence_length * np.prod(self.sampled_weight.size()) 49 | return total_flops -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/embedding_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from model.utils import to_2tuple 5 | import numpy as np 6 | 7 | class PatchembedSuper(nn.Module): 8 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, scale=False): 9 | super(PatchembedSuper, self).__init__() 10 | 11 | img_size = to_2tuple(img_size) 12 | patch_size = to_2tuple(patch_size) 13 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 14 | self.img_size = img_size 15 | self.patch_size = patch_size 16 | self.num_patches = num_patches 17 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 18 | self.super_embed_dim = embed_dim 19 | self.scale = scale 20 | 21 | # sampled_ 22 | self.sample_embed_dim = None 23 | self.sampled_weight = None 24 | self.sampled_bias = None 25 | self.sampled_scale = None 26 | 27 | def set_sample_config(self, sample_embed_dim): 28 | self.sample_embed_dim = sample_embed_dim 29 | self.sampled_weight = self.proj.weight[:sample_embed_dim, ...] 30 | self.sampled_bias = self.proj.bias[:self.sample_embed_dim, ...] 31 | if self.scale: 32 | self.sampled_scale = self.super_embed_dim / sample_embed_dim 33 | def forward(self, x): 34 | B, C, H, W = x.shape 35 | assert H == self.img_size[0] and W == self.img_size[1], \ 36 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
37 | x = F.conv2d(x, self.sampled_weight, self.sampled_bias, stride=self.patch_size, padding=self.proj.padding, dilation=self.proj.dilation).flatten(2).transpose(1,2) 38 | if self.scale: 39 | return x * self.sampled_scale 40 | return x 41 | def calc_sampled_param_num(self): 42 | return self.sampled_weight.numel() + self.sampled_bias.numel() 43 | 44 | def get_complexity(self, sequence_length): 45 | total_flops = 0 46 | if self.sampled_bias is not None: 47 | total_flops += self.sampled_bias.size(0) 48 | total_flops += sequence_length * np.prod(self.sampled_weight.size()) 49 | return total_flops -------------------------------------------------------------------------------- /AutoFormer/model/module/layernorm_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def uniform_element_selection(tensor, target_dim): 6 | """ 7 | Uniformly selects elements from the tensor along the specified dimension. 8 | 9 | Parameters: 10 | tensor (torch.Tensor): The input tensor. 11 | target_dim (int): The target dimension size. 12 | 13 | Returns: 14 | torch.Tensor: A tensor with the selected elements. 15 | """ 16 | original_dim = tensor.size(0) 17 | indices = torch.linspace(0, original_dim - 1, target_dim).long().to(tensor.device) 18 | return tensor[indices] 19 | 20 | class LayerNormSuper(torch.nn.LayerNorm): 21 | def __init__(self, super_embed_dim): 22 | super().__init__(super_embed_dim) 23 | 24 | # the largest embed dim 25 | self.super_embed_dim = super_embed_dim 26 | 27 | # the current sampled embed dim 28 | self.sample_embed_dim = None 29 | 30 | self.samples = {} 31 | self.profiling = False 32 | 33 | def profile(self, mode=True): 34 | self.profiling = mode 35 | 36 | def sample_parameters(self, resample=False): 37 | if self.profiling or resample: 38 | return self._sample_parameters() 39 | return self.samples 40 | 41 | def _sample_parameters(self): 42 | # self.samples['weight'] = self.weight[:self.sample_embed_dim] 43 | # self.samples['bias'] = self.bias[:self.sample_embed_dim] 44 | self.samples['weight'] = uniform_element_selection(self.weight, self.sample_embed_dim) 45 | self.samples['bias'] = uniform_element_selection(self.bias, self.sample_embed_dim) 46 | return self.samples 47 | 48 | def set_sample_config(self, sample_embed_dim): 49 | self.sample_embed_dim = sample_embed_dim 50 | self._sample_parameters() 51 | 52 | def forward(self, x): 53 | self.sample_parameters() 54 | return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) 55 | 56 | def calc_sampled_param_num(self): 57 | assert 'weight' in self.samples.keys() 58 | assert 'bias' in self.samples.keys() 59 | return self.samples['weight'].numel() + self.samples['bias'].numel() 60 | 61 | def get_complexity(self, sequence_length): 62 | return sequence_length * self.sample_embed_dim 63 | -------------------------------------------------------------------------------- /AutoFormer/lib/samplers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import math 4 | 5 | 6 | class RASampler(torch.utils.data.Sampler): 7 | """Sampler that restricts data loading to a subset of the dataset for distributed, 8 | with repeated augmentation. 
It ensures that each augmented version of a sample will be visible to a 10 | different process (GPU) 11 | Heavily based on torch.utils.data.DistributedSampler 12 | """ 13 | 14 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 15 | if num_replicas is None: 16 | if not dist.is_available(): 17 | raise RuntimeError("Requires distributed package to be available") 18 | num_replicas = dist.get_world_size() 19 | if rank is None: 20 | if not dist.is_available(): 21 | raise RuntimeError("Requires distributed package to be available") 22 | rank = dist.get_rank() 23 | self.dataset = dataset 24 | self.num_replicas = num_replicas 25 | self.rank = rank 26 | self.epoch = 0 27 | self.num_samples = int(math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) 28 | self.total_size = self.num_samples * self.num_replicas 29 | # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) 30 | self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) 31 | self.shuffle = shuffle 32 | 33 | def __iter__(self): 34 | # deterministically shuffle based on epoch 35 | g = torch.Generator() 36 | g.manual_seed(self.epoch) 37 | if self.shuffle: 38 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 39 | else: 40 | indices = list(range(len(self.dataset))) 41 | 42 | # add extra samples to make it evenly divisible 43 | indices = [ele for ele in indices for i in range(3)] 44 | indices += indices[:(self.total_size - len(indices))] 45 | assert len(indices) == self.total_size 46 | 47 | # subsample 48 | indices = indices[self.rank:self.total_size:self.num_replicas] 49 | 50 | assert len(indices) == self.num_samples 51 | return iter(indices[:self.num_selected_samples]) 52 | 53 | def __len__(self): 54 | return self.num_selected_samples 55 | 56 | def set_epoch(self, epoch): 57 | self.epoch = epoch 58 | -------------------------------------------------------------------------------- /AutoFormer_original/lib/samplers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import math 4 | 5 | 6 | class RASampler(torch.utils.data.Sampler): 7 | """Sampler that restricts data loading to a subset of the dataset for distributed, 8 | with repeated augmentation.
It ensures that each augmented version of a sample will be visible to a 10 | different process (GPU) 11 | Heavily based on torch.utils.data.DistributedSampler 12 | """ 13 | 14 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 15 | if num_replicas is None: 16 | if not dist.is_available(): 17 | raise RuntimeError("Requires distributed package to be available") 18 | num_replicas = dist.get_world_size() 19 | if rank is None: 20 | if not dist.is_available(): 21 | raise RuntimeError("Requires distributed package to be available") 22 | rank = dist.get_rank() 23 | self.dataset = dataset 24 | self.num_replicas = num_replicas 25 | self.rank = rank 26 | self.epoch = 0 27 | self.num_samples = int(math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) 28 | self.total_size = self.num_samples * self.num_replicas 29 | # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) 30 | self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) 31 | self.shuffle = shuffle 32 | 33 | def __iter__(self): 34 | # deterministically shuffle based on epoch 35 | g = torch.Generator() 36 | g.manual_seed(self.epoch) 37 | if self.shuffle: 38 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 39 | else: 40 | indices = list(range(len(self.dataset))) 41 | 42 | # add extra samples to make it evenly divisible 43 | indices = [ele for ele in indices for i in range(3)] 44 | indices += indices[:(self.total_size - len(indices))] 45 | assert len(indices) == self.total_size 46 | 47 | # subsample 48 | indices = indices[self.rank:self.total_size:self.num_replicas] 49 | 50 | assert len(indices) == self.num_samples 51 | return iter(indices[:self.num_selected_samples]) 52 | 53 | def __len__(self): 54 | return self.num_selected_samples 55 | 56 | def set_epoch(self, epoch): 57 | self.epoch = epoch 58 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/lib/samplers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | import math 4 | 5 | 6 | class RASampler(torch.utils.data.Sampler): 7 | """Sampler that restricts data loading to a subset of the dataset for distributed, 8 | with repeated augmentation.
It ensures that each augmented version of a sample will be visible to a 10 | different process (GPU) 11 | Heavily based on torch.utils.data.DistributedSampler 12 | """ 13 | 14 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 15 | if num_replicas is None: 16 | if not dist.is_available(): 17 | raise RuntimeError("Requires distributed package to be available") 18 | num_replicas = dist.get_world_size() 19 | if rank is None: 20 | if not dist.is_available(): 21 | raise RuntimeError("Requires distributed package to be available") 22 | rank = dist.get_rank() 23 | self.dataset = dataset 24 | self.num_replicas = num_replicas 25 | self.rank = rank 26 | self.epoch = 0 27 | self.num_samples = int(math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) 28 | self.total_size = self.num_samples * self.num_replicas 29 | # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) 30 | self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) 31 | self.shuffle = shuffle 32 | 33 | def __iter__(self): 34 | # deterministically shuffle based on epoch 35 | g = torch.Generator() 36 | g.manual_seed(self.epoch) 37 | if self.shuffle: 38 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 39 | else: 40 | indices = list(range(len(self.dataset))) 41 | 42 | # add extra samples to make it evenly divisible 43 | indices = [ele for ele in indices for i in range(3)] 44 | indices += indices[:(self.total_size - len(indices))] 45 | assert len(indices) == self.total_size 46 | 47 | # subsample 48 | indices = indices[self.rank:self.total_size:self.num_replicas] 49 | 50 | assert len(indices) == self.num_samples 51 | return iter(indices[:self.num_selected_samples]) 52 | 53 | def __len__(self): 54 | return self.num_selected_samples 55 | 56 | def set_epoch(self, epoch): 57 | self.epoch = epoch 58 | -------------------------------------------------------------------------------- /AutoFormer/model/module/embedding_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from model.utils import to_2tuple 5 | import numpy as np 6 | 7 | def uniform_element_selection(tensor, target_dim): 8 | """ 9 | Uniformly selects elements from the tensor along the specified dimension. 10 | 11 | Parameters: 12 | tensor (torch.Tensor): The input tensor. 13 | target_dim (int): The target dimension size. 14 | 15 | Returns: 16 | torch.Tensor: A tensor with the selected elements.
17 | """ 18 | original_dim = tensor.size(0) 19 | indices = torch.linspace(0, original_dim - 1, target_dim).long().to(tensor.device) 20 | return tensor[indices] 21 | 22 | class PatchembedSuper(nn.Module): 23 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, scale=False): 24 | super(PatchembedSuper, self).__init__() 25 | 26 | img_size = to_2tuple(img_size) 27 | patch_size = to_2tuple(patch_size) 28 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 29 | self.img_size = img_size 30 | self.patch_size = patch_size 31 | self.num_patches = num_patches 32 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 33 | self.super_embed_dim = embed_dim 34 | self.scale = scale 35 | 36 | # sampled_ 37 | self.sample_embed_dim = None 38 | self.sampled_weight = None 39 | self.sampled_bias = None 40 | self.sampled_scale = None 41 | 42 | def set_sample_config(self, sample_embed_dim): 43 | self.sample_embed_dim = sample_embed_dim 44 | # self.sampled_weight = self.proj.weight[:sample_embed_dim, ...] 45 | # self.sampled_bias = self.proj.bias[:self.sample_embed_dim, ...] 46 | self.sampled_weight = uniform_element_selection(self.proj.weight, sample_embed_dim) 47 | self.sampled_bias = uniform_element_selection(self.proj.bias, sample_embed_dim) 48 | if self.scale: 49 | self.sampled_scale = self.super_embed_dim / sample_embed_dim 50 | 51 | def forward(self, x): 52 | B, C, H, W = x.shape 53 | assert H == self.img_size[0] and W == self.img_size[1], \ 54 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 55 | x = F.conv2d(x, self.sampled_weight, self.sampled_bias, stride=self.patch_size, padding=self.proj.padding, dilation=self.proj.dilation).flatten(2).transpose(1,2) 56 | if self.scale: 57 | return x * self.sampled_scale 58 | return x 59 | def calc_sampled_param_num(self): 60 | return self.sampled_weight.numel() + self.sampled_bias.numel() 61 | 62 | def get_complexity(self, sequence_length): 63 | total_flops = 0 64 | if self.sampled_bias is not None: 65 | total_flops += self.sampled_bias.size(0) 66 | total_flops += sequence_length * np.prod(self.sampled_weight.size()) 67 | return total_flops -------------------------------------------------------------------------------- /AutoFormer/lib/imagenet_withhold.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image 3 | import io 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | import torchvision.transforms as transforms 8 | 9 | 10 | class ImageNet_Withhold(Dataset): 11 | def __init__(self, data_root, ann_file='', transform=None, train=True, task ='train'): 12 | super(ImageNet_Withhold, self).__init__() 13 | ann_file = ann_file + '/' + 'val_true.txt' 14 | train_split = (task == 'train' or task == 'val') 15 | self.data_root = data_root + '/'+ ('train' if train_split else 'val') 16 | 17 | self.data = [] 18 | self.nb_classes = 0 19 | folders = {} 20 | cnt = 0 21 | self.z = ZipReader() 22 | # if train: 23 | # for member in self.tarfile.getmembers(): 24 | # print(member) 25 | # self.tarfile = tarfile.open(self.data_root) 26 | 27 | f = open(ann_file) 28 | prefix = 'data/sdb/imagenet'+'/'+ ('train' if train_split else 'val') + '/' 29 | for line in f: 30 | tmp = line.strip().split('\t')[0] 31 | class_pic = tmp.split('/') 32 | class_tmp = class_pic[0] 33 | pic = class_pic[1] 34 | 35 | if class_tmp in folders: 36 | # print(self.tarfile.getmember(('train/' if train else 'val/') + 
tmp[0] + '.JPEG')) 37 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG', folders[class_tmp])) 38 | else: 39 | folders[class_tmp] = cnt 40 | cnt += 1 41 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG',folders[class_tmp])) 42 | 43 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 44 | std=[0.229, 0.224, 0.225]) 45 | if transform is not None: 46 | self.transforms = transform 47 | else: 48 | if train: 49 | self.transforms = transforms.Compose([ 50 | transforms.RandomSizedCrop(224), 51 | transforms.RandomHorizontalFlip(), 52 | transforms.ToTensor(), 53 | normalize, 54 | ]) 55 | else: 56 | self.transforms = transforms.Compose([ 57 | transforms.Scale(256), 58 | transforms.CenterCrop(224), 59 | transforms.ToTensor(), 60 | normalize, 61 | ]) 62 | 63 | 64 | self.nb_classes = cnt 65 | def __len__(self): 66 | return len(self.data) 67 | 68 | def __getitem__(self, idx): 69 | 70 | # print('extract_file', time.time()-start_time) 71 | iob = self.z.read(self.data_root + '/' + self.data[idx][0], self.data[idx][1]) 72 | iob = io.BytesIO(iob) 73 | img = Image.open(iob).convert('RGB') 74 | target = self.data[idx][2] 75 | if self.transforms is not None: 76 | img = self.transforms(img) 77 | # print('open', time.time()-start_time) 78 | return img, target 79 | -------------------------------------------------------------------------------- /AutoFormer_original/lib/imagenet_withhold.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image 3 | import io 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | import torchvision.transforms as transforms 8 | 9 | 10 | class ImageNet_Withhold(Dataset): 11 | def __init__(self, data_root, ann_file='', transform=None, train=True, task ='train'): 12 | super(ImageNet_Withhold, self).__init__() 13 | ann_file = ann_file + '/' + 'val_true.txt' 14 | train_split = (task == 'train' or task == 'val') 15 | self.data_root = data_root + '/'+ ('train' if train_split else 'val') 16 | 17 | self.data = [] 18 | self.nb_classes = 0 19 | folders = {} 20 | cnt = 0 21 | self.z = ZipReader() 22 | # if train: 23 | # for member in self.tarfile.getmembers(): 24 | # print(member) 25 | # self.tarfile = tarfile.open(self.data_root) 26 | 27 | f = open(ann_file) 28 | prefix = 'data/sdb/imagenet'+'/'+ ('train' if train_split else 'val') + '/' 29 | for line in f: 30 | tmp = line.strip().split('\t')[0] 31 | class_pic = tmp.split('/') 32 | class_tmp = class_pic[0] 33 | pic = class_pic[1] 34 | 35 | if class_tmp in folders: 36 | # print(self.tarfile.getmember(('train/' if train else 'val/') + tmp[0] + '.JPEG')) 37 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG', folders[class_tmp])) 38 | else: 39 | folders[class_tmp] = cnt 40 | cnt += 1 41 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG',folders[class_tmp])) 42 | 43 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 44 | std=[0.229, 0.224, 0.225]) 45 | if transform is not None: 46 | self.transforms = transform 47 | else: 48 | if train: 49 | self.transforms = transforms.Compose([ 50 | transforms.RandomSizedCrop(224), 51 | transforms.RandomHorizontalFlip(), 52 | transforms.ToTensor(), 53 | normalize, 54 | ]) 55 | else: 56 | self.transforms = transforms.Compose([ 57 | transforms.Scale(256), 58 | transforms.CenterCrop(224), 59 | transforms.ToTensor(), 60 | normalize, 61 | ]) 62 | 63 | 64 | self.nb_classes = cnt 65 | def __len__(self): 66 | return len(self.data) 67 | 68 | def __getitem__(self, idx): 69 | 70 | # 
print('extract_file', time.time()-start_time) 71 | iob = self.z.read(self.data_root + '/' + self.data[idx][0], self.data[idx][1]) 72 | iob = io.BytesIO(iob) 73 | img = Image.open(iob).convert('RGB') 74 | target = self.data[idx][2] 75 | if self.transforms is not None: 76 | img = self.transforms(img) 77 | # print('open', time.time()-start_time) 78 | return img, target 79 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/Linear_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | class LinearSuper(nn.Linear): 7 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 8 | super().__init__(super_in_dim, super_out_dim, bias=bias) 9 | 10 | # super_in_dim and super_out_dim indicate the largest network! 11 | self.super_in_dim = super_in_dim 12 | self.super_out_dim = super_out_dim 13 | 14 | # input_dim and output_dim indicate the current sampled size 15 | self.sample_in_dim = None 16 | self.sample_out_dim = None 17 | 18 | self.samples = {} 19 | 20 | self.scale = scale 21 | self._reset_parameters(bias, uniform_, non_linear) 22 | self.profiling = False 23 | 24 | def profile(self, mode=True): 25 | self.profiling = mode 26 | 27 | def sample_parameters(self, resample=False): 28 | if self.profiling or resample: 29 | return self._sample_parameters() 30 | return self.samples 31 | 32 | def _reset_parameters(self, bias, uniform_, non_linear): 33 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 34 | self.weight, non_linear=non_linear) 35 | if bias: 36 | nn.init.constant_(self.bias, 0.) 
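    # NOTE: a minimal usage sketch with hypothetical dimensions (not taken from the
    # repo's configs): for fc = LinearSuper(super_in_dim=640, super_out_dim=2560),
    # calling fc.set_sample_config(sample_in_dim=320, sample_out_dim=1280) makes
    # forward() use only the top-left 1280x320 block of the full 2560x640 weight
    # (see sample_weight/sample_bias at the bottom of this file), so every sampled
    # subnet shares the same underlying supernet parameters.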
37 | 38 | def set_sample_config(self, sample_in_dim, sample_out_dim): 39 | self.sample_in_dim = sample_in_dim 40 | self.sample_out_dim = sample_out_dim 41 | 42 | self._sample_parameters() 43 | 44 | def _sample_parameters(self): 45 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 46 | self.samples['bias'] = self.bias 47 | self.sample_scale = self.super_out_dim/self.sample_out_dim 48 | if self.bias is not None: 49 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 50 | return self.samples 51 | 52 | def forward(self, x): 53 | self.sample_parameters() 54 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 55 | 56 | def calc_sampled_param_num(self): 57 | assert 'weight' in self.samples.keys() 58 | weight_numel = self.samples['weight'].numel() 59 | 60 | if self.samples['bias'] is not None: 61 | bias_numel = self.samples['bias'].numel() 62 | else: 63 | bias_numel = 0 64 | 65 | return weight_numel + bias_numel 66 | def get_complexity(self, sequence_length): 67 | total_flops = 0 68 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 69 | return total_flops 70 | 71 | def sample_weight(weight, sample_in_dim, sample_out_dim): 72 | sample_weight = weight[:, :sample_in_dim] 73 | sample_weight = sample_weight[:sample_out_dim, :] 74 | 75 | return sample_weight 76 | 77 | 78 | def sample_bias(bias, sample_out_dim): 79 | sample_bias = bias[:sample_out_dim] 80 | 81 | return sample_bias 82 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/lib/imagenet_withhold.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image 3 | import io 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | import torchvision.transforms as transforms 8 | 9 | 10 | class ImageNet_Withhold(Dataset): 11 | def __init__(self, data_root, ann_file='', transform=None, train=True, task ='train'): 12 | super(ImageNet_Withhold, self).__init__() 13 | ann_file = ann_file + '/' + 'val_true.txt' 14 | train_split = (task == 'train' or task == 'val') 15 | self.data_root = data_root + '/'+ ('train' if train_split else 'val') 16 | 17 | self.data = [] 18 | self.nb_classes = 0 19 | folders = {} 20 | cnt = 0 21 | self.z = ZipReader() 22 | # if train: 23 | # for member in self.tarfile.getmembers(): 24 | # print(member) 25 | # self.tarfile = tarfile.open(self.data_root) 26 | 27 | f = open(ann_file) 28 | prefix = 'data/sdb/imagenet'+'/'+ ('train' if train_split else 'val') + '/' 29 | for line in f: 30 | tmp = line.strip().split('\t')[0] 31 | class_pic = tmp.split('/') 32 | class_tmp = class_pic[0] 33 | pic = class_pic[1] 34 | 35 | if class_tmp in folders: 36 | # print(self.tarfile.getmember(('train/' if train else 'val/') + tmp[0] + '.JPEG')) 37 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG', folders[class_tmp])) 38 | else: 39 | folders[class_tmp] = cnt 40 | cnt += 1 41 | self.data.append((class_tmp + '.zip', prefix + tmp + '.JPEG',folders[class_tmp])) 42 | 43 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 44 | std=[0.229, 0.224, 0.225]) 45 | if transform is not None: 46 | self.transforms = transform 47 | else: 48 | if train: 49 | self.transforms = transforms.Compose([ 50 | transforms.RandomSizedCrop(224), 51 | transforms.RandomHorizontalFlip(), 52 | transforms.ToTensor(), 53 | normalize, 54 | ]) 55 | else: 56 | self.transforms = transforms.Compose([ 57 | 
transforms.Scale(256), 58 | transforms.CenterCrop(224), 59 | transforms.ToTensor(), 60 | normalize, 61 | ]) 62 | 63 | 64 | self.nb_classes = cnt 65 | def __len__(self): 66 | return len(self.data) 67 | 68 | def __getitem__(self, idx): 69 | 70 | # print('extract_file', time.time()-start_time) 71 | iob = self.z.read(self.data_root + '/' + self.data[idx][0], self.data[idx][1]) 72 | iob = io.BytesIO(iob) 73 | img = Image.open(iob).convert('RGB') 74 | target = self.data[idx][2] 75 | if self.transforms is not None: 76 | img = self.transforms(img) 77 | # print('open', time.time()-start_time) 78 | return img, target 79 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/Linear_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | class LinearSuper(nn.Linear): 7 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 8 | super().__init__(super_in_dim, super_out_dim, bias=bias) 9 | 10 | # super_in_dim and super_out_dim indicate the largest network! 11 | self.super_in_dim = super_in_dim 12 | self.super_out_dim = super_out_dim 13 | 14 | # input_dim and output_dim indicate the current sampled size 15 | self.sample_in_dim = None 16 | self.sample_out_dim = None 17 | 18 | self.samples = {} 19 | 20 | self.scale = scale 21 | self._reset_parameters(bias, uniform_, non_linear) 22 | self.profiling = False 23 | 24 | def profile(self, mode=True): 25 | self.profiling = mode 26 | 27 | def sample_parameters(self, resample=False): 28 | if self.profiling or resample: 29 | return self._sample_parameters() 30 | return self.samples 31 | 32 | def _reset_parameters(self, bias, uniform_, non_linear): 33 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 34 | self.weight, non_linear=non_linear) 35 | if bias: 36 | nn.init.constant_(self.bias, 0.) 
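    # NOTE: when scale=True, forward() multiplies the output by
    # sample_scale = super_out_dim / sample_out_dim (set in _sample_parameters below).
    # With hypothetical dims super_out_dim=2560 and sample_out_dim=1280, activations
    # are scaled by 2.0, keeping their magnitude roughly comparable across sampled widths.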
37 | 38 | def set_sample_config(self, sample_in_dim, sample_out_dim): 39 | self.sample_in_dim = sample_in_dim 40 | self.sample_out_dim = sample_out_dim 41 | 42 | self._sample_parameters() 43 | 44 | def _sample_parameters(self): 45 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 46 | self.samples['bias'] = self.bias 47 | self.sample_scale = self.super_out_dim/self.sample_out_dim 48 | if self.bias is not None: 49 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 50 | return self.samples 51 | 52 | def forward(self, x): 53 | self.sample_parameters() 54 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 55 | 56 | def calc_sampled_param_num(self): 57 | assert 'weight' in self.samples.keys() 58 | weight_numel = self.samples['weight'].numel() 59 | 60 | if self.samples['bias'] is not None: 61 | bias_numel = self.samples['bias'].numel() 62 | else: 63 | bias_numel = 0 64 | 65 | return weight_numel + bias_numel 66 | def get_complexity(self, sequence_length): 67 | total_flops = 0 68 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 69 | return total_flops 70 | 71 | def sample_weight(weight, sample_in_dim, sample_out_dim): 72 | sample_weight = weight[:, :sample_in_dim] 73 | sample_weight = sample_weight[:sample_out_dim, :] 74 | 75 | return sample_weight 76 | 77 | 78 | def sample_bias(bias, sample_out_dim): 79 | sample_bias = bias[:sample_out_dim] 80 | 81 | return sample_bias 82 | -------------------------------------------------------------------------------- /AutoFormer_original/model/module/qkv_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class qkv_super(nn.Linear): 8 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 9 | super().__init__(super_in_dim, super_out_dim, bias=bias) 10 | 11 | # super_in_dim and super_out_dim indicate the largest network! 12 | self.super_in_dim = super_in_dim 13 | self.super_out_dim = super_out_dim 14 | 15 | # input_dim and output_dim indicate the current sampled size 16 | self.sample_in_dim = None 17 | self.sample_out_dim = None 18 | 19 | self.samples = {} 20 | 21 | self.scale = scale 22 | # self._reset_parameters(bias, uniform_, non_linear) 23 | self.profiling = False 24 | 25 | def profile(self, mode=True): 26 | self.profiling = mode 27 | 28 | def sample_parameters(self, resample=False): 29 | if self.profiling or resample: 30 | return self._sample_parameters() 31 | return self.samples 32 | 33 | def _reset_parameters(self, bias, uniform_, non_linear): 34 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 35 | self.weight, non_linear=non_linear) 36 | if bias: 37 | nn.init.constant_(self.bias, 0.) 
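    # NOTE: unlike LinearSuper, sample_weight at the bottom of this file samples the
    # packed q/k/v output rows with stride 3: for a toy weight with rows [0..8] and
    # sample_out_dim=6, the slices i:6:3 for i in (0, 1, 2) pick rows [0, 3], [1, 4]
    # and [2, 5], and the concatenation yields row order [0, 3, 1, 4, 2, 5]. Rows are
    # thus regrouped by index modulo 3, so sample_out_dim should be a multiple of 3
    # for the three groups to stay the same size.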
38 | 39 | def set_sample_config(self, sample_in_dim, sample_out_dim): 40 | self.sample_in_dim = sample_in_dim 41 | self.sample_out_dim = sample_out_dim 42 | 43 | self._sample_parameters() 44 | 45 | def _sample_parameters(self): 46 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 47 | self.samples['bias'] = self.bias 48 | self.sample_scale = self.super_out_dim/self.sample_out_dim 49 | if self.bias is not None: 50 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 51 | return self.samples 52 | 53 | def forward(self, x): 54 | self.sample_parameters() 55 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 56 | 57 | def calc_sampled_param_num(self): 58 | assert 'weight' in self.samples.keys() 59 | weight_numel = self.samples['weight'].numel() 60 | 61 | if self.samples['bias'] is not None: 62 | bias_numel = self.samples['bias'].numel() 63 | else: 64 | bias_numel = 0 65 | 66 | return weight_numel + bias_numel 67 | def get_complexity(self, sequence_length): 68 | total_flops = 0 69 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 70 | return total_flops 71 | 72 | def sample_weight(weight, sample_in_dim, sample_out_dim): 73 | 74 | sample_weight = weight[:, :sample_in_dim] 75 | sample_weight = torch.cat([sample_weight[i:sample_out_dim:3, :] for i in range(3)], dim =0) 76 | 77 | return sample_weight 78 | 79 | 80 | def sample_bias(bias, sample_out_dim): 81 | sample_bias = bias[:sample_out_dim] 82 | 83 | return sample_bias 84 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/module/qkv_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class qkv_super(nn.Linear): 8 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 9 | super().__init__(super_in_dim, super_out_dim, bias=bias) 10 | 11 | # super_in_dim and super_out_dim indicate the largest network! 12 | self.super_in_dim = super_in_dim 13 | self.super_out_dim = super_out_dim 14 | 15 | # input_dim and output_dim indicate the current sampled size 16 | self.sample_in_dim = None 17 | self.sample_out_dim = None 18 | 19 | self.samples = {} 20 | 21 | self.scale = scale 22 | # self._reset_parameters(bias, uniform_, non_linear) 23 | self.profiling = False 24 | 25 | def profile(self, mode=True): 26 | self.profiling = mode 27 | 28 | def sample_parameters(self, resample=False): 29 | if self.profiling or resample: 30 | return self._sample_parameters() 31 | return self.samples 32 | 33 | def _reset_parameters(self, bias, uniform_, non_linear): 34 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 35 | self.weight, non_linear=non_linear) 36 | if bias: 37 | nn.init.constant_(self.bias, 0.) 
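    # NOTE: a hedged usage sketch with hypothetical numbers: an attention block with
    # a sampled embedding dim of 192 and 3 sampled heads of size 64 would call
    #   qkv.set_sample_config(sample_in_dim=192, sample_out_dim=3 * 64 * 3)
    # so that forward() projects to the packed (q, k, v) tensor of the subnet. Also
    # note that the _reset_parameters call is commented out in __init__ above, so
    # the qkv weights keep nn.Linear's default initialization.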
38 | 39 | def set_sample_config(self, sample_in_dim, sample_out_dim): 40 | self.sample_in_dim = sample_in_dim 41 | self.sample_out_dim = sample_out_dim 42 | 43 | self._sample_parameters() 44 | 45 | def _sample_parameters(self): 46 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 47 | self.samples['bias'] = self.bias 48 | self.sample_scale = self.super_out_dim/self.sample_out_dim 49 | if self.bias is not None: 50 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 51 | return self.samples 52 | 53 | def forward(self, x): 54 | self.sample_parameters() 55 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 56 | 57 | def calc_sampled_param_num(self): 58 | assert 'weight' in self.samples.keys() 59 | weight_numel = self.samples['weight'].numel() 60 | 61 | if self.samples['bias'] is not None: 62 | bias_numel = self.samples['bias'].numel() 63 | else: 64 | bias_numel = 0 65 | 66 | return weight_numel + bias_numel 67 | def get_complexity(self, sequence_length): 68 | total_flops = 0 69 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 70 | return total_flops 71 | 72 | def sample_weight(weight, sample_in_dim, sample_out_dim): 73 | 74 | sample_weight = weight[:, :sample_in_dim] 75 | sample_weight = torch.cat([sample_weight[i:sample_out_dim:3, :] for i in range(3)], dim =0) 76 | 77 | return sample_weight 78 | 79 | 80 | def sample_bias(bias, sample_out_dim): 81 | sample_bias = bias[:sample_out_dim] 82 | 83 | return sample_bias 84 | -------------------------------------------------------------------------------- /AutoFormer/train_supernet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 4 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 5 | --log-file-path './greedyTAS/greedyTAS-epoch100-top-k(full).log' \ 6 | --resume './greedyTAS/checkpoint-4.pth' --output /OUTPUT_PATH --batch-size 128 7 | 8 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 9 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 10 | --log-file-path './greedyTAS/greedyTAS-epoch200-top-k(full).log' \ 11 | --resume './greedyTAS/checkpoint-9.pth' --output /OUTPUT_PATH --batch-size 128 12 | 13 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 14 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 15 | --log-file-path './greedyTAS/greedyTAS-epoch300-top-k(full).log' \ 16 | --resume './greedyTAS/checkpoint-14.pth' --output /OUTPUT_PATH --batch-size 128 17 | 18 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 19 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 20 | --log-file-path './greedyTAS/greedyTAS-epoch400-top-k(full).log' \ 21 | --resume './greedyTAS/checkpoint-19.pth' --output /OUTPUT_PATH --batch-size 128 22 | 23 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py 
--data-path '/data' --gp \ 24 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 25 | --log-file-path './greedyTAS/greedyTAS-epoch500-top-k(full).log' \ 26 | --resume './greedyTAS/checkpoint-24.pth' --output /OUTPUT_PATH --batch-size 128 27 | 28 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 29 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 30 | --log-file-path './greedyTAS/greedyTAS-epoch0-top-k(full).log' \ 31 | --output /OUTPUT_PATH --batch-size 128 32 | 33 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path '/data' --gp \ 34 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 35 | --log-file-path './greedyTAS/greedyTAS-epoch20-top-k(full).log' \ 36 | --resume './greedyTAS/checkpoint-0.pth' --output /OUTPUT_PATH --batch-size 128 37 | 38 | 39 | # --resume './greedyTAS/greedyTAS-epoch100-test/checkpoint-4.pth' 40 | # --resume './greedyTAS/checkpoint-09121607.pth' 41 | # --resume './experiments/supernet/autoformer_t_500ep.pth' 42 | # --resume './greedyTAS/greedyTAS-epoch20-test/checkpoint-0.pth' 43 | # --resume './greedyTAS/greedyTAS-epoch59/checkpoint.pth' 44 | # --resume './greedyTAS/checkpoint-24.pth' -------------------------------------------------------------------------------- /AutoFormer/training_free/indicators/snip.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | import copy 7 | import types 8 | 9 | from . 
import indicator 10 | from ..p_utils import get_layer_metric_array 11 | 12 | 13 | def snip_forward_conv2d(self, x): 14 | return F.conv2d(x, self.sampled_weight * self.weight_mask, self.sampled_bias, 15 | stride=self.patch_size, padding=self.proj.padding, dilation=self.proj.dilation).flatten(2).transpose(1,2) 16 | 17 | def snip_forward_linear(self, x): 18 | return F.linear(x, self.samples['weight'] * self.weight_mask, self.samples['bias']) 19 | 20 | 21 | def snip_forward_linear_(self, x): 22 | return F.linear(x, self.weight * self.weight_mask, self.bias) 23 | 24 | @indicator('snip', bn=True, mode='param') 25 | def compute_snip_per_weight(net, inputs, targets, mode, loss_fn, split_data=1): 26 | for layer in net.modules(): 27 | if layer._get_name() == 'PatchembedSuper': 28 | layer.weight_mask = nn.Parameter(torch.ones_like(layer.sampled_weight)) 29 | layer.sampled_weight = layer.sampled_weight.detach() 30 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 31 | layer.weight_mask = nn.Parameter(torch.ones_like(layer.samples['weight'])) 32 | layer.samples['weight'] = layer.samples['weight'].detach() 33 | if isinstance(layer, nn.Linear) and layer.out_features == 1000: 34 | layer.weight_mask = nn.Parameter(torch.ones_like(layer.samples['weight'])) 35 | layer.samples['weight'] = layer.samples['weight'].detach() 36 | 37 | # Override the forward methods: 38 | if layer._get_name() == 'PatchembedSuper': 39 | layer.forward = types.MethodType(snip_forward_conv2d, layer) 40 | 41 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 42 | layer.forward = types.MethodType(snip_forward_linear, layer) 43 | if isinstance(layer, nn.Linear) and layer.out_features == 1000: 44 | layer.forward = types.MethodType(snip_forward_linear, layer) 45 | 46 | # Compute gradients (but don't apply them) 47 | net.zero_grad() 48 | N = inputs.shape[0] 49 | for sp in range(split_data): 50 | st=sp*N//split_data 51 | en=(sp+1)*N//split_data 52 | 53 | outputs = net.forward(inputs[st:en]) 54 | loss = loss_fn(outputs, targets[st:en]) 55 | loss.backward() 56 | 57 | # select the gradients that we want to use for search/prune 58 | def snip(layer): 59 | if layer._get_name() == 'PatchembedSuper': 60 | if layer.weight_mask.grad is not None: 61 | return torch.abs(layer.weight_mask.grad) 62 | else: 63 | return torch.zeros_like(layer.weight) 64 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 65 | if layer.weight_mask.grad is not None: 66 | return torch.abs(layer.weight_mask.grad) 67 | else: 68 | return torch.zeros_like(layer.weight) 69 | if isinstance(layer, nn.Linear) and layer.out_features == 1000: 70 | if layer.weight_mask.grad is not None: 71 | return torch.abs(layer.weight_mask.grad) 72 | else: 73 | return torch.zeros_like(layer.weight) 74 | 75 | grads_abs = get_layer_metric_array(net, snip, mode) 76 | 77 | return grads_abs 78 | -------------------------------------------------------------------------------- /AutoFormer/training_free/compute_indicators.py: -------------------------------------------------------------------------------- 1 | from .p_utils import * 2 | from . 
import indicators 3 | 4 | import types 5 | import copy 6 | 7 | def no_op(self,x): 8 | return x 9 | 10 | def copynet(self, bn): 11 | net = copy.deepcopy(self) 12 | if bn==False: 13 | for l in net.modules(): 14 | if isinstance(l,nn.BatchNorm2d) or isinstance(l,nn.BatchNorm1d) : 15 | l.forward = types.MethodType(no_op, l) 16 | return net 17 | 18 | def find_indicators_arrays(net_orig, trainloader, dataload_info, device, indicator_names=None, loss_fn=F.cross_entropy): 19 | if indicator_names is None: 20 | indicator_names = indicators.available_indicators 21 | 22 | dataload, num_imgs_or_batches, num_classes = dataload_info 23 | 24 | net_orig.to(device) 25 | if not hasattr(net_orig,'get_copy'): 26 | net_orig.get_copy = types.MethodType(copynet, net_orig) 27 | 28 | # move to cpu to free up mem 29 | torch.cuda.empty_cache() 30 | net_orig = net_orig.cpu() 31 | torch.cuda.empty_cache() 32 | 33 | # given 1 minibatch of data 34 | if dataload == 'random': 35 | inputs, targets = get_some_data(trainloader, num_batches=num_imgs_or_batches, device=device) 36 | elif dataload == 'grasp': 37 | inputs, targets = get_some_data_grasp(trainloader, num_classes, samples_per_class=num_imgs_or_batches, device=device) 38 | else: 39 | raise NotImplementedError(f'dataload {dataload} is not supported') 40 | 41 | done, ds = False, 10 42 | indicator_values = {} 43 | 44 | while not done: 45 | try: 46 | for indicator_name in indicator_names: 47 | if indicator_name not in indicator_values: 48 | if indicator_name == 'NASWOT' or indicator_name=='te_nas': 49 | val = indicators.calc_indicator(indicator_name, net_orig, device) 50 | else: 51 | val = indicators.calc_indicator(indicator_name, net_orig, device, inputs, targets, loss_fn=loss_fn, split_data=ds) 52 | indicator_values[indicator_name] = val 53 | 54 | done = True 55 | except RuntimeError as e: 56 | if 'out of memory' in str(e): 57 | done = False # keep retrying with a finer data split 58 | if ds == inputs.shape[0]//2: 59 | raise ValueError(f'Can\'t split data anymore, but still unable to run. Something is wrong') 60 | ds += 1 61 | while inputs.shape[0] % ds != 0: 62 | ds += 1 63 | torch.cuda.empty_cache() 64 | print(f'Caught CUDA OOM, retrying with data split into {ds} parts') 65 | else: 66 | raise e 67 | 68 | net_orig = net_orig.to(device).train() 69 | return indicator_values 70 | 71 | def find_indicators(net_orig, 72 | dataloader, 73 | dataload_info, 74 | device, 75 | loss_fn=F.cross_entropy, 76 | indicator_names=None, 77 | indicators_arr=None): 78 | 79 | 80 | def sum_arr(arr): 81 | sum = 0. 82 | for i in range(len(arr)): 83 | sum += torch.sum(arr[i]) 84 | return sum.item() 85 | 86 | if indicators_arr is None: 87 | indicators_arr = find_indicators_arrays(net_orig, dataloader, dataload_info, device, loss_fn=loss_fn, indicator_names=indicator_names) 88 | 89 | indicators = {} 90 | for k,v in indicators_arr.items(): 91 | if k == 'NASWOT' or k=='te_nas': 92 | indicators[k] = v 93 | else: 94 | indicators[k] = sum_arr(v) 95 | 96 | return indicators 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # One-Shot-TAS 2 | 3 | ## How to Start 4 | 5 | ``` 6 | apt install python3.8-venv 7 | 8 | cd ./AutoFormer 9 | 10 | python3 -m venv {your_venv_name} 11 | 12 | source {your_venv_name}/bin/activate 13 | 14 | pip install -r requirements.txt 15 | 16 | 17 | # if a 'Pillow' error occurs...
18 | 19 | sudo apt-get install python3-dev 20 | 21 | pip install wheel 22 | 23 | apt-get update 24 | 25 | apt-get install build-essential 26 | 27 | apt-get install libjpeg-dev 28 | 29 | apt-get install libpng-dev libtiff-dev 30 | 31 | pip install pillow==6.1.0 32 | 33 | `pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html` 34 | 35 | ``` 36 | 37 | 38 | ## Data Preparation 39 | You first need to download [ImageNet-2012](http://www.image-net.org/) to the folder `./data/imagenet` and move the validation set to the subfolder `./data/imagenet/val`. To move the validation set into per-class folders, you could use the standard ImageNet validation preparation script. 40 | 41 | The directory structure is the standard layout, as follows. 42 | ``` 43 | /path/to/imagenet/ 44 | train/ 45 | class1/ 46 | img1.jpeg 47 | class2/ 48 | img2.jpeg 49 | val/ 50 | class1/ 51 | img3.jpeg 52 | class2/ 53 | img4.jpeg 54 | ``` 55 | 56 | ## Quick Start 57 | We provide the *Supernet Train, Search, Test* code of AutoFormer as follows. 58 | 59 | ### Supernet Train 60 | 61 | To train the supernet-T/S/B, we provide the corresponding supernet configuration files in `/experiments/supernet/`. For example, to train the supernet-B, you can run the following command. The default output path is `./`; you can specify a different path with the `--output` argument. 62 | 63 | ```buildoutcfg 64 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENET --gp \ 65 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --epochs 500 --warmup-epochs 20 \ 66 | --output /OUTPUT_PATH --batch-size 128 67 | ``` 68 | 69 | ### Search 70 | We run our evolution search on part of the ImageNet training dataset and use the validation set of ImageNet as the test set for a fair comparison. To generate the subImageNet in `/PATH/TO/IMAGENET`, you can simply run: 71 | ```buildoutcfg 72 | python ./lib/subImageNet.py --data-path /PATH/TO/IMAGENET 73 | ``` 74 | 75 | 76 | After obtaining the subImageNet and training the supernet, we can perform the evolution search using the command below. Please remember to configure the parameter constraints for the evolution search using `--min-param-limits` and `--param-limits` (a fully concrete example is given at the end of this README): 77 | ```buildoutcfg 78 | python -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path /PATH/TO/IMAGENET --gp \ 79 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --resume /PATH/TO/CHECKPOINT \ 80 | --min-param-limits YOUR/CONFIG --param-limits YOUR/CONFIG --data-set EVO_IMNET 81 | ``` 82 | 83 | ### Test 84 | To test our trained models, you need to put the downloaded model in `/PATH/TO/CHECKPOINT`. After that, you can use the following command to test the model (please change the config file and model checkpoint according to the model; here we use AutoFormer-B as an example). 85 | ```buildoutcfg 86 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENET --gp \ 87 | --change_qk --relative_position --mode retrain --dist-eval --cfg ./experiments/subnet/AutoFormer-B.yaml --resume /PATH/TO/CHECKPOINT --eval 88 | ``` 89 | 90 | ## Acknowledgements 91 | 92 | The code is inspired by [Autoformer](https://github.com/microsoft/Cream/tree/main/AutoFormer) and [tf-tas](https://github.com/decemberzhou/TF_TAS).
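For reference, here is a fully concrete instance of the evolution search above, as a minimal sketch: the supernet-T configuration and the 5-13 M parameter window are taken from this repository's search scripts, while the data and checkpoint paths remain placeholders to adapt:

```buildoutcfg
python -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path /PATH/TO/IMAGENET --gp \
--change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume /PATH/TO/CHECKPOINT \
--min-param-limits 5 --param-limits 13 --data-set EVO_IMNET
```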
93 | 94 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/train_supernet_only_supernet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set UTF-8 environment variables (to avoid UnicodeEncodeError) 4 | export PYTHONIOENCODING=utf-8 5 | export LC_ALL=C.UTF-8 6 | export LANG=C.UTF-8 7 | 8 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train_only_supernet.py --data-path '/data' --gp \ 9 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 10 | # --output /OUTPUT_PATH --batch-size 128 \ 11 | # --save_checkpoint_path 'checkpoint-tiny-only-supernet-maximum240-' --save_log_path './log/supernet_tiny-only-supernet-maximum240.log' --interval 1 12 | 13 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 14 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-maximum240-21.pth' \ 15 | # --min-param-limits 5 --param-limits 13 \ 16 | # --log-file-path './log/search_tiny-only-supernet240-minimum_pop1050.log' 17 | 18 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 19 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 20 | # --min-param-limits 5 --param-limits 13 \ 21 | # --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050.log' 22 | 23 | # --min-param-limits 5 --param-limits 6 \ 24 | 25 | 26 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 27 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 28 | --min-param-limits 6 --param-limits 7 \ 29 | --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050_7M.log' 30 | 31 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 32 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 33 | --min-param-limits 7 --param-limits 8 \ 34 | --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050_8M.log' 35 | 36 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 37 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 38 | --min-param-limits 8 --param-limits 9 \ 39 | --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050_9M.log' 40 | 41 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 42 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 43 | --min-param-limits 9 --param-limits 10 \ 44 | --log-file-path './log/search_tiny-only-supernet192-minimum_pop1050_10M.log' 45 | 46 | # python -m torch.distributed.launch --nproc_per_node=8 --use_env
supernet_train_only_supernet.py --data-path '/data' --gp \ 47 | # --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --epochs 500 --warmup-epochs 20 \ 48 | # --output /OUTPUT_PATH --batch-size 128 \ 49 | # --save_checkpoint_path 'checkpoint-tiny-only-supernet-minimum-' --save_log_path './log/supernet_tiny-only-supernet-minimum.log' --interval 1 50 | 51 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution_only_supernet.py --data-path '/data' --gp \ 52 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-tiny-only-supernet-minimum-21.pth' \ 53 | # --min-param-limits 5 --param-limits 6 \ 54 | # --log-file-path './log/search_tiny-only-supernet-minimum_6M.log' 55 | -------------------------------------------------------------------------------- /AutoFormer/model/module/Linear_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | def uniform_element_selection(tensor, target_dim, dim): 7 | """ 8 | Uniformly selects elements from the tensor along the specified dimension. 9 | 10 | Parameters: 11 | tensor (torch.Tensor): The input tensor. 12 | target_dim (int): The target dimension size. 13 | dim (int): The dimension along which to select elements. 14 | 15 | Returns: 16 | torch.Tensor: A tensor with the selected elements. 17 | """ 18 | original_dim = tensor.size(dim) 19 | indices = torch.linspace(0, original_dim - 1, target_dim).long().to(tensor.device) 20 | return tensor.index_select(dim, indices) 21 | 22 | class LinearSuper(nn.Linear): 23 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 24 | super().__init__(super_in_dim, super_out_dim, bias=bias) 25 | 26 | # super_in_dim and super_out_dim indicate the largest network! 27 | self.super_in_dim = super_in_dim 28 | self.super_out_dim = super_out_dim 29 | 30 | # input_dim and output_dim indicate the current sampled size 31 | self.sample_in_dim = None 32 | self.sample_out_dim = None 33 | 34 | self.samples = {} 35 | 36 | self.scale = scale 37 | self._reset_parameters(bias, uniform_, non_linear) 38 | self.profiling = False 39 | 40 | def profile(self, mode=True): 41 | self.profiling = mode 42 | 43 | def sample_parameters(self, resample=False): 44 | if self.profiling or resample: 45 | return self._sample_parameters() 46 | return self.samples 47 | 48 | def _reset_parameters(self, bias, uniform_, non_linear): 49 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 50 | self.weight, non_linear=non_linear) 51 | if bias: 52 | nn.init.constant_(self.bias, 0.) 
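    # NOTE: worked example of uniform_element_selection (defined at the top of this
    # file): for original_dim=10 and target_dim=4, torch.linspace(0, 9, 4) gives
    # [0., 3., 6., 9.], so elements 0, 3, 6 and 9 are kept. Unlike the prefix slicing
    # in the *_original variants of this file, this spreads the sampled rows and
    # columns evenly across the full super-dimension.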
53 | 54 | def set_sample_config(self, sample_in_dim, sample_out_dim): 55 | self.sample_in_dim = sample_in_dim 56 | self.sample_out_dim = sample_out_dim 57 | 58 | self._sample_parameters() 59 | 60 | def _sample_parameters(self): 61 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 62 | self.samples['bias'] = self.bias 63 | self.sample_scale = self.super_out_dim/self.sample_out_dim 64 | if self.bias is not None: 65 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 66 | return self.samples 67 | 68 | def forward(self, x): 69 | self.sample_parameters() 70 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 71 | 72 | def calc_sampled_param_num(self): 73 | assert 'weight' in self.samples.keys() 74 | weight_numel = self.samples['weight'].numel() 75 | 76 | if self.samples['bias'] is not None: 77 | bias_numel = self.samples['bias'].numel() 78 | else: 79 | bias_numel = 0 80 | 81 | return weight_numel + bias_numel 82 | def get_complexity(self, sequence_length): 83 | total_flops = 0 84 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 85 | return total_flops 86 | 87 | def sample_weight(weight, sample_in_dim, sample_out_dim): 88 | # sample_weight = weight[:, :sample_in_dim] 89 | # sample_weight = sample_weight[:sample_out_dim, :] 90 | sample_weight = uniform_element_selection(weight, sample_in_dim, dim=1) 91 | sample_weight = uniform_element_selection(sample_weight, sample_out_dim, dim=0) 92 | 93 | return sample_weight 94 | 95 | 96 | def sample_bias(bias, sample_out_dim): 97 | # sample_bias = bias[:sample_out_dim] 98 | sample_bias = uniform_element_selection(bias, sample_out_dim, dim=0) 99 | 100 | return sample_bias 101 | -------------------------------------------------------------------------------- /AutoFormer/model/module/qkv_super.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | def uniform_element_selection(tensor, target_dim, dim): 7 | """ 8 | Uniformly selects elements from the tensor along the specified dimension. 9 | 10 | Parameters: 11 | tensor (torch.Tensor): The input tensor. 12 | target_dim (int): The target dimension size. 13 | dim (int): The dimension along which to select elements. 14 | 15 | Returns: 16 | torch.Tensor: A tensor with the selected elements. 17 | """ 18 | original_dim = tensor.size(dim) 19 | indices = torch.linspace(0, original_dim - 1, target_dim).long().to(tensor.device) 20 | return tensor.index_select(dim, indices) 21 | 22 | class qkv_super(nn.Linear): 23 | def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear', scale=False): 24 | super().__init__(super_in_dim, super_out_dim, bias=bias) 25 | 26 | # super_in_dim and super_out_dim indicate the largest network! 
27 | self.super_in_dim = super_in_dim 28 | self.super_out_dim = super_out_dim 29 | 30 | # input_dim and output_dim indicate the current sampled size 31 | self.sample_in_dim = None 32 | self.sample_out_dim = None 33 | 34 | self.samples = {} 35 | 36 | self.scale = scale 37 | # self._reset_parameters(bias, uniform_, non_linear) 38 | self.profiling = False 39 | 40 | def profile(self, mode=True): 41 | self.profiling = mode 42 | 43 | def sample_parameters(self, resample=False): 44 | if self.profiling or resample: 45 | return self._sample_parameters() 46 | return self.samples 47 | 48 | def _reset_parameters(self, bias, uniform_, non_linear): 49 | nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( 50 | self.weight, non_linear=non_linear) 51 | if bias: 52 | nn.init.constant_(self.bias, 0.) 53 | 54 | def set_sample_config(self, sample_in_dim, sample_out_dim): 55 | self.sample_in_dim = sample_in_dim 56 | self.sample_out_dim = sample_out_dim 57 | 58 | self._sample_parameters() 59 | 60 | def _sample_parameters(self): 61 | self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) 62 | self.samples['bias'] = self.bias 63 | self.sample_scale = self.super_out_dim/self.sample_out_dim 64 | if self.bias is not None: 65 | self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) 66 | return self.samples 67 | 68 | def forward(self, x): 69 | self.sample_parameters() 70 | return F.linear(x, self.samples['weight'], self.samples['bias']) * (self.sample_scale if self.scale else 1) 71 | 72 | def calc_sampled_param_num(self): 73 | assert 'weight' in self.samples.keys() 74 | weight_numel = self.samples['weight'].numel() 75 | 76 | if self.samples['bias'] is not None: 77 | bias_numel = self.samples['bias'].numel() 78 | else: 79 | bias_numel = 0 80 | 81 | return weight_numel + bias_numel 82 | def get_complexity(self, sequence_length): 83 | total_flops = 0 84 | total_flops += sequence_length * np.prod(self.samples['weight'].size()) 85 | return total_flops 86 | 87 | # def sample_weight(weight, sample_in_dim, sample_out_dim): 88 | # sample_weight = uniform_element_selection(weight, sample_in_dim, dim=1) 89 | # sample_weight = uniform_element_selection(sample_weight, sample_out_dim, dim=0) 90 | # return sample_weight 91 | 92 | # def sample_bias(bias, sample_out_dim): 93 | # sample_bias = uniform_element_selection(bias, sample_out_dim, dim=0) 94 | # return sample_bias 95 | 96 | 97 | def sample_weight(weight, sample_in_dim, sample_out_dim): 98 | 99 | sample_weight = weight[:, :sample_in_dim] 100 | sample_weight = torch.cat([sample_weight[i:sample_out_dim:3, :] for i in range(3)], dim =0) 101 | sample_weight.requires_grad_(weight.requires_grad) # preserve the requires_grad attribute 102 | return sample_weight 103 | 104 | 105 | def sample_bias(bias, sample_out_dim): 106 | sample_bias = bias[:sample_out_dim] 107 | sample_bias.requires_grad_(bias.requires_grad) # preserve the requires_grad attribute 108 | return sample_bias 109 | -------------------------------------------------------------------------------- /AutoFormer/training_free/indicators/NASWOT.py: -------------------------------------------------------------------------------- 1 | import os, sys, time 2 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 3 | import torch 4 | from torch import nn 5 | import numpy as np 6 | from .
import indicator 7 | 8 | def network_weight_gaussian_init(net: nn.Module): 9 | with torch.no_grad(): 10 | for m in net.modules(): 11 | if isinstance(m, nn.Conv2d): 12 | nn.init.normal_(m.weight) 13 | if hasattr(m, 'bias') and m.bias is not None: 14 | nn.init.zeros_(m.bias) 15 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 16 | nn.init.ones_(m.weight) 17 | nn.init.zeros_(m.bias) 18 | elif isinstance(m, nn.Linear): 19 | nn.init.normal_(m.weight) 20 | if hasattr(m, 'bias') and m.bias is not None: 21 | nn.init.zeros_(m.bias) 22 | else: 23 | continue 24 | 25 | return net 26 | 27 | def logdet(K): 28 | s, ld = np.linalg.slogdet(K) 29 | return ld 30 | 31 | def get_batch_jacobian(net, x): 32 | net.zero_grad() 33 | x.requires_grad_(True) 34 | y = net(x) 35 | y.backward(torch.ones_like(y)) 36 | jacob = x.grad.detach() 37 | # return jacob, target.detach(), y.detach() 38 | return jacob, y.detach() 39 | 40 | @indicator('NASWOT', bn=False, mode='param') 41 | def compute_nas_score(model, device, resolution=224, batch_size=64): 42 | gpu=0 43 | if gpu is not None: 44 | torch.cuda.set_device(gpu) 45 | model = model.cuda(gpu) 46 | 47 | network_weight_gaussian_init(model) 48 | input = torch.randn(size=[batch_size, 3, resolution, resolution]) 49 | if gpu is not None: 50 | input = input.cuda(gpu) 51 | 52 | model.K = np.zeros((batch_size, batch_size)) 53 | 54 | def counting_forward_hook(module, inp, out): 55 | try: 56 | if not module.visited_backwards: 57 | return 58 | if isinstance(inp, tuple): 59 | inp = inp[0] 60 | inp = inp.view(inp.size(0), -1) 61 | x = (inp > 0).float() 62 | K = x @ x.t() 63 | K2 = (1. - x) @ (1. - x.t()) 64 | model.K = model.K + K.cpu().numpy() + K2.cpu().numpy() 65 | except Exception as err: 66 | print('---- error on model : ') 67 | print(model) 68 | raise err 69 | 70 | 71 | def counting_backward_hook(module, inp, out): 72 | module.visited_backwards = True 73 | 74 | for name, module in model.named_modules(): 75 | # if 'ReLU' in str(type(module)): 76 | if isinstance(module, torch.nn.GELU): 77 | # hooks[name] = module.register_forward_hook(counting_hook) 78 | module.visited_backwards = True 79 | module.register_forward_hook(counting_forward_hook) 80 | module.register_backward_hook(counting_backward_hook) 81 | 82 | x = input 83 | jacobs, y = get_batch_jacobian(model, x) 84 | 85 | score = logdet(model.K) 86 | 87 | return float(score) 88 | 89 | 90 | 91 | def parse_cmd_options(argv): 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument('--batch_size', type=int, default=16, help='number of instances in one mini-batch.') 94 | parser.add_argument('--input_image_size', type=int, default=None, 95 | help='resolution of input image, usually 32 for CIFAR and 224 for ImageNet.') 96 | parser.add_argument('--repeat_times', type=int, default=32) 97 | parser.add_argument('--gpu', type=int, default=None) 98 | module_opt, _ = parser.parse_known_args(argv) 99 | return module_opt 100 | 101 | if __name__ == "__main__": 102 | opt = global_utils.parse_cmd_options(sys.argv) 103 | args = parse_cmd_options(sys.argv) 104 | the_model = ModelLoader.get_model(opt, sys.argv) 105 | if args.gpu is not None: 106 | the_model = the_model.cuda(args.gpu) 107 | 108 | 109 | start_timer = time.time() 110 | 111 | for repeat_count in range(args.repeat_times): 112 | the_score = compute_nas_score(gpu=args.gpu, model=the_model, 113 | resolution=args.input_image_size, batch_size=args.batch_size) 114 | 115 | time_cost = (time.time() - start_timer) / args.repeat_times 116 | 117 | print(f'NASWOT={the_score:.4g}, time 
cost={time_cost:.4g} second(s)') 118 | -------------------------------------------------------------------------------- /AutoFormer/model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | from itertools import repeat 5 | from torch._six import container_abcs 6 | import torch.nn as nn 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | def _ntuple(n): 65 | def parse(x): 66 | if isinstance(x, container_abcs.Iterable): 67 | return x 68 | return tuple(repeat(x, n)) 69 | return parse 70 | 71 | def drop_path(x, drop_prob: float = 0., training: bool = False): 72 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 73 | 74 | This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, 75 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 76 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for 77 | changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 78 | 'survival rate' as the argument. 79 | 80 | """ 81 | if drop_prob == 0. 
or not training: 82 | return x 83 | keep_prob = 1 - drop_prob 84 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 85 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 86 | random_tensor.floor_() # binarize 87 | output = x.div(keep_prob) * random_tensor 88 | return output 89 | 90 | 91 | class DropPath(nn.Module): 92 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 93 | """ 94 | def __init__(self, drop_prob=None): 95 | super(DropPath, self).__init__() 96 | self.drop_prob = drop_prob 97 | 98 | def forward(self, x): 99 | return drop_path(x, self.drop_prob, self.training) 100 | 101 | 102 | to_1tuple = _ntuple(1) 103 | to_2tuple = _ntuple(2) 104 | to_3tuple = _ntuple(3) 105 | to_4tuple = _ntuple(4) 106 | to_ntuple = _ntuple -------------------------------------------------------------------------------- /AutoFormer_original/model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | from itertools import repeat 5 | from torch._six import container_abcs 6 | import torch.nn as nn 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 
52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | def _ntuple(n): 65 | def parse(x): 66 | if isinstance(x, container_abcs.Iterable): 67 | return x 68 | return tuple(repeat(x, n)) 69 | return parse 70 | 71 | def drop_path(x, drop_prob: float = 0., training: bool = False): 72 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 73 | 74 | This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, 75 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 76 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for 77 | changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 78 | 'survival rate' as the argument. 79 | 80 | """ 81 | if drop_prob == 0. or not training: 82 | return x 83 | keep_prob = 1 - drop_prob 84 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 85 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 86 | random_tensor.floor_() # binarize 87 | output = x.div(keep_prob) * random_tensor 88 | return output 89 | 90 | 91 | class DropPath(nn.Module): 92 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 93 | """ 94 | def __init__(self, drop_prob=None): 95 | super(DropPath, self).__init__() 96 | self.drop_prob = drop_prob 97 | 98 | def forward(self, x): 99 | return drop_path(x, self.drop_prob, self.training) 100 | 101 | 102 | to_1tuple = _ntuple(1) 103 | to_2tuple = _ntuple(2) 104 | to_3tuple = _ntuple(3) 105 | to_4tuple = _ntuple(4) 106 | to_ntuple = _ntuple -------------------------------------------------------------------------------- /AutoFormer_original_greedy/model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | from itertools import repeat 5 | from torch._six import container_abcs 6 | import torch.nn as nn 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 
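#    Since the normal CDF is Phi(x) = (1 + erf(x / sqrt(2))) / 2, a uniform
#    draw v in [2l-1, 2u-1] satisfies erfinv(v) = x / sqrt(2) for some x in
#    [(a - mean) / std, (b - mean) / std]; the mul_(std * sqrt(2)) and
#    add_(mean) steps below therefore map every sample into [a, b].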
29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | def _ntuple(n): 65 | def parse(x): 66 | if isinstance(x, container_abcs.Iterable): 67 | return x 68 | return tuple(repeat(x, n)) 69 | return parse 70 | 71 | def drop_path(x, drop_prob: float = 0., training: bool = False): 72 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 73 | 74 | This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, 75 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 76 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for 77 | changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 78 | 'survival rate' as the argument. 79 | 80 | """ 81 | if drop_prob == 0. or not training: 82 | return x 83 | keep_prob = 1 - drop_prob 84 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 85 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 86 | random_tensor.floor_() # binarize 87 | output = x.div(keep_prob) * random_tensor 88 | return output 89 | 90 | 91 | class DropPath(nn.Module): 92 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
93 | """ 94 | def __init__(self, drop_prob=None): 95 | super(DropPath, self).__init__() 96 | self.drop_prob = drop_prob 97 | 98 | def forward(self, x): 99 | return drop_path(x, self.drop_prob, self.training) 100 | 101 | 102 | to_1tuple = _ntuple(1) 103 | to_2tuple = _ntuple(2) 104 | to_3tuple = _ntuple(3) 105 | to_4tuple = _ntuple(4) 106 | to_ntuple = _ntuple -------------------------------------------------------------------------------- /AutoFormer_original_greedy/evolution_search copy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 첫 번째 작업 실행 4 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 5 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 6 | --min-param-limits 5 --param-limits 6 \ 7 | --log-file-path './log/search_original_tiny_6M.log' 8 | 9 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 10 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 11 | --min-param-limits 5 --param-limits 6 \ 12 | --log-file-path './log/search_sn_tiny_6M.log' 13 | 14 | # 첫 번째 작업 실행 15 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 16 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 17 | --min-param-limits 6 --param-limits 7 \ 18 | --log-file-path './log/search_original_tiny_7M.log' 19 | 20 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 21 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 22 | --min-param-limits 6 --param-limits 7 \ 23 | --log-file-path './log/search_sn_tiny_7M.log' 24 | 25 | # 첫 번째 작업 실행 26 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 27 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 28 | --min-param-limits 7 --param-limits 8 \ 29 | --log-file-path './log/search_original_tiny_8M.log' 30 | 31 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 32 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 33 | --min-param-limits 7 --param-limits 8 \ 34 | --log-file-path './log/search_sn_tiny_8M.log' 35 | 36 | # 첫 번째 작업 실행 37 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 38 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 39 | --min-param-limits 8 --param-limits 9 \ 40 | --log-file-path './log/search_original_tiny_9M.log' 41 | 42 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 43 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 44 | --min-param-limits 8 --param-limits 9 \ 45 | --log-file-path './log/search_sn_tiny_9M.log' 46 | 47 | # 첫 번째 작업 실행 48 | 
47 | # Run the first job 48 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 49 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-original-25.pth' \ 50 | --min-param-limits 9 --param-limits 10 \ 51 | --log-file-path './log/search_original_tiny_10M.log' 52 | 53 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 54 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-25.pth' \ 55 | --min-param-limits 9 --param-limits 10 \ 56 | --log-file-path './log/search_sn_tiny_10M.log' 57 | 58 | 59 | 60 | # # Run the second job if the first one completed successfully 61 | # if [ $? -eq 0 ]; then 62 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 63 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 64 | # --min-param-limits 1 --param-limits 100 --config-list-path './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl' \ 65 | # --log-file-path './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch-subnet.log' 66 | # else 67 | # echo "The first job failed. Skipping the second job." 68 | # fi 69 | # # --data-set EVO_IMNET 70 | 71 | 72 | # #!/bin/bash 73 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 74 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 75 | # --min-param-limits 1 --param-limits 100 76 | # # --data-set EVO_IMNET 77 | 78 | 79 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/evolution_search.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run the first job 4 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 5 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 6 | --min-param-limits 5 --param-limits 6 \ 7 | --log-file-path './log/search_sn_not_original_400_6M.log' 8 | 9 | # Run the first job 10 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 11 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 12 | --min-param-limits 6 --param-limits 7 \ 13 | --log-file-path './log/search_sn_not_original_400_7M.log' 14 | 15 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 16 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 17 | --min-param-limits 5 --param-limits 6 \ 18 | --log-file-path './log/search_sn_400_bottom_6M.log' 19 | 20 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 21 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 22 | --min-param-limits 6 --param-limits 7 \ 23 | --log-file-path './log/search_sn_400_bottom_7M.log' 24 |
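# The stanzas above and below sweep one parameter bracket at a time (5-6M up
# to 9-10M) for the two supernet checkpoints, writing one log file per
# (checkpoint, bracket) pair under ./log/ for side-by-side comparison.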
25 | # Run the first job 26 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 27 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 28 | --min-param-limits 7 --param-limits 8 \ 29 | --log-file-path './log/search_sn_not_original_400_8M.log' 30 | 31 | # Run the first job 32 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 33 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 34 | --min-param-limits 8 --param-limits 9 \ 35 | --log-file-path './log/search_sn__not_original_400_9M.log' 36 | 37 | 38 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 39 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 40 | --min-param-limits 7 --param-limits 8 \ 41 | --log-file-path './log/search_sn_400_bottom_8M.log' 42 | 43 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 44 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 45 | --min-param-limits 8 --param-limits 9 \ 46 | --log-file-path './log/search_sn_400_bottom_9M.log' 47 | # Run the first job 48 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 49 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-not-original-400-25.pth' \ 50 | --min-param-limits 9 --param-limits 10 \ 51 | --log-file-path './log/search_sn__not_original_400_10M.log' 52 | 53 | python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 54 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume '/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth' \ 55 | --min-param-limits 9 --param-limits 10 \ 56 | --log-file-path './log/search_sn_400_bottom_10M.log' 57 | 58 | 59 | 60 | # # Run the second job if the first one completed successfully 61 | # if [ $? -eq 0 ]; then 62 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 63 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 64 | # --min-param-limits 1 --param-limits 100 --config-list-path './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl' \ 65 | # --log-file-path './greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch-subnet.log' 66 | # else 67 | # echo "The first job failed. Skipping the second job."
68 | # fi 69 | # # --data-set EVO_IMNET 70 | 71 | 72 | # #!/bin/bash 73 | # python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path '/data' --gp \ 74 | # --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-T.yaml --resume './experiments/supernet/checkpoint-25.pth' \ 75 | # --min-param-limits 1 --param-limits 100 76 | # # --data-set EVO_IMNET 77 | 78 | 79 | -------------------------------------------------------------------------------- /AutoFormer/tmp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | import concurrent.futures 7 | from torch.nn.parallel import DataParallel 8 | from timm.data import Mixup 9 | from timm.utils import accuracy, ModelEma 10 | from lib import utils 11 | import random 12 | import time 13 | 14 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 15 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 16 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 17 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 18 | amp: bool = True, teacher_model: torch.nn.Module = None, 19 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None, 20 | candidate_pool=None, validation_data_loader=None, pool_sampling_prob=0, m=10, k=5): 21 | model.train() 22 | criterion.train() 23 | 24 | # Set random seed 25 | random.seed(epoch) 26 | 27 | metric_logger = utils.MetricLogger(delimiter=" ") 28 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 29 | header = 'Epoch: [{}]'.format(epoch) 30 | print_freq = 10 31 | 32 | # Calculate T from data_loader total size and batch size 33 | T = len(data_loader) # Total number of iterations (total data / batch size) 34 | # print("pool_sampling_prob : ", pool_sampling_prob) 35 | 36 | if mode == 'super': 37 | model_module = unwrap_model(model) 38 | total_iters = T // k # set up so the loop runs T/k times 39 | 40 | data_iter = iter(metric_logger.log_every(data_loader, print_freq, header)) 41 | 42 | sampled_paths = [{'mlp_ratio': [4, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 4, 4, 3.5, 4, 3.5, 4], 'num_heads': [3, 4, 4, 3, 3, 4, 3, 4, 3, 3, 4, 4, 4], 'embed_dim': [192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192], 'layer_num': 13}, {'mlp_ratio': [3.5, 4, 3.5, 4, 3.5, 4, 3.5, 4, 3.5, 4, 3.5, 3.5, 3.5], 'num_heads': [4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 3, 3], 'embed_dim': [216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216], 'layer_num': 13}, {'mlp_ratio': [3.5, 4, 4, 3.5, 4, 3.5, 3.5, 3.5, 4, 4, 4, 3.5, 4, 4], 'num_heads': [3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 4, 3, 3], 'embed_dim': [240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240], 'layer_num': 14}, {'mlp_ratio': [4, 3.5, 4, 3.5, 4, 3.5, 3.5, 4, 3.5, 3.5, 4, 3.5, 4], 'num_heads': [4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 4, 4, 3], 'embed_dim': [216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216], 'layer_num': 13}] 43 | 44 | losses = [] 45 | with torch.no_grad(): # save memory by disabling gradient tracking for the loss computation 46 | for config in sampled_paths: 47 | model_module = unwrap_model(model) 48 | model_module.set_sample_config(config=config) 49 | 50 | # Evaluate the model on the entire validation dataset 51 | val_loss_total = 0 52 | num_batches = 0 53 |
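# The loop below scores one candidate path at a time: the subnet described by
# `config` is activated in the supernet and evaluated on the full validation
# loader, and its mean loss is recorded; the k lowest-loss paths are kept as
# the candidate pool further down.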
54 | for val_samples, val_targets in validation_data_loader: 55 | val_samples = val_samples.to(device, non_blocking=True) 56 | val_targets = val_targets.to(device, non_blocking=True) 57 | 58 | if mixup_fn is not None: 59 | val_samples, val_targets = mixup_fn(val_samples, val_targets) 60 | 61 | if amp: 62 | with torch.cuda.amp.autocast(): 63 | val_outputs = model(val_samples) 64 | val_loss = criterion(val_outputs, val_targets) 65 | else: 66 | val_outputs = model(val_samples) 67 | val_loss = criterion(val_outputs, val_targets) 68 | 69 | # add each batch's loss 70 | val_loss_total += val_loss.item() 71 | num_batches += 1 72 | 73 | # store the average loss over all batches 74 | val_loss_avg = val_loss_total / num_batches 75 | losses.append((val_loss_avg, config)) 76 | 77 | losses.sort(key=lambda x: x[0]) # Sort by loss value (lower is better) 78 | top_k_paths = losses[:k] 79 | 80 | # bottom_k_paths: sort the remaining paths in descending order of loss 81 | bottom_k_paths = sorted(losses[k:], key=lambda x: x[0], reverse=True) 82 | 83 | ######### 84 | 85 | # top_k_paths = sampled_paths 86 | 87 | # After the evaluation finishes, add top_k_paths to candidate_pool 88 | if candidate_pool is not None: 89 | candidate_pool[:] = [config for _, config in top_k_paths] 90 | # candidate_pool[:] = top_k_paths # replace the contents of candidate_pool with top_k_paths 91 | 92 | # Avoid running out of CUDA memory: print top_k_paths 93 | print("top_k_paths : ", top_k_paths) 94 | print("bottom_k_paths : ", bottom_k_paths) -------------------------------------------------------------------------------- /AutoFormer/performance_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pickle 3 | 4 | # function that loads the a.pkl file 5 | def load_pkl(file_path): 6 | with open(file_path, 'rb') as file: 7 | data = pickle.load(file) 8 | return data 9 | 10 | # function that adds loss values 11 | def add_inter_loss(results, a_pkl_data): 12 | a_pkl_dict = {} 13 | 14 | # index the a.pkl data by key (layer_num, mlp_ratio, num_heads, embed_dim) 15 | for item in a_pkl_data: 16 | key = (item['layer_num'], tuple(item['mlp_ratio']), tuple(item['num_heads']), tuple(item['embed_dim'])) 17 | a_pkl_dict[key] = item['loss'] 18 | 19 | # add inter_loss to results_no_duplicates 20 | for config in results: 21 | key = (config['layer_num'], tuple(config['mlp_ratio']), tuple(config['num_heads']), tuple(config['embed_dim'])) 22 | if key in a_pkl_dict: 23 | config['inter_loss'] = a_pkl_dict[key] 24 | 25 | return results 26 | 27 | def find_non_matching_pairs(results): 28 | non_matching_indices = [] 29 | 30 | # check that results has an even length 31 | if len(results) % 2 != 0: 32 | print("Warning: The number of items in results should be even.") 33 | return non_matching_indices 34 | 35 | # check each (n, n+1) pair 36 | for i in range(0, len(results) - 1, 2): # step through the indices two at a time 37 | if results[i] != results[i + 1]: 38 | non_matching_indices.append(i) # record index n (the (n, n+1) pair does not match) 39 | 40 | return non_matching_indices 41 | 42 | def remove_duplicates(results): 43 | unique_results = [] 44 | seen = set() 45 | 46 | for config in results: 47 | # build the deduplication key (parameters excluded) 48 | key = (config['layer_num'], tuple(config['mlp_ratio']), tuple(config['num_heads']), tuple(config['embed_dim'])) 49 | 50 | if key not in seen: 51 | # keep the first occurrence of each key 52 | seen.add(key) 53 | unique_results.append(config) 54 | 55 | return unique_results 56 | 57 | 58 | def parse_evolution_log(file_path): 59 | results = [] 60 | current_config = {} 61 | i = 0 62 | 63 | with open(file_path, 'r') as file: 64 | for line in file: 65 | line = line.strip() 66 |
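# The parser below is a small state machine over the log: a "sampled model
# config:" line opens a record, "sampled model parameters:" adds the size,
# and the "* Acc@1" line completes the record and appends it to results.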
67 | # extract the model config 68 | if line.startswith("sampled model config:"): 69 | config_dict_str = line.split("sampled model config: ")[1] 70 | current_config = eval(config_dict_str) # convert the string into a dict 71 | 72 | # extract the model parameter count 73 | elif line.startswith("sampled model parameters:"): 74 | parameters = int(line.split("sampled model parameters: ")[1]) 75 | current_config['parameters'] = parameters 76 | 77 | # extract the performance metrics 78 | elif line.startswith("* Acc@1"): 79 | acc1 = float(re.search(r"Acc@1\s(\d+\.\d+)", line).group(1)) 80 | acc5 = float(re.search(r"Acc@5\s(\d+\.\d+)", line).group(1)) 81 | loss = float(re.search(r"loss\s(\d+\.\d+)", line).group(1)) 82 | current_config['acc1'] = acc1 83 | current_config['acc5'] = acc5 84 | current_config['loss'] = loss 85 | # current_config['id'] = int(i/2) 86 | current_config['id'] = int(i) 87 | 88 | # append the fully populated dict to the results list 89 | results.append(current_config) 90 | current_config = {} # reset for the next entry 91 | i = i + 1 92 | 93 | return results 94 | 95 | # example file path 96 | # file_path = "./greedyTAS/greedyTAS-epoch60/autoformer-greedyTAS(09131747).log" 97 | 98 | # file_path = "./greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch-subnet.log" 99 | file_path = "./greedyTAS/m(2500)_path_epoch100-subnet.log" 100 | 101 | results = parse_evolution_log(file_path) 102 | print(len(results)) # print the number of results 103 | 104 | # non_matching_indices = find_non_matching_pairs(results) 105 | # print("Non-matching indices:", non_matching_indices) 106 | 107 | # results_no_duplicates = remove_duplicates(results) 108 | results_no_duplicates = results 109 | print(len(results_no_duplicates)) # print the deduplicated count 110 | print(results_no_duplicates[0]) 111 | print(results_no_duplicates[1]) 112 | print(results_no_duplicates[2]) 113 | print(results_no_duplicates[-1]) 114 | 115 | # path to the a.pkl file 116 | # a_pkl_path = "./greedyTAS/greedyTAS-epoch60/autoformer-greedyTAS(09131747).log" 117 | # a_pkl_path = "./greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl" # change this to the actual file path 118 | 119 | # # load the a.pkl file 120 | # a_pkl_data = load_pkl(a_pkl_path) 121 | 122 | # # add the inter_loss values 123 | # results_with_inter_loss = add_inter_loss(results_no_duplicates, a_pkl_data) 124 | 125 | # # print results (example) 126 | # print(results_with_inter_loss[0]) 127 | # print(results_with_inter_loss[1]) 128 | # print(results_with_inter_loss[2]) 129 | # print(results_with_inter_loss[-1]) 130 | 131 | # Save the transformed data to a new pickle file 132 | with open('./greedyTAS/m(2500)_path_epoch100-subnet.pkl', 'wb') as file: 133 | pickle.dump(results_no_duplicates, file) 134 | 135 | print("Data saved successfully.") -------------------------------------------------------------------------------- /AutoFormer_original_greedy/performance_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pickle 3 | 4 | # function that loads the a.pkl file 5 | def load_pkl(file_path): 6 | with open(file_path, 'rb') as file: 7 | data = pickle.load(file) 8 | return data 9 | 10 | # function that adds loss values 11 | def add_inter_loss(results, a_pkl_data): 12 | a_pkl_dict = {} 13 | 14 | # index the a.pkl data by key (layer_num, mlp_ratio, num_heads, embed_dim) 15 | for item in a_pkl_data: 16 | key = (item['layer_num'], tuple(item['mlp_ratio']), tuple(item['num_heads']), tuple(item['embed_dim'])) 17 | a_pkl_dict[key] = item['loss'] 18 | 19 | # add inter_loss to results_no_duplicates 20 | for config in results: 21 | key = (config['layer_num'], tuple(config['mlp_ratio']), tuple(config['num_heads']), tuple(config['embed_dim'])) 22 | if key in a_pkl_dict: 23 | config['inter_loss'] = a_pkl_dict[key] 24 | 25 | return results 26 |
27 | def find_non_matching_pairs(results): 28 | non_matching_indices = [] 29 | 30 | # check that results has an even length 31 | if len(results) % 2 != 0: 32 | print("Warning: The number of items in results should be even.") 33 | return non_matching_indices 34 | 35 | # check each (n, n+1) pair 36 | for i in range(0, len(results) - 1, 2): # step through the indices two at a time 37 | if results[i] != results[i + 1]: 38 | non_matching_indices.append(i) # record index n (the (n, n+1) pair does not match) 39 | 40 | return non_matching_indices 41 | 42 | def remove_duplicates(results): 43 | unique_results = [] 44 | seen = set() 45 | 46 | for config in results: 47 | # build the deduplication key (parameters excluded) 48 | key = (config['layer_num'], tuple(config['mlp_ratio']), tuple(config['num_heads']), tuple(config['embed_dim'])) 49 | 50 | if key not in seen: 51 | # keep the first occurrence of each key 52 | seen.add(key) 53 | unique_results.append(config) 54 | 55 | return unique_results 56 | 57 | 58 | def parse_evolution_log(file_path): 59 | results = [] 60 | current_config = {} 61 | i = 0 62 | 63 | with open(file_path, 'r') as file: 64 | for line in file: 65 | line = line.strip() 66 | 67 | # extract the model config 68 | if line.startswith("sampled model config:"): 69 | config_dict_str = line.split("sampled model config: ")[1] 70 | current_config = eval(config_dict_str) # convert the string into a dict 71 | 72 | # extract the model parameter count 73 | elif line.startswith("sampled model parameters:"): 74 | parameters = int(line.split("sampled model parameters: ")[1]) 75 | current_config['parameters'] = parameters 76 | 77 | # extract the performance metrics 78 | elif line.startswith("* Acc@1"): 79 | acc1 = float(re.search(r"Acc@1\s(\d+\.\d+)", line).group(1)) 80 | acc5 = float(re.search(r"Acc@5\s(\d+\.\d+)", line).group(1)) 81 | loss = float(re.search(r"loss\s(\d+\.\d+)", line).group(1)) 82 | current_config['acc1'] = acc1 83 | current_config['acc5'] = acc5 84 | current_config['loss'] = loss 85 | # current_config['id'] = int(i/2) 86 | current_config['id'] = int(i) 87 | 88 | # append the fully populated dict to the results list 89 | results.append(current_config) 90 | current_config = {} # reset for the next entry 91 | i = i + 1 92 | 93 | return results 94 | 95 | # example file path 96 | # file_path = "./greedyTAS/greedyTAS-epoch60/autoformer-greedyTAS(09131747).log" 97 | 98 | # file_path = "./greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch-subnet.log" 99 | file_path = "./log/search_tiny-only-supernet192-minimum_pop1050.log" 100 | 101 | results = parse_evolution_log(file_path) 102 | print(len(results)) # print the number of results 103 | 104 | # non_matching_indices = find_non_matching_pairs(results) 105 | # print("Non-matching indices:", non_matching_indices) 106 | 107 | results_no_duplicates = remove_duplicates(results) 108 | # results_no_duplicates = results 109 | print(len(results_no_duplicates)) # print the deduplicated count 110 | print(results_no_duplicates[0]) 111 | print(results_no_duplicates[1]) 112 | print(results_no_duplicates[2]) 113 | print(results_no_duplicates[-1]) 114 | 115 | # path to the a.pkl file 116 | # a_pkl_path = "./greedyTAS/greedyTAS-epoch60/autoformer-greedyTAS(09131747).log" 117 | # a_pkl_path = "./greedyTAS/greedyTAS-epoch20-test/autoformer-greedyTAS(dss)-20epoch.pkl" # change this to the actual file path 118 | 119 | # # load the a.pkl file 120 | # a_pkl_data = load_pkl(a_pkl_path) 121 | 122 | # # add the inter_loss values 123 | # results_with_inter_loss = add_inter_loss(results_no_duplicates, a_pkl_data) 124 | 125 | # # print results (example) 126 | # print(results_with_inter_loss[0]) 127 | # print(results_with_inter_loss[1]) 128 | # print(results_with_inter_loss[2]) 129 | # print(results_with_inter_loss[-1]) 130 | 131 | # Save the transformed data to a new pickle file 132 |
with open('./log/search_tiny-only-supernet192-minimum_pop1050.pkl', 'wb') as file: 133 | pickle.dump(results_no_duplicates, file) 134 | 135 | print("Data saved successfully.") -------------------------------------------------------------------------------- /AutoFormer/training_free/indicators/grasp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.autograd as autograd 5 | 6 | from . import indicator 7 | from ..p_utils import get_layer_metric_array 8 | 9 | 10 | @indicator('grasp', bn=True, mode='param') 11 | def compute_grasp_per_weight(net, inputs, targets, mode, loss_fn, T=1, num_iters=1, split_data=1): 12 | # get all applicable weights 13 | weights = [] 14 | for layer in net.modules(): 15 | if layer._get_name() == 'PatchembedSuper': 16 | weights.append(layer.sampled_weight) 17 | layer.sampled_weight.requires_grad_(True) # TODO isn't this already true? 18 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 19 | weights.append(layer.samples['weight']) 20 | layer.samples['weight'].requires_grad_(True) # TODO isn't this already true? 21 | if isinstance(layer, torch.nn.Linear) and layer.out_features == 1000: 22 | weights.append(layer.samples['weight']) 23 | layer.samples['weight'].requires_grad_(True) # TODO isn't this already true? 24 | 25 | # NOTE original code had some input/target splitting into 2 26 | # I am guessing this was because of GPU mem limit 27 | net.zero_grad() 28 | N = inputs.shape[0] 29 | for sp in range(split_data): 30 | st = sp * N // split_data 31 | en = (sp + 1) * N // split_data 32 | 33 | # forward/grad pass #1 34 | grad_w = None 35 | for _ in range(num_iters): 36 | # TODO get new data, otherwise num_iters is useless!
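# The two passes below implement the GraSP inner product: pass 1 accumulates
# grad_w = dL/dw with plain gradients, pass 2 recomputes gradients with
# create_graph=True, and backward() on z = sum(grad_w * grad_f) leaves the
# Hessian-gradient product Hg in each collected weight's .grad field.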
37 | outputs = net.forward(inputs[st:en]) / T 38 | loss = loss_fn(outputs, targets[st:en]) 39 | grad_w_p = autograd.grad(loss, weights, allow_unused=True) 40 | if grad_w is None: 41 | grad_w = list(grad_w_p) 42 | else: 43 | for idx in range(len(grad_w)): 44 | grad_w[idx] += grad_w_p[idx] 45 | 46 | for sp in range(split_data): 47 | st = sp * N // split_data 48 | en = (sp + 1) * N // split_data 49 | 50 | # forward/grad pass #2 51 | outputs = net.forward(inputs[st:en]) / T 52 | loss = loss_fn(outputs, targets[st:en]) 53 | grad_f = autograd.grad(loss, weights, create_graph=True, allow_unused=True) 54 | 55 | # accumulate gradients computed in previous step and call backwards 56 | z, count = 0, 0 57 | for layer in net.modules(): 58 | if layer._get_name() == 'PatchembedSuper': 59 | if grad_w[count] is not None: 60 | z += (grad_w[count].data * grad_f[count]).sum() 61 | count += 1 62 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 63 | if grad_w[count] is not None: 64 | z += (grad_w[count].data * grad_f[count]).sum() 65 | count += 1 66 | if isinstance(layer, nn.Linear) and layer.out_features == 1000: 67 | if grad_w[count] is not None: 68 | z += (grad_w[count].data * grad_f[count]).sum() 69 | count += 1 70 | z.backward() 71 | 72 | # compute final sensitivity metric and put in grads 73 | def grasp(layer): 74 | if layer._get_name() == 'PatchembedSuper': 75 | if layer.sampled_weight.grad is not None: 76 | return -layer.sampled_weight.data * layer.sampled_weight.grad # -theta_q Hg 77 | # NOTE in the grasp code they take the *bottom* (1-p)% of values 78 | # but we take the *top* (1-p)%, therefore we remove the -ve sign 79 | # EDIT accuracy seems to be negatively correlated with this metric, so we add -ve sign here! 80 | else: 81 | return torch.zeros_like(layer.sampled_weight) 82 | if isinstance(layer, nn.Linear) and layer.out_features != 1000 and layer.samples: 83 | if layer.samples['weight'].grad is not None: 84 | return -layer.samples['weight'].data * layer.samples['weight'].grad # -theta_q Hg 85 | # NOTE in the grasp code they take the *bottom* (1-p)% of values 86 | # but we take the *top* (1-p)%, therefore we remove the -ve sign 87 | # EDIT accuracy seems to be negatively correlated with this metric, so we add -ve sign here! 88 | else: 89 | return torch.zeros_like(layer.samples['weight']) 90 | if isinstance(layer, torch.nn.Linear) and layer.out_features == 1000: 91 | if layer.samples['weight'].grad is not None: 92 | return -layer.samples['weight'].data * layer.samples['weight'].grad # -theta_q Hg 93 | # NOTE in the grasp code they take the *bottom* (1-p)% of values 94 | # but we take the *top* (1-p)%, therefore we remove the -ve sign 95 | # EDIT accuracy seems to be negatively correlated with this metric, so we add -ve sign here! 96 | else: 97 | return torch.zeros_like(layer.samples['weight']) 98 | 99 | grads = get_layer_metric_array(net, grasp, mode) 100 | 101 | return grads -------------------------------------------------------------------------------- /AutoFormer_original/README.md: -------------------------------------------------------------------------------- 1 | # AutoFormer: Searching Transformers for Visual Recognition 2 | 3 | **This is an official implementation of AutoFormer.** 4 | 5 | AutoFormer is a new one-shot architecture search framework dedicated to vision transformer search. It entangles the weights of different vision transformer blocks in the same layers during supernet training.
6 | Benefiting from the strategy, the trained supernet allows thousands of subnets to be very well-trained. Specifically, the performance of these subnets with weights inherited from the supernet is comparable to those retrained from scratch. 7 | 8 |
9 | AutoFormer overview 10 | AutoFormer detail 11 |
12 | 13 | 14 | ## Highlights 15 | - Once-for-all 16 | 17 | AutoFormer is a simple yet effective method to train a once-for-all vision transformer supernet. 18 | 19 | - Competitive performance 20 | 21 | AutoFormers consistently outperform DeiTs. 22 | 23 | ## Environment Setup 24 | 25 | To set up the environment you can easily run the following command: 26 | ```buildoutcfg 27 | conda create -n Autoformer python=3.6 28 | conda activate Autoformer 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | ## Data Preparation 33 | You need to first download the [ImageNet-2012](http://www.image-net.org/) to the folder `./data/imagenet` and move the validation set to the subfolder `./data/imagenet/val`. To move the validation set into per-class folders, you could use a short relocation script; a minimal example is sketched in the Search section below. 34 | 35 | The directory structure is the standard layout, as follows. 36 | ``` 37 | /path/to/imagenet/ 38 | train/ 39 | class1/ 40 | img1.jpeg 41 | class2/ 42 | img2.jpeg 43 | val/ 44 | class1/ 45 | img3.jpeg 46 | class2/ 47 | img4.jpeg 48 | ``` 49 | 50 | 51 | ## Model Zoo 52 | For evaluation, we provide the checkpoints of our models in [Google Drive](https://drive.google.com/drive/folders/1HqzY3afqQUMI6pJ5_BgR2RquJU_b_3eg?usp=sharing) and [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo). 53 | 54 | After downloading the models, you can do the evaluation following the description in *Quick Start - Test*. 55 | 56 | Model download links: 57 | 58 | Model | Params. | Top-1 Acc. % | Top-5 Acc. % | Download link 59 | --- |:---:|:---:|:---:|:---: 60 | AutoFormer-T | 5.8M | 75.3 | 92.7 | [Google Drive](https://drive.google.com/file/d/1uRCW3doQHgn2H-LjyalYEZ4CvmnQtr6Q/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-tiny.pth) 61 | AutoFormer-S | 22.9M | 81.7 | 95.7 | [Google Drive](https://drive.google.com/file/d/1JTBmLR_nW7-ZbTKafWFvSl8J2orJXiNa/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-small.pth) 62 | AutoFormer-B | 53.7M | 82.4 | 95.7 | [Google Drive](https://drive.google.com/file/d/1KPjUshk0SbqkaTzlirjPHM9pu19N5w0e/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-base.pth) 63 | 64 | 65 | ## Quick Start 66 | We provide *Supernet Train, Search, Test* code of AutoFormer as follows. 67 | 68 | ### Supernet Train 69 | 70 | To train the supernet-T/S/B, we provide the corresponding supernet configuration files in `/experiments/supernet/`. For example, to train the supernet-B, you can run the following command. The default output path is `./`; you can specify the path with the argument `--output`. 71 | 72 | ```buildoutcfg 73 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENT --gp \ 74 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --epochs 500 --warmup-epochs 20 \ 75 | --output /OUTPUT_PATH --batch-size 128 76 | ``` 77 | 78 | ### Search 79 | We run our evolution search on part of the ImageNet training dataset and use the validation set of ImageNet as the test set for fair comparison. To generate the subImageNet in `/PATH/TO/IMAGENET`, you could simply run: 80 | ```buildoutcfg 81 | python ./lib/subImageNet.py --data-path /PATH/TO/IMAGENT 82 | ``` 83 | 84 | 85 | After obtaining the subImageNet and training the supernet, we can perform the evolution search using the command below.
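For the validation-set move mentioned under Data Preparation above, the following is a minimal sketch rather than a script shipped with this repository: it assumes a mapping file (here called `val_map.txt`, which you must prepare yourself) whose lines pair each validation image name with its class ID, e.g. `ILSVRC2012_val_00000001.JPEG n01751748`.

```python
import os
import shutil

def relocate_val_images(val_dir, mapping_file):
    # Move flat validation images into one subfolder per class.
    with open(mapping_file) as f:
        for line in f:
            filename, class_id = line.split()
            class_dir = os.path.join(val_dir, class_id)
            os.makedirs(class_dir, exist_ok=True)
            shutil.move(os.path.join(val_dir, filename),
                        os.path.join(class_dir, filename))

if __name__ == '__main__':
    # Paths follow the Data Preparation layout above; adjust as needed.
    relocate_val_images('./data/imagenet/val', 'val_map.txt')
```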
Please remember to config the specific constraint in this evolution search using `--min-param-limits` and `--param-limits`: 86 | ```buildoutcfg 87 | python -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path /PATH/TO/IMAGENT --gp \ 88 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --resume /PATH/TO/CHECKPOINT \ 89 | --min-param-limits YOUR/CONFIG --param-limits YOUR/CONFIG --data-set EVO_IMNET 90 | ``` 91 | 92 | ### Test 93 | To test our trained models, you need to put the downloaded model in `/PATH/TO/CHECKPOINT`. After that you could use the following command to test the model (Please change your config file and model checkpoint according to different models. Here we use the AutoFormer-B as an example). 94 | ```buildoutcfg 95 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENT --gp \ 96 | --change_qk --relative_position --mode retrain --dist-eval --cfg ./experiments/subnet/AutoFormer-B.yaml --resume /PATH/TO/CHECKPOINT --eval 97 | ``` 98 | 99 | ## Performance 100 | 101 | **Left:** Top-1 accuracy on ImageNet. Our method achieves very competitive performance, being superior to the recent DeiT and ViT. **Right:** 1000 random sampled good architectures in the supernet-S. The supernet trained under our strategy allows subnets to be well optimized. 102 | 103 |
104 | 105 | 106 |
107 | 108 | ## Bibtex 109 | 110 | If this repo is helpful for you, please consider to cite it. Thank you! :) 111 | ```bibtex 112 | @InProceedings{AutoFormer, 113 | title = {AutoFormer: Searching Transformers for Visual Recognition}, 114 | author = {Chen, Minghao and Peng, Houwen and Fu, Jianlong and Ling, Haibin}, 115 | booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, 116 | month = {October}, 117 | year = {2021}, 118 | pages = {12270-12280} 119 | } 120 | ``` 121 | 122 | ## Acknowledgements 123 | 124 | The codes are inspired by [HAT](https://github.com/mit-han-lab/hardware-aware-transformers), [timm](https://github.com/rwightman/pytorch-image-models), [DeiT](https://github.com/facebookresearch/deit), [SPOS](https://github.com/megvii-model/SinglePathOneShot). 125 | -------------------------------------------------------------------------------- /AutoFormer_original_greedy/README.md: -------------------------------------------------------------------------------- 1 | # AutoFormer: Searching Transformers for Visual Recognition 2 | 3 | **This is an official implementation of AutoFormer.** 4 | 5 | AutoFormer is new one-shot architecture search framework dedicated to vision transformer search. It entangles the weights of different vision transformer blocks in the same layers during supernet training. 6 | Benefiting from the strategy, the trained supernet allows thousands of subnets to be very well-trained. Specifically, the performance of these subnets with weights inherited from the supernet is comparable to those retrained from scratch. 7 | 8 |
9 | AutoFormer overview 10 | AutoFormer detail 11 |
12 | 13 | 14 | ## Highlights 15 | - Once-for-all 16 | 17 | AutoFormer is a simple yet effective method to train a once-for-all vision transformer supernet. 18 | 19 | - Competitive performance 20 | 21 | AutoFormers consistently outperform DeiTs. 22 | 23 | ## Environment Setup 24 | 25 | To set up the environment you can easily run the following command: 26 | ```buildoutcfg 27 | conda create -n Autoformer python=3.6 28 | conda activate Autoformer 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | ## Data Preparation 33 | You need to first download the [ImageNet-2012](http://www.image-net.org/) to the folder `./data/imagenet` and move the validation set to the subfolder `./data/imagenet/val`. To move the validation set into per-class folders, you could use a short relocation script; a minimal example is sketched in `AutoFormer_original/README.md` above. 34 | 35 | The directory structure is the standard layout, as follows. 36 | ``` 37 | /path/to/imagenet/ 38 | train/ 39 | class1/ 40 | img1.jpeg 41 | class2/ 42 | img2.jpeg 43 | val/ 44 | class1/ 45 | img3.jpeg 46 | class2/ 47 | img4.jpeg 48 | ``` 49 | 50 | 51 | ## Model Zoo 52 | For evaluation, we provide the checkpoints of our models in [Google Drive](https://drive.google.com/drive/folders/1HqzY3afqQUMI6pJ5_BgR2RquJU_b_3eg?usp=sharing) and [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo). 53 | 54 | After downloading the models, you can do the evaluation following the description in *Quick Start - Test*. 55 | 56 | Model download links: 57 | 58 | Model | Params. | Top-1 Acc. % | Top-5 Acc. % | Download link 59 | --- |:---:|:---:|:---:|:---: 60 | AutoFormer-T | 5.8M | 75.3 | 92.7 | [Google Drive](https://drive.google.com/file/d/1uRCW3doQHgn2H-LjyalYEZ4CvmnQtr6Q/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-tiny.pth) 61 | AutoFormer-S | 22.9M | 81.7 | 95.7 | [Google Drive](https://drive.google.com/file/d/1JTBmLR_nW7-ZbTKafWFvSl8J2orJXiNa/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-small.pth) 62 | AutoFormer-B | 53.7M | 82.4 | 95.7 | [Google Drive](https://drive.google.com/file/d/1KPjUshk0SbqkaTzlirjPHM9pu19N5w0e/view?usp=sharing), [GitHub](https://github.com/silent-chen/AutoFormer-model-zoo/releases/download/v1.0/supernet-base.pth) 63 | 64 | 65 | ## Quick Start 66 | We provide *Supernet Train, Search, Test* code of AutoFormer as follows. 67 | 68 | ### Supernet Train 69 | 70 | To train the supernet-T/S/B, we provide the corresponding supernet configuration files in `/experiments/supernet/`. For example, to train the supernet-B, you can run the following command. The default output path is `./`; you can specify the path with the argument `--output`. 71 | 72 | ```buildoutcfg 73 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENT --gp \ 74 | --change_qk --relative_position --mode super --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --epochs 500 --warmup-epochs 20 \ 75 | --output /OUTPUT_PATH --batch-size 128 76 | ``` 77 | 78 | ### Search 79 | We run our evolution search on part of the ImageNet training dataset and use the validation set of ImageNet as the test set for fair comparison. To generate the subImageNet in `/PATH/TO/IMAGENET`, you could simply run: 80 | ```buildoutcfg 81 | python ./lib/subImageNet.py --data-path /PATH/TO/IMAGENT 82 | ``` 83 | 84 | 85 | After obtaining the subImageNet and training the supernet, we can perform the evolution search using the command below.
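The evolution-search shell scripts in this repository (for example `evolution_search.sh`) repeat this command once per parameter bracket. The sketch below shows one way to drive such a sweep from Python; the checkpoint path, the `tag` naming scheme, and the `./log/` layout are assumptions that mirror those scripts rather than a fixed interface.

```python
import shlex
import subprocess

BASE = (
    "python3 -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py"
    " --data-path /data --gp --change_qk --relative_position --dist-eval"
    " --cfg ./experiments/supernet/supernet-T.yaml"
)

def sweep(checkpoint, tag, brackets=((5, 6), (6, 7), (7, 8), (8, 9), (9, 10))):
    # Run one evolution search per parameter bracket, one log file per run.
    for lo, hi in brackets:
        cmd = (
            f"{BASE} --resume {checkpoint}"
            f" --min-param-limits {lo} --param-limits {hi}"
            f" --log-file-path ./log/search_{tag}_{hi}M.log"
        )
        subprocess.run(shlex.split(cmd), check=True)  # stop the sweep on failure

if __name__ == "__main__":
    sweep("/OUTPUT_PATH/checkpoint-sn-400-bottom-25.pth", "sn_400_bottom")
```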
Please remember to config the specific constraint in this evolution search using `--min-param-limits` and `--param-limits`: 86 | ```buildoutcfg 87 | python -m torch.distributed.launch --nproc_per_node=8 --use_env evolution.py --data-path /PATH/TO/IMAGENT --gp \ 88 | --change_qk --relative_position --dist-eval --cfg ./experiments/supernet/supernet-B.yaml --resume /PATH/TO/CHECKPOINT \ 89 | --min-param-limits YOUR/CONFIG --param-limits YOUR/CONFIG --data-set EVO_IMNET 90 | ``` 91 | 92 | ### Test 93 | To test our trained models, you need to put the downloaded model in `/PATH/TO/CHECKPOINT`. After that you could use the following command to test the model (Please change your config file and model checkpoint according to different models. Here we use the AutoFormer-B as an example). 94 | ```buildoutcfg 95 | python -m torch.distributed.launch --nproc_per_node=8 --use_env supernet_train.py --data-path /PATH/TO/IMAGENT --gp \ 96 | --change_qk --relative_position --mode retrain --dist-eval --cfg ./experiments/subnet/AutoFormer-B.yaml --resume /PATH/TO/CHECKPOINT --eval 97 | ``` 98 | 99 | ## Performance 100 | 101 | **Left:** Top-1 accuracy on ImageNet. Our method achieves very competitive performance, being superior to the recent DeiT and ViT. **Right:** 1000 random sampled good architectures in the supernet-S. The supernet trained under our strategy allows subnets to be well optimized. 102 | 103 |
104 | 105 | 106 |
107 | 108 | ## Bibtex 109 | 110 | If this repo is helpful for you, please consider to cite it. Thank you! :) 111 | ```bibtex 112 | @InProceedings{AutoFormer, 113 | title = {AutoFormer: Searching Transformers for Visual Recognition}, 114 | author = {Chen, Minghao and Peng, Houwen and Fu, Jianlong and Ling, Haibin}, 115 | booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, 116 | month = {October}, 117 | year = {2021}, 118 | pages = {12270-12280} 119 | } 120 | ``` 121 | 122 | ## Acknowledgements 123 | 124 | The codes are inspired by [HAT](https://github.com/mit-han-lab/hardware-aware-transformers), [timm](https://github.com/rwightman/pytorch-image-models), [DeiT](https://github.com/facebookresearch/deit), [SPOS](https://github.com/megvii-model/SinglePathOneShot). 125 | -------------------------------------------------------------------------------- /AutoFormer/supernet_engine(save).py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = sample_configs(choices=choices) 56 | model_module = unwrap_model(model) 57 | model_module.set_sample_config(config=config) 58 | elif mode == 'retrain': 59 | config = retrain_config 60 | model_module = unwrap_model(model) 61 | model_module.set_sample_config(config=config) 62 | if mixup_fn is not None: 63 | samples, targets = mixup_fn(samples, targets) 64 | if amp: 65 | with torch.cuda.amp.autocast(): 66 | if teacher_model: 67 | with torch.no_grad(): 68 | teach_output = teacher_model(samples) 69 | _, teacher_label = teach_output.topk(1, 1, True, True) 70 | outputs = model(samples) 71 | loss = 1/2 * 
criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 72 | else: 73 | outputs = model(samples) 74 | loss = criterion(outputs, targets) 75 | else: 76 | outputs = model(samples) 77 | if teacher_model: 78 | with torch.no_grad(): 79 | teach_output = teacher_model(samples) 80 | _, teacher_label = teach_output.topk(1, 1, True, True) 81 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 82 | else: 83 | loss = criterion(outputs, targets) 84 | 85 | loss_value = loss.item() 86 | 87 | if not math.isfinite(loss_value): 88 | print("Loss is {}, stopping training".format(loss_value)) 89 | sys.exit(1) 90 | 91 | optimizer.zero_grad() 92 | 93 | # this attribute is added by timm on one optimizer (adahessian) 94 | if amp: 95 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 96 | loss_scaler(loss, optimizer, clip_grad=max_norm, 97 | parameters=model.parameters(), create_graph=is_second_order) 98 | else: 99 | loss.backward() 100 | optimizer.step() 101 | 102 | torch.cuda.synchronize() 103 | if model_ema is not None: 104 | model_ema.update(model) 105 | 106 | metric_logger.update(loss=loss_value) 107 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 108 | 109 | # gather the stats from all processes 110 | metric_logger.synchronize_between_processes() 111 | print("Averaged stats:", metric_logger) 112 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 113 | 114 | @torch.no_grad() 115 | def evaluate(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 116 | criterion = torch.nn.CrossEntropyLoss() 117 | 118 | metric_logger = utils.MetricLogger(delimiter=" ") 119 | header = 'Test:' 120 | 121 | # switch to evaluation mode 122 | model.eval() 123 | if mode == 'super': 124 | config = sample_configs(choices=choices) 125 | model_module = unwrap_model(model) 126 | model_module.set_sample_config(config=config) 127 | else: 128 | config = retrain_config 129 | model_module = unwrap_model(model) 130 | model_module.set_sample_config(config=config) 131 | 132 | 133 | print("sampled model config: {}".format(config)) 134 | parameters = model_module.get_sampled_params_numel(config) 135 | print("sampled model parameters: {}".format(parameters)) 136 | 137 | for images, target in metric_logger.log_every(data_loader, 10, header): 138 | images = images.to(device, non_blocking=True) 139 | target = target.to(device, non_blocking=True) 140 | # compute output 141 | if amp: 142 | with torch.cuda.amp.autocast(): 143 | output = model(images) 144 | loss = criterion(output, target) 145 | else: 146 | output = model(images) 147 | loss = criterion(output, target) 148 | 149 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 150 | 151 | batch_size = images.shape[0] 152 | metric_logger.update(loss=loss.item()) 153 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 154 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 155 | # gather the stats from all processes 156 | metric_logger.synchronize_between_processes() 157 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 158 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 159 | 160 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer_original_greedy/z_supernet_engine.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = sample_configs(choices=choices) 56 | model_module = unwrap_model(model) 57 | model_module.set_sample_config(config=config) 58 | elif mode == 'retrain': 59 | config = retrain_config 60 | model_module = unwrap_model(model) 61 | model_module.set_sample_config(config=config) 62 | if mixup_fn is not None: 63 | samples, targets = mixup_fn(samples, targets) 64 | if amp: 65 | with torch.cuda.amp.autocast(): 66 | if teacher_model: 67 | with torch.no_grad(): 68 | teach_output = teacher_model(samples) 69 | _, teacher_label = teach_output.topk(1, 1, True, True) 70 | outputs = model(samples) 71 | loss = 1/2 * criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 72 | else: 73 | outputs = model(samples) 74 | loss = criterion(outputs, targets) 75 | else: 76 | outputs = model(samples) 77 | if teacher_model: 78 | with torch.no_grad(): 79 | teach_output = teacher_model(samples) 80 | _, teacher_label = teach_output.topk(1, 1, True, True) 81 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 82 | else: 83 | loss = criterion(outputs, targets) 84 | 85 | loss_value = loss.item() 86 | 87 | if not math.isfinite(loss_value): 88 | print("Loss is {}, stopping training".format(loss_value)) 89 | sys.exit(1) 90 | 91 | optimizer.zero_grad() 92 | 93 | # this attribute is added by timm on one optimizer (adahessian) 94 | if amp: 95 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 96 | loss_scaler(loss, optimizer, clip_grad=max_norm, 97 | 
parameters=model.parameters(), create_graph=is_second_order) 98 | else: 99 | loss.backward() 100 | optimizer.step() 101 | 102 | torch.cuda.synchronize() 103 | if model_ema is not None: 104 | model_ema.update(model) 105 | 106 | metric_logger.update(loss=loss_value) 107 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 108 | 109 | # gather the stats from all processes 110 | metric_logger.synchronize_between_processes() 111 | print("Averaged stats:", metric_logger) 112 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 113 | 114 | @torch.no_grad() 115 | def evaluate(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 116 | criterion = torch.nn.CrossEntropyLoss() 117 | 118 | metric_logger = utils.MetricLogger(delimiter=" ") 119 | header = 'Test:' 120 | 121 | # switch to evaluation mode 122 | model.eval() 123 | if mode == 'super': 124 | config = sample_configs(choices=choices) 125 | model_module = unwrap_model(model) 126 | model_module.set_sample_config(config=config) 127 | else: 128 | config = retrain_config 129 | model_module = unwrap_model(model) 130 | model_module.set_sample_config(config=config) 131 | 132 | 133 | print("sampled model config: {}".format(config)) 134 | parameters = model_module.get_sampled_params_numel(config) 135 | print("sampled model parameters: {}".format(parameters)) 136 | 137 | for images, target in metric_logger.log_every(data_loader, 10, header): 138 | images = images.to(device, non_blocking=True) 139 | target = target.to(device, non_blocking=True) 140 | # compute output 141 | if amp: 142 | with torch.cuda.amp.autocast(): 143 | output = model(images) 144 | loss = criterion(output, target) 145 | else: 146 | output = model(images) 147 | loss = criterion(output, target) 148 | 149 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 150 | 151 | batch_size = images.shape[0] 152 | metric_logger.update(loss=loss.item()) 153 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 154 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 155 | # gather the stats from all processes 156 | metric_logger.synchronize_between_processes() 157 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 158 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 159 | 160 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer_original_greedy/supernet_engine_base.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: 
float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = sample_configs(choices=choices) 56 | model_module = unwrap_model(model) 57 | model_module.set_sample_config(config=config) 58 | elif mode == 'retrain': 59 | config = retrain_config 60 | model_module = unwrap_model(model) 61 | model_module.set_sample_config(config=config) 62 | if mixup_fn is not None: 63 | samples, targets = mixup_fn(samples, targets) 64 | if amp: 65 | with torch.cuda.amp.autocast(): 66 | if teacher_model: 67 | with torch.no_grad(): 68 | teach_output = teacher_model(samples) 69 | _, teacher_label = teach_output.topk(1, 1, True, True) 70 | outputs = model(samples) 71 | loss = 1/2 * criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 72 | else: 73 | outputs = model(samples) 74 | loss = criterion(outputs, targets) 75 | else: 76 | outputs = model(samples) 77 | if teacher_model: 78 | with torch.no_grad(): 79 | teach_output = teacher_model(samples) 80 | _, teacher_label = teach_output.topk(1, 1, True, True) 81 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 82 | else: 83 | loss = criterion(outputs, targets) 84 | 85 | loss_value = loss.item() 86 | 87 | if not math.isfinite(loss_value): 88 | print("Loss is {}, stopping training".format(loss_value)) 89 | sys.exit(1) 90 | 91 | optimizer.zero_grad() 92 | 93 | # this attribute is added by timm on one optimizer (adahessian) 94 | if amp: 95 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 96 | loss_scaler(loss, optimizer, clip_grad=max_norm, 97 | parameters=model.parameters(), create_graph=is_second_order) 98 | else: 99 | loss.backward() 100 | optimizer.step() 101 | 102 | torch.cuda.synchronize() 103 | if model_ema is not None: 104 | model_ema.update(model) 105 | 106 | metric_logger.update(loss=loss_value) 107 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 108 | 109 | # gather the stats from all processes 110 | metric_logger.synchronize_between_processes() 111 | print("Averaged stats:", metric_logger) 112 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 113 | 114 | @torch.no_grad() 115 | def evaluate(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 116 | criterion = torch.nn.CrossEntropyLoss() 117 | 118 | metric_logger = utils.MetricLogger(delimiter=" ") 119 | header = 'Test:' 120 | 121 | # switch to evaluation mode 122 | model.eval() 123 | if mode == 'super': 124 | config = sample_configs(choices=choices) 125 | 
model_module = unwrap_model(model) 126 | model_module.set_sample_config(config=config) 127 | else: 128 | config = retrain_config 129 | model_module = unwrap_model(model) 130 | model_module.set_sample_config(config=config) 131 | 132 | 133 | print("sampled model config: {}".format(config)) 134 | parameters = model_module.get_sampled_params_numel(config) 135 | print("sampled model parameters: {}".format(parameters)) 136 | 137 | for images, target in metric_logger.log_every(data_loader, 10, header): 138 | images = images.to(device, non_blocking=True) 139 | target = target.to(device, non_blocking=True) 140 | # compute output 141 | if amp: 142 | with torch.cuda.amp.autocast(): 143 | output = model(images) 144 | loss = criterion(output, target) 145 | else: 146 | output = model(images) 147 | loss = criterion(output, target) 148 | 149 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 150 | 151 | batch_size = images.shape[0] 152 | metric_logger.update(loss=loss.item()) 153 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 154 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 155 | # gather the stats from all processes 156 | metric_logger.synchronize_between_processes() 157 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 158 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 159 | 160 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer_original_greedy/supernet_engine_real_original.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch_original(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = 
targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = sample_configs(choices=choices) 56 | model_module = unwrap_model(model) 57 | model_module.set_sample_config(config=config) 58 | elif mode == 'retrain': 59 | config = retrain_config 60 | model_module = unwrap_model(model) 61 | model_module.set_sample_config(config=config) 62 | if mixup_fn is not None: 63 | samples, targets = mixup_fn(samples, targets) 64 | if amp: 65 | with torch.cuda.amp.autocast(): 66 | if teacher_model: 67 | with torch.no_grad(): 68 | teach_output = teacher_model(samples) 69 | _, teacher_label = teach_output.topk(1, 1, True, True) 70 | outputs = model(samples) 71 | loss = 1/2 * criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 72 | else: 73 | outputs = model(samples) 74 | loss = criterion(outputs, targets) 75 | else: 76 | outputs = model(samples) 77 | if teacher_model: 78 | with torch.no_grad(): 79 | teach_output = teacher_model(samples) 80 | _, teacher_label = teach_output.topk(1, 1, True, True) 81 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 82 | else: 83 | loss = criterion(outputs, targets) 84 | 85 | loss_value = loss.item() 86 | 87 | if not math.isfinite(loss_value): 88 | print("Loss is {}, stopping training".format(loss_value)) 89 | sys.exit(1) 90 | 91 | optimizer.zero_grad() 92 | 93 | # this attribute is added by timm on one optimizer (adahessian) 94 | if amp: 95 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 96 | loss_scaler(loss, optimizer, clip_grad=max_norm, 97 | parameters=model.parameters(), create_graph=is_second_order) 98 | else: 99 | loss.backward() 100 | optimizer.step() 101 | 102 | torch.cuda.synchronize() 103 | if model_ema is not None: 104 | model_ema.update(model) 105 | 106 | metric_logger.update(loss=loss_value) 107 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 108 | 109 | # gather the stats from all processes 110 | metric_logger.synchronize_between_processes() 111 | print("Averaged stats:", metric_logger) 112 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 113 | 114 | @torch.no_grad() 115 | def evaluate_original(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 116 | criterion = torch.nn.CrossEntropyLoss() 117 | 118 | metric_logger = utils.MetricLogger(delimiter=" ") 119 | header = 'Test:' 120 | 121 | # switch to evaluation mode 122 | model.eval() 123 | if mode == 'super': 124 | config = sample_configs(choices=choices) 125 | model_module = unwrap_model(model) 126 | model_module.set_sample_config(config=config) 127 | else: 128 | config = retrain_config 129 | model_module = unwrap_model(model) 130 | model_module.set_sample_config(config=config) 131 | 132 | 133 | print("sampled model config: {}".format(config)) 134 | parameters = model_module.get_sampled_params_numel(config) 135 | print("sampled model parameters: {}".format(parameters)) 136 | 137 | for images, target in metric_logger.log_every(data_loader, 10, header): 138 | images = images.to(device, non_blocking=True) 139 | target = target.to(device, non_blocking=True) 140 | # compute output 141 | if amp: 142 | with torch.cuda.amp.autocast(): 143 | output = model(images) 144 | loss = criterion(output, target) 145 | else: 146 | output = model(images) 147 | loss = criterion(output, target) 148 | 149 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 150 | 151 | batch_size = images.shape[0] 
152 | metric_logger.update(loss=loss.item()) 153 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 154 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 155 | # gather the stats from all processes 156 | metric_logger.synchronize_between_processes() 157 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 158 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 159 | 160 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer_original_greedy/supernet_engine_only_supernet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | from timm.utils.model import unwrap_model 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | from lib import utils 10 | import random 11 | import time 12 | 13 | def sample_configs(choices): 14 | 15 | config = {} 16 | dimensions = ['mlp_ratio', 'num_heads'] 17 | depth = random.choice(choices['depth']) 18 | for dimension in dimensions: 19 | config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)] 20 | 21 | config['embed_dim'] = [random.choice(choices['embed_dim'])]*depth 22 | 23 | config['layer_num'] = depth 24 | return config 25 | 26 | def train_one_epoch_original(model: torch.nn.Module, criterion: torch.nn.Module, 27 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 28 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 29 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 30 | amp: bool = True, teacher_model: torch.nn.Module = None, 31 | teach_loss: torch.nn.Module = None, choices=None, mode='super', retrain_config=None): 32 | model.train() 33 | criterion.train() 34 | 35 | # set random seed 36 | random.seed(epoch) 37 | 38 | metric_logger = utils.MetricLogger(delimiter=" ") 39 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 40 | header = 'Epoch: [{}]'.format(epoch) 41 | print_freq = 10 42 | if mode == 'retrain': 43 | config = retrain_config 44 | model_module = unwrap_model(model) 45 | print(config) 46 | model_module.set_sample_config(config=config) 47 | print(model_module.get_sampled_params_numel(config)) 48 | 49 | for samples, targets in metric_logger.log_every(data_loader, print_freq, header): 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | # sample random config 54 | if mode == 'super': 55 | config = { 56 | 'layer_num': 14, 57 | 'mlp_ratio': [4.0] * 14, 58 | 'num_heads': [4] * 14, 59 | 'embed_dim': [240] * 14 60 | } 61 | # config = sample_configs(choices=choices) 62 | model_module = unwrap_model(model) 63 | model_module.set_sample_config(config=config) 64 | elif mode == 'retrain': 65 | config = retrain_config 66 | model_module = unwrap_model(model) 67 | model_module.set_sample_config(config=config) 68 | if mixup_fn is not None: 69 | samples, targets = mixup_fn(samples, targets) 70 | if amp: 71 | with torch.cuda.amp.autocast(): 72 | if teacher_model: 73 | with torch.no_grad(): 74 | teach_output = teacher_model(samples) 75 | _, teacher_label = teach_output.topk(1, 1, True, True) 76 | outputs = model(samples) 77 | loss = 1/2 * criterion(outputs, targets) + 1/2 * teach_loss(outputs, teacher_label.squeeze()) 78 | else: 79 | 
outputs = model(samples) 80 | loss = criterion(outputs, targets) 81 | else: 82 | outputs = model(samples) 83 | if teacher_model: 84 | with torch.no_grad(): 85 | teach_output = teacher_model(samples) 86 | _, teacher_label = teach_output.topk(1, 1, True, True) 87 | loss = 1 / 2 * criterion(outputs, targets) + 1 / 2 * teach_loss(outputs, teacher_label.squeeze()) 88 | else: 89 | loss = criterion(outputs, targets) 90 | 91 | loss_value = loss.item() 92 | 93 | if not math.isfinite(loss_value): 94 | print("Loss is {}, stopping training".format(loss_value)) 95 | sys.exit(1) 96 | 97 | optimizer.zero_grad() 98 | 99 | # this attribute is added by timm on one optimizer (adahessian) 100 | if amp: 101 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 102 | loss_scaler(loss, optimizer, clip_grad=max_norm, 103 | parameters=model.parameters(), create_graph=is_second_order) 104 | else: 105 | loss.backward() 106 | optimizer.step() 107 | 108 | torch.cuda.synchronize() 109 | if model_ema is not None: 110 | model_ema.update(model) 111 | 112 | metric_logger.update(loss=loss_value) 113 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 114 | 115 | # gather the stats from all processes 116 | metric_logger.synchronize_between_processes() 117 | print("Averaged stats:", metric_logger) 118 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 119 | 120 | @torch.no_grad() 121 | def evaluate_original(data_loader, model, device, amp=True, choices=None, mode='super', retrain_config=None): 122 | criterion = torch.nn.CrossEntropyLoss() 123 | 124 | metric_logger = utils.MetricLogger(delimiter=" ") 125 | header = 'Test:' 126 | 127 | # switch to evaluation mode 128 | model.eval() 129 | if mode == 'super': 130 | config = sample_configs(choices=choices) 131 | model_module = unwrap_model(model) 132 | model_module.set_sample_config(config=config) 133 | else: 134 | config = retrain_config 135 | model_module = unwrap_model(model) 136 | model_module.set_sample_config(config=config) 137 | 138 | 139 | print("sampled model config: {}".format(config)) 140 | parameters = model_module.get_sampled_params_numel(config) 141 | print("sampled model parameters: {}".format(parameters)) 142 | 143 | for images, target in metric_logger.log_every(data_loader, 10, header): 144 | images = images.to(device, non_blocking=True) 145 | target = target.to(device, non_blocking=True) 146 | # compute output 147 | if amp: 148 | with torch.cuda.amp.autocast(): 149 | output = model(images) 150 | loss = criterion(output, target) 151 | else: 152 | output = model(images) 153 | loss = criterion(output, target) 154 | 155 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 156 | 157 | batch_size = images.shape[0] 158 | metric_logger.update(loss=loss.item()) 159 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 160 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 161 | # gather the stats from all processes 162 | metric_logger.synchronize_between_processes() 163 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 164 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 165 | 166 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} -------------------------------------------------------------------------------- /AutoFormer/lib/utils.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import time 4 | from 
collections import defaultdict, deque 5 | import datetime 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | class SmoothedValue(object): 12 | """Track a series of values and provide access to smoothed values over a 13 | window or the global series average. 14 | """ 15 | 16 | def __init__(self, window_size=20, fmt=None): 17 | if fmt is None: 18 | fmt = "{median:.4f} ({global_avg:.4f})" 19 | self.deque = deque(maxlen=window_size) 20 | self.total = 0.0 21 | self.count = 0 22 | self.fmt = fmt 23 | 24 | def update(self, value, n=1): 25 | self.deque.append(value) 26 | self.count += n 27 | self.total += value * n 28 | 29 | def synchronize_between_processes(self): 30 | """ 31 | Warning: does not synchronize the deque! 32 | """ 33 | if not is_dist_avail_and_initialized(): 34 | return 35 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') 36 | dist.barrier() 37 | dist.all_reduce(t) 38 | t = t.tolist() 39 | self.count = int(t[0]) 40 | self.total = t[1] 41 | 42 | @property 43 | def median(self): 44 | d = torch.tensor(list(self.deque)) 45 | return d.median().item() 46 | 47 | @property 48 | def avg(self): 49 | d = torch.tensor(list(self.deque), dtype=torch.float32) 50 | return d.mean().item() 51 | 52 | @property 53 | def global_avg(self): 54 | return self.total / self.count 55 | 56 | @property 57 | def max(self): 58 | return max(self.deque) 59 | 60 | @property 61 | def value(self): 62 | return self.deque[-1] 63 | 64 | def __str__(self): 65 | return self.fmt.format( 66 | median=self.median, 67 | avg=self.avg, 68 | global_avg=self.global_avg, 69 | max=self.max, 70 | value=self.value) 71 | 72 | 73 | class MetricLogger(object): 74 | def __init__(self, delimiter="\t"): 75 | self.meters = defaultdict(SmoothedValue) 76 | self.delimiter = delimiter 77 | 78 | def update(self, **kwargs): 79 | for k, v in kwargs.items(): 80 | if isinstance(v, torch.Tensor): 81 | v = v.item() 82 | assert isinstance(v, (float, int)) 83 | self.meters[k].update(v) 84 | 85 | def __getattr__(self, attr): 86 | if attr in self.meters: 87 | return self.meters[attr] 88 | if attr in self.__dict__: 89 | return self.__dict__[attr] 90 | raise AttributeError("'{}' object has no attribute '{}'".format( 91 | type(self).__name__, attr)) 92 | 93 | def __str__(self): 94 | loss_str = [] 95 | for name, meter in self.meters.items(): 96 | loss_str.append( 97 | "{}: {}".format(name, str(meter)) 98 | ) 99 | return self.delimiter.join(loss_str) 100 | 101 | def synchronize_between_processes(self): 102 | for meter in self.meters.values(): 103 | meter.synchronize_between_processes() 104 | 105 | def add_meter(self, name, meter): 106 | self.meters[name] = meter 107 | 108 | def log_every(self, iterable, print_freq, header=None): 109 | i = 0 110 | if not header: 111 | header = '' 112 | start_time = time.time() 113 | end = time.time() 114 | iter_time = SmoothedValue(fmt='{avg:.4f}') 115 | data_time = SmoothedValue(fmt='{avg:.4f}') 116 | space_fmt = ':' + str(len(str(len(iterable)))) + 'd' 117 | log_msg = [ 118 | header, 119 | '[{0' + space_fmt + '}/{1}]', 120 | 'eta: {eta}', 121 | '{meters}', 122 | 'time: {time}', 123 | 'data: {data}' 124 | ] 125 | if torch.cuda.is_available(): 126 | log_msg.append('max mem: {memory:.0f}') 127 | log_msg = self.delimiter.join(log_msg) 128 | MB = 1024.0 * 1024.0 129 | for obj in iterable: 130 | data_time.update(time.time() - end) 131 | yield obj 132 | iter_time.update(time.time() - end) 133 | if i % print_freq == 0 or i == len(iterable) - 1: 134 | eta_seconds = iter_time.global_avg * 
(len(iterable) - i) 135 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 136 | if torch.cuda.is_available(): 137 | print(log_msg.format( 138 | i, len(iterable), eta=eta_string, 139 | meters=str(self), 140 | time=str(iter_time), data=str(data_time), 141 | memory=torch.cuda.max_memory_allocated() / MB)) 142 | else: 143 | print(log_msg.format( 144 | i, len(iterable), eta=eta_string, 145 | meters=str(self), 146 | time=str(iter_time), data=str(data_time))) 147 | i += 1 148 | end = time.time() 149 | total_time = time.time() - start_time 150 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 151 | print('{} Total time: {} ({:.4f} s / it)'.format( 152 | header, total_time_str, total_time / len(iterable))) 153 | 154 | 155 | def _load_checkpoint_for_ema(model_ema, checkpoint): 156 | """ 157 | Workaround for ModelEma._load_checkpoint to accept an already-loaded object 158 | """ 159 | mem_file = io.BytesIO() 160 | torch.save(checkpoint, mem_file) 161 | mem_file.seek(0) 162 | model_ema._load_checkpoint(mem_file) 163 | 164 | 165 | def setup_for_distributed(is_master): 166 | """ 167 | This function disables printing when not in master process 168 | """ 169 | import builtins as __builtin__ 170 | builtin_print = __builtin__.print 171 | 172 | def print(*args, **kwargs): 173 | force = kwargs.pop('force', False) 174 | if is_master or force: 175 | builtin_print(*args, **kwargs) 176 | 177 | __builtin__.print = print 178 | 179 | 180 | def is_dist_avail_and_initialized(): 181 | if not dist.is_available(): 182 | return False 183 | if not dist.is_initialized(): 184 | return False 185 | return True 186 | 187 | 188 | def get_world_size(): 189 | if not is_dist_avail_and_initialized(): 190 | return 1 191 | return dist.get_world_size() 192 | 193 | 194 | def get_rank(): 195 | if not is_dist_avail_and_initialized(): 196 | return 0 197 | return dist.get_rank() 198 | 199 | 200 | def is_main_process(): 201 | return get_rank() == 0 202 | 203 | 204 | def save_on_master(*args, **kwargs): 205 | if is_main_process(): 206 | torch.save(*args, **kwargs) 207 | 208 | 209 | def init_distributed_mode(args): 210 | if 'OMPI_COMM_WORLD_RANK' in os.environ: 211 | args.rank = int(os.environ.get('OMPI_COMM_WORLD_RANK')) 212 | args.world_size = int(os.environ.get('OMPI_COMM_WORLD_SIZE')) 213 | args.gpu = args.rank % torch.cuda.device_count() 214 | elif 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 215 | args.rank = int(os.environ["RANK"]) 216 | args.world_size = int(os.environ['WORLD_SIZE']) 217 | args.gpu = int(os.environ['LOCAL_RANK']) 218 | elif 'SLURM_PROCID' in os.environ: 219 | args.rank = int(os.environ['SLURM_PROCID']) 220 | args.gpu = args.rank % torch.cuda.device_count() 221 | else: 222 | print('Not using distributed mode') 223 | args.distributed = False 224 | return 225 | 226 | args.distributed = True 227 | 228 | torch.cuda.set_device(args.gpu) 229 | args.dist_backend = 'nccl' 230 | print('| distributed init (rank {}): {}'.format( 231 | args.rank, args.dist_url), flush=True) 232 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 233 | world_size=args.world_size, rank=args.rank) 234 | torch.distributed.barrier() 235 | setup_for_distributed(args.rank == 0) 236 | -------------------------------------------------------------------------------- /AutoFormer_original/lib/utils.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import time 4 | from collections import defaultdict, deque 5 | 
import datetime 6 | 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | class SmoothedValue(object): 12 | """Track a series of values and provide access to smoothed values over a 13 | window or the global series average. 14 | """ 15 | 16 | def __init__(self, window_size=20, fmt=None): 17 | if fmt is None: 18 | fmt = "{median:.4f} ({global_avg:.4f})" 19 | self.deque = deque(maxlen=window_size) 20 | self.total = 0.0 21 | self.count = 0 22 | self.fmt = fmt 23 | 24 | def update(self, value, n=1): 25 | self.deque.append(value) 26 | self.count += n 27 | self.total += value * n 28 | 29 | def synchronize_between_processes(self): 30 | """ 31 | Warning: does not synchronize the deque! 32 | """ 33 | if not is_dist_avail_and_initialized(): 34 | return 35 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') 36 | dist.barrier() 37 | dist.all_reduce(t) 38 | t = t.tolist() 39 | self.count = int(t[0]) 40 | self.total = t[1] 41 | 42 | @property 43 | def median(self): 44 | d = torch.tensor(list(self.deque)) 45 | return d.median().item() 46 | 47 | @property 48 | def avg(self): 49 | d = torch.tensor(list(self.deque), dtype=torch.float32) 50 | return d.mean().item() 51 | 52 | @property 53 | def global_avg(self): 54 | return self.total / self.count 55 | 56 | @property 57 | def max(self): 58 | return max(self.deque) 59 | 60 | @property 61 | def value(self): 62 | return self.deque[-1] 63 | 64 | def __str__(self): 65 | return self.fmt.format( 66 | median=self.median, 67 | avg=self.avg, 68 | global_avg=self.global_avg, 69 | max=self.max, 70 | value=self.value) 71 | 72 | 73 | class MetricLogger(object): 74 | def __init__(self, delimiter="\t"): 75 | self.meters = defaultdict(SmoothedValue) 76 | self.delimiter = delimiter 77 | 78 | def update(self, **kwargs): 79 | for k, v in kwargs.items(): 80 | if isinstance(v, torch.Tensor): 81 | v = v.item() 82 | assert isinstance(v, (float, int)) 83 | self.meters[k].update(v) 84 | 85 | def __getattr__(self, attr): 86 | if attr in self.meters: 87 | return self.meters[attr] 88 | if attr in self.__dict__: 89 | return self.__dict__[attr] 90 | raise AttributeError("'{}' object has no attribute '{}'".format( 91 | type(self).__name__, attr)) 92 | 93 | def __str__(self): 94 | loss_str = [] 95 | for name, meter in self.meters.items(): 96 | loss_str.append( 97 | "{}: {}".format(name, str(meter)) 98 | ) 99 | return self.delimiter.join(loss_str) 100 | 101 | def synchronize_between_processes(self): 102 | for meter in self.meters.values(): 103 | meter.synchronize_between_processes() 104 | 105 | def add_meter(self, name, meter): 106 | self.meters[name] = meter 107 | 108 | def log_every(self, iterable, print_freq, header=None): 109 | i = 0 110 | if not header: 111 | header = '' 112 | start_time = time.time() 113 | end = time.time() 114 | iter_time = SmoothedValue(fmt='{avg:.4f}') 115 | data_time = SmoothedValue(fmt='{avg:.4f}') 116 | space_fmt = ':' + str(len(str(len(iterable)))) + 'd' 117 | log_msg = [ 118 | header, 119 | '[{0' + space_fmt + '}/{1}]', 120 | 'eta: {eta}', 121 | '{meters}', 122 | 'time: {time}', 123 | 'data: {data}' 124 | ] 125 | if torch.cuda.is_available(): 126 | log_msg.append('max mem: {memory:.0f}') 127 | log_msg = self.delimiter.join(log_msg) 128 | MB = 1024.0 * 1024.0 129 | for obj in iterable: 130 | data_time.update(time.time() - end) 131 | yield obj 132 | iter_time.update(time.time() - end) 133 | if i % print_freq == 0 or i == len(iterable) - 1: 134 | eta_seconds = iter_time.global_avg * (len(iterable) - i) 135 | eta_string = 
str(datetime.timedelta(seconds=int(eta_seconds))) 136 | if torch.cuda.is_available(): 137 | print(log_msg.format( 138 | i, len(iterable), eta=eta_string, 139 | meters=str(self), 140 | time=str(iter_time), data=str(data_time), 141 | memory=torch.cuda.max_memory_allocated() / MB)) 142 | else: 143 | print(log_msg.format( 144 | i, len(iterable), eta=eta_string, 145 | meters=str(self), 146 | time=str(iter_time), data=str(data_time))) 147 | i += 1 148 | end = time.time() 149 | total_time = time.time() - start_time 150 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 151 | print('{} Total time: {} ({:.4f} s / it)'.format( 152 | header, total_time_str, total_time / len(iterable))) 153 | 154 | 155 | def _load_checkpoint_for_ema(model_ema, checkpoint): 156 | """ 157 | Workaround for ModelEma._load_checkpoint to accept an already-loaded object 158 | """ 159 | mem_file = io.BytesIO() 160 | torch.save(checkpoint, mem_file) 161 | mem_file.seek(0) 162 | model_ema._load_checkpoint(mem_file) 163 | 164 | 165 | def setup_for_distributed(is_master): 166 | """ 167 | This function disables printing when not in master process 168 | """ 169 | import builtins as __builtin__ 170 | builtin_print = __builtin__.print 171 | 172 | def print(*args, **kwargs): 173 | force = kwargs.pop('force', False) 174 | if is_master or force: 175 | builtin_print(*args, **kwargs) 176 | 177 | __builtin__.print = print 178 | 179 | 180 | def is_dist_avail_and_initialized(): 181 | if not dist.is_available(): 182 | return False 183 | if not dist.is_initialized(): 184 | return False 185 | return True 186 | 187 | 188 | def get_world_size(): 189 | if not is_dist_avail_and_initialized(): 190 | return 1 191 | return dist.get_world_size() 192 | 193 | 194 | def get_rank(): 195 | if not is_dist_avail_and_initialized(): 196 | return 0 197 | return dist.get_rank() 198 | 199 | 200 | def is_main_process(): 201 | return get_rank() == 0 202 | 203 | 204 | def save_on_master(*args, **kwargs): 205 | if is_main_process(): 206 | torch.save(*args, **kwargs) 207 | 208 | 209 | def init_distributed_mode(args): 210 | if 'OMPI_COMM_WORLD_RANK' in os.environ: 211 | args.rank = int(os.environ.get('OMPI_COMM_WORLD_RANK')) 212 | args.world_size = int(os.environ.get('OMPI_COMM_WORLD_SIZE')) 213 | args.gpu = args.rank % torch.cuda.device_count() 214 | elif 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 215 | args.rank = int(os.environ["RANK"]) 216 | args.world_size = int(os.environ['WORLD_SIZE']) 217 | args.gpu = int(os.environ['LOCAL_RANK']) 218 | elif 'SLURM_PROCID' in os.environ: 219 | args.rank = int(os.environ['SLURM_PROCID']) 220 | args.gpu = args.rank % torch.cuda.device_count() 221 | else: 222 | print('Not using distributed mode') 223 | args.distributed = False 224 | return 225 | 226 | args.distributed = True 227 | 228 | torch.cuda.set_device(args.gpu) 229 | args.dist_backend = 'nccl' 230 | print('| distributed init (rank {}): {}'.format( 231 | args.rank, args.dist_url), flush=True) 232 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 233 | world_size=args.world_size, rank=args.rank) 234 | torch.distributed.barrier() 235 | setup_for_distributed(args.rank == 0) 236 | --------------------------------------------------------------------------------
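All of the `supernet_engine*` files above share one training pattern: each step draws a random subnet configuration from the search-space `choices` dict via `sample_configs`, activates it on the weight-sharing supernet with `set_sample_config`, and then runs an ordinary forward/backward pass (optionally distilling from a teacher). The sketch below is a minimal, self-contained illustration of that loop; `ToySupernet` and the `choices` values here are hypothetical stand-ins for illustration, not the repo's actual model class or YAML search spaces.

```python
# Minimal sketch of the weight-sharing sampling loop used by the engine files.
# ToySupernet and the `choices` values are hypothetical stand-ins; the real
# repo activates configs on its AutoFormer supernet via set_sample_config().
import random
import torch

choices = {                       # hypothetical search space
    'depth': [12, 13, 14],
    'mlp_ratio': [3.0, 3.5, 4.0],
    'num_heads': [3, 4],
    'embed_dim': [192, 216, 240],
}

def sample_configs(choices):
    # Same scheme as the engines above: per-layer mlp_ratio/num_heads,
    # one embed_dim shared by every layer, and a randomly sampled depth.
    config = {}
    depth = random.choice(choices['depth'])
    for dimension in ['mlp_ratio', 'num_heads']:
        config[dimension] = [random.choice(choices[dimension]) for _ in range(depth)]
    config['embed_dim'] = [random.choice(choices['embed_dim'])] * depth
    config['layer_num'] = depth
    return config

class ToySupernet(torch.nn.Module):
    """Stand-in supernet: one over-parameterized linear layer whose active
    slice is selected by the sampled embed_dim (weight sharing in miniature)."""
    def __init__(self, max_dim=240, num_classes=10):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.randn(num_classes, max_dim) * 0.01)
        self.active_dim = max_dim

    def set_sample_config(self, config):
        self.active_dim = config['embed_dim'][0]

    def forward(self, x):
        # Use only the first `active_dim` input features of the shared weight,
        # so different sampled subnets train overlapping parameter slices.
        return x[:, :self.active_dim] @ self.weight[:, :self.active_dim].t()

model = ToySupernet()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

for step in range(3):                 # one random subnet per step, as in 'super' mode
    config = sample_configs(choices)
    model.set_sample_config(config)
    x = torch.randn(8, 240)           # dummy batch
    y = torch.randint(0, 10, (8,))
    loss = criterion(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(step, config['layer_num'], config['embed_dim'][0], round(loss.item(), 4))
```

For contrast, `supernet_engine_only_supernet.py` replaces the `sample_configs` call in 'super' mode with one hard-coded config (depth 14, embed_dim 240, 4 heads, mlp_ratio 4.0), so every step trains the same fixed architecture rather than a randomly drawn one.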