├── netaug ├── models │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── tiny_mbv2.py │ │ ├── mcunet.py │ │ ├── mbv2.py │ │ ├── proxylessnas.py │ │ ├── mbv3.py │ │ └── layers.py │ └── netaug │ │ ├── __init__.py │ │ ├── tiny_mbv2.py │ │ ├── proxylessnas.py │ │ ├── utils.py │ │ ├── mcunet.py │ │ ├── mbv2.py │ │ └── mbv3.py ├── .gitignore ├── figures │ ├── fig1.png │ ├── fig2.png │ ├── fig3.png │ ├── fig4.png │ ├── fig5.png │ └── fig6.png ├── utils │ ├── __init__.py │ ├── lr_scheduler.py │ ├── profile.py │ ├── metric.py │ ├── criterion.py │ ├── distributed.py │ ├── init.py │ └── misc.py ├── bash │ ├── imagenet │ │ ├── mbv2-0.35.sh │ │ ├── proxylessnas-0.35.sh │ │ ├── tinymbv2.sh │ │ ├── mbv3-0.35.sh │ │ └── mcunet.sh │ ├── eval.sh │ └── transfer │ │ ├── car.sh │ │ ├── pets.sh │ │ ├── cub200.sh │ │ ├── food.sh │ │ └── flowers.sh ├── configs │ ├── default.yaml │ └── netaug.yaml ├── eval.py ├── README.md ├── setup.py └── train.py ├── tinytl ├── tinytl │ ├── __init__.py │ ├── model │ │ ├── __init__.py │ │ ├── network.py │ │ └── modules.py │ ├── utils │ │ ├── __init__.py │ │ ├── common_utils.py │ │ └── memory_cost_profiler.py │ └── data_providers │ │ ├── __init__.py │ │ ├── fgvc_data_providers.py │ │ └── fgvc_run_config.py ├── dataset_setup_scripts │ ├── make_all_datasets.sh │ ├── make_pets.py │ ├── make_food.py │ ├── make_aircraft.py │ ├── make_flowers102.py │ ├── make_stanford_cars.py │ └── make_cub200.py ├── exp_scripts │ ├── batch8 │ │ ├── car.sh │ │ ├── pets.sh │ │ ├── cifar10.sh │ │ ├── cub200.sh │ │ ├── food101.sh │ │ ├── cifar100.sh │ │ ├── flowers.sh │ │ └── aircraft.sh │ └── batch1 │ │ ├── car.sh │ │ ├── aircraft.sh │ │ └── flowers.sh ├── README.md └── tinytl_fgvc_train.py ├── .gitmodules ├── LICENSE ├── .gitignore └── README.md /netaug/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tinytl/tinytl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /netaug/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .idea 3 | .DS_store 4 | -------------------------------------------------------------------------------- /tinytl/tinytl/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import * 2 | from .network import * 3 | -------------------------------------------------------------------------------- /netaug/figures/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/tinyml/HEAD/netaug/figures/fig1.png -------------------------------------------------------------------------------- /netaug/figures/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/tinyml/HEAD/netaug/figures/fig2.png -------------------------------------------------------------------------------- /netaug/figures/fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/tinyml/HEAD/netaug/figures/fig3.png -------------------------------------------------------------------------------- /netaug/figures/fig4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mit-han-lab/tinyml/HEAD/netaug/figures/fig4.png -------------------------------------------------------------------------------- /netaug/figures/fig5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/tinyml/HEAD/netaug/figures/fig5.png -------------------------------------------------------------------------------- /netaug/figures/fig6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/tinyml/HEAD/netaug/figures/fig6.png -------------------------------------------------------------------------------- /tinytl/tinytl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .common_utils import * 2 | from .memory_cost_profiler import * 3 | -------------------------------------------------------------------------------- /tinytl/tinytl/data_providers/__init__.py: -------------------------------------------------------------------------------- 1 | from .fgvc_data_providers import * 2 | from .fgvc_run_config import * 3 | -------------------------------------------------------------------------------- /netaug/models/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .layers import * 2 | from .mbv2 import * 3 | from .mbv3 import * 4 | from .mcunet import * 5 | from .proxylessnas import * 6 | from .tiny_mbv2 import * 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "once-for-all"] 2 | path = once-for-all 3 | url = https://github.com/mit-han-lab/once-for-all 4 | [submodule "mcunet"] 5 | path = mcunet 6 | url = https://github.com/mit-han-lab/mcunet 7 | -------------------------------------------------------------------------------- /netaug/models/netaug/__init__.py: -------------------------------------------------------------------------------- 1 | from .layers import * 2 | from .mbv2 import * 3 | from .mbv3 import * 4 | from .mcunet import * 5 | from .proxylessnas import * 6 | from .tiny_mbv2 import * 7 | from .utils import * 8 | -------------------------------------------------------------------------------- /netaug/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .criterion import * 2 | from .distributed import * 3 | from .init import * 4 | from .lr_scheduler import * 5 | from .metric import * 6 | from .misc import * 7 | from .profile import * 8 | -------------------------------------------------------------------------------- /tinytl/dataset_setup_scripts/make_all_datasets.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python make_food.py 3 | python make_aircraft.py 4 | python make_flowers102.py 5 | python make_stanford_cars.py 6 | python make_cub200.py 7 | python make_pets.py 8 | -------------------------------------------------------------------------------- /netaug/bash/imagenet/mbv2-0.35.sh: -------------------------------------------------------------------------------- 1 | torchpack dist-run -np 16 -H $server1:8,$server2:8 \ 2 | python train.py configs/netaug.yaml \ 3 | --data_provider "{data_path:/dataset/imagenet}" \ 4 | --netaug "{aug_expand_list:[1.0,1.6,2.2,2.8],aug_width_mult_list:[1.0,1.8,2.6]}" \ 5 | --path 6 | 
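# Note: $server1/$server2 are user-supplied host names, and the value of --path
# (the output directory) is deliberately left blank. A hypothetical single-node
# variant, assuming one 8-GPU machine and a made-up output path:
#
#   torchpack dist-run -np 8 \
#     python train.py configs/netaug.yaml \
#     --data_provider "{data_path:/dataset/imagenet}" \
#     --netaug "{aug_expand_list:[1.0,1.6,2.2,2.8],aug_width_mult_list:[1.0,1.8,2.6]}" \
#     --path .exp/imagenet/netaug-mbv2-0.35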
-------------------------------------------------------------------------------- /netaug/bash/imagenet/proxylessnas-0.35.sh: -------------------------------------------------------------------------------- 1 | torchpack dist-run -np 16 -H $server1:8,$server2:8 \ 2 | python train.py configs/netaug.yaml \ 3 | --data_provider "{data_path:/dataset/imagenet}" \ 4 | --model "{name:proxylessnas-0.35}" \ 5 | --netaug "{aug_expand_list:[1.0,1.6,2.2,2.8],aug_width_mult_list:[1.0,1.8,2.6]}" \ 6 | --path 7 | -------------------------------------------------------------------------------- /netaug/bash/imagenet/tinymbv2.sh: -------------------------------------------------------------------------------- 1 | torchpack dist-run -np 16 -H $server1:8,$server2:8 \ 2 | python train.py configs/netaug.yaml \ 3 | --data_provider "{data_path:/dataset/imagenet,image_size:144}" \ 4 | --model "{name:tinymbv2}" \ 5 | --netaug "{aug_expand_list:[1.0,1.6,2.2,2.8],aug_width_mult_list:[1.0,1.8,2.6]}" \ 6 | --path 7 | -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch8/car.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 8 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 8e-4 --opt_type adam \ 4 | --label_smoothing 0.7 --distort_color torch --frozen_param_bits 8 \ 5 | --gpu 0 --dataset car --path .exp/batch8/car -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch1/car.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 1 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 2e-4 --opt_type adam \ 4 | --label_smoothing 0.7 --distort_color torch --frozen_param_bits 8 \ 5 | --gpu 0 --dataset car --path .exp/batch1/car 6 | -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch8/pets.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 8 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 0.5e-4 --opt_type adam \ 4 | --label_smoothing 0.7 --distort_color None --frozen_param_bits 8 \ 5 | --gpu 0 --dataset pets --path .exp/batch8/pets -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch8/cifar10.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 8 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 3e-4 --opt_type adam \ 4 | --label_smoothing 0.1 --distort_color None --frozen_param_bits 8 \ 5 | --gpu 0 --dataset cifar10 --path .exp/batch8/cifar10 -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch8/cub200.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 8 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 6e-4 --opt_type adam \ 4 | --label_smoothing 0.7 --distort_color None --frozen_param_bits 8 \ 5 | --gpu 0 --dataset cub200 --path .exp/batch8/cub200 
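# Note on the recurring flags, inferred from tinytl/tinytl/utils/common_utils.py
# further below rather than stated by the scripts themselves:
# --frozen_param_bits 8 invokes weight_quantization(), which k-means-quantizes
# every conv/linear weight with requires_grad=False down to 8 bits, while
# --transfer_learning_method tinytl-lite_residual+bias keeps only the biases
# and the lite residual modules trainable.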
-------------------------------------------------------------------------------- /tinytl/exp_scripts/batch8/food101.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 8 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 2e-4 --opt_type adam \ 4 | --label_smoothing 0.1 --distort_color None --frozen_param_bits 8 \ 5 | --gpu 0 --dataset food101 --path .exp/batch8/food101 -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch8/cifar100.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 8 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 3e-4 --opt_type adam \ 4 | --label_smoothing 0.1 --distort_color None --frozen_param_bits 8 \ 5 | --gpu 0 --dataset cifar100 --path .exp/batch8/cifar100 -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch8/flowers.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 8 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 2e-4 --opt_type adam \ 4 | --label_smoothing 0.7 --distort_color None --frozen_param_bits 8 \ 5 | --gpu 0 --dataset flowers102 --path .exp/batch8/flowers102 -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch1/aircraft.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 1 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 3e-4 --opt_type adam \ 4 | --label_smoothing 0.7 --distort_color torch --frozen_param_bits 8 \ 5 | --gpu 0 --dataset aircraft --path .exp/batch1/aircraft 6 | -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch1/flowers.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 1 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 3e-4 --opt_type adam \ 4 | --label_smoothing 0.7 --distort_color None --frozen_param_bits 8 \ 5 | --gpu 0 --dataset flowers102 --path .exp/batch1/flowers102 6 | -------------------------------------------------------------------------------- /tinytl/exp_scripts/batch8/aircraft.sh: -------------------------------------------------------------------------------- 1 | python tinytl_fgvc_train.py --transfer_learning_method tinytl-lite_residual+bias \ 2 | --train_batch_size 8 --test_batch_size 100 \ 3 | --n_epochs 50 --init_lr 16e-4 --opt_type adam \ 4 | --label_smoothing 0.7 --distort_color torch --frozen_param_bits 8 \ 5 | --gpu 0 --dataset aircraft --path .exp/batch8/aircraft 6 | -------------------------------------------------------------------------------- /netaug/bash/imagenet/mbv3-0.35.sh: -------------------------------------------------------------------------------- 1 | torchpack dist-run -np 16 -H $server1:8,$server2:8 \ 2 | python train.py configs/netaug.yaml \ 3 | --data_provider "{data_path:/dataset/imagenet}" \ 4 | --model "{name:mbv3-0.35}" \ 5 | --run_config 
"{weight_decay:3.0e-5,base_lr:0.1}" \ 6 | --netaug "{aug_expand_list:[1.0,1.6,2.2,2.8],aug_width_mult_list:[1.0,2.0,3.0]}" \ 7 | --path 8 | -------------------------------------------------------------------------------- /netaug/configs/default.yaml: -------------------------------------------------------------------------------- 1 | data_provider: 2 | dataset: imagenet 3 | image_size: 160 4 | base_batch_size: 128 5 | n_worker: 8 6 | data_path: null 7 | 8 | model: 9 | name: mbv2-0.35 10 | dropout_rate: 0 11 | 12 | run_config: 13 | n_epochs: 150 14 | weight_decay: 4.0e-5 15 | base_lr: 0.025 16 | warmup_epochs: 5 17 | label_smoothing: 0.1 18 | init_type: he_fout 19 | -------------------------------------------------------------------------------- /netaug/bash/imagenet/mcunet.sh: -------------------------------------------------------------------------------- 1 | torchpack dist-run -np 32 -H $server1:8,$server2:8,$server3:8,$server4:8 \ 2 | python train.py configs/netaug.yaml \ 3 | --data_provider "{data_path:/dataset/imagenet,image_size:176,base_batch_size:64}" \ 4 | --run_config "{base_lr:0.0125}" \ 5 | --model "{name:mcunet}" \ 6 | --netaug "{aug_expand_list:[1.0,1.6,2.2,2.8],aug_width_mult_list:[1.0,1.6,2.2]}" \ 7 | --path 8 | -------------------------------------------------------------------------------- /netaug/configs/netaug.yaml: -------------------------------------------------------------------------------- 1 | data_provider: 2 | dataset: imagenet 3 | image_size: 160 4 | base_batch_size: 128 5 | n_worker: 8 6 | data_path: null 7 | 8 | model: 9 | name: mbv2-0.35 10 | dropout_rate: 0 11 | 12 | run_config: 13 | n_epochs: 150 14 | weight_decay: 4.0e-5 15 | base_lr: 0.025 16 | warmup_epochs: 5 17 | label_smoothing: 0.1 18 | init_type: kaiming_uniform 19 | 20 | netaug: 21 | aug_expand_list: [1.0] 22 | aug_width_mult_list: [1.0] 23 | stop_aug_w_epoch: 75 24 | stop_aug_e_epoch: 0 25 | stop_netaug_epoch: 0 26 | sort_channel: true 27 | sync: false 28 | -------------------------------------------------------------------------------- /netaug/models/netaug/tiny_mbv2.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from models.base.tiny_mbv2 import TinyMobileNetV2 4 | 5 | from .mbv2 import NetAugMobileNetV2 6 | 7 | __all__ = ["NetAugTinyMobileNetV2"] 8 | 9 | 10 | class NetAugTinyMobileNetV2(NetAugMobileNetV2): 11 | def __init__( 12 | self, 13 | base_net: TinyMobileNetV2, 14 | aug_expand_list: List[float], 15 | aug_width_mult_list: List[float], 16 | n_classes: int, 17 | dropout_rate=0.0, 18 | ): 19 | super(NetAugTinyMobileNetV2, self).__init__( 20 | base_net, 21 | aug_expand_list, 22 | aug_width_mult_list, 23 | n_classes, 24 | dropout_rate, 25 | ) 26 | -------------------------------------------------------------------------------- /netaug/models/netaug/proxylessnas.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from models.base.proxylessnas import ProxylessNASMobile 4 | 5 | from .mbv2 import NetAugMobileNetV2 6 | 7 | __all__ = ["NetAugProxylessNASMobile"] 8 | 9 | 10 | class NetAugProxylessNASMobile(NetAugMobileNetV2): 11 | def __init__( 12 | self, 13 | base_net: ProxylessNASMobile, 14 | aug_expand_list: List[float], 15 | aug_width_mult_list: List[float], 16 | n_classes: int, 17 | dropout_rate=0.0, 18 | ): 19 | super(NetAugProxylessNASMobile, self).__init__( 20 | base_net, 21 | aug_expand_list, 22 | aug_width_mult_list, 23 | n_classes, 24 | 
dropout_rate, 25 | ) 26 | -------------------------------------------------------------------------------- /netaug/bash/eval.sh: -------------------------------------------------------------------------------- 1 | torchpack dist-run -np 8 python eval.py \ 2 | --dataset imagenet --data_path /dataset/imagenet/ \ 3 | --image_size 160 \ 4 | --model mbv2-0.35 \ 5 | --init_from 6 | 7 | torchpack dist-run -np 8 python eval.py \ 8 | --dataset imagenet --data_path /dataset/imagenet/ \ 9 | --image_size 160 \ 10 | --model mbv3-0.35 \ 11 | --init_from 12 | 13 | torchpack dist-run -np 8 python eval.py \ 14 | --dataset imagenet --data_path /dataset/imagenet/ \ 15 | --image_size 160 \ 16 | --model proxylessnas-0.35 \ 17 | --init_from 18 | 19 | torchpack dist-run -np 8 python eval.py \ 20 | --dataset imagenet --data_path /dataset/imagenet/ \ 21 | --image_size 176 \ 22 | --model mcunet \ 23 | --init_from 24 | 25 | torchpack dist-run -np 8 python eval.py \ 26 | --dataset imagenet --data_path /dataset/imagenet/ \ 27 | --image_size 144 \ 28 | --model tinymbv2 \ 29 | --init_from 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Han Cai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /netaug/models/base/tiny_mbv2.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from models.base.layers import ( 4 | ConvLayer, 5 | InvertedBlock, 6 | LinearLayer, 7 | OpSequential, 8 | ResidualBlock, 9 | ) 10 | from models.base.mbv2 import MobileNetV2 11 | 12 | 13 | class TinyMobileNetV2(MobileNetV2): 14 | def __init__(self, channel_divisor=8, n_classes=1000, dropout_rate=0): 15 | super(TinyMobileNetV2, self).__init__( 16 | 0.35, channel_divisor, n_classes, dropout_rate 17 | ) 18 | 19 | self.head = OpSequential( 20 | [ 21 | ResidualBlock( 22 | InvertedBlock( 23 | 56, 24 | 112, 25 | 3, 26 | expand_ratio=6, 27 | act_func=("relu6", "relu6", None), 28 | ), 29 | shortcut=None, 30 | ), 31 | ConvLayer(112, 448, 1, act_func="relu6"), 32 | nn.AdaptiveAvgPool2d(1), 33 | LinearLayer(448, n_classes, dropout_rate=dropout_rate), 34 | ] 35 | ) 36 | -------------------------------------------------------------------------------- /tinytl/tinytl/model/network.py: -------------------------------------------------------------------------------- 1 | from ofa.utils.layers import ResidualBlock 2 | from ofa.imagenet_classification.networks import ProxylessNASNets 3 | from .modules import my_set_layer_from_config 4 | 5 | __all__ = ['build_residual_block_from_config', 'build_network_from_config'] 6 | 7 | 8 | def build_residual_block_from_config(config): 9 | conv_config = config['conv'] if 'conv' in config else config['mobile_inverted_conv'] 10 | conv = my_set_layer_from_config(conv_config) 11 | shortcut = my_set_layer_from_config(config['shortcut']) 12 | return ResidualBlock(conv, shortcut) 13 | 14 | 15 | def build_network_from_config(config): 16 | first_conv = my_set_layer_from_config(config['first_conv']) 17 | feature_mix_layer = my_set_layer_from_config(config['feature_mix_layer']) 18 | classifier = my_set_layer_from_config(config['classifier']) 19 | 20 | blocks = [] 21 | for block_config in config['blocks']: 22 | blocks.append(build_residual_block_from_config(block_config)) 23 | 24 | net = ProxylessNASNets(first_conv, blocks, feature_mix_layer, classifier) 25 | if 'bn' in config: 26 | net.set_bn_param(**config['bn']) 27 | else: 28 | net.set_bn_param(momentum=0.1, eps=1e-3) 29 | 30 | return net 31 | -------------------------------------------------------------------------------- /netaug/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import List 3 | 4 | import torch 5 | from torch.optim import Optimizer 6 | 7 | __all__ = ["CosineLRwithWarmup"] 8 | 9 | 10 | class CosineLRwithWarmup(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer: Optimizer, 14 | warmup_steps: int, 15 | warmup_lr: float, 16 | decay_steps: int, 17 | last_epoch: int = -1, 18 | ) -> None: 19 | self.warmup_steps = warmup_steps 20 | self.warmup_lr = warmup_lr 21 | self.decay_steps = decay_steps 22 | super().__init__(optimizer, last_epoch) 23 | 24 | def get_lr(self) -> List[float]: 25 | if self.last_epoch < self.warmup_steps: 26 | return [ 27 | (base_lr - self.warmup_lr) * self.last_epoch / self.warmup_steps 28 | + self.warmup_lr 29 | for base_lr in self.base_lrs 30 | ] 31 | else: 32 | current_steps = self.last_epoch - self.warmup_steps 33 | return [ 34 | 0.5 35 | * base_lr 36 | * (1 + math.cos(math.pi * current_steps / self.decay_steps)) 37 | for base_lr in self.base_lrs 38 | 
] 39 | -------------------------------------------------------------------------------- /netaug/utils/profile.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torchprofile import profile_macs 6 | 7 | __all__ = ["is_parallel", "get_module_device", "trainable_param_num", "inference_macs"] 8 | 9 | 10 | def is_parallel(model: nn.Module) -> bool: 11 | return isinstance( 12 | model, (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 13 | ) 14 | 15 | 16 | def get_module_device(module: nn.Module) -> torch.device: 17 | return module.parameters().__next__().device 18 | 19 | 20 | def trainable_param_num(network: nn.Module, unit=1e6) -> float: 21 | return sum(p.numel() for p in network.parameters() if p.requires_grad) / unit 22 | 23 | 24 | def inference_macs( 25 | network: nn.Module, 26 | args: Tuple = (), 27 | data_shape: Optional[Tuple] = None, 28 | unit: float = 1e6, 29 | ) -> float: 30 | if is_parallel(network): 31 | network = network.module 32 | if data_shape is not None: 33 | if len(args) > 0: 34 | raise ValueError("Please provide either data_shape or args tuple.") 35 | args = (torch.zeros(data_shape, device=get_module_device(network)),) 36 | is_training = network.training 37 | network.eval() 38 | macs = profile_macs(network, args=args) / unit 39 | network.train(is_training) 40 | return macs 41 | -------------------------------------------------------------------------------- /netaug/utils/metric.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | import numpy as np 4 | import torch 5 | 6 | __all__ = ["accuracy", "AverageMeter"] 7 | 8 | 9 | def accuracy( 10 | output: torch.Tensor, target: torch.Tensor, topk=(1,) 11 | ) -> List[torch.Tensor]: 12 | """Computes the precision@k for the specified values of k.""" 13 | maxk = max(topk) 14 | batch_size = target.shape[0] 15 | 16 | _, pred = output.topk(maxk, 1, True, True) 17 | pred = pred.t() 18 | correct = pred.eq(target.reshape(1, -1).expand_as(pred)) 19 | 20 | res = [] 21 | for k in topk: 22 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 23 | res.append(correct_k.mul_(100.0 / batch_size)) 24 | return res 25 | 26 | 27 | class AverageMeter(object): 28 | """Computes and stores the average and current value. 
29 | 30 | Copied from: https://github.com/pytorch/examples/blob/master/imagenet/main.py 31 | """ 32 | 33 | def __init__(self): 34 | self.val = 0 35 | self.avg = 0 36 | self.sum = 0 37 | self.count = 0 38 | 39 | def reset(self): 40 | self.val = 0 41 | self.avg = 0 42 | self.sum = 0 43 | self.count = 0 44 | 45 | def update(self, val: Union[torch.Tensor, np.ndarray, float, int], n=1): 46 | self.val = val 47 | self.sum += val * n 48 | self.count += n 49 | self.avg = self.sum / self.count 50 | -------------------------------------------------------------------------------- /netaug/utils/criterion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | __all__ = ["label_smooth", "CrossEntropyWithSoftTarget", "CrossEntropyWithLabelSmooth"] 5 | 6 | 7 | def label_smooth( 8 | target: torch.Tensor, n_classes: int, smooth_factor=0.1 9 | ) -> torch.Tensor: 10 | # convert to one-hot 11 | batch_size = target.shape[0] 12 | target = torch.unsqueeze(target, 1) 13 | soft_target = torch.zeros((batch_size, n_classes), device=target.device) 14 | soft_target.scatter_(1, target, 1) 15 | # label smoothing 16 | soft_target = torch.add( 17 | soft_target * (1 - smooth_factor), smooth_factor / n_classes 18 | ) 19 | return soft_target 20 | 21 | 22 | class CrossEntropyWithSoftTarget: 23 | @staticmethod 24 | def get_loss(pred: torch.Tensor, soft_target: torch.Tensor) -> torch.Tensor: 25 | return torch.mean( 26 | torch.sum(-soft_target * F.log_softmax(pred, dim=-1, _stacklevel=5), 1) 27 | ) 28 | 29 | def __call__(self, pred: torch.Tensor, soft_target: torch.Tensor) -> torch.Tensor: 30 | return self.get_loss(pred, soft_target) 31 | 32 | 33 | class CrossEntropyWithLabelSmooth: 34 | def __init__(self, smooth_ratio=0.1): 35 | super(CrossEntropyWithLabelSmooth, self).__init__() 36 | self.smooth_ratio = smooth_ratio 37 | 38 | def __call__(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor: 39 | soft_target = label_smooth(target, pred.shape[1], self.smooth_ratio) 40 | return CrossEntropyWithSoftTarget.get_loss(pred, soft_target) 41 | -------------------------------------------------------------------------------- /netaug/bash/transfer/car.sh: -------------------------------------------------------------------------------- 1 | # mbv2-0.35 2 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 3 | --data_provider "{dataset:car,base_batch_size:64}" \ 4 | --model "{dropout_rate:0.5}" \ 5 | --run_config "{n_epochs:50,base_lr:0.18}" \ 6 | --init_from \ 7 | --path 8 | 9 | # mcunet 10 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 11 | --data_provider "{dataset:car,base_batch_size:64,image_size:176}" \ 12 | --model "{dropout_rate:0.1,name:mcunet}" \ 13 | --run_config "{n_epochs:50,base_lr:0.25}" \ 14 | --init_from \ 15 | --path 16 | 17 | # proxylessnas-0.35 18 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 19 | --data_provider "{dataset:car,base_batch_size:64}" \ 20 | --model "{dropout_rate:0.2,name:proxylessnas-0.35}" \ 21 | --run_config "{n_epochs:50,base_lr:0.20}" \ 22 | --init_from \ 23 | --path 24 | 25 | # mbv3-0.35 26 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 27 | --data_provider "{dataset:car,base_batch_size:64}" \ 28 | --model "{dropout_rate:0.7,name:mbv3-0.35}" \ 29 | --run_config "{n_epochs:50,base_lr:0.10,weight_decay:3.0e-5}" \ 30 | --init_from \ 31 | --path 32 | 33 | # tinymbv2 34 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 35 
| --data_provider "{dataset:car,base_batch_size:64,image_size:144}" \ 36 | --model "{dropout_rate:0.2,name:tinymbv2}" \ 37 | --run_config "{n_epochs:50,base_lr:0.14}" \ 38 | --init_from \ 39 | --path 40 | -------------------------------------------------------------------------------- /netaug/bash/transfer/pets.sh: -------------------------------------------------------------------------------- 1 | # mbv2-0.35 2 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 3 | --data_provider "{dataset:pets,base_batch_size:64}" \ 4 | --model "{dropout_rate:0.4}" \ 5 | --run_config "{n_epochs:50,base_lr:0.01}" \ 6 | --init_from \ 7 | --path 8 | 9 | # mcunet 10 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 11 | --data_provider "{dataset:pets,base_batch_size:64,image_size:176}" \ 12 | --model "{dropout_rate:0.6,name:mcunet}" \ 13 | --run_config "{n_epochs:50,base_lr:0.01}" \ 14 | --init_from \ 15 | --path 16 | 17 | # proxylessnas-0.35 18 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 19 | --data_provider "{dataset:pets,base_batch_size:64}" \ 20 | --model "{dropout_rate:0.8,name:proxylessnas-0.35}" \ 21 | --run_config "{n_epochs:50,base_lr:0.01}" \ 22 | --init_from \ 23 | --path 24 | 25 | # mbv3-0.35 26 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 27 | --data_provider "{dataset:pets,base_batch_size:64}" \ 28 | --model "{dropout_rate:0.8,name:mbv3-0.35}" \ 29 | --run_config "{n_epochs:50,base_lr:0.01,weight_decay:3.0e-5}" \ 30 | --init_from \ 31 | --path 32 | 33 | # tinymbv2 34 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 35 | --data_provider "{dataset:pets,base_batch_size:64,image_size:144}" \ 36 | --model "{dropout_rate:0.1,name:tinymbv2}" \ 37 | --run_config "{n_epochs:50,base_lr:0.06}" \ 38 | --init_from \ 39 | --path 40 | -------------------------------------------------------------------------------- /netaug/bash/transfer/cub200.sh: -------------------------------------------------------------------------------- 1 | # mbv2-0.35 2 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 3 | --data_provider "{dataset:cub200,base_batch_size:64}" \ 4 | --model "{dropout_rate:0.7}" \ 5 | --run_config "{n_epochs:50,base_lr:0.02}" \ 6 | --init_from \ 7 | --path 8 | 9 | # mcunet 10 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 11 | --data_provider "{dataset:cub200,base_batch_size:64,image_size:176}" \ 12 | --model "{dropout_rate:0.6,name:mcunet}" \ 13 | --run_config "{n_epochs:50,base_lr:0.08}" \ 14 | --init_from \ 15 | --path 16 | 17 | # proxylessnas-0.35 18 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 19 | --data_provider "{dataset:cub200,base_batch_size:64}" \ 20 | --model "{dropout_rate:0.7,name:proxylessnas-0.35}" \ 21 | --run_config "{n_epochs:50,base_lr:0.04}" \ 22 | --init_from \ 23 | --path 24 | 25 | # mbv3-0.35 26 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 27 | --data_provider "{dataset:cub200,base_batch_size:64}" \ 28 | --model "{dropout_rate:0.8,name:mbv3-0.35}" \ 29 | --run_config "{n_epochs:50,base_lr:0.02,weight_decay:3.0e-5}" \ 30 | --init_from \ 31 | --path 32 | 33 | # tinymbv2 34 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 35 | --data_provider "{dataset:cub200,base_batch_size:64,image_size:144}" \ 36 | --model "{dropout_rate:0.5,name:tinymbv2}" \ 37 | --run_config "{n_epochs:50,base_lr:0.04}" \ 38 | --init_from \ 39 | --path 40 | 
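# In each transfer script, --init_from expects an ImageNet-pretrained checkpoint
# and --path an output directory; both are deliberately left blank. A
# hypothetical filled-in run of the mbv2-0.35 block above (all paths made up):
#
#   torchpack dist-run -np 4 python train.py configs/default.yaml \
#     --data_provider "{dataset:cub200,base_batch_size:64}" \
#     --model "{dropout_rate:0.7}" \
#     --run_config "{n_epochs:50,base_lr:0.02}" \
#     --init_from .exp/imagenet/netaug-mbv2-0.35/checkpoint/best.pt \
#     --path .exp/transfer/cub200/mbv2-0.35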
-------------------------------------------------------------------------------- /netaug/bash/transfer/food.sh: -------------------------------------------------------------------------------- 1 | # mbv2-0.35 2 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 3 | --data_provider "{dataset:food101,base_batch_size:64}" \ 4 | --model "{dropout_rate:0.2}" \ 5 | --run_config "{n_epochs:50,base_lr:0.10}" \ 6 | --init_from \ 7 | --path 8 | 9 | # mcunet 10 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 11 | --data_provider "{dataset:food101,base_batch_size:64,image_size:176}" \ 12 | --model "{dropout_rate:0.1,name:mcunet}" \ 13 | --run_config "{n_epochs:50,base_lr:0.04}" \ 14 | --init_from \ 15 | --path 16 | 17 | # proxylessnas-0.35 18 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 19 | --data_provider "{dataset:food101,base_batch_size:64}" \ 20 | --model "{dropout_rate:0.2,name:proxylessnas-0.35}" \ 21 | --run_config "{n_epochs:50,base_lr:0.10}" \ 22 | --init_from \ 23 | --path 24 | 25 | # mbv3-0.35 26 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 27 | --data_provider "{dataset:food101,base_batch_size:64}" \ 28 | --model "{dropout_rate:0.2,name:mbv3-0.35}" \ 29 | --run_config "{n_epochs:50,base_lr:0.20,weight_decay:3.0e-5}" \ 30 | --init_from \ 31 | --path 32 | 33 | # tinymbv2 34 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 35 | --data_provider "{dataset:food101,base_batch_size:64,image_size:144}" \ 36 | --model "{dropout_rate:0.1,name:tinymbv2}" \ 37 | --run_config "{n_epochs:50,base_lr:0.06}" \ 38 | --init_from \ 39 | --path 40 | -------------------------------------------------------------------------------- /netaug/bash/transfer/flowers.sh: -------------------------------------------------------------------------------- 1 | # mbv2-0.35 2 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 3 | --data_provider "{dataset:flowers102,base_batch_size:64}" \ 4 | --model "{dropout_rate:0.3}" \ 5 | --run_config "{n_epochs:50,base_lr:0.08}" \ 6 | --init_from \ 7 | --path 8 | 9 | # mcunet 10 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 11 | --data_provider "{dataset:flowers102,base_batch_size:64,image_size:176}" \ 12 | --model "{dropout_rate:0.4,name:mcunet}" \ 13 | --run_config "{n_epochs:50,base_lr:0.05}" \ 14 | --init_from \ 15 | --path 16 | 17 | # proxylessnas-0.35 18 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 19 | --data_provider "{dataset:flowers102,base_batch_size:64}" \ 20 | --model "{dropout_rate:0.2,name:proxylessnas-0.35}" \ 21 | --run_config "{n_epochs:50,base_lr:0.06}" \ 22 | --init_from \ 23 | --path 24 | 25 | # mbv3-0.35 26 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 27 | --data_provider "{dataset:flowers102,base_batch_size:64}" \ 28 | --model "{dropout_rate:0.6,name:mbv3-0.35}" \ 29 | --run_config "{n_epochs:50,base_lr:0.05,weight_decay:3.0e-5}" \ 30 | --init_from \ 31 | --path 32 | 33 | # tinymbv2 34 | torchpack dist-run -np 4 python train.py configs/default.yaml \ 35 | --data_provider "{dataset:flowers102,base_batch_size:64,image_size:144}" \ 36 | --model "{dropout_rate:0,name:tinymbv2}" \ 37 | --run_config "{n_epochs:50,base_lr:0.08}" \ 38 | --init_from \ 39 | --path 40 | -------------------------------------------------------------------------------- /netaug/utils/distributed.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union 2 | 3 | 
import torch 4 | import torch.distributed 5 | from torchpack import distributed 6 | 7 | from utils.misc import list_mean, list_sum 8 | 9 | __all__ = ["ddp_reduce_tensor", "DistributedMetric"] 10 | 11 | 12 | def ddp_reduce_tensor( 13 | tensor: torch.Tensor, reduce="mean" 14 | ) -> Union[torch.Tensor, List[torch.Tensor]]: 15 | tensor_list = [torch.empty_like(tensor) for _ in range(distributed.size())] 16 | torch.distributed.all_gather(tensor_list, tensor.contiguous(), async_op=False) 17 | if reduce == "mean": 18 | return list_mean(tensor_list) 19 | elif reduce == "sum": 20 | return list_sum(tensor_list) 21 | elif reduce == "cat": 22 | return torch.cat(tensor_list, dim=0) 23 | elif reduce == "root": 24 | return tensor_list[0] 25 | else: 26 | return tensor_list 27 | 28 | 29 | class DistributedMetric(object): 30 | """Average metrics for distributed training.""" 31 | 32 | def __init__(self, name: Optional[str] = None, backend="ddp"): 33 | self.name = name 34 | self.sum = 0 35 | self.count = 0 36 | self.backend = backend 37 | 38 | def update(self, val: Union[torch.Tensor, int, float], delta_n=1): 39 | val *= delta_n 40 | if type(val) in [int, float]: 41 | val = torch.Tensor(1).fill_(val).cuda() 42 | if self.backend == "ddp": 43 | self.count += ddp_reduce_tensor( 44 | torch.Tensor(1).fill_(delta_n).cuda(), reduce="sum" 45 | ) 46 | self.sum += ddp_reduce_tensor(val.detach(), reduce="sum") 47 | else: 48 | raise NotImplementedError 49 | 50 | @property 51 | def avg(self): 52 | if self.count == 0: 53 | return torch.Tensor(1).fill_(-1) 54 | else: 55 | return self.sum / self.count 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | tmp/ 3 | cache/ 4 | # Created by .ignore support plugin (hsz.mobi) 5 | ### Python template 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | *.DS_Store 112 | *.xml 113 | *.iml 114 | /bash/distributed_imagenet.sh 115 | exp.sh -------------------------------------------------------------------------------- /tinytl/dataset_setup_scripts/make_pets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import numpy as np 4 | import tarfile 5 | 6 | 7 | dataset_path = '~/dataset/pets' 8 | dataset_path = os.path.expanduser(dataset_path) 9 | 10 | if not os.path.exists(dataset_path): 11 | os.makedirs(dataset_path, exist_ok=True) 12 | 13 | 14 | def download_file(url, dest=None): 15 | if not dest: 16 | dest = os.path.join(dataset_path, url.split('/')[-1]) 17 | if os.path.exists(dest): 18 | print('%s exists' % dest) 19 | return 20 | run_cmd = ('wget %s -O %s' % (url, dest)) 21 | print(run_cmd) 22 | os.system(run_cmd) 23 | 24 | download_file('https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz') 25 | tarfile.open(os.path.join(dataset_path, 'images.tar.gz')).extractall(path=dataset_path) 26 | 27 | download_file('https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz') 28 | tarfile.open(os.path.join(dataset_path, 'annotations.tar.gz')).extractall(path=dataset_path) 29 | 30 | # build train+val 31 | train_val_list = os.path.join(dataset_path, 'annotations/trainval.txt') 32 | with open(train_val_list, 'r') as fin: 33 | for line in fin.readlines(): 34 | line = line[:-1].split(' ') 35 | file_name, class_id = line[0], int(line[1]) 36 | file_name += '.jpg' 37 | src_path = os.path.join(dataset_path, 'images/%s' % file_name) 38 | 39 | target_folder = os.path.join(dataset_path, 'train/%d' % class_id) 40 | os.makedirs(target_folder, exist_ok=True) 41 | target_path = os.path.join(target_folder, file_name) 42 | shutil.move(src_path, target_path) 43 | print('Moving %s to %s' % (src_path, target_path)) 44 | 45 | 46 | # build test 47 | test_list = os.path.join(dataset_path, 'annotations/test.txt') 48 | with open(test_list, 'r') as fin: 49 | for line in fin.readlines(): 50 | line = line[:-1].split(' ') 51 | file_name, class_id = line[0], int(line[1]) 52 | file_name += '.jpg' 53 | src_path = os.path.join(dataset_path, 'images/%s' % file_name) 54 | 55 | target_folder = os.path.join(dataset_path, 'val/%d' % class_id) 56 | os.makedirs(target_folder, exist_ok=True) 57 | target_path = os.path.join(target_folder, file_name) 58 | shutil.move(src_path, 
target_path) 59 | print('Moving %s to %s' % (src_path, target_path)) 60 | 61 | os.remove(os.path.join(dataset_path, 'images.tar.gz')) 62 | os.remove(os.path.join(dataset_path, 'annotations.tar.gz')) 63 | 64 | shutil.rmtree(os.path.join(dataset_path, 'images')) 65 | shutil.rmtree(os.path.join(dataset_path, 'annotations')) 66 | 67 | -------------------------------------------------------------------------------- /tinytl/README.md: -------------------------------------------------------------------------------- 1 | # TinyTL: Reduce Activations, Not Trainable Parameters for Efficient On-Device Learning [[website]](https://hanlab.mit.edu/projects/tinyml/tinyTL/) 2 | 3 | ```BibTex 4 | @inproceedings{ 5 | cai2020tinytl, 6 | title={TinyTL: Reduce Memory, Not Parameters for Efficient On-Device Learning}, 7 | author={Cai, Han and Gan, Chuang and Zhu, Ligeng and Han, Song}, 8 | booktitle={Advances in Neural Information Processing Systems}, 9 | volume={33}, 10 | year={2020} 11 | } 12 | ``` 13 | 14 | ## On-Device Learning, not Just Inference 15 |
[figure] 16 | 17 | [figure]
18 | 19 | ## Activation is the Main Bottleneck, not Parameters 20 |
[figure] 21 | 22 | [figure]
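To make the bottleneck concrete, here is a small self-contained sketch (not this repo's `memory_cost_profiler`; the toy network, batch size, and resolution are arbitrary) comparing parameter memory with the activation memory of one forward pass:

```python
import torch
import torch.nn as nn

def activation_bytes(model, x):
    """Sum the output sizes of all leaf modules for one forward pass."""
    total, hooks = 0, []

    def hook(_m, _inp, out):
        nonlocal total
        if isinstance(out, torch.Tensor):
            total += out.numel() * out.element_size()

    for m in model.modules():
        if not list(m.children()):  # leaf modules only
            hooks.append(m.register_forward_hook(hook))
    with torch.no_grad():
        model(x)
    for h in hooks:
        h.remove()
    return total

net = nn.Sequential(nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(),
                    nn.Conv2d(32, 64, 3, padding=1), nn.ReLU())
param_bytes = sum(p.numel() * p.element_size() for p in net.parameters())
print(param_bytes, activation_bytes(net, torch.zeros(8, 3, 160, 160)))
```

Even for this toy network, the activations of a single batch are roughly three orders of magnitude larger than the weights, which is the imbalance TinyTL targets.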
23 | 24 | ## Tiny Transfer Learning 25 |
[figure] 26 | 27 | [figure]
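A minimal sketch of the resulting update scheme, built from the helpers in `tinytl/tinytl/utils/common_utils.py`; it assumes the `tinytl` package is importable and that the network exposes a `classifier` head (the real entry point is `tinytl_fgvc_train.py`):

```python
import torch.nn as nn
from tinytl.utils import (enable_bias_update, set_module_grad_status,
                          weight_quantization)

def prepare_for_tinytl(model: nn.Module, bits=8) -> nn.Module:
    set_module_grad_status(model, False)  # freeze every parameter
    enable_bias_update(model)             # ...then re-enable all biases
    if hasattr(model, 'classifier'):      # assumed head name for the new task
        set_module_grad_status(model.classifier, True)
    # conv/linear weights that are still frozen get 8-bit k-means codebooks
    weight_quantization(model, bits=bits)
    return model
```

Because the weights never receive gradients, the intermediate activations needed to compute weight gradients do not have to be stored, which is where the memory saving comes from.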
28 | 29 | ## Transfer Learning Results 30 |
[figure] 31 | 32 | [figure]
33 | 34 | ## Combining with Batch Size 1 Training 35 |
[figure] 36 | 37 | [figure]
38 | 39 | ## Data Preparation 40 | To set up the datasets, please run `bash make_all_datasets.sh` under the folder **dataset_setup_scripts**. 41 | 42 | ## Requirement 43 | * Python 3.6+ 44 | * Pytorch 1.4.0+ 45 | 46 | ## How to Run Transfer Learning Experiments 47 | To run transfer learning experiments, please first set up the datasets and then run **tinytl_fgvc_train.py**. 48 | Scripts are available under the folder **exp_scripts**. 49 | 50 | ## TODO 51 | 52 | - [ ] Add system support for TinyTL 53 | 54 | 55 | ## Related Projects 56 | 57 | [MCUNet: Tiny Deep Learning on IoT Devices](https://arxiv.org/abs/2007.10319) (NeurIPS'20, spotlight) 58 | 59 | [Once for All: Train One Network and Specialize it for Efficient Deployment](https://arxiv.org/abs/1908.09791) (ICLR'20) 60 | 61 | [ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware](https://arxiv.org/pdf/1812.00332.pdf) (ICLR'19) 62 | 63 | [AutoML for Architecting Efficient and Specialized Neural Networks](https://ieeexplore.ieee.org/abstract/document/8897011) (IEEE Micro) 64 | 65 | [AMC: AutoML for Model Compression and Acceleration on Mobile Devices](https://arxiv.org/pdf/1802.03494.pdf) (ECCV'18) 66 | 67 | [HAQ: Hardware-Aware Automated Quantization](https://arxiv.org/pdf/1811.08886.pdf) (CVPR'19, oral) 68 | -------------------------------------------------------------------------------- /netaug/eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import torch.backends.cudnn 5 | import torch.nn as nn 6 | from torchpack import distributed as dist 7 | 8 | from setup import build_data_loader, build_model 9 | from train import eval 10 | from utils import load_state_dict_from_file 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument( 14 | "--gpu", type=str, default=None 15 | ) # used in single machine experiments 16 | parser.add_argument("--batch_size", type=int, default=100) 17 | parser.add_argument("--n_worker", type=int, default=8) 18 | parser.add_argument( 19 | "--dataset", 20 | type=str, 21 | default="imagenet", 22 | choices=[ 23 | "imagenet", 24 | "imagenet21k_winter_p", 25 | "car", 26 | "flowers102", 27 | "food101", 28 | "cub200", 29 | "pets", 30 | ], 31 | ) 32 | parser.add_argument("--data_path", type=str, default=None) 33 | parser.add_argument("--image_size", type=int, default=160) 34 | 35 | parser.add_argument( 36 | "--model", 37 | type=str, 38 | default="mbv2-0.35", 39 | choices=[ 40 | "mbv2-0.35", 41 | "mbv3-0.35", 42 | "proxylessnas-0.35", 43 | "mcunet", 44 | "tinymbv2", 45 | ], 46 | ) 47 | 48 | parser.add_argument("--init_from", type=str) 49 | parser.add_argument("--reset_bn", action="store_true") 50 | parser.add_argument("--save_path", type=str, default=None) 51 | 52 | if __name__ == "__main__": 53 | args = parser.parse_args() 54 | # setup gpu and distributed training 55 | if args.gpu is not None: 56 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 57 | dist.init() 58 | torch.backends.cudnn.benchmark = True 59 | torch.cuda.set_device(dist.local_rank()) 60 | 61 | # build data loader 62 | data_loader_dict, n_classes = build_data_loader( 63 | args.dataset, 64 | args.image_size, 65 | args.batch_size, 66 | args.n_worker, 67 | args.data_path, 68 | dist.size(), 69 | dist.rank(), 70 | ) 71 | 72 | # build model 73 | model = build_model(args.model, n_classes, 0).cuda() 74 | 75 | # load checkpoint 76 | checkpoint = load_state_dict_from_file(args.init_from) 77 | model.load_state_dict(checkpoint) 78 | 79 | model = 
nn.parallel.DistributedDataParallel(model, device_ids=[dist.local_rank()]) 80 | val_results = eval(model, data_loader_dict, args.reset_bn) 81 | 82 | for key, val in val_results.items(): 83 | print(key, ": ", val) 84 | 85 | if args.save_path is not None: 86 | torch.save( 87 | model.module.state_dict(), 88 | args.save_path, 89 | _use_new_zipfile_serialization=False, 90 | ) 91 | -------------------------------------------------------------------------------- /tinytl/tinytl/utils/common_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from tqdm import tqdm 5 | from sklearn.cluster import KMeans 6 | 7 | __all__ = [ 8 | 'module_require_grad', 'set_module_grad_status', 'enable_bn_update', 'enable_bias_update', 9 | 'weight_quantization', 10 | ] 11 | 12 | 13 | def module_require_grad(module): 14 | return module.parameters().__next__().requires_grad 15 | 16 | 17 | def set_module_grad_status(module, flag=False): 18 | if isinstance(module, list): 19 | for m in module: 20 | set_module_grad_status(m, flag) 21 | else: 22 | for p in module.parameters(): 23 | p.requires_grad = flag 24 | 25 | 26 | def enable_bn_update(model): 27 | for m in model.modules(): 28 | if type(m) in [nn.BatchNorm2d, nn.GroupNorm] and m.weight is not None: 29 | set_module_grad_status(m, True) 30 | 31 | 32 | def enable_bias_update(model): 33 | for m in model.modules(): 34 | for name, param in m.named_parameters(): 35 | if name == 'bias': 36 | param.requires_grad = True 37 | 38 | 39 | def k_means_cpu(weight, n_clusters, init='k-means++', max_iter=50): 40 | # flatten the weight for computing k-means 41 | org_shape = weight.shape 42 | weight = weight.reshape(-1, 1) # single feature 43 | if n_clusters > weight.size: 44 | n_clusters = weight.size 45 | 46 | k_means = KMeans(n_clusters=n_clusters, init=init, n_init=1, max_iter=max_iter, n_jobs=20) 47 | k_means.fit(weight) 48 | 49 | centroids = k_means.cluster_centers_ 50 | labels = k_means.labels_ 51 | labels = labels.reshape(org_shape) 52 | return torch.from_numpy(centroids).view(1, -1), torch.from_numpy(labels).int() 53 | 54 | 55 | def reconstruct_weight_from_k_means_result(centroids, labels): 56 | weight = torch.zeros_like(labels).float() 57 | for i, c in enumerate(centroids.cpu().numpy().squeeze()): 58 | weight[labels == i] = c.item() 59 | return weight 60 | 61 | 62 | def quantization(layer, bits=8, max_iter=50): 63 | w = layer.weight.data 64 | centroids, labels = k_means_cpu(w.cpu().numpy(), 2 ** bits, max_iter=max_iter) 65 | w_q = reconstruct_weight_from_k_means_result(centroids, labels) 66 | layer.weight.data = w_q.float() 67 | 68 | 69 | def weight_quantization(model, bits=8, max_iter=50): 70 | if bits is None: 71 | return 72 | to_quantize_modules = [] 73 | for m in model.modules(): 74 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 75 | if not m.weight.requires_grad: 76 | to_quantize_modules.append(m) 77 | 78 | with tqdm(total=len(to_quantize_modules), 79 | desc='%d-bits quantization start' % bits) as t: 80 | for m in to_quantize_modules: 81 | quantization(m, bits, max_iter) 82 | t.update() 83 | -------------------------------------------------------------------------------- /tinytl/dataset_setup_scripts/make_food.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import numpy as np 4 | import tarfile 5 | 6 | dataset_path = '~/dataset/food101' 7 | dataset_path = os.path.expanduser(dataset_path) 8 | 9 | if not 
os.path.exists(dataset_path): 10 | os.makedirs(dataset_path, exist_ok=True) 11 | 12 | 13 | def download_file(url, dest=None): 14 | if not dest: 15 | dest = os.path.join(dataset_path, url.split('/')[-1]) 16 | if os.path.exists(dest): 17 | print('%s exists' % dest) 18 | return 19 | run_cmd = ('wget %s -O %s' % (url, dest)) 20 | print(run_cmd) 21 | os.system(run_cmd) 22 | 23 | 24 | def test_data(): 25 | test_path = os.path.join(dataset_path, 'val') 26 | os.makedirs(test_path, exist_ok=True) 27 | img2id = np.genfromtxt(os.path.join(dataset_path, 'val.txt'), dtype=str) 28 | print(img2id.shape) 29 | 30 | for id_ in range(img2id.shape[0]): 31 | original_path = os.path.join(dataset_path, img2id[id_, 0][:-1]) 32 | label = int(img2id[id_, 1]) # Label starts with 0 33 | 34 | target_path = '%s/val/%d/image_%05d.jpg' % (dataset_path, label, id_) 35 | 36 | sub_path = os.path.join(test_path, str(label)) 37 | os.makedirs(sub_path, exist_ok=True) 38 | 39 | shutil.move(original_path, target_path) 40 | 41 | 42 | def train_data(): 43 | train_path = os.path.join(dataset_path, 'train') 44 | os.makedirs(train_path, exist_ok=True) 45 | img2id = np.genfromtxt(os.path.join(dataset_path, 'train.txt'), dtype=str) 46 | print(img2id.shape) 47 | 48 | for id_ in range(img2id.shape[0]): 49 | original_path = os.path.join(dataset_path, img2id[id_, 0][:-1]) 50 | label = int(img2id[id_, 1]) # Label starts with 0 51 | target_path = '%s/train/%d/image_%05d.jpg' % (dataset_path, label, id_) 52 | 53 | sub_path = os.path.join(train_path, str(label)) 54 | os.makedirs(sub_path, exist_ok=True) 55 | 56 | shutil.move(original_path, target_path) 57 | 58 | 59 | def main(): 60 | download_file('http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz') 61 | tarfile.open(os.path.join(dataset_path, 'food-101.tar.gz')).extractall(path=dataset_path) 62 | os.system('mv %s %s' % (os.path.join(dataset_path, 'food-101/images'), dataset_path)) 63 | shutil.rmtree(os.path.join(dataset_path, 'food-101')) 64 | os.remove(os.path.join(dataset_path, 'food-101.tar.gz')) 65 | 66 | download_file('https://hanlab.mit.edu/tools/image_dataset_formats/food_101/train.txt') 67 | download_file('https://hanlab.mit.edu/tools/image_dataset_formats/food_101/val.txt') 68 | 69 | test_data() 70 | train_data() 71 | 72 | shutil.rmtree(os.path.join(dataset_path, 'images')) 73 | os.remove(os.path.join(dataset_path, 'train.txt')) 74 | os.remove(os.path.join(dataset_path, 'val.txt')) 75 | 76 | 77 | if __name__ == '__main__': 78 | main() 79 | -------------------------------------------------------------------------------- /tinytl/dataset_setup_scripts/make_aircraft.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import numpy as np 4 | import tarfile 5 | 6 | 7 | dataset_path = '~/dataset/aircraft' 8 | dataset_path = os.path.expanduser(dataset_path) 9 | 10 | if not os.path.exists(dataset_path): 11 | os.makedirs(dataset_path, exist_ok=True) 12 | 13 | 14 | def download_file(url, dest=None): 15 | if not dest: 16 | dest = os.path.join(dataset_path, url.split('/')[-1]) 17 | if os.path.exists(dest): 18 | print('%s exists' % dest) 19 | return 20 | run_cmd = ('wget %s -O %s' % (url, dest)) 21 | print(run_cmd) 22 | os.system(run_cmd) 23 | 24 | 25 | def test_data(): 26 | test_path = os.path.join(dataset_path, 'val') 27 | os.makedirs(test_path, exist_ok=True) 28 | img2id = np.genfromtxt(os.path.join(dataset_path, 'val.txt'), dtype=str) 29 | print(img2id.shape) 30 | 31 | for id_ in range(img2id.shape[0]): 32 | original_path = 
os.path.join(dataset_path, img2id[id_, 0][:-1]) 33 | label = int(img2id[id_, 1]) # Label starts with 0 34 | 35 | target_path = '%s/val/%d/image_%05d.jpg' % (dataset_path, label, id_) 36 | 37 | sub_path = os.path.join(test_path, str(label)) 38 | os.makedirs(sub_path, exist_ok=True) 39 | 40 | shutil.move(original_path, target_path) 41 | 42 | 43 | def train_data(): 44 | train_path = os.path.join(dataset_path, 'train') 45 | os.makedirs(train_path, exist_ok=True) 46 | img2id = np.genfromtxt(os.path.join(dataset_path, 'train.txt'), dtype=str) 47 | print(img2id.shape) 48 | 49 | for id_ in range(img2id.shape[0]): 50 | original_path = os.path.join(dataset_path, img2id[id_, 0][:-1]) 51 | label = int(img2id[id_, 1]) 52 | target_path = '%s/train/%d/image_%05d.jpg' % (dataset_path, label, id_) 53 | 54 | sub_path = os.path.join(train_path, str(label)) 55 | os.makedirs(sub_path, exist_ok=True) 56 | 57 | shutil.move(original_path, target_path) 58 | 59 | 60 | def main(): 61 | download_file('http://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/archives/fgvc-aircraft-2013b.tar.gz') 62 | tarfile.open(os.path.join(dataset_path, 'fgvc-aircraft-2013b.tar.gz')).extractall(path=dataset_path) 63 | shutil.move(os.path.join(dataset_path, 'fgvc-aircraft-2013b/data/images'), dataset_path) 64 | shutil.rmtree(os.path.join(dataset_path, 'fgvc-aircraft-2013b')) 65 | os.remove(os.path.join(dataset_path, 'fgvc-aircraft-2013b.tar.gz')) 66 | 67 | download_file('https://hanlab.mit.edu/tools/image_dataset_formats/aircraft/train.txt') 68 | download_file('https://hanlab.mit.edu/tools/image_dataset_formats/aircraft/val.txt') 69 | 70 | test_data() 71 | train_data() 72 | 73 | shutil.rmtree(os.path.join(dataset_path, 'images')) 74 | os.remove(os.path.join(dataset_path, 'train.txt')) 75 | os.remove(os.path.join(dataset_path, 'val.txt')) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /netaug/utils/init.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Dict, List, Union 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn.modules.batchnorm import _BatchNorm 7 | 8 | __all__ = ["init_modules", "load_state_dict"] 9 | 10 | 11 | def init_modules( 12 | module: Union[nn.Module, List[nn.Module]], init_type="he_fout" 13 | ) -> None: 14 | init_params = init_type.split("@") 15 | if len(init_params) > 1: 16 | init_params = float(init_params[1]) 17 | else: 18 | init_params = None 19 | 20 | if isinstance(module, list): 21 | for sub_module in module: 22 | init_modules(sub_module) 23 | else: 24 | for m in module.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | if init_type == "he_fout": 27 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 28 | m.weight.data.normal_(0, math.sqrt(2.0 / n)) 29 | elif init_type.startswith("kaiming_uniform"): 30 | nn.init.kaiming_uniform_(m.weight, a=math.sqrt(init_params or 5)) 31 | else: 32 | nn.init.kaiming_uniform_(m.weight, a=math.sqrt(init_params or 5)) 33 | if m.bias is not None: 34 | m.bias.data.zero_() 35 | elif isinstance(m, _BatchNorm): 36 | m.weight.data.fill_(1) 37 | m.bias.data.zero_() 38 | elif isinstance(m, nn.Linear): 39 | nn.init.trunc_normal_(m.weight, std=0.02) 40 | if m.bias is not None: 41 | m.bias.data.zero_() 42 | else: 43 | weight = getattr(m, "weight", None) 44 | bias = getattr(m, "bias", None) 45 | if isinstance(weight, torch.nn.Parameter): 46 | nn.init.kaiming_uniform_(m.weight, a=math.sqrt(init_params or 5)) 47 | 
if isinstance(bias, torch.nn.Parameter): 48 | bias.data.zero_() 49 | 50 | 51 | def load_state_dict( 52 | model: nn.Module, state_dict: Dict[str, torch.Tensor], strict=True 53 | ) -> None: 54 | current_state_dict = model.state_dict() 55 | for key in state_dict: 56 | if current_state_dict[key].shape != state_dict[key].shape: 57 | if strict: 58 | raise ValueError( 59 | "%s shape mismatch (src=%s, target=%s)" 60 | % ( 61 | key, 62 | list(state_dict[key].shape), 63 | list(current_state_dict[key].shape), 64 | ) 65 | ) 66 | else: 67 | print( 68 | "Skip loading %s due to shape mismatch (src=%s, target=%s)" 69 | % ( 70 | key, 71 | list(state_dict[key].shape), 72 | list(current_state_dict[key].shape), 73 | ) 74 | ) 75 | else: 76 | current_state_dict[key].copy_(state_dict[key]) 77 | model.load_state_dict(current_state_dict) 78 | -------------------------------------------------------------------------------- /tinytl/dataset_setup_scripts/make_flowers102.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import tarfile 4 | from scipy.io import loadmat 5 | import shutil 6 | 7 | 8 | dataset_path = '~/dataset/flowers102' 9 | dataset_path = os.path.expanduser(dataset_path) 10 | 11 | 12 | def download_file(url, dest=None): 13 | if not dest: 14 | dest = os.path.join(dataset_path, url.split('/')[-1]) 15 | run_cmd = ('wget %s -O %s' % (url, dest)) 16 | print(run_cmd) 17 | os.system(run_cmd) 18 | # urllib.urlretrieve(url, dest) 19 | 20 | 21 | # Download the Oxford 102 Flowers dataset into the dataset directory 22 | if not os.path.exists(dataset_path): 23 | os.makedirs(dataset_path, exist_ok=True) 24 | 25 | if not os.path.exists(os.path.join(dataset_path, '102flowers.tgz')): 26 | print('Downloading images...') 27 | download_file('http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz') 28 | tarfile.open(os.path.join(dataset_path, '102flowers.tgz')).extractall(path=dataset_path) 29 | 30 | print('Downloading image labels...') 31 | download_file('http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat') 32 | 33 | print('Downloading train/test/valid splits...') 34 | download_file('http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat') 35 | 36 | # Read .mat file containing training, testing, and validation sets. 37 | setid = loadmat(os.path.join(dataset_path, 'setid.mat')) 38 | 39 | # The .mat file is 1-indexed, so we subtract one to match Caffe's convention. 40 | idx_train = setid['trnid'][0] 41 | idx_test = setid['tstid'][0] 42 | idx_valid = setid['valid'][0] 43 | 44 | # Read .mat file containing image labels.
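# imagelabels.mat stores one 1-indexed class id (1-102) per image; the same ids are used as the class folder names created below.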
45 | image_labels = loadmat(os.path.join(dataset_path, 'imagelabels.mat'))['labels'][0] 46 | 47 | train_path = os.path.join(dataset_path, 'train') 48 | os.makedirs(train_path, exist_ok=True) 49 | for i in range(1, 103): 50 | sub_path = os.path.join(train_path, '%d' % i) 51 | os.makedirs(sub_path, exist_ok=True) 52 | 53 | for idx in idx_train: 54 | category = image_labels[idx - 1] 55 | original_path = '%s/jpg/image_%05d.jpg' % (dataset_path, idx) 56 | target_path = '%s/train/%d/image_%05d.jpg' % (dataset_path, category, idx) 57 | shutil.move(original_path, target_path) 58 | 59 | for idx in idx_valid: 60 | category = image_labels[idx - 1] 61 | original_path = '%s/jpg/image_%05d.jpg' % (dataset_path, idx) 62 | target_path = '%s/train/%d/image_%05d.jpg' % (dataset_path, category, idx) 63 | shutil.move(original_path, target_path) 64 | 65 | path = os.path.join(dataset_path, 'val') 66 | os.makedirs(path, exist_ok=True) 67 | for i in range(1, 103): 68 | sub_path = os.path.join(path, '%d' % i) 69 | os.makedirs(sub_path, exist_ok=True) 70 | 71 | for idx in idx_test: 72 | category = image_labels[idx - 1] 73 | original_path = '%s/jpg/image_%05d.jpg' % (dataset_path, idx) 74 | target_path = '%s/val/%d/image_%05d.jpg' % (dataset_path, category, idx) 75 | shutil.move(original_path, target_path) 76 | 77 | 78 | # rm other files 79 | for to_remove in ['102flowers.tgz', 'imagelabels.mat', 'jpg', 'setid.mat']: 80 | file_path = os.path.join(dataset_path, to_remove) 81 | if os.path.isfile(file_path): 82 | os.remove(file_path) 83 | else: 84 | shutil.rmtree(file_path) 85 | -------------------------------------------------------------------------------- /tinytl/dataset_setup_scripts/make_stanford_cars.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io as io 3 | import shutil 4 | import tarfile 5 | 6 | 7 | dataset_path = '~/dataset/stanford_car' 8 | dataset_path = os.path.expanduser(dataset_path) 9 | 10 | if not os.path.exists(dataset_path): 11 | os.makedirs(dataset_path, exist_ok=True) 12 | 13 | 14 | def download_file(url, dest=None): 15 | if not dest: 16 | dest = os.path.join(dataset_path, url.split('/')[-1]) 17 | if os.path.exists(dest): 18 | print('%s exists' % dest) 19 | return 20 | run_cmd = ('wget %s -O %s' % (url, dest)) 21 | print(run_cmd) 22 | os.system(run_cmd) 23 | 24 | 25 | def get_data(): 26 | download_file('http://ai.stanford.edu/~jkrause/car196/cars_train.tgz') 27 | download_file('http://ai.stanford.edu/~jkrause/car196/cars_test.tgz') 28 | download_file('https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz') 29 | download_file('http://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat') 30 | tarfile.open(os.path.join(dataset_path, 'cars_train.tgz')).extractall(path=dataset_path) 31 | tarfile.open(os.path.join(dataset_path, 'cars_test.tgz')).extractall(path=dataset_path) 32 | tarfile.open(os.path.join(dataset_path, 'car_devkit.tgz')).extractall(path=dataset_path) 33 | 34 | 35 | def test_data(): 36 | test_path = os.path.join(dataset_path, 'val') 37 | os.makedirs(test_path, exist_ok=True) 38 | a = io.loadmat(os.path.join(dataset_path, 'cars_test_annos_withlabels.mat')) 39 | b = a['annotations'][0] 40 | with open(os.path.join(dataset_path, 'cars_test.txt'), 'w'): 41 | for t in b: 42 | outstr = os.path.join(dataset_path, 'cars_test/%s' % t[5][0]) 43 | class_id = t[4][0][0] 44 | class_path = os.path.join(test_path, '%d' % class_id) 45 | if not os.path.exists(class_path): 46 | os.makedirs(class_path, exist_ok=True) 47 | 
target_path = os.path.join(class_path, t[5][0]) 48 | shutil.move(outstr, target_path) 49 | print(outstr, class_id, 'to', target_path) 50 | 51 | 52 | def train_data(): 53 | train_path = os.path.join(dataset_path, 'train') 54 | os.makedirs(train_path, exist_ok=True) 55 | a = io.loadmat(os.path.join(dataset_path, 'devkit/cars_train_annos.mat')) 56 | b = a['annotations'][0] 57 | with open(os.path.join(dataset_path, 'cars_train.txt'), 'w'): 58 | for t in b: 59 | outstr = os.path.join(dataset_path, 'cars_train/%s' % t[5][0]) 60 | class_id = t[4][0][0] 61 | class_path = os.path.join(train_path, '%d' % class_id) 62 | if not os.path.exists(class_path): 63 | os.makedirs(class_path, exist_ok=True) 64 | target_path = os.path.join(class_path, t[5][0]) 65 | shutil.move(outstr, target_path) 66 | print(outstr, class_id, 'to', target_path) 67 | 68 | 69 | def clear(): 70 | # rm other files 71 | for file_path in [ 72 | 'cars_test', 'cars_train', 'devkit', 'car_devkit.tgz', 'cars_test.tgz', 'cars_test.txt', 73 | 'cars_test_annos_withlabels.mat', 'cars_train.tgz', 'cars_train.txt', 74 | ]: 75 | file_path = os.path.join(dataset_path, file_path) 76 | if os.path.isfile(file_path): 77 | os.remove(file_path) 78 | else: 79 | shutil.rmtree(file_path) 80 | 81 | 82 | def main(): 83 | get_data() 84 | test_data() 85 | train_data() 86 | clear() 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tiny Machine Learning [[website]](https://tinyml.mit.edu) 2 | 3 | **[News]** We refactored MCUNet into a standalone repo: https://github.com/mit-han-lab/mcunet. Please follow the new repo for updates on the TinyEngine release! 4 | 5 | **[News]** We actively collaborate with industrial partners on real-world TinyML applications. Our technology has successfully influenced many products and has been deployed on over 100K IoT devices. Feel free to contact Prof. Song Han for more info. 6 | 7 | **[News]** Our projects are covered by: 8 | [MIT News](https://news.mit.edu/2020/iot-deep-learning-1113), 9 | [WIRED](https://www.wired.com/story/ai-algorithms-slimming-fit-fridge/), 10 | [Morning Brew](https://www.morningbrew.com/emerging-tech/stories/2020/12/07/researchers-figured-fit-ai-ever-onto-internet-things-microchips), 11 | [Stacey on IoT](https://staceyoniot.com/researchers-take-a-3-pronged-approach-to-edge-ai/), 12 | [Analytics Insight](https://www.analyticsinsight.net/amalgamating-ml-and-iot-in-smart-home-devices/), 13 | [Techable](https://techable.jp/archives/142462). 14 | 15 | ## TinyML Projects 16 | | Projects | Keywords | 17 | |----------------------|:----------:| 18 | | [MCUNet](https://github.com/mit-han-lab/mcunet) | Memory-efficient inference, System-algorithm co-design | 19 | | [TinyTL](https://github.com/mit-han-lab/tinyml/tree/master/tinytl) | On-device learning, Memory-efficient transfer learning | 20 | | [NetAug](https://github.com/mit-han-lab/tinyml/tree/master/netaug) | Training technique for tiny neural networks | 21 | 22 | 23 | ## About TinyML 24 | Intelligent edge devices with rich sensors (e.g., billions of mobile phones and IoT devices) have become ubiquitous 25 | in our daily lives. Combining artificial intelligence (AI) with these edge devices enables 26 | vast real-world applications such as smart home, smart retail, autonomous driving, 27 | and so on. However, state-of-the-art deep learning systems typically require tremendous 28 | resources (e.g., large labeled datasets, heavy computation, and many AI experts), 29 | both for training and inference. This hinders the application of these powerful deep learning 30 | systems on edge devices. The [TinyML project](https://tinyml.mit.edu) aims to improve the efficiency of deep learning 31 | systems by requiring less computation, fewer engineers, and less data, 32 | to facilitate the giant market of edge AI and AIoT. 33 | 34 |
35 | 36 |
37 |
38 | 39 |
40 | 41 | ## Demo 42 | [![Watch the video](https://hanlab.mit.edu/projects/tinyml/figures/mcunet_demo.png)](https://youtu.be/YvioBgtec4U) 43 | 44 | ## Related Projects 45 | 46 | [MCUNet: Tiny Deep Learning on IoT Devices](https://arxiv.org/abs/2007.10319) (NeurIPS'20, spotlight) 47 | 48 | [TinyTL: Reduce Memory, Not Parameters for Efficient On-Device Learning](https://arxiv.org/abs/2007.11622) (NeurIPS'20) 49 | 50 | [Once for All: Train One Network and Specialize it for Efficient Deployment](https://arxiv.org/abs/1908.09791) (ICLR'20) 51 | 52 | [ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware](https://arxiv.org/pdf/1812.00332.pdf) (ICLR'19) 53 | 54 | [AutoML for Architecting Efficient and Specialized Neural Networks](https://ieeexplore.ieee.org/abstract/document/8897011) (IEEE Micro) 55 | 56 | [AMC: AutoML for Model Compression and Acceleration on Mobile Devices](https://arxiv.org/pdf/1802.03494.pdf) (ECCV'18) 57 | 58 | [HAQ: Hardware-Aware Automated Quantization](https://arxiv.org/pdf/1811.08886.pdf) (CVPR'19, oral) 59 | -------------------------------------------------------------------------------- /netaug/models/base/mcunet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from models.base.layers import ( 5 | ConvLayer, 6 | DsConvLayer, 7 | InvertedBlock, 8 | LinearLayer, 9 | OpSequential, 10 | ResidualBlock, 11 | ) 12 | from utils import make_divisible 13 | 14 | __all__ = ["MCUNet"] 15 | 16 | 17 | class MCUNet(nn.Module): 18 | def __init__(self, channel_divisor=8, n_classes=1000, dropout_rate=0): 19 | super(MCUNet, self).__init__() 20 | stage_width_list = [16, 8, 16, 24, 40, 48, 96] 21 | head_width_list = [160] 22 | act_func = "relu6" 23 | 24 | block_configs = [ 25 | [[3, 5, 5, 4], [7, 3, 7, 5], 4, 2], 26 | [[5, 5, 5], [5, 5, 5], 3, 2], 27 | [[5, 6, 4], [3, 7, 5], 3, 2], 28 | [[5, 5, 5], [5, 7, 3], 3, 1], 29 | [[6, 5, 4], [3, 7, 3], 3, 2], 30 | ] 31 | 32 | input_stem = OpSequential( 33 | [ 34 | ConvLayer(3, stage_width_list[0], 3, 2, act_func=act_func), 35 | ResidualBlock( 36 | DsConvLayer( 37 | stage_width_list[0], 38 | stage_width_list[1], 39 | 3, 40 | 1, 41 | (act_func, None), 42 | ), 43 | shortcut=None, 44 | ), 45 | ] 46 | ) 47 | 48 | # stages 49 | stages = [] 50 | in_channels = stage_width_list[1] 51 | for (e_list, ks_list, n, s), c in zip(block_configs, stage_width_list[2:]): 52 | blocks = [] 53 | for i in range(n): 54 | stride = s if i == 0 else 1 55 | mid_channels = make_divisible( 56 | round(e_list[i] * in_channels), channel_divisor 57 | ) 58 | mb_conv = ResidualBlock( 59 | InvertedBlock( 60 | in_channels, 61 | c, 62 | ks_list[i], 63 | stride, 64 | mid_channels=mid_channels, 65 | act_func=(act_func, act_func, None), 66 | ), 67 | shortcut=nn.Identity() 68 | if (stride == 1 and in_channels == c and i != 0) 69 | else None, 70 | ) 71 | blocks.append(mb_conv) 72 | in_channels = c 73 | stages.append(OpSequential(blocks)) 74 | 75 | # head 76 | head = OpSequential( 77 | [ 78 | ResidualBlock( 79 | InvertedBlock( 80 | in_channels, 81 | head_width_list[0], 82 | 7, 83 | mid_channels=480, 84 | act_func=(act_func, act_func, None), 85 | ), 86 | shortcut=None, 87 | ), 88 | nn.AdaptiveAvgPool2d(1), 89 | LinearLayer(head_width_list[0], n_classes, dropout_rate=dropout_rate), 90 | ] 91 | ) 92 | 93 | self.backbone = nn.ModuleDict( 94 | { 95 | "input_stem": input_stem, 96 | "stages": nn.ModuleList(stages), 97 | } 98 | ) 99 | self.head = head 100 | 101 | def forward(self, x: 
torch.Tensor) -> torch.Tensor: 102 | x = self.backbone["input_stem"](x) 103 | for stage in self.backbone["stages"]: 104 | x = stage(x) 105 | x = self.head(x) 106 | return x 107 | -------------------------------------------------------------------------------- /tinytl/tinytl/data_providers/fgvc_data_providers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torchvision 3 | from ofa.imagenet_classification.data_providers import ImagenetDataProvider 4 | 5 | __all__ = [ 6 | 'FGVCDataProvider', 7 | 'AircraftDataProvider', 'CarDataProvider', 'Flowers102DataProvider', 'CUB200DataProvider', 'PetsDataProvider', 8 | 'Food101DataProvider', 'CIFAR10DataProvider', 'CIFAR100DataProvider', 9 | ] 10 | 11 | 12 | class FGVCDataProvider(ImagenetDataProvider): 13 | 14 | @staticmethod 15 | def name(): 16 | raise NotImplementedError 17 | 18 | @property 19 | def n_classes(self): 20 | raise NotImplementedError 21 | 22 | @property 23 | def save_path(self): 24 | raise NotImplementedError 25 | 26 | 27 | class AircraftDataProvider(FGVCDataProvider): 28 | 29 | @staticmethod 30 | def name(): 31 | return 'aircraft' 32 | 33 | @property 34 | def n_classes(self): 35 | return 100 36 | 37 | @property 38 | def save_path(self): 39 | return os.path.expanduser('~/dataset/aircraft') 40 | 41 | 42 | class CarDataProvider(FGVCDataProvider): 43 | 44 | @staticmethod 45 | def name(): 46 | return 'car' 47 | 48 | @property 49 | def n_classes(self): 50 | return 196 51 | 52 | @property 53 | def save_path(self): 54 | return os.path.expanduser('~/dataset/stanford_car') 55 | 56 | 57 | class Flowers102DataProvider(FGVCDataProvider): 58 | 59 | @staticmethod 60 | def name(): 61 | return 'flowers102' 62 | 63 | @property 64 | def n_classes(self): 65 | return 102 66 | 67 | @property 68 | def save_path(self): 69 | return os.path.expanduser('~/dataset/flowers102') 70 | 71 | 72 | class Food101DataProvider(FGVCDataProvider): 73 | 74 | @staticmethod 75 | def name(): 76 | return 'food101' 77 | 78 | @property 79 | def n_classes(self): 80 | return 101 81 | 82 | @property 83 | def save_path(self): 84 | return os.path.expanduser('~/dataset/food101') 85 | 86 | 87 | class CUB200DataProvider(FGVCDataProvider): 88 | 89 | @staticmethod 90 | def name(): 91 | return 'cub200' 92 | 93 | @property 94 | def n_classes(self): 95 | return 200 96 | 97 | @property 98 | def save_path(self): 99 | return os.path.expanduser('~/dataset/cub200') 100 | 101 | 102 | class PetsDataProvider(FGVCDataProvider): 103 | 104 | @staticmethod 105 | def name(): 106 | return 'pets' 107 | 108 | @property 109 | def n_classes(self): 110 | return 37 111 | 112 | @property 113 | def save_path(self): 114 | return os.path.expanduser('~/dataset/pets') 115 | 116 | 117 | class CIFAR10DataProvider(FGVCDataProvider): 118 | 119 | @staticmethod 120 | def name(): 121 | return 'cifar10' 122 | 123 | @property 124 | def n_classes(self): 125 | return 10 126 | 127 | @property 128 | def save_path(self): 129 | return os.path.expanduser('~/dataset/cifar10') 130 | 131 | def train_dataset(self, _transforms): 132 | dataset = torchvision.datasets.CIFAR10(self.save_path, train=True, transform=_transforms, download=True) 133 | return dataset 134 | 135 | def test_dataset(self, _transforms): 136 | dataset = torchvision.datasets.CIFAR10(self.save_path, train=False, transform=_transforms, download=True) 137 | return dataset 138 | 139 | 140 | class CIFAR100DataProvider(CIFAR10DataProvider): 141 | 142 | @staticmethod 143 | def name(): 144 | return 'cifar100'
145 | 146 | @property 147 | def n_classes(self): 148 | return 100 149 | 150 | @property 151 | def save_path(self): 152 | return os.path.expanduser('~/dataset/cifar100') 153 | 154 | def train_dataset(self, _transforms): 155 | dataset = torchvision.datasets.CIFAR100(self.save_path, train=True, transform=_transforms, download=True) 156 | return dataset 157 | 158 | def test_dataset(self, _transforms): 159 | dataset = torchvision.datasets.CIFAR100(self.save_path, train=False, transform=_transforms, download=True) 160 | return dataset 161 | -------------------------------------------------------------------------------- /netaug/models/base/mbv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from models.base.layers import ( 5 | ConvLayer, 6 | DsConvLayer, 7 | InvertedBlock, 8 | LinearLayer, 9 | OpSequential, 10 | ResidualBlock, 11 | ) 12 | from utils import make_divisible 13 | 14 | __all__ = ["MobileNetV2"] 15 | 16 | 17 | class MobileNetV2(nn.Module): 18 | def __init__( 19 | self, width_mult=1.0, channel_divisor=8, n_classes=1000, dropout_rate=0 20 | ): 21 | super(MobileNetV2, self).__init__() 22 | stage_width_list = [32, 16, 24, 32, 64, 96, 160] 23 | head_width_list = [320, 1280] 24 | act_func = "relu6" 25 | 26 | block_configs = [ 27 | # t, n, s 28 | [6, 2, 2], 29 | [6, 3, 2], 30 | [6, 4, 2], 31 | [6, 3, 1], 32 | [6, 3, 2], 33 | ] 34 | 35 | for i, w in enumerate(stage_width_list): 36 | stage_width_list[i] = make_divisible(w * width_mult, channel_divisor) 37 | for i, w in enumerate(head_width_list): 38 | head_width_list[i] = make_divisible(w * width_mult, channel_divisor) 39 | head_width_list[1] = max(head_width_list[1], 1280) 40 | 41 | input_stem = OpSequential( 42 | [ 43 | ConvLayer(3, stage_width_list[0], 3, 2, act_func=act_func), 44 | ResidualBlock( 45 | DsConvLayer( 46 | stage_width_list[0], 47 | stage_width_list[1], 48 | 3, 49 | 1, 50 | (act_func, None), 51 | ), 52 | shortcut=None, 53 | ), 54 | ] 55 | ) 56 | 57 | # stages 58 | stages = [] 59 | in_channels = stage_width_list[1] 60 | for (t, n, s), c in zip(block_configs, stage_width_list[2:]): 61 | blocks = [] 62 | for i in range(n): 63 | stride = s if i == 0 else 1 64 | mid_channels = make_divisible(round(t * in_channels), channel_divisor) 65 | mb_conv = ResidualBlock( 66 | InvertedBlock( 67 | in_channels, 68 | c, 69 | 3, 70 | stride, 71 | mid_channels=mid_channels, 72 | act_func=(act_func, act_func, None), 73 | ), 74 | shortcut=nn.Identity() 75 | if (stride == 1 and in_channels == c and i != 0) 76 | else None, 77 | ) 78 | blocks.append(mb_conv) 79 | in_channels = c 80 | stages.append(OpSequential(blocks)) 81 | 82 | # head 83 | head = OpSequential( 84 | [ 85 | ResidualBlock( 86 | InvertedBlock( 87 | in_channels, 88 | head_width_list[0], 89 | 3, 90 | expand_ratio=6, 91 | act_func=(act_func, act_func, None), 92 | ), 93 | shortcut=None, 94 | ), 95 | ConvLayer(head_width_list[0], head_width_list[1], 1, act_func=act_func), 96 | nn.AdaptiveAvgPool2d(1), 97 | LinearLayer(head_width_list[1], n_classes, dropout_rate=dropout_rate), 98 | ] 99 | ) 100 | 101 | self.backbone = nn.ModuleDict( 102 | { 103 | "input_stem": input_stem, 104 | "stages": nn.ModuleList(stages), 105 | } 106 | ) 107 | self.head = head 108 | 109 | def forward(self, x: torch.Tensor) -> torch.Tensor: 110 | x = self.backbone["input_stem"](x) 111 | for stage in self.backbone["stages"]: 112 | x = stage(x) 113 | x = self.head(x) 114 | return x 115 | 
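A minimal usage sketch for the `MobileNetV2` definition above (an illustrative assumption, not part of the repo: the 0.35 width multiplier and 160x160 input mirror the mbv2-0.35 training/eval settings, and the netaug/ directory is assumed to be the working directory, as the file's own `models.base` imports require):

```python
import torch

from models.base.mbv2 import MobileNetV2

# width_mult scales every stage width through make_divisible, e.g. the stem:
# 32 * 0.35 = 11.2 first rounds down to 8, which is below the 90% floor
# (10.08), so the divisor is added back and the stem ends up with 16 channels;
# the classifier input stays clamped at a minimum of 1280 channels.
model = MobileNetV2(width_mult=0.35, n_classes=1000)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 160, 160))  # expected shape: (1, 1000)
```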
-------------------------------------------------------------------------------- /netaug/utils/misc.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple, Union 2 | 3 | import torch 4 | import torch.nn as nn 5 | import yaml 6 | from torch.nn.modules.batchnorm import _BatchNorm 7 | 8 | __all__ = [ 9 | "make_divisible", 10 | "load_state_dict_from_file", 11 | "list_mean", 12 | "list_sum", 13 | "parse_unknown_args", 14 | "partial_update_config", 15 | "remove_bn", 16 | "get_same_padding", 17 | "torch_random_choices", 18 | ] 19 | 20 | 21 | def make_divisible( 22 | v: Union[int, float], divisor: Optional[int], min_val=None 23 | ) -> Union[int, float]: 24 | """This function is taken from the original tf repo. 25 | 26 | It ensures that all layers have a channel number that is divisible by 8 27 | It can be seen here: 28 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 29 | :param v: 30 | :param divisor: 31 | :param min_val: 32 | :return: 33 | """ 34 | if divisor is None: 35 | return v 36 | 37 | if min_val is None: 38 | min_val = divisor 39 | new_v = max(min_val, int(v + divisor / 2) // divisor * divisor) 40 | # Make sure that round down does not go down by more than 10%. 41 | if new_v < 0.9 * v: 42 | new_v += divisor 43 | return new_v 44 | 45 | 46 | def load_state_dict_from_file(file: str) -> Dict[str, torch.Tensor]: 47 | checkpoint = torch.load(file, map_location="cpu") 48 | if "state_dict" in checkpoint: 49 | checkpoint = checkpoint["state_dict"] 50 | return checkpoint 51 | 52 | 53 | def list_sum(x: List) -> Any: 54 | return x[0] if len(x) == 1 else x[0] + list_sum(x[1:]) 55 | 56 | 57 | def list_mean(x: List) -> Any: 58 | return list_sum(x) / len(x) 59 | 60 | 61 | def parse_unknown_args(unknown: List) -> Dict: 62 | """Parse unknown args.""" 63 | index = 0 64 | parsed_dict = {} 65 | while index < len(unknown): 66 | key, val = unknown[index], unknown[index + 1] 67 | index += 2 68 | if key.startswith("--"): 69 | key = key[2:] 70 | try: 71 | # try parsing with yaml 72 | if "{" in val and "}" in val and ":" in val: 73 | val = val.replace(":", ": ") # add space manually for dict 74 | out_val = yaml.safe_load(val) 75 | except ValueError: 76 | # return raw string if parsing fails 77 | out_val = val 78 | parsed_dict[key] = out_val 79 | return parsed_dict 80 | 81 | 82 | def partial_update_config(config: Dict, partial_config: Dict): 83 | for key in partial_config: 84 | if ( 85 | key in config 86 | and isinstance(partial_config[key], Dict) 87 | and isinstance(config[key], Dict) 88 | ): 89 | partial_update_config(config[key], partial_config[key]) 90 | else: 91 | config[key] = partial_config[key] 92 | 93 | 94 | def remove_bn(model: nn.Module) -> None: 95 | for m in model.modules(): 96 | if isinstance(m, _BatchNorm): 97 | m.weight = m.bias = None 98 | m.forward = lambda x: x 99 | 100 | 101 | def get_same_padding(kernel_size: Union[int, Tuple[int, int]]) -> Union[int, tuple]: 102 | if isinstance(kernel_size, tuple): 103 | assert len(kernel_size) == 2, f"invalid kernel size: {kernel_size}" 104 | p1 = get_same_padding(kernel_size[0]) 105 | p2 = get_same_padding(kernel_size[1]) 106 | return p1, p2 107 | else: 108 | assert isinstance( 109 | kernel_size, int 110 | ), "kernel size should be either `int` or `tuple`" 111 | assert kernel_size % 2 > 0, "kernel size should be odd number" 112 | return kernel_size // 2 113 | 114 | 115 | def torch_random_choices( 116 | src_list: List[Any], 117 | 
generator: Optional[torch.Generator], 118 | k=1, 119 | ) -> Union[Any, List[Any]]: 120 | rand_idx = torch.randint(low=0, high=len(src_list), generator=generator, size=(k,)) 121 | out_list = [src_list[i] for i in rand_idx] 122 | return out_list[0] if k == 1 else out_list 123 | -------------------------------------------------------------------------------- /netaug/models/base/proxylessnas.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from models.base.layers import ( 5 | ConvLayer, 6 | DsConvLayer, 7 | InvertedBlock, 8 | LinearLayer, 9 | OpSequential, 10 | ResidualBlock, 11 | ) 12 | from utils import make_divisible 13 | 14 | __all__ = ["ProxylessNASMobile"] 15 | 16 | 17 | class ProxylessNASMobile(nn.Module): 18 | def __init__( 19 | self, width_mult=1.0, channel_divisor=8, n_classes=1000, dropout_rate=0 20 | ): 21 | super(ProxylessNASMobile, self).__init__() 22 | stage_width_list = [32, 16, 32, 40, 80, 96, 192] 23 | head_width_list = [320, 1280] 24 | act_func = "relu6" 25 | 26 | block_configs = [ 27 | [[3, 3], [5, 3], 2, 2], 28 | [[3, 3, 3, 3], [7, 3, 5, 5], 4, 2], 29 | [[6, 3, 3, 3], [7, 5, 5, 5], 4, 2], 30 | [[6, 3, 3, 3], [5, 5, 5, 5], 4, 1], 31 | [[6, 6, 3, 3], [7, 7, 7, 7], 4, 2], 32 | ] 33 | 34 | for i, w in enumerate(stage_width_list): 35 | stage_width_list[i] = make_divisible(w * width_mult, channel_divisor) 36 | for i, w in enumerate(head_width_list): 37 | head_width_list[i] = make_divisible(w * width_mult, channel_divisor) 38 | head_width_list[1] = max(head_width_list[1], 1280) 39 | 40 | input_stem = OpSequential( 41 | [ 42 | ConvLayer(3, stage_width_list[0], 3, 2, act_func=act_func), 43 | ResidualBlock( 44 | DsConvLayer( 45 | stage_width_list[0], 46 | stage_width_list[1], 47 | 3, 48 | 1, 49 | (act_func, None), 50 | ), 51 | shortcut=None, 52 | ), 53 | ] 54 | ) 55 | 56 | # stages 57 | stages = [] 58 | in_channels = stage_width_list[1] 59 | for (e_list, ks_list, n, s), c in zip(block_configs, stage_width_list[2:]): 60 | blocks = [] 61 | for i in range(n): 62 | stride = s if i == 0 else 1 63 | mid_channels = make_divisible( 64 | round(e_list[i] * in_channels), channel_divisor 65 | ) 66 | mb_conv = ResidualBlock( 67 | InvertedBlock( 68 | in_channels, 69 | c, 70 | ks_list[i], 71 | stride, 72 | mid_channels=mid_channels, 73 | act_func=(act_func, act_func, None), 74 | ), 75 | shortcut=nn.Identity() 76 | if (stride == 1 and in_channels == c and i != 0) 77 | else None, 78 | ) 79 | blocks.append(mb_conv) 80 | in_channels = c 81 | stages.append(OpSequential(blocks)) 82 | 83 | # head 84 | head = OpSequential( 85 | [ 86 | ResidualBlock( 87 | InvertedBlock( 88 | in_channels, 89 | head_width_list[0], 90 | 7, 91 | expand_ratio=6, 92 | act_func=(act_func, act_func, None), 93 | ), 94 | shortcut=None, 95 | ), 96 | ConvLayer(head_width_list[0], head_width_list[1], 1, act_func=act_func), 97 | nn.AdaptiveAvgPool2d(1), 98 | LinearLayer(head_width_list[1], n_classes, dropout_rate=dropout_rate), 99 | ] 100 | ) 101 | 102 | self.backbone = nn.ModuleDict( 103 | { 104 | "input_stem": input_stem, 105 | "stages": nn.ModuleList(stages), 106 | } 107 | ) 108 | self.head = head 109 | 110 | def forward(self, x: torch.Tensor) -> torch.Tensor: 111 | x = self.backbone["input_stem"](x) 112 | for stage in self.backbone["stages"]: 113 | x = stage(x) 114 | x = self.head(x) 115 | return x 116 | -------------------------------------------------------------------------------- /tinytl/tinytl/data_providers/fgvc_run_config.py: 
-------------------------------------------------------------------------------- 1 | from ofa.imagenet_classification.run_manager import ImagenetRunConfig 2 | 3 | from .fgvc_data_providers import AircraftDataProvider, Flowers102DataProvider, CarDataProvider 4 | from .fgvc_data_providers import Food101DataProvider, CUB200DataProvider, PetsDataProvider 5 | from .fgvc_data_providers import CIFAR10DataProvider, CIFAR100DataProvider 6 | 7 | __all__ = ['FGVCRunConfig'] 8 | 9 | 10 | class FGVCRunConfig(ImagenetRunConfig): 11 | 12 | def __init__(self, n_epochs=50, init_lr=0.01, lr_schedule_type='cosine', lr_schedule_param=None, 13 | dataset='flowers102', train_batch_size=256, test_batch_size=500, valid_size=None, 14 | opt_type='sgd', opt_param=None, weight_decay=4e-5, label_smoothing=0, no_decay_keys=None, 15 | mixup_alpha=None, model_init='he_fout', validation_frequency=1, print_frequency=10, 16 | n_worker=32, resize_scale=0.08, distort_color='tf', image_size=224, fast_evaluation=True, **kwargs): 17 | super(FGVCRunConfig, self).__init__( 18 | n_epochs, init_lr, lr_schedule_type, lr_schedule_param, 19 | dataset, train_batch_size, test_batch_size, valid_size, 20 | opt_type, opt_param, weight_decay, label_smoothing, no_decay_keys, mixup_alpha, 21 | model_init, validation_frequency, print_frequency, 22 | n_worker, resize_scale, distort_color, image_size, **kwargs, 23 | ) 24 | self.fast_evaluation = fast_evaluation 25 | 26 | @property 27 | def data_provider(self): 28 | if self.__dict__.get('_data_provider', None) is None: 29 | if self.dataset == AircraftDataProvider.name(): 30 | DataProviderClass = AircraftDataProvider 31 | elif self.dataset == Flowers102DataProvider.name(): 32 | DataProviderClass = Flowers102DataProvider 33 | elif self.dataset == CarDataProvider.name(): 34 | DataProviderClass = CarDataProvider 35 | elif self.dataset == Food101DataProvider.name(): 36 | DataProviderClass = Food101DataProvider 37 | elif self.dataset == CUB200DataProvider.name(): 38 | DataProviderClass = CUB200DataProvider 39 | elif self.dataset == PetsDataProvider.name(): 40 | DataProviderClass = PetsDataProvider 41 | elif self.dataset == CIFAR10DataProvider.name(): 42 | DataProviderClass = CIFAR10DataProvider 43 | elif self.dataset == CIFAR100DataProvider.name(): 44 | DataProviderClass = CIFAR100DataProvider 45 | else: 46 | raise ValueError('Do not support: %s' % self.dataset) 47 | self.__dict__['_data_provider'] = DataProviderClass( 48 | train_batch_size=self.train_batch_size, test_batch_size=self.test_batch_size, 49 | valid_size=self.valid_size, n_worker=self.n_worker, resize_scale=self.resize_scale, 50 | distort_color=self.distort_color, image_size=self.image_size, 51 | ) 52 | return self.__dict__['_data_provider'] 53 | 54 | @property 55 | def valid_loader(self): 56 | if not self.fast_evaluation: 57 | return self.data_provider.valid 58 | 59 | if self.valid_size is None: 60 | return self.test_loader 61 | if self.__dict__.get('_in_memory_valid%d' % self.data_provider.active_img_size, None) is None: 62 | self.__dict__['_in_memory_valid%d' % self.data_provider.active_img_size] = [] 63 | for images, labels in self.data_provider.valid: 64 | self.__dict__['_in_memory_valid%d' % self.data_provider.active_img_size].append((images, labels)) 65 | return self.__dict__['_in_memory_valid%d' % self.data_provider.active_img_size] 66 | 67 | @property 68 | def test_loader(self): 69 | if not self.fast_evaluation: 70 | return self.data_provider.test 71 | 72 | if self.__dict__.get('_in_memory_test%d' % 
self.data_provider.active_img_size, None) is None: 73 | self.__dict__['_in_memory_test%d' % self.data_provider.active_img_size] = [] 74 | for images, labels in self.data_provider.test: 75 | self.__dict__['_in_memory_test%d' % self.data_provider.active_img_size].append((images, labels)) 76 | return self.__dict__['_in_memory_test%d' % self.data_provider.active_img_size] 77 | 78 | -------------------------------------------------------------------------------- /netaug/README.md: -------------------------------------------------------------------------------- 1 | # Network Augmentation for Tiny Deep Learning 2 | 3 | ```BibTex 4 | @inproceedings{ 5 | cai2022network, 6 | title={Network Augmentation for Tiny Deep Learning}, 7 | author={Han Cai and Chuang Gan and Ji Lin and Song Han}, 8 | booktitle={International Conference on Learning Representations}, 9 | year={2022}, 10 | url={https://openreview.net/forum?id=TYw3-OlrRm-} 11 | } 12 | ``` 13 | 14 | ### Neural Networks Going Tiny for Deployment on Tiny Edge Devices 15 |
16 | 17 |
18 | 19 | ### Training Tiny Neural Networks is Different from Training Large Neural Networks 20 |
21 | 22 |
23 | 24 | ### Augment Tiny Neural Networks to Get More Supervision During Training 25 |
26 | 27 |
28 | 29 | ### Experiment Results 30 |
31 | 32 |
33 |
34 | 35 |
36 |
37 | 38 |
39 | 40 | ## Environment 41 | * Python 3.8.5 42 | * PyTorch 1.8.2 43 | * [torchpack](https://github.com/zhijian-liu/torchpack) 44 | * [torchprofile](https://github.com/zhijian-liu/torchprofile) 45 | 46 | ## Pretrained Models 47 | | Model | #Params | #MACs | ImageNet Top1 (%) | Pretrained weights | 48 | |--------------------------------------------|---------|-------|-------------------|-------------------------------------------------------------------------------------------| 49 | | MobileNetV2-Tiny + NetAug | 0.75M | 23.5M | 53.3% | [pth](https://drive.google.com/file/d/1rTJQXMO2A9PzzY3Vo9MvaB7_m58H4RIX/view?usp=sharing) | 50 | | MCUNet + NetAug | 0.74M | 81.8M | 62.7% | [pth](https://drive.google.com/file/d/12w2EoPKmHhTqz0yMwld7u766WJOCPfAn/view?usp=sharing) | 51 | | ProxylessNAS-Mobile (w0.35, r160) + NetAug | 1.8M | 35.7M | 60.8% | [pth](https://drive.google.com/file/d/1fd9YDVlx6oFC8spoeeM5NtMMLQaWExFP/view?usp=sharing) | 52 | 53 | More are available on [Google Drive](https://drive.google.com/drive/folders/1wquOwniMCI9iDftaiPmdzxwkE8o_ES72?usp=sharing). 54 | 55 | To evaluate pretrained models, please run **eval.py**. 56 | 57 | Example: 58 | ``` 59 | torchpack dist-run -np 1 python eval.py \ 60 | --dataset imagenet --data_path /dataset/imagenet/ \ 61 | --image_size 160 \ 62 | --model proxylessnas-0.35 \ 63 | --init_from 64 | ``` 65 | 66 | ## How to train models with NetAug 67 | Scripts for training models with NetAug on ImageNet are available under the folder **bash/imagenet**. 68 | 69 | Notes: 70 | 1. With netaug, the expand ratio of the augmented model will be very large. We find the **[fout](https://github.com/mit-han-lab/once-for-all/blob/4451593507b0f48a7854763adfe7785705abdd78/ofa/utils/my_modules.py#L122)** initialization strategy does not work well for such models. Thus, we use **[nn.init.kaiming_uniform](https://github.com/pytorch/pytorch/blob/e0495a7aa104471d95dc85a1b8f6473fbcc427a8/torch/nn/modules/conv.py#L114)** initialization when netaug is used. 71 | 2. We sort the channels according to each channel's L1 norm at the beginning of each epoch, which forces the target model to keep the most important channels (see the standalone sketch after this section). 72 | 3. We stop augmenting the width multiplier (i.e., the width multiplier augmentation ratio is always 1.0) in the second half of the training epochs, which slightly improved the results in our early experiments. 73 | 4. When using netaug, the running mean and running var in BN layers are not accurate. Thus, if netaug is used, we always use a subset of training images to re-estimate the running mean and running var in BN layers after training. 74 | 75 | ## How to run transfer learning experiments 76 | To run transfer learning experiments, please first download our pretrained weights or train the models on the pretraining dataset by yourself. Scripts are available under the folder **bash/transfer/**.
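As a rough, standalone illustration of note 2 above (sorting channels by L1 importance), the sketch below reorders the output channels of a plain `nn.Conv2d` and permutes the following BN consistently. It is a minimal example for intuition only, not the repo's actual `sort_channels_inner` implementation, which also has to handle depthwise convolutions and the next layer's input channels.

```python
import torch
import torch.nn as nn

conv = nn.Conv2d(8, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16)

# Importance of each output channel = L1 norm of its filter weights.
importance = conv.weight.detach().abs().sum(dim=(1, 2, 3))  # shape: (16,)
order = torch.argsort(importance, descending=True)

# Reorder output channels so the most important ones come first; every tensor
# indexed by that channel dimension (here, the BN parameters and running
# statistics) must be permuted with the same order.
conv.weight.data = conv.weight.data[order]
for t in (bn.weight, bn.bias, bn.running_mean, bn.running_var):
    t.data = t.data[order]
```

After this reordering, truncating to the first k channels keeps the strongest sub-network, which is why the target model inside the augmented model ends up retaining the most important channels.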
77 | 78 | ## Related Projects 79 | 80 | [TinyTL: Reduce Activations, Not Trainable Parameters for Efficient On-Device Learning](https://github.com/mit-han-lab/tinyml/tree/master/tinytl) (NeurIPS'20) 81 | 82 | [MCUNet: Tiny Deep Learning on IoT Devices](https://arxiv.org/abs/2007.10319) (NeurIPS'20, spotlight) 83 | 84 | [Once for All: Train One Network and Specialize it for Efficient Deployment](https://arxiv.org/abs/1908.09791) (ICLR'20) 85 | 86 | [ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware](https://arxiv.org/pdf/1812.00332.pdf) (ICLR'19) 87 | -------------------------------------------------------------------------------- /netaug/models/base/mbv3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from models.base.layers import ( 5 | ConvLayer, 6 | DsConvLayer, 7 | InvertedBlock, 8 | LinearLayer, 9 | OpSequential, 10 | ResidualBlock, 11 | SeInvertedBlock, 12 | ) 13 | from utils import make_divisible 14 | 15 | __all__ = ["MobileNetV3"] 16 | 17 | 18 | class MobileNetV3(nn.Module): 19 | def __init__( 20 | self, width_mult=1.0, channel_divisor=8, n_classes=1000, dropout_rate=0 21 | ): 22 | super(MobileNetV3, self).__init__() 23 | stage_width_list = [16, 24, 40, 80, 112, 160] 24 | head_width_list = [960, 1280] 25 | 26 | block_configs = [ 27 | [[64, 72], 3, 2, 2, "relu", False], 28 | [[72, 120, 120], 5, 3, 2, "relu", True], 29 | [[240, 200, 184, 184], 3, 4, 2, "h_swish", False], 30 | [[480, 672], 3, 2, 1, "h_swish", True], 31 | [[672, 960, 960], 5, 3, 2, "h_swish", True], 32 | ] 33 | 34 | for i, w in enumerate(stage_width_list): 35 | stage_width_list[i] = make_divisible(w * width_mult, channel_divisor) 36 | for i, w in enumerate(head_width_list): 37 | head_width_list[i] = make_divisible(w * width_mult, channel_divisor) 38 | head_width_list[1] = max(head_width_list[1], 1280) 39 | 40 | input_stem = OpSequential( 41 | [ 42 | ConvLayer(3, stage_width_list[0], 3, 2, act_func="h_swish"), 43 | ResidualBlock( 44 | DsConvLayer( 45 | stage_width_list[0], 46 | stage_width_list[0], 47 | 3, 48 | 1, 49 | ("relu", None), 50 | ), 51 | shortcut=nn.Identity(), 52 | ), 53 | ] 54 | ) 55 | 56 | # stages 57 | stages = [] 58 | in_channels = stage_width_list[0] 59 | for (mid_c_list, ks, n, s, act_func, use_se), c in zip( 60 | block_configs, stage_width_list[1:] 61 | ): 62 | blocks = [] 63 | for i in range(n): 64 | stride = s if i == 0 else 1 65 | mid_channels = make_divisible( 66 | round(mid_c_list[i] * width_mult), channel_divisor 67 | ) 68 | if use_se: 69 | conv = SeInvertedBlock( 70 | in_channels, 71 | c, 72 | ks, 73 | stride, 74 | mid_channels=mid_channels, 75 | act_func=(act_func, act_func, None), 76 | se_config={ 77 | "act_func": "relu", 78 | "mid_channels": max( 79 | make_divisible(mid_channels / 4, channel_divisor), 16 80 | ), 81 | }, 82 | ) 83 | else: 84 | conv = InvertedBlock( 85 | in_channels, 86 | c, 87 | ks, 88 | stride, 89 | mid_channels=mid_channels, 90 | act_func=(act_func, act_func, None), 91 | ) 92 | mb_conv = ResidualBlock( 93 | conv, 94 | shortcut=nn.Identity() 95 | if (stride == 1 and in_channels == c and i != 0) 96 | else None, 97 | ) 98 | blocks.append(mb_conv) 99 | in_channels = c 100 | stages.append(OpSequential(blocks)) 101 | 102 | # head 103 | head = OpSequential( 104 | [ 105 | ConvLayer(in_channels, head_width_list[0], 1, act_func="h_swish"), 106 | nn.AdaptiveAvgPool2d(1), 107 | ConvLayer( 108 | head_width_list[0], 109 | head_width_list[1], 110 | 1, 111 | act_func="h_swish", 
112 | norm=None, 113 | use_bias=True, 114 | ), 115 | LinearLayer(head_width_list[1], n_classes, dropout_rate=dropout_rate), 116 | ] 117 | ) 118 | 119 | self.backbone = nn.ModuleDict( 120 | { 121 | "input_stem": input_stem, 122 | "stages": nn.ModuleList(stages), 123 | } 124 | ) 125 | self.head = head 126 | 127 | def forward(self, x: torch.Tensor) -> torch.Tensor: 128 | x = self.backbone["input_stem"](x) 129 | for stage in self.backbone["stages"]: 130 | x = stage(x) 131 | x = self.head(x) 132 | return x 133 | -------------------------------------------------------------------------------- /tinytl/dataset_setup_scripts/make_cub200.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -* 2 | """This module is served as torchvision.datasets to load CUB200-2011. 3 | 4 | CUB200-2011 dataset has 11,788 images of 200 bird species. The project page 5 | is as follows. 6 | http://www.vision.caltech.edu/visipedia/CUB-200-2011.html 7 | - Images are contained in the directory data/cub200/raw/images/, 8 | with 200 subdirectories. 9 | - Format of images.txt: 10 | - Format of train_test_split.txt: 11 | - Format of classes.txt: 12 | - Format of image_class_labels.txt: 13 | 14 | This file is modified from: 15 | https://github.com/vishwakftw/vision. 16 | """ 17 | 18 | 19 | import os 20 | import pickle 21 | import numpy as np 22 | import PIL.Image 23 | import shutil 24 | import requests 25 | 26 | import torch.utils.data as data 27 | 28 | 29 | def download_file_from_google_drive(id, destination): 30 | URL = "https://docs.google.com/uc?export=download" 31 | 32 | session = requests.Session() 33 | 34 | response = session.get(URL, params = { 'id' : id }, stream = True) 35 | token = get_confirm_token(response) 36 | 37 | if token: 38 | params = { 'id' : id, 'confirm' : token } 39 | response = session.get(URL, params = params, stream = True) 40 | 41 | save_response_content(response, destination) 42 | 43 | 44 | def get_confirm_token(response): 45 | for key, value in response.cookies.items(): 46 | if key.startswith('download_warning'): 47 | return value 48 | 49 | return None 50 | 51 | def save_response_content(response, destination): 52 | CHUNK_SIZE = 32768 53 | 54 | with open(destination, "wb") as f: 55 | for chunk in response.iter_content(CHUNK_SIZE): 56 | if chunk: # filter out keep-alive new chunks 57 | f.write(chunk) 58 | 59 | 60 | class CUB200(data.Dataset): 61 | """CUB200 dataset. 62 | 63 | Args: 64 | _root, str: Root directory of the dataset. 65 | _train, bool: Load train/test data. 66 | _transform, callable: A function/transform that takes in a PIL.Image 67 | and transforms it. 68 | _target_transform, callable: A function/transform that takes in the 69 | target and transforms it. 70 | _train_data, list of np.ndarray. 71 | _train_labels, list of int. 72 | _test_data, list of np.ndarray. 73 | _test_labels, list of int. 74 | """ 75 | def __init__(self, root, _train=True, transform=None, target_transform=None, 76 | download=False): 77 | """Load the dataset. 78 | 79 | Args 80 | root, str: Root directory of the dataset. 81 | train, bool [True]: Load train/test data. 82 | transform, callable [None]: A function/transform that takes in a 83 | PIL.Image and transforms it. 84 | target_transform, callable [None]: A function/transform that takes 85 | in the target and transforms it. 86 | download, bool [False]: If true, downloads the dataset from the 87 | internet and puts it in root directory. If dataset is already 88 | downloaded, it is not downloaded again. 
89 | """ 90 | self._root = os.path.expanduser(root) # Replace ~ by the complete dir 91 | os.makedirs(self._root, exist_ok=True) 92 | self._train = _train 93 | self._transform = transform 94 | self._target_transform = target_transform 95 | 96 | self._download() 97 | self._extract() 98 | 99 | def _download(self): 100 | """Download and uncompress the tar.gz file from a given URL. 101 | 102 | Args: 103 | url, str: URL to be downloaded. 104 | """ 105 | import six.moves 106 | import tarfile 107 | 108 | raw_path = os.path.join(self._root, 'raw') 109 | processed_path = os.path.join(self._root, 'processed') 110 | if not os.path.isdir(raw_path): 111 | os.mkdir(raw_path, mode=0o775) 112 | if not os.path.isdir(processed_path): 113 | os.makedirs(processed_path, exist_ok=True) 114 | 115 | # Downloads file. 116 | fpath = os.path.join(self._root, 'raw/CUB_200_2011.tgz') 117 | download_file_from_google_drive(id='1hbzc_P1FuxMkcabkgn9ZKinBwW683j45', destination=fpath) 118 | 119 | # Extract file. 120 | cwd = os.getcwd() 121 | tar = tarfile.open(fpath, 'r:gz') 122 | os.chdir(os.path.join(self._root, 'raw')) 123 | tar.extractall() 124 | tar.close() 125 | os.chdir(cwd) 126 | 127 | def _extract(self): 128 | """Prepare the data for train/test split and save onto disk.""" 129 | image_path = os.path.join(self._root, 'raw/CUB_200_2011/images/') 130 | # Format of images.txt: 131 | id2name = np.genfromtxt(os.path.join( 132 | self._root, 'raw/CUB_200_2011/images.txt'), dtype=str) 133 | # Format of train_test_split.txt: 134 | id2train = np.genfromtxt(os.path.join( 135 | self._root, 'raw/CUB_200_2011/train_test_split.txt'), dtype=int) 136 | 137 | for id_ in range(id2name.shape[0]): 138 | src_image_path = os.path.join(image_path, id2name[id_, 1]) 139 | label = int(id2name[id_, 1][:3]) - 1 # Label starts with 0 140 | 141 | if id2train[id_, 1] == 1: 142 | target_path = os.path.join(self._root, 'train') 143 | os.makedirs(target_path, exist_ok=True) 144 | target_path = os.path.join(target_path, id2name[id_, 1]) 145 | else: 146 | target_path = os.path.join(self._root, 'val') 147 | os.makedirs(target_path, exist_ok=True) 148 | target_path = os.path.join(target_path, id2name[id_, 1]) 149 | folder = '/'.join(target_path.split('/')[:-1]) 150 | os.makedirs(folder, exist_ok=True) 151 | shutil.move(src_image_path, target_path) 152 | print('(%s, %s): Move from %s to %s' % (id2name[id_, 1], label, src_image_path, target_path)) 153 | 154 | dataset_path = '~/dataset/cub200' 155 | dataset_path = os.path.expanduser(dataset_path) 156 | 157 | train = CUB200(dataset_path, _train=True, download=True, transform=None) 158 | test = CUB200(dataset_path, _train=False, download=True, transform=None) 159 | shutil.rmtree(os.path.join(dataset_path, 'raw')) 160 | shutil.rmtree(os.path.join(dataset_path, 'processed')) 161 | -------------------------------------------------------------------------------- /tinytl/tinytl/utils/memory_cost_profiler.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | import torch.nn as nn 4 | from ofa.utils import Hswish, Hsigmoid, MyConv2d 5 | 6 | from ofa.utils.layers import ResidualBlock 7 | from torchvision.models.resnet import BasicBlock, Bottleneck 8 | from torchvision.models.mobilenet import InvertedResidual 9 | 10 | __all__ = ['count_model_size', 'count_activation_size', 'profile_memory_cost'] 11 | 12 | 13 | def count_model_size(net, trainable_param_bits=32, frozen_param_bits=8, print_log=True): 14 | frozen_param_bits = 32 if frozen_param_bits is 
None else frozen_param_bits 15 | 16 | trainable_param_size = 0 17 | frozen_param_size = 0 18 | for p in net.parameters(): 19 | if p.requires_grad: 20 | trainable_param_size += trainable_param_bits / 8 * p.numel() 21 | else: 22 | frozen_param_size += frozen_param_bits / 8 * p.numel() 23 | model_size = trainable_param_size + frozen_param_size 24 | if print_log: 25 | print('Total: %d' % model_size, 26 | '\tTrainable: %d (data bits %d)' % (trainable_param_size, trainable_param_bits), 27 | '\tFrozen: %d (data bits %d)' % (frozen_param_size, frozen_param_bits)) 28 | # Byte 29 | return model_size 30 | 31 | 32 | def count_activation_size(net, input_size=(1, 3, 224, 224), require_backward=True, activation_bits=32): 33 | act_byte = activation_bits / 8 34 | model = copy.deepcopy(net) 35 | 36 | # noinspection PyArgumentList 37 | def count_convNd(m, x, y): 38 | # count activation size required by backward 39 | if m.weight is not None and m.weight.requires_grad: 40 | m.grad_activations = torch.Tensor([x[0].numel() * act_byte]) # bytes 41 | else: 42 | m.grad_activations = torch.Tensor([0]) 43 | # temporary memory footprint required by inference 44 | m.tmp_activations = torch.Tensor([x[0].numel() * act_byte + y.numel() * act_byte // m.groups]) # bytes 45 | 46 | # noinspection PyArgumentList 47 | def count_linear(m, x, y): 48 | # count activation size required by backward 49 | if m.weight is not None and m.weight.requires_grad: 50 | m.grad_activations = torch.Tensor([x[0].numel() * act_byte]) # bytes 51 | else: 52 | m.grad_activations = torch.Tensor([0]) 53 | # temporary memory footprint required by inference 54 | m.tmp_activations = torch.Tensor([x[0].numel() * act_byte + y.numel() * act_byte]) # bytes 55 | 56 | # noinspection PyArgumentList 57 | def count_bn(m, x, _): 58 | # count activation size required by backward 59 | if m.weight is not None and m.weight.requires_grad: 60 | m.grad_activations = torch.Tensor([x[0].numel() * act_byte]) # bytes 61 | else: 62 | m.grad_activations = torch.Tensor([0]) 63 | # temporary memory footprint required by inference 64 | m.tmp_activations = torch.Tensor([x[0].numel() * act_byte]) # bytes 65 | 66 | # noinspection PyArgumentList 67 | def count_relu(m, x, _): 68 | # count activation size required by backward 69 | if require_backward: 70 | m.grad_activations = torch.Tensor([x[0].numel() / 8]) # bytes 71 | else: 72 | m.grad_activations = torch.Tensor([0]) 73 | # temporary memory footprint required by inference 74 | m.tmp_activations = torch.Tensor([x[0].numel() * act_byte]) # bytes 75 | 76 | # noinspection PyArgumentList 77 | def count_smooth_act(m, x, _): 78 | # count activation size required by backward 79 | if require_backward: 80 | m.grad_activations = torch.Tensor([x[0].numel() * act_byte]) # bytes 81 | else: 82 | m.grad_activations = torch.Tensor([0]) 83 | # temporary memory footprint required by inference 84 | m.tmp_activations = torch.Tensor([x[0].numel() * act_byte]) # bytes 85 | 86 | def add_hooks(m_): 87 | if len(list(m_.children())) > 0: 88 | return 89 | 90 | m_.register_buffer('grad_activations', torch.zeros(1)) 91 | m_.register_buffer('tmp_activations', torch.zeros(1)) 92 | 93 | if type(m_) in [nn.Conv1d, nn.Conv2d, nn.Conv3d, MyConv2d]: 94 | fn = count_convNd 95 | elif type(m_) in [nn.Linear]: 96 | fn = count_linear 97 | elif type(m_) in [nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.GroupNorm]: 98 | fn = count_bn 99 | elif type(m_) in [nn.ReLU, nn.ReLU6, nn.LeakyReLU]: 100 | fn = count_relu 101 | elif type(m_) in [nn.Sigmoid, nn.Tanh, Hswish, 
Hsigmoid]: 102 | fn = count_smooth_act 103 | else: 104 | fn = None 105 | 106 | if fn is not None: 107 | _handler = m_.register_forward_hook(fn) 108 | 109 | model.eval() 110 | model.apply(add_hooks) 111 | 112 | x = torch.zeros(input_size).to(model.parameters().__next__().device) 113 | with torch.no_grad(): 114 | model(x) 115 | 116 | memory_info_dict = { 117 | 'peak_activation_size': torch.zeros(1), 118 | 'grad_activation_size': torch.zeros(1), 119 | 'residual_size': torch.zeros(1), 120 | } 121 | 122 | for m in model.modules(): 123 | if len(list(m.children())) == 0: 124 | def new_forward(_module): 125 | def lambda_forward(_x): 126 | current_act_size = _module.tmp_activations + memory_info_dict['grad_activation_size'] + \ 127 | memory_info_dict['residual_size'] 128 | memory_info_dict['peak_activation_size'] = max( 129 | current_act_size, memory_info_dict['peak_activation_size'] 130 | ) 131 | memory_info_dict['grad_activation_size'] += _module.grad_activations 132 | return _module.old_forward(_x) 133 | 134 | return lambda_forward 135 | 136 | m.old_forward = m.forward 137 | m.forward = new_forward(m) 138 | 139 | if (isinstance(m, ResidualBlock) and m.shortcut is not None) or \ 140 | (isinstance(m, InvertedResidual) and m.use_res_connect) or \ 141 | type(m) in [BasicBlock, Bottleneck]: 142 | def new_forward(_module): 143 | def lambda_forward(_x): 144 | memory_info_dict['residual_size'] = _x.numel() * act_byte 145 | result = _module.old_forward(_x) 146 | memory_info_dict['residual_size'] = 0 147 | return result 148 | 149 | return lambda_forward 150 | 151 | m.old_forward = m.forward 152 | m.forward = new_forward(m) 153 | 154 | with torch.no_grad(): 155 | model(x) 156 | 157 | return memory_info_dict['peak_activation_size'].item(), memory_info_dict['grad_activation_size'].item() 158 | 159 | 160 | def profile_memory_cost(net, input_size=(1, 3, 224, 224), require_backward=True, 161 | activation_bits=32, trainable_param_bits=32, frozen_param_bits=8, batch_size=8): 162 | param_size = count_model_size(net, trainable_param_bits, frozen_param_bits, print_log=True) 163 | activation_size, _ = count_activation_size(net, input_size, require_backward, activation_bits) 164 | 165 | memory_cost = activation_size * batch_size + param_size 166 | return memory_cost, {'param_size': param_size, 'act_size': activation_size} 167 | -------------------------------------------------------------------------------- /netaug/setup.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | import os.path 4 | from typing import Dict, Optional, Tuple, Type 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torchvision.datasets as datasets 9 | import torchvision.transforms as transforms 10 | 11 | from models.base import ( 12 | MCUNet, 13 | MobileNetV2, 14 | MobileNetV3, 15 | ProxylessNASMobile, 16 | TinyMobileNetV2, 17 | ) 18 | from models.netaug import ( 19 | NetAugMCUNet, 20 | NetAugMobileNetV2, 21 | NetAugMobileNetV3, 22 | NetAugProxylessNASMobile, 23 | NetAugTinyMobileNetV2, 24 | ) 25 | 26 | __all__ = ["build_data_loader", "build_model", "augemnt_model"] 27 | 28 | 29 | def build_data_loader( 30 | dataset: str, 31 | image_size: int, 32 | batch_size: int, 33 | n_worker: int = 8, 34 | data_path: Optional[str] = None, 35 | num_replica: Optional[int] = None, 36 | rank: Optional[int] = None, 37 | ) -> Tuple[Dict, int]: 38 | # build dataset 39 | dataset_info_dict = { 40 | "imagenet21k_winter_p": ( 41 | os.path.expanduser("~/dataset/imagenet21k_winter_p"), 42 | 10450, 
43 | ), 44 | "imagenet": (os.path.expanduser("~/dataset/imagenet"), 1000), 45 | "car": (os.path.expanduser("~/dataset/fgvc/stanford_car"), 196), 46 | "flowers102": (os.path.expanduser("~/dataset/fgvc/flowers102"), 102), 47 | "food101": (os.path.expanduser("~/dataset/fgvc/food101"), 101), 48 | "cub200": (os.path.expanduser("~/dataset/fgvc/cub200"), 200), 49 | "pets": (os.path.expanduser("~/dataset/fgvc/pets"), 37), 50 | } 51 | assert dataset in dataset_info_dict, f"Do not support {dataset}" 52 | 53 | data_path = data_path or dataset_info_dict[dataset][0] 54 | n_classes = dataset_info_dict[dataset][1] 55 | 56 | # build datasets 57 | train_dataset = datasets.ImageFolder( 58 | os.path.join(data_path, "train"), 59 | transforms.Compose( 60 | [ 61 | transforms.RandomResizedCrop(image_size), 62 | transforms.RandomHorizontalFlip(), 63 | transforms.ColorJitter(brightness=32.0 / 255.0, saturation=0.5), 64 | transforms.ToTensor(), 65 | transforms.Normalize( 66 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 67 | ), 68 | ] 69 | ), 70 | ) 71 | 72 | val_dataset = datasets.ImageFolder( 73 | os.path.join(data_path, "val"), 74 | transforms.Compose( 75 | [ 76 | transforms.Resize(int(math.ceil(image_size / 0.875))), 77 | transforms.CenterCrop(image_size), 78 | transforms.ToTensor(), 79 | transforms.Normalize( 80 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 81 | ), 82 | ] 83 | ), 84 | ) 85 | 86 | sub_train_dataset = copy.deepcopy(train_dataset) # used for resetting bn statistics 87 | if len(sub_train_dataset) > 16000: 88 | g = torch.Generator() 89 | g.manual_seed(937162211) 90 | rand_indexes = torch.randperm(len(sub_train_dataset), generator=g).tolist() 91 | rand_indexes = rand_indexes[:16000] 92 | sub_train_dataset.samples = [ 93 | sub_train_dataset.samples[idx] for idx in rand_indexes 94 | ] 95 | 96 | # build data loader 97 | if num_replica is None: 98 | train_loader = torch.utils.data.DataLoader( 99 | dataset=train_dataset, 100 | batch_size=batch_size, 101 | shuffle=True, 102 | num_workers=n_worker, 103 | pin_memory=True, 104 | drop_last=True, 105 | ) 106 | sub_train_loader = torch.utils.data.DataLoader( 107 | dataset=sub_train_dataset, 108 | batch_size=batch_size, 109 | shuffle=True, 110 | num_workers=n_worker, 111 | pin_memory=True, 112 | drop_last=True, 113 | ) 114 | valid_loader = torch.utils.data.DataLoader( 115 | dataset=val_dataset, 116 | batch_size=batch_size, 117 | shuffle=False, 118 | num_workers=n_worker, 119 | pin_memory=True, 120 | drop_last=False, 121 | ) 122 | else: 123 | train_loader = torch.utils.data.DataLoader( 124 | dataset=train_dataset, 125 | batch_size=batch_size, 126 | sampler=torch.utils.data.distributed.DistributedSampler( 127 | train_dataset, num_replica, rank 128 | ), 129 | num_workers=n_worker, 130 | pin_memory=True, 131 | drop_last=True, 132 | ) 133 | sub_train_loader = torch.utils.data.DataLoader( 134 | dataset=sub_train_dataset, 135 | batch_size=batch_size, 136 | sampler=torch.utils.data.distributed.DistributedSampler( 137 | sub_train_dataset, num_replica, rank 138 | ), 139 | num_workers=n_worker, 140 | pin_memory=True, 141 | drop_last=True, 142 | ) 143 | valid_loader = torch.utils.data.DataLoader( 144 | dataset=val_dataset, 145 | batch_size=batch_size, 146 | sampler=torch.utils.data.distributed.DistributedSampler( 147 | val_dataset, num_replica, rank 148 | ), 149 | num_workers=n_worker, 150 | pin_memory=True, 151 | drop_last=False, 152 | ) 153 | 154 | # prefetch sub_train 155 | sub_train_loader = [data for data in sub_train_loader] 156 | 157 | 
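    # "sub_train" is the cached in-memory subset (at most 16,000 images) used only to re-estimate BN statistics; "train" and "val" are regular loaders.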
data_loader_dict = { 158 | "train": train_loader, 159 | "val": valid_loader, 160 | "sub_train": sub_train_loader, 161 | } 162 | 163 | return data_loader_dict, n_classes 164 | 165 | 166 | def build_model( 167 | name: str, 168 | n_classes=1000, 169 | dropout_rate=0.0, 170 | **kwargs, 171 | ) -> nn.Module: 172 | 173 | model_dict = { 174 | "mbv2": MobileNetV2, 175 | "mbv3": MobileNetV3, 176 | "mcunet": MCUNet, 177 | "proxylessnas": ProxylessNASMobile, 178 | "tinymbv2": TinyMobileNetV2, 179 | } 180 | 181 | name = name.split("-") 182 | if len(name) > 1: 183 | kwargs["width_mult"] = float(name[1]) 184 | name = name[0] 185 | 186 | return model_dict[name](n_classes=n_classes, dropout_rate=dropout_rate, **kwargs) 187 | 188 | 189 | def augemnt_model( 190 | base_model: nn.Module, aug_config: Dict, n_classes=1000, dropout_rate=0.0 191 | ) -> nn.Module: 192 | class_mapping: Dict[Type, Type] = { 193 | MobileNetV2: NetAugMobileNetV2, 194 | TinyMobileNetV2: NetAugTinyMobileNetV2, 195 | ProxylessNASMobile: NetAugProxylessNASMobile, 196 | MCUNet: NetAugMCUNet, 197 | MobileNetV3: NetAugMobileNetV3, 198 | } 199 | return class_mapping[type(base_model)]( 200 | base_model, 201 | aug_expand_list=aug_config["aug_expand_list"], 202 | aug_width_mult_list=aug_config["aug_width_mult_list"], 203 | n_classes=n_classes, 204 | dropout_rate=dropout_rate, 205 | ) 206 | -------------------------------------------------------------------------------- /netaug/models/netaug/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List, Optional, Union 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.nn.modules.batchnorm import _BatchNorm 8 | from tqdm import tqdm 9 | 10 | from models.base.layers import InvertedBlock, SeInvertedBlock, SELayer 11 | from utils.distributed import ddp_reduce_tensor 12 | from utils.metric import AverageMeter 13 | from utils.misc import make_divisible 14 | 15 | __all__ = ["reset_bn", "aug_width", "sync_width", "sort_param", "sort_channels_inner"] 16 | 17 | 18 | def reset_bn( 19 | model: nn.Module, data_loader, sync=False, backend="ddp", progress_bar=False 20 | ) -> None: 21 | bn_mean = {} 22 | bn_var = {} 23 | 24 | tmp_model = copy.deepcopy(model) 25 | for name, m in tmp_model.named_modules(): 26 | if isinstance(m, _BatchNorm): 27 | bn_mean[name] = AverageMeter() 28 | bn_var[name] = AverageMeter() 29 | 30 | def new_forward(bn, mean_est, var_est): 31 | def lambda_forward(x): 32 | x = x.contiguous() 33 | if sync: 34 | batch_mean = ( 35 | x.mean(0, keepdim=True) 36 | .mean(2, keepdim=True) 37 | .mean(3, keepdim=True) 38 | ) # 1, C, 1, 1 39 | if backend == "ddp": 40 | batch_mean = ddp_reduce_tensor(batch_mean, reduce="cat") 41 | else: 42 | raise NotImplementedError 43 | batch_mean = torch.mean(batch_mean, dim=0, keepdim=True) 44 | 45 | batch_var = (x - batch_mean) * (x - batch_mean) 46 | batch_var = ( 47 | batch_var.mean(0, keepdim=True) 48 | .mean(2, keepdim=True) 49 | .mean(3, keepdim=True) 50 | ) 51 | if backend == "ddp": 52 | batch_var = ddp_reduce_tensor(batch_var, reduce="cat") 53 | else: 54 | raise NotImplementedError 55 | batch_var = torch.mean(batch_var, dim=0, keepdim=True) 56 | else: 57 | batch_mean = ( 58 | x.mean(0, keepdim=True) 59 | .mean(2, keepdim=True) 60 | .mean(3, keepdim=True) 61 | ) # 1, C, 1, 1 62 | batch_var = (x - batch_mean) * (x - batch_mean) 63 | batch_var = ( 64 | batch_var.mean(0, keepdim=True) 65 | .mean(2, keepdim=True) 66 | .mean(3, keepdim=True) 67 | ) 
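                    # both branches above estimate per-channel batch statistics,
                    # E[x] and E[(x - E[x])^2] over the (N, H, W) axes; with
                    # sync=True they are additionally gathered and averaged
                    # across DDP workers. F.batch_norm below then runs with
                    # training=False, so these fresh estimates are used in place
                    # of the module's stale running buffers.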
68 | 69 | batch_mean = torch.squeeze(batch_mean) 70 | batch_var = torch.squeeze(batch_var) 71 | 72 | mean_est.update(batch_mean.data, x.size(0)) 73 | var_est.update(batch_var.data, x.size(0)) 74 | 75 | # bn forward using calculated mean & var 76 | _feature_dim = batch_mean.shape[0] 77 | return F.batch_norm( 78 | x, 79 | batch_mean, 80 | batch_var, 81 | bn.weight[:_feature_dim], 82 | bn.bias[:_feature_dim], 83 | False, 84 | 0.0, 85 | bn.eps, 86 | ) 87 | 88 | return lambda_forward 89 | 90 | m.forward = new_forward(m, bn_mean[name], bn_var[name]) 91 | 92 | # skip if there is no batch normalization layers in the network 93 | if len(bn_mean) == 0: 94 | return 95 | 96 | tmp_model.eval() 97 | with torch.no_grad(): 98 | with tqdm( 99 | total=len(data_loader), desc="reset bn", disable=(not progress_bar) 100 | ) as t: 101 | for images, _ in data_loader: 102 | images = images.cuda() 103 | tmp_model(images) 104 | t.set_postfix( 105 | { 106 | "batch_size": images.size(0), 107 | "image_size": images.size(2), 108 | } 109 | ) 110 | t.update() 111 | 112 | for name, m in model.named_modules(): 113 | if name in bn_mean and bn_mean[name].count > 0: 114 | feature_dim = bn_mean[name].avg.size(0) 115 | assert isinstance(m, _BatchNorm) 116 | m.running_mean.data[:feature_dim].copy_(bn_mean[name].avg) 117 | m.running_var.data[:feature_dim].copy_(bn_var[name].avg) 118 | 119 | 120 | def aug_width( 121 | base_width: float, factor_list: List[float], divisor: Optional[int] = None 122 | ) -> List[Union[float, int]]: 123 | out_list = [base_width * factor for factor in factor_list] 124 | if divisor is not None: 125 | out_list = [make_divisible(out_dim, divisor) for out_dim in out_list] 126 | return out_list 127 | 128 | 129 | def sync_width(width) -> int: 130 | width = ddp_reduce_tensor(torch.Tensor(1).fill_(width).cuda(), "root") 131 | return int(width) 132 | 133 | 134 | def sort_param( 135 | param: nn.Parameter, 136 | dim: int, 137 | sorted_idx: torch.Tensor, 138 | ) -> None: 139 | param.data.copy_( 140 | torch.clone(torch.index_select(param.data, dim, sorted_idx)).detach() 141 | ) 142 | 143 | 144 | def sort_norm(norm, sorted_idx: torch.Tensor) -> None: 145 | sort_param(norm.weight, 0, sorted_idx) 146 | sort_param(norm.bias, 0, sorted_idx) 147 | try: 148 | sort_param(norm.running_mean, 0, sorted_idx) 149 | sort_param(norm.running_var, 0, sorted_idx) 150 | except AttributeError: 151 | pass 152 | 153 | 154 | def sort_se(se: SELayer, sorted_idx: torch.Tensor) -> None: 155 | # expand conv, output dim 0 156 | sort_param(se.expand_conv.weight, 0, sorted_idx) 157 | sort_param(se.expand_conv.bias, 0, sorted_idx) 158 | # reduce conv, input dim 1 159 | sort_param(se.reduce_conv.weight, 1, sorted_idx) 160 | 161 | # sort middle weight 162 | importance = torch.sum(torch.abs(se.expand_conv.weight.data), dim=(0, 2, 3)) 163 | sorted_importance, sorted_idx = torch.sort(importance, dim=0, descending=True) 164 | # expand conv, input dim 1 165 | sort_param(se.expand_conv.weight, 1, sorted_idx) 166 | # reduce conv, output dim 0 167 | sort_param(se.reduce_conv.weight, 0, sorted_idx) 168 | sort_param(se.reduce_conv.bias, 0, sorted_idx) 169 | 170 | 171 | def sort_channels_inner(block) -> None: 172 | if isinstance(block, (InvertedBlock, SeInvertedBlock)): 173 | # calc channel importance 174 | importance = torch.sum( 175 | torch.abs(block.point_conv.conv.weight.data), dim=(0, 2, 3) 176 | ) 177 | sorted_importance, sorted_idx = torch.sort(importance, dim=0, descending=True) 178 | # sort based on sorted_idx 179 | 
sort_param(block.point_conv.conv.weight, 1, sorted_idx)
180 |         sort_norm(block.depth_conv.norm, sorted_idx)
181 |         sort_param(block.depth_conv.conv.weight, 0, sorted_idx)
182 |         sort_norm(block.inverted_conv.norm, sorted_idx)
183 |         sort_param(block.inverted_conv.conv.weight, 0, sorted_idx)
184 |         if isinstance(block, SeInvertedBlock):
185 |             sort_se(block.se_layer, sorted_idx)
186 |     else:
187 |         raise NotImplementedError
188 | 
--------------------------------------------------------------------------------
/tinytl/tinytl/model/modules.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from collections import OrderedDict
4 | from ofa.utils.layers import set_layer_from_config, ZeroLayer
5 | from ofa.utils import MyModule, MyNetwork, MyGlobalAvgPool2d, min_divisible_value, SEModule
6 | from ofa.utils import get_same_padding, make_divisible, build_activation, init_models
7 | 
8 | __all__ = ['my_set_layer_from_config',
9 |            'LiteResidualModule', 'ReducedMBConvLayer']
10 | 
11 | 
12 | def my_set_layer_from_config(layer_config):
13 |     if layer_config is None:
14 |         return None
15 |     name2layer = {
16 |         LiteResidualModule.__name__: LiteResidualModule,
17 |         ReducedMBConvLayer.__name__: ReducedMBConvLayer,
18 |     }
19 |     layer_name = layer_config.pop('name')
20 |     if layer_name in name2layer:
21 |         layer = name2layer[layer_name]
22 |         return layer.build_from_config(layer_config)
23 |     else:
24 |         return set_layer_from_config({'name': layer_name, **layer_config})
25 | 
26 | 
27 | class LiteResidualModule(MyModule):
28 | 
29 |     def __init__(self, main_branch, in_channels, out_channels,
30 |                  expand=1.0, kernel_size=3, act_func='relu', n_groups=2,
31 |                  downsample_ratio=2, upsample_type='bilinear', stride=1):
32 |         super(LiteResidualModule, self).__init__()
33 | 
34 |         self.main_branch = main_branch
35 | 
36 |         self.lite_residual_config = {
37 |             'in_channels': in_channels,
38 |             'out_channels': out_channels,
39 |             'expand': expand,
40 |             'kernel_size': kernel_size,
41 |             'act_func': act_func,
42 |             'n_groups': n_groups,
43 |             'downsample_ratio': downsample_ratio,
44 |             'upsample_type': upsample_type,
45 |             'stride': stride,
46 |         }
47 | 
48 |         kernel_size = 1 if downsample_ratio is None else kernel_size
49 | 
50 |         padding = get_same_padding(kernel_size)
51 |         if downsample_ratio is None:
52 |             pooling = MyGlobalAvgPool2d()
53 |         else:
54 |             pooling = nn.AvgPool2d(downsample_ratio, downsample_ratio, 0)
55 |         num_mid = make_divisible(int(in_channels * expand), divisor=MyNetwork.CHANNEL_DIVISIBLE)
56 |         self.lite_residual = nn.Sequential(OrderedDict({
57 |             'pooling': pooling,
58 |             'conv1': nn.Conv2d(in_channels, num_mid, kernel_size, stride, padding, groups=n_groups, bias=False),
59 |             'bn1': nn.BatchNorm2d(num_mid),
60 |             'act': build_activation(act_func),
61 |             'conv2': nn.Conv2d(num_mid, out_channels, 1, 1, 0, bias=False),
62 |             'final_bn': nn.BatchNorm2d(out_channels),
63 |         }))
64 | 
65 |         # initialize
66 |         init_models(self.lite_residual)
67 |         self.lite_residual.final_bn.weight.data.zero_()
68 | 
69 |     def forward(self, x):
70 |         main_x = self.main_branch(x)
71 |         lite_residual_x = self.lite_residual(x)
72 |         if self.lite_residual_config['downsample_ratio'] is not None:
73 |             lite_residual_x = F.interpolate(lite_residual_x, main_x.shape[2:],
74 |                                             mode=self.lite_residual_config['upsample_type'])
75 |         return main_x + lite_residual_x
76 | 
77 |     @property
78 |     def module_str(self):
79 |         return self.main_branch.module_str + ' + LiteResidual(downsample=%s, n_groups=%s, 
expand=%s, ks=%s)' % ( 80 | self.lite_residual_config['downsample_ratio'], self.lite_residual_config['n_groups'], 81 | self.lite_residual_config['expand'], self.lite_residual_config['kernel_size'], 82 | ) 83 | 84 | @property 85 | def config(self): 86 | return { 87 | 'name': LiteResidualModule.__name__, 88 | 'main': self.main_branch.config, 89 | 'lite_residual': self.lite_residual_config, 90 | } 91 | 92 | @staticmethod 93 | def build_from_config(config): 94 | main_branch = my_set_layer_from_config(config['main']) 95 | lite_residual_module = LiteResidualModule( 96 | main_branch, **config['lite_residual'] 97 | ) 98 | return lite_residual_module 99 | 100 | def __repr__(self): 101 | return '{\n (main branch): ' + self.main_branch.__repr__() + ', ' + \ 102 | '\n (lite residual): ' + self.lite_residual.__repr__() + '}' 103 | 104 | @staticmethod 105 | def insert_lite_residual(net, downsample_ratio=2, upsample_type='bilinear', 106 | expand=1.0, max_kernel_size=5, act_func='relu', n_groups=2, 107 | **kwargs): 108 | if LiteResidualModule.has_lite_residual_module(net): 109 | # skip if already has lite residual modules 110 | return 111 | from ofa.imagenet_classification.networks import ProxylessNASNets 112 | if isinstance(net, ProxylessNASNets): 113 | bn_param = net.get_bn_param() 114 | 115 | # blocks 116 | max_resolution = 128 117 | stride_stages = [2, 2, 2, 1, 2, 1] 118 | for block_index_list, stride in zip(net.grouped_block_index, stride_stages): 119 | for i, idx in enumerate(block_index_list): 120 | block = net.blocks[idx].conv 121 | if isinstance(block, ZeroLayer): 122 | continue 123 | s = stride if i == 0 else 1 124 | block_downsample_ratio = downsample_ratio 125 | block_resolution = max(1, max_resolution // block_downsample_ratio) 126 | max_resolution //= s 127 | 128 | kernel_size = max_kernel_size 129 | if block_resolution == 1: 130 | kernel_size = 1 131 | block_downsample_ratio = None 132 | else: 133 | while block_resolution < kernel_size: 134 | kernel_size -= 2 135 | net.blocks[idx].conv = LiteResidualModule( 136 | block, block.in_channels, block.out_channels, expand=expand, kernel_size=kernel_size, 137 | act_func=act_func, n_groups=n_groups, downsample_ratio=block_downsample_ratio, 138 | upsample_type=upsample_type, stride=s, 139 | ) 140 | 141 | net.set_bn_param(**bn_param) 142 | else: 143 | raise NotImplementedError 144 | 145 | @staticmethod 146 | def has_lite_residual_module(net): 147 | for m in net.modules(): 148 | if isinstance(m, LiteResidualModule): 149 | return True 150 | return False 151 | 152 | @property 153 | def in_channels(self): 154 | return self.lite_residual_config['in_channels'] 155 | 156 | @property 157 | def out_channels(self): 158 | return self.lite_residual_config['out_channels'] 159 | 160 | 161 | class ReducedMBConvLayer(MyModule): 162 | 163 | def __init__(self, in_channels, out_channels, 164 | kernel_size=3, stride=1, expand_ratio=6, mid_channels=None, act_func='relu6', use_se=False, groups=1): 165 | super(ReducedMBConvLayer, self).__init__() 166 | 167 | self.in_channels = in_channels 168 | self.out_channels = out_channels 169 | 170 | self.kernel_size = kernel_size 171 | self.stride = stride 172 | self.expand_ratio = expand_ratio 173 | self.mid_channels = mid_channels 174 | self.act_func = act_func 175 | self.use_se = use_se 176 | self.groups = groups 177 | 178 | if self.mid_channels is None: 179 | feature_dim = round(self.in_channels * self.expand_ratio) 180 | else: 181 | feature_dim = self.mid_channels 182 | 183 | pad = get_same_padding(self.kernel_size) 184 | groups 
= feature_dim if self.groups is None else min_divisible_value(feature_dim, self.groups) 185 | self.expand_conv = nn.Sequential(OrderedDict({ 186 | 'conv': nn.Conv2d(in_channels, feature_dim, kernel_size, stride, pad, groups=groups, bias=False), 187 | 'bn': nn.BatchNorm2d(feature_dim), 188 | 'act': build_activation(self.act_func, inplace=True), 189 | })) 190 | if self.use_se: 191 | self.expand_conv.add_module('se', SEModule(feature_dim)) 192 | 193 | self.reduce_conv = nn.Sequential(OrderedDict({ 194 | 'conv': nn.Conv2d(feature_dim, out_channels, 1, 1, 0, bias=False), 195 | 'bn': nn.BatchNorm2d(out_channels), 196 | })) 197 | 198 | def forward(self, x): 199 | x = self.expand_conv(x) 200 | x = self.reduce_conv(x) 201 | return x 202 | 203 | @property 204 | def module_str(self): 205 | if self.mid_channels is None: 206 | expand_ratio = self.expand_ratio 207 | else: 208 | expand_ratio = self.mid_channels // self.in_channels 209 | layer_str = '%dx%d_ReducedMBConv%.3f_%s' % ( 210 | self.kernel_size, self.kernel_size, expand_ratio, self.act_func.upper()) 211 | if self.use_se: 212 | layer_str = 'SE_' + layer_str 213 | layer_str += '_O%d' % self.out_channels 214 | if self.groups is not None: 215 | layer_str += '_G%d' % self.groups 216 | if isinstance(self.reduce_conv.bn, nn.GroupNorm): 217 | layer_str += '_GN%d' % self.reduce_conv.bn.num_groups 218 | elif isinstance(self.reduce_conv.bn, nn.BatchNorm2d): 219 | layer_str += '_BN' 220 | 221 | return layer_str 222 | 223 | @property 224 | def config(self): 225 | return { 226 | 'name': ReducedMBConvLayer.__name__, 227 | 'in_channels': self.in_channels, 228 | 'out_channels': self.out_channels, 229 | 'kernel_size': self.kernel_size, 230 | 'stride': self.stride, 231 | 'expand_ratio': self.expand_ratio, 232 | 'mid_channels': self.mid_channels, 233 | 'act_func': self.act_func, 234 | 'use_se': self.use_se, 235 | 'groups': self.groups, 236 | } 237 | 238 | @staticmethod 239 | def build_from_config(config): 240 | return ReducedMBConvLayer(**config) 241 | -------------------------------------------------------------------------------- /netaug/models/netaug/mcunet.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch.nn as nn 4 | 5 | from models.base.layers import OpSequential, ResidualBlock 6 | from models.base.mcunet import MCUNet 7 | from models.netaug.layers import ( 8 | DynamicConvLayer, 9 | DynamicDsConvLayer, 10 | DynamicInvertedBlock, 11 | DynamicLinearLayer, 12 | ) 13 | from models.netaug.utils import aug_width, sync_width 14 | from utils import make_divisible, torch_random_choices 15 | 16 | from .mbv2 import NetAugMobileNetV2 17 | 18 | __all__ = ["NetAugMCUNet"] 19 | 20 | 21 | class NetAugMCUNet(NetAugMobileNetV2): 22 | def __init__( 23 | self, 24 | base_net: MCUNet, 25 | aug_expand_list: List[float], 26 | aug_width_mult_list: List[float], 27 | n_classes: int, 28 | dropout_rate=0.0, 29 | ): 30 | nn.Module.__init__(self) 31 | max_width_mult = max(aug_width_mult_list) 32 | 33 | # input stem 34 | base_input_stem = base_net.backbone["input_stem"] 35 | aug_input_stem = OpSequential( 36 | [ 37 | DynamicConvLayer( 38 | 3, 39 | aug_width( 40 | base_input_stem.op_list[0].out_channels, aug_width_mult_list, 1 41 | ), 42 | stride=2, 43 | act_func="relu6", 44 | ), 45 | ResidualBlock( 46 | DynamicDsConvLayer( 47 | make_divisible( 48 | base_input_stem.op_list[0].out_channels * max_width_mult, 1 49 | ), 50 | aug_width( 51 | base_input_stem.op_list[1].conv.out_channels, 52 | aug_width_mult_list, 
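                        # the trailing divisor argument of 1 makes aug_width
                        # round each augmented width to a plain integer channel
                        # count instead of forcing multiples of a larger divisor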
53 | 1, 54 | ), 55 | act_func=("relu6", None), 56 | ), 57 | shortcut=None, 58 | ), 59 | ] 60 | ) 61 | 62 | # stages 63 | aug_stages = [] 64 | for base_stage in base_net.backbone["stages"]: 65 | stage = [] 66 | for base_block in base_stage.op_list: 67 | stage.append( 68 | ResidualBlock( 69 | DynamicInvertedBlock( 70 | in_channels=make_divisible( 71 | base_block.conv.in_channels * max_width_mult, 1 72 | ), 73 | out_channels=aug_width( 74 | base_block.conv.out_channels, aug_width_mult_list, 1 75 | ), 76 | kernel_size=base_block.conv.kernel_size, 77 | expand_ratio=aug_width( 78 | base_block.conv.expand_ratio, aug_expand_list 79 | ), 80 | stride=base_block.conv.stride, 81 | act_func=( 82 | base_block.conv.inverted_conv.act, 83 | base_block.conv.depth_conv.act, 84 | base_block.conv.point_conv.act, 85 | ), 86 | ), 87 | shortcut=base_block.shortcut, 88 | ) 89 | ) 90 | aug_stages.append(OpSequential(stage)) 91 | 92 | # head 93 | base_head = base_net.head 94 | aug_head = OpSequential( 95 | [ 96 | ResidualBlock( 97 | DynamicInvertedBlock( 98 | make_divisible( 99 | base_head.op_list[0].conv.in_channels * max_width_mult, 1 100 | ), 101 | aug_width( 102 | base_head.op_list[0].conv.out_channels, 103 | aug_width_mult_list, 104 | 1, 105 | ), 106 | base_head.op_list[0].conv.kernel_size, 107 | expand_ratio=aug_width( 108 | base_head.op_list[0].conv.expand_ratio, aug_expand_list 109 | ), 110 | act_func=("relu6", "relu6", None), 111 | ), 112 | shortcut=None, 113 | ), 114 | nn.AdaptiveAvgPool2d(1), 115 | DynamicLinearLayer( 116 | make_divisible( 117 | base_head.op_list[-1].in_features * max_width_mult, 1 118 | ), 119 | n_classes, 120 | dropout_rate=dropout_rate, 121 | ), 122 | ] 123 | ) 124 | 125 | self.backbone = nn.ModuleDict( 126 | { 127 | "input_stem": aug_input_stem, 128 | "stages": nn.ModuleList(aug_stages), 129 | } 130 | ) 131 | self.head = aug_head 132 | 133 | def set_active(self, mode: str, sync=False, generator=None): 134 | # input stem 135 | first_conv, first_block = self.backbone["input_stem"].op_list 136 | if mode in ["min", "min_w"]: 137 | first_conv.conv.active_out_channels = min(first_conv.out_channels_list) 138 | first_block.conv.point_conv.conv.active_out_channels = min( 139 | first_block.conv.point_conv.out_channels_list 140 | ) 141 | elif mode in ["random", "min_e"]: 142 | first_conv.conv.active_out_channels = torch_random_choices( 143 | first_conv.out_channels_list, 144 | generator, 145 | ) 146 | first_block.conv.point_conv.conv.active_out_channels = torch_random_choices( 147 | first_block.conv.point_conv.out_channels_list, 148 | generator, 149 | ) 150 | else: 151 | raise NotImplementedError 152 | if sync: 153 | first_conv.conv.active_out_channels = sync_width( 154 | first_conv.conv.active_out_channels 155 | ) 156 | first_block.conv.point_conv.conv.active_out_channels = sync_width( 157 | first_block.conv.point_conv.conv.active_out_channels 158 | ) 159 | 160 | # stages 161 | in_channels = first_block.conv.point_conv.conv.active_out_channels 162 | for block in self.all_blocks: 163 | if block.shortcut is None: 164 | if mode in ["min", "min_w"]: 165 | active_out_channels = min(block.conv.point_conv.out_channels_list) 166 | elif mode in ["random", "min_e"]: 167 | active_out_channels = torch_random_choices( 168 | block.conv.point_conv.out_channels_list, 169 | generator, 170 | ) 171 | else: 172 | raise NotImplementedError 173 | else: 174 | active_out_channels = in_channels 175 | if mode in ["min", "min_e"]: 176 | active_expand_ratio = min(block.conv.expand_ratio_list) 177 | elif mode in ["min_w", 
"random"]: 178 | active_expand_ratio = torch_random_choices( 179 | block.conv.expand_ratio_list, 180 | generator, 181 | ) 182 | else: 183 | raise NotImplementedError 184 | active_mid_channels = make_divisible(active_expand_ratio * in_channels, 1) 185 | if sync: 186 | active_mid_channels = sync_width(active_mid_channels) 187 | active_out_channels = sync_width(active_out_channels) 188 | 189 | block.conv.inverted_conv.conv.active_out_channels = active_mid_channels 190 | block.conv.point_conv.conv.active_out_channels = active_out_channels 191 | 192 | in_channels = active_out_channels 193 | 194 | def export(self) -> MCUNet: 195 | export_model = MCUNet.__new__(MCUNet) 196 | nn.Module.__init__(export_model) 197 | # input stem 198 | input_stem = OpSequential( 199 | [ 200 | self.backbone["input_stem"].op_list[0].export(), 201 | ResidualBlock( 202 | self.backbone["input_stem"].op_list[1].conv.export(), 203 | self.backbone["input_stem"].op_list[1].shortcut, 204 | ), 205 | ] 206 | ) 207 | 208 | # stages 209 | stages = [] 210 | for stage in self.backbone["stages"]: 211 | blocks = [] 212 | for block in stage.op_list: 213 | blocks.append( 214 | ResidualBlock( 215 | block.conv.export(), 216 | block.shortcut, 217 | ) 218 | ) 219 | stages.append(OpSequential(blocks)) 220 | 221 | # head 222 | head = OpSequential( 223 | [ 224 | ResidualBlock( 225 | self.head.op_list[0].conv.export(), 226 | self.head.op_list[0].shortcut, 227 | ), 228 | self.head.op_list[1], 229 | self.head.op_list[2].export(), 230 | ] 231 | ) 232 | export_model.backbone = nn.ModuleDict( 233 | { 234 | "input_stem": input_stem, 235 | "stages": nn.ModuleList(stages), 236 | } 237 | ) 238 | export_model.head = head 239 | return export_model 240 | -------------------------------------------------------------------------------- /netaug/models/base/layers.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Union 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | __all__ = [ 8 | "build_act", 9 | "ConvLayer", 10 | "LinearLayer", 11 | "SELayer", 12 | "DsConvLayer", 13 | "InvertedBlock", 14 | "SeInvertedBlock", 15 | "ResidualBlock", 16 | "OpSequential", 17 | ] 18 | 19 | 20 | def build_norm(name: Optional[str], num_features: int) -> Optional[nn.Module]: 21 | if name is None: 22 | return None 23 | elif name == "bn_2d": 24 | return nn.BatchNorm2d(num_features) 25 | else: 26 | raise NotImplementedError 27 | 28 | 29 | def build_act(name: Union[str, nn.Module, None]) -> Optional[nn.Module]: 30 | if name is None: 31 | return None 32 | elif isinstance(name, nn.Module): 33 | return name 34 | elif name == "relu": 35 | return nn.ReLU(inplace=True) 36 | elif name == "relu6": 37 | return nn.ReLU6(inplace=True) 38 | elif name == "h_swish": 39 | return nn.Hardswish(inplace=True) 40 | elif name == "h_sigmoid": 41 | return nn.Hardsigmoid(inplace=True) 42 | else: 43 | raise NotImplementedError 44 | 45 | 46 | class ConvLayer(nn.Module): 47 | def __init__( 48 | self, 49 | in_channels: int, 50 | out_channels: int, 51 | kernel_size=3, 52 | stride=1, 53 | dilation=1, 54 | groups=1, 55 | use_bias=False, 56 | norm="bn_2d", 57 | act_func="relu", 58 | ): 59 | super(ConvLayer, self).__init__() 60 | self.in_channels = in_channels 61 | self.out_channels = out_channels 62 | self.kernel_size = kernel_size 63 | self.stride = stride 64 | self.dilation = dilation 65 | self.groups = groups 66 | 67 | padding = kernel_size // 2 68 | padding *= dilation 69 | 70 | self.conv = 
nn.Conv2d( 71 | in_channels, 72 | out_channels, 73 | kernel_size=(kernel_size, kernel_size), 74 | stride=(stride, stride), 75 | padding=padding, 76 | dilation=(dilation, dilation), 77 | groups=groups, 78 | bias=use_bias, 79 | ) 80 | self.norm = build_norm(norm, num_features=out_channels) 81 | self.act = build_act(act_func) 82 | 83 | def forward(self, x: torch.Tensor) -> torch.Tensor: 84 | x = self.conv(x) 85 | if self.norm: 86 | x = self.norm(x) 87 | if self.act: 88 | x = self.act(x) 89 | return x 90 | 91 | 92 | class LinearLayer(nn.Module): 93 | def __init__( 94 | self, 95 | in_features: int, 96 | out_features: int, 97 | use_bias=True, 98 | dropout_rate=0, 99 | norm=None, 100 | act_func=None, 101 | ): 102 | super(LinearLayer, self).__init__() 103 | self.in_features = in_features 104 | self.out_features = out_features 105 | 106 | self.dropout = ( 107 | nn.Dropout(dropout_rate, inplace=False) if dropout_rate > 0 else None 108 | ) 109 | self.linear = nn.Linear(in_features, out_features, use_bias) 110 | self.norm = build_norm(norm, num_features=out_features) 111 | self.act = build_act(act_func) 112 | 113 | def _try_squeeze(self, x: torch.Tensor) -> torch.Tensor: 114 | if x.dim() > 2: 115 | for i in range(x.dim() - 1, 1, -1): 116 | x = torch.squeeze(x, dim=i) 117 | return x 118 | 119 | def forward(self, x: torch.Tensor) -> torch.Tensor: 120 | x = self._try_squeeze(x) 121 | if self.dropout: 122 | x = self.dropout(x) 123 | x = self.linear(x) 124 | if self.norm: 125 | x = self.norm(x) 126 | if self.act: 127 | x = self.act(x) 128 | return x 129 | 130 | 131 | class SELayer(nn.Module): 132 | def __init__( 133 | self, 134 | in_channels: int, 135 | mid_channels=None, 136 | reduction=4, 137 | min_dim=16, 138 | act_func="relu", 139 | ): 140 | super(SELayer, self).__init__() 141 | self.in_channels = in_channels 142 | self.mid_channels = mid_channels or max(round(in_channels / reduction), min_dim) 143 | self.reduction = self.in_channels / self.mid_channels + 1e-10 144 | self.min_dim = min_dim 145 | 146 | self.pooling = nn.AdaptiveAvgPool2d(1) 147 | self.reduce_conv = nn.Conv2d( 148 | in_channels, self.mid_channels, kernel_size=(1, 1), bias=True 149 | ) 150 | self.act = build_act(act_func) 151 | self.expand_conv = nn.Conv2d( 152 | self.mid_channels, in_channels, kernel_size=(1, 1), bias=True 153 | ) 154 | 155 | def forward(self, x: torch.Tensor) -> torch.Tensor: 156 | channel_attention = self.pooling(x) 157 | channel_attention = self.reduce_conv(channel_attention) 158 | channel_attention = self.act(channel_attention) 159 | channel_attention = self.expand_conv(channel_attention) 160 | channel_attention = F.hardsigmoid(channel_attention, inplace=True) 161 | return x * channel_attention 162 | 163 | 164 | class DsConvLayer(nn.Module): 165 | def __init__( 166 | self, 167 | in_channels: int, 168 | out_channels: int, 169 | kernel_size=3, 170 | stride=1, 171 | act_func=("relu6", None), 172 | norm=("bn_2d", "bn_2d"), 173 | ): 174 | super(DsConvLayer, self).__init__() 175 | self.in_channels = in_channels 176 | self.out_channels = out_channels 177 | self.kernel_size = kernel_size 178 | self.stride = stride 179 | 180 | self.depth_conv = ConvLayer( 181 | in_channels, 182 | in_channels, 183 | kernel_size, 184 | stride, 185 | groups=in_channels, 186 | norm=norm[0], 187 | act_func=act_func[0], 188 | ) 189 | self.point_conv = ConvLayer( 190 | in_channels, 191 | out_channels, 192 | 1, 193 | norm=norm[1], 194 | act_func=act_func[1], 195 | ) 196 | 197 | def forward(self, x: torch.Tensor) -> torch.Tensor: 198 | x = 
self.depth_conv(x) 199 | x = self.point_conv(x) 200 | return x 201 | 202 | 203 | class InvertedBlock(nn.Module): 204 | def __init__( 205 | self, 206 | in_channels: int, 207 | out_channels: int, 208 | kernel_size=3, 209 | stride=1, 210 | mid_channels=None, 211 | expand_ratio=6, 212 | act_func=("relu6", "relu6", None), 213 | norm=("bn_2d", "bn_2d", "bn_2d"), 214 | ): 215 | super(InvertedBlock, self).__init__() 216 | self.in_channels = in_channels 217 | self.out_channels = out_channels 218 | self.kernel_size = kernel_size 219 | self.stride = stride 220 | self.mid_channels = mid_channels or round(in_channels * expand_ratio) 221 | self.expand_ratio = self.mid_channels / self.in_channels + 1e-10 222 | 223 | self.inverted_conv = ConvLayer( 224 | in_channels, 225 | self.mid_channels, 226 | 1, 227 | norm=norm[0], 228 | act_func=act_func[0], 229 | ) 230 | self.depth_conv = ConvLayer( 231 | self.mid_channels, 232 | self.mid_channels, 233 | kernel_size, 234 | stride, 235 | groups=self.mid_channels, 236 | norm=norm[1], 237 | act_func=act_func[1], 238 | ) 239 | self.point_conv = ConvLayer( 240 | self.mid_channels, 241 | out_channels, 242 | 1, 243 | norm=norm[2], 244 | act_func=act_func[2], 245 | ) 246 | 247 | def forward(self, x: torch.Tensor) -> torch.Tensor: 248 | x = self.inverted_conv(x) 249 | x = self.depth_conv(x) 250 | x = self.point_conv(x) 251 | return x 252 | 253 | 254 | class SeInvertedBlock(InvertedBlock): 255 | def __init__( 256 | self, 257 | in_channels: int, 258 | out_channels: int, 259 | kernel_size=3, 260 | stride=1, 261 | mid_channels=None, 262 | expand_ratio=6, 263 | act_func=("relu6", "relu6", None), 264 | norm=("bn_2d", "bn_2d", "bn_2d"), 265 | se_config: Optional[Dict] = None, 266 | ): 267 | super(SeInvertedBlock, self).__init__( 268 | in_channels=in_channels, 269 | out_channels=out_channels, 270 | kernel_size=kernel_size, 271 | stride=stride, 272 | mid_channels=mid_channels, 273 | expand_ratio=expand_ratio, 274 | act_func=act_func, 275 | norm=norm, 276 | ) 277 | se_config = se_config or { 278 | "reduction": 4, 279 | "min_dim": 16, 280 | "act_func": "relu", 281 | } 282 | self.se_layer = SELayer(self.depth_conv.out_channels, **se_config) 283 | 284 | def forward(self, x: torch.Tensor) -> torch.Tensor: 285 | x = self.inverted_conv(x) 286 | x = self.depth_conv(x) 287 | x = self.se_layer(x) 288 | x = self.point_conv(x) 289 | return x 290 | 291 | 292 | class ResidualBlock(nn.Module): 293 | def __init__(self, conv: Optional[nn.Module], shortcut: Optional[nn.Module]): 294 | super(ResidualBlock, self).__init__() 295 | self.conv = conv 296 | self.shortcut = shortcut 297 | 298 | def forward(self, x: torch.Tensor) -> torch.Tensor: 299 | if self.conv is None: 300 | return x 301 | elif self.shortcut is None: 302 | return self.conv(x) 303 | else: 304 | return self.conv(x) + self.shortcut(x) 305 | 306 | 307 | class OpSequential(nn.Module): 308 | def __init__(self, op_list: List[Optional[nn.Module]]): 309 | super(OpSequential, self).__init__() 310 | valid_op_list = [] 311 | for op in op_list: 312 | if op is not None: 313 | valid_op_list.append(op) 314 | self.op_list = nn.ModuleList(valid_op_list) 315 | 316 | def forward(self, x: torch.Tensor) -> torch.Tensor: 317 | for op in self.op_list: 318 | x = op(x) 319 | return x 320 | -------------------------------------------------------------------------------- /tinytl/tinytl_fgvc_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import inspect 4 | import sys 5 | import numpy 
as np 6 | import json 7 | import random 8 | import time 9 | import torch 10 | 11 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 12 | parent_dir = os.path.dirname(current_dir) 13 | sys.path.insert(0, os.path.join(parent_dir, 'once-for-all')) 14 | 15 | from ofa.utils.layers import LinearLayer 16 | from ofa.model_zoo import proxylessnas_mobile 17 | from ofa.imagenet_classification.run_manager import RunManager 18 | from ofa.utils import init_models, download_url, list_mean 19 | from ofa.utils import replace_conv2d_with_my_conv2d, replace_bn_with_gn 20 | from tinytl.data_providers import FGVCRunConfig 21 | from tinytl.utils import set_module_grad_status, enable_bn_update, enable_bias_update, weight_quantization 22 | from tinytl.utils import profile_memory_cost 23 | from tinytl.model import LiteResidualModule, build_network_from_config 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--path', type=str, default=None) 27 | parser.add_argument('--gpu', help='gpu available', default='0') 28 | parser.add_argument('--resume', action='store_true') 29 | parser.add_argument('--manual_seed', default=0, type=int) 30 | 31 | """ RunConfig: dataset related """ 32 | parser.add_argument('--dataset', type=str, default='flowers102', choices=[ 33 | 'aircraft', 'car', 'flowers102', 34 | 'food101', 'cub200', 'pets', 35 | 'cifar10', 'cifar100', 36 | ]) 37 | parser.add_argument('--train_batch_size', type=int, default=8) 38 | parser.add_argument('--test_batch_size', type=int, default=100) 39 | parser.add_argument('--valid_size', type=float, default=None) 40 | 41 | parser.add_argument('--n_worker', type=int, default=10) 42 | parser.add_argument('--resize_scale', type=float, default=0.22) 43 | parser.add_argument('--distort_color', type=str, default='tf', choices=['tf', 'torch', 'None']) 44 | parser.add_argument('--image_size', type=int, default=224) 45 | 46 | """ RunConfig: optimization related """ 47 | parser.add_argument('--n_epochs', type=int, default=50) 48 | parser.add_argument('--init_lr', type=float, default=0.05) 49 | parser.add_argument('--lr_schedule_type', type=str, default='cosine') 50 | 51 | parser.add_argument('--opt_type', type=str, default='adam', choices=['sgd', 'adam']) 52 | parser.add_argument('--momentum', type=float, default=0.9) # opt_param 53 | parser.add_argument('--no_nesterov', action='store_true') # opt_param 54 | parser.add_argument('--weight_decay', type=float, default=0) 55 | parser.add_argument('--no_decay_keys', type=str, default='bn#bias', choices=['None', 'bn', 'bn#bias', 'bias']) 56 | parser.add_argument('--label_smoothing', type=float, default=0) 57 | 58 | """ net config """ 59 | parser.add_argument('--net', type=str, default='proxyless_mobile', choices=['proxyless_mobile', 'specialized']) 60 | parser.add_argument('--dropout', type=float, default=0.2) 61 | parser.add_argument('--ws_eps', type=float, default=1e-5) 62 | parser.add_argument('--net_path', type=str, default=None) 63 | 64 | """ transfer learning configs """ 65 | parser.add_argument('--transfer_learning_method', type=str, default='tinytl-lite_residual+bias', choices=[ 66 | 'full', 'bn+last', 'last', 67 | 'tinytl-bias', 'tinytl-lite_residual', 'tinytl-lite_residual+bias' 68 | ]) 69 | 70 | """ lite residual module configs """ 71 | parser.add_argument('--lite_residual_downsample', type=int, default=2) 72 | parser.add_argument('--lite_residual_expand', type=int, default=1) 73 | parser.add_argument('--lite_residual_groups', type=int, default=2) 74 | 
parser.add_argument('--lite_residual_ks', type=int, default=5) 75 | parser.add_argument('--random_init_lite_residual', action='store_true') 76 | 77 | """ weight quantization """ 78 | parser.add_argument('--frozen_param_bits', type=int, default=8) 79 | 80 | 81 | if __name__ == '__main__': 82 | args = parser.parse_args() 83 | os.makedirs(args.path, exist_ok=True) 84 | json.dump(args.__dict__, open(os.path.join(args.path, 'args.txt'), 'w'), indent=4) 85 | print(args) 86 | 87 | # setup transfer learning 88 | args.enable_feature_extractor_update = False 89 | args.enable_bn_update = False 90 | args.enable_bias_update = False 91 | args.enable_lite_residual = False 92 | if args.transfer_learning_method == 'full': 93 | args.enable_feature_extractor_update = True 94 | elif args.transfer_learning_method == 'bn+last': 95 | args.enable_bn_update = True 96 | elif args.transfer_learning_method == 'last': 97 | pass 98 | elif args.transfer_learning_method == 'tinytl-bias': 99 | args.enable_bias_update = True 100 | elif args.transfer_learning_method == 'tinytl-lite_residual': 101 | args.enable_lite_residual = True 102 | elif args.transfer_learning_method == 'tinytl-lite_residual+bias': 103 | args.enable_bias_update = True 104 | args.enable_lite_residual = True 105 | else: 106 | raise ValueError('Do not support %s' % args.transfer_learning_method) 107 | 108 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 109 | if args.resume: 110 | args.manual_seed = int(time.time()) # set new manual seed 111 | torch.manual_seed(args.manual_seed) 112 | torch.cuda.manual_seed_all(args.manual_seed) 113 | np.random.seed(args.manual_seed) 114 | random.seed(args.manual_seed) 115 | 116 | # run config 117 | if isinstance(args.valid_size, float) and args.valid_size > 1: 118 | args.valid_size = int(args.valid_size) 119 | args.no_decay_keys = None if args.no_decay_keys == 'None' else args.no_decay_keys 120 | args.opt_param = {'momentum': args.momentum, 'nesterov': not args.no_nesterov} 121 | 122 | run_config = FGVCRunConfig(**args.__dict__) 123 | print('Run config:') 124 | for k, v in run_config.config.items(): 125 | print('\t%s: %s' % (k, v)) 126 | 127 | # network 128 | classification_head = [] 129 | if args.net == 'proxyless_mobile': 130 | net = proxylessnas_mobile(pretrained=False) 131 | LiteResidualModule.insert_lite_residual( 132 | net, args.lite_residual_downsample, 'bilinear', args.lite_residual_expand, args.lite_residual_ks, 133 | 'relu', args.lite_residual_groups, 134 | ) 135 | # replace bn layers with gn layers 136 | replace_bn_with_gn(net, gn_channel_per_group=8) 137 | # load pretrained model 138 | init_file = download_url('https://hanlab.mit.edu/projects/tinyml/tinyTL/files/' 139 | 'proxylessnas_mobile+lite_residual@imagenet@ws+gn', model_dir='~/.tinytl/') 140 | net.load_state_dict(torch.load(init_file, map_location='cpu')['state_dict']) 141 | net.classifier = LinearLayer( 142 | net.classifier.in_features, run_config.data_provider.n_classes, dropout_rate=args.dropout) 143 | classification_head.append(net.classifier) 144 | init_models(classification_head) 145 | else: 146 | if args.net_path is not None: 147 | net_config_path = os.path.join(args.net_path, 'net.config') 148 | init_path = os.path.join(args.net_path, 'init') 149 | else: 150 | base_url = 'https://hanlab.mit.edu/projects/tinyml/tinyTL/files/specialized/%s/' % args.dataset 151 | net_config_path = download_url(base_url + 'net.config', 152 | model_dir='~/.tinytl/specialized/%s' % args.dataset) 153 | init_path = download_url(base_url + 'init', 
model_dir='~/.tinytl/specialized/%s' % args.dataset) 154 | net_config = json.load(open(net_config_path, 'r')) 155 | net = build_network_from_config(net_config) 156 | net.classifier = LinearLayer( 157 | net.classifier.in_features, run_config.data_provider.n_classes, dropout_rate=args.dropout) 158 | classification_head.append(net.classifier) 159 | 160 | # load init (weight quantization already applied) 161 | init = torch.load(init_path, map_location='cpu') 162 | if 'state_dict' in init: 163 | init = init['state_dict'] 164 | net.load_state_dict(init) 165 | 166 | # set transfer learning configs 167 | set_module_grad_status(net, args.enable_feature_extractor_update) 168 | set_module_grad_status(classification_head, True) 169 | if args.enable_bn_update: 170 | enable_bn_update(net) 171 | if args.enable_bias_update: 172 | enable_bias_update(net) 173 | if args.enable_lite_residual: 174 | for m in net.modules(): 175 | if isinstance(m, LiteResidualModule): 176 | set_module_grad_status(m.lite_residual, True) 177 | if args.enable_bias_update or args.enable_bn_update: 178 | m.lite_residual.final_bn.bias.requires_grad = False 179 | if args.random_init_lite_residual: 180 | init_models(m.lite_residual) 181 | m.lite_residual.final_bn.weight.data.zero_() 182 | 183 | # weight quantization on frozen parameters 184 | if not args.resume and args.net == 'proxyless_mobile': 185 | weight_quantization(net, bits=args.frozen_param_bits, max_iter=20) 186 | 187 | # setup weight standardization 188 | replace_conv2d_with_my_conv2d(net, args.ws_eps) 189 | 190 | # build run manager 191 | run_manager = RunManager(args.path, net, run_config, init=False) 192 | 193 | # profile memory cost 194 | require_backward = args.enable_feature_extractor_update or args.enable_bn_update or args.enable_bias_update \ 195 | or args.enable_lite_residual 196 | input_size = (1, 3, run_config.data_provider.active_img_size, run_config.data_provider.active_img_size) 197 | memory_cost, detailed_info = profile_memory_cost( 198 | net, input_size, require_backward, activation_bits=32, trainable_param_bits=32, 199 | frozen_param_bits=args.frozen_param_bits, batch_size=run_config.train_batch_size, 200 | ) 201 | net_info = { 202 | 'memory_cost': memory_cost / 1e6, 203 | 'param_size': detailed_info['param_size'] / 1e6, 204 | 'act_size': detailed_info['act_size'] / 1e6, 205 | } 206 | with open('%s/net_info.txt' % run_manager.path, 'a') as fout: 207 | fout.write(json.dumps(net_info, indent=4) + '\n') 208 | 209 | # information of parameters that will be updated via gradient 210 | run_manager.write_log('Updated params:', 'grad_params', False, 'w') 211 | for i, param_group in enumerate(run_manager.optimizer.param_groups): 212 | run_manager.write_log( 213 | 'Group %d: %d params with wd %f' % (i + 1, len(param_group['params']), param_group['weight_decay']), 214 | 'grad_params', True, 'a') 215 | for name, param in net.named_parameters(): 216 | if param.requires_grad: 217 | run_manager.write_log('%s: %s' % (name, list(param.data.size())), 'grad_params', False, 'a') 218 | 219 | run_manager.save_config() 220 | if args.resume: 221 | run_manager.load_model() 222 | else: 223 | init_path = '%s/init' % args.path 224 | if os.path.isfile(init_path): 225 | checkpoint = torch.load(init_path, map_location='cpu') 226 | if 'state_dict' in checkpoint: 227 | checkpoint = checkpoint['state_dict'] 228 | run_manager.network.load_state_dict(checkpoint) 229 | 230 | # train 231 | args.teacher_model = None 232 | run_manager.train(args) 233 | # test 234 | img_size, loss, acc1, acc5 = 
run_manager.validate_all_resolution(is_test=True) 235 | log = 'test_loss: %f\t test_acc1: %f\t test_acc5: %f\t' % (list_mean(loss), list_mean(acc1), list_mean(acc5)) 236 | for i_s, v_a in zip(img_size, acc1): 237 | log += '(%d, %.3f), ' % (i_s, v_a) 238 | run_manager.write_log(log, prefix='test') 239 | -------------------------------------------------------------------------------- /netaug/models/netaug/mbv2.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch.nn as nn 4 | 5 | from models.base.layers import OpSequential, ResidualBlock 6 | from models.base.mbv2 import MobileNetV2 7 | from models.netaug.layers import ( 8 | DynamicConvLayer, 9 | DynamicDsConvLayer, 10 | DynamicInvertedBlock, 11 | DynamicLinearLayer, 12 | ) 13 | from models.netaug.utils import aug_width, sort_channels_inner, sync_width 14 | from utils import make_divisible, torch_random_choices 15 | 16 | __all__ = ["NetAugMobileNetV2"] 17 | 18 | 19 | class NetAugMobileNetV2(MobileNetV2): 20 | def __init__( 21 | self, 22 | base_net: MobileNetV2, 23 | aug_expand_list: List[float], 24 | aug_width_mult_list: List[float], 25 | n_classes: int, 26 | dropout_rate=0.0, 27 | ): 28 | nn.Module.__init__(self) 29 | max_width_mult = max(aug_width_mult_list) 30 | 31 | # input stem 32 | base_input_stem = base_net.backbone["input_stem"] 33 | aug_input_stem = OpSequential( 34 | [ 35 | DynamicConvLayer( 36 | 3, 37 | aug_width( 38 | base_input_stem.op_list[0].out_channels, aug_width_mult_list, 1 39 | ), 40 | stride=2, 41 | act_func="relu6", 42 | ), 43 | ResidualBlock( 44 | DynamicDsConvLayer( 45 | make_divisible( 46 | base_input_stem.op_list[0].out_channels * max_width_mult, 1 47 | ), 48 | aug_width( 49 | base_input_stem.op_list[1].conv.out_channels, 50 | aug_width_mult_list, 51 | 1, 52 | ), 53 | act_func=("relu6", None), 54 | ), 55 | shortcut=None, 56 | ), 57 | ] 58 | ) 59 | 60 | # stages 61 | aug_stages = [] 62 | for base_stage in base_net.backbone["stages"]: 63 | stage = [] 64 | for base_block in base_stage.op_list: 65 | stage.append( 66 | ResidualBlock( 67 | DynamicInvertedBlock( 68 | in_channels=make_divisible( 69 | base_block.conv.in_channels * max_width_mult, 1 70 | ), 71 | out_channels=aug_width( 72 | base_block.conv.out_channels, aug_width_mult_list, 1 73 | ), 74 | kernel_size=base_block.conv.kernel_size, 75 | expand_ratio=aug_width( 76 | base_block.conv.expand_ratio, aug_expand_list 77 | ), 78 | stride=base_block.conv.stride, 79 | act_func=( 80 | base_block.conv.inverted_conv.act, 81 | base_block.conv.depth_conv.act, 82 | base_block.conv.point_conv.act, 83 | ), 84 | ), 85 | shortcut=base_block.shortcut, 86 | ) 87 | ) 88 | aug_stages.append(OpSequential(stage)) 89 | 90 | # head 91 | base_head = base_net.head 92 | aug_head = OpSequential( 93 | [ 94 | ResidualBlock( 95 | DynamicInvertedBlock( 96 | make_divisible( 97 | base_head.op_list[0].conv.in_channels * max_width_mult, 1 98 | ), 99 | aug_width( 100 | base_head.op_list[0].conv.out_channels, 101 | aug_width_mult_list, 102 | 1, 103 | ), 104 | base_head.op_list[0].conv.kernel_size, 105 | expand_ratio=aug_width( 106 | base_head.op_list[0].conv.expand_ratio, aug_expand_list 107 | ), 108 | act_func=("relu6", "relu6", None), 109 | ), 110 | shortcut=None, 111 | ), 112 | DynamicConvLayer( 113 | make_divisible( 114 | base_head.op_list[1].in_channels * max_width_mult, 1 115 | ), 116 | aug_width( 117 | base_head.op_list[1].out_channels, aug_width_mult_list, 1 118 | ), 119 | 1, 120 | act_func=base_head.op_list[1].act, 
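                    # this DynamicConvLayer wraps the base head's 1x1 conv: the
                    # activation is inherited unchanged, while the output width
                    # is made searchable via aug_width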
121 | ), 122 | nn.AdaptiveAvgPool2d(1), 123 | DynamicLinearLayer( 124 | make_divisible( 125 | base_head.op_list[-1].in_features * max_width_mult, 1 126 | ), 127 | n_classes, 128 | dropout_rate=dropout_rate, 129 | ), 130 | ] 131 | ) 132 | 133 | self.backbone = nn.ModuleDict( 134 | { 135 | "input_stem": aug_input_stem, 136 | "stages": nn.ModuleList(aug_stages), 137 | } 138 | ) 139 | self.head = aug_head 140 | 141 | @property 142 | def all_blocks(self): 143 | all_blocks = [] 144 | for stage in self.backbone["stages"]: 145 | for block in stage.op_list: 146 | all_blocks.append(block) 147 | all_blocks.append(self.head.op_list[0]) 148 | return all_blocks 149 | 150 | def set_active(self, mode: str, sync=False, generator=None): 151 | # input stem 152 | first_conv, first_block = self.backbone["input_stem"].op_list 153 | if mode in ["min", "min_w"]: 154 | first_conv.conv.active_out_channels = min(first_conv.out_channels_list) 155 | first_block.conv.point_conv.conv.active_out_channels = min( 156 | first_block.conv.point_conv.out_channels_list 157 | ) 158 | elif mode in ["random", "min_e"]: 159 | first_conv.conv.active_out_channels = torch_random_choices( 160 | first_conv.out_channels_list, 161 | generator, 162 | ) 163 | first_block.conv.point_conv.conv.active_out_channels = torch_random_choices( 164 | first_block.conv.point_conv.out_channels_list, 165 | generator, 166 | ) 167 | else: 168 | raise NotImplementedError 169 | if sync: 170 | first_conv.conv.active_out_channels = sync_width( 171 | first_conv.conv.active_out_channels 172 | ) 173 | first_block.conv.point_conv.conv.active_out_channels = sync_width( 174 | first_block.conv.point_conv.conv.active_out_channels 175 | ) 176 | 177 | # stages 178 | in_channels = first_block.conv.point_conv.conv.active_out_channels 179 | for block in self.all_blocks: 180 | if block.shortcut is None: 181 | if mode in ["min", "min_w"]: 182 | active_out_channels = min(block.conv.point_conv.out_channels_list) 183 | elif mode in ["random", "min_e"]: 184 | active_out_channels = torch_random_choices( 185 | block.conv.point_conv.out_channels_list, 186 | generator, 187 | ) 188 | else: 189 | raise NotImplementedError 190 | else: 191 | active_out_channels = in_channels 192 | if mode in ["min", "min_e"]: 193 | active_expand_ratio = min(block.conv.expand_ratio_list) 194 | elif mode in ["min_w", "random"]: 195 | active_expand_ratio = torch_random_choices( 196 | block.conv.expand_ratio_list, 197 | generator, 198 | ) 199 | else: 200 | raise NotImplementedError 201 | active_mid_channels = make_divisible(active_expand_ratio * in_channels, 1) 202 | if sync: 203 | active_mid_channels = sync_width(active_mid_channels) 204 | active_out_channels = sync_width(active_out_channels) 205 | 206 | block.conv.inverted_conv.conv.active_out_channels = active_mid_channels 207 | block.conv.point_conv.conv.active_out_channels = active_out_channels 208 | 209 | in_channels = active_out_channels 210 | 211 | # head 212 | final_conv = self.head.op_list[1] 213 | if mode in ["min", "min_w"]: 214 | final_conv.conv.active_out_channels = min(final_conv.out_channels_list) 215 | elif mode in ["random", "min_e"]: 216 | final_conv.conv.active_out_channels = torch_random_choices( 217 | final_conv.out_channels_list, 218 | generator, 219 | ) 220 | if sync: 221 | final_conv.conv.active_out_channels = sync_width( 222 | final_conv.conv.active_out_channels 223 | ) 224 | else: 225 | raise NotImplementedError 226 | 227 | def export(self) -> MobileNetV2: 228 | export_model = MobileNetV2.__new__(MobileNetV2) 229 | 
nn.Module.__init__(export_model) 230 | # input stem 231 | input_stem = OpSequential( 232 | [ 233 | self.backbone["input_stem"].op_list[0].export(), 234 | ResidualBlock( 235 | self.backbone["input_stem"].op_list[1].conv.export(), 236 | self.backbone["input_stem"].op_list[1].shortcut, 237 | ), 238 | ] 239 | ) 240 | 241 | # stages 242 | stages = [] 243 | for stage in self.backbone["stages"]: 244 | blocks = [] 245 | for block in stage.op_list: 246 | blocks.append( 247 | ResidualBlock( 248 | block.conv.export(), 249 | block.shortcut, 250 | ) 251 | ) 252 | stages.append(OpSequential(blocks)) 253 | 254 | # head 255 | head = OpSequential( 256 | [ 257 | ResidualBlock( 258 | self.head.op_list[0].conv.export(), 259 | self.head.op_list[0].shortcut, 260 | ), 261 | self.head.op_list[1].export(), 262 | self.head.op_list[2], 263 | self.head.op_list[3].export(), 264 | ] 265 | ) 266 | export_model.backbone = nn.ModuleDict( 267 | { 268 | "input_stem": input_stem, 269 | "stages": nn.ModuleList(stages), 270 | } 271 | ) 272 | export_model.head = head 273 | return export_model 274 | 275 | def sort_channels(self) -> None: 276 | for block in self.all_blocks: 277 | sort_channels_inner(block.conv) 278 | -------------------------------------------------------------------------------- /netaug/models/netaug/mbv3.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from typing import List 3 | 4 | import torch.nn as nn 5 | 6 | from models.base.layers import ( 7 | InvertedBlock, 8 | OpSequential, 9 | ResidualBlock, 10 | SeInvertedBlock, 11 | ) 12 | from models.base.mbv3 import MobileNetV3 13 | from models.netaug.layers import ( 14 | DynamicConvLayer, 15 | DynamicDsConvLayer, 16 | DynamicInvertedBlock, 17 | DynamicLinearLayer, 18 | DynamicSeInvertedBlock, 19 | ) 20 | from models.netaug.utils import aug_width, sync_width 21 | from utils import make_divisible, torch_random_choices 22 | 23 | from .mbv2 import NetAugMobileNetV2 24 | 25 | __all__ = ["NetAugMobileNetV3"] 26 | 27 | 28 | class NetAugMobileNetV3(NetAugMobileNetV2): 29 | def __init__( 30 | self, 31 | base_net: MobileNetV3, 32 | aug_expand_list: List[float], 33 | aug_width_mult_list: List[float], 34 | n_classes: int, 35 | dropout_rate=0.0, 36 | ): 37 | nn.Module.__init__(self) 38 | max_width_mult = max(aug_width_mult_list) 39 | 40 | # input stem 41 | base_input_stem = base_net.backbone["input_stem"] 42 | aug_input_stem = OpSequential( 43 | [ 44 | DynamicConvLayer( 45 | 3, 46 | aug_width( 47 | base_input_stem.op_list[0].out_channels, aug_width_mult_list, 1 48 | ), 49 | stride=2, 50 | act_func="h_swish", 51 | ), 52 | ResidualBlock( 53 | DynamicDsConvLayer( 54 | make_divisible( 55 | base_input_stem.op_list[1].conv.in_channels 56 | * max_width_mult, 57 | 1, 58 | ), 59 | aug_width( 60 | base_input_stem.op_list[1].conv.out_channels, 61 | aug_width_mult_list, 62 | 1, 63 | ), 64 | act_func=("relu", None), 65 | ), 66 | shortcut=nn.Identity(), 67 | ), 68 | ] 69 | ) 70 | 71 | # stages 72 | aug_stages = [] 73 | for base_stage in base_net.backbone["stages"]: 74 | stage = [] 75 | for base_block in base_stage.op_list: 76 | if isinstance(base_block.conv, SeInvertedBlock): 77 | se_config = { 78 | "reduction": ( 79 | base_block.conv.se_layer.in_channels 80 | / base_block.conv.se_layer.mid_channels 81 | + 1.0e-10 82 | ), 83 | "act_func": base_block.conv.se_layer.act, 84 | } 85 | dynamic_block_cls = partial( 86 | DynamicSeInvertedBlock, se_config=se_config 87 | ) 88 | elif isinstance(base_block.conv, InvertedBlock): 
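                    # plain inverted bottlenecks (no SE) need no extra config;
                    # the SE branch above recovers the reduction ratio from
                    # in_channels / mid_channels so the dynamic block reproduces
                    # the base SE layer's geometry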
89 | dynamic_block_cls = DynamicInvertedBlock 90 | else: 91 | raise NotImplementedError 92 | stage.append( 93 | ResidualBlock( 94 | dynamic_block_cls( 95 | in_channels=make_divisible( 96 | base_block.conv.in_channels * max_width_mult, 1 97 | ), 98 | out_channels=aug_width( 99 | base_block.conv.out_channels, aug_width_mult_list, 1 100 | ), 101 | kernel_size=base_block.conv.kernel_size, 102 | expand_ratio=aug_width( 103 | base_block.conv.expand_ratio, aug_expand_list 104 | ), 105 | stride=base_block.conv.stride, 106 | act_func=( 107 | base_block.conv.inverted_conv.act, 108 | base_block.conv.depth_conv.act, 109 | base_block.conv.point_conv.act, 110 | ), 111 | ), 112 | shortcut=base_block.shortcut, 113 | ) 114 | ) 115 | aug_stages.append(OpSequential(stage)) 116 | 117 | # head 118 | base_head = base_net.head 119 | aug_head = OpSequential( 120 | [ 121 | DynamicConvLayer( 122 | make_divisible( 123 | base_head.op_list[0].in_channels * max_width_mult, 1 124 | ), 125 | aug_width( 126 | base_head.op_list[0].out_channels, aug_width_mult_list, 1 127 | ), 128 | 1, 129 | act_func=base_head.op_list[0].act, 130 | ), 131 | nn.AdaptiveAvgPool2d(1), 132 | DynamicConvLayer( 133 | make_divisible( 134 | base_head.op_list[2].in_channels * max_width_mult, 1 135 | ), 136 | aug_width( 137 | base_head.op_list[2].out_channels, aug_width_mult_list, 1 138 | ), 139 | 1, 140 | use_bias=True, 141 | norm=None, 142 | act_func=base_head.op_list[2].act, 143 | ), 144 | DynamicLinearLayer( 145 | make_divisible( 146 | base_head.op_list[-1].in_features * max_width_mult, 1 147 | ), 148 | n_classes, 149 | dropout_rate=dropout_rate, 150 | ), 151 | ] 152 | ) 153 | 154 | self.backbone = nn.ModuleDict( 155 | { 156 | "input_stem": aug_input_stem, 157 | "stages": nn.ModuleList(aug_stages), 158 | } 159 | ) 160 | self.head = aug_head 161 | 162 | @property 163 | def all_blocks(self): 164 | all_blocks = [] 165 | for stage in self.backbone["stages"]: 166 | for block in stage.op_list: 167 | all_blocks.append(block) 168 | return all_blocks 169 | 170 | def set_active(self, mode: str, sync=False, generator=None): 171 | # input stem 172 | first_conv, first_block = self.backbone["input_stem"].op_list 173 | if mode in ["min", "min_w"]: 174 | first_conv.conv.active_out_channels = min(first_conv.out_channels_list) 175 | first_block.conv.point_conv.conv.active_out_channels = ( 176 | first_conv.conv.active_out_channels 177 | ) 178 | elif mode in ["random", "min_e"]: 179 | first_conv.conv.active_out_channels = torch_random_choices( 180 | first_conv.out_channels_list, 181 | generator, 182 | ) 183 | first_block.conv.point_conv.conv.active_out_channels = ( 184 | first_conv.conv.active_out_channels 185 | ) 186 | else: 187 | raise NotImplementedError 188 | if sync: 189 | first_conv.conv.active_out_channels = sync_width( 190 | first_conv.conv.active_out_channels 191 | ) 192 | first_block.conv.point_conv.conv.active_out_channels = sync_width( 193 | first_block.conv.point_conv.conv.active_out_channels 194 | ) 195 | 196 | # stages 197 | in_channels = first_block.conv.point_conv.conv.active_out_channels 198 | for block in self.all_blocks: 199 | if block.shortcut is None: 200 | if mode in ["min", "min_w"]: 201 | active_out_channels = min(block.conv.point_conv.out_channels_list) 202 | elif mode in ["random", "min_e"]: 203 | active_out_channels = torch_random_choices( 204 | block.conv.point_conv.out_channels_list, 205 | generator, 206 | ) 207 | else: 208 | raise NotImplementedError 209 | else: 210 | active_out_channels = in_channels 211 | if mode in ["min", "min_e"]: 
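                # mode semantics: "min" selects minimal width and minimal expand
                # ratio (the base subnet), "min_w" minimal widths with random
                # expand ratios, "min_e" the converse, and "random" samples both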
212 | active_expand_ratio = min(block.conv.expand_ratio_list) 213 | elif mode in ["min_w", "random"]: 214 | active_expand_ratio = torch_random_choices( 215 | block.conv.expand_ratio_list, 216 | generator, 217 | ) 218 | else: 219 | raise NotImplementedError 220 | active_mid_channels = make_divisible(active_expand_ratio * in_channels, 1) 221 | if sync: 222 | active_mid_channels = sync_width(active_mid_channels) 223 | active_out_channels = sync_width(active_out_channels) 224 | 225 | block.conv.inverted_conv.conv.active_out_channels = active_mid_channels 226 | block.conv.point_conv.conv.active_out_channels = active_out_channels 227 | 228 | in_channels = active_out_channels 229 | 230 | # head 231 | if mode in ["min", "min_w"]: 232 | self.head.op_list[0].conv.active_out_channels = min( 233 | self.head.op_list[0].out_channels_list 234 | ) 235 | self.head.op_list[2].conv.active_out_channels = min( 236 | self.head.op_list[2].out_channels_list 237 | ) 238 | elif mode in ["random", "min_e"]: 239 | self.head.op_list[0].conv.active_out_channels = torch_random_choices( 240 | self.head.op_list[0].out_channels_list, 241 | generator, 242 | ) 243 | self.head.op_list[2].conv.active_out_channels = torch_random_choices( 244 | self.head.op_list[2].out_channels_list, 245 | generator, 246 | ) 247 | if sync: 248 | self.head.op_list[0].conv.active_out_channels = sync_width( 249 | self.head.op_list[0].conv.active_out_channels 250 | ) 251 | self.head.op_list[2].conv.active_out_channels = sync_width( 252 | self.head.op_list[2].conv.active_out_channels 253 | ) 254 | else: 255 | raise NotImplementedError 256 | 257 | def export(self) -> MobileNetV3: 258 | export_model = MobileNetV3.__new__(MobileNetV3) 259 | nn.Module.__init__(export_model) 260 | # input stem 261 | input_stem = OpSequential( 262 | [ 263 | self.backbone["input_stem"].op_list[0].export(), 264 | ResidualBlock( 265 | self.backbone["input_stem"].op_list[1].conv.export(), 266 | self.backbone["input_stem"].op_list[1].shortcut, 267 | ), 268 | ] 269 | ) 270 | 271 | # stages 272 | stages = [] 273 | for stage in self.backbone["stages"]: 274 | blocks = [] 275 | for block in stage.op_list: 276 | blocks.append( 277 | ResidualBlock( 278 | block.conv.export(), 279 | block.shortcut, 280 | ) 281 | ) 282 | stages.append(OpSequential(blocks)) 283 | 284 | # head 285 | head = OpSequential( 286 | [ 287 | self.head.op_list[0].export(), 288 | self.head.op_list[1], 289 | self.head.op_list[2].export(), 290 | self.head.op_list[3].export(), 291 | ] 292 | ) 293 | export_model.backbone = nn.ModuleDict( 294 | { 295 | "input_stem": input_stem, 296 | "stages": nn.ModuleList(stages), 297 | } 298 | ) 299 | export_model.head = head 300 | return export_model 301 | -------------------------------------------------------------------------------- /netaug/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import time 5 | import warnings 6 | from typing import Dict, Optional 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import yaml 12 | from torchpack import distributed as dist 13 | from tqdm import tqdm 14 | 15 | from models.netaug import reset_bn 16 | from setup import augemnt_model, build_data_loader, build_model 17 | from utils import ( 18 | AverageMeter, 19 | CosineLRwithWarmup, 20 | CrossEntropyWithLabelSmooth, 21 | DistributedMetric, 22 | accuracy, 23 | inference_macs, 24 | init_modules, 25 | load_state_dict, 26 | load_state_dict_from_file, 27 | parse_unknown_args, 28 
--------------------------------------------------------------------------------
/netaug/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import copy
3 | import os
4 | import time
5 | import warnings
6 | from typing import Dict, Optional
7 |
8 | import numpy as np
9 | import torch
10 | import torch.nn as nn
11 | import yaml
12 | from torchpack import distributed as dist
13 | from tqdm import tqdm
14 |
15 | from models.netaug import reset_bn
16 | from setup import augemnt_model, build_data_loader, build_model
17 | from utils import (
18 |     AverageMeter,
19 |     CosineLRwithWarmup,
20 |     CrossEntropyWithLabelSmooth,
21 |     DistributedMetric,
22 |     accuracy,
23 |     inference_macs,
24 |     init_modules,
25 |     load_state_dict,
26 |     load_state_dict_from_file,
27 |     parse_unknown_args,
28 |     partial_update_config,
29 |     remove_bn,
30 |     trainable_param_num,
31 | )
32 |
33 | parser = argparse.ArgumentParser()
34 |
35 | parser.add_argument("config", metavar="FILE", help="config file")
36 | parser.add_argument("--path", type=str, metavar="DIR", help="run directory")
37 | parser.add_argument(
38 |     "--gpu", type=str, default=None
39 | )  # used in single-machine experiments
40 | parser.add_argument("--manual_seed", type=int, default=0)
41 | parser.add_argument("--resume", action="store_true")
42 |
43 | # initialization
44 | parser.add_argument("--init_from", type=str, default=None)
45 |
46 |
47 | def eval(model: nn.Module, data_loader_dict: Dict, _reset_bn=False) -> Dict:
48 |     if _reset_bn:  # recalibrate BN statistics on a subset of the training data
49 |         reset_bn(
50 |             model,
51 |             data_loader_dict["sub_train"],
52 |             sync=True,
53 |         )
54 |
55 |     test_criterion = nn.CrossEntropyLoss().cuda()
56 |
57 |     val_loss = DistributedMetric()
58 |     val_top1 = DistributedMetric()
59 |     val_top5 = DistributedMetric()
60 |
61 |     model.eval()
62 |     with torch.no_grad():
63 |         with tqdm(
64 |             total=len(data_loader_dict["val"]),
65 |             desc="Eval",
66 |             disable=not dist.is_master(),
67 |         ) as t:
68 |             for images, labels in data_loader_dict["val"]:
69 |                 images, labels = images.cuda(), labels.cuda()
70 |                 # compute output
71 |                 output = model(images)
72 |                 loss = test_criterion(output, labels)
73 |                 val_loss.update(loss, images.shape[0])
74 |                 acc1, acc5 = accuracy(output, labels, topk=(1, 5))
75 |                 val_top5.update(acc5[0], images.shape[0])
76 |                 val_top1.update(acc1[0], images.shape[0])
77 |
78 |                 t.set_postfix(
79 |                     {
80 |                         "loss": val_loss.avg.item(),
81 |                         "top1": val_top1.avg.item(),
82 |                         "top5": val_top5.avg.item(),
83 |                         "#samples": val_top1.count.item(),
84 |                         "batch_size": images.shape[0],
85 |                         "img_size": images.shape[2],
86 |                     }
87 |                 )
88 |                 t.update()
89 |
90 |     val_results = {
91 |         "val_top1": val_top1.avg.item(),
92 |         "val_top5": val_top5.avg.item(),
93 |         "val_loss": val_loss.avg.item(),
94 |     }
95 |     return val_results
96 |
97 |
98 | def train_one_epoch(
99 |     model: nn.Module,
100 |     data_provider: Dict,
101 |     epoch: int,
102 |     optimizer,
103 |     criterion,
104 |     lr_scheduler,
105 |     exp_config: Dict,
106 |     netaug_mode: Optional[str] = None,
107 | ) -> Dict:
108 |     train_loss = DistributedMetric()
109 |     train_top1 = DistributedMetric()
110 |
111 |     model.train()
112 |     data_provider["train"].sampler.set_epoch(epoch)
113 |
114 |     data_time = AverageMeter()
115 |     with tqdm(
116 |         total=len(data_provider["train"]),
117 |         desc="Train Epoch #{}".format(epoch + 1),
118 |         disable=not dist.is_master(),
119 |     ) as t:
120 |         end = time.time()
121 |         for _, (images, labels) in enumerate(data_provider["train"]):
122 |             data_time.update(time.time() - end)
123 |             images, labels = images.cuda(), labels.cuda()
124 |
125 |             optimizer.zero_grad()
126 |             if netaug_mode is not None:
127 |                 # base: forward/backward through the smallest (target) sub-network
128 |                 model.module.set_active(mode="min")
129 |                 with model.no_sync():  # delay DDP gradient sync until the aug backward below
130 |                     output = model(images)
131 |                     loss = criterion(output, labels)
132 |                     loss.backward()
133 |                 top1 = accuracy(output, labels, topk=(1,))[0][0]
134 |                 # aug: forward/backward through one sampled augmented network
135 |                 model.module.set_active(
136 |                     mode="random" if netaug_mode == "default" else netaug_mode,
137 |                     sync=exp_config["netaug"]["sync"],
138 |                     generator=exp_config["generator"],
139 |                 )
140 |                 output = model(images)
141 |                 aug_loss = criterion(output, labels)
142 |                 aug_loss.backward()
143 |             else:
144 |                 output = model(images)
145 |                 loss = criterion(output, labels)
146 |                 loss.backward()
147 |                 top1 = accuracy(output, labels, topk=(1,))[0][0]
148 |             optimizer.step()  # one update using the accumulated gradients
149 |             lr_scheduler.step()
150 |
151 |             train_loss.update(loss, images.shape[0])  # only the base-network loss is logged
152 |             train_top1.update(top1, images.shape[0])
153 |
154 |             t.set_postfix(
155 |                 {
156 |                     "loss": train_loss.avg.item(),
157 |                     "top1": train_top1.avg.item(),
158 |                     "batch_size": images.shape[0],
159 |                     "img_size": images.shape[2],
160 |                     "lr": optimizer.param_groups[0]["lr"],
161 |                     "data_time": data_time.avg,
162 |                     "netaug": netaug_mode,
163 |                 }
164 |             )
165 |             t.update()
166 |
167 |             end = time.time()
168 |     return {
169 |         "train_top1": train_top1.avg.item(),
170 |         "train_loss": train_loss.avg.item(),
171 |     }
172 |
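# Condensed view of the NetAug step in train_one_epoch above (a sketch, not a
# replacement): two backward passes accumulate into the same .grad buffers, and
# a single optimizer.step() applies the summed base + augmented gradients.
#
#   optimizer.zero_grad()
#   model.module.set_active(mode="min")               # the deployable target network
#   with model.no_sync():                             # no DDP all-reduce on this backward
#       criterion(model(images), labels).backward()
#   model.module.set_active(                          # one sampled wider variant
#       mode="random",
#       sync=exp_config["netaug"]["sync"],
#       generator=exp_config["generator"],
#   )
#   criterion(model(images), labels).backward()       # all-reduces the accumulated grads
#   optimizer.step()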
173 |
174 | def train(
175 |     model: nn.Module,
176 |     data_provider: Dict,
177 |     exp_config: Dict,
178 |     path: str,
179 |     resume=False,
180 |     use_netaug=False,
181 | ):
182 |     # build optimizer
183 |     params_without_wd = []
184 |     params_with_wd = []
185 |     for name, param in model.named_parameters():
186 |         if param.requires_grad:
187 |             if np.any([key in name for key in ["bias", "norm"]]):  # no weight decay on bias/norm params
188 |                 params_without_wd.append(param)
189 |             else:
190 |                 params_with_wd.append(param)
191 |     net_params = [
192 |         {"params": params_without_wd, "weight_decay": 0},
193 |         {
194 |             "params": params_with_wd,
195 |             "weight_decay": exp_config["run_config"]["weight_decay"],
196 |         },
197 |     ]
198 |     optimizer = torch.optim.SGD(
199 |         net_params,
200 |         lr=exp_config["run_config"]["base_lr"] * dist.size(),  # linear LR scaling with world size
201 |         momentum=0.9,
202 |         nesterov=True,
203 |     )
204 |     # build lr scheduler
205 |     lr_scheduler = CosineLRwithWarmup(
206 |         optimizer,
207 |         exp_config["run_config"]["warmup_epochs"] * len(data_provider["train"]),
208 |         exp_config["run_config"]["base_lr"],
209 |         exp_config["run_config"]["n_epochs"] * len(data_provider["train"]),
210 |     )
211 |     # train criterion
212 |     train_criterion = CrossEntropyWithLabelSmooth(
213 |         smooth_ratio=exp_config["run_config"]["label_smoothing"]
214 |     )
215 |     # init
216 |     best_val = 0.0
217 |     start_epoch = 0
218 |     checkpoint_path = os.path.join(path, "checkpoint")
219 |     log_path = os.path.join(path, "logs")
220 |     os.makedirs(checkpoint_path, exist_ok=True)
221 |     os.makedirs(log_path, exist_ok=True)
222 |     logs_writer = open(os.path.join(log_path, "exp.log"), "a")
223 |
224 |     if resume and os.path.isfile(os.path.join(checkpoint_path, "checkpoint.pt")):
225 |         checkpoint = torch.load(
226 |             os.path.join(checkpoint_path, "checkpoint.pt"), map_location="cpu"
227 |         )
228 |         model.module.load_state_dict(checkpoint["state_dict"])
229 |         if "best_val" in checkpoint:
230 |             best_val = checkpoint["best_val"]
231 |         if "epoch" in checkpoint:
232 |             start_epoch = checkpoint["epoch"] + 1
233 |         if "optimizer" in checkpoint:
234 |             optimizer.load_state_dict(checkpoint["optimizer"])
235 |         if "lr_scheduler" in checkpoint:
236 |             lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
237 |
238 |     # start training
239 |     for epoch in range(
240 |         start_epoch,
241 |         exp_config["run_config"]["n_epochs"]
242 |         + exp_config["run_config"]["warmup_epochs"],
243 |     ):
244 |         remaining_epochs = (
245 |             exp_config["run_config"]["n_epochs"]
246 |             + exp_config["run_config"]["warmup_epochs"]
247 |             - epoch
248 |         )
249 |
250 |         netaug_mode = None
251 |         if use_netaug:
252 |             netaug_mode = "default"
253 |             if remaining_epochs <= exp_config["netaug"]["stop_aug_w_epoch"]:
254 |                 netaug_mode = "min_w"  # width fixed at minimum; expand ratio still augmented
255 |             elif remaining_epochs <= exp_config["netaug"]["stop_aug_e_epoch"]:
256 |                 netaug_mode = "min_e"  # expand ratio fixed at minimum; width still augmented
257 |
258 |             if remaining_epochs <= exp_config["netaug"]["stop_netaug_epoch"]:
259 |                 netaug_mode = None  # final epochs: train the base network only
260 |             # sort channel
261 |             if exp_config["netaug"]["sort_channel"] and netaug_mode == "default":
262 |                 model.module.sort_channels()
263 |                 print("sort channels")
264 |             if netaug_mode is None:
265 |                 model.module.set_active(mode="min")
266 |         train_info_dict = train_one_epoch(
267 |             model,
268 |             data_provider,
269 |             epoch,
270 |             optimizer,
271 |             train_criterion,
272 |             lr_scheduler,
273 |             exp_config,
274 |             netaug_mode,
275 |         )
276 |         if use_netaug:
277 |             model.module.set_active(mode="min")
278 |         val_info_dict = eval(model, data_provider, use_netaug)  # always validate the base sub-network
279 |         is_best = val_info_dict["val_top1"] > best_val
280 |         best_val = max(best_val, val_info_dict["val_top1"])
281 |         # log
282 |         epoch_log = f"[{epoch + 1 - exp_config['run_config']['warmup_epochs']}/{exp_config['run_config']['n_epochs']}]"
283 |         epoch_log += f"\tval_top1={val_info_dict['val_top1']:.2f} ({best_val:.2f})"
284 |         epoch_log += f"\ttrain_top1={train_info_dict['train_top1']:.2f}\tlr={optimizer.param_groups[0]['lr']:.2E}"
285 |         if dist.is_master():
286 |             logs_writer.write(epoch_log + "\n")
287 |             logs_writer.flush()
288 |
289 |         # save checkpoint
290 |         checkpoint = {
291 |             "state_dict": model.module.state_dict(),
292 |             "epoch": epoch,
293 |             "best_val": best_val,
294 |             "optimizer": optimizer.state_dict(),
295 |             "lr_scheduler": lr_scheduler.state_dict(),
296 |         }
297 |         if dist.is_master():
298 |             torch.save(
299 |                 checkpoint,
300 |                 os.path.join(checkpoint_path, "checkpoint.pt"),
301 |                 _use_new_zipfile_serialization=False,  # legacy format, loadable by older PyTorch
302 |             )
303 |             if is_best:
304 |                 torch.save(
305 |                     checkpoint,
306 |                     os.path.join(checkpoint_path, "best.pt"),
307 |                     _use_new_zipfile_serialization=False,
308 |                 )
309 |
310 |     # export if use_netaug
311 |     if use_netaug:
312 |         checkpoint = load_state_dict_from_file(os.path.join(checkpoint_path, "best.pt"))
313 |         model.module.load_state_dict(checkpoint)
314 |         model.eval()
315 |         model.module.set_active(mode="min")  # select the deployable base configuration
316 |         with torch.no_grad():  # one dummy forward pass before export
317 |             model.module(
318 |                 torch.zeros(
319 |                     1,
320 |                     3,
321 |                     exp_config["data_provider"]["image_size"],
322 |                     exp_config["data_provider"]["image_size"],
323 |                 ).cuda()
324 |             )
325 |         export_model = model.module.export()
326 |         if dist.is_master():
327 |             torch.save(
328 |                 {"state_dict": export_model.state_dict()},
329 |                 os.path.join(checkpoint_path, "target.pt"),
330 |                 _use_new_zipfile_serialization=False,
331 |             )
332 |
333 |
exp_config["data_provider"]["n_worker"], 372 | exp_config["data_provider"]["data_path"], 373 | dist.size(), 374 | dist.rank(), 375 | ) 376 | 377 | # build model 378 | model = build_model( 379 | exp_config["model"]["name"], 380 | n_classes, 381 | exp_config["model"]["dropout_rate"], 382 | ) 383 | print(model) 384 | 385 | # netaug 386 | if exp_config.get("netaug", None) is not None: 387 | use_netaug = True 388 | model = augemnt_model( 389 | model, exp_config["netaug"], n_classes, exp_config["model"]["dropout_rate"] 390 | ) 391 | model.set_active(mode="min") 392 | else: 393 | use_netaug = False 394 | 395 | # load init 396 | if args.init_from is not None: 397 | init = load_state_dict_from_file(args.init_from) 398 | load_state_dict(model, init, strict=False) 399 | print("Loaded init from %s" % args.init_from) 400 | else: 401 | init_modules(model, init_type=exp_config["run_config"]["init_type"]) 402 | print("Random Init") 403 | 404 | # profile 405 | profile_model = copy.deepcopy(model) 406 | # during inference, bn will be fused into conv 407 | remove_bn(profile_model) 408 | print(f"Params: {trainable_param_num(profile_model)}M") 409 | print( 410 | f"MACs: {inference_macs(profile_model, data_shape=(1, 3, image_size, image_size))}M" 411 | ) 412 | 413 | # train 414 | exp_config["generator"] = torch.Generator() 415 | exp_config["generator"].manual_seed(args.manual_seed) 416 | model = nn.parallel.DistributedDataParallel( 417 | model.cuda(), device_ids=[dist.local_rank()] 418 | ) 419 | train(model, data_provider, exp_config, args.path, args.resume, use_netaug) 420 | 421 | 422 | if __name__ == "__main__": 423 | main() 424 | --------------------------------------------------------------------------------