├── SmartPAF.pdf
├── .gitignore
├── image
│   ├── RelatedWork.png
│   └── secure_ML_inference.png
├── expriments
│   ├── convergence_curve.pdf
│   └── fig9
│       └── PR_AT_2f12g1.log
├── src
│   ├── PyTorch_CIFAR10
│   │   ├── download_weights.sh
│   │   ├── LICENSE
│   │   ├── module.py
│   │   ├── data.py
│   │   ├── train.py
│   │   ├── README.md
│   │   ├── cifar10_models
│   │   │   ├── resnet_orig.py
│   │   │   ├── mobilenetv2.py
│   │   │   ├── vgg.py
│   │   │   ├── densenet.py
│   │   │   ├── googlenet.py
│   │   │   ├── resnet.py
│   │   │   └── inception.py
│   │   └── schduler.py
│   ├── global_config
│   │   └── global_config.yaml
│   ├── lib.py
│   ├── pretrained_model.py
│   ├── SS.py
│   ├── CT_AESPA.py
│   ├── CT_sign_SiLU.py
│   ├── CT_cvnet_sign_SiLU.py
│   ├── mobilevit_v2.py
│   ├── resnet_model_2.py
│   ├── CT_cvnet.py
│   ├── CT_cvnet_bn.py
│   ├── CT.py
│   ├── custom_module.py
│   └── AESPA_Baseline.py
├── log
│   ├── CT_val_o.log
│   ├── PA_CT_AT_o7.log
│   ├── CT_val_c7.log
│   └── CT_val_o7.log
├── LICENSE
└── README.md

/SmartPAF.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EfficientPPML/SmartPAF/HEAD/SmartPAF.pdf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | PyTorch_CIFAR10
2 | *.__pycache__
3 | src/PyTorch_CIFAR10/cifar10_models/state_dicts
--------------------------------------------------------------------------------
/image/RelatedWork.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EfficientPPML/SmartPAF/HEAD/image/RelatedWork.png
--------------------------------------------------------------------------------
/image/secure_ML_inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EfficientPPML/SmartPAF/HEAD/image/secure_ML_inference.png
--------------------------------------------------------------------------------
/expriments/convergence_curve.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EfficientPPML/SmartPAF/HEAD/expriments/convergence_curve.pdf
--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/download_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python3 train.py --download_weights 1
3 | rm -rf state_dicts.zip
--------------------------------------------------------------------------------
/src/global_config/global_config.yaml:
--------------------------------------------------------------------------------
1 | PA_AT:
2 |   dropout_enable: False
3 |   group_epochs: 20
4 | Global:
5 |   dataset_dirctory: "/usr/scratch/jianming/PAF_test/dataset/"
--------------------------------------------------------------------------------
/log/CT_val_o.log:
--------------------------------------------------------------------------------
1 | Namespace(model='mobileVitV2', dataset='imagenet_1k', sign_type='polyfit', data_collection=False, working_directory='/home/jianming/work/SmartPAF/cvnet_work/')
2 | 2024-01-22 08:37:17 - DEBUG - Cannot load internal arguments, skipping.
3 | -------------------------------------------------------------------------------- /log/PA_CT_AT_o7.log: -------------------------------------------------------------------------------- 1 | Namespace(model='mobileVitV2', dataset='imagenet_1k', sign_type='polyfit', working_directory='/home/jianming/work/SmartPAF/cvnet_work2/', start_layer_name='None', max_counter=1000, learning_rate=0.0001) 2 | 2024-01-22 18:02:28 - DEBUG  - Cannot load internal arguments, skipping. 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/lib.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import time 4 | import numpy as np 5 | import urllib 6 | import json 7 | import math 8 | import copy 9 | import random 10 | import sys 11 | 12 | from typing import Any, Dict, Tuple, Union 13 | from functools import partial 14 | from argparse import ArgumentParser 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | import torch.backends.cudnn as cudnn 20 | 21 | from torch.optim import Optimizer 22 | from torch.utils.data import DataLoader, Subset 23 | from torch.optim.lr_scheduler import ReduceLROnPlateau 24 | from torch.optim.swa_utils import AveragedModel, SWALR 25 | from torch import Tensor 26 | from torch.quantization import QuantStub, DeQuantStub 27 | from torch.nn.quantized import functional as qF 28 | 29 | from torchvision.transforms import transforms, ToTensor 30 | from torchvision import datasets 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Huy Phan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Georgia Institute of Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/pretrained_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import PyTorch_CIFAR10.cifar10_models.vgg as cifar10_vgg 3 | import resnet_model_1 4 | import resnet_model_2 5 | from mobilevit_v2 import MobileViTv2 6 | from options.opts import get_training_arguments 7 | 8 | import torchvision.models 9 | 10 | def get_pretrained_model(model_name, dataset): 11 | if(model_name == "vgg19_bn" and dataset == "cifar10"): 12 | return cifar10_vgg.vgg19_bn(pretrained = True) 13 | elif(model_name == "vgg19_bn" and dataset == "imagenet_1k"): 14 | return torchvision.models.vgg19_bn(weights="IMAGENET1K_V1") 15 | elif(model_name == "resnet18" and dataset == "imagenet_1k"): 16 | return resnet_model_1.resnet18_fp(pretrained= True) 17 | elif(model_name == "resnet32" and dataset == "cifar100"): 18 | return resnet_model_2.cifar100_resnet32(pretrained= True) 19 | elif(model_name == "resnet20" and dataset == "cifar10"): 20 | return resnet_model_2.cifar10_resnet20(pretrained = True) 21 | elif(model_name == "mobileVitV2" and dataset == "imagenet_1k"): 22 | args_list = ['--common.config-file', '/home/jianming/work/Fast_Switch/NN_Model/ml-cvnets/config/classification/imagenet/mobilevit_v2.yaml', '--common.results-loc', 'mobilevitv2_results/width_0_5_0', '--model.classification.pretrained', '/home/jianming/work/Fast_Switch/NN_Model/ml-cvnets/mobilevitv2_results/width_0_5_0/mobilevitv2-0.5.pt', '--common.override-kwargs', 'model.classification.mitv2.width_multiplier=0.5'] 23 | opts = get_training_arguments(parse_args=True, args=args_list) 24 | model = MobileViTv2(opts) 25 | return model 26 | else: 27 | raise Exception("model name or dataset error") -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/module.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | import torch 3 | from torchmetrics import Accuracy 4 | 5 | from cifar10_models.densenet import densenet121, densenet161, densenet169 6 | from cifar10_models.googlenet 
import googlenet 7 | from cifar10_models.inception import inception_v3 8 | from cifar10_models.mobilenetv2 import mobilenet_v2 9 | from cifar10_models.resnet import resnet18, resnet34, resnet50 10 | from cifar10_models.vgg import vgg11_bn, vgg13_bn, vgg16_bn, vgg19_bn 11 | from schduler import WarmupCosineLR 12 | 13 | all_classifiers = { 14 | "vgg11_bn": vgg11_bn(), 15 | "vgg13_bn": vgg13_bn(), 16 | "vgg16_bn": vgg16_bn(), 17 | "vgg19_bn": vgg19_bn(), 18 | "resnet18": resnet18(), 19 | "resnet34": resnet34(), 20 | "resnet50": resnet50(), 21 | "densenet121": densenet121(), 22 | "densenet161": densenet161(), 23 | "densenet169": densenet169(), 24 | "mobilenet_v2": mobilenet_v2(), 25 | "googlenet": googlenet(), 26 | "inception_v3": inception_v3(), 27 | } 28 | 29 | 30 | class CIFAR10Module(pl.LightningModule): 31 | def __init__(self, hparams): 32 | super().__init__() 33 | self.hparams = hparams 34 | 35 | self.criterion = torch.nn.CrossEntropyLoss() 36 | self.accuracy = Accuracy() 37 | 38 | self.model = all_classifiers[self.hparams.classifier] 39 | 40 | def forward(self, batch): 41 | images, labels = batch 42 | predictions = self.model(images) 43 | loss = self.criterion(predictions, labels) 44 | accuracy = self.accuracy(predictions, labels) 45 | return loss, accuracy * 100 46 | 47 | def training_step(self, batch, batch_nb): 48 | loss, accuracy = self.forward(batch) 49 | self.log("loss/train", loss) 50 | self.log("acc/train", accuracy) 51 | return loss 52 | 53 | def validation_step(self, batch, batch_nb): 54 | loss, accuracy = self.forward(batch) 55 | self.log("loss/val", loss) 56 | self.log("acc/val", accuracy) 57 | 58 | def test_step(self, batch, batch_nb): 59 | loss, accuracy = self.forward(batch) 60 | self.log("acc/test", accuracy) 61 | 62 | def configure_optimizers(self): 63 | optimizer = torch.optim.SGD( 64 | self.model.parameters(), 65 | lr=self.hparams.learning_rate, 66 | weight_decay=self.hparams.weight_decay, 67 | momentum=0.9, 68 | nesterov=True, 69 | ) 70 | total_steps = self.hparams.max_epochs * len(self.train_dataloader()) 71 | scheduler = { 72 | "scheduler": WarmupCosineLR( 73 | optimizer, warmup_epochs=total_steps * 0.3, max_epochs=total_steps 74 | ), 75 | "interval": "step", 76 | "name": "learning_rate", 77 | } 78 | return [optimizer], [scheduler] 79 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | import pytorch_lightning as pl 5 | import requests 6 | from torch.utils.data import DataLoader 7 | from torchvision import transforms as T 8 | from torchvision.datasets import CIFAR10 9 | from tqdm import tqdm 10 | 11 | 12 | class CIFAR10Data(pl.LightningDataModule): 13 | def __init__(self, args): 14 | super().__init__() 15 | self.hparams = args 16 | self.mean = (0.4914, 0.4822, 0.4465) 17 | self.std = (0.2471, 0.2435, 0.2616) 18 | 19 | def download_weights(): 20 | url = ( 21 | "https://rutgers.box.com/shared/static/gkw08ecs797j2et1ksmbg1w5t3idf5r5.zip" 22 | ) 23 | 24 | # Streaming, so we can iterate over the response. 
25 | r = requests.get(url, stream=True) 26 | 27 | # Total size in Mebibyte 28 | total_size = int(r.headers.get("content-length", 0)) 29 | block_size = 2 ** 20 # Mebibyte 30 | t = tqdm(total=total_size, unit="MiB", unit_scale=True) 31 | 32 | with open("state_dicts.zip", "wb") as f: 33 | for data in r.iter_content(block_size): 34 | t.update(len(data)) 35 | f.write(data) 36 | t.close() 37 | 38 | if total_size != 0 and t.n != total_size: 39 | raise Exception("Error, something went wrong") 40 | 41 | print("Download successful. Unzipping file...") 42 | path_to_zip_file = os.path.join(os.getcwd(), "state_dicts.zip") 43 | directory_to_extract_to = os.path.join(os.getcwd(), "cifar10_models") 44 | with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref: 45 | zip_ref.extractall(directory_to_extract_to) 46 | print("Unzip file successful!") 47 | 48 | def train_dataloader(self): 49 | transform = T.Compose( 50 | [ 51 | T.RandomCrop(32, padding=4), 52 | T.RandomHorizontalFlip(), 53 | T.ToTensor(), 54 | T.Normalize(self.mean, self.std), 55 | ] 56 | ) 57 | dataset = CIFAR10(root=self.hparams.data_dir, train=True, transform=transform) 58 | dataloader = DataLoader( 59 | dataset, 60 | batch_size=self.hparams.batch_size, 61 | num_workers=self.hparams.num_workers, 62 | shuffle=True, 63 | drop_last=True, 64 | pin_memory=True, 65 | ) 66 | return dataloader 67 | 68 | def val_dataloader(self): 69 | transform = T.Compose( 70 | [ 71 | T.ToTensor(), 72 | T.Normalize(self.mean, self.std), 73 | ] 74 | ) 75 | dataset = CIFAR10(root=self.hparams.data_dir, train=False, transform=transform) 76 | dataloader = DataLoader( 77 | dataset, 78 | batch_size=self.hparams.batch_size, 79 | num_workers=self.hparams.num_workers, 80 | drop_last=True, 81 | pin_memory=True, 82 | ) 83 | return dataloader 84 | 85 | def test_dataloader(self): 86 | return self.val_dataloader() 87 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import ArgumentParser 3 | 4 | import torch 5 | from pytorch_lightning import Trainer, seed_everything 6 | from pytorch_lightning.callbacks import ModelCheckpoint 7 | from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger 8 | 9 | from data import CIFAR10Data 10 | from module import CIFAR10Module 11 | 12 | 13 | def main(args): 14 | 15 | if bool(args.download_weights): 16 | CIFAR10Data.download_weights() 17 | else: 18 | seed_everything(0) 19 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id 20 | 21 | if args.logger == "wandb": 22 | logger = WandbLogger(name=args.classifier, project="cifar10") 23 | elif args.logger == "tensorboard": 24 | logger = TensorBoardLogger("cifar10", name=args.classifier) 25 | 26 | checkpoint = ModelCheckpoint(monitor="acc/val", mode="max", save_last=False) 27 | 28 | trainer = Trainer( 29 | fast_dev_run=bool(args.dev), 30 | logger=logger if not bool(args.dev + args.test_phase) else None, 31 | gpus=-1, 32 | deterministic=True, 33 | weights_summary=None, 34 | log_every_n_steps=1, 35 | max_epochs=args.max_epochs, 36 | checkpoint_callback=checkpoint, 37 | precision=args.precision, 38 | ) 39 | 40 | model = CIFAR10Module(args) 41 | data = CIFAR10Data(args) 42 | 43 | if bool(args.pretrained): 44 | state_dict = os.path.join( 45 | "cifar10_models", "state_dicts", args.classifier + ".pt" 46 | ) 47 | model.model.load_state_dict(torch.load(state_dict)) 48 | 49 | if bool(args.test_phase): 50 | trainer.test(model, 
data.test_dataloader()) 51 | else: 52 | trainer.fit(model, data) 53 | trainer.test() 54 | 55 | 56 | if __name__ == "__main__": 57 | parser = ArgumentParser() 58 | 59 | # PROGRAM level args 60 | parser.add_argument("--data_dir", type=str, default="/data/huy/cifar10") 61 | parser.add_argument("--download_weights", type=int, default=0, choices=[0, 1]) 62 | parser.add_argument("--test_phase", type=int, default=0, choices=[0, 1]) 63 | parser.add_argument("--dev", type=int, default=0, choices=[0, 1]) 64 | parser.add_argument( 65 | "--logger", type=str, default="tensorboard", choices=["tensorboard", "wandb"] 66 | ) 67 | 68 | # TRAINER args 69 | parser.add_argument("--classifier", type=str, default="resnet18") 70 | parser.add_argument("--pretrained", type=int, default=0, choices=[0, 1]) 71 | 72 | parser.add_argument("--precision", type=int, default=32, choices=[16, 32]) 73 | parser.add_argument("--batch_size", type=int, default=256) 74 | parser.add_argument("--max_epochs", type=int, default=100) 75 | parser.add_argument("--num_workers", type=int, default=8) 76 | parser.add_argument("--gpu_id", type=str, default="3") 77 | 78 | parser.add_argument("--learning_rate", type=float, default=1e-2) 79 | parser.add_argument("--weight_decay", type=float, default=1e-2) 80 | 81 | args = parser.parse_args() 82 | main(args) 83 | -------------------------------------------------------------------------------- /log/CT_val_c7.log: -------------------------------------------------------------------------------- 1 | Namespace(model='mobileVitV2', dataset='imagenet_1k', sign_type='polyfit', data_collection=False, working_directory='/home/jianming/work/SmartPAF/cvnet_work/') 2 | 2024-01-22 08:23:44 - DEBUG  - Cannot load internal arguments, skipping. 3 | Results: loss=1.37407, top1=70.7000, top5=88.8000 4 | 4 5 | name: conv_1.block.act 6 | Swish() 7 | Results: loss=8.77412, top1=0.0000, top5=0.7000 8 | 4 9 | name: layer_1.0.block.exp_1x1.block.act 10 | Swish() 11 | Results: loss=nan, top1=0.1000, top5=0.4000 12 | 4 13 | name: layer_1.0.block.conv_3x3.block.act 14 | Swish() 15 | Results: loss=nan, top1=0.1000, top5=0.5000 16 | 4 17 | name: layer_2.0.block.exp_1x1.block.act 18 | Swish() 19 | Results: loss=nan, top1=0.1000, top5=0.5000 20 | 4 21 | name: layer_2.0.block.conv_3x3.block.act 22 | Swish() 23 | Results: loss=nan, top1=0.1000, top5=0.5000 24 | 4 25 | name: layer_2.1.block.exp_1x1.block.act 26 | Swish() 27 | Results: loss=nan, top1=0.1000, top5=0.5000 28 | 4 29 | name: layer_2.1.block.conv_3x3.block.act 30 | Swish() 31 | Results: loss=nan, top1=0.1000, top5=0.5000 32 | 4 33 | name: layer_3.0.block.exp_1x1.block.act 34 | Swish() 35 | Results: loss=nan, top1=0.1000, top5=0.5000 36 | 4 37 | name: layer_3.0.block.conv_3x3.block.act 38 | Swish() 39 | Results: loss=nan, top1=0.1000, top5=0.5000 40 | 4 41 | name: layer_3.1.local_rep.0.block.act 42 | Swish() 43 | Results: loss=nan, top1=0.1000, top5=0.5000 44 | 4 45 | name: layer_3.1.global_rep.0.pre_norm_ffn.1.block.act 46 | Swish() 47 | Results: loss=nan, top1=0.1000, top5=0.5000 48 | 4 49 | name: layer_3.1.global_rep.1.pre_norm_ffn.1.block.act 50 | Swish() 51 | Results: loss=nan, top1=0.1000, top5=0.5000 52 | 4 53 | name: layer_4.0.block.exp_1x1.block.act 54 | Swish() 55 | Results: loss=nan, top1=0.1000, top5=0.5000 56 | 4 57 | name: layer_4.0.block.conv_3x3.block.act 58 | Swish() 59 | Results: loss=nan, top1=0.1000, top5=0.5000 60 | 4 61 | name: layer_4.1.local_rep.0.block.act 62 | Swish() 63 | Results: loss=nan, top1=0.1000, top5=0.5000 64 | 4 65 | name: 
layer_4.1.global_rep.0.pre_norm_ffn.1.block.act 66 | Swish() 67 | Results: loss=nan, top1=0.1000, top5=0.5000 68 | 4 69 | name: layer_4.1.global_rep.1.pre_norm_ffn.1.block.act 70 | Swish() 71 | Results: loss=nan, top1=0.1000, top5=0.5000 72 | 4 73 | name: layer_4.1.global_rep.2.pre_norm_ffn.1.block.act 74 | Swish() 75 | Results: loss=nan, top1=0.1000, top5=0.5000 76 | 4 77 | name: layer_4.1.global_rep.3.pre_norm_ffn.1.block.act 78 | Swish() 79 | Results: loss=nan, top1=0.1000, top5=0.5000 80 | 4 81 | name: layer_5.0.block.exp_1x1.block.act 82 | Swish() 83 | Results: loss=nan, top1=0.1000, top5=0.5000 84 | 4 85 | name: layer_5.0.block.conv_3x3.block.act 86 | Swish() 87 | Results: loss=nan, top1=0.1000, top5=0.5000 88 | 4 89 | name: layer_5.1.local_rep.0.block.act 90 | Swish() 91 | Results: loss=nan, top1=0.1000, top5=0.5000 92 | 4 93 | name: layer_5.1.global_rep.0.pre_norm_ffn.1.block.act 94 | Swish() 95 | Results: loss=nan, top1=0.1000, top5=0.5000 96 | 4 97 | name: layer_5.1.global_rep.1.pre_norm_ffn.1.block.act 98 | Swish() 99 | Results: loss=nan, top1=0.1000, top5=0.5000 100 | 4 101 | name: layer_5.1.global_rep.2.pre_norm_ffn.1.block.act 102 | Swish() 103 | Results: loss=nan, top1=0.1000, top5=0.5000 104 | -------------------------------------------------------------------------------- /log/CT_val_o7.log: -------------------------------------------------------------------------------- 1 | Namespace(model='mobileVitV2', dataset='imagenet_1k', sign_type='polyfit', data_collection=False, working_directory='/home/jianming/work/SmartPAF/cvnet_work/') 2 | 2024-01-22 08:10:28 - DEBUG  - Cannot load internal arguments, skipping. 3 | Results: loss=1.37407, top1=70.7000, top5=88.8000 4 | 4 5 | name: conv_1.block.act 6 | Swish() 7 | Results: loss=1.74018, top1=61.0000, top5=85.2000 8 | 4 9 | name: layer_1.0.block.exp_1x1.block.act 10 | Swish() 11 | Results: loss=4.94935, top1=13.3000, top5=30.4000 12 | 4 13 | name: layer_1.0.block.conv_3x3.block.act 14 | Swish() 15 | Results: loss=6.86361, top1=2.5000, top5=7.6000 16 | 4 17 | name: layer_2.0.block.exp_1x1.block.act 18 | Swish() 19 | Results: loss=7.63561, top1=0.8000, top5=3.0000 20 | 4 21 | name: layer_2.0.block.conv_3x3.block.act 22 | Swish() 23 | Results: loss=nan, top1=0.1000, top5=1.2000 24 | 4 25 | name: layer_2.1.block.exp_1x1.block.act 26 | Swish() 27 | Results: loss=nan, top1=0.2000, top5=1.0000 28 | 4 29 | name: layer_2.1.block.conv_3x3.block.act 30 | Swish() 31 | Results: loss=nan, top1=0.1000, top5=1.2000 32 | 4 33 | name: layer_3.0.block.exp_1x1.block.act 34 | Swish() 35 | Results: loss=nan, top1=0.1000, top5=0.7000 36 | 4 37 | name: layer_3.0.block.conv_3x3.block.act 38 | Swish() 39 | Results: loss=nan, top1=0.1000, top5=0.7000 40 | 4 41 | name: layer_3.1.local_rep.0.block.act 42 | Swish() 43 | Results: loss=nan, top1=0.1000, top5=0.7000 44 | 4 45 | name: layer_3.1.global_rep.0.pre_norm_ffn.1.block.act 46 | Swish() 47 | Results: loss=nan, top1=0.2000, top5=0.8000 48 | 4 49 | name: layer_3.1.global_rep.1.pre_norm_ffn.1.block.act 50 | Swish() 51 | Results: loss=nan, top1=0.2000, top5=0.5000 52 | 4 53 | name: layer_4.0.block.exp_1x1.block.act 54 | Swish() 55 | Results: loss=nan, top1=0.1000, top5=0.6000 56 | 4 57 | name: layer_4.0.block.conv_3x3.block.act 58 | Swish() 59 | Results: loss=nan, top1=0.2000, top5=0.7000 60 | 4 61 | name: layer_4.1.local_rep.0.block.act 62 | Swish() 63 | Results: loss=nan, top1=0.2000, top5=0.7000 64 | 4 65 | name: layer_4.1.global_rep.0.pre_norm_ffn.1.block.act 66 | Swish() 67 | Results: loss=nan, 
top1=0.2000, top5=0.5000 68 | 4 69 | name: layer_4.1.global_rep.1.pre_norm_ffn.1.block.act 70 | Swish() 71 | Results: loss=nan, top1=0.1000, top5=0.5000 72 | 4 73 | name: layer_4.1.global_rep.2.pre_norm_ffn.1.block.act 74 | Swish() 75 | Results: loss=nan, top1=0.1000, top5=0.6000 76 | 4 77 | name: layer_4.1.global_rep.3.pre_norm_ffn.1.block.act 78 | Swish() 79 | Results: loss=nan, top1=0.1000, top5=0.5000 80 | 4 81 | name: layer_5.0.block.exp_1x1.block.act 82 | Swish() 83 | Results: loss=nan, top1=0.2000, top5=0.6000 84 | 4 85 | name: layer_5.0.block.conv_3x3.block.act 86 | Swish() 87 | Results: loss=nan, top1=0.2000, top5=0.6000 88 | 4 89 | name: layer_5.1.local_rep.0.block.act 90 | Swish() 91 | Results: loss=nan, top1=0.2000, top5=0.7000 92 | 4 93 | name: layer_5.1.global_rep.0.pre_norm_ffn.1.block.act 94 | Swish() 95 | Results: loss=nan, top1=0.1000, top5=0.4000 96 | 4 97 | name: layer_5.1.global_rep.1.pre_norm_ffn.1.block.act 98 | Swish() 99 | Results: loss=nan, top1=0.2000, top5=0.6000 100 | 4 101 | name: layer_5.1.global_rep.2.pre_norm_ffn.1.block.act 102 | Swish() 103 | Results: loss=nan, top1=0.2000, top5=0.7000 104 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch models trained on CIFAR-10 dataset 2 | - I modified [TorchVision](https://pytorch.org/docs/stable/torchvision/models.html) official implementation of popular CNN models, and trained those on CIFAR-10 dataset. 3 | - I changed *number of class, filter size, stride, and padding* in the the original code so that it works with CIFAR-10. 4 | - I also share the **weights** of these models, so you can just load the weights and use them. 5 | - The code is highly re-producible and readable by using PyTorch-Lightning. 6 | 7 | ## Statistics of supported models 8 | | No. | Model | Val. Acc. | No. Params | Size | 9 | |:---:|:-------------|----------:|-----------:|-------:| 10 | | 1 | vgg11_bn | 92.39% | 28.150 M | 108 MB | 11 | | 2 | vgg13_bn | 94.22% | 28.334 M | 109 MB | 12 | | 3 | vgg16_bn | 94.00% | 33.647 M | 129 MB | 13 | | 4 | vgg19_bn | 93.95% | 38.959 M | 149 MB | 14 | | 5 | resnet18 | 93.07% | 11.174 M | 43 MB | 15 | | 6 | resnet34 | 93.34% | 21.282 M | 82 MB | 16 | | 7 | resnet50 | 93.65% | 23.521 M | 91 MB | 17 | | 8 | densenet121 | 94.06% | 6.956 M | 28 MB | 18 | | 9 | densenet161 | 94.07% | 26.483 M | 103 MB | 19 | | 10 | densenet169 | 94.05% | 12.493 M | 49 MB | 20 | | 11 | mobilenet_v2 | 93.91% | 2.237 M | 9 MB | 21 | | 12 | googlenet | 92.85% | 5.491 M | 22 MB | 22 | | 13 | inception_v3 | 93.74% | 21.640 M | 83 MB | 23 | 24 | ## Details Report & Run Logs 25 | Weight and Biases' details report for this project [WandB Report](https://wandb.ai/huyvnphan/cifar10/reports/CIFAR10-Classification-using-PyTorch---VmlldzozOTg0ODQ?accessToken=9m2q1ajhppuziprsq9tlryynvmqbkrbvjdoktrz7o6gtqilmtqbv2r9jjrtb2tqq) 26 | 27 | Weight and Biases' run logs for this project [WandB Run Log](https://wandb.ai/huyvnphan/cifar10). You can see each run hyper-parameters, training accuracy, validation accuracy, loss, time taken. 
28 | 29 | ## How To Cite 30 | [![DOI](https://zenodo.org/badge/195914773.svg)](https://zenodo.org/badge/latestdoi/195914773) 31 | 32 | ## How to use pretrained models 33 | 34 | **Automatically download and extract the weights from Box (933 MB)** 35 | ```python 36 | python train.py --download_weights 1 37 | ``` 38 | Or use [Google Drive](https://drive.google.com/file/d/17fmN8eQdLpq2jIMQ_X0IXDPXfI9oVWgq/view?usp=sharing) backup link (you have to download and extract manually) 39 | 40 | **Load model and run** 41 | ```python 42 | from cifar10_models.vgg import vgg11_bn, vgg13_bn, vgg16_bn, vgg19_bn 43 | 44 | # Untrained model 45 | my_model = vgg11_bn() 46 | 47 | # Pretrained model 48 | my_model = vgg11_bn(pretrained=True) 49 | my_model.eval() # for evaluation 50 | ``` 51 | 52 | If you use your own images, all models expect data to be in range [0, 1] then normalized by 53 | ```python 54 | mean = [0.4914, 0.4822, 0.4465] 55 | std = [0.2471, 0.2435, 0.2616] 56 | ``` 57 | 58 | ## How to train models from scratch 59 | Check the `train.py` to see all available hyper-parameter choices. 60 | To reproduce the same accuracy use the default hyper-parameters 61 | 62 | `python train.py --classifier resnet18` 63 | 64 | ## How to test pretrained models 65 | `python train.py --test_phase 1 --pretrained 1 --classifier resnet18` 66 | 67 | Output 68 | 69 | `{'acc/test': tensor(93.0689, device='cuda:0')}` 70 | 71 | 72 | ## Requirements 73 | **Just to use pretrained models** 74 | - pytorch = 1.7.0 75 | 76 | **To train & test** 77 | - pytorch = 1.7.0 78 | - torchvision = 0.7.0 79 | - tensorboard = 2.2.1 80 | - pytorch-lightning = 1.1.0 81 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/cifar10_models/resnet_orig.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | 6 | # Credit to https://github.com/akamaster/pytorch_resnet_cifar10 7 | 8 | __all__ = ["resnet_orig"] 9 | 10 | 11 | class LambdaLayer(nn.Module): 12 | def __init__(self, lambd): 13 | super(LambdaLayer, self).__init__() 14 | self.lambd = lambd 15 | 16 | def forward(self, x): 17 | return self.lambd(x) 18 | 19 | 20 | class BasicBlock(nn.Module): 21 | expansion = 1 22 | 23 | def __init__(self, in_planes, planes, stride=1, option="A"): 24 | super(BasicBlock, self).__init__() 25 | self.conv1 = nn.Conv2d( 26 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False 27 | ) 28 | self.bn1 = nn.BatchNorm2d(planes) 29 | self.conv2 = nn.Conv2d( 30 | planes, planes, kernel_size=3, stride=1, padding=1, bias=False 31 | ) 32 | self.bn2 = nn.BatchNorm2d(planes) 33 | 34 | self.shortcut = nn.Sequential() 35 | if stride != 1 or in_planes != planes: 36 | if option == "A": 37 | """ 38 | For CIFAR10 ResNet paper uses option A. 
39 | """ 40 | self.shortcut = LambdaLayer( 41 | lambda x: F.pad( 42 | x[:, :, ::2, ::2], 43 | (0, 0, 0, 0, planes // 4, planes // 4), 44 | "constant", 45 | 0, 46 | ) 47 | ) 48 | elif option == "B": 49 | self.shortcut = nn.Sequential( 50 | nn.Conv2d( 51 | in_planes, 52 | self.expansion * planes, 53 | kernel_size=1, 54 | stride=stride, 55 | bias=False, 56 | ), 57 | nn.BatchNorm2d(self.expansion * planes), 58 | ) 59 | 60 | def forward(self, x): 61 | out = F.relu(self.bn1(self.conv1(x))) 62 | out = self.bn2(self.conv2(out)) 63 | out += self.shortcut(x) 64 | out = F.relu(out) 65 | return out 66 | 67 | 68 | class ResNet(nn.Module): 69 | def __init__(self, block, num_blocks, num_classes=10): 70 | super(ResNet, self).__init__() 71 | self.in_planes = 16 72 | 73 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 74 | self.bn1 = nn.BatchNorm2d(16) 75 | self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1) 76 | self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2) 77 | self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2) 78 | self.linear = nn.Linear(64, num_classes) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1] * (num_blocks - 1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | 87 | return nn.Sequential(*layers) 88 | 89 | def forward(self, x): 90 | out = F.relu(self.bn1(self.conv1(x))) 91 | out = self.layer1(out) 92 | out = self.layer2(out) 93 | out = self.layer3(out) 94 | out = F.avg_pool2d(out, out.size()[3]) 95 | out = out.view(out.size(0), -1) 96 | out = self.linear(out) 97 | return out 98 | 99 | 100 | def resnet_orig(pretrained=True, device="cpu"): 101 | net = ResNet(BasicBlock, [3, 3, 3]) 102 | if pretrained: 103 | script_dir = os.path.dirname(__file__) 104 | state_dict = torch.load( 105 | script_dir + "/state_dicts/resnet_orig.pt", map_location=device 106 | ) 107 | net.load_state_dict(state_dict) 108 | return net 109 | -------------------------------------------------------------------------------- /src/SS.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from custom_module import * 3 | from pretrained_model import * 4 | global_config = load_model_yaml("./global_config/", "global_config.yaml") 5 | 6 | def generate_layer_input_scale(model: nn.Module, train_data_loader, layer_nest_dict, directory_path): 7 | if(not os.path.exists(directory_path)): 8 | os.mkdir(directory_path) 9 | data_type = "_scale" 10 | for key in layer_nest_dict: 11 | my_model = model 12 | layer_name = key 13 | print("name: " + layer_name) 14 | collection_layer = Input_scale_collection_layer(layer_name, access_layer(my_model, layer_name)) 15 | replace_layer(my_model, layer_name, collection_layer) 16 | print() 17 | 18 | run_set(my_model, train_data_loader, "cuda:0") 19 | 20 | for key in layer_nest_dict: 21 | layer_name = key 22 | access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt") 23 | data = torch.load(directory_path + layer_name + data_type + ".pt") 24 | print(data) 25 | 26 | 27 | def CT_reset_scale(model, sign_scale, scale_path, scale_ratio, sign_nest_dict): 28 | model = model.to("cuda:0") 29 | for key in sign_nest_dict: 30 | scale_name = key + "_scale.pt" 31 | if(scale_path != None): 32 | sign_scale = torch.load(scale_path + scale_name).item() 33 | print("scale: " + str(sign_scale)) 34 | 
access_layer(model, key).sign.scale = sign_scale 35 | access_layer(model, key).sign.scale_ratio = scale_ratio 36 | 37 | 38 | def SS_replace(model,valid_data_loader, train_data_loader, sign_type, input_data_dirctory): 39 | model = model 40 | sign_nest_dict = generate_sign_nest_dict(model) 41 | dirctory = input_data_dirctory + "model_PR_AT/" 42 | file_name = "model_PR_AT_"+sign_type+".pt" 43 | scale_path = input_data_dirctory + "Scale_" + sign_type + "/" 44 | model = torch.load(dirctory+file_name) 45 | validate(model, valid_data_loader, "cuda:0") 46 | generate_layer_input_scale(model = copy.deepcopy(model), train_data_loader = train_data_loader, layer_nest_dict = sign_nest_dict, directory_path = scale_path) 47 | CT_reset_scale(model = model, sign_scale = 100, scale_path= scale_path, scale_ratio = 1, sign_nest_dict= sign_nest_dict) 48 | validate(model, valid_data_loader, "cuda:0") 49 | file_name2 = "model_PR_AT_SS_"+sign_type+".pt" 50 | torch.save(model, dirctory+file_name2) 51 | 52 | 53 | 54 | 55 | if __name__ == "__main__": 56 | parser = ArgumentParser() 57 | parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32"]) 58 | parser.add_argument("--dataset", type=str,choices=["cifar10", "imagenet_1k", "cifar100"]) 59 | parser.add_argument("-wd", "--working_directory", type=str, default="./working_dirctory/") 60 | parser.add_argument("-st","--sign_type", type=str, default="a7", choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3"]) 61 | args = parser.parse_args() 62 | print(args) 63 | 64 | valid_data_loader = None 65 | train_data_loader = None 66 | valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"]) 67 | train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"]) 68 | 69 | 70 | # if(args.dataset == "cifar10" or args.dataset == "cifar100"): 71 | # valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"]) 72 | # train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"]) 73 | # elif(args.dataset == "imagenet_1k"): 74 | # valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = os.path.join(global_config["Global"]["dataset_dirctory"], args.dataset) ) 75 | # train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = os.path.join(global_config["Global"]["dataset_dirctory"], args.dataset) ) 76 | 77 | model = get_pretrained_model(model_name=args.model, dataset=args.dataset) 78 | 79 | SS_replace(model = model, valid_data_loader=valid_data_loader, train_data_loader=train_data_loader ,sign_type = args.sign_type, input_data_dirctory = args.working_directory) 80 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/cifar10_models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | __all__ = ["MobileNetV2", "mobilenet_v2"] 7 | 8 | 9 | class ConvBNReLU(nn.Sequential): 10 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): 11 | padding = (kernel_size - 1) // 2 12 | super(ConvBNReLU, self).__init__( 13 | nn.Conv2d( 14 | in_planes, 15 | out_planes, 16 | kernel_size, 17 | stride, 18 | padding, 19 | 
groups=groups, 20 | bias=False, 21 | ), 22 | nn.BatchNorm2d(out_planes), 23 | nn.ReLU6(inplace=True), 24 | ) 25 | 26 | 27 | class InvertedResidual(nn.Module): 28 | def __init__(self, inp, oup, stride, expand_ratio): 29 | super(InvertedResidual, self).__init__() 30 | self.stride = stride 31 | assert stride in [1, 2] 32 | 33 | hidden_dim = int(round(inp * expand_ratio)) 34 | self.use_res_connect = self.stride == 1 and inp == oup 35 | 36 | layers = [] 37 | if expand_ratio != 1: 38 | # pw 39 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) 40 | layers.extend( 41 | [ 42 | # dw 43 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), 44 | # pw-linear 45 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 46 | nn.BatchNorm2d(oup), 47 | ] 48 | ) 49 | self.conv = nn.Sequential(*layers) 50 | 51 | def forward(self, x): 52 | if self.use_res_connect: 53 | return x + self.conv(x) 54 | else: 55 | return self.conv(x) 56 | 57 | 58 | class MobileNetV2(nn.Module): 59 | def __init__(self, num_classes=10, width_mult=1.0): 60 | super(MobileNetV2, self).__init__() 61 | block = InvertedResidual 62 | input_channel = 32 63 | last_channel = 1280 64 | 65 | # CIFAR10 66 | inverted_residual_setting = [ 67 | # t, c, n, s 68 | [1, 16, 1, 1], 69 | [6, 24, 2, 1], # Stride 2 -> 1 for CIFAR-10 70 | [6, 32, 3, 2], 71 | [6, 64, 4, 2], 72 | [6, 96, 3, 1], 73 | [6, 160, 3, 2], 74 | [6, 320, 1, 1], 75 | ] 76 | # END 77 | 78 | # building first layer 79 | input_channel = int(input_channel * width_mult) 80 | self.last_channel = int(last_channel * max(1.0, width_mult)) 81 | 82 | # CIFAR10: stride 2 -> 1 83 | features = [ConvBNReLU(3, input_channel, stride=1)] 84 | # END 85 | 86 | # building inverted residual blocks 87 | for t, c, n, s in inverted_residual_setting: 88 | output_channel = int(c * width_mult) 89 | for i in range(n): 90 | stride = s if i == 0 else 1 91 | features.append( 92 | block(input_channel, output_channel, stride, expand_ratio=t) 93 | ) 94 | input_channel = output_channel 95 | # building last several layers 96 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) 97 | # make it nn.Sequential 98 | self.features = nn.Sequential(*features) 99 | 100 | # building classifier 101 | self.classifier = nn.Sequential( 102 | nn.Dropout(0.2), 103 | nn.Linear(self.last_channel, num_classes), 104 | ) 105 | 106 | # weight initialization 107 | for m in self.modules(): 108 | if isinstance(m, nn.Conv2d): 109 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 110 | if m.bias is not None: 111 | nn.init.zeros_(m.bias) 112 | elif isinstance(m, nn.BatchNorm2d): 113 | nn.init.ones_(m.weight) 114 | nn.init.zeros_(m.bias) 115 | elif isinstance(m, nn.Linear): 116 | nn.init.normal_(m.weight, 0, 0.01) 117 | nn.init.zeros_(m.bias) 118 | 119 | def forward(self, x): 120 | x = self.features(x) 121 | x = x.mean([2, 3]) 122 | x = self.classifier(x) 123 | return x 124 | 125 | 126 | def mobilenet_v2(pretrained=False, progress=True, device="cpu", **kwargs): 127 | """ 128 | Constructs a MobileNetV2 architecture from 129 | `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. 
130 | 131 | Args: 132 | pretrained (bool): If True, returns a model pre-trained on ImageNet 133 | progress (bool): If True, displays a progress bar of the download to stderr 134 | """ 135 | model = MobileNetV2(**kwargs) 136 | if pretrained: 137 | script_dir = os.path.dirname(__file__) 138 | state_dict = torch.load( 139 | script_dir + "/state_dicts/mobilenet_v2.pt", map_location=device 140 | ) 141 | model.load_state_dict(state_dict) 142 | return model 143 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/cifar10_models/vgg.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | __all__ = [ 7 | "VGG", 8 | "vgg11_bn", 9 | "vgg13_bn", 10 | "vgg16_bn", 11 | "vgg19_bn", 12 | ] 13 | 14 | 15 | class VGG(nn.Module): 16 | def __init__(self, features, num_classes=10, init_weights=True): 17 | super(VGG, self).__init__() 18 | self.features = features 19 | # CIFAR 10 (7, 7) to (1, 1) 20 | # self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) 21 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 22 | 23 | self.classifier = nn.Sequential( 24 | nn.Linear(512 * 1 * 1, 4096), 25 | # nn.Linear(512 * 7 * 7, 4096), 26 | nn.ReLU(True), 27 | nn.Dropout(), 28 | nn.Linear(4096, 4096), 29 | nn.ReLU(True), 30 | nn.Dropout(), 31 | nn.Linear(4096, num_classes), 32 | ) 33 | if init_weights: 34 | self._initialize_weights() 35 | 36 | def forward(self, x): 37 | x = self.features(x) 38 | x = self.avgpool(x) 39 | x = x.view(x.size(0), -1) 40 | x = self.classifier(x) 41 | return x 42 | 43 | def _initialize_weights(self): 44 | for m in self.modules(): 45 | if isinstance(m, nn.Conv2d): 46 | nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") 47 | if m.bias is not None: 48 | nn.init.constant_(m.bias, 0) 49 | elif isinstance(m, nn.BatchNorm2d): 50 | nn.init.constant_(m.weight, 1) 51 | nn.init.constant_(m.bias, 0) 52 | elif isinstance(m, nn.Linear): 53 | nn.init.normal_(m.weight, 0, 0.01) 54 | nn.init.constant_(m.bias, 0) 55 | 56 | 57 | def make_layers(cfg, batch_norm=False): 58 | layers = [] 59 | in_channels = 3 60 | for v in cfg: 61 | if v == "M": 62 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 63 | else: 64 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 65 | if batch_norm: 66 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 67 | else: 68 | layers += [conv2d, nn.ReLU(inplace=True)] 69 | in_channels = v 70 | return nn.Sequential(*layers) 71 | 72 | 73 | cfgs = { 74 | "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"], 75 | "B": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"], 76 | "D": [ 77 | 64, 78 | 64, 79 | "M", 80 | 128, 81 | 128, 82 | "M", 83 | 256, 84 | 256, 85 | 256, 86 | "M", 87 | 512, 88 | 512, 89 | 512, 90 | "M", 91 | 512, 92 | 512, 93 | 512, 94 | "M", 95 | ], 96 | "E": [ 97 | 64, 98 | 64, 99 | "M", 100 | 128, 101 | 128, 102 | "M", 103 | 256, 104 | 256, 105 | 256, 106 | 256, 107 | "M", 108 | 512, 109 | 512, 110 | 512, 111 | 512, 112 | "M", 113 | 512, 114 | 512, 115 | 512, 116 | 512, 117 | "M", 118 | ], 119 | } 120 | 121 | 122 | def _vgg(arch, cfg, batch_norm, pretrained, progress, device, **kwargs): 123 | if pretrained: 124 | kwargs["init_weights"] = False 125 | model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs) 126 | if pretrained: 127 | script_dir = os.path.dirname(__file__) 128 | state_dict = torch.load( 129 | script_dir + "/state_dicts/" + arch + ".pt", 
map_location=device 130 | ) 131 | model.load_state_dict(state_dict) 132 | return model 133 | 134 | 135 | def vgg11_bn(pretrained=False, progress=True, device="cpu", **kwargs): 136 | """VGG 11-layer model (configuration "A") with batch normalization 137 | 138 | Args: 139 | pretrained (bool): If True, returns a model pre-trained on ImageNet 140 | progress (bool): If True, displays a progress bar of the download to stderr 141 | """ 142 | return _vgg("vgg11_bn", "A", True, pretrained, progress, device, **kwargs) 143 | 144 | 145 | def vgg13_bn(pretrained=False, progress=True, device="cpu", **kwargs): 146 | """VGG 13-layer model (configuration "B") with batch normalization 147 | 148 | Args: 149 | pretrained (bool): If True, returns a model pre-trained on ImageNet 150 | progress (bool): If True, displays a progress bar of the download to stderr 151 | """ 152 | return _vgg("vgg13_bn", "B", True, pretrained, progress, device, **kwargs) 153 | 154 | 155 | def vgg16_bn(pretrained=False, progress=True, device="cpu", **kwargs): 156 | """VGG 16-layer model (configuration "D") with batch normalization 157 | 158 | Args: 159 | pretrained (bool): If True, returns a model pre-trained on ImageNet 160 | progress (bool): If True, displays a progress bar of the download to stderr 161 | """ 162 | return _vgg("vgg16_bn", "D", True, pretrained, progress, device, **kwargs) 163 | 164 | 165 | def vgg19_bn(pretrained=False, progress=True, device="cpu", **kwargs): 166 | """VGG 19-layer model (configuration 'E') with batch normalization 167 | 168 | Args: 169 | pretrained (bool): If True, returns a model pre-trained on ImageNet 170 | progress (bool): If True, displays a progress bar of the download to stderr 171 | """ 172 | return _vgg("vgg19_bn", "E", True, pretrained, progress, device, **kwargs) 173 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/schduler.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | from typing import List 4 | 5 | from torch.optim import Optimizer 6 | from torch.optim.lr_scheduler import _LRScheduler 7 | 8 | 9 | class WarmupCosineLR(_LRScheduler): 10 | """ 11 | Sets the learning rate of each parameter group to follow a linear warmup schedule 12 | between warmup_start_lr and base_lr followed by a cosine annealing schedule between 13 | base_lr and eta_min. 14 | .. warning:: 15 | It is recommended to call :func:`.step()` for :class:`LinearWarmupCosineAnnealingLR` 16 | after each iteration as calling it after each epoch will keep the starting lr at 17 | warmup_start_lr for the first epoch which is 0 in most cases. 18 | .. warning:: 19 | passing epoch to :func:`.step()` is being deprecated and comes with an EPOCH_DEPRECATION_WARNING. 20 | It calls the :func:`_get_closed_form_lr()` method for this scheduler instead of 21 | :func:`get_lr()`. Though this does not change the behavior of the scheduler, when passing 22 | epoch param to :func:`.step()`, the user should call the :func:`.step()` function before calling 23 | train and validation methods. 24 | Args: 25 | optimizer (Optimizer): Wrapped optimizer. 26 | warmup_epochs (int): Maximum number of iterations for linear warmup 27 | max_epochs (int): Maximum number of iterations 28 | warmup_start_lr (float): Learning rate to start the linear warmup. Default: 0. 29 | eta_min (float): Minimum learning rate. Default: 0. 30 | last_epoch (int): The index of last epoch. Default: -1. 
31 | Example: 32 | >>> layer = nn.Linear(10, 1) 33 | >>> optimizer = Adam(layer.parameters(), lr=0.02) 34 | >>> scheduler = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=10, max_epochs=40) 35 | >>> # 36 | >>> # the default case 37 | >>> for epoch in range(40): 38 | ... # train(...) 39 | ... # validate(...) 40 | ... scheduler.step() 41 | >>> # 42 | >>> # passing epoch param case 43 | >>> for epoch in range(40): 44 | ... scheduler.step(epoch) 45 | ... # train(...) 46 | ... # validate(...) 47 | """ 48 | 49 | def __init__( 50 | self, 51 | optimizer: Optimizer, 52 | warmup_epochs: int, 53 | max_epochs: int, 54 | warmup_start_lr: float = 1e-8, 55 | eta_min: float = 1e-8, 56 | last_epoch: int = -1, 57 | ) -> None: 58 | 59 | self.warmup_epochs = warmup_epochs 60 | self.max_epochs = max_epochs 61 | self.warmup_start_lr = warmup_start_lr 62 | self.eta_min = eta_min 63 | 64 | super(WarmupCosineLR, self).__init__(optimizer, last_epoch) 65 | 66 | def get_lr(self) -> List[float]: 67 | """ 68 | Compute learning rate using chainable form of the scheduler 69 | """ 70 | if not self._get_lr_called_within_step: 71 | warnings.warn( 72 | "To get the last learning rate computed by the scheduler, " 73 | "please use `get_last_lr()`.", 74 | UserWarning, 75 | ) 76 | 77 | if self.last_epoch == 0: 78 | return [self.warmup_start_lr] * len(self.base_lrs) 79 | elif self.last_epoch < self.warmup_epochs: 80 | return [ 81 | group["lr"] 82 | + (base_lr - self.warmup_start_lr) / (self.warmup_epochs - 1) 83 | for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups) 84 | ] 85 | elif self.last_epoch == self.warmup_epochs: 86 | return self.base_lrs 87 | elif (self.last_epoch - 1 - self.max_epochs) % ( 88 | 2 * (self.max_epochs - self.warmup_epochs) 89 | ) == 0: 90 | return [ 91 | group["lr"] 92 | + (base_lr - self.eta_min) 93 | * (1 - math.cos(math.pi / (self.max_epochs - self.warmup_epochs))) 94 | / 2 95 | for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups) 96 | ] 97 | 98 | return [ 99 | ( 100 | 1 101 | + math.cos( 102 | math.pi 103 | * (self.last_epoch - self.warmup_epochs) 104 | / (self.max_epochs - self.warmup_epochs) 105 | ) 106 | ) 107 | / ( 108 | 1 109 | + math.cos( 110 | math.pi 111 | * (self.last_epoch - self.warmup_epochs - 1) 112 | / (self.max_epochs - self.warmup_epochs) 113 | ) 114 | ) 115 | * (group["lr"] - self.eta_min) 116 | + self.eta_min 117 | for group in self.optimizer.param_groups 118 | ] 119 | 120 | def _get_closed_form_lr(self) -> List[float]: 121 | """ 122 | Called when epoch is passed as a param to the `step` function of the scheduler. 
123 | """ 124 | if self.last_epoch < self.warmup_epochs: 125 | return [ 126 | self.warmup_start_lr 127 | + self.last_epoch 128 | * (base_lr - self.warmup_start_lr) 129 | / (self.warmup_epochs - 1) 130 | for base_lr in self.base_lrs 131 | ] 132 | 133 | return [ 134 | self.eta_min 135 | + 0.5 136 | * (base_lr - self.eta_min) 137 | * ( 138 | 1 139 | + math.cos( 140 | math.pi 141 | * (self.last_epoch - self.warmup_epochs) 142 | / (self.max_epochs - self.warmup_epochs) 143 | ) 144 | ) 145 | for base_lr in self.base_lrs 146 | ] 147 | -------------------------------------------------------------------------------- /src/CT_AESPA.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from custom_module import * 3 | from pretrained_model import * 4 | import os 5 | 6 | src_dir = os.path.dirname(os.path.abspath(__file__)) 7 | global_config = load_model_yaml( os.path.join(src_dir, "global_config"), "global_config.yaml") 8 | 9 | 10 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path, train_data_loader): 11 | if(not os.path.exists(directory_path)): 12 | os.mkdir(directory_path) 13 | data_type = "_input" 14 | for key in layer_nest_dict: 15 | my_model = copy.deepcopy(model) 16 | layer_nest_dict[key] 17 | if(layer_nest_dict[key]["type"] == "ReLU" and layer_nest_dict[key]["HerPN"]): 18 | layer_name = layer_nest_dict[key]["HerPN"] 19 | else: 20 | layer_name = key 21 | print("name: " + layer_name) 22 | collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name)) 23 | replace_layer(my_model, layer_name, collection_layer) 24 | run_set(my_model, train_data_loader, "cuda:0") 25 | access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt") 26 | # data = torch.load(directory_path + layer_name + data_type + ".pt") 27 | # print(data.shape) 28 | del my_model 29 | 30 | 31 | def generate_data_set(dirctory_path , layer_nest_dict, split_point): 32 | train_path = "train/" 33 | valid_path = "val/" 34 | if(not os.path.exists(dirctory_path + train_path)): 35 | os.mkdir(dirctory_path + train_path) 36 | if(not os.path.exists(dirctory_path + valid_path)): 37 | os.mkdir(dirctory_path + valid_path) 38 | 39 | for key in layer_nest_dict: 40 | data_type = "_input" 41 | if(layer_nest_dict[key]["type"] == "ReLU" and layer_nest_dict[key]["HerPN"]): 42 | layer_name = layer_nest_dict[key]["HerPN"] 43 | else: 44 | layer_name = key 45 | file_name = layer_name + data_type + ".pt" 46 | print(layer_name) 47 | data = torch.load(dirctory_path + file_name) 48 | data = data.reshape((-1, ) + data.shape[2:]) 49 | b=torch.randperm(data.shape[0]) 50 | data = data[b] 51 | train_data = data[0:split_point] 52 | valid_data = data[split_point:data.shape[0]] 53 | torch.save(train_data, dirctory_path + train_path + file_name) 54 | torch.save(valid_data, dirctory_path + valid_path + file_name) 55 | print(train_data.shape) 56 | print(valid_data.shape) 57 | 58 | 59 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path): 60 | sign_nest_dict = generate_sign_nest_dict(model) 61 | validate(model, valid_data_loader) 62 | generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader) 63 | generate_data_set(input_data_save_path , sign_nest_dict, split_point) 64 | 65 | 66 | def CT_train(sign_type, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, pretrain_model): 67 | print(sign_type) 68 | for key in 
sign_nest_dict: 69 | sign_dict = sign_nest_dict[key] 70 | if(sign_dict["type"] == "MaxPool2d"): 71 | continue 72 | relu_name = key 73 | bn_name = sign_dict["HerPN"] 74 | if(sign_dict["type"] == "ReLU" and sign_dict["HerPN"]): 75 | data_name = sign_dict["HerPN"] 76 | num_features = access_layer(pretrain_model, bn_name).num_features 77 | BN_dimension = 2 78 | my_model = HerPN2d(num_features, BN_dimension) 79 | ref_model = nn.Sequential(access_layer(pretrain_model, bn_name), access_layer(pretrain_model, relu_name)) 80 | else: 81 | data_name = key 82 | num_features = 4096 83 | BN_dimension = 1 84 | my_model = HerPN2d(num_features, BN_dimension) 85 | ref_model = access_layer(pretrain_model, relu_name) 86 | 87 | 88 | 89 | optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0) 90 | scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4) 91 | train_path = "train/" 92 | val_path = "val/" 93 | data_type = "_input" 94 | file_name = data_name + data_type + ".pt" 95 | print(file_name) 96 | train_data = torch.load(input_data_dirctory + train_path + file_name) 97 | valid_data = torch.load(input_data_dirctory + val_path + file_name) 98 | for epoch_i in range(40): 99 | train_loss_meter = AverageMeter("train loss") 100 | val_loss_meter = AverageMeter("val loss") 101 | #train 102 | for batch_i in range(int(train_data.shape[0] / batch_size)): 103 | x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 104 | target_y = ref_model.to("cuda:0").forward(x) 105 | actual_y = my_model.to("cuda:0").forward(x) 106 | loss_fun = nn.MSELoss() 107 | my_model.zero_grad() 108 | loss = loss_fun(actual_y, target_y) 109 | train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 110 | loss.backward() 111 | optimizer.step() 112 | train_loss = train_loss_meter.avg 113 | 114 | #valid 115 | for batch_i in range(int(valid_data.shape[0] / batch_size)): 116 | x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 117 | target_y = ref_model.to("cuda:0").forward(x) 118 | actual_y = my_model.forward(x) 119 | loss_fun = nn.MSELoss() 120 | loss = loss_fun(actual_y, target_y) 121 | val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 122 | val_loss = val_loss_meter.avg 123 | 124 | scheduler.step(val_loss) 125 | 126 | print( 127 | f"Epoch:{epoch_i + 1}" 128 | + f" Train Loss:{train_loss:.10f}" 129 | + f" Val Loss: {val_loss:.10f}" 130 | ) 131 | 132 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 133 | coef_save_dirctory = input_data_dirctory + folder_name 134 | if(not os.path.exists(coef_save_dirctory)): 135 | os.mkdir(coef_save_dirctory) 136 | file_name = key + "_herpn.pt" 137 | torch.save(my_model, coef_save_dirctory + file_name) 138 | print("save: " + folder_name + file_name) 139 | print("\n") 140 | 141 | 142 | if __name__ == "__main__": 143 | parser = ArgumentParser() 144 | parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32"]) 145 | parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"]) 146 | parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "herph"]) 147 | parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False]) 148 | parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/") 149 | 150 | args = parser.parse_args() 151 | print(args) 152 | if(args.dataset == "cifar10" or args.dataset == 
"cifar100"): 153 | split_point = 45000 154 | batch_size = 100 155 | elif(args.dataset == "imagenet_1k"): 156 | split_point = 900 157 | batch_size = 40 158 | model = get_pretrained_model(model_name=args.model, dataset=args.dataset) 159 | if(args.data_collection): 160 | data_collection(model = model, 161 | valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"] ), 162 | train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] ), 163 | split_point = split_point, input_data_save_path = args.working_directory) 164 | 165 | else: 166 | nest_dict = generate_sign_nest_dict(model) 167 | CT_train(sign_type = args.sign_type, sign_scale = 0, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size, 168 | input_data_dirctory = args.working_directory , output_floder_suffix= "test", pretrain_model=model) 169 | -------------------------------------------------------------------------------- /src/CT_sign_SiLU.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from custom_module import * 3 | from pretrained_model import * 4 | import numpy as np 5 | global_config = load_model_yaml("./global_config/", "global_config.yaml") 6 | 7 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path, train_data_loader): 8 | if(not os.path.exists(directory_path)): 9 | os.mkdir(directory_path) 10 | data_type = "_input" 11 | for key in layer_nest_dict: 12 | my_model = copy.deepcopy(model) 13 | layer_name = key 14 | print("name: " + layer_name) 15 | collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name)) 16 | replace_layer(my_model, layer_name, collection_layer) 17 | run_set(my_model, train_data_loader, "cuda:0") 18 | access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt") 19 | data = torch.load(directory_path + layer_name + data_type + ".pt") 20 | print(data.shape) 21 | 22 | 23 | def generate_data_set(dirctory_path , layer_nest_dict, split_point): 24 | train_path = "train/" 25 | valid_path = "val/" 26 | if(not os.path.exists(dirctory_path + train_path)): 27 | os.mkdir(dirctory_path + train_path) 28 | if(not os.path.exists(dirctory_path + valid_path)): 29 | os.mkdir(dirctory_path + valid_path) 30 | 31 | for key in layer_nest_dict: 32 | data_type = "_input" 33 | layer_name = key 34 | file_name = layer_name + data_type + ".pt" 35 | print(layer_name) 36 | data = torch.load(dirctory_path + file_name) 37 | b=torch.randperm(data.shape[0]) 38 | data = data[b] 39 | train_data = data[0:split_point] 40 | valid_data = data[split_point:data.shape[0]] 41 | torch.save(train_data, dirctory_path + train_path + file_name) 42 | torch.save(valid_data, dirctory_path + valid_path + file_name) 43 | print(train_data.shape) 44 | print(valid_data.shape) 45 | 46 | 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path): 48 | sign_nest_dict = generate_sign_nest_dict(model) 49 | validate(model, valid_data_loader) 50 | generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader) 51 | generate_data_set(input_data_save_path , sign_nest_dict, split_point) 52 | 53 | 54 | def CT_train(sign_type, degree, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, epoch = 40): 55 | sign_param_dict = 
Sign_parameter_generator().param_nest_dict[sign_type] 56 | print(sign_type) 57 | for key in sign_nest_dict: 58 | sign_dict = sign_nest_dict[key] 59 | train_path = "train/" 60 | val_path = "val/" 61 | data_type = "_input" 62 | file_name = key + data_type + ".pt" 63 | 64 | sign_module = Sign_minmax_layer(coef=sign_param_dict["coef"], degree=sign_param_dict["degree"],scale=sign_scale) 65 | my_model = ReLU_sign_layer(sign = sign_module) 66 | ref_model = nn.SiLU() 67 | 68 | 69 | 70 | optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0) 71 | scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4) 72 | 73 | print(file_name) 74 | train_data = torch.load(input_data_dirctory + train_path + file_name) 75 | valid_data = torch.load(input_data_dirctory + val_path + file_name) 76 | for epoch_i in range(epoch): 77 | train_loss_meter = AverageMeter("train loss") 78 | val_loss_meter = AverageMeter("val loss") 79 | #train 80 | for batch_i in range(int(train_data.shape[0] / batch_size)): 81 | x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 82 | target_y = ref_model.to("cuda:0").forward(x) 83 | actual_y = my_model.forward(x) 84 | loss_fun = nn.MSELoss() 85 | my_model.zero_grad() 86 | loss = loss_fun(actual_y, target_y) 87 | train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 88 | loss.backward() 89 | optimizer.step() 90 | train_loss = train_loss_meter.avg 91 | 92 | #valid 93 | for batch_i in range(int(valid_data.shape[0] / batch_size)): 94 | x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 95 | target_y = ref_model.to("cuda:0").forward(x) 96 | actual_y = my_model.forward(x) 97 | loss_fun = nn.MSELoss() 98 | loss = loss_fun(actual_y, target_y) 99 | val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 100 | val_loss = val_loss_meter.avg 101 | 102 | scheduler.step(val_loss) 103 | 104 | print( 105 | f"Epoch:{epoch_i + 1}" 106 | + f" Train Loss:{train_loss:.10f}" 107 | + f" Val Loss: {val_loss:.10f}" 108 | ) 109 | 110 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 111 | coef_save_dirctory = input_data_dirctory + folder_name 112 | if(not os.path.exists(coef_save_dirctory)): 113 | os.mkdir(coef_save_dirctory) 114 | file_name = key + "_coef.pt" 115 | my_model.sign.save_coef(coef_save_dirctory + file_name) 116 | print("save: " + folder_name + file_name) 117 | print("\n") 118 | 119 | def CT_val(model: nn.Module , layer_nest_dict, directory_path, val_data_loader, sign_type, output_floder_suffix): 120 | if(not os.path.exists(directory_path)): 121 | os.mkdir(directory_path) 122 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 123 | for key in layer_nest_dict: 124 | file_name = key + "_coef.pt" 125 | coef = torch.load(directory_path + folder_name + file_name) 126 | degree = len(coef.tolist()[0]) 127 | sign_module_CT = Sigmoid_minmax_layer(coef=coef, degree=[degree],scale=1) 128 | rlays = SiLU_minmax_layer(sigmoid=sign_module_CT) 129 | layer_name = key 130 | print("name: " + layer_name) 131 | print(access_layer(model, layer_name)) 132 | replace_layer(model, layer_name, rlays) 133 | validate(model, val_data_loader) 134 | 135 | 136 | 137 | 138 | 139 | if __name__ == "__main__": 140 | parser = ArgumentParser() 141 | parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "mobileVitV2"]) 142 | parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"]) 143 | 
parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "polyfit"]) 144 | parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False]) 145 | parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/") 146 | 147 | args = parser.parse_args() 148 | print(args) 149 | if(args.dataset == "cifar10" or args.dataset == "cifar100"): 150 | split_point = 45000 151 | batch_size = 100 152 | elif(args.dataset == "imagenet_1k"): 153 | split_point = 900 154 | batch_size = 50 155 | model = get_pretrained_model(model_name=args.model, dataset=args.dataset) 156 | # print(model) 157 | 158 | valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"]) 159 | train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] ) 160 | 161 | if(args.data_collection): 162 | data_collection(model = model, 163 | 164 | split_point = split_point, input_data_save_path = args.working_directory) 165 | 166 | else: 167 | nest_dict = generate_sign_nest_dict(model) 168 | CT_train(sign_type = args.sign_type, degree=0, sign_scale = 0, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size, 169 | input_data_dirctory = args.working_directory , output_floder_suffix= "dynamic", epoch=40) 170 | # CT_val(model=model, layer_nest_dict=nest_dict, directory_path=args.working_directory, 171 | # val_data_loader = valid_data_loader,sign_type=args.sign_type, output_floder_suffix="polyfit_o") 172 | -------------------------------------------------------------------------------- /src/CT_cvnet_sign_SiLU.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from custom_module import * 3 | from pretrained_model import * 4 | import numpy as np 5 | global_config = load_model_yaml("./global_config/", "global_config.yaml") 6 | 7 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path, train_data_loader): 8 | if(not os.path.exists(directory_path)): 9 | os.mkdir(directory_path) 10 | data_type = "_input" 11 | for key in layer_nest_dict: 12 | my_model = copy.deepcopy(model) 13 | layer_name = key 14 | print("name: " + layer_name) 15 | collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name)) 16 | replace_layer(my_model, layer_name, collection_layer) 17 | run_set(my_model, train_data_loader, "cuda:0") 18 | access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt") 19 | data = torch.load(directory_path + layer_name + data_type + ".pt") 20 | print(data.shape) 21 | 22 | 23 | def generate_data_set(dirctory_path , layer_nest_dict, split_point): 24 | train_path = "train/" 25 | valid_path = "val/" 26 | if(not os.path.exists(dirctory_path + train_path)): 27 | os.mkdir(dirctory_path + train_path) 28 | if(not os.path.exists(dirctory_path + valid_path)): 29 | os.mkdir(dirctory_path + valid_path) 30 | 31 | for key in layer_nest_dict: 32 | data_type = "_input" 33 | layer_name = key 34 | file_name = layer_name + data_type + ".pt" 35 | print(layer_name) 36 | data = torch.load(dirctory_path + file_name) 37 | b=torch.randperm(data.shape[0]) 38 | data = data[b] 39 | train_data = data[0:split_point] 40 | valid_data = data[split_point:data.shape[0]] 41 | torch.save(train_data, dirctory_path + train_path + file_name) 42 | torch.save(valid_data, dirctory_path + 
valid_path + file_name) 43 | print(train_data.shape) 44 | print(valid_data.shape) 45 | 46 | 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path): 48 | sign_nest_dict = generate_sign_nest_dict(model) 49 | validate(model, valid_data_loader) 50 | generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader) 51 | generate_data_set(input_data_save_path , sign_nest_dict, split_point) 52 | 53 | 54 | def CT_train(sign_type, degree, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, epoch = 40): 55 | sign_param_dict = Sign_parameter_generator().param_nest_dict[sign_type] 56 | print(sign_type) 57 | for key in sign_nest_dict: 58 | sign_dict = sign_nest_dict[key] 59 | train_path = "train/" 60 | val_path = "val/" 61 | data_type = "_input" 62 | file_name = key + data_type + ".pt" 63 | 64 | sign_module = Sign_minmax_layer(coef=sign_param_dict["coef"], degree=sign_param_dict["degree"],scale=sign_scale) 65 | my_model = ReLU_sign_layer(sign = sign_module) 66 | ref_model = nn.SiLU() 67 | 68 | 69 | 70 | optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0) 71 | scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4) 72 | 73 | print(file_name) 74 | train_data = torch.load(input_data_dirctory + train_path + file_name) 75 | valid_data = torch.load(input_data_dirctory + val_path + file_name) 76 | for epoch_i in range(epoch): 77 | train_loss_meter = AverageMeter("train loss") 78 | val_loss_meter = AverageMeter("val loss") 79 | #train 80 | for batch_i in range(int(train_data.shape[0] / batch_size)): 81 | x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 82 | target_y = ref_model.to("cuda:0").forward(x) 83 | actual_y = my_model.forward(x) 84 | loss_fun = nn.MSELoss() 85 | my_model.zero_grad() 86 | loss = loss_fun(actual_y, target_y) 87 | train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 88 | loss.backward() 89 | optimizer.step() 90 | train_loss = train_loss_meter.avg 91 | 92 | #valid 93 | for batch_i in range(int(valid_data.shape[0] / batch_size)): 94 | x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 95 | target_y = ref_model.to("cuda:0").forward(x) 96 | actual_y = my_model.forward(x) 97 | loss_fun = nn.MSELoss() 98 | loss = loss_fun(actual_y, target_y) 99 | val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 100 | val_loss = val_loss_meter.avg 101 | 102 | scheduler.step(val_loss) 103 | 104 | print( 105 | f"Epoch:{epoch_i + 1}" 106 | + f" Train Loss:{train_loss:.10f}" 107 | + f" Val Loss: {val_loss:.10f}" 108 | ) 109 | 110 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 111 | coef_save_dirctory = input_data_dirctory + folder_name 112 | if(not os.path.exists(coef_save_dirctory)): 113 | os.mkdir(coef_save_dirctory) 114 | file_name = key + "_coef.pt" 115 | my_model.sign.save_coef(coef_save_dirctory + file_name) 116 | print("save: " + folder_name + file_name) 117 | print("\n") 118 | 119 | def CT_val(model: nn.Module , layer_nest_dict, directory_path, val_data_loader, sign_type, output_floder_suffix): 120 | if(not os.path.exists(directory_path)): 121 | os.mkdir(directory_path) 122 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 123 | for key in layer_nest_dict: 124 | file_name = key + "_coef.pt" 125 | coef = torch.load(directory_path + folder_name + file_name) 126 | degree = len(coef.tolist()[0]) 
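# Note: the polynomial degree is recovered from the saved coefficient tensor itself.
# Assuming `coef` is stored with shape (1, d), len(coef.tolist()[0]) == d; e.g. a
# hypothetical torch.tensor([[0.1, -0.02, 0.003, -0.0004]]) would give degree 4. The
# rebuilt Sigmoid_minmax_layer / SiLU_minmax_layer pair below then replaces the original
# activation before the whole model is re-validated.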
127 | sign_module_CT = Sigmoid_minmax_layer(coef=coef, degree=[degree],scale=1) 128 | rlays = SiLU_minmax_layer(sigmoid=sign_module_CT) 129 | layer_name = key 130 | print("name: " + layer_name) 131 | print(access_layer(model, layer_name)) 132 | replace_layer(model, layer_name, rlays) 133 | validate(model, val_data_loader) 134 | 135 | 136 | 137 | 138 | 139 | if __name__ == "__main__": 140 | parser = ArgumentParser() 141 | parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "mobileVitV2"]) 142 | parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"]) 143 | parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "polyfit"]) 144 | parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False]) 145 | parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/") 146 | 147 | args = parser.parse_args() 148 | print(args) 149 | if(args.dataset == "cifar10" or args.dataset == "cifar100"): 150 | split_point = 45000 151 | batch_size = 100 152 | elif(args.dataset == "imagenet_1k"): 153 | split_point = 900 154 | batch_size = 50 155 | model = get_pretrained_model(model_name=args.model, dataset=args.dataset) 156 | # print(model) 157 | 158 | valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"]) 159 | train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] ) 160 | 161 | if(args.data_collection): 162 | data_collection(model = model, 163 | 164 | split_point = split_point, input_data_save_path = args.working_directory) 165 | 166 | else: 167 | nest_dict = generate_sign_nest_dict(model) 168 | CT_train(sign_type = args.sign_type, degree=0, sign_scale = 0, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size, 169 | input_data_dirctory = args.working_directory , output_floder_suffix= "dynamic", epoch=40) 170 | # CT_val(model=model, layer_nest_dict=nest_dict, directory_path=args.working_directory, 171 | # val_data_loader = valid_data_loader,sign_type=args.sign_type, output_floder_suffix="polyfit_o") 172 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/cifar10_models/densenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | __all__ = ["DenseNet", "densenet121", "densenet169", "densenet161"] 9 | 10 | 11 | class _DenseLayer(nn.Sequential): 12 | def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): 13 | super(_DenseLayer, self).__init__() 14 | self.add_module("norm1", nn.BatchNorm2d(num_input_features)), 15 | self.add_module("relu1", nn.ReLU(inplace=True)), 16 | self.add_module( 17 | "conv1", 18 | nn.Conv2d( 19 | num_input_features, 20 | bn_size * growth_rate, 21 | kernel_size=1, 22 | stride=1, 23 | bias=False, 24 | ), 25 | ), 26 | self.add_module("norm2", nn.BatchNorm2d(bn_size * growth_rate)), 27 | self.add_module("relu2", nn.ReLU(inplace=True)), 28 | self.add_module( 29 | "conv2", 30 | nn.Conv2d( 31 | bn_size * growth_rate, 32 | growth_rate, 33 | kernel_size=3, 34 | stride=1, 35 | padding=1, 36 | bias=False, 37 | ), 38 | ), 39 | self.drop_rate = drop_rate 40 | 41 | def forward(self, x): 42 | new_features = 
super(_DenseLayer, self).forward(x) 43 | if self.drop_rate > 0: 44 | new_features = F.dropout( 45 | new_features, p=self.drop_rate, training=self.training 46 | ) 47 | return torch.cat([x, new_features], 1) 48 | 49 | 50 | class _DenseBlock(nn.Sequential): 51 | def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): 52 | super(_DenseBlock, self).__init__() 53 | for i in range(num_layers): 54 | layer = _DenseLayer( 55 | num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate 56 | ) 57 | self.add_module("denselayer%d" % (i + 1), layer) 58 | 59 | 60 | class _Transition(nn.Sequential): 61 | def __init__(self, num_input_features, num_output_features): 62 | super(_Transition, self).__init__() 63 | self.add_module("norm", nn.BatchNorm2d(num_input_features)) 64 | self.add_module("relu", nn.ReLU(inplace=True)) 65 | self.add_module( 66 | "conv", 67 | nn.Conv2d( 68 | num_input_features, 69 | num_output_features, 70 | kernel_size=1, 71 | stride=1, 72 | bias=False, 73 | ), 74 | ) 75 | self.add_module("pool", nn.AvgPool2d(kernel_size=2, stride=2)) 76 | 77 | 78 | class DenseNet(nn.Module): 79 | r"""Densenet-BC model class, based on 80 | `"Densely Connected Convolutional Networks" `_ 81 | 82 | Args: 83 | growth_rate (int) - how many filters to add each layer (`k` in paper) 84 | block_config (list of 4 ints) - how many layers in each pooling block 85 | num_init_features (int) - the number of filters to learn in the first convolution layer 86 | bn_size (int) - multiplicative factor for number of bottle neck layers 87 | (i.e. bn_size * k features in the bottleneck layer) 88 | drop_rate (float) - dropout rate after each dense layer 89 | num_classes (int) - number of classification classes 90 | """ 91 | 92 | def __init__( 93 | self, 94 | growth_rate=32, 95 | block_config=(6, 12, 24, 16), 96 | num_init_features=64, 97 | bn_size=4, 98 | drop_rate=0, 99 | num_classes=10, 100 | ): 101 | 102 | super(DenseNet, self).__init__() 103 | 104 | # First convolution 105 | 106 | # CIFAR-10: kernel_size 7 ->3, stride 2->1, padding 3->1 107 | self.features = nn.Sequential( 108 | OrderedDict( 109 | [ 110 | ( 111 | "conv0", 112 | nn.Conv2d( 113 | 3, 114 | num_init_features, 115 | kernel_size=3, 116 | stride=1, 117 | padding=1, 118 | bias=False, 119 | ), 120 | ), 121 | ("norm0", nn.BatchNorm2d(num_init_features)), 122 | ("relu0", nn.ReLU(inplace=True)), 123 | ("pool0", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), 124 | ] 125 | ) 126 | ) 127 | # END 128 | 129 | # Each denseblock 130 | num_features = num_init_features 131 | for i, num_layers in enumerate(block_config): 132 | block = _DenseBlock( 133 | num_layers=num_layers, 134 | num_input_features=num_features, 135 | bn_size=bn_size, 136 | growth_rate=growth_rate, 137 | drop_rate=drop_rate, 138 | ) 139 | self.features.add_module("denseblock%d" % (i + 1), block) 140 | num_features = num_features + num_layers * growth_rate 141 | if i != len(block_config) - 1: 142 | trans = _Transition( 143 | num_input_features=num_features, 144 | num_output_features=num_features // 2, 145 | ) 146 | self.features.add_module("transition%d" % (i + 1), trans) 147 | num_features = num_features // 2 148 | 149 | # Final batch norm 150 | self.features.add_module("norm5", nn.BatchNorm2d(num_features)) 151 | 152 | # Linear layer 153 | self.classifier = nn.Linear(num_features, num_classes) 154 | 155 | # Official init from torch repo. 
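# Initialization (next lines): Conv2d weights get Kaiming-normal init, BatchNorm2d
# weights/biases are set to 1/0, and Linear biases to 0, matching the upstream
# torchvision DenseNet initialization.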
156 | for m in self.modules(): 157 | if isinstance(m, nn.Conv2d): 158 | nn.init.kaiming_normal_(m.weight) 159 | elif isinstance(m, nn.BatchNorm2d): 160 | nn.init.constant_(m.weight, 1) 161 | nn.init.constant_(m.bias, 0) 162 | elif isinstance(m, nn.Linear): 163 | nn.init.constant_(m.bias, 0) 164 | 165 | def forward(self, x): 166 | features = self.features(x) 167 | out = F.relu(features, inplace=True) 168 | out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1) 169 | out = self.classifier(out) 170 | return out 171 | 172 | 173 | def _densenet( 174 | arch, 175 | growth_rate, 176 | block_config, 177 | num_init_features, 178 | pretrained, 179 | progress, 180 | device, 181 | **kwargs 182 | ): 183 | model = DenseNet(growth_rate, block_config, num_init_features, **kwargs) 184 | if pretrained: 185 | script_dir = os.path.dirname(__file__) 186 | state_dict = torch.load( 187 | script_dir + "/state_dicts/" + arch + ".pt", map_location=device 188 | ) 189 | model.load_state_dict(state_dict) 190 | return model 191 | 192 | 193 | def densenet121(pretrained=False, progress=True, device="cpu", **kwargs): 194 | r"""Densenet-121 model from 195 | `"Densely Connected Convolutional Networks" `_ 196 | 197 | Args: 198 | pretrained (bool): If True, returns a model pre-trained on ImageNet 199 | progress (bool): If True, displays a progress bar of the download to stderr 200 | """ 201 | return _densenet( 202 | "densenet121", 32, (6, 12, 24, 16), 64, pretrained, progress, device, **kwargs 203 | ) 204 | 205 | 206 | def densenet161(pretrained=False, progress=True, device="cpu", **kwargs): 207 | r"""Densenet-161 model from 208 | `"Densely Connected Convolutional Networks" `_ 209 | 210 | Args: 211 | pretrained (bool): If True, returns a model pre-trained on ImageNet 212 | progress (bool): If True, displays a progress bar of the download to stderr 213 | """ 214 | return _densenet( 215 | "densenet161", 48, (6, 12, 36, 24), 96, pretrained, progress, device, **kwargs 216 | ) 217 | 218 | 219 | def densenet169(pretrained=False, progress=True, device="cpu", **kwargs): 220 | r"""Densenet-169 model from 221 | `"Densely Connected Convolutional Networks" `_ 222 | 223 | Args: 224 | pretrained (bool): If True, returns a model pre-trained on ImageNet 225 | progress (bool): If True, displays a progress bar of the download to stderr 226 | """ 227 | return _densenet( 228 | "densenet169", 32, (6, 12, 32, 32), 64, pretrained, progress, device, **kwargs 229 | ) 230 | -------------------------------------------------------------------------------- /src/mobilevit_v2.py: -------------------------------------------------------------------------------- 1 | # 2 | # For licensing see accompanying LICENSE file. 3 | # Copyright (C) 2023 Apple Inc. All Rights Reserved. 
4 | # 5 | 6 | import argparse 7 | from typing import Dict, Optional, Tuple 8 | 9 | import torch 10 | from torch import nn 11 | 12 | from cvnets.layers import ConvLayer2d, GlobalPool, Identity, LinearLayer 13 | from cvnets.models.classification.base_image_encoder import BaseImageEncoder 14 | from cvnets.models.classification.config.mobilevit_v2 import get_configuration 15 | from cvnets.modules import InvertedResidual 16 | from cvnets.modules import MobileViTBlockv2 as Block 17 | 18 | class MobileViTv2(BaseImageEncoder): 19 | """ 20 | This class defines the `MobileViTv2 `_ architecture 21 | """ 22 | 23 | def __init__(self, opts, *args, **kwargs) -> None: 24 | num_classes = getattr(opts, "model.classification.n_classes", 1000) 25 | pool_type = getattr(opts, "model.layer.global_pool", "mean") 26 | 27 | mobilevit_config = get_configuration(opts=opts) 28 | image_channels = mobilevit_config["layer0"]["img_channels"] 29 | out_channels = mobilevit_config["layer0"]["out_channels"] 30 | 31 | super().__init__(opts, *args, **kwargs) 32 | 33 | # store model configuration in a dictionary 34 | self.model_conf_dict = dict() 35 | self.conv_1 = ConvLayer2d( 36 | opts=opts, 37 | in_channels=image_channels, 38 | out_channels=out_channels, 39 | kernel_size=3, 40 | stride=2, 41 | use_norm=True, 42 | use_act=True, 43 | ) 44 | 45 | self.model_conf_dict["conv1"] = {"in": image_channels, "out": out_channels} 46 | 47 | in_channels = out_channels 48 | self.layer_1, out_channels = self._make_layer( 49 | opts=opts, input_channel=in_channels, cfg=mobilevit_config["layer1"] 50 | ) 51 | self.model_conf_dict["layer1"] = {"in": in_channels, "out": out_channels} 52 | 53 | in_channels = out_channels 54 | self.layer_2, out_channels = self._make_layer( 55 | opts=opts, input_channel=in_channels, cfg=mobilevit_config["layer2"] 56 | ) 57 | self.model_conf_dict["layer2"] = {"in": in_channels, "out": out_channels} 58 | 59 | in_channels = out_channels 60 | self.layer_3, out_channels = self._make_layer( 61 | opts=opts, input_channel=in_channels, cfg=mobilevit_config["layer3"] 62 | ) 63 | self.model_conf_dict["layer3"] = {"in": in_channels, "out": out_channels} 64 | 65 | in_channels = out_channels 66 | self.layer_4, out_channels = self._make_layer( 67 | opts=opts, 68 | input_channel=in_channels, 69 | cfg=mobilevit_config["layer4"], 70 | dilate=self.dilate_l4, 71 | ) 72 | self.model_conf_dict["layer4"] = {"in": in_channels, "out": out_channels} 73 | 74 | in_channels = out_channels 75 | self.layer_5, out_channels = self._make_layer( 76 | opts=opts, 77 | input_channel=in_channels, 78 | cfg=mobilevit_config["layer5"], 79 | dilate=self.dilate_l5, 80 | ) 81 | self.model_conf_dict["layer5"] = {"in": in_channels, "out": out_channels} 82 | 83 | self.conv_1x1_exp = Identity() 84 | self.model_conf_dict["exp_before_cls"] = { 85 | "in": out_channels, 86 | "out": out_channels, 87 | } 88 | 89 | self.classifier = nn.Sequential( 90 | GlobalPool(pool_type=pool_type, keep_dim=False), 91 | LinearLayer(in_features=out_channels, out_features=num_classes, bias=True), 92 | ) 93 | 94 | # check model 95 | self.check_model() 96 | 97 | # weight initialization 98 | self.reset_parameters(opts=opts) 99 | 100 | self.load_state_dict(torch.load("/home/jianming/work/Fast_Switch/NN_Model/ml-cvnets/mobilevitv2_results/width_0_5_0/mobilevitv2-0.5.pt")) 101 | 102 | 103 | @classmethod 104 | def add_arguments(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser: 105 | group = parser.add_argument_group(title=cls.__name__) 106 | group.add_argument( 107 | 
"--model.classification.mitv2.attn-dropout", 108 | type=float, 109 | default=0.0, 110 | help="Dropout in attention layer. Defaults to 0.0", 111 | ) 112 | group.add_argument( 113 | "--model.classification.mitv2.ffn-dropout", 114 | type=float, 115 | default=0.0, 116 | help="Dropout between FFN layers. Defaults to 0.0", 117 | ) 118 | group.add_argument( 119 | "--model.classification.mitv2.dropout", 120 | type=float, 121 | default=0.0, 122 | help="Dropout in attention layer. Defaults to 0.0", 123 | ) 124 | group.add_argument( 125 | "--model.classification.mitv2.width-multiplier", 126 | type=float, 127 | default=1.0, 128 | help="Width multiplier. Defaults to 1.0", 129 | ) 130 | group.add_argument( 131 | "--model.classification.mitv2.attn-norm-layer", 132 | type=str, 133 | default="layer_norm_2d", 134 | help="Norm layer in attention block. Defaults to LayerNorm", 135 | ) 136 | return parser 137 | 138 | def _make_layer( 139 | self, opts, input_channel, cfg: Dict, dilate: Optional[bool] = False 140 | ) -> Tuple[nn.Sequential, int]: 141 | block_type = cfg.get("block_type", "mobilevit") 142 | if block_type.lower() == "mobilevit": 143 | return self._make_mit_layer( 144 | opts=opts, input_channel=input_channel, cfg=cfg, dilate=dilate 145 | ) 146 | else: 147 | return self._make_mobilenet_layer( 148 | opts=opts, input_channel=input_channel, cfg=cfg 149 | ) 150 | 151 | @staticmethod 152 | def _make_mobilenet_layer( 153 | opts, input_channel: int, cfg: Dict 154 | ) -> Tuple[nn.Sequential, int]: 155 | output_channels = cfg.get("out_channels") 156 | num_blocks = cfg.get("num_blocks", 2) 157 | expand_ratio = cfg.get("expand_ratio", 4) 158 | block = [] 159 | 160 | for i in range(num_blocks): 161 | stride = cfg.get("stride", 1) if i == 0 else 1 162 | 163 | layer = InvertedResidual( 164 | opts=opts, 165 | in_channels=input_channel, 166 | out_channels=output_channels, 167 | stride=stride, 168 | expand_ratio=expand_ratio, 169 | ) 170 | block.append(layer) 171 | input_channel = output_channels 172 | return nn.Sequential(*block), input_channel 173 | 174 | def _make_mit_layer( 175 | self, opts, input_channel, cfg: Dict, dilate: Optional[bool] = False 176 | ) -> Tuple[nn.Sequential, int]: 177 | prev_dilation = self.dilation 178 | block = [] 179 | stride = cfg.get("stride", 1) 180 | 181 | if stride == 2: 182 | if dilate: 183 | self.dilation *= 2 184 | stride = 1 185 | 186 | layer = InvertedResidual( 187 | opts=opts, 188 | in_channels=input_channel, 189 | out_channels=cfg.get("out_channels"), 190 | stride=stride, 191 | expand_ratio=cfg.get("mv_expand_ratio", 4), 192 | dilation=prev_dilation, 193 | ) 194 | 195 | block.append(layer) 196 | input_channel = cfg.get("out_channels") 197 | 198 | attn_unit_dim = cfg["attn_unit_dim"] 199 | ffn_multiplier = cfg.get("ffn_multiplier") 200 | 201 | dropout = getattr(opts, "model.classification.mitv2.dropout", 0.0) 202 | 203 | block.append( 204 | Block( 205 | opts=opts, 206 | in_channels=input_channel, 207 | attn_unit_dim=attn_unit_dim, 208 | ffn_multiplier=ffn_multiplier, 209 | n_attn_blocks=cfg.get("attn_blocks", 1), 210 | patch_h=cfg.get("patch_h", 2), 211 | patch_w=cfg.get("patch_w", 2), 212 | dropout=dropout, 213 | ffn_dropout=getattr( 214 | opts, "model.classification.mitv2.ffn_dropout", 0.0 215 | ), 216 | attn_dropout=getattr( 217 | opts, "model.classification.mitv2.attn_dropout", 0.0 218 | ), 219 | conv_ksize=3, 220 | attn_norm_layer=getattr( 221 | opts, "model.classification.mitv2.attn_norm_layer", "layer_norm_2d" 222 | ), 223 | dilation=self.dilation, 224 | ) 225 | ) 226 | 
227 | return nn.Sequential(*block), input_channel 228 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/cifar10_models/googlenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import namedtuple 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | __all__ = ["GoogLeNet", "googlenet"] 9 | 10 | 11 | _GoogLeNetOuputs = namedtuple( 12 | "GoogLeNetOuputs", ["logits", "aux_logits2", "aux_logits1"] 13 | ) 14 | 15 | 16 | def googlenet(pretrained=False, progress=True, device="cpu", **kwargs): 17 | r"""GoogLeNet (Inception v1) model architecture from 18 | `"Going Deeper with Convolutions" `_. 19 | 20 | Args: 21 | pretrained (bool): If True, returns a model pre-trained on ImageNet 22 | progress (bool): If True, displays a progress bar of the download to stderr 23 | aux_logits (bool): If True, adds two auxiliary branches that can improve training. 24 | Default: *False* when pretrained is True otherwise *True* 25 | transform_input (bool): If True, preprocesses the input according to the method with which it 26 | was trained on ImageNet. Default: *False* 27 | """ 28 | model = GoogLeNet() 29 | if pretrained: 30 | script_dir = os.path.dirname(__file__) 31 | state_dict = torch.load( 32 | script_dir + "/state_dicts/googlenet.pt", map_location=device 33 | ) 34 | model.load_state_dict(state_dict) 35 | return model 36 | 37 | 38 | class GoogLeNet(nn.Module): 39 | 40 | # CIFAR10: aux_logits True->False 41 | def __init__(self, num_classes=10, aux_logits=False, transform_input=False): 42 | super(GoogLeNet, self).__init__() 43 | self.aux_logits = aux_logits 44 | self.transform_input = transform_input 45 | 46 | # CIFAR10: out_channels 64->192, kernel_size 7->3, stride 2->1, padding 3->1 47 | self.conv1 = BasicConv2d(3, 192, kernel_size=3, stride=1, padding=1) 48 | # self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True) 49 | # self.conv2 = BasicConv2d(64, 64, kernel_size=1) 50 | # self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1) 51 | # self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True) 52 | # END 53 | 54 | self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32) 55 | self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64) 56 | 57 | # CIFAR10: padding 0->1, ciel_model True->False 58 | self.maxpool3 = nn.MaxPool2d(3, stride=2, padding=1, ceil_mode=False) 59 | # END 60 | 61 | self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64) 62 | self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64) 63 | self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64) 64 | self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64) 65 | self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128) 66 | 67 | # CIFAR10: kernel_size 2->3, padding 0->1, ciel_model True->False 68 | self.maxpool4 = nn.MaxPool2d(3, stride=2, padding=1, ceil_mode=False) 69 | # END 70 | 71 | self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128) 72 | self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128) 73 | 74 | if aux_logits: 75 | self.aux1 = InceptionAux(512, num_classes) 76 | self.aux2 = InceptionAux(528, num_classes) 77 | 78 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 79 | self.dropout = nn.Dropout(0.2) 80 | self.fc = nn.Linear(1024, num_classes) 81 | 82 | # if init_weights: 83 | # self._initialize_weights() 84 | 85 | # def _initialize_weights(self): 86 | # for m in self.modules(): 87 | # if isinstance(m, nn.Conv2d) or isinstance(m, 
nn.Linear): 88 | # import scipy.stats as stats 89 | # X = stats.truncnorm(-2, 2, scale=0.01) 90 | # values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype) 91 | # values = values.view(m.weight.size()) 92 | # with torch.no_grad(): 93 | # m.weight.copy_(values) 94 | # elif isinstance(m, nn.BatchNorm2d): 95 | # nn.init.constant_(m.weight, 1) 96 | # nn.init.constant_(m.bias, 0) 97 | 98 | def forward(self, x): 99 | if self.transform_input: 100 | x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 101 | x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 102 | x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 103 | x = torch.cat((x_ch0, x_ch1, x_ch2), 1) 104 | 105 | # N x 3 x 224 x 224 106 | x = self.conv1(x) 107 | 108 | # CIFAR10 109 | # N x 64 x 112 x 112 110 | # x = self.maxpool1(x) 111 | # N x 64 x 56 x 56 112 | # x = self.conv2(x) 113 | # N x 64 x 56 x 56 114 | # x = self.conv3(x) 115 | # N x 192 x 56 x 56 116 | # x = self.maxpool2(x) 117 | # END 118 | 119 | # N x 192 x 28 x 28 120 | x = self.inception3a(x) 121 | # N x 256 x 28 x 28 122 | x = self.inception3b(x) 123 | # N x 480 x 28 x 28 124 | x = self.maxpool3(x) 125 | # N x 480 x 14 x 14 126 | x = self.inception4a(x) 127 | # N x 512 x 14 x 14 128 | if self.training and self.aux_logits: 129 | aux1 = self.aux1(x) 130 | 131 | x = self.inception4b(x) 132 | # N x 512 x 14 x 14 133 | x = self.inception4c(x) 134 | # N x 512 x 14 x 14 135 | x = self.inception4d(x) 136 | # N x 528 x 14 x 14 137 | if self.training and self.aux_logits: 138 | aux2 = self.aux2(x) 139 | 140 | x = self.inception4e(x) 141 | # N x 832 x 14 x 14 142 | x = self.maxpool4(x) 143 | # N x 832 x 7 x 7 144 | x = self.inception5a(x) 145 | # N x 832 x 7 x 7 146 | x = self.inception5b(x) 147 | # N x 1024 x 7 x 7 148 | 149 | x = self.avgpool(x) 150 | # N x 1024 x 1 x 1 151 | x = x.view(x.size(0), -1) 152 | # N x 1024 153 | x = self.dropout(x) 154 | x = self.fc(x) 155 | # N x 1000 (num_classes) 156 | if self.training and self.aux_logits: 157 | return _GoogLeNetOuputs(x, aux2, aux1) 158 | return x 159 | 160 | 161 | class Inception(nn.Module): 162 | def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj): 163 | super(Inception, self).__init__() 164 | 165 | self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1) 166 | 167 | self.branch2 = nn.Sequential( 168 | BasicConv2d(in_channels, ch3x3red, kernel_size=1), 169 | BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1), 170 | ) 171 | 172 | self.branch3 = nn.Sequential( 173 | BasicConv2d(in_channels, ch5x5red, kernel_size=1), 174 | BasicConv2d(ch5x5red, ch5x5, kernel_size=3, padding=1), 175 | ) 176 | 177 | self.branch4 = nn.Sequential( 178 | nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True), 179 | BasicConv2d(in_channels, pool_proj, kernel_size=1), 180 | ) 181 | 182 | def forward(self, x): 183 | branch1 = self.branch1(x) 184 | branch2 = self.branch2(x) 185 | branch3 = self.branch3(x) 186 | branch4 = self.branch4(x) 187 | 188 | outputs = [branch1, branch2, branch3, branch4] 189 | return torch.cat(outputs, 1) 190 | 191 | 192 | class InceptionAux(nn.Module): 193 | def __init__(self, in_channels, num_classes): 194 | super(InceptionAux, self).__init__() 195 | self.conv = BasicConv2d(in_channels, 128, kernel_size=1) 196 | 197 | self.fc1 = nn.Linear(2048, 1024) 198 | self.fc2 = nn.Linear(1024, num_classes) 199 | 200 | def forward(self, x): 201 | # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14 202 | x = 
F.adaptive_avg_pool2d(x, (4, 4)) 203 | # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4 204 | x = self.conv(x) 205 | # N x 128 x 4 x 4 206 | x = x.view(x.size(0), -1) 207 | # N x 2048 208 | x = F.relu(self.fc1(x), inplace=True) 209 | # N x 2048 210 | x = F.dropout(x, 0.7, training=self.training) 211 | # N x 2048 212 | x = self.fc2(x) 213 | # N x 1024 214 | 215 | return x 216 | 217 | 218 | class BasicConv2d(nn.Module): 219 | def __init__(self, in_channels, out_channels, **kwargs): 220 | super(BasicConv2d, self).__init__() 221 | self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) 222 | self.bn = nn.BatchNorm2d(out_channels, eps=0.001) 223 | 224 | def forward(self, x): 225 | x = self.conv(x) 226 | x = self.bn(x) 227 | return F.relu(x, inplace=True) 228 | -------------------------------------------------------------------------------- /src/resnet_model_2.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Modified from https://raw.githubusercontent.com/pytorch/vision/v0.9.1/torchvision/models/resnet.py 3 | 4 | BSD 3-Clause License 5 | 6 | Copyright (c) Soumith Chintala 2016, 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | * Neither the name of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | ''' 34 | import sys 35 | import torch.nn as nn 36 | try: 37 | from torch.hub import load_state_dict_from_url 38 | except ImportError: 39 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 40 | 41 | from functools import partial 42 | from typing import Dict, Type, Any, Callable, Union, List, Optional 43 | from torch import Tensor 44 | 45 | 46 | cifar10_pretrained_weight_urls = { 47 | 'resnet20': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet20-4118986f.pt', 48 | 'resnet32': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet32-ef93fc4d.pt', 49 | 'resnet44': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt', 50 | 'resnet56': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet56-187c023a.pt', 51 | } 52 | 53 | cifar100_pretrained_weight_urls = { 54 | 'resnet20': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet20-23dac2f1.pt', 55 | 'resnet32': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet32-84213ce6.pt', 56 | 'resnet44': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet44-ffe32858.pt', 57 | 'resnet56': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet56-f2eff4c8.pt', 58 | } 59 | 60 | 61 | def conv3x3(in_planes, out_planes, stride=1): 62 | """3x3 convolution with padding""" 63 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 64 | 65 | 66 | def conv1x1(in_planes, out_planes, stride=1): 67 | """1x1 convolution""" 68 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 69 | 70 | 71 | class BasicBlock(nn.Module): 72 | expansion: int = 1 73 | 74 | 75 | def __init__( 76 | self, 77 | inplanes: int, 78 | planes: int, 79 | stride: int = 1, 80 | downsample: Optional[nn.Module] = None, 81 | groups: int = 1, 82 | base_width: int = 64, 83 | dilation: int = 1, 84 | norm_layer: Optional[Callable[..., nn.Module]] = None 85 | ) -> None: 86 | super(BasicBlock, self).__init__() 87 | if norm_layer is None: 88 | norm_layer = nn.BatchNorm2d 89 | if groups != 1 or base_width != 64: 90 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 91 | if dilation > 1: 92 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 93 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 94 | self.conv1 = conv3x3(inplanes, planes, stride) 95 | self.bn1 = norm_layer(planes) 96 | self.relu1 = nn.ReLU(inplace=True) 97 | self.conv2 = conv3x3(planes, planes) 98 | self.bn2 = norm_layer(planes) 99 | self.relu2 = nn.ReLU(inplace=True) 100 | self.downsample = downsample 101 | self.stride = stride 102 | self.skip_add = nn.quantized.FloatFunctional() 103 | 104 | 105 | def forward(self, x: Tensor) -> Tensor: 106 | identity = x 107 | 108 | out = self.conv1(x) 109 | out = self.bn1(out) 110 | out = self.relu1(out) 111 | 112 | out = self.conv2(out) 113 | out = self.bn2(out) 114 | 115 | if self.downsample is not None: 116 | identity = self.downsample(x) 117 | 118 | #out += identity 119 | out = self.skip_add.add(out, identity) 120 | out = self.relu2(out) 121 | 122 | return out 123 | 124 | class CifarResNet(nn.Module): 125 | 126 | def __init__(self, block, layers, num_classes=10): 127 | super(CifarResNet, self).__init__() 128 | 
self.inplanes = 16 129 | self.conv1 = conv3x3(3, 16) 130 | self.bn1 = nn.BatchNorm2d(16) 131 | self.relu = nn.ReLU(inplace=True) 132 | 133 | self.layer1 = self._make_layer(block, 16, layers[0]) 134 | self.layer2 = self._make_layer(block, 32, layers[1], stride=2) 135 | self.layer3 = self._make_layer(block, 64, layers[2], stride=2) 136 | 137 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 138 | self.fc = nn.Linear(64 * block.expansion, num_classes) 139 | 140 | for m in self.modules(): 141 | if isinstance(m, nn.Conv2d): 142 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 143 | elif isinstance(m, nn.BatchNorm2d): 144 | nn.init.constant_(m.weight, 1) 145 | nn.init.constant_(m.bias, 0) 146 | 147 | def _make_layer(self, block, planes, blocks, stride=1): 148 | downsample = None 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | conv1x1(self.inplanes, planes * block.expansion, stride), 152 | nn.BatchNorm2d(planes * block.expansion), 153 | ) 154 | 155 | layers = [] 156 | layers.append(block(self.inplanes, planes, stride, downsample)) 157 | self.inplanes = planes * block.expansion 158 | for _ in range(1, blocks): 159 | layers.append(block(self.inplanes, planes)) 160 | 161 | return nn.Sequential(*layers) 162 | 163 | def forward(self, x): 164 | x = self.conv1(x) 165 | x = self.bn1(x) 166 | x = self.relu(x) 167 | 168 | x = self.layer1(x) 169 | x = self.layer2(x) 170 | x = self.layer3(x) 171 | 172 | x = self.avgpool(x) 173 | x = x.view(x.size(0), -1) 174 | x = self.fc(x) 175 | 176 | return x 177 | 178 | 179 | def _resnet( 180 | arch: str, 181 | layers: List[int], 182 | model_urls: Dict[str, str], 183 | progress: bool = True, 184 | pretrained: bool = False, 185 | **kwargs: Any 186 | ) -> CifarResNet: 187 | model = CifarResNet(BasicBlock, layers, **kwargs) 188 | if pretrained: 189 | state_dict = load_state_dict_from_url(model_urls[arch], 190 | progress=progress) 191 | model.load_state_dict(state_dict) 192 | return model 193 | 194 | 195 | def cifar10_resnet20(*args, **kwargs) -> CifarResNet: pass 196 | def cifar10_resnet32(*args, **kwargs) -> CifarResNet: pass 197 | def cifar10_resnet44(*args, **kwargs) -> CifarResNet: pass 198 | def cifar10_resnet56(*args, **kwargs) -> CifarResNet: pass 199 | 200 | 201 | def cifar100_resnet20(*args, **kwargs) -> CifarResNet: pass 202 | def cifar100_resnet32(*args, **kwargs) -> CifarResNet: pass 203 | def cifar100_resnet44(*args, **kwargs) -> CifarResNet: pass 204 | def cifar100_resnet56(*args, **kwargs) -> CifarResNet: pass 205 | 206 | 207 | thismodule = sys.modules[__name__] 208 | for dataset in ["cifar10", "cifar100"]: 209 | for layers, model_name in zip([[3]*3, [5]*3, [7]*3, [9]*3], 210 | ["resnet20", "resnet32", "resnet44", "resnet56"]): 211 | method_name = f"{dataset}_{model_name}" 212 | model_urls = cifar10_pretrained_weight_urls if dataset == "cifar10" else cifar100_pretrained_weight_urls 213 | num_classes = 10 if dataset == "cifar10" else 100 214 | setattr( 215 | thismodule, 216 | method_name, 217 | partial(_resnet, 218 | arch=model_name, 219 | layers=layers, 220 | model_urls=model_urls, 221 | num_classes=num_classes) 222 | ) 223 | -------------------------------------------------------------------------------- /src/CT_cvnet.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from custom_module import * 3 | from pretrained_model import * 4 | import numpy as np 5 | global_config = load_model_yaml("./global_config/", 
"global_config.yaml") 6 | 7 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path, train_data_loader): 8 | if(not os.path.exists(directory_path)): 9 | os.mkdir(directory_path) 10 | data_type = "_input" 11 | for key in layer_nest_dict: 12 | my_model = copy.deepcopy(model) 13 | layer_name = key 14 | print("name: " + layer_name) 15 | collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name)) 16 | replace_layer(my_model, layer_name, collection_layer) 17 | run_set(my_model, train_data_loader, "cuda:0") 18 | access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt") 19 | data = torch.load(directory_path + layer_name + data_type + ".pt") 20 | print(data.shape) 21 | 22 | 23 | def generate_data_set(dirctory_path , layer_nest_dict, split_point): 24 | train_path = "train/" 25 | valid_path = "val/" 26 | if(not os.path.exists(dirctory_path + train_path)): 27 | os.mkdir(dirctory_path + train_path) 28 | if(not os.path.exists(dirctory_path + valid_path)): 29 | os.mkdir(dirctory_path + valid_path) 30 | 31 | for key in layer_nest_dict: 32 | data_type = "_input" 33 | layer_name = key 34 | file_name = layer_name + data_type + ".pt" 35 | print(layer_name) 36 | data = torch.load(dirctory_path + file_name) 37 | b=torch.randperm(data.shape[0]) 38 | data = data[b] 39 | train_data = data[0:split_point] 40 | valid_data = data[split_point:data.shape[0]] 41 | torch.save(train_data, dirctory_path + train_path + file_name) 42 | torch.save(valid_data, dirctory_path + valid_path + file_name) 43 | print(train_data.shape) 44 | print(valid_data.shape) 45 | 46 | 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path): 48 | sign_nest_dict = generate_sign_nest_dict(model) 49 | validate(model, valid_data_loader) 50 | generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader) 51 | generate_data_set(input_data_save_path , sign_nest_dict, split_point) 52 | 53 | 54 | def CT_train(sign_type, degree, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, epoch = 40): 55 | print(sign_type) 56 | for key in sign_nest_dict: 57 | sign_dict = sign_nest_dict[key] 58 | train_path = "train/" 59 | val_path = "val/" 60 | data_type = "_input" 61 | file_name = key + data_type + ".pt" 62 | 63 | input_data = torch.load(input_data_dirctory + "cvnet_work"+file_name) 64 | print(input_data.shape) 65 | num_features = input_data.shape[1] 66 | min_data = torch.min(input_data) 67 | max_data = torch.max(input_data) 68 | end_point = max(abs(min_data), abs(max_data)) 69 | print(f"end point: {end_point}") 70 | del input_data 71 | init_coef = generate_init_coeffcients(sig_odd, degree, -end_point, end_point, scale=1) 72 | print(f"coef: {torch.tensor([init_coef])}") 73 | 74 | sign_module = Sigmoid_minmax_layer(coef=torch.tensor([init_coef]), degree=[(degree+1)//2],scale=sign_scale) 75 | print("name: ", key) 76 | my_model = SiLU_minmax_bn_layer(sigmoid=sign_module, num_features=num_features) 77 | ref_model = nn.SiLU() 78 | 79 | 80 | 81 | 82 | # test_input = ((torch.rand(100) - 0.5) * 100).to("cuda:0") 83 | # test_output = my_model.forward(copy.deepcopy(test_input)) 84 | # test_output_ref = ref_model.forward(copy.deepcopy(test_input)) 85 | # print(test_input) 86 | # print(test_output) 87 | 88 | 89 | # test_input_list = test_input.to("cpu").tolist() 90 | # test_output_list = test_output.to("cpu").tolist() 91 | # test_output_list_ref = 
test_output_ref.to("cpu").tolist() 92 | 93 | # test_coef = generate_init_coeffcients_numpy(sig_odd, degree, -end_point, end_point, scale=1) 94 | # pr = np.poly1d(test_coef) 95 | 96 | # plt.plot(test_input_list, test_output_list_ref, '.', test_input_list,test_output_list,'.', markersize=10) 97 | # plt.plot(test_input_list,test_input_list * (pr(test_input_list) + 0.5), '.',markersize = 4) 98 | # plt.ylim(-2, 50) 99 | # plt.savefig("test") 100 | # plt.show() 101 | 102 | 103 | 104 | 105 | 106 | 107 | optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0) 108 | scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4) 109 | 110 | print(file_name) 111 | train_data = torch.load(input_data_dirctory + train_path + file_name) 112 | valid_data = torch.load(input_data_dirctory + val_path + file_name) 113 | for epoch_i in range(epoch): 114 | train_loss_meter = AverageMeter("train loss") 115 | val_loss_meter = AverageMeter("val loss") 116 | #train 117 | for batch_i in range(int(train_data.shape[0] / batch_size)): 118 | x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 119 | target_y = ref_model.to("cuda:0").forward(x) 120 | actual_y = my_model.forward(x) 121 | loss_fun = nn.MSELoss() 122 | my_model.zero_grad() 123 | loss = loss_fun(actual_y, target_y) 124 | train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 125 | loss.backward() 126 | optimizer.step() 127 | train_loss = train_loss_meter.avg 128 | 129 | #valid 130 | for batch_i in range(int(valid_data.shape[0] / batch_size)): 131 | x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 132 | target_y = ref_model.to("cuda:0").forward(x) 133 | actual_y = my_model.forward(x) 134 | loss_fun = nn.MSELoss() 135 | loss = loss_fun(actual_y, target_y) 136 | val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 137 | val_loss = val_loss_meter.avg 138 | 139 | scheduler.step(val_loss) 140 | 141 | print( 142 | f"Epoch:{epoch_i + 1}" 143 | + f" Train Loss:{train_loss:.10f}" 144 | + f" Val Loss: {val_loss:.10f}" 145 | ) 146 | 147 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 148 | coef_save_dirctory = input_data_dirctory + folder_name 149 | if(not os.path.exists(coef_save_dirctory)): 150 | os.mkdir(coef_save_dirctory) 151 | file_name = key + "_coef.pt" 152 | my_model.sigmoid.save_coef(coef_save_dirctory + file_name) 153 | print("save: " + folder_name + file_name) 154 | print("\n") 155 | 156 | def CT_val(model: nn.Module , layer_nest_dict, directory_path, val_data_loader, sign_type, output_floder_suffix): 157 | if(not os.path.exists(directory_path)): 158 | os.mkdir(directory_path) 159 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 160 | for key in layer_nest_dict: 161 | file_name = key + "_coef.pt" 162 | coef = torch.load(directory_path + folder_name + file_name) 163 | degree = len(coef.tolist()[0]) 164 | sign_module_CT = Sigmoid_minmax_layer(coef=coef, degree=[degree],scale=1) 165 | rlays = SiLU_minmax_layer(sigmoid=sign_module_CT) 166 | layer_name = key 167 | print("name: " + layer_name) 168 | print(access_layer(model, layer_name)) 169 | replace_layer(model, layer_name, rlays) 170 | validate(model, val_data_loader) 171 | 172 | 173 | 174 | 175 | 176 | if __name__ == "__main__": 177 | parser = ArgumentParser() 178 | parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "mobileVitV2"]) 179 | parser.add_argument("--dataset", 
type=str,choices=["cifar10", "cifar100", "imagenet_1k"]) 180 | parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "polyfit"]) 181 | parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False]) 182 | parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/") 183 | 184 | args = parser.parse_args() 185 | print(args) 186 | if(args.dataset == "cifar10" or args.dataset == "cifar100"): 187 | split_point = 45000 188 | batch_size = 100 189 | elif(args.dataset == "imagenet_1k"): 190 | split_point = 900 191 | batch_size = 50 192 | model = get_pretrained_model(model_name=args.model, dataset=args.dataset) 193 | # print(model) 194 | 195 | valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"]) 196 | train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] ) 197 | 198 | if(args.data_collection): 199 | data_collection(model = model, 200 | 201 | split_point = split_point, input_data_save_path = args.working_directory) 202 | 203 | else: 204 | nest_dict = generate_sign_nest_dict(model) 205 | CT_train(sign_type = args.sign_type, degree=7, sign_scale = 1, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size, 206 | input_data_dirctory = args.working_directory , output_floder_suffix= "polyfit_bn_o7", epoch=0) 207 | # CT_val(model=model, layer_nest_dict=nest_dict, directory_path=args.working_directory, 208 | # val_data_loader = valid_data_loader,sign_type=args.sign_type, output_floder_suffix="polyfit_o") 209 | -------------------------------------------------------------------------------- /src/CT_cvnet_bn.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from custom_module import * 3 | from pretrained_model import * 4 | import numpy as np 5 | global_config = load_model_yaml("./global_config/", "global_config.yaml") 6 | 7 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path, train_data_loader): 8 | if(not os.path.exists(directory_path)): 9 | os.mkdir(directory_path) 10 | data_type = "_input" 11 | for key in layer_nest_dict: 12 | my_model = copy.deepcopy(model) 13 | layer_name = key 14 | print("name: " + layer_name) 15 | collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name)) 16 | replace_layer(my_model, layer_name, collection_layer) 17 | run_set(my_model, train_data_loader, "cuda:0") 18 | access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt") 19 | data = torch.load(directory_path + layer_name + data_type + ".pt") 20 | print(data.shape) 21 | 22 | 23 | def generate_data_set(dirctory_path , layer_nest_dict, split_point): 24 | train_path = "train/" 25 | valid_path = "val/" 26 | if(not os.path.exists(dirctory_path + train_path)): 27 | os.mkdir(dirctory_path + train_path) 28 | if(not os.path.exists(dirctory_path + valid_path)): 29 | os.mkdir(dirctory_path + valid_path) 30 | 31 | for key in layer_nest_dict: 32 | data_type = "_input" 33 | layer_name = key 34 | file_name = layer_name + data_type + ".pt" 35 | print(layer_name) 36 | data = torch.load(dirctory_path + file_name) 37 | b=torch.randperm(data.shape[0]) 38 | data = data[b] 39 | train_data = data[0:split_point] 40 | valid_data = data[split_point:data.shape[0]] 41 | torch.save(train_data, dirctory_path + 
train_path + file_name) 42 | torch.save(valid_data, dirctory_path + valid_path + file_name) 43 | print(train_data.shape) 44 | print(valid_data.shape) 45 | 46 | 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path): 48 | sign_nest_dict = generate_sign_nest_dict(model) 49 | validate(model, valid_data_loader) 50 | generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader) 51 | generate_data_set(input_data_save_path , sign_nest_dict, split_point) 52 | 53 | 54 | def CT_train(sign_type, degree, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, epoch = 40): 55 | print(sign_type) 56 | for key in sign_nest_dict: 57 | sign_dict = sign_nest_dict[key] 58 | train_path = "train/" 59 | val_path = "val/" 60 | data_type = "_input" 61 | file_name = key + data_type + ".pt" 62 | 63 | input_data = torch.load(input_data_dirctory + "cvnet_work"+file_name) 64 | print(input_data.shape) 65 | num_features = input_data.shape[1] 66 | min_data = torch.min(input_data) 67 | max_data = torch.max(input_data) 68 | end_point = max(abs(min_data), abs(max_data)) 69 | print(f"end point: {end_point}") 70 | del input_data 71 | init_coef = generate_init_coeffcients(sig_odd, degree, -end_point, end_point, scale=1) 72 | print(f"coef: {torch.tensor([init_coef])}") 73 | 74 | sign_module = Sigmoid_minmax_layer(coef=torch.tensor([init_coef]), degree=[(degree+1)//2],scale=sign_scale) 75 | print("name: ", key) 76 | my_model = SiLU_minmax_bn_layer(sigmoid=sign_module, num_features=num_features) 77 | ref_model = nn.SiLU() 78 | 79 | 80 | 81 | 82 | # test_input = ((torch.rand(100) - 0.5) * 100).to("cuda:0") 83 | # test_output = my_model.forward(copy.deepcopy(test_input)) 84 | # test_output_ref = ref_model.forward(copy.deepcopy(test_input)) 85 | # print(test_input) 86 | # print(test_output) 87 | 88 | 89 | # test_input_list = test_input.to("cpu").tolist() 90 | # test_output_list = test_output.to("cpu").tolist() 91 | # test_output_list_ref = test_output_ref.to("cpu").tolist() 92 | 93 | # test_coef = generate_init_coeffcients_numpy(sig_odd, degree, -end_point, end_point, scale=1) 94 | # pr = np.poly1d(test_coef) 95 | 96 | # plt.plot(test_input_list, test_output_list_ref, '.', test_input_list,test_output_list,'.', markersize=10) 97 | # plt.plot(test_input_list,test_input_list * (pr(test_input_list) + 0.5), '.',markersize = 4) 98 | # plt.ylim(-2, 50) 99 | # plt.savefig("test") 100 | # plt.show() 101 | 102 | 103 | 104 | 105 | 106 | 107 | optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0) 108 | scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4) 109 | 110 | print(file_name) 111 | train_data = torch.load(input_data_dirctory + train_path + file_name) 112 | valid_data = torch.load(input_data_dirctory + val_path + file_name) 113 | for epoch_i in range(epoch): 114 | train_loss_meter = AverageMeter("train loss") 115 | val_loss_meter = AverageMeter("val loss") 116 | #train 117 | for batch_i in range(int(train_data.shape[0] / batch_size)): 118 | x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 119 | target_y = ref_model.to("cuda:0").forward(x) 120 | actual_y = my_model.forward(x) 121 | loss_fun = nn.MSELoss() 122 | my_model.zero_grad() 123 | loss = loss_fun(actual_y, target_y) 124 | train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 125 | loss.backward() 126 | optimizer.step() 127 | train_loss = train_loss_meter.avg 
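# Validation pass (next lines): the fitted SiLU_minmax_bn_layer is scored with the same
# MSE loss against the exact nn.SiLU reference, but without an optimizer step; the
# averaged validation loss is printed and fed to the ReduceLROnPlateau scheduler.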
128 | 129 | #valid 130 | for batch_i in range(int(valid_data.shape[0] / batch_size)): 131 | x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 132 | target_y = ref_model.to("cuda:0").forward(x) 133 | actual_y = my_model.forward(x) 134 | loss_fun = nn.MSELoss() 135 | loss = loss_fun(actual_y, target_y) 136 | val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 137 | val_loss = val_loss_meter.avg 138 | 139 | scheduler.step(val_loss) 140 | 141 | print( 142 | f"Epoch:{epoch_i + 1}" 143 | + f" Train Loss:{train_loss:.10f}" 144 | + f" Val Loss: {val_loss:.10f}" 145 | ) 146 | 147 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 148 | coef_save_dirctory = input_data_dirctory + folder_name 149 | if(not os.path.exists(coef_save_dirctory)): 150 | os.mkdir(coef_save_dirctory) 151 | # file_name = key + "_coef.pt" 152 | # my_model.sigmoid.save_coef(coef_save_dirctory + file_name) 153 | file_name = key + "SilU_bn.pt" 154 | torch.save(my_model, coef_save_dirctory + file_name) 155 | print("save: " + folder_name + file_name) 156 | print("\n") 157 | 158 | def CT_val(model: nn.Module , layer_nest_dict, directory_path, val_data_loader, sign_type, output_floder_suffix): 159 | if(not os.path.exists(directory_path)): 160 | os.mkdir(directory_path) 161 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 162 | for key in layer_nest_dict: 163 | file_name = key + "_coef.pt" 164 | coef = torch.load(directory_path + folder_name + file_name) 165 | degree = len(coef.tolist()[0]) 166 | sign_module_CT = Sigmoid_minmax_layer(coef=coef, degree=[degree],scale=1) 167 | rlays = SiLU_minmax_layer(sigmoid=sign_module_CT) 168 | layer_name = key 169 | print("name: " + layer_name) 170 | print(access_layer(model, layer_name)) 171 | replace_layer(model, layer_name, rlays) 172 | validate(model, val_data_loader) 173 | 174 | 175 | 176 | 177 | 178 | if __name__ == "__main__": 179 | parser = ArgumentParser() 180 | parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "mobileVitV2"]) 181 | parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"]) 182 | parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "polyfit"]) 183 | parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False]) 184 | parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/") 185 | 186 | args = parser.parse_args() 187 | print(args) 188 | if(args.dataset == "cifar10" or args.dataset == "cifar100"): 189 | split_point = 45000 190 | batch_size = 100 191 | elif(args.dataset == "imagenet_1k"): 192 | split_point = 900 193 | batch_size = 50 194 | model = get_pretrained_model(model_name=args.model, dataset=args.dataset) 195 | # print(model) 196 | 197 | valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"]) 198 | train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] ) 199 | 200 | if(args.data_collection): 201 | data_collection(model = model, 202 | 203 | split_point = split_point, input_data_save_path = args.working_directory) 204 | 205 | else: 206 | nest_dict = generate_sign_nest_dict(model) 207 | CT_train(sign_type = args.sign_type, degree=7, sign_scale = 1, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size, 208 | input_data_dirctory = 
args.working_directory , output_floder_suffix= "polyfit_bn_o7", epoch=0) 209 | # CT_val(model=model, layer_nest_dict=nest_dict, directory_path=args.working_directory, 210 | # val_data_loader = valid_data_loader,sign_type=args.sign_type, output_floder_suffix="polyfit_o") 211 | -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/cifar10_models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | 5 | __all__ = [ 6 | "ResNet", 7 | "resnet18", 8 | "resnet34", 9 | "resnet50", 10 | ] 11 | 12 | 13 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 14 | """3x3 convolution with padding""" 15 | return nn.Conv2d( 16 | in_planes, 17 | out_planes, 18 | kernel_size=3, 19 | stride=stride, 20 | padding=dilation, 21 | groups=groups, 22 | bias=False, 23 | dilation=dilation, 24 | ) 25 | 26 | 27 | def conv1x1(in_planes, out_planes, stride=1): 28 | """1x1 convolution""" 29 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 30 | 31 | 32 | class BasicBlock(nn.Module): 33 | expansion = 1 34 | 35 | def __init__( 36 | self, 37 | inplanes, 38 | planes, 39 | stride=1, 40 | downsample=None, 41 | groups=1, 42 | base_width=64, 43 | dilation=1, 44 | norm_layer=None, 45 | ): 46 | super(BasicBlock, self).__init__() 47 | if norm_layer is None: 48 | norm_layer = nn.BatchNorm2d 49 | if groups != 1 or base_width != 64: 50 | raise ValueError("BasicBlock only supports groups=1 and base_width=64") 51 | if dilation > 1: 52 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 53 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 54 | self.conv1 = conv3x3(inplanes, planes, stride) 55 | self.bn1 = norm_layer(planes) 56 | self.relu = nn.ReLU(inplace=True) 57 | self.conv2 = conv3x3(planes, planes) 58 | self.bn2 = norm_layer(planes) 59 | self.downsample = downsample 60 | self.stride = stride 61 | 62 | def forward(self, x): 63 | identity = x 64 | 65 | out = self.conv1(x) 66 | out = self.bn1(out) 67 | out = self.relu(out) 68 | 69 | out = self.conv2(out) 70 | out = self.bn2(out) 71 | 72 | if self.downsample is not None: 73 | identity = self.downsample(x) 74 | 75 | out += identity 76 | out = self.relu(out) 77 | 78 | return out 79 | 80 | 81 | class Bottleneck(nn.Module): 82 | expansion = 4 83 | 84 | def __init__( 85 | self, 86 | inplanes, 87 | planes, 88 | stride=1, 89 | downsample=None, 90 | groups=1, 91 | base_width=64, 92 | dilation=1, 93 | norm_layer=None, 94 | ): 95 | super(Bottleneck, self).__init__() 96 | if norm_layer is None: 97 | norm_layer = nn.BatchNorm2d 98 | width = int(planes * (base_width / 64.0)) * groups 99 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 100 | self.conv1 = conv1x1(inplanes, width) 101 | self.bn1 = norm_layer(width) 102 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 103 | self.bn2 = norm_layer(width) 104 | self.conv3 = conv1x1(width, planes * self.expansion) 105 | self.bn3 = norm_layer(planes * self.expansion) 106 | self.relu = nn.ReLU(inplace=True) 107 | self.downsample = downsample 108 | self.stride = stride 109 | 110 | def forward(self, x): 111 | identity = x 112 | 113 | out = self.conv1(x) 114 | out = self.bn1(out) 115 | out = self.relu(out) 116 | 117 | out = self.conv2(out) 118 | out = self.bn2(out) 119 | out = self.relu(out) 120 | 121 | out = self.conv3(out) 122 | out = self.bn3(out) 123 | 124 | if 
self.downsample is not None: 125 | identity = self.downsample(x) 126 | 127 | out += identity 128 | out = self.relu(out) 129 | 130 | return out 131 | 132 | 133 | class ResNet(nn.Module): 134 | def __init__( 135 | self, 136 | block, 137 | layers, 138 | num_classes=10, 139 | zero_init_residual=False, 140 | groups=1, 141 | width_per_group=64, 142 | replace_stride_with_dilation=None, 143 | norm_layer=None, 144 | ): 145 | super(ResNet, self).__init__() 146 | if norm_layer is None: 147 | norm_layer = nn.BatchNorm2d 148 | self._norm_layer = norm_layer 149 | 150 | self.inplanes = 64 151 | self.dilation = 1 152 | if replace_stride_with_dilation is None: 153 | # each element in the tuple indicates if we should replace 154 | # the 2x2 stride with a dilated convolution instead 155 | replace_stride_with_dilation = [False, False, False] 156 | if len(replace_stride_with_dilation) != 3: 157 | raise ValueError( 158 | "replace_stride_with_dilation should be None " 159 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation) 160 | ) 161 | self.groups = groups 162 | self.base_width = width_per_group 163 | 164 | # CIFAR10: kernel_size 7 -> 3, stride 2 -> 1, padding 3->1 165 | self.conv1 = nn.Conv2d( 166 | 3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False 167 | ) 168 | # END 169 | 170 | self.bn1 = norm_layer(self.inplanes) 171 | self.relu = nn.ReLU(inplace=True) 172 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 173 | self.layer1 = self._make_layer(block, 64, layers[0]) 174 | self.layer2 = self._make_layer( 175 | block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0] 176 | ) 177 | self.layer3 = self._make_layer( 178 | block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1] 179 | ) 180 | self.layer4 = self._make_layer( 181 | block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2] 182 | ) 183 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 184 | self.fc = nn.Linear(512 * block.expansion, num_classes) 185 | 186 | for m in self.modules(): 187 | if isinstance(m, nn.Conv2d): 188 | nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") 189 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 190 | nn.init.constant_(m.weight, 1) 191 | nn.init.constant_(m.bias, 0) 192 | 193 | # Zero-initialize the last BN in each residual branch, 194 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
195 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 196 | if zero_init_residual: 197 | for m in self.modules(): 198 | if isinstance(m, Bottleneck): 199 | nn.init.constant_(m.bn3.weight, 0) 200 | elif isinstance(m, BasicBlock): 201 | nn.init.constant_(m.bn2.weight, 0) 202 | 203 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 204 | norm_layer = self._norm_layer 205 | downsample = None 206 | previous_dilation = self.dilation 207 | if dilate: 208 | self.dilation *= stride 209 | stride = 1 210 | if stride != 1 or self.inplanes != planes * block.expansion: 211 | downsample = nn.Sequential( 212 | conv1x1(self.inplanes, planes * block.expansion, stride), 213 | norm_layer(planes * block.expansion), 214 | ) 215 | 216 | layers = [] 217 | layers.append( 218 | block( 219 | self.inplanes, 220 | planes, 221 | stride, 222 | downsample, 223 | self.groups, 224 | self.base_width, 225 | previous_dilation, 226 | norm_layer, 227 | ) 228 | ) 229 | self.inplanes = planes * block.expansion 230 | for _ in range(1, blocks): 231 | layers.append( 232 | block( 233 | self.inplanes, 234 | planes, 235 | groups=self.groups, 236 | base_width=self.base_width, 237 | dilation=self.dilation, 238 | norm_layer=norm_layer, 239 | ) 240 | ) 241 | 242 | return nn.Sequential(*layers) 243 | 244 | def forward(self, x): 245 | x = self.conv1(x) 246 | x = self.bn1(x) 247 | x = self.relu(x) 248 | x = self.maxpool(x) 249 | 250 | x = self.layer1(x) 251 | x = self.layer2(x) 252 | x = self.layer3(x) 253 | x = self.layer4(x) 254 | 255 | x = self.avgpool(x) 256 | x = x.reshape(x.size(0), -1) 257 | x = self.fc(x) 258 | 259 | return x 260 | 261 | 262 | def _resnet(arch, block, layers, pretrained, progress, device, **kwargs): 263 | model = ResNet(block, layers, **kwargs) 264 | if pretrained: 265 | script_dir = os.path.dirname(__file__) 266 | state_dict = torch.load( 267 | script_dir + "/state_dicts/" + arch + ".pt", map_location=device 268 | ) 269 | model.load_state_dict(state_dict) 270 | return model 271 | 272 | 273 | def resnet18(pretrained=False, progress=True, device="cpu", **kwargs): 274 | """Constructs a ResNet-18 model. 275 | Args: 276 | pretrained (bool): If True, returns a model pre-trained on ImageNet 277 | progress (bool): If True, displays a progress bar of the download to stderr 278 | """ 279 | return _resnet( 280 | "resnet18", BasicBlock, [2, 2, 2, 2], pretrained, progress, device, **kwargs 281 | ) 282 | 283 | 284 | def resnet34(pretrained=False, progress=True, device="cpu", **kwargs): 285 | """Constructs a ResNet-34 model. 286 | Args: 287 | pretrained (bool): If True, returns a model pre-trained on ImageNet 288 | progress (bool): If True, displays a progress bar of the download to stderr 289 | """ 290 | return _resnet( 291 | "resnet34", BasicBlock, [3, 4, 6, 3], pretrained, progress, device, **kwargs 292 | ) 293 | 294 | 295 | def resnet50(pretrained=False, progress=True, device="cpu", **kwargs): 296 | """Constructs a ResNet-50 model. 
297 | Args: 298 | pretrained (bool): If True, returns a model pre-trained on ImageNet 299 | progress (bool): If True, displays a progress bar of the download to stderr 300 | """ 301 | return _resnet( 302 | "resnet50", Bottleneck, [3, 4, 6, 3], pretrained, progress, device, **kwargs 303 | ) 304 | -------------------------------------------------------------------------------- /src/CT.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from custom_module import * 3 | from pretrained_model import * 4 | global_config = load_model_yaml("./global_config/", "global_config.yaml") 5 | 6 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path, train_data_loader): 7 | if(not os.path.exists(directory_path)): 8 | os.mkdir(directory_path) 9 | data_type = "_input" 10 | for key in layer_nest_dict: 11 | my_model = copy.deepcopy(model) 12 | layer_name = key 13 | print("name: " + layer_name) 14 | collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name)) 15 | replace_layer(my_model, layer_name, collection_layer) 16 | run_set(my_model, train_data_loader, "cuda:0") 17 | access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt") 18 | data = torch.load(directory_path + layer_name + data_type + ".pt") 19 | print(data.shape) 20 | 21 | 22 | def generate_data_set(dirctory_path , layer_nest_dict, split_point): 23 | train_path = "train/" 24 | valid_path = "val/" 25 | if(not os.path.exists(dirctory_path + train_path)): 26 | os.mkdir(dirctory_path + train_path) 27 | if(not os.path.exists(dirctory_path + valid_path)): 28 | os.mkdir(dirctory_path + valid_path) 29 | 30 | for key in layer_nest_dict: 31 | data_type = "_input" 32 | layer_name = key 33 | file_name = layer_name + data_type + ".pt" 34 | print(layer_name) 35 | data = torch.load(dirctory_path + file_name) 36 | data = data.reshape((-1, ) + data.shape[2:]) 37 | b=torch.randperm(data.shape[0]) 38 | data = data[b] 39 | train_data = data[0:split_point] 40 | valid_data = data[split_point:data.shape[0]] 41 | torch.save(train_data, dirctory_path + train_path + file_name) 42 | torch.save(valid_data, dirctory_path + valid_path + file_name) 43 | print(train_data.shape) 44 | print(valid_data.shape) 45 | 46 | 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path): 48 | sign_nest_dict = generate_sign_nest_dict(model) 49 | validate(model, valid_data_loader) 50 | generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader) 51 | generate_data_set(input_data_save_path , sign_nest_dict, split_point) 52 | 53 | 54 | def CT_train(sign_type, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix): 55 | sign_param_dict = Sign_parameter_generator().param_nest_dict[sign_type] 56 | print(sign_type) 57 | for key in sign_nest_dict: 58 | sign_dict = sign_nest_dict[key] 59 | scale_name = key + "_scale.pt" 60 | if(scale_path != None): 61 | sign_scale = torch.load(scale_path + scale_name).item() 62 | print("scale: " + str(sign_scale)) 63 | sign_module = Sign_minmax_layer(coef=sign_param_dict["coef"], degree=sign_param_dict["degree"],scale=sign_scale) 64 | print("name: ", key) 65 | if(sign_dict["type"] == "ReLU"): 66 | my_model = ReLU_sign_layer(sign = sign_module) 67 | ref_model = nn.ReLU() 68 | elif(sign_dict["type"] == "MaxPool2d"): 69 | my_model = Maxpool_sign_layer(sign = sign_module, kernel_size=sign_dict["kernel_size"], 
stride= sign_dict["stride"], padding=sign_dict["padding"], dilation=sign_dict["dilation"]) 70 | ref_model = nn.MaxPool2d(kernel_size=sign_dict["kernel_size"], stride= sign_dict["stride"], padding=sign_dict["padding"], dilation=sign_dict["dilation"]) 71 | else: 72 | raise Exception("not implemented layer type.") 73 | optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0) 74 | scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4) 75 | train_path = "train/" 76 | val_path = "val/" 77 | data_type = "_input" 78 | file_name = key + data_type + ".pt" 79 | print(file_name) 80 | train_data = torch.load(input_data_dirctory + train_path + file_name) 81 | valid_data = torch.load(input_data_dirctory + val_path + file_name) 82 | for epoch_i in range(40): 83 | train_loss_meter = AverageMeter("train loss") 84 | val_loss_meter = AverageMeter("val loss") 85 | #train 86 | for batch_i in range(int(train_data.shape[0] / batch_size)): 87 | x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 88 | target_y = ref_model.to("cuda:0").forward(x) 89 | actual_y = my_model.forward(x) 90 | loss_fun = nn.MSELoss() 91 | my_model.zero_grad() 92 | loss = loss_fun(actual_y, target_y) 93 | train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 94 | loss.backward() 95 | optimizer.step() 96 | train_loss = train_loss_meter.avg 97 | 98 | #valid 99 | for batch_i in range(int(valid_data.shape[0] / batch_size)): 100 | x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 101 | target_y = ref_model.to("cuda:0").forward(x) 102 | actual_y = my_model.forward(x) 103 | loss_fun = nn.MSELoss() 104 | loss = loss_fun(actual_y, target_y) 105 | val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 106 | val_loss = val_loss_meter.avg 107 | 108 | scheduler.step(val_loss) 109 | 110 | print( 111 | f"Epoch:{epoch_i + 1}" 112 | + f" Train Loss:{train_loss:.10f}" 113 | + f" Val Loss: {val_loss:.10f}" 114 | ) 115 | 116 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 117 | coef_save_dirctory = input_data_dirctory + folder_name 118 | if(not os.path.exists(coef_save_dirctory)): 119 | os.mkdir(coef_save_dirctory) 120 | file_name = key + "_coef.pt" 121 | my_model.sign.save_coef(coef_save_dirctory + file_name) 122 | print("save: " + folder_name + file_name) 123 | print("\n") 124 | 125 | def CT_train_SiLU(sign_type, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix): 126 | sign_param_dict = Sign_parameter_generator().param_nest_dict[sign_type] 127 | sigmoid = Sigmoid_minmax_layer(coef=sign_param_dict["coef"], degree=sign_param_dict["degree"],scale=sign_scale) 128 | my_model = SiLU_minmax_layer(sigmoid=sigmoid) 129 | ref_model = nn.SiLU() 130 | 131 | optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0) 132 | scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4) 133 | train_path = "train/" 134 | val_path = "val/" 135 | data_type = "_input" 136 | print(file_name) 137 | train_data = torch.rand(90000,4) 138 | valid_data = torch.rand(10000,4) 139 | for epoch_i in range(40): 140 | train_loss_meter = AverageMeter("train loss") 141 | val_loss_meter = AverageMeter("val loss") 142 | #train 143 | for batch_i in range(int(train_data.shape[0] / batch_size)): 144 | x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 145 | target_y = ref_model.to("cuda:0").forward(x) 146 | 
actual_y = my_model.forward(x) 147 | loss_fun = nn.MSELoss() 148 | my_model.zero_grad() 149 | loss = loss_fun(actual_y, target_y) 150 | train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 151 | loss.backward() 152 | optimizer.step() 153 | train_loss = train_loss_meter.avg 154 | 155 | #valid 156 | for batch_i in range(int(valid_data.shape[0] / batch_size)): 157 | x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0") 158 | target_y = ref_model.to("cuda:0").forward(x) 159 | actual_y = my_model.forward(x) 160 | loss_fun = nn.MSELoss() 161 | loss = loss_fun(actual_y, target_y) 162 | val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0]) 163 | val_loss = val_loss_meter.avg 164 | 165 | scheduler.step(val_loss) 166 | 167 | print( 168 | f"Epoch:{epoch_i + 1}" 169 | + f" Train Loss:{train_loss:.10f}" 170 | + f" Val Loss: {val_loss:.10f}" 171 | ) 172 | 173 | folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/" 174 | coef_save_dirctory = input_data_dirctory + folder_name 175 | if(not os.path.exists(coef_save_dirctory)): 176 | os.mkdir(coef_save_dirctory) 177 | file_name = "SiLU_test" + "_coef.pt" 178 | my_model.sign.save_coef(coef_save_dirctory + file_name) 179 | print("save: " + folder_name + file_name) 180 | print("\n") 181 | 182 | 183 | if __name__ == "__main__": 184 | parser = ArgumentParser() 185 | parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32"]) 186 | parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"]) 187 | parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "f1", "f2"]) 188 | parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False]) 189 | parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/") 190 | parser.add_argument("-silu", "--silu_test", type=bool, default=False, choices=[True , False]) 191 | 192 | args = parser.parse_args() 193 | print(args) 194 | if(args.dataset == "cifar10" or args.dataset == "cifar100"): 195 | split_point = 45000 196 | batch_size = 100 197 | elif(args.dataset == "imagenet_1k"): 198 | split_point = 900 199 | batch_size = 40 200 | model = get_pretrained_model(model_name=args.model, dataset=args.dataset) 201 | if(args.data_collection): 202 | data_collection(model = model, 203 | valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"] ), 204 | train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] ), 205 | split_point = split_point, input_data_save_path = args.working_directory) 206 | elif(args.silu_test): 207 | CT_train_SiLU(sign_type = args.sign_type, sign_scale = 0, scale_path= None, sign_nest_dict = None,batch_size = 100, 208 | input_data_dirctory = args.working_directory , output_floder_suffix= "fix") 209 | else: 210 | nest_dict = generate_sign_nest_dict(model) 211 | CT_train(sign_type = args.sign_type, sign_scale = 0, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size, 212 | input_data_dirctory = args.working_directory , output_floder_suffix= "dynamic") 213 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SmartPAF: Accurate Low-Degree Polynomial Approximation of Non-polynomial Operators for 
Fast Private Inference in Homomorphic Encryption 2 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE) 3 | 4 | ## What is SmartPAF? 5 | SmartPAF is an open-source training framework to replace non-polynomial operators of ML models, such as ReLU and MaxPooling, with low-degree Polynomial Approximation Function (PAF) and recover accuracy through proposed fine-tuning tricks. SmartPAF is actively developed by the [Synergy Lab](https://synergy.ece.gatech.edu/) at [Georgia Institute of Technology](https://www.gatech.edu/). For more details about SmartPAF, please visit our [paper](https://arxiv.org/abs/2404.03216). 6 | 7 | 8 | ## Motivation 9 | Secure Fully Homomorphic Encryption (FHE) based Machine Learning Inference Converts Non-polynomial Operators (ReLU/MaxPooling) into Polynomial Approximation Functions (PAF) 10 | ![](image/secure_ML_inference.png) 11 | 12 | Existing PAFs suffer from either prohibitive latency overhead or low accuracy. SmartPAF proposes four training techniques to enable exploration on the entire PAF degree space and spot high-accuracy low-latency PAF. 13 | ![](image/RelatedWork.png) 14 | 15 | ** This repo open-sourced the SmartPAF framework code with prerun results**. 16 | 17 | ## Results 18 | SmartPAF spots optimal 14-degree PAF with 69.4% accuracy (the same accuracy as plaintext pretrained ResNet-18 under ImageNet-1k dataset) and saves 72% latency of 27-degree Minimax PAF. 19 | 20 | | Model-Dataset | Technique Setup | $f_1^2 \circ g_1^2$ | \alpha=7 | $f_2\circ g_3$ | $f_2\circ g_2$ | $f_1\circ g_2$ | 21 | |-------------------------------------------|----------------------------------------------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------| 22 | | Replace ReLU | 23 | | | baseline + CT + DS w/o fine tune | 68.60\% | 67.70\% | 67.00\% | 66.50\% | 61.70\% | 24 | | | baseline + DS | 64.30\% | 66.70\% | 64.20\% | 58.30\% | 53.10\% | 25 | | | baseline + AT + DS | 65.20\% | 68.30\% | 63.70\% | 60.50\% | 52.00\% | 26 | | | 63.40\% | 68.10\% | 63.30\% | 57.60\% | 49.50\% | 27 | | ResNet-18 (ImageNet-1k) | baseline + PA + DS | 65.60\% | {68.40\%} | 64.60\% | 60.20\% | 52.60\% | 28 | | 69.4% | baseline + PA + AT + DS | 64.90\% | 67.40\% | 64.60\% | 56.50\% | 47.10\% | 29 | | | baseline + CT + PA + DS | 68.20\% | 67.00\% | {67.60\%} | 65.90\% | 60.80\% | 30 | | | baseline + CT + PA + AT + DS | {69.00\%} | 68.10\% | 61.40\% | {66.50\%} | {63.10\%} | 31 | | | {Accuracy Improvement over Baseline} | 1.35$\times$ | 1.06$\times$ | 1.37$\times$ | 2.08$\times$ | 3.39$\times$ | 32 | | | {Accuracy Improvement over ``baseline + DS"} | +4.7\%(1.07$\times$) | +1.7\%(1.03$\times$) | +3.4\%(1.05$\times$) | +8.2\%(1.14$\times$) | +10\%(1.19$\times$) | 33 | | | {Accuracy Improvement over baseline} | 1.07$\times$ | 1.03$\times$ | 1.05$\times$ | 1.14$\times$ | 1.19$\times$ | 34 | |-------------------------------------------|----------------------------------------------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------| 35 | | Replace all Non Polynomial Operators | 36 | | | baseline + CT + DS w/o fine tune | 64.4\% | 59.4\% | 40.9\% | 33.1\% | 13.3\% | 37 | | | baseline + DS | 59.6\% | 66.2\% | 62\% | 49\% | 37\% | 38 | | ResNet-18 (ImageNet-1k) | baseline + SS ({prior 
work~\cite{Minimax_approximation}}) | 25.5\% | 47.1\% | 23\% | 4.2\% | 0\% |
39 | | 69.4% | baseline + CT + PA + AT + DS | {69.9\%} | {68\%} | {65.7\%} | {64.1\%} | {57.8\%} |
40 | | | SmartPAF: baseline + CT + PA + AT + SS |69.4\% |67\% |65.3\% |57.3\% |6.5\% |
41 | | | {Accuracy Improvement over Baseline} | 1.07$\times$ | 1.22$\times$ | 1.27$\times$ | 1.79$\times$ | 0.22$\times$ |
42 | | | {Accuracy Improvement over~\cite{Minimax_approximation}} | +43.9\%(2.72$\times$) | +19.9\%(1.42$\times$) | +42.3\%(2.84$\times$) | +53.1\%(13.64$\times$) | +6.5\%(\infty) |
43 | |-------------------------------------------|----------------------------------------------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|
44 | | Replace all Non Polynomial Operators |
45 | | | baseline + SS ({prior work~\cite{Minimax_approximation}}) | 91.06\% | 81.35\% | 76.58\% | 58.11\% | 43.84\% |
46 | | | baseline + CT + DS | 93.39\% | 93.6\% | 93.3\% | {92.4\%} | {91.53\%} |
47 | | VGG-19 (CIFAR-10) | baseline + CT + PA + AT + DS | {93.6\%} | {93.81\%} | {93.59\%} | 91.49\% | 91.51\% |
48 | | 93.95% | SmartPAF: baseline + CT + PA + AT + SS |92.16\% |92.62\% |91.51\% |88.45\% |76.93\% |
49 | | | {Accuracy Improvement over Baseline} | 1.07$\times$ | 1.22$\times$ | 1.27$\times$ | 1.79$\times$ | 0.22$\times$ |
50 | | | {Accuracy Improvement over~\cite{Minimax_approximation}} | +1.1\%(1.01$\times$) | +11.27\%(1.14$\times$) | +14.93\%(1.2$\times$) | +30.34\%(1.52$\times$) | +33.09\%(1.75$\times$) |
51 | |-------------------------------------------|----------------------------------------------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|
52 | 
53 | # Ready to run?
54 | ```
55 | # Activate Conda
56 | # Create a Python 3.8 environment
57 | conda create --name SmartPAF python=3.8
58 | 
59 | # Activate the environment
60 | conda activate SmartPAF
61 | 
62 | # Install packages
63 | conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia
64 | conda install -c conda-forge pytorch-lightning
65 | 
66 | # Download cifar10 pretrained models
67 | cd PyTorch_CIFAR10/
68 | sh download_weights.sh
69 | cd ..
70 | 
71 | # Open ./global_config/global_config.yaml
72 | # Edit "dataset_dirctory:" to point to the folder where datasets will be stored.
73 | 
74 | # Download datasets
75 | python3 util.py -dd True --dataset cifar10
76 | python3 util.py -dd True --dataset cifar100
77 | python3 util.py -dd True --dataset imagenet_1k
78 | ```
79 | 
80 | 
81 | ## Control Parameters for the library
82 | ```
83 | Typical flow
84 | For one model and dataset pair, use a single -wd (working directory)
85 | --model: resnet18, vgg19_bn, resnet32
86 | --dataset: cifar10, cifar100, imagenet_1k
87 | -st: a7, 2f12g1, f1g2, f2g2, f2g3
88 | Supported combinations: vgg19_bn & imagenet_1k, vgg19_bn & cifar10, resnet18 & imagenet_1k, and resnet32 & cifar100
89 | -st selects the PAF type to use
90 | -dc stands for "data collection": pass -dc True to collect per-layer input data before running CT
91 | ```
92 | 
93 | ## ResNet-18 on ImageNet_1k
94 | ```
95 | # The following steps must be run in order, as later steps need the results of earlier steps.
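# (Note: based on src/CT.py as shipped in this repo, the -dc step stores each
#  replaced layer's inputs as <layer>_input.pt under the -wd directory and splits
#  them into train/ and val/; the -st step then fits PAF coefficients against the
#  reference operator and saves <layer>_coef.pt inside a CT_<sign_type>_S*_40s/
#  subfolder, which the later steps pick up from the same working directory.)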
96 | # Collect CT data
97 | python3 ./CT.py --model resnet18 --dataset imagenet_1k -wd ../resnet18_imagenet1k/ -dc True
98 | # CT
99 | python3 ./CT.py --model resnet18 --dataset imagenet_1k -wd ../resnet18_imagenet1k/ -st 2f12g1
100 | # PA and AT
101 | python3 ./PA_AT.py --model resnet18 --dataset imagenet_1k -wd ../resnet18_imagenet1k/ -st 2f12g1
102 | # Statistic Scale.
103 | python3 ./SS.py --model resnet18 --dataset imagenet_1k -wd ../resnet18_imagenet1k/ -st 2f12g1
104 | ```
105 | ## ResNet-32 on CIFAR-100
106 | ```
107 | # The following steps must be run in order, as later steps need the results of earlier steps.
108 | # Collect CT data
109 | python3 ./CT.py --model resnet32 --dataset cifar100 -wd ../resnet32_cifar100/ -dc True
110 | # CT
111 | python3 ./CT.py --model resnet32 --dataset cifar100 -wd ../resnet32_cifar100/ -st 2f12g1
112 | # PA and AT
113 | python3 ./PA_AT.py --model resnet32 --dataset cifar100 -wd ../resnet32_cifar100/ -st 2f12g1
114 | # Statistic Scale.
115 | python3 ./SS.py --model resnet32 --dataset cifar100 -wd ../resnet32_cifar100/ -st 2f12g1
116 | ```
117 | 
118 | ## VGG-19 on CIFAR-10
119 | ```
120 | # The following steps must be run in order, as later steps need the results of earlier steps.
121 | # Collect CT data
122 | python3 ./CT.py --model vgg19_bn --dataset cifar10 -wd ../vgg19_bn_cifar10/ -dc True
123 | # CT
124 | python3 ./CT.py --model vgg19_bn --dataset cifar10 -wd ../vgg19_bn_cifar10/ -st 2f12g1
125 | # PA and AT
126 | python3 ./PA_AT.py --model vgg19_bn --dataset cifar10 -wd ../vgg19_bn_cifar10/ -st 2f12g1
127 | # Statistic Scale.
128 | python3 ./SS.py --model vgg19_bn --dataset cifar10 -wd ../vgg19_bn_cifar10/ -st 2f12g1
129 | ```
130 | 
131 | ## VGG-19 on ImageNet_1k
132 | ```
133 | # The following steps must be run in order, as later steps need the results of earlier steps.
134 | # Collect CT data
135 | python3 ./CT.py --model vgg19_bn --dataset imagenet_1k -wd ../vgg19_bn_imagenet1k/ -dc True
136 | # CT
137 | python3 ./CT.py --model vgg19_bn --dataset imagenet_1k -wd ../vgg19_bn_imagenet1k/ -st 2f12g1
138 | # PA and AT
139 | python3 ./PA_AT.py --model vgg19_bn --dataset imagenet_1k -wd ../vgg19_bn_imagenet1k/ -st 2f12g1
140 | # Statistic Scale.
141 | python3 ./SS.py --model vgg19_bn --dataset imagenet_1k -wd ../vgg19_bn_imagenet1k/ -st 2f12g1
142 | ```
143 | 
144 | # Developers
145 | Jingtian Dang (Georgia Tech, dangjingtian@gatech.edu)
146 | 
147 | Jianming Tong (Georgia Tech, jianming.tong@gatech.edu)
148 | 
149 | Tushar Krishna (Georgia Tech)
150 | 
151 | # Citations
152 | ```
153 | @inproceedings{tong2024accurate,
154 | author={Jianming Tong and Jingtian Dang and Anupam Golder and Callie Hao and Arijit Raychowdhury and Tushar Krishna},
155 | booktitle = {Proceedings of Machine Learning and Systems (MLSys)},
156 | title={Accurate Low-Degree Polynomial Approximation of Non-polynomial Operators for Fast Private Inference in Homomorphic Encryption},
157 | url = {https://arxiv.org/abs/2404.03216},
158 | year = {2024}
159 | }
160 | ```
--------------------------------------------------------------------------------
/src/custom_module.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | from typing import Tuple, Union
4 | import torch
5 | import torch.nn as nn
6 | import copy
7 | from math import pi, sqrt
8 | 
9 | 
10 | 
11 | # TODO: PA+CT instead of CT -> PA.
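The classes below (src/custom_module.py) are the PAF building blocks that SmartPAF swaps in for ReLU, MaxPool, and SiLU. As a minimal self-contained sketch of the idea (the helper names and the degree-3 coefficients here are illustrative only; the repo's real PAFs compose several polynomials such as f∘g and train their coefficients), the trick is to approximate sign(x) with a low-degree odd polynomial on a min-max-scaled input and to rebuild ReLU and max from that proxy:

```python
import torch

def poly_sign(x, coefs):
    """Odd polynomial sum_i coefs[i] * x**(2*i + 1), evaluated on x scaled into
    [-1, 1] by its largest magnitude (mirroring the min-max scaling used by
    Sign_minmax_layer below)."""
    x = x / x.abs().max()
    x2 = x * x
    power = x
    out = torch.zeros_like(x)
    for c in coefs:
        out = out + c * power   # accumulate c_i * x^(2i+1)
        power = power * x2      # move to the next odd power
    return out

def paf_relu(x, coefs):
    # ReLU(x) = (x + x * sign(x)) / 2, with sign replaced by its polynomial proxy
    return (x + x * poly_sign(x, coefs)) / 2

def paf_max(a, b, coefs):
    # max(a, b) = ((a + b) + sign(a - b) * (a - b)) / 2, the primitive behind PAF MaxPool
    d = a - b
    return ((a + b) + poly_sign(d, coefs) * d) / 2

if __name__ == "__main__":
    toy_coefs = [1.5, -0.5]                  # toy degree-3 sign approximation
    x = torch.linspace(-3.0, 3.0, 7)
    print(paf_relu(x, toy_coefs))            # roughly max(x, 0)
    print(paf_max(x, torch.zeros_like(x), toy_coefs))
```

Sign_minmax_layer below generalizes poly_sign with trainable coefficients (so CT/PA/AT can fine-tune them), ReLU_sign_layer and Maxpool_sign_layer rebuild ReLU and MaxPool from it as in the sketch, and the SiLU variants use the analogous Sigmoid_minmax_layer.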
12 | class Input_data_collection_layer(nn.Module): 13 | def __init__(self, name, layer : nn.Module): 14 | super().__init__() 15 | self.data_store = torch.tensor([]) 16 | self.name = name 17 | self.layer = layer 18 | 19 | 20 | def forward(self, x): 21 | x_backup = x[None, :].to("cpu") 22 | self.data_store = torch.cat((self.data_store, x_backup), 0) 23 | 24 | res =self.layer.forward(x) 25 | return res 26 | 27 | 28 | def save(self, directory , file_name): 29 | if(not os.path.exists(directory)): 30 | os.mkdir(directory) 31 | torch.save(self.data_store, directory + file_name) 32 | 33 | 34 | 35 | class Input_scale_collection_layer(nn.Module): 36 | def __init__(self, name, layer : nn.Module): 37 | super().__init__() 38 | self.have_first = False 39 | self.data_store = torch.tensor(0) 40 | self.name = name 41 | self.layer = layer 42 | 43 | 44 | def forward(self, x): 45 | s_max = torch.max(x).item() 46 | s_min = torch.min(x).item() 47 | scale = max(abs(s_max), abs(s_min)) 48 | if(scale > self.data_store.item()): 49 | self.data_store = torch.tensor(scale) 50 | res =self.layer.forward(x) 51 | return res 52 | 53 | 54 | def save(self, directory , file_name): 55 | if(not os.path.exists(directory)): 56 | os.mkdir(directory) 57 | torch.save(self.data_store, directory + file_name) 58 | 59 | 60 | 61 | class Sign_minmax_layer(nn.Module): 62 | def __init__(self, coef, degree, scale = 0, scale_ratio = 1, train_coef = True, param_scale = False): 63 | super().__init__() 64 | self.scale_ratio = scale_ratio 65 | self.degree = degree 66 | self.coeflist = nn.Parameter(coef.to("cuda:0"), requires_grad=train_coef).to("cuda:0") 67 | self.param_scale = param_scale 68 | if(self.param_scale): 69 | self.scale = nn.Parameter(torch.tensor(scale).to("cuda:0"), requires_grad=False).to("cuda:0") 70 | else: 71 | self.scale = scale 72 | 73 | 74 | def forward(self, x): 75 | 76 | if(self.scale == 0): 77 | s_max = torch.max(x).item() 78 | s_min = torch.min(x).item() 79 | scale = max(abs(s_max), abs(s_min)) * self.scale_ratio 80 | else: 81 | if(self.param_scale): 82 | scale = self.scale.item() * self.scale_ratio 83 | else: 84 | scale = self.scale * self.scale_ratio 85 | 86 | 87 | # x_bk = torch.clone(x).to(x.device) 88 | x = torch.divide(x, scale) 89 | 90 | coeflist = self.coeflist 91 | 92 | for compositive_id in range(coeflist.shape[0]): 93 | 94 | degree_num = self.degree[compositive_id] 95 | 96 | # x_degree_1 = torch.clone(x).to(x_bk.device) 97 | x_degree_2 = torch.mul(x, x) 98 | 99 | # out = torch.clone(x).to(x_bk.device) 100 | out = torch.mul(x, coeflist[compositive_id][0]) # x^1 * coe[1] 101 | 102 | for i in range(1, degree_num): 103 | x = torch.mul(x, x_degree_2) 104 | partial_out = torch.mul(x, coeflist[compositive_id][i]) 105 | out = torch.add(out, partial_out) 106 | x = torch.clone(out).to(x.device) 107 | 108 | 109 | result = out.to(x.device) 110 | del x 111 | 112 | return result 113 | 114 | def set_coef_grad(self, grad): 115 | self.coeflist.requires_grad = grad 116 | 117 | def set_scale_grad(self, grad): 118 | if(self.param_scale): 119 | self.scale.requires_grad = grad 120 | 121 | def save_coef(self, path_name): 122 | torch.save(self.coeflist, path_name) 123 | 124 | def save_scale(self, path_name): 125 | if(self.param_scale): 126 | torch.save(self.scale, path_name) 127 | 128 | 129 | class Sigmoid_minmax_layer(nn.Module): 130 | def __init__(self, coef, degree, scale = 0, scale_ratio = 1, train_coef = True, param_scale = False): 131 | super().__init__() 132 | self.scale_ratio = scale_ratio 133 | self.degree = degree 134 | 
self.coeflist = nn.Parameter(coef.to("cuda:0"), requires_grad=train_coef).to("cuda:0") 135 | self.param_scale = param_scale 136 | if(self.param_scale): 137 | self.scale = nn.Parameter(torch.tensor(scale).to("cuda:0"), requires_grad=False).to("cuda:0") 138 | else: 139 | self.scale = scale 140 | 141 | 142 | def forward(self, x): 143 | 144 | if(self.scale == 0): 145 | s_max = torch.max(x).item() 146 | s_min = torch.min(x).item() 147 | scale = max(abs(s_max), abs(s_min)) * self.scale_ratio 148 | else: 149 | if(self.param_scale): 150 | scale = self.scale.item() * self.scale_ratio 151 | else: 152 | scale = self.scale * self.scale_ratio 153 | 154 | 155 | # x_bk = torch.clone(x).to(x.device) 156 | x = torch.divide(x, scale) 157 | 158 | coeflist = self.coeflist 159 | 160 | for compositive_id in range(coeflist.shape[0]): 161 | 162 | degree_num = self.degree[compositive_id] 163 | 164 | # x_degree_1 = torch.clone(x).to(x_bk.device) 165 | x_degree_2 = torch.mul(x, x) 166 | 167 | # out = torch.clone(x).to(x_bk.device) 168 | out = torch.mul(x, coeflist[compositive_id][0]) # x^1 * coe[1] 169 | 170 | for i in range(1, degree_num): 171 | x = torch.mul(x, x_degree_2) 172 | partial_out = torch.mul(x, coeflist[compositive_id][i]) 173 | out = torch.add(out, partial_out) 174 | x = torch.clone(out).to(x.device) 175 | 176 | x = x * 0.5 + 0.5 177 | result = out.to(x.device) 178 | del x 179 | 180 | return result 181 | 182 | def set_coef_grad(self, grad): 183 | self.coeflist.requires_grad = grad 184 | 185 | def set_scale_grad(self, grad): 186 | if(self.param_scale): 187 | self.scale.requires_grad = grad 188 | 189 | def save_coef(self, path_name): 190 | torch.save(self.coeflist, path_name) 191 | 192 | def save_scale(self, path_name): 193 | if(self.param_scale): 194 | torch.save(self.scale, path_name) 195 | 196 | 197 | 198 | 199 | class ReLU_sign_layer(nn.Module): 200 | def __init__(self, sign:nn.Module): 201 | super().__init__() 202 | self.sign = sign 203 | 204 | def forward(self, x): 205 | result = torch.divide(torch.add(x, torch.mul(x, self.sign.forward(x))),2) 206 | return result 207 | 208 | class SiLU_minmax_layer(nn.Module): 209 | def __init__(self, sigmoid:nn.Module): 210 | super().__init__() 211 | self.sigmoid = sigmoid 212 | 213 | def forward(self, x): 214 | result = torch.mul(x, self.sign.forward(x)) 215 | return result 216 | 217 | 218 | 219 | class Maxpool_sign_layer(nn.Module): 220 | def __init__(self, sign:nn.Module, 221 | kernel_size: Union[int, Tuple[int, int]], 222 | stride : Union[int, Tuple[int, int]] = 0, 223 | padding : Union[int, Tuple[int, int]] = 0, 224 | dilation : Union[int, Tuple[int, int]] = 1, ): 225 | super().__init__() 226 | 227 | self.sign = sign 228 | 229 | self.kernel_size = self.to_tuple(kernel_size) 230 | 231 | if(stride == 0): 232 | self.stride = self.kernel_size 233 | else: 234 | self.stride = self.to_tuple(stride) 235 | self.padding = self.to_tuple(padding) 236 | self.dilation = self.to_tuple(dilation) 237 | self.unfold = nn.Unfold(kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding, stride=self.stride) 238 | 239 | 240 | 241 | def forward(self, x): 242 | x_size = x.size() 243 | x_unfold = self.unfold(x) 244 | x_unfold_size = x_unfold.size() 245 | x_reshape = torch.reshape(x_unfold, (x_unfold_size[0], -1, int(x_unfold_size[1] / x_size[1]), x_unfold_size[2])) 246 | 247 | h_out = math.floor((x_size[2] + 2 * self.padding[0] - self.dilation[0] * (self.kernel_size[0] -1) - 1)/ self.stride[0] + 1) 248 | w_out = math.floor((x_size[3] + 2 * self.padding[1] - 
self.dilation[1] * (self.kernel_size[1] -1) - 1)/ self.stride[1] + 1) 249 | result = self.maxpool(x_reshape).reshape((x_size[0], x_size[1], h_out, w_out)) 250 | return result 251 | 252 | 253 | def to_tuple(self, param:Union[int, Tuple[int, int]]): 254 | if(isinstance(param, int)): 255 | param = (param, param) 256 | return param 257 | 258 | def maxpool(self, x): 259 | x_size = x.size() 260 | pool_size = x_size[2] 261 | 262 | if(pool_size == 1): 263 | return x 264 | elif(pool_size == 2): 265 | return self.max(x[:,:,0,:], x[:,:,1,:]) 266 | else: 267 | pivot = int(pool_size / 2) 268 | a = self.maxpool(x[:,:,0:pivot,:]) 269 | b = self.maxpool(x[:,:,pivot:pool_size,:]) 270 | return self.max(a, b) 271 | 272 | 273 | def max(self, a, b): 274 | a = torch.squeeze(a) 275 | b = torch.squeeze(b) 276 | sum = torch.add(a,b) 277 | diff = torch.sub(a,b) 278 | sign_diff = self.sign.forward(diff) 279 | result = torch.divide(torch.add(sum, torch.mul(sign_diff, diff)), 2) 280 | return result 281 | 282 | 283 | class HerPN2d(nn.Module): 284 | @staticmethod 285 | def h0(x): 286 | return torch.ones(x.shape).to(x.device) 287 | 288 | @staticmethod 289 | def h1(x): 290 | return x 291 | 292 | @staticmethod 293 | def h2(x): 294 | return (x * x - 1) 295 | 296 | def __init__(self, num_features : int, BN_dimension=2 ,BN_copy:nn.Module = None): 297 | super().__init__() 298 | self.f = (1 / sqrt(2 * pi), 1 / 2, 1 / sqrt(4 * pi)) 299 | 300 | if(BN_copy): 301 | self.bn0 = copy.deepcopy(BN_copy) 302 | self.bn1 = copy.deepcopy(BN_copy) 303 | self.bn2 = copy.deepcopy(BN_copy) 304 | 305 | elif(BN_dimension == 1): 306 | self.bn0 = nn.BatchNorm1d(num_features) 307 | self.bn1 = nn.BatchNorm1d(num_features) 308 | self.bn2 = nn.BatchNorm1d(num_features) 309 | else: 310 | self.bn0 = nn.BatchNorm2d(num_features) 311 | self.bn1 = nn.BatchNorm2d(num_features) 312 | self.bn2 = nn.BatchNorm2d(num_features) 313 | 314 | 315 | 316 | self.bn = (self.bn0, self.bn1, self.bn2) 317 | self.h = (self.h0, self.h1, self.h2) 318 | 319 | 320 | def forward(self, x): 321 | result = torch.zeros(x.shape).to(x.device) 322 | for bn, f, h in zip(self.bn, self.f, self.h): 323 | poly = torch.mul(f, h(x)) 324 | # print(poly.shape) 325 | result = torch.add(result, bn(poly)) 326 | 327 | return result 328 | 329 | 330 | class Sigmoid_minmax_layer(nn.Module): 331 | def __init__(self, coef, degree, scale = 0, scale_ratio = 1, train_coef = True, param_scale = False): 332 | super().__init__() 333 | self.scale_ratio = scale_ratio 334 | self.degree = degree 335 | self.coeflist = nn.Parameter(coef.to("cuda:0"), requires_grad=train_coef).to("cuda:0") 336 | self.param_scale = param_scale 337 | if(self.param_scale): 338 | self.scale = nn.Parameter(torch.tensor(scale).to("cuda:0"), requires_grad=False).to("cuda:0") 339 | else: 340 | self.scale = scale 341 | 342 | 343 | def forward(self, x): 344 | 345 | if(self.scale == 0): 346 | s_max = torch.max(x).item() 347 | s_min = torch.min(x).item() 348 | scale = max(abs(s_max), abs(s_min)) * self.scale_ratio 349 | else: 350 | if(self.param_scale): 351 | scale = self.scale.item() * self.scale_ratio 352 | else: 353 | scale = self.scale * self.scale_ratio 354 | 355 | 356 | # x_bk = torch.clone(x).to(x.device) 357 | x = torch.divide(x, scale) 358 | 359 | coeflist = self.coeflist 360 | 361 | for compositive_id in range(coeflist.shape[0]): 362 | 363 | degree_num = self.degree[compositive_id] 364 | 365 | # x_degree_1 = torch.clone(x).to(x_bk.device) 366 | x_degree_2 = torch.mul(x, x) 367 | 368 | # out = torch.clone(x).to(x_bk.device) 369 | out = 
torch.mul(x, coeflist[compositive_id][0]) # x^1 * coe[1] 370 | 371 | for i in range(1, degree_num): 372 | x = torch.mul(x, x_degree_2) 373 | partial_out = torch.mul(x, coeflist[compositive_id][i]) 374 | out = torch.add(out, partial_out) 375 | x = torch.clone(out).to(x.device) 376 | 377 | result = (out + 0.5).to(x.device) 378 | del x 379 | 380 | return result 381 | 382 | def set_coef_grad(self, grad): 383 | self.coeflist.requires_grad = grad 384 | 385 | def set_scale_grad(self, grad): 386 | if(self.param_scale): 387 | self.scale.requires_grad = grad 388 | 389 | def save_coef(self, path_name): 390 | torch.save(self.coeflist, path_name) 391 | 392 | def save_scale(self, path_name): 393 | if(self.param_scale): 394 | torch.save(self.scale, path_name) 395 | 396 | class SiLU_minmax_layer(nn.Module): 397 | def __init__(self, sigmoid:nn.Module): 398 | super().__init__() 399 | self.sigmoid = sigmoid 400 | 401 | def forward(self, x ): 402 | 403 | 404 | result = torch.mul(x, self.sigmoid.forward(x)) 405 | return result 406 | 407 | class SiLU_minmax_bn_layer(nn.Module): 408 | def __init__(self, sigmoid:nn.Module, num_features): 409 | super().__init__() 410 | self.sigmoid = sigmoid 411 | self.bn = nn.BatchNorm2d(num_features).to("cuda:0") 412 | 413 | def forward(self, x ): 414 | 415 | 416 | result = torch.mul(x, self.sigmoid.forward(x)) 417 | result = self.bn(result) 418 | return result -------------------------------------------------------------------------------- /src/PyTorch_CIFAR10/cifar10_models/inception.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import namedtuple 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | __all__ = ["Inception3", "inception_v3"] 9 | 10 | 11 | _InceptionOuputs = namedtuple("InceptionOuputs", ["logits", "aux_logits"]) 12 | 13 | 14 | def inception_v3(pretrained=False, progress=True, device="cpu", **kwargs): 15 | r"""Inception v3 model architecture from 16 | `"Rethinking the Inception Architecture for Computer Vision" `_. 17 | 18 | .. note:: 19 | **Important**: In contrast to the other models the inception_v3 expects tensors with a size of 20 | N x 3 x 299 x 299, so ensure your images are sized accordingly. 21 | 22 | Args: 23 | pretrained (bool): If True, returns a model pre-trained on ImageNet 24 | progress (bool): If True, displays a progress bar of the download to stderr 25 | aux_logits (bool): If True, add an auxiliary branch that can improve training. 26 | Default: *True* 27 | transform_input (bool): If True, preprocesses the input according to the method with which it 28 | was trained on ImageNet. 
Default: *False* 29 | """ 30 | model = Inception3() 31 | if pretrained: 32 | script_dir = os.path.dirname(__file__) 33 | state_dict = torch.load( 34 | script_dir + "/state_dicts/inception_v3.pt", map_location=device 35 | ) 36 | model.load_state_dict(state_dict) 37 | return model 38 | 39 | 40 | class Inception3(nn.Module): 41 | # CIFAR10: aux_logits True->False 42 | def __init__(self, num_classes=10, aux_logits=False, transform_input=False): 43 | super(Inception3, self).__init__() 44 | self.aux_logits = aux_logits 45 | self.transform_input = transform_input 46 | 47 | # CIFAR10: stride 2->1, padding 0 -> 1 48 | self.Conv2d_1a_3x3 = BasicConv2d(3, 192, kernel_size=3, stride=1, padding=1) 49 | # self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3) 50 | # self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1) 51 | # self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1) 52 | # self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3) 53 | self.Mixed_5b = InceptionA(192, pool_features=32) 54 | self.Mixed_5c = InceptionA(256, pool_features=64) 55 | self.Mixed_5d = InceptionA(288, pool_features=64) 56 | self.Mixed_6a = InceptionB(288) 57 | self.Mixed_6b = InceptionC(768, channels_7x7=128) 58 | self.Mixed_6c = InceptionC(768, channels_7x7=160) 59 | self.Mixed_6d = InceptionC(768, channels_7x7=160) 60 | self.Mixed_6e = InceptionC(768, channels_7x7=192) 61 | if aux_logits: 62 | self.AuxLogits = InceptionAux(768, num_classes) 63 | self.Mixed_7a = InceptionD(768) 64 | self.Mixed_7b = InceptionE(1280) 65 | self.Mixed_7c = InceptionE(2048) 66 | self.fc = nn.Linear(2048, num_classes) 67 | 68 | # for m in self.modules(): 69 | # if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 70 | # import scipy.stats as stats 71 | # stddev = m.stddev if hasattr(m, 'stddev') else 0.1 72 | # X = stats.truncnorm(-2, 2, scale=stddev) 73 | # values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype) 74 | # values = values.view(m.weight.size()) 75 | # with torch.no_grad(): 76 | # m.weight.copy_(values) 77 | # elif isinstance(m, nn.BatchNorm2d): 78 | # nn.init.constant_(m.weight, 1) 79 | # nn.init.constant_(m.bias, 0) 80 | 81 | def forward(self, x): 82 | if self.transform_input: 83 | x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 84 | x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 85 | x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 86 | x = torch.cat((x_ch0, x_ch1, x_ch2), 1) 87 | # N x 3 x 299 x 299 88 | x = self.Conv2d_1a_3x3(x) 89 | 90 | # CIFAR10 91 | # N x 32 x 149 x 149 92 | # x = self.Conv2d_2a_3x3(x) 93 | # N x 32 x 147 x 147 94 | # x = self.Conv2d_2b_3x3(x) 95 | # N x 64 x 147 x 147 96 | # x = F.max_pool2d(x, kernel_size=3, stride=2) 97 | # N x 64 x 73 x 73 98 | # x = self.Conv2d_3b_1x1(x) 99 | # N x 80 x 73 x 73 100 | # x = self.Conv2d_4a_3x3(x) 101 | # N x 192 x 71 x 71 102 | # x = F.max_pool2d(x, kernel_size=3, stride=2) 103 | # N x 192 x 35 x 35 104 | x = self.Mixed_5b(x) 105 | # N x 256 x 35 x 35 106 | x = self.Mixed_5c(x) 107 | # N x 288 x 35 x 35 108 | x = self.Mixed_5d(x) 109 | # N x 288 x 35 x 35 110 | x = self.Mixed_6a(x) 111 | # N x 768 x 17 x 17 112 | x = self.Mixed_6b(x) 113 | # N x 768 x 17 x 17 114 | x = self.Mixed_6c(x) 115 | # N x 768 x 17 x 17 116 | x = self.Mixed_6d(x) 117 | # N x 768 x 17 x 17 118 | x = self.Mixed_6e(x) 119 | # N x 768 x 17 x 17 120 | if self.training and self.aux_logits: 121 | aux = self.AuxLogits(x) 122 | # N x 768 x 17 x 17 123 | x = self.Mixed_7a(x) 124 | # N x 
1280 x 8 x 8 125 | x = self.Mixed_7b(x) 126 | # N x 2048 x 8 x 8 127 | x = self.Mixed_7c(x) 128 | # N x 2048 x 8 x 8 129 | # Adaptive average pooling 130 | x = F.adaptive_avg_pool2d(x, (1, 1)) 131 | # N x 2048 x 1 x 1 132 | x = F.dropout(x, training=self.training) 133 | # N x 2048 x 1 x 1 134 | x = x.view(x.size(0), -1) 135 | # N x 2048 136 | x = self.fc(x) 137 | # N x 1000 (num_classes) 138 | if self.training and self.aux_logits: 139 | return _InceptionOuputs(x, aux) 140 | return x 141 | 142 | 143 | class InceptionA(nn.Module): 144 | def __init__(self, in_channels, pool_features): 145 | super(InceptionA, self).__init__() 146 | self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1) 147 | 148 | self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1) 149 | self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2) 150 | 151 | self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) 152 | self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) 153 | self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, padding=1) 154 | 155 | self.branch_pool = BasicConv2d(in_channels, pool_features, kernel_size=1) 156 | 157 | def forward(self, x): 158 | branch1x1 = self.branch1x1(x) 159 | 160 | branch5x5 = self.branch5x5_1(x) 161 | branch5x5 = self.branch5x5_2(branch5x5) 162 | 163 | branch3x3dbl = self.branch3x3dbl_1(x) 164 | branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) 165 | branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) 166 | 167 | branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) 168 | branch_pool = self.branch_pool(branch_pool) 169 | 170 | outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] 171 | return torch.cat(outputs, 1) 172 | 173 | 174 | class InceptionB(nn.Module): 175 | def __init__(self, in_channels): 176 | super(InceptionB, self).__init__() 177 | self.branch3x3 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2) 178 | 179 | self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) 180 | self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) 181 | self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, stride=2) 182 | 183 | def forward(self, x): 184 | branch3x3 = self.branch3x3(x) 185 | 186 | branch3x3dbl = self.branch3x3dbl_1(x) 187 | branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) 188 | branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) 189 | 190 | branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) 191 | 192 | outputs = [branch3x3, branch3x3dbl, branch_pool] 193 | return torch.cat(outputs, 1) 194 | 195 | 196 | class InceptionC(nn.Module): 197 | def __init__(self, in_channels, channels_7x7): 198 | super(InceptionC, self).__init__() 199 | self.branch1x1 = BasicConv2d(in_channels, 192, kernel_size=1) 200 | 201 | c7 = channels_7x7 202 | self.branch7x7_1 = BasicConv2d(in_channels, c7, kernel_size=1) 203 | self.branch7x7_2 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) 204 | self.branch7x7_3 = BasicConv2d(c7, 192, kernel_size=(7, 1), padding=(3, 0)) 205 | 206 | self.branch7x7dbl_1 = BasicConv2d(in_channels, c7, kernel_size=1) 207 | self.branch7x7dbl_2 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) 208 | self.branch7x7dbl_3 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) 209 | self.branch7x7dbl_4 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) 210 | self.branch7x7dbl_5 = BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) 211 | 212 | self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) 213 | 214 | def forward(self, x): 215 | branch1x1 = 
self.branch1x1(x) 216 | 217 | branch7x7 = self.branch7x7_1(x) 218 | branch7x7 = self.branch7x7_2(branch7x7) 219 | branch7x7 = self.branch7x7_3(branch7x7) 220 | 221 | branch7x7dbl = self.branch7x7dbl_1(x) 222 | branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) 223 | branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) 224 | branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) 225 | branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) 226 | 227 | branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) 228 | branch_pool = self.branch_pool(branch_pool) 229 | 230 | outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] 231 | return torch.cat(outputs, 1) 232 | 233 | 234 | class InceptionD(nn.Module): 235 | def __init__(self, in_channels): 236 | super(InceptionD, self).__init__() 237 | self.branch3x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) 238 | self.branch3x3_2 = BasicConv2d(192, 320, kernel_size=3, stride=2) 239 | 240 | self.branch7x7x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) 241 | self.branch7x7x3_2 = BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)) 242 | self.branch7x7x3_3 = BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)) 243 | self.branch7x7x3_4 = BasicConv2d(192, 192, kernel_size=3, stride=2) 244 | 245 | def forward(self, x): 246 | branch3x3 = self.branch3x3_1(x) 247 | branch3x3 = self.branch3x3_2(branch3x3) 248 | 249 | branch7x7x3 = self.branch7x7x3_1(x) 250 | branch7x7x3 = self.branch7x7x3_2(branch7x7x3) 251 | branch7x7x3 = self.branch7x7x3_3(branch7x7x3) 252 | branch7x7x3 = self.branch7x7x3_4(branch7x7x3) 253 | 254 | branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) 255 | outputs = [branch3x3, branch7x7x3, branch_pool] 256 | return torch.cat(outputs, 1) 257 | 258 | 259 | class InceptionE(nn.Module): 260 | def __init__(self, in_channels): 261 | super(InceptionE, self).__init__() 262 | self.branch1x1 = BasicConv2d(in_channels, 320, kernel_size=1) 263 | 264 | self.branch3x3_1 = BasicConv2d(in_channels, 384, kernel_size=1) 265 | self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) 266 | self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) 267 | 268 | self.branch3x3dbl_1 = BasicConv2d(in_channels, 448, kernel_size=1) 269 | self.branch3x3dbl_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) 270 | self.branch3x3dbl_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) 271 | self.branch3x3dbl_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) 272 | 273 | self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) 274 | 275 | def forward(self, x): 276 | branch1x1 = self.branch1x1(x) 277 | 278 | branch3x3 = self.branch3x3_1(x) 279 | branch3x3 = [ 280 | self.branch3x3_2a(branch3x3), 281 | self.branch3x3_2b(branch3x3), 282 | ] 283 | branch3x3 = torch.cat(branch3x3, 1) 284 | 285 | branch3x3dbl = self.branch3x3dbl_1(x) 286 | branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) 287 | branch3x3dbl = [ 288 | self.branch3x3dbl_3a(branch3x3dbl), 289 | self.branch3x3dbl_3b(branch3x3dbl), 290 | ] 291 | branch3x3dbl = torch.cat(branch3x3dbl, 1) 292 | 293 | branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) 294 | branch_pool = self.branch_pool(branch_pool) 295 | 296 | outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] 297 | return torch.cat(outputs, 1) 298 | 299 | 300 | class InceptionAux(nn.Module): 301 | def __init__(self, in_channels, num_classes): 302 | super(InceptionAux, self).__init__() 303 | self.conv0 = BasicConv2d(in_channels, 128, kernel_size=1) 304 | 
self.conv1 = BasicConv2d(128, 768, kernel_size=5) 305 | self.conv1.stddev = 0.01 306 | self.fc = nn.Linear(768, num_classes) 307 | self.fc.stddev = 0.001 308 | 309 | def forward(self, x): 310 | # N x 768 x 17 x 17 311 | x = F.avg_pool2d(x, kernel_size=5, stride=3) 312 | # N x 768 x 5 x 5 313 | x = self.conv0(x) 314 | # N x 128 x 5 x 5 315 | x = self.conv1(x) 316 | # N x 768 x 1 x 1 317 | # Adaptive average pooling 318 | x = F.adaptive_avg_pool2d(x, (1, 1)) 319 | # N x 768 x 1 x 1 320 | x = x.view(x.size(0), -1) 321 | # N x 768 322 | x = self.fc(x) 323 | # N x 1000 324 | return x 325 | 326 | 327 | class BasicConv2d(nn.Module): 328 | def __init__(self, in_channels, out_channels, **kwargs): 329 | super(BasicConv2d, self).__init__() 330 | self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) 331 | self.bn = nn.BatchNorm2d(out_channels, eps=0.001) 332 | 333 | def forward(self, x): 334 | x = self.conv(x) 335 | x = self.bn(x) 336 | return F.relu(x, inplace=True) 337 | -------------------------------------------------------------------------------- /src/AESPA_Baseline.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from custom_module import * 3 | from pretrained_model import * 4 | torch.manual_seed(0) 5 | global_config = load_model_yaml("./global_config/", "global_config.yaml") 6 | 7 | global G_model 8 | global R_model 9 | global B_model 10 | global E_model 11 | global S_model 12 | 13 | 14 | def print_log_to_file(log_file, config, train_log, type, before_acc, swa_log): 15 | with open(log_file, "a") as f: 16 | print(" \n", file=f) 17 | print("config: ", file = f) 18 | print("before_acc: " + str(before_acc), file=f) 19 | print(config, file= f) 20 | if(train_log): 21 | acc_log_list = train_log["train_result"]["va"] 22 | if(type == "s" or type == "e"): 23 | end_i = len(acc_log_list) 24 | elif(type == "b"): 25 | end_i = train_log["best_index"] + 1 26 | for i in range(end_i): 27 | print("acc: " + str(acc_log_list[i]), file = f) 28 | print("swa: "+str(swa_log), file= f) 29 | 30 | 31 | def get_optimizer( 32 | model: torch.nn.Module, config: Dict[str, Any] 33 | ) -> torch.optim.Optimizer: 34 | """ 35 | Returns the optimizer initializer according to the config 36 | 37 | Note: config has a minimum of three entries. 38 | Feel free to add more entries if you want. 39 | But do not change the name of the three existing entries 40 | 41 | Args: 42 | - model: the model to optimize for 43 | - config: a dictionary containing parameters for the config 44 | Returns: 45 | - optimizer: the optimizer 46 | """ 47 | 48 | optimizer = None 49 | 50 | optimizer_type = config.get("optimizer_type", "sgd") 51 | learning_rate = config.get("lr", 0) 52 | weight_decay = config.get("weight_decay", 0) 53 | momentum = 0 54 | dampening = 0 55 | 56 | print(learning_rate) 57 | print(weight_decay) 58 | print(optimizer_type) 59 | if optimizer_type=="sgd": 60 | optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum = momentum, dampening = dampening) 61 | elif optimizer_type=="adam": 62 | optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay) 63 | 64 | return optimizer 65 | 66 | 67 | def compute_accuracy(logits: torch.Tensor, labels: torch.Tensor) -> float: 68 | """Compute the accuracy given the prediction logits and the ground-truth labels 69 | 70 | Args: 71 | logits: The output of the forward pass through the model. 
72 | for K classes logits[k] (where 0 <= k < K) corresponds to the 73 | log-odds of class `k` being the correct one. 74 | Shape: (batch_size, num_classes) 75 | labels: The ground truth label for each instance in the batch 76 | Shape: (batch_size) 77 | Returns: 78 | accuracy: The accuracy of the predicted logits 79 | (number of correct predictions / total number of examples) 80 | """ 81 | batch_accuracy = 0.0 82 | num_data = logits.size()[0] 83 | for _i in range(num_data): 84 | nn_inference_label = torch.argmax(logits[_i]) 85 | if(labels[_i] == nn_inference_label): 86 | batch_accuracy += 1.0 87 | 88 | batch_accuracy = batch_accuracy / num_data 89 | 90 | return batch_accuracy 91 | 92 | 93 | def compute_loss( 94 | model: nn.Module, 95 | model_output: torch.Tensor, 96 | target_labels: torch.Tensor, 97 | is_normalize: bool = True, 98 | ) -> torch.Tensor: 99 | """ 100 | Computes the loss between the model output and the target labels 101 | 102 | Args: 103 | - model: a model (which inherits from nn.Module) 104 | - model_output: the raw scores output by the net 105 | - target_labels: the ground truth class labels 106 | - is_normalize: bool flag indicating that loss should be divided by the batch size 107 | Returns: 108 | - the loss value 109 | """ 110 | loss = None 111 | 112 | criterion = nn.CrossEntropyLoss() 113 | loss = criterion(model_output, target_labels) 114 | #loss = model.loss_criterion(model_output, target_labels) 115 | 116 | if(is_normalize): 117 | loss = loss / model_output.size()[0] 118 | 119 | return loss 120 | 121 | 122 | class Trainer: 123 | """Class that stores model training metadata.""" 124 | def __init__( 125 | self, 126 | #data_dir: str, 127 | model: nn.Module, 128 | optimizer: Optimizer, 129 | train_loader: torch.utils.data.DataLoader, 130 | val_loader: torch.utils.data.DataLoader, 131 | model_dir: str = "None", 132 | load_from_disk: bool = True, 133 | cuda: bool = True, 134 | lr_scheduler = None, 135 | no_bn_track: bool = True, 136 | 137 | ) -> None: 138 | 139 | self.model_dir = model_dir 140 | self.model = model 141 | self.lr_scheduler = lr_scheduler 142 | self.no_bn_track = no_bn_track 143 | 144 | self.cuda = cuda 145 | if cuda: 146 | self.model.cuda() 147 | 148 | self.train_loader = train_loader 149 | self.val_loader = val_loader 150 | 151 | self.optimizer = optimizer 152 | 153 | self.train_loss_history = [] 154 | self.validation_loss_history = [] 155 | self.train_accuracy_history = [] 156 | self.validation_accuracy_history = [] 157 | 158 | # load the model from the disk if it exists 159 | if os.path.exists(model_dir) and load_from_disk: 160 | checkpoint = torch.load(os.path.join(self.model_dir, "checkpoint.pt")) 161 | self.model.load_state_dict(checkpoint["model_state_dict"]) 162 | self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) 163 | 164 | self.model.train() 165 | 166 | 167 | def save_model(self) -> None: 168 | """ 169 | Saves the model state and optimizer state on the dict 170 | """ 171 | torch.save( 172 | { 173 | "model_state_dict": self.model.state_dict(), 174 | "optimizer_state_dict": self.optimizer.state_dict(), 175 | }, 176 | os.path.join(self.model_dir, "checkpoint.pt"), 177 | ) 178 | 179 | 180 | def run_training_loop(self, num_epochs: int, swa_pack = None) -> None: 181 | """Train for num_epochs, and validate after every epoch.""" 182 | 183 | best_val = 0.0 184 | best_loss = 100 185 | best_epoch_i = 0 186 | train_result = {"tl": [], "vl": [], "ta" :[], "va":[]} 187 | 188 | for epoch_idx in range(num_epochs): 189 | train_loss, train_acc = 
self.train_epoch() 190 | self.train_loss_history.append(train_loss) 191 | self.train_accuracy_history.append(train_acc) 192 | val_loss, val_acc = self.validate() 193 | self.validation_loss_history.append(val_loss) 194 | self.validation_accuracy_history.append(val_acc) 195 | 196 | if(swa_pack != None and num_epochs > swa_pack[2]): 197 | swa_pack[0].update_parameters(self.model) 198 | swa_pack[1].step() 199 | if(self.lr_scheduler): 200 | self.lr_scheduler.step(val_acc) 201 | 202 | train_result["tl"].append(train_loss) 203 | train_result["vl"].append(val_loss) 204 | train_result["ta"].append(train_acc) 205 | train_result["va"].append(val_acc) 206 | 207 | print( 208 | f"Epoch:{epoch_idx + 1}" 209 | + f" Train Loss:{train_loss:.4f}" 210 | + f" Val Loss: {val_loss:.4f}" 211 | + f" Train Accuracy: {train_acc:.4f}" 212 | + f" Validation Accuracy: {val_acc:.4f}" 213 | ) 214 | 215 | global B_model 216 | if(val_acc > best_val): 217 | best_val = val_acc 218 | best_loss = val_loss 219 | # B_model = copy.deepcopy(self.model) 220 | best_epoch_i = epoch_idx 221 | elif(val_acc == best_val and val_loss < best_loss): 222 | best_val = val_acc 223 | best_loss = val_loss 224 | # B_model = copy.deepcopy(self.model) 225 | best_epoch_i = epoch_idx 226 | 227 | return_pack={"train_result" : train_result, "best_index": best_epoch_i} 228 | return return_pack 229 | 230 | 231 | def train_epoch(self) -> Tuple[float, float]: 232 | """Implements the main training loop.""" 233 | self.model.train() 234 | 235 | if(self.no_bn_track): 236 | self.disable_traking_bn() 237 | 238 | train_loss_meter = AverageMeter("train loss") 239 | train_acc_meter = AverageMeter("train accuracy") 240 | 241 | # loop over each minibatch 242 | for (x, y) in self.train_loader: 243 | if self.cuda: 244 | x = x.cuda() 245 | y = y.cuda() 246 | 247 | n = x.shape[0] 248 | logits = self.model(x) 249 | batch_acc = compute_accuracy(logits, y) 250 | train_acc_meter.update(val=batch_acc, n=n) 251 | 252 | batch_loss = compute_loss(self.model, logits, y, is_normalize=True) 253 | train_loss_meter.update(val=float(batch_loss.cpu().item()), n=n) 254 | 255 | self.optimizer.zero_grad() 256 | batch_loss.backward() 257 | self.optimizer.step() 258 | 259 | return train_loss_meter.avg, train_acc_meter.avg 260 | 261 | 262 | def validate(self) -> Tuple[float, float]: 263 | """Evaluate on held-out split (either val or test)""" 264 | self.model.eval() 265 | 266 | val_loss_meter = AverageMeter("val loss") 267 | val_acc_meter = AverageMeter("val accuracy") 268 | 269 | # loop over whole val set 270 | with torch.no_grad(): 271 | for (x, y) in self.val_loader: 272 | if self.cuda: 273 | x = x.cuda() 274 | y = y.cuda() 275 | 276 | n = x.shape[0] 277 | logits = self.model(x) 278 | 279 | batch_acc = compute_accuracy(logits, y) 280 | val_acc_meter.update(val=batch_acc, n=n) 281 | 282 | batch_loss = compute_loss(self.model, logits, y, is_normalize=True) 283 | val_loss_meter.update(val=float(batch_loss.cpu().item()), n=n) 284 | 285 | return val_loss_meter.avg, val_acc_meter.avg 286 | 287 | 288 | def disable_traking_bn(self): 289 | for layer in self.model.modules(): 290 | if isinstance(layer, nn.modules.BatchNorm2d): 291 | layer.eval() 292 | 293 | 294 | 295 | def AESAP_replace(model, valid_data_loader = None): 296 | sign_nest_dict = generate_sign_nest_dict(model) 297 | print(sign_nest_dict) 298 | global G_model 299 | G_model = copy.deepcopy(model) 300 | for key in sign_nest_dict: 301 | print(key) 302 | if(sign_nest_dict[key]["type"] == "MaxPool2d"): 303 | continue 304 | 305 | sign_dict = 
sign_nest_dict[key] 306 | bn_name = sign_dict["HerPN"] 307 | 308 | if(sign_dict["type"] == "ReLU" and sign_dict["HerPN"]): 309 | num_features = access_layer(G_model, bn_name).num_features 310 | BN_dimension = 2 311 | my_layer = HerPN2d(num_features, BN_dimension) 312 | else: 313 | print("Error: Replce Pair Can't Find") 314 | # assert(False, "Replce Pair Error") 315 | 316 | layer_name = key 317 | layer_dict = sign_nest_dict[key] 318 | replace_module = my_layer 319 | 320 | # print(layer_dict) 321 | 322 | replace_layer(model, layer_name, replace_module) 323 | if(sign_nest_dict[layer_name]["HerPN"]): 324 | replace_layer(model, sign_nest_dict[layer_name]["HerPN"], nn.Identity()) 325 | 326 | if(valid_data_loader): 327 | validate(model, valid_data_loader, "cuda:0") 328 | 329 | print(model) 330 | return model 331 | 332 | 333 | def AESPA_train(model, valid_data_loader, train_data_loader, config): 334 | layer_name = config["layer_name"] 335 | num_epochs = config["ep"] 336 | 337 | optimizer_name = "adam" 338 | learning_rate = config["lr"] 339 | weight_decay = config["wd"] 340 | learning_rate_decay = True 341 | no_bn_track = False 342 | my_model = model 343 | 344 | print("Name: " + layer_name) 345 | before_result = validate(my_model, valid_data_loader, "cuda:0") 346 | 347 | print(layer_name +": train") 348 | 349 | optimizer_config = {"optimizer_type": "adam", "lr": learning_rate, "weight_decay": weight_decay} 350 | optimizer = get_optimizer(my_model, optimizer_config) 351 | scheduler = ReduceLROnPlateau(optimizer, 'max', patience = 2, eps=1e-10) 352 | if(learning_rate_decay): 353 | lr_scheduler = scheduler 354 | else: 355 | lr_scheduler = None 356 | trainer = Trainer( 357 | model=my_model, 358 | optimizer=optimizer, 359 | load_from_disk=False, 360 | cuda=True, 361 | lr_scheduler = lr_scheduler, 362 | no_bn_track = no_bn_track, 363 | train_loader = train_data_loader, 364 | val_loader = valid_data_loader 365 | 366 | ) 367 | 368 | print("Layer name: " + layer_name) 369 | print("Parameter: ") 370 | print("Optimizer: " + optimizer_name) 371 | print("No batchnorm tracking: " +str(no_bn_track)) 372 | print("\tLearning rate: " + str(learning_rate)) 373 | print("\tWeight decay: " + str(weight_decay)) 374 | print("\tLearning rate decay: " + str(learning_rate_decay)) 375 | print("\n \n") 376 | 377 | print("Train epoch: ") 378 | train_return_pack = trainer.run_training_loop(num_epochs=num_epochs) 379 | train_result = validate(my_model, valid_data_loader, "cuda:0") 380 | global E_model 381 | E_model = copy.deepcopy(my_model) 382 | print("\n \n") 383 | print("Validation result:") 384 | print("\tbefore: " + str(before_result)) 385 | print("\ttrain: "+ str(train_result)) 386 | print("\n \n \n \n") 387 | 388 | 389 | 390 | 391 | 392 | if __name__ == "__main__": 393 | parser = ArgumentParser() 394 | parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "resnet20"]) 395 | parser.add_argument("--dataset", type=str,choices=["cifar10", "imagenet_1k", "cifar100"]) 396 | parser.add_argument("-wd", "--working_directory", type=str, default="./working_dirctory/") 397 | parser.add_argument("-lr", "--learning_rate", type = float, default = 1e-6) 398 | 399 | args = parser.parse_args() 400 | print(args) 401 | valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"]) 402 | train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"]) 403 | model 
= get_pretrained_model(model_name=args.model, dataset=args.dataset) 404 | print(model) 405 | validate(model, valid_data_loader, "cuda:0") 406 | AESAP_replace(model) 407 | lr_c = args.learning_rate 408 | param_config = {"layer_name": "Whole Model", 409 | "ep" : 5, 410 | "lr" : lr_c, 411 | "wd" : 0.01} 412 | AESPA_train(model, valid_data_loader, train_data_loader, param_config) 413 | 414 | -------------------------------------------------------------------------------- /expriments/fig9/PR_AT_2f12g1.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | config: 5 | before_acc: 0.69 6 | {'layer_name': 'relu', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['relu'], 'do': False, 'lh': 0, 'lt': 'n'} 7 | 8 | 9 | config: 10 | before_acc: 0.69 11 | {'layer_name': 'relu', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['relu'], 'do': False, 'lh': 0, 'lt': 'n'} 12 | acc: 0.69 13 | acc: 0.69 14 | acc: 0.69 15 | acc: 0.689 16 | acc: 0.691 17 | acc: 0.691 18 | acc: 0.691 19 | acc: 0.691 20 | acc: 0.691 21 | acc: 0.691 22 | acc: 0.691 23 | acc: 0.69 24 | acc: 0.691 25 | acc: 0.691 26 | acc: 0.691 27 | acc: 0.691 28 | acc: 0.691 29 | acc: 0.691 30 | acc: 0.691 31 | swa: None 32 | 33 | 34 | config: 35 | before_acc: 0.691 36 | {'layer_name': 'relu', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['conv1', 'bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'b'} 37 | acc: 0.689 38 | acc: 0.69 39 | acc: 0.691 40 | acc: 0.691 41 | acc: 0.691 42 | acc: 0.693 43 | acc: 0.693 44 | acc: 0.695 45 | acc: 0.697 46 | swa: None 47 | 48 | 49 | config: 50 | before_acc: 0.6459999999999999 51 | {'layer_name': 'maxpool', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['maxpool'], 'do': False, 'lh': 0, 'lt': 'n'} 52 | 53 | 54 | config: 55 | before_acc: 0.6459999999999999 56 | {'layer_name': 'maxpool', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['maxpool'], 'do': False, 'lh': 0, 'lt': 'n'} 57 | acc: 0.645 58 | acc: 0.645 59 | acc: 0.645 60 | acc: 0.645 61 | acc: 0.644 62 | acc: 0.644 63 | acc: 0.644 64 | acc: 0.643 65 | acc: 0.643 66 | acc: 0.642 67 | acc: 0.641 68 | acc: 0.641 69 | acc: 0.641 70 | acc: 0.641 71 | acc: 0.64 72 | acc: 0.64 73 | acc: 0.641 74 | acc: 0.641 75 | acc: 0.641 76 | acc: 0.641 77 | swa: 0.6890000000000001 78 | 79 | 80 | config: 81 | before_acc: 0.6890000000000001 82 | {'layer_name': 'maxpool', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['maxpool'], 'do': False, 'lh': 0, 'lt': 's'} 83 | acc: 0.689 84 | acc: 0.688 85 | acc: 0.688 86 | acc: 0.689 87 | acc: 0.689 88 | acc: 0.689 89 | acc: 0.689 90 | acc: 0.689 91 | acc: 0.688 92 | acc: 0.688 93 | acc: 0.688 94 | acc: 0.69 95 | acc: 0.69 96 | acc: 0.69 97 | acc: 0.688 98 | acc: 0.688 99 | acc: 0.689 100 | acc: 0.688 101 | acc: 0.688 102 | acc: 0.688 103 | swa: 0.691 104 | 105 | 106 | config: 107 | before_acc: 0.6890000000000001 108 | {'layer_name': 'layer1.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 109 | 110 | 111 | config: 112 | before_acc: 0.6890000000000001 113 | {'layer_name': 'layer1.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 114 | acc: 0.689 115 | acc: 0.689 116 | acc: 0.689 117 | acc: 0.689 118 | acc: 0.689 119 | acc: 
0.691 120 | acc: 0.691 121 | acc: 0.691 122 | acc: 0.692 123 | acc: 0.692 124 | acc: 0.693 125 | acc: 0.693 126 | acc: 0.694 127 | swa: None 128 | 129 | 130 | config: 131 | before_acc: 0.688 132 | {'layer_name': 'layer1.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 133 | 134 | 135 | config: 136 | before_acc: 0.685 137 | {'layer_name': 'layer1.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 138 | 139 | 140 | config: 141 | before_acc: 0.685 142 | {'layer_name': 'layer1.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 143 | acc: 0.686 144 | acc: 0.686 145 | acc: 0.686 146 | acc: 0.686 147 | acc: 0.685 148 | acc: 0.685 149 | acc: 0.685 150 | acc: 0.685 151 | acc: 0.687 152 | acc: 0.687 153 | acc: 0.687 154 | acc: 0.687 155 | acc: 0.687 156 | acc: 0.687 157 | acc: 0.687 158 | acc: 0.687 159 | acc: 0.687 160 | acc: 0.688 161 | swa: None 162 | 163 | 164 | config: 165 | before_acc: 0.6829999999999999 166 | {'layer_name': 'layer1.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 167 | 168 | 169 | config: 170 | before_acc: 0.6829999999999999 171 | {'layer_name': 'layer1.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 172 | acc: 0.683 173 | acc: 0.683 174 | acc: 0.683 175 | acc: 0.683 176 | acc: 0.683 177 | acc: 0.683 178 | acc: 0.683 179 | acc: 0.683 180 | acc: 0.683 181 | acc: 0.683 182 | acc: 0.683 183 | acc: 0.683 184 | acc: 0.682 185 | acc: 0.682 186 | acc: 0.683 187 | acc: 0.683 188 | acc: 0.684 189 | acc: 0.683 190 | acc: 0.683 191 | acc: 0.684 192 | swa: 0.6890000000000001 193 | 194 | 195 | config: 196 | before_acc: 0.6890000000000001 197 | {'layer_name': 'layer1.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu2'], 'do': False, 'lh': 0, 'lt': 's'} 198 | acc: 0.689 199 | acc: 0.689 200 | acc: 0.689 201 | acc: 0.69 202 | acc: 0.69 203 | acc: 0.69 204 | acc: 0.69 205 | acc: 0.691 206 | acc: 0.691 207 | acc: 0.691 208 | acc: 0.691 209 | acc: 0.691 210 | acc: 0.691 211 | acc: 0.691 212 | acc: 0.691 213 | acc: 0.691 214 | acc: 0.692 215 | acc: 0.692 216 | acc: 0.693 217 | swa: None 218 | 219 | 220 | config: 221 | before_acc: 0.693 222 | {'layer_name': 'layer1.1.relu2', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer1.1.conv2', 'layer1.1.bn2'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'b'} 223 | acc: 0.694 224 | swa: None 225 | 226 | 227 | config: 228 | before_acc: 0.6890000000000001 229 | {'layer_name': 'layer2.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 230 | 231 | 232 | config: 233 | before_acc: 0.6890000000000001 234 | {'layer_name': 'layer2.0.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer2.0.conv1', 'layer2.0.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'n'} 235 | acc: 0.69 236 | acc: 0.69 237 | acc: 0.689 238 | acc: 0.688 239 | acc: 0.69 240 | acc: 0.688 241 | acc: 0.685 242 | acc: 0.686 243 | acc: 0.687 244 | acc: 0.684 245 | acc: 0.685 246 | acc: 0.686 247 | acc: 0.687 248 | acc: 0.686 249 | acc: 0.687 250 | acc: 
0.687 251 | acc: 0.687 252 | acc: 0.689 253 | acc: 0.688 254 | acc: 0.688 255 | swa: 0.69 256 | 257 | 258 | config: 259 | before_acc: 0.691 260 | {'layer_name': 'layer2.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 261 | 262 | 263 | config: 264 | before_acc: 0.68 265 | {'layer_name': 'layer2.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 266 | 267 | 268 | config: 269 | before_acc: 0.68 270 | {'layer_name': 'layer2.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 271 | acc: 0.68 272 | acc: 0.679 273 | acc: 0.679 274 | acc: 0.679 275 | acc: 0.679 276 | acc: 0.679 277 | acc: 0.68 278 | acc: 0.68 279 | acc: 0.68 280 | acc: 0.68 281 | acc: 0.68 282 | acc: 0.68 283 | acc: 0.68 284 | acc: 0.68 285 | acc: 0.68 286 | acc: 0.68 287 | acc: 0.68 288 | acc: 0.68 289 | acc: 0.68 290 | acc: 0.679 291 | swa: 0.6829999999999999 292 | 293 | 294 | config: 295 | before_acc: 0.6829999999999999 296 | {'layer_name': 'layer2.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu1'], 'do': False, 'lh': 0, 'lt': 's'} 297 | acc: 0.683 298 | acc: 0.683 299 | acc: 0.683 300 | acc: 0.683 301 | acc: 0.683 302 | acc: 0.683 303 | acc: 0.682 304 | acc: 0.682 305 | acc: 0.682 306 | acc: 0.681 307 | acc: 0.683 308 | acc: 0.683 309 | acc: 0.683 310 | acc: 0.684 311 | acc: 0.684 312 | acc: 0.684 313 | acc: 0.684 314 | acc: 0.684 315 | acc: 0.684 316 | acc: 0.684 317 | swa: 0.684 318 | 319 | 320 | config: 321 | before_acc: 0.684 322 | {'layer_name': 'layer2.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu1'], 'do': False, 'lh': 0, 'lt': 's'} 323 | acc: 0.684 324 | acc: 0.683 325 | acc: 0.683 326 | acc: 0.683 327 | acc: 0.683 328 | acc: 0.683 329 | acc: 0.683 330 | acc: 0.683 331 | acc: 0.683 332 | acc: 0.684 333 | acc: 0.684 334 | acc: 0.683 335 | acc: 0.683 336 | acc: 0.683 337 | acc: 0.683 338 | acc: 0.684 339 | acc: 0.684 340 | acc: 0.684 341 | acc: 0.684 342 | acc: 0.684 343 | swa: 0.685 344 | 345 | 346 | config: 347 | before_acc: 0.685 348 | {'layer_name': 'layer2.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 349 | 350 | 351 | config: 352 | before_acc: 0.685 353 | {'layer_name': 'layer2.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 354 | acc: 0.685 355 | acc: 0.685 356 | acc: 0.685 357 | acc: 0.686 358 | acc: 0.686 359 | acc: 0.687 360 | acc: 0.687 361 | acc: 0.687 362 | acc: 0.687 363 | acc: 0.687 364 | acc: 0.687 365 | acc: 0.688 366 | swa: None 367 | 368 | 369 | config: 370 | before_acc: 0.684 371 | {'layer_name': 'layer3.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 372 | 373 | 374 | config: 375 | before_acc: 0.684 376 | {'layer_name': 'layer3.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 377 | 378 | 379 | config: 380 | before_acc: 0.684 381 | {'layer_name': 'layer3.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 
'tce': ['layer3.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 382 | acc: 0.684 383 | acc: 0.684 384 | acc: 0.684 385 | acc: 0.684 386 | acc: 0.685 387 | acc: 0.685 388 | acc: 0.685 389 | acc: 0.685 390 | acc: 0.685 391 | acc: 0.685 392 | acc: 0.685 393 | acc: 0.685 394 | acc: 0.685 395 | acc: 0.685 396 | acc: 0.685 397 | acc: 0.685 398 | acc: 0.685 399 | acc: 0.685 400 | acc: 0.684 401 | acc: 0.684 402 | swa: 0.685 403 | 404 | 405 | config: 406 | before_acc: 0.685 407 | {'layer_name': 'layer3.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.0.relu2'], 'do': False, 'lh': 0, 'lt': 's'} 408 | acc: 0.685 409 | acc: 0.685 410 | acc: 0.684 411 | acc: 0.684 412 | acc: 0.684 413 | acc: 0.684 414 | acc: 0.684 415 | acc: 0.684 416 | acc: 0.685 417 | acc: 0.686 418 | acc: 0.686 419 | acc: 0.686 420 | swa: None 421 | 422 | 423 | config: 424 | before_acc: 0.6829999999999999 425 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 426 | 427 | 428 | config: 429 | before_acc: 0.6829999999999999 430 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 431 | acc: 0.683 432 | acc: 0.681 433 | acc: 0.68 434 | acc: 0.68 435 | acc: 0.68 436 | acc: 0.68 437 | acc: 0.681 438 | acc: 0.681 439 | acc: 0.681 440 | acc: 0.681 441 | acc: 0.681 442 | acc: 0.68 443 | acc: 0.68 444 | acc: 0.68 445 | acc: 0.679 446 | acc: 0.679 447 | acc: 0.679 448 | acc: 0.679 449 | acc: 0.679 450 | acc: 0.678 451 | swa: 0.6859999999999999 452 | 453 | 454 | config: 455 | before_acc: 0.6859999999999999 456 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu1'], 'do': False, 'lh': 0, 'lt': 's'} 457 | acc: 0.686 458 | acc: 0.686 459 | acc: 0.686 460 | acc: 0.686 461 | acc: 0.686 462 | acc: 0.686 463 | acc: 0.687 464 | acc: 0.687 465 | acc: 0.688 466 | acc: 0.688 467 | acc: 0.687 468 | acc: 0.687 469 | acc: 0.687 470 | acc: 0.687 471 | acc: 0.686 472 | acc: 0.686 473 | acc: 0.685 474 | acc: 0.686 475 | acc: 0.684 476 | acc: 0.684 477 | swa: 0.688 478 | 479 | 480 | config: 481 | before_acc: 0.688 482 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer3.0.downsample.0', 'layer3.0.downsample.1', 'layer3.1.conv1', 'layer3.1.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 's'} 483 | acc: 0.689 484 | acc: 0.689 485 | acc: 0.688 486 | acc: 0.687 487 | acc: 0.686 488 | acc: 0.687 489 | acc: 0.687 490 | acc: 0.686 491 | acc: 0.685 492 | acc: 0.686 493 | acc: 0.686 494 | acc: 0.685 495 | acc: 0.684 496 | acc: 0.683 497 | acc: 0.683 498 | acc: 0.682 499 | acc: 0.684 500 | acc: 0.683 501 | acc: 0.685 502 | acc: 0.685 503 | swa: 0.6890000000000001 504 | 505 | 506 | config: 507 | before_acc: 0.6890000000000001 508 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer3.0.downsample.0', 'layer3.0.downsample.1', 'layer3.1.conv1', 'layer3.1.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 's'} 509 | acc: 0.688 510 | acc: 0.69 511 | acc: 0.689 512 | acc: 0.685 513 | acc: 0.684 514 | acc: 0.686 515 | acc: 0.688 516 | acc: 0.689 517 | acc: 0.689 518 | acc: 0.69 519 | swa: None 520 | 521 | 522 | config: 523 | before_acc: 0.687 524 | {'layer_name': 'layer3.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': 
False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 525 | 526 | 527 | config: 528 | before_acc: 0.687 529 | {'layer_name': 'layer3.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 530 | acc: 0.687 531 | acc: 0.688 532 | swa: None 533 | 534 | 535 | config: 536 | before_acc: 0.688 537 | {'layer_name': 'layer3.1.relu2', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer3.1.conv2', 'layer3.1.bn2'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'b'} 538 | acc: 0.687 539 | acc: 0.685 540 | acc: 0.684 541 | acc: 0.683 542 | acc: 0.682 543 | acc: 0.68 544 | acc: 0.681 545 | acc: 0.681 546 | acc: 0.681 547 | acc: 0.681 548 | acc: 0.681 549 | acc: 0.68 550 | acc: 0.677 551 | acc: 0.678 552 | acc: 0.682 553 | acc: 0.681 554 | acc: 0.681 555 | acc: 0.681 556 | acc: 0.681 557 | acc: 0.68 558 | swa: 0.69 559 | 560 | 561 | config: 562 | before_acc: 0.685 563 | {'layer_name': 'layer4.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 564 | 565 | 566 | config: 567 | before_acc: 0.685 568 | {'layer_name': 'layer4.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 569 | acc: 0.686 570 | acc: 0.686 571 | acc: 0.686 572 | acc: 0.686 573 | acc: 0.686 574 | acc: 0.685 575 | acc: 0.685 576 | acc: 0.685 577 | acc: 0.685 578 | acc: 0.685 579 | acc: 0.685 580 | acc: 0.685 581 | acc: 0.686 582 | acc: 0.686 583 | acc: 0.686 584 | acc: 0.686 585 | acc: 0.685 586 | acc: 0.685 587 | acc: 0.685 588 | acc: 0.686 589 | swa: 0.691 590 | 591 | 592 | config: 593 | before_acc: 0.691 594 | {'layer_name': 'layer4.0.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer4.0.conv1', 'layer4.0.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 's'} 595 | acc: 0.692 596 | swa: None 597 | 598 | 599 | config: 600 | before_acc: 0.685 601 | {'layer_name': 'layer4.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 602 | 603 | 604 | config: 605 | before_acc: 0.685 606 | {'layer_name': 'layer4.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 607 | acc: 0.686 608 | swa: None 609 | 610 | 611 | config: 612 | before_acc: 0.6829999999999999 613 | {'layer_name': 'layer4.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 614 | 615 | 616 | config: 617 | before_acc: 0.6829999999999999 618 | {'layer_name': 'layer4.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'} 619 | acc: 0.683 620 | acc: 0.683 621 | acc: 0.683 622 | acc: 0.683 623 | acc: 0.683 624 | acc: 0.683 625 | acc: 0.683 626 | acc: 0.683 627 | acc: 0.683 628 | acc: 0.684 629 | swa: None 630 | 631 | 632 | config: 633 | before_acc: 0.684 634 | {'layer_name': 'layer4.1.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer4.0.downsample.0', 'layer4.0.downsample.1', 'layer4.1.conv1', 'layer4.1.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'b'} 635 | acc: 0.686 636 | acc: 0.686 637 | acc: 0.688 638 | acc: 0.689 639 | acc: 0.69 640 | acc: 0.691 641 | acc: 
0.693 642 | acc: 0.694 643 | acc: 0.695 644 | acc: 0.696 645 | acc: 0.695 646 | acc: 0.695 647 | acc: 0.695 648 | acc: 0.696 649 | acc: 0.696 650 | acc: 0.698 651 | swa: None 652 | 653 | 654 | config: 655 | before_acc: 0.698 656 | {'layer_name': 'layer4.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.1.relu1'], 'do': False, 'lh': 0, 'lt': 'b'} 657 | acc: 0.698 658 | acc: 0.698 659 | acc: 0.698 660 | acc: 0.698 661 | acc: 0.698 662 | acc: 0.699 663 | swa: None 664 | 665 | 666 | config: 667 | before_acc: 0.695 668 | {'layer_name': 'layer4.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'} 669 | 670 | 671 | config: 672 | before_acc: 0.695 673 | {'layer_name': 'layer4.1.relu2', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer4.1.conv2', 'layer4.1.bn2'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'n'} 674 | acc: 0.697 675 | acc: 0.698 676 | acc: 0.698 677 | acc: 0.697 678 | acc: 0.698 679 | swa: None 680 | --------------------------------------------------------------------------------
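
Note: the PR_AT_2f12g1.log trace above follows the same block layout as the output of the print_log_to_file helper in src/AESPA_Baseline.py: each block opens with "config:", records the validation accuracy before fine-tuning ("before_acc"), the hyperparameter dictionary for the layer being replaced, the per-epoch validation accuracy ("acc"), and the stochastic-weight-averaging result ("swa", or None when SWA was not used). Below is a minimal parsing sketch, not part of the repository; it assumes only the layout visible in the log above, and the helper name parse_paf_log is made up for illustration.

import ast
from typing import Any, Dict, List


def parse_paf_log(path: str) -> List[Dict[str, Any]]:
    """Collect one record per "config:" block: before_acc, config dict, acc trace, swa."""
    records: List[Dict[str, Any]] = []
    current: Dict[str, Any] = {}
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if line == "config:":
                # a new block starts; flush the previous one if any
                if current:
                    records.append(current)
                current = {"acc": []}
            elif line.startswith("before_acc:"):
                current["before_acc"] = float(line.split(":", 1)[1])
            elif line.startswith("{"):
                # the hyperparameter configuration is printed as a Python dict literal
                current["config"] = ast.literal_eval(line)
            elif line.startswith("acc:"):
                current["acc"].append(float(line.split(":", 1)[1]))
            elif line.startswith("swa:"):
                value = line.split(":", 1)[1].strip()
                current["swa"] = None if value == "None" else float(value)
    if current:
        records.append(current)
    return records


if __name__ == "__main__":
    # Summarize each fine-tuning block: starting accuracy, best per-epoch accuracy, SWA accuracy.
    for rec in parse_paf_log("expriments/fig9/PR_AT_2f12g1.log"):
        layer = rec.get("config", {}).get("layer_name", "?")
        before = rec.get("before_acc", float("nan"))
        best = max(rec["acc"], default=before)
        print(f"{layer:20s} before={before:.3f} best={best:.3f} swa={rec.get('swa')}")

For reference, src/AESPA_Baseline.py exposes the flags shown in its __main__ block, so a hypothetical invocation (run from the src directory, since the script loads ./global_config/global_config.yaml and takes the dataset directory from it rather than from a flag) would look like: python3 AESPA_Baseline.py --model resnet18 --dataset cifar10 -lr 1e-6.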