├── SmartPAF.pdf
├── .gitignore
├── image
    ├── RelatedWork.png
    └── secure_ML_inference.png
├── expriments
    ├── convergence_curve.pdf
    └── fig9
    │   └── PR_AT_2f12g1.log
├── src
    ├── PyTorch_CIFAR10
    │   ├── download_weights.sh
    │   ├── LICENSE
    │   ├── module.py
    │   ├── data.py
    │   ├── train.py
    │   ├── README.md
    │   ├── cifar10_models
    │   │   ├── resnet_orig.py
    │   │   ├── mobilenetv2.py
    │   │   ├── vgg.py
    │   │   ├── densenet.py
    │   │   ├── googlenet.py
    │   │   ├── resnet.py
    │   │   └── inception.py
    │   └── schduler.py
    ├── global_config
    │   └── global_config.yaml
    ├── lib.py
    ├── pretrained_model.py
    ├── SS.py
    ├── CT_AESPA.py
    ├── CT_sign_SiLU.py
    ├── CT_cvnet_sign_SiLU.py
    ├── mobilevit_v2.py
    ├── resnet_model_2.py
    ├── CT_cvnet.py
    ├── CT_cvnet_bn.py
    ├── CT.py
    ├── custom_module.py
    └── AESPA_Baseline.py
├── log
    ├── CT_val_o.log
    ├── PA_CT_AT_o7.log
    ├── CT_val_c7.log
    └── CT_val_o7.log
├── LICENSE
└── README.md


/SmartPAF.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EfficientPPML/SmartPAF/HEAD/SmartPAF.pdf


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | PyTorch_CIFAR10
2 | *.__pycache__
3 | src/PyTorch_CIFAR10/cifar10_models/state_dicts


--------------------------------------------------------------------------------
/image/RelatedWork.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EfficientPPML/SmartPAF/HEAD/image/RelatedWork.png


--------------------------------------------------------------------------------
/image/secure_ML_inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EfficientPPML/SmartPAF/HEAD/image/secure_ML_inference.png


--------------------------------------------------------------------------------
/expriments/convergence_curve.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EfficientPPML/SmartPAF/HEAD/expriments/convergence_curve.pdf


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/download_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python3 train.py --download_weights 1
3 | rm -rf state_dicts.zip


--------------------------------------------------------------------------------
/src/global_config/global_config.yaml:
--------------------------------------------------------------------------------
1 | PA_AT:
2 |   dropout_enable: False
3 |   group_epochs: 20
4 | Global:
5 |   dataset_dirctory: "/usr/scratch/jianming/PAF_test/dataset/"


--------------------------------------------------------------------------------
/log/CT_val_o.log:
--------------------------------------------------------------------------------
1 | Namespace(model='mobileVitV2', dataset='imagenet_1k', sign_type='polyfit', data_collection=False, working_directory='/home/jianming/work/SmartPAF/cvnet_work/')
2 | 2024-01-22 08:37:17 - [93m[1mDEBUG   [0m - Cannot load internal arguments, skipping.
3 | 


--------------------------------------------------------------------------------
/log/PA_CT_AT_o7.log:
--------------------------------------------------------------------------------
1 | Namespace(model='mobileVitV2', dataset='imagenet_1k', sign_type='polyfit', working_directory='/home/jianming/work/SmartPAF/cvnet_work2/', start_layer_name='None', max_counter=1000, learning_rate=0.0001)
2 | 2024-01-22 18:02:28 - [93m[1mDEBUG   [0m - Cannot load internal arguments, skipping.
3 | 
4 | 
5 | 


--------------------------------------------------------------------------------
/src/lib.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import csv
 3 | import time
 4 | import numpy as np
 5 | import urllib
 6 | import json
 7 | import math
 8 | import copy
 9 | import random
10 | import sys
11 | 
12 | from typing import Any, Dict, Tuple, Union
13 | from functools import partial
14 | from argparse import ArgumentParser
15 | 
16 | import torch 
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | import torch.backends.cudnn as cudnn
20 | 
21 | from torch.optim import Optimizer
22 | from torch.utils.data import DataLoader, Subset
23 | from torch.optim.lr_scheduler import ReduceLROnPlateau
24 | from torch.optim.swa_utils import AveragedModel, SWALR
25 | from torch import Tensor
26 | from torch.quantization import QuantStub, DeQuantStub
27 | from torch.nn.quantized import functional as qF
28 | 
29 | from torchvision.transforms import transforms, ToTensor
30 | from torchvision import datasets
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Huy Phan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Georgia Institute of Technology
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/pretrained_model.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import PyTorch_CIFAR10.cifar10_models.vgg as cifar10_vgg
 3 | import resnet_model_1
 4 | import resnet_model_2
 5 | from mobilevit_v2 import MobileViTv2
 6 | from options.opts import get_training_arguments
 7 | 
 8 | import torchvision.models
 9 | 
10 | def get_pretrained_model(model_name, dataset):
11 |     if(model_name == "vgg19_bn" and dataset == "cifar10"):
12 |         return cifar10_vgg.vgg19_bn(pretrained = True)
13 |     elif(model_name == "vgg19_bn" and dataset == "imagenet_1k"):
14 |         return torchvision.models.vgg19_bn(weights="IMAGENET1K_V1")
15 |     elif(model_name == "resnet18" and dataset == "imagenet_1k"):
16 |         return resnet_model_1.resnet18_fp(pretrained= True)
17 |     elif(model_name == "resnet32" and dataset == "cifar100"):
18 |         return resnet_model_2.cifar100_resnet32(pretrained= True)
19 |     elif(model_name == "resnet20" and dataset == "cifar10"):
20 |         return resnet_model_2.cifar10_resnet20(pretrained = True)
21 |     elif(model_name == "mobileVitV2" and dataset == "imagenet_1k"):
22 |         args_list = ['--common.config-file', '/home/jianming/work/Fast_Switch/NN_Model/ml-cvnets/config/classification/imagenet/mobilevit_v2.yaml', '--common.results-loc', 'mobilevitv2_results/width_0_5_0', '--model.classification.pretrained', '/home/jianming/work/Fast_Switch/NN_Model/ml-cvnets/mobilevitv2_results/width_0_5_0/mobilevitv2-0.5.pt', '--common.override-kwargs', 'model.classification.mitv2.width_multiplier=0.5']
23 |         opts = get_training_arguments(parse_args=True, args=args_list)
24 |         model = MobileViTv2(opts)
25 |         return model
26 |     else:
27 |         raise Exception("model name or dataset error")


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/module.py:
--------------------------------------------------------------------------------
 1 | import pytorch_lightning as pl
 2 | import torch
 3 | from torchmetrics import Accuracy
 4 | 
 5 | from cifar10_models.densenet import densenet121, densenet161, densenet169
 6 | from cifar10_models.googlenet import googlenet
 7 | from cifar10_models.inception import inception_v3
 8 | from cifar10_models.mobilenetv2 import mobilenet_v2
 9 | from cifar10_models.resnet import resnet18, resnet34, resnet50
10 | from cifar10_models.vgg import vgg11_bn, vgg13_bn, vgg16_bn, vgg19_bn
11 | from schduler import WarmupCosineLR
12 | 
13 | all_classifiers = {
14 |     "vgg11_bn": vgg11_bn(),
15 |     "vgg13_bn": vgg13_bn(),
16 |     "vgg16_bn": vgg16_bn(),
17 |     "vgg19_bn": vgg19_bn(),
18 |     "resnet18": resnet18(),
19 |     "resnet34": resnet34(),
20 |     "resnet50": resnet50(),
21 |     "densenet121": densenet121(),
22 |     "densenet161": densenet161(),
23 |     "densenet169": densenet169(),
24 |     "mobilenet_v2": mobilenet_v2(),
25 |     "googlenet": googlenet(),
26 |     "inception_v3": inception_v3(),
27 | }
28 | 
29 | 
30 | class CIFAR10Module(pl.LightningModule):
31 |     def __init__(self, hparams):
32 |         super().__init__()
33 |         self.hparams = hparams
34 | 
35 |         self.criterion = torch.nn.CrossEntropyLoss()
36 |         self.accuracy = Accuracy()
37 | 
38 |         self.model = all_classifiers[self.hparams.classifier]
39 | 
40 |     def forward(self, batch):
41 |         images, labels = batch
42 |         predictions = self.model(images)
43 |         loss = self.criterion(predictions, labels)
44 |         accuracy = self.accuracy(predictions, labels)
45 |         return loss, accuracy * 100
46 | 
47 |     def training_step(self, batch, batch_nb):
48 |         loss, accuracy = self.forward(batch)
49 |         self.log("loss/train", loss)
50 |         self.log("acc/train", accuracy)
51 |         return loss
52 | 
53 |     def validation_step(self, batch, batch_nb):
54 |         loss, accuracy = self.forward(batch)
55 |         self.log("loss/val", loss)
56 |         self.log("acc/val", accuracy)
57 | 
58 |     def test_step(self, batch, batch_nb):
59 |         loss, accuracy = self.forward(batch)
60 |         self.log("acc/test", accuracy)
61 | 
62 |     def configure_optimizers(self):
63 |         optimizer = torch.optim.SGD(
64 |             self.model.parameters(),
65 |             lr=self.hparams.learning_rate,
66 |             weight_decay=self.hparams.weight_decay,
67 |             momentum=0.9,
68 |             nesterov=True,
69 |         )
70 |         total_steps = self.hparams.max_epochs * len(self.train_dataloader())
71 |         scheduler = {
72 |             "scheduler": WarmupCosineLR(
73 |                 optimizer, warmup_epochs=total_steps * 0.3, max_epochs=total_steps
74 |             ),
75 |             "interval": "step",
76 |             "name": "learning_rate",
77 |         }
78 |         return [optimizer], [scheduler]
79 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/data.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import zipfile
 3 | 
 4 | import pytorch_lightning as pl
 5 | import requests
 6 | from torch.utils.data import DataLoader
 7 | from torchvision import transforms as T
 8 | from torchvision.datasets import CIFAR10
 9 | from tqdm import tqdm
10 | 
11 | 
12 | class CIFAR10Data(pl.LightningDataModule):
13 |     def __init__(self, args):
14 |         super().__init__()
15 |         self.hparams = args
16 |         self.mean = (0.4914, 0.4822, 0.4465)
17 |         self.std = (0.2471, 0.2435, 0.2616)
18 | 
19 |     def download_weights():
20 |         url = (
21 |             "https://rutgers.box.com/shared/static/gkw08ecs797j2et1ksmbg1w5t3idf5r5.zip"
22 |         )
23 | 
24 |         # Streaming, so we can iterate over the response.
25 |         r = requests.get(url, stream=True)
26 | 
27 |         # Total size in Mebibyte
28 |         total_size = int(r.headers.get("content-length", 0))
29 |         block_size = 2 ** 20  # Mebibyte
30 |         t = tqdm(total=total_size, unit="MiB", unit_scale=True)
31 | 
32 |         with open("state_dicts.zip", "wb") as f:
33 |             for data in r.iter_content(block_size):
34 |                 t.update(len(data))
35 |                 f.write(data)
36 |         t.close()
37 | 
38 |         if total_size != 0 and t.n != total_size:
39 |             raise Exception("Error, something went wrong")
40 | 
41 |         print("Download successful. Unzipping file...")
42 |         path_to_zip_file = os.path.join(os.getcwd(), "state_dicts.zip")
43 |         directory_to_extract_to = os.path.join(os.getcwd(), "cifar10_models")
44 |         with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
45 |             zip_ref.extractall(directory_to_extract_to)
46 |             print("Unzip file successful!")
47 | 
48 |     def train_dataloader(self):
49 |         transform = T.Compose(
50 |             [
51 |                 T.RandomCrop(32, padding=4),
52 |                 T.RandomHorizontalFlip(),
53 |                 T.ToTensor(),
54 |                 T.Normalize(self.mean, self.std),
55 |             ]
56 |         )
57 |         dataset = CIFAR10(root=self.hparams.data_dir, train=True, transform=transform)
58 |         dataloader = DataLoader(
59 |             dataset,
60 |             batch_size=self.hparams.batch_size,
61 |             num_workers=self.hparams.num_workers,
62 |             shuffle=True,
63 |             drop_last=True,
64 |             pin_memory=True,
65 |         )
66 |         return dataloader
67 | 
68 |     def val_dataloader(self):
69 |         transform = T.Compose(
70 |             [
71 |                 T.ToTensor(),
72 |                 T.Normalize(self.mean, self.std),
73 |             ]
74 |         )
75 |         dataset = CIFAR10(root=self.hparams.data_dir, train=False, transform=transform)
76 |         dataloader = DataLoader(
77 |             dataset,
78 |             batch_size=self.hparams.batch_size,
79 |             num_workers=self.hparams.num_workers,
80 |             drop_last=True,
81 |             pin_memory=True,
82 |         )
83 |         return dataloader
84 | 
85 |     def test_dataloader(self):
86 |         return self.val_dataloader()
87 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/train.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from argparse import ArgumentParser
 3 | 
 4 | import torch
 5 | from pytorch_lightning import Trainer, seed_everything
 6 | from pytorch_lightning.callbacks import ModelCheckpoint
 7 | from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger
 8 | 
 9 | from data import CIFAR10Data
10 | from module import CIFAR10Module
11 | 
12 | 
13 | def main(args):
14 | 
15 |     if bool(args.download_weights):
16 |         CIFAR10Data.download_weights()
17 |     else:
18 |         seed_everything(0)
19 |         os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
20 | 
21 |         if args.logger == "wandb":
22 |             logger = WandbLogger(name=args.classifier, project="cifar10")
23 |         elif args.logger == "tensorboard":
24 |             logger = TensorBoardLogger("cifar10", name=args.classifier)
25 | 
26 |         checkpoint = ModelCheckpoint(monitor="acc/val", mode="max", save_last=False)
27 | 
28 |         trainer = Trainer(
29 |             fast_dev_run=bool(args.dev),
30 |             logger=logger if not bool(args.dev + args.test_phase) else None,
31 |             gpus=-1,
32 |             deterministic=True,
33 |             weights_summary=None,
34 |             log_every_n_steps=1,
35 |             max_epochs=args.max_epochs,
36 |             checkpoint_callback=checkpoint,
37 |             precision=args.precision,
38 |         )
39 | 
40 |         model = CIFAR10Module(args)
41 |         data = CIFAR10Data(args)
42 | 
43 |         if bool(args.pretrained):
44 |             state_dict = os.path.join(
45 |                 "cifar10_models", "state_dicts", args.classifier + ".pt"
46 |             )
47 |             model.model.load_state_dict(torch.load(state_dict))
48 | 
49 |         if bool(args.test_phase):
50 |             trainer.test(model, data.test_dataloader())
51 |         else:
52 |             trainer.fit(model, data)
53 |             trainer.test()
54 | 
55 | 
56 | if __name__ == "__main__":
57 |     parser = ArgumentParser()
58 | 
59 |     # PROGRAM level args
60 |     parser.add_argument("--data_dir", type=str, default="/data/huy/cifar10")
61 |     parser.add_argument("--download_weights", type=int, default=0, choices=[0, 1])
62 |     parser.add_argument("--test_phase", type=int, default=0, choices=[0, 1])
63 |     parser.add_argument("--dev", type=int, default=0, choices=[0, 1])
64 |     parser.add_argument(
65 |         "--logger", type=str, default="tensorboard", choices=["tensorboard", "wandb"]
66 |     )
67 | 
68 |     # TRAINER args
69 |     parser.add_argument("--classifier", type=str, default="resnet18")
70 |     parser.add_argument("--pretrained", type=int, default=0, choices=[0, 1])
71 | 
72 |     parser.add_argument("--precision", type=int, default=32, choices=[16, 32])
73 |     parser.add_argument("--batch_size", type=int, default=256)
74 |     parser.add_argument("--max_epochs", type=int, default=100)
75 |     parser.add_argument("--num_workers", type=int, default=8)
76 |     parser.add_argument("--gpu_id", type=str, default="3")
77 | 
78 |     parser.add_argument("--learning_rate", type=float, default=1e-2)
79 |     parser.add_argument("--weight_decay", type=float, default=1e-2)
80 | 
81 |     args = parser.parse_args()
82 |     main(args)
83 | 


--------------------------------------------------------------------------------
/log/CT_val_c7.log:
--------------------------------------------------------------------------------
  1 | Namespace(model='mobileVitV2', dataset='imagenet_1k', sign_type='polyfit', data_collection=False, working_directory='/home/jianming/work/SmartPAF/cvnet_work/')
  2 | 2024-01-22 08:23:44 - [93m[1mDEBUG   [0m - Cannot load internal arguments, skipping.
  3 | Results: loss=1.37407,	 top1=70.7000,	 top5=88.8000
  4 | 4
  5 | name: conv_1.block.act
  6 | Swish()
  7 | Results: loss=8.77412,	 top1=0.0000,	 top5=0.7000
  8 | 4
  9 | name: layer_1.0.block.exp_1x1.block.act
 10 | Swish()
 11 | Results: loss=nan,	 top1=0.1000,	 top5=0.4000
 12 | 4
 13 | name: layer_1.0.block.conv_3x3.block.act
 14 | Swish()
 15 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 16 | 4
 17 | name: layer_2.0.block.exp_1x1.block.act
 18 | Swish()
 19 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 20 | 4
 21 | name: layer_2.0.block.conv_3x3.block.act
 22 | Swish()
 23 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 24 | 4
 25 | name: layer_2.1.block.exp_1x1.block.act
 26 | Swish()
 27 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 28 | 4
 29 | name: layer_2.1.block.conv_3x3.block.act
 30 | Swish()
 31 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 32 | 4
 33 | name: layer_3.0.block.exp_1x1.block.act
 34 | Swish()
 35 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 36 | 4
 37 | name: layer_3.0.block.conv_3x3.block.act
 38 | Swish()
 39 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 40 | 4
 41 | name: layer_3.1.local_rep.0.block.act
 42 | Swish()
 43 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 44 | 4
 45 | name: layer_3.1.global_rep.0.pre_norm_ffn.1.block.act
 46 | Swish()
 47 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 48 | 4
 49 | name: layer_3.1.global_rep.1.pre_norm_ffn.1.block.act
 50 | Swish()
 51 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 52 | 4
 53 | name: layer_4.0.block.exp_1x1.block.act
 54 | Swish()
 55 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 56 | 4
 57 | name: layer_4.0.block.conv_3x3.block.act
 58 | Swish()
 59 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 60 | 4
 61 | name: layer_4.1.local_rep.0.block.act
 62 | Swish()
 63 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 64 | 4
 65 | name: layer_4.1.global_rep.0.pre_norm_ffn.1.block.act
 66 | Swish()
 67 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 68 | 4
 69 | name: layer_4.1.global_rep.1.pre_norm_ffn.1.block.act
 70 | Swish()
 71 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 72 | 4
 73 | name: layer_4.1.global_rep.2.pre_norm_ffn.1.block.act
 74 | Swish()
 75 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 76 | 4
 77 | name: layer_4.1.global_rep.3.pre_norm_ffn.1.block.act
 78 | Swish()
 79 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 80 | 4
 81 | name: layer_5.0.block.exp_1x1.block.act
 82 | Swish()
 83 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 84 | 4
 85 | name: layer_5.0.block.conv_3x3.block.act
 86 | Swish()
 87 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 88 | 4
 89 | name: layer_5.1.local_rep.0.block.act
 90 | Swish()
 91 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 92 | 4
 93 | name: layer_5.1.global_rep.0.pre_norm_ffn.1.block.act
 94 | Swish()
 95 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 96 | 4
 97 | name: layer_5.1.global_rep.1.pre_norm_ffn.1.block.act
 98 | Swish()
 99 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
100 | 4
101 | name: layer_5.1.global_rep.2.pre_norm_ffn.1.block.act
102 | Swish()
103 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
104 | 


--------------------------------------------------------------------------------
/log/CT_val_o7.log:
--------------------------------------------------------------------------------
  1 | Namespace(model='mobileVitV2', dataset='imagenet_1k', sign_type='polyfit', data_collection=False, working_directory='/home/jianming/work/SmartPAF/cvnet_work/')
  2 | 2024-01-22 08:10:28 - [93m[1mDEBUG   [0m - Cannot load internal arguments, skipping.
  3 | Results: loss=1.37407,	 top1=70.7000,	 top5=88.8000
  4 | 4
  5 | name: conv_1.block.act
  6 | Swish()
  7 | Results: loss=1.74018,	 top1=61.0000,	 top5=85.2000
  8 | 4
  9 | name: layer_1.0.block.exp_1x1.block.act
 10 | Swish()
 11 | Results: loss=4.94935,	 top1=13.3000,	 top5=30.4000
 12 | 4
 13 | name: layer_1.0.block.conv_3x3.block.act
 14 | Swish()
 15 | Results: loss=6.86361,	 top1=2.5000,	 top5=7.6000
 16 | 4
 17 | name: layer_2.0.block.exp_1x1.block.act
 18 | Swish()
 19 | Results: loss=7.63561,	 top1=0.8000,	 top5=3.0000
 20 | 4
 21 | name: layer_2.0.block.conv_3x3.block.act
 22 | Swish()
 23 | Results: loss=nan,	 top1=0.1000,	 top5=1.2000
 24 | 4
 25 | name: layer_2.1.block.exp_1x1.block.act
 26 | Swish()
 27 | Results: loss=nan,	 top1=0.2000,	 top5=1.0000
 28 | 4
 29 | name: layer_2.1.block.conv_3x3.block.act
 30 | Swish()
 31 | Results: loss=nan,	 top1=0.1000,	 top5=1.2000
 32 | 4
 33 | name: layer_3.0.block.exp_1x1.block.act
 34 | Swish()
 35 | Results: loss=nan,	 top1=0.1000,	 top5=0.7000
 36 | 4
 37 | name: layer_3.0.block.conv_3x3.block.act
 38 | Swish()
 39 | Results: loss=nan,	 top1=0.1000,	 top5=0.7000
 40 | 4
 41 | name: layer_3.1.local_rep.0.block.act
 42 | Swish()
 43 | Results: loss=nan,	 top1=0.1000,	 top5=0.7000
 44 | 4
 45 | name: layer_3.1.global_rep.0.pre_norm_ffn.1.block.act
 46 | Swish()
 47 | Results: loss=nan,	 top1=0.2000,	 top5=0.8000
 48 | 4
 49 | name: layer_3.1.global_rep.1.pre_norm_ffn.1.block.act
 50 | Swish()
 51 | Results: loss=nan,	 top1=0.2000,	 top5=0.5000
 52 | 4
 53 | name: layer_4.0.block.exp_1x1.block.act
 54 | Swish()
 55 | Results: loss=nan,	 top1=0.1000,	 top5=0.6000
 56 | 4
 57 | name: layer_4.0.block.conv_3x3.block.act
 58 | Swish()
 59 | Results: loss=nan,	 top1=0.2000,	 top5=0.7000
 60 | 4
 61 | name: layer_4.1.local_rep.0.block.act
 62 | Swish()
 63 | Results: loss=nan,	 top1=0.2000,	 top5=0.7000
 64 | 4
 65 | name: layer_4.1.global_rep.0.pre_norm_ffn.1.block.act
 66 | Swish()
 67 | Results: loss=nan,	 top1=0.2000,	 top5=0.5000
 68 | 4
 69 | name: layer_4.1.global_rep.1.pre_norm_ffn.1.block.act
 70 | Swish()
 71 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 72 | 4
 73 | name: layer_4.1.global_rep.2.pre_norm_ffn.1.block.act
 74 | Swish()
 75 | Results: loss=nan,	 top1=0.1000,	 top5=0.6000
 76 | 4
 77 | name: layer_4.1.global_rep.3.pre_norm_ffn.1.block.act
 78 | Swish()
 79 | Results: loss=nan,	 top1=0.1000,	 top5=0.5000
 80 | 4
 81 | name: layer_5.0.block.exp_1x1.block.act
 82 | Swish()
 83 | Results: loss=nan,	 top1=0.2000,	 top5=0.6000
 84 | 4
 85 | name: layer_5.0.block.conv_3x3.block.act
 86 | Swish()
 87 | Results: loss=nan,	 top1=0.2000,	 top5=0.6000
 88 | 4
 89 | name: layer_5.1.local_rep.0.block.act
 90 | Swish()
 91 | Results: loss=nan,	 top1=0.2000,	 top5=0.7000
 92 | 4
 93 | name: layer_5.1.global_rep.0.pre_norm_ffn.1.block.act
 94 | Swish()
 95 | Results: loss=nan,	 top1=0.1000,	 top5=0.4000
 96 | 4
 97 | name: layer_5.1.global_rep.1.pre_norm_ffn.1.block.act
 98 | Swish()
 99 | Results: loss=nan,	 top1=0.2000,	 top5=0.6000
100 | 4
101 | name: layer_5.1.global_rep.2.pre_norm_ffn.1.block.act
102 | Swish()
103 | Results: loss=nan,	 top1=0.2000,	 top5=0.7000
104 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/README.md:
--------------------------------------------------------------------------------
 1 | # PyTorch models trained on CIFAR-10 dataset
 2 | - I modified [TorchVision](https://pytorch.org/docs/stable/torchvision/models.html) official implementation of popular CNN models, and trained those on CIFAR-10 dataset.
 3 | - I changed *number of class, filter size, stride, and padding* in the the original code so that it works with CIFAR-10.
 4 | - I also share the **weights** of these models, so you can just load the weights and use them.
 5 | - The code is highly re-producible and readable by using PyTorch-Lightning.
 6 | 
 7 | ## Statistics of supported models
 8 | | No. |     Model    | Val. Acc. | No. Params |   Size |
 9 | |:---:|:-------------|----------:|-----------:|-------:|
10 | | 1   | vgg11_bn     |   92.39%  |   28.150 M | 108 MB |
11 | | 2   | vgg13_bn     |   94.22%  |   28.334 M | 109 MB |
12 | | 3   | vgg16_bn     |   94.00%  |   33.647 M | 129 MB |
13 | | 4   | vgg19_bn     |   93.95%  |   38.959 M | 149 MB |
14 | | 5   | resnet18     |   93.07%  |   11.174 M |  43 MB |
15 | | 6   | resnet34     |   93.34%  |   21.282 M |  82 MB |
16 | | 7   | resnet50     |   93.65%  |   23.521 M |  91 MB |
17 | | 8   | densenet121  |   94.06%  |    6.956 M |  28 MB |
18 | | 9   | densenet161  |   94.07%  |   26.483 M | 103 MB |
19 | | 10  | densenet169  |   94.05%  |   12.493 M |  49 MB |
20 | | 11  | mobilenet_v2 |   93.91%  |    2.237 M |   9 MB |
21 | | 12  | googlenet    |   92.85%  |    5.491 M |  22 MB |
22 | | 13  | inception_v3 |   93.74%  |   21.640 M |  83 MB |
23 | 
24 | ## Details Report & Run Logs
25 | Weight and Biases' details report for this project [WandB Report](https://wandb.ai/huyvnphan/cifar10/reports/CIFAR10-Classification-using-PyTorch---VmlldzozOTg0ODQ?accessToken=9m2q1ajhppuziprsq9tlryynvmqbkrbvjdoktrz7o6gtqilmtqbv2r9jjrtb2tqq)
26 | 
27 | Weight and Biases' run logs for this project [WandB Run Log](https://wandb.ai/huyvnphan/cifar10). You can see each run hyper-parameters, training accuracy, validation accuracy, loss, time taken.
28 | 
29 | ## How To Cite
30 | [![DOI](https://zenodo.org/badge/195914773.svg)](https://zenodo.org/badge/latestdoi/195914773)
31 | 
32 | ## How to use pretrained models
33 | 
34 | **Automatically download and extract the weights from Box (933 MB)**
35 | ```python
36 | python train.py --download_weights 1
37 | ```
38 | Or use [Google Drive](https://drive.google.com/file/d/17fmN8eQdLpq2jIMQ_X0IXDPXfI9oVWgq/view?usp=sharing) backup link (you have to download and extract manually)
39 | 
40 | **Load model and run**
41 | ```python
42 | from cifar10_models.vgg import vgg11_bn, vgg13_bn, vgg16_bn, vgg19_bn
43 | 
44 | # Untrained model
45 | my_model = vgg11_bn()
46 | 
47 | # Pretrained model
48 | my_model = vgg11_bn(pretrained=True)
49 | my_model.eval() # for evaluation
50 | ```
51 | 
52 | If you use your own images, all models expect data to be in range [0, 1] then normalized by
53 | ```python
54 | mean = [0.4914, 0.4822, 0.4465]
55 | std = [0.2471, 0.2435, 0.2616]
56 | ```
57 | 
58 | ## How to train models from scratch
59 | Check the `train.py` to see all available hyper-parameter choices.
60 | To reproduce the same accuracy use the default hyper-parameters
61 | 
62 | `python train.py --classifier resnet18`
63 | 
64 | ## How to test pretrained models
65 | `python train.py --test_phase 1 --pretrained 1 --classifier resnet18`
66 | 
67 | Output
68 | 
69 | `{'acc/test': tensor(93.0689, device='cuda:0')}`
70 | 
71 | 
72 | ## Requirements
73 | **Just to use pretrained models**
74 | - pytorch = 1.7.0
75 | 
76 | **To train & test**
77 | - pytorch = 1.7.0
78 | - torchvision = 0.7.0
79 | - tensorboard = 2.2.1
80 | - pytorch-lightning = 1.1.0
81 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/cifar10_models/resnet_orig.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | import os
  5 | 
  6 | # Credit to https://github.com/akamaster/pytorch_resnet_cifar10
  7 | 
  8 | __all__ = ["resnet_orig"]
  9 | 
 10 | 
 11 | class LambdaLayer(nn.Module):
 12 |     def __init__(self, lambd):
 13 |         super(LambdaLayer, self).__init__()
 14 |         self.lambd = lambd
 15 | 
 16 |     def forward(self, x):
 17 |         return self.lambd(x)
 18 | 
 19 | 
 20 | class BasicBlock(nn.Module):
 21 |     expansion = 1
 22 | 
 23 |     def __init__(self, in_planes, planes, stride=1, option="A"):
 24 |         super(BasicBlock, self).__init__()
 25 |         self.conv1 = nn.Conv2d(
 26 |             in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
 27 |         )
 28 |         self.bn1 = nn.BatchNorm2d(planes)
 29 |         self.conv2 = nn.Conv2d(
 30 |             planes, planes, kernel_size=3, stride=1, padding=1, bias=False
 31 |         )
 32 |         self.bn2 = nn.BatchNorm2d(planes)
 33 | 
 34 |         self.shortcut = nn.Sequential()
 35 |         if stride != 1 or in_planes != planes:
 36 |             if option == "A":
 37 |                 """
 38 |                 For CIFAR10 ResNet paper uses option A.
 39 |                 """
 40 |                 self.shortcut = LambdaLayer(
 41 |                     lambda x: F.pad(
 42 |                         x[:, :, ::2, ::2],
 43 |                         (0, 0, 0, 0, planes // 4, planes // 4),
 44 |                         "constant",
 45 |                         0,
 46 |                     )
 47 |                 )
 48 |             elif option == "B":
 49 |                 self.shortcut = nn.Sequential(
 50 |                     nn.Conv2d(
 51 |                         in_planes,
 52 |                         self.expansion * planes,
 53 |                         kernel_size=1,
 54 |                         stride=stride,
 55 |                         bias=False,
 56 |                     ),
 57 |                     nn.BatchNorm2d(self.expansion * planes),
 58 |                 )
 59 | 
 60 |     def forward(self, x):
 61 |         out = F.relu(self.bn1(self.conv1(x)))
 62 |         out = self.bn2(self.conv2(out))
 63 |         out += self.shortcut(x)
 64 |         out = F.relu(out)
 65 |         return out
 66 | 
 67 | 
 68 | class ResNet(nn.Module):
 69 |     def __init__(self, block, num_blocks, num_classes=10):
 70 |         super(ResNet, self).__init__()
 71 |         self.in_planes = 16
 72 | 
 73 |         self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
 74 |         self.bn1 = nn.BatchNorm2d(16)
 75 |         self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
 76 |         self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
 77 |         self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
 78 |         self.linear = nn.Linear(64, num_classes)
 79 | 
 80 |     def _make_layer(self, block, planes, num_blocks, stride):
 81 |         strides = [stride] + [1] * (num_blocks - 1)
 82 |         layers = []
 83 |         for stride in strides:
 84 |             layers.append(block(self.in_planes, planes, stride))
 85 |             self.in_planes = planes * block.expansion
 86 | 
 87 |         return nn.Sequential(*layers)
 88 | 
 89 |     def forward(self, x):
 90 |         out = F.relu(self.bn1(self.conv1(x)))
 91 |         out = self.layer1(out)
 92 |         out = self.layer2(out)
 93 |         out = self.layer3(out)
 94 |         out = F.avg_pool2d(out, out.size()[3])
 95 |         out = out.view(out.size(0), -1)
 96 |         out = self.linear(out)
 97 |         return out
 98 | 
 99 | 
100 | def resnet_orig(pretrained=True, device="cpu"):
101 |     net = ResNet(BasicBlock, [3, 3, 3])
102 |     if pretrained:
103 |         script_dir = os.path.dirname(__file__)
104 |         state_dict = torch.load(
105 |             script_dir + "/state_dicts/resnet_orig.pt", map_location=device
106 |         )
107 |         net.load_state_dict(state_dict)
108 |     return net
109 | 


--------------------------------------------------------------------------------
/src/SS.py:
--------------------------------------------------------------------------------
 1 | from util import *
 2 | from custom_module import *
 3 | from pretrained_model import *
 4 | global_config = load_model_yaml("./global_config/", "global_config.yaml")
 5 | 
 6 | def generate_layer_input_scale(model: nn.Module, train_data_loader, layer_nest_dict, directory_path):
 7 |     if(not os.path.exists(directory_path)):
 8 |             os.mkdir(directory_path)
 9 |     data_type = "_scale"
10 |     for key in layer_nest_dict:
11 |         my_model = model
12 |         layer_name = key
13 |         print("name: " + layer_name)
14 |         collection_layer = Input_scale_collection_layer(layer_name, access_layer(my_model, layer_name))
15 |         replace_layer(my_model, layer_name, collection_layer)
16 |         print()
17 | 
18 |     run_set(my_model, train_data_loader, "cuda:0")
19 | 
20 |     for key in layer_nest_dict:
21 |         layer_name = key
22 |         access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt")
23 |         data = torch.load(directory_path + layer_name + data_type + ".pt")
24 |         print(data)
25 | 
26 | 
27 | def CT_reset_scale(model, sign_scale, scale_path, scale_ratio, sign_nest_dict):
28 |     model = model.to("cuda:0")
29 |     for key in sign_nest_dict:
30 |         scale_name = key + "_scale.pt"
31 |         if(scale_path != None):
32 |             sign_scale = torch.load(scale_path + scale_name).item()
33 |             print("scale: " + str(sign_scale))
34 |         access_layer(model, key).sign.scale = sign_scale
35 |         access_layer(model, key).sign.scale_ratio = scale_ratio
36 | 
37 | 
38 | def SS_replace(model,valid_data_loader, train_data_loader, sign_type, input_data_dirctory):
39 |     model = model
40 |     sign_nest_dict = generate_sign_nest_dict(model)
41 |     dirctory = input_data_dirctory + "model_PR_AT/"
42 |     file_name = "model_PR_AT_"+sign_type+".pt"
43 |     scale_path = input_data_dirctory + "Scale_" + sign_type + "/"
44 |     model = torch.load(dirctory+file_name)
45 |     validate(model, valid_data_loader, "cuda:0")
46 |     generate_layer_input_scale(model = copy.deepcopy(model), train_data_loader = train_data_loader, layer_nest_dict = sign_nest_dict, directory_path = scale_path)
47 |     CT_reset_scale(model = model, sign_scale = 100, scale_path= scale_path, scale_ratio = 1, sign_nest_dict= sign_nest_dict)
48 |     validate(model, valid_data_loader, "cuda:0")
49 |     file_name2 = "model_PR_AT_SS_"+sign_type+".pt"
50 |     torch.save(model, dirctory+file_name2)
51 | 
52 | 
53 | 
54 | 
55 | if __name__ == "__main__":
56 |     parser = ArgumentParser()
57 |     parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32"])
58 |     parser.add_argument("--dataset", type=str,choices=["cifar10", "imagenet_1k", "cifar100"])
59 |     parser.add_argument("-wd", "--working_directory", type=str, default="./working_dirctory/")
60 |     parser.add_argument("-st","--sign_type", type=str, default="a7", choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3"])
61 |     args = parser.parse_args()
62 |     print(args)
63 | 
64 |     valid_data_loader = None
65 |     train_data_loader = None
66 |     valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"])
67 |     train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"])
68 | 
69 | 
70 |     # if(args.dataset == "cifar10" or args.dataset == "cifar100"):
71 |     #     valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"])
72 |     #     train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"])
73 |     # elif(args.dataset == "imagenet_1k"):
74 |     #     valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = os.path.join(global_config["Global"]["dataset_dirctory"], args.dataset) )
75 |     #     train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = os.path.join(global_config["Global"]["dataset_dirctory"], args.dataset) )
76 |    
77 |     model = get_pretrained_model(model_name=args.model, dataset=args.dataset)
78 |     
79 |     SS_replace(model = model, valid_data_loader=valid_data_loader, train_data_loader=train_data_loader ,sign_type = args.sign_type, input_data_dirctory = args.working_directory)
80 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/cifar10_models/mobilenetv2.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | 
  6 | __all__ = ["MobileNetV2", "mobilenet_v2"]
  7 | 
  8 | 
  9 | class ConvBNReLU(nn.Sequential):
 10 |     def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
 11 |         padding = (kernel_size - 1) // 2
 12 |         super(ConvBNReLU, self).__init__(
 13 |             nn.Conv2d(
 14 |                 in_planes,
 15 |                 out_planes,
 16 |                 kernel_size,
 17 |                 stride,
 18 |                 padding,
 19 |                 groups=groups,
 20 |                 bias=False,
 21 |             ),
 22 |             nn.BatchNorm2d(out_planes),
 23 |             nn.ReLU6(inplace=True),
 24 |         )
 25 | 
 26 | 
 27 | class InvertedResidual(nn.Module):
 28 |     def __init__(self, inp, oup, stride, expand_ratio):
 29 |         super(InvertedResidual, self).__init__()
 30 |         self.stride = stride
 31 |         assert stride in [1, 2]
 32 | 
 33 |         hidden_dim = int(round(inp * expand_ratio))
 34 |         self.use_res_connect = self.stride == 1 and inp == oup
 35 | 
 36 |         layers = []
 37 |         if expand_ratio != 1:
 38 |             # pw
 39 |             layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
 40 |         layers.extend(
 41 |             [
 42 |                 # dw
 43 |                 ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
 44 |                 # pw-linear
 45 |                 nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
 46 |                 nn.BatchNorm2d(oup),
 47 |             ]
 48 |         )
 49 |         self.conv = nn.Sequential(*layers)
 50 | 
 51 |     def forward(self, x):
 52 |         if self.use_res_connect:
 53 |             return x + self.conv(x)
 54 |         else:
 55 |             return self.conv(x)
 56 | 
 57 | 
 58 | class MobileNetV2(nn.Module):
 59 |     def __init__(self, num_classes=10, width_mult=1.0):
 60 |         super(MobileNetV2, self).__init__()
 61 |         block = InvertedResidual
 62 |         input_channel = 32
 63 |         last_channel = 1280
 64 | 
 65 |         # CIFAR10
 66 |         inverted_residual_setting = [
 67 |             # t, c, n, s
 68 |             [1, 16, 1, 1],
 69 |             [6, 24, 2, 1],  # Stride 2 -> 1 for CIFAR-10
 70 |             [6, 32, 3, 2],
 71 |             [6, 64, 4, 2],
 72 |             [6, 96, 3, 1],
 73 |             [6, 160, 3, 2],
 74 |             [6, 320, 1, 1],
 75 |         ]
 76 |         # END
 77 | 
 78 |         # building first layer
 79 |         input_channel = int(input_channel * width_mult)
 80 |         self.last_channel = int(last_channel * max(1.0, width_mult))
 81 | 
 82 |         # CIFAR10: stride 2 -> 1
 83 |         features = [ConvBNReLU(3, input_channel, stride=1)]
 84 |         # END
 85 | 
 86 |         # building inverted residual blocks
 87 |         for t, c, n, s in inverted_residual_setting:
 88 |             output_channel = int(c * width_mult)
 89 |             for i in range(n):
 90 |                 stride = s if i == 0 else 1
 91 |                 features.append(
 92 |                     block(input_channel, output_channel, stride, expand_ratio=t)
 93 |                 )
 94 |                 input_channel = output_channel
 95 |         # building last several layers
 96 |         features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
 97 |         # make it nn.Sequential
 98 |         self.features = nn.Sequential(*features)
 99 | 
100 |         # building classifier
101 |         self.classifier = nn.Sequential(
102 |             nn.Dropout(0.2),
103 |             nn.Linear(self.last_channel, num_classes),
104 |         )
105 | 
106 |         # weight initialization
107 |         for m in self.modules():
108 |             if isinstance(m, nn.Conv2d):
109 |                 nn.init.kaiming_normal_(m.weight, mode="fan_out")
110 |                 if m.bias is not None:
111 |                     nn.init.zeros_(m.bias)
112 |             elif isinstance(m, nn.BatchNorm2d):
113 |                 nn.init.ones_(m.weight)
114 |                 nn.init.zeros_(m.bias)
115 |             elif isinstance(m, nn.Linear):
116 |                 nn.init.normal_(m.weight, 0, 0.01)
117 |                 nn.init.zeros_(m.bias)
118 | 
119 |     def forward(self, x):
120 |         x = self.features(x)
121 |         x = x.mean([2, 3])
122 |         x = self.classifier(x)
123 |         return x
124 | 
125 | 
126 | def mobilenet_v2(pretrained=False, progress=True, device="cpu", **kwargs):
127 |     """
128 |     Constructs a MobileNetV2 architecture from
129 |     `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
130 | 
131 |     Args:
132 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
133 |         progress (bool): If True, displays a progress bar of the download to stderr
134 |     """
135 |     model = MobileNetV2(**kwargs)
136 |     if pretrained:
137 |         script_dir = os.path.dirname(__file__)
138 |         state_dict = torch.load(
139 |             script_dir + "/state_dicts/mobilenet_v2.pt", map_location=device
140 |         )
141 |         model.load_state_dict(state_dict)
142 |     return model
143 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/cifar10_models/vgg.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | 
  6 | __all__ = [
  7 |     "VGG",
  8 |     "vgg11_bn",
  9 |     "vgg13_bn",
 10 |     "vgg16_bn",
 11 |     "vgg19_bn",
 12 | ]
 13 | 
 14 | 
 15 | class VGG(nn.Module):
 16 |     def __init__(self, features, num_classes=10, init_weights=True):
 17 |         super(VGG, self).__init__()
 18 |         self.features = features
 19 |         # CIFAR 10 (7, 7) to (1, 1)
 20 |         # self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
 21 |         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
 22 | 
 23 |         self.classifier = nn.Sequential(
 24 |             nn.Linear(512 * 1 * 1, 4096),
 25 |             # nn.Linear(512 * 7 * 7, 4096),
 26 |             nn.ReLU(True),
 27 |             nn.Dropout(),
 28 |             nn.Linear(4096, 4096),
 29 |             nn.ReLU(True),
 30 |             nn.Dropout(),
 31 |             nn.Linear(4096, num_classes),
 32 |         )
 33 |         if init_weights:
 34 |             self._initialize_weights()
 35 | 
 36 |     def forward(self, x):
 37 |         x = self.features(x)
 38 |         x = self.avgpool(x)
 39 |         x = x.view(x.size(0), -1)
 40 |         x = self.classifier(x)
 41 |         return x
 42 | 
 43 |     def _initialize_weights(self):
 44 |         for m in self.modules():
 45 |             if isinstance(m, nn.Conv2d):
 46 |                 nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
 47 |                 if m.bias is not None:
 48 |                     nn.init.constant_(m.bias, 0)
 49 |             elif isinstance(m, nn.BatchNorm2d):
 50 |                 nn.init.constant_(m.weight, 1)
 51 |                 nn.init.constant_(m.bias, 0)
 52 |             elif isinstance(m, nn.Linear):
 53 |                 nn.init.normal_(m.weight, 0, 0.01)
 54 |                 nn.init.constant_(m.bias, 0)
 55 | 
 56 | 
 57 | def make_layers(cfg, batch_norm=False):
 58 |     layers = []
 59 |     in_channels = 3
 60 |     for v in cfg:
 61 |         if v == "M":
 62 |             layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
 63 |         else:
 64 |             conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
 65 |             if batch_norm:
 66 |                 layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
 67 |             else:
 68 |                 layers += [conv2d, nn.ReLU(inplace=True)]
 69 |             in_channels = v
 70 |     return nn.Sequential(*layers)
 71 | 
 72 | 
 73 | cfgs = {
 74 |     "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
 75 |     "B": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
 76 |     "D": [
 77 |         64,
 78 |         64,
 79 |         "M",
 80 |         128,
 81 |         128,
 82 |         "M",
 83 |         256,
 84 |         256,
 85 |         256,
 86 |         "M",
 87 |         512,
 88 |         512,
 89 |         512,
 90 |         "M",
 91 |         512,
 92 |         512,
 93 |         512,
 94 |         "M",
 95 |     ],
 96 |     "E": [
 97 |         64,
 98 |         64,
 99 |         "M",
100 |         128,
101 |         128,
102 |         "M",
103 |         256,
104 |         256,
105 |         256,
106 |         256,
107 |         "M",
108 |         512,
109 |         512,
110 |         512,
111 |         512,
112 |         "M",
113 |         512,
114 |         512,
115 |         512,
116 |         512,
117 |         "M",
118 |     ],
119 | }
120 | 
121 | 
122 | def _vgg(arch, cfg, batch_norm, pretrained, progress, device, **kwargs):
123 |     if pretrained:
124 |         kwargs["init_weights"] = False
125 |     model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
126 |     if pretrained:
127 |         script_dir = os.path.dirname(__file__)
128 |         state_dict = torch.load(
129 |             script_dir + "/state_dicts/" + arch + ".pt", map_location=device
130 |         )
131 |         model.load_state_dict(state_dict)
132 |     return model
133 | 
134 | 
135 | def vgg11_bn(pretrained=False, progress=True, device="cpu", **kwargs):
136 |     """VGG 11-layer model (configuration "A") with batch normalization
137 | 
138 |     Args:
139 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
140 |         progress (bool): If True, displays a progress bar of the download to stderr
141 |     """
142 |     return _vgg("vgg11_bn", "A", True, pretrained, progress, device, **kwargs)
143 | 
144 | 
145 | def vgg13_bn(pretrained=False, progress=True, device="cpu", **kwargs):
146 |     """VGG 13-layer model (configuration "B") with batch normalization
147 | 
148 |     Args:
149 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
150 |         progress (bool): If True, displays a progress bar of the download to stderr
151 |     """
152 |     return _vgg("vgg13_bn", "B", True, pretrained, progress, device, **kwargs)
153 | 
154 | 
155 | def vgg16_bn(pretrained=False, progress=True, device="cpu", **kwargs):
156 |     """VGG 16-layer model (configuration "D") with batch normalization
157 | 
158 |     Args:
159 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
160 |         progress (bool): If True, displays a progress bar of the download to stderr
161 |     """
162 |     return _vgg("vgg16_bn", "D", True, pretrained, progress, device, **kwargs)
163 | 
164 | 
165 | def vgg19_bn(pretrained=False, progress=True, device="cpu", **kwargs):
166 |     """VGG 19-layer model (configuration 'E') with batch normalization
167 | 
168 |     Args:
169 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
170 |         progress (bool): If True, displays a progress bar of the download to stderr
171 |     """
172 |     return _vgg("vgg19_bn", "E", True, pretrained, progress, device, **kwargs)
173 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/schduler.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import warnings
  3 | from typing import List
  4 | 
  5 | from torch.optim import Optimizer
  6 | from torch.optim.lr_scheduler import _LRScheduler
  7 | 
  8 | 
  9 | class WarmupCosineLR(_LRScheduler):
 10 |     """
 11 |     Sets the learning rate of each parameter group to follow a linear warmup schedule
 12 |     between warmup_start_lr and base_lr followed by a cosine annealing schedule between
 13 |     base_lr and eta_min.
 14 |     .. warning::
 15 |         It is recommended to call :func:`.step()` for :class:`LinearWarmupCosineAnnealingLR`
 16 |         after each iteration as calling it after each epoch will keep the starting lr at
 17 |         warmup_start_lr for the first epoch which is 0 in most cases.
 18 |     .. warning::
 19 |         passing epoch to :func:`.step()` is being deprecated and comes with an EPOCH_DEPRECATION_WARNING.
 20 |         It calls the :func:`_get_closed_form_lr()` method for this scheduler instead of
 21 |         :func:`get_lr()`. Though this does not change the behavior of the scheduler, when passing
 22 |         epoch param to :func:`.step()`, the user should call the :func:`.step()` function before calling
 23 |         train and validation methods.
 24 |     Args:
 25 |         optimizer (Optimizer): Wrapped optimizer.
 26 |         warmup_epochs (int): Maximum number of iterations for linear warmup
 27 |         max_epochs (int): Maximum number of iterations
 28 |         warmup_start_lr (float): Learning rate to start the linear warmup. Default: 0.
 29 |         eta_min (float): Minimum learning rate. Default: 0.
 30 |         last_epoch (int): The index of last epoch. Default: -1.
 31 |     Example:
 32 |         >>> layer = nn.Linear(10, 1)
 33 |         >>> optimizer = Adam(layer.parameters(), lr=0.02)
 34 |         >>> scheduler = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=10, max_epochs=40)
 35 |         >>> #
 36 |         >>> # the default case
 37 |         >>> for epoch in range(40):
 38 |         ...     # train(...)
 39 |         ...     # validate(...)
 40 |         ...     scheduler.step()
 41 |         >>> #
 42 |         >>> # passing epoch param case
 43 |         >>> for epoch in range(40):
 44 |         ...     scheduler.step(epoch)
 45 |         ...     # train(...)
 46 |         ...     # validate(...)
 47 |     """
 48 | 
 49 |     def __init__(
 50 |         self,
 51 |         optimizer: Optimizer,
 52 |         warmup_epochs: int,
 53 |         max_epochs: int,
 54 |         warmup_start_lr: float = 1e-8,
 55 |         eta_min: float = 1e-8,
 56 |         last_epoch: int = -1,
 57 |     ) -> None:
 58 | 
 59 |         self.warmup_epochs = warmup_epochs
 60 |         self.max_epochs = max_epochs
 61 |         self.warmup_start_lr = warmup_start_lr
 62 |         self.eta_min = eta_min
 63 | 
 64 |         super(WarmupCosineLR, self).__init__(optimizer, last_epoch)
 65 | 
 66 |     def get_lr(self) -> List[float]:
 67 |         """
 68 |         Compute learning rate using chainable form of the scheduler
 69 |         """
 70 |         if not self._get_lr_called_within_step:
 71 |             warnings.warn(
 72 |                 "To get the last learning rate computed by the scheduler, "
 73 |                 "please use `get_last_lr()`.",
 74 |                 UserWarning,
 75 |             )
 76 | 
 77 |         if self.last_epoch == 0:
 78 |             return [self.warmup_start_lr] * len(self.base_lrs)
 79 |         elif self.last_epoch < self.warmup_epochs:
 80 |             return [
 81 |                 group["lr"]
 82 |                 + (base_lr - self.warmup_start_lr) / (self.warmup_epochs - 1)
 83 |                 for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups)
 84 |             ]
 85 |         elif self.last_epoch == self.warmup_epochs:
 86 |             return self.base_lrs
 87 |         elif (self.last_epoch - 1 - self.max_epochs) % (
 88 |             2 * (self.max_epochs - self.warmup_epochs)
 89 |         ) == 0:
 90 |             return [
 91 |                 group["lr"]
 92 |                 + (base_lr - self.eta_min)
 93 |                 * (1 - math.cos(math.pi / (self.max_epochs - self.warmup_epochs)))
 94 |                 / 2
 95 |                 for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups)
 96 |             ]
 97 | 
 98 |         return [
 99 |             (
100 |                 1
101 |                 + math.cos(
102 |                     math.pi
103 |                     * (self.last_epoch - self.warmup_epochs)
104 |                     / (self.max_epochs - self.warmup_epochs)
105 |                 )
106 |             )
107 |             / (
108 |                 1
109 |                 + math.cos(
110 |                     math.pi
111 |                     * (self.last_epoch - self.warmup_epochs - 1)
112 |                     / (self.max_epochs - self.warmup_epochs)
113 |                 )
114 |             )
115 |             * (group["lr"] - self.eta_min)
116 |             + self.eta_min
117 |             for group in self.optimizer.param_groups
118 |         ]
119 | 
120 |     def _get_closed_form_lr(self) -> List[float]:
121 |         """
122 |         Called when epoch is passed as a param to the `step` function of the scheduler.
123 |         """
124 |         if self.last_epoch < self.warmup_epochs:
125 |             return [
126 |                 self.warmup_start_lr
127 |                 + self.last_epoch
128 |                 * (base_lr - self.warmup_start_lr)
129 |                 / (self.warmup_epochs - 1)
130 |                 for base_lr in self.base_lrs
131 |             ]
132 | 
133 |         return [
134 |             self.eta_min
135 |             + 0.5
136 |             * (base_lr - self.eta_min)
137 |             * (
138 |                 1
139 |                 + math.cos(
140 |                     math.pi
141 |                     * (self.last_epoch - self.warmup_epochs)
142 |                     / (self.max_epochs - self.warmup_epochs)
143 |                 )
144 |             )
145 |             for base_lr in self.base_lrs
146 |         ]
147 | 


--------------------------------------------------------------------------------
/src/CT_AESPA.py:
--------------------------------------------------------------------------------
  1 | from util import *
  2 | from custom_module import *
  3 | from pretrained_model import *
  4 | import os
  5 | 
  6 | src_dir = os.path.dirname(os.path.abspath(__file__))
  7 | global_config = load_model_yaml( os.path.join(src_dir, "global_config"), "global_config.yaml")
  8 | 
  9 | 
 10 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path,  train_data_loader):
 11 |     if(not os.path.exists(directory_path)):
 12 |             os.mkdir(directory_path)
 13 |     data_type = "_input"
 14 |     for key in layer_nest_dict:
 15 |         my_model = copy.deepcopy(model)
 16 |         layer_nest_dict[key]
 17 |         if(layer_nest_dict[key]["type"] == "ReLU" and layer_nest_dict[key]["HerPN"]):
 18 |             layer_name = layer_nest_dict[key]["HerPN"]
 19 |         else:
 20 |             layer_name = key
 21 |         print("name: " + layer_name)
 22 |         collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name))
 23 |         replace_layer(my_model, layer_name, collection_layer)
 24 |         run_set(my_model, train_data_loader, "cuda:0")
 25 |         access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt")
 26 |         # data = torch.load(directory_path + layer_name + data_type + ".pt")
 27 |         # print(data.shape)
 28 |         del my_model
 29 | 
 30 | 
 31 | def generate_data_set(dirctory_path , layer_nest_dict, split_point):
 32 |     train_path = "train/"
 33 |     valid_path = "val/"
 34 |     if(not os.path.exists(dirctory_path + train_path)):
 35 |         os.mkdir(dirctory_path + train_path)
 36 |     if(not os.path.exists(dirctory_path + valid_path)):
 37 |         os.mkdir(dirctory_path + valid_path)   
 38 | 
 39 |     for key in layer_nest_dict:
 40 |         data_type = "_input"
 41 |         if(layer_nest_dict[key]["type"] == "ReLU" and layer_nest_dict[key]["HerPN"]):
 42 |             layer_name = layer_nest_dict[key]["HerPN"]
 43 |         else:
 44 |             layer_name = key
 45 |         file_name = layer_name + data_type + ".pt"
 46 |         print(layer_name)
 47 |         data = torch.load(dirctory_path + file_name)
 48 |         data = data.reshape((-1, ) + data.shape[2:])
 49 |         b=torch.randperm(data.shape[0])
 50 |         data = data[b]
 51 |         train_data = data[0:split_point]
 52 |         valid_data = data[split_point:data.shape[0]]
 53 |         torch.save(train_data, dirctory_path + train_path + file_name)
 54 |         torch.save(valid_data, dirctory_path + valid_path + file_name)
 55 |         print(train_data.shape)
 56 |         print(valid_data.shape)
 57 | 
 58 | 
 59 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path):
 60 |     sign_nest_dict = generate_sign_nest_dict(model)
 61 |     validate(model, valid_data_loader)
 62 |     generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader)
 63 |     generate_data_set(input_data_save_path , sign_nest_dict, split_point)
 64 | 
 65 | 
 66 | def CT_train(sign_type, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, pretrain_model):
 67 |     print(sign_type)
 68 |     for key in sign_nest_dict:
 69 |         sign_dict = sign_nest_dict[key]
 70 |         if(sign_dict["type"] == "MaxPool2d"):
 71 |             continue
 72 |         relu_name = key
 73 |         bn_name =  sign_dict["HerPN"]
 74 |         if(sign_dict["type"] == "ReLU" and sign_dict["HerPN"]):
 75 |             data_name = sign_dict["HerPN"]
 76 |             num_features = access_layer(pretrain_model, bn_name).num_features
 77 |             BN_dimension = 2
 78 |             my_model = HerPN2d(num_features, BN_dimension)
 79 |             ref_model = nn.Sequential(access_layer(pretrain_model, bn_name), access_layer(pretrain_model, relu_name))
 80 |         else:
 81 |             data_name = key
 82 |             num_features = 4096
 83 |             BN_dimension = 1
 84 |             my_model = HerPN2d(num_features, BN_dimension)
 85 |             ref_model = access_layer(pretrain_model, relu_name)
 86 | 
 87 |         
 88 | 
 89 |         optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0)
 90 |         scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4)
 91 |         train_path = "train/"
 92 |         val_path = "val/"
 93 |         data_type = "_input"
 94 |         file_name = data_name + data_type + ".pt"
 95 |         print(file_name)
 96 |         train_data = torch.load(input_data_dirctory + train_path + file_name)
 97 |         valid_data = torch.load(input_data_dirctory + val_path + file_name)
 98 |         for epoch_i in range(40):
 99 |             train_loss_meter = AverageMeter("train loss")
100 |             val_loss_meter = AverageMeter("val loss")   
101 |             #train
102 |             for batch_i in range(int(train_data.shape[0] / batch_size)):
103 |                 x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
104 |                 target_y = ref_model.to("cuda:0").forward(x)
105 |                 actual_y = my_model.to("cuda:0").forward(x)
106 |                 loss_fun = nn.MSELoss()
107 |                 my_model.zero_grad()
108 |                 loss = loss_fun(actual_y, target_y)
109 |                 train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
110 |                 loss.backward()
111 |                 optimizer.step()
112 |             train_loss = train_loss_meter.avg
113 | 
114 |             #valid
115 |             for batch_i in range(int(valid_data.shape[0] / batch_size)):
116 |                 x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
117 |                 target_y = ref_model.to("cuda:0").forward(x)
118 |                 actual_y = my_model.forward(x)
119 |                 loss_fun = nn.MSELoss()
120 |                 loss = loss_fun(actual_y, target_y)
121 |                 val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
122 |             val_loss = val_loss_meter.avg
123 |         
124 |             scheduler.step(val_loss)
125 | 
126 |             print(
127 |                 f"Epoch:{epoch_i + 1}"
128 |                 + f" Train Loss:{train_loss:.10f}"
129 |                 + f" Val Loss: {val_loss:.10f}"
130 |             )
131 | 
132 |         folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
133 |         coef_save_dirctory = input_data_dirctory + folder_name
134 |         if(not os.path.exists(coef_save_dirctory)):
135 |                 os.mkdir(coef_save_dirctory)
136 |         file_name = key + "_herpn.pt"
137 |         torch.save(my_model, coef_save_dirctory + file_name)
138 |         print("save: " + folder_name + file_name)
139 |         print("\n")
140 | 
141 | 
142 | if __name__ == "__main__":
143 |     parser = ArgumentParser()
144 |     parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32"])
145 |     parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"])
146 |     parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "herph"])
147 |     parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False])
148 |     parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/")
149 | 
150 |     args = parser.parse_args()
151 |     print(args)
152 |     if(args.dataset == "cifar10" or args.dataset == "cifar100"):
153 |         split_point = 45000
154 |         batch_size = 100
155 |     elif(args.dataset == "imagenet_1k"):
156 |         split_point = 900
157 |         batch_size = 40
158 |     model = get_pretrained_model(model_name=args.model, dataset=args.dataset)
159 |     if(args.data_collection):
160 |         data_collection(model = model,
161 |             valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"] ),
162 |             train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] ),
163 |             split_point = split_point, input_data_save_path = args.working_directory)
164 |         
165 |     else:
166 |         nest_dict = generate_sign_nest_dict(model) 
167 |         CT_train(sign_type = args.sign_type, sign_scale = 0, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size,
168 |                  input_data_dirctory = args.working_directory , output_floder_suffix= "test", pretrain_model=model)
169 | 


--------------------------------------------------------------------------------
/src/CT_sign_SiLU.py:
--------------------------------------------------------------------------------
  1 | from util import *
  2 | from custom_module import *
  3 | from pretrained_model import *
  4 | import numpy as np
  5 | global_config = load_model_yaml("./global_config/", "global_config.yaml")
  6 | 
  7 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path,  train_data_loader):
  8 |     if(not os.path.exists(directory_path)):
  9 |             os.mkdir(directory_path)
 10 |     data_type = "_input"
 11 |     for key in layer_nest_dict:
 12 |         my_model = copy.deepcopy(model)
 13 |         layer_name = key
 14 |         print("name: " + layer_name)
 15 |         collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name))
 16 |         replace_layer(my_model, layer_name, collection_layer)
 17 |         run_set(my_model, train_data_loader, "cuda:0")
 18 |         access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt")
 19 |         data = torch.load(directory_path + layer_name + data_type + ".pt")
 20 |         print(data.shape)
 21 | 
 22 | 
 23 | def generate_data_set(dirctory_path , layer_nest_dict, split_point):
 24 |     train_path = "train/"
 25 |     valid_path = "val/"
 26 |     if(not os.path.exists(dirctory_path + train_path)):
 27 |         os.mkdir(dirctory_path + train_path)
 28 |     if(not os.path.exists(dirctory_path + valid_path)):
 29 |         os.mkdir(dirctory_path + valid_path)   
 30 | 
 31 |     for key in layer_nest_dict:
 32 |         data_type = "_input"
 33 |         layer_name = key
 34 |         file_name = layer_name + data_type + ".pt"
 35 |         print(layer_name)
 36 |         data = torch.load(dirctory_path + file_name)
 37 |         b=torch.randperm(data.shape[0])
 38 |         data = data[b]
 39 |         train_data = data[0:split_point]
 40 |         valid_data = data[split_point:data.shape[0]]
 41 |         torch.save(train_data, dirctory_path + train_path + file_name)
 42 |         torch.save(valid_data, dirctory_path + valid_path + file_name)
 43 |         print(train_data.shape)
 44 |         print(valid_data.shape)
 45 | 
 46 | 
 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path):
 48 |     sign_nest_dict = generate_sign_nest_dict(model)
 49 |     validate(model, valid_data_loader)
 50 |     generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader)
 51 |     generate_data_set(input_data_save_path , sign_nest_dict, split_point)
 52 | 
 53 | 
 54 | def CT_train(sign_type, degree, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, epoch = 40):
 55 |     sign_param_dict = Sign_parameter_generator().param_nest_dict[sign_type]
 56 |     print(sign_type)
 57 |     for key in sign_nest_dict:
 58 |         sign_dict = sign_nest_dict[key]
 59 |         train_path = "train/"
 60 |         val_path = "val/"
 61 |         data_type = "_input"
 62 |         file_name = key + data_type + ".pt"
 63 | 
 64 |         sign_module = Sign_minmax_layer(coef=sign_param_dict["coef"], degree=sign_param_dict["degree"],scale=sign_scale)
 65 |         my_model = ReLU_sign_layer(sign = sign_module)
 66 |         ref_model = nn.SiLU()
 67 | 
 68 | 
 69 | 
 70 |         optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0)
 71 |         scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4)
 72 | 
 73 |         print(file_name)
 74 |         train_data = torch.load(input_data_dirctory + train_path + file_name)
 75 |         valid_data = torch.load(input_data_dirctory + val_path + file_name)
 76 |         for epoch_i in range(epoch):
 77 |             train_loss_meter = AverageMeter("train loss")
 78 |             val_loss_meter = AverageMeter("val loss")   
 79 |             #train
 80 |             for batch_i in range(int(train_data.shape[0] / batch_size)):
 81 |                 x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
 82 |                 target_y = ref_model.to("cuda:0").forward(x)
 83 |                 actual_y = my_model.forward(x)
 84 |                 loss_fun = nn.MSELoss()
 85 |                 my_model.zero_grad()
 86 |                 loss = loss_fun(actual_y, target_y)
 87 |                 train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
 88 |                 loss.backward()
 89 |                 optimizer.step()
 90 |             train_loss = train_loss_meter.avg
 91 | 
 92 |             #valid
 93 |             for batch_i in range(int(valid_data.shape[0] / batch_size)):
 94 |                 x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
 95 |                 target_y = ref_model.to("cuda:0").forward(x)
 96 |                 actual_y = my_model.forward(x)
 97 |                 loss_fun = nn.MSELoss()
 98 |                 loss = loss_fun(actual_y, target_y)
 99 |                 val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
100 |             val_loss = val_loss_meter.avg
101 |         
102 |             scheduler.step(val_loss)
103 | 
104 |             print(
105 |                 f"Epoch:{epoch_i + 1}"
106 |                 + f" Train Loss:{train_loss:.10f}"
107 |                 + f" Val Loss: {val_loss:.10f}"
108 |             )
109 | 
110 |         folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
111 |         coef_save_dirctory = input_data_dirctory + folder_name
112 |         if(not os.path.exists(coef_save_dirctory)):
113 |                 os.mkdir(coef_save_dirctory)
114 |         file_name = key + "_coef.pt"
115 |         my_model.sign.save_coef(coef_save_dirctory + file_name)
116 |         print("save: " + folder_name + file_name)
117 |         print("\n")
118 | 
119 | def CT_val(model: nn.Module , layer_nest_dict, directory_path,  val_data_loader, sign_type, output_floder_suffix):
120 |     if(not os.path.exists(directory_path)):
121 |             os.mkdir(directory_path)
122 |     folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
123 |     for key in layer_nest_dict:
124 |         file_name = key + "_coef.pt"
125 |         coef = torch.load(directory_path + folder_name + file_name)
126 |         degree = len(coef.tolist()[0])
127 |         sign_module_CT = Sigmoid_minmax_layer(coef=coef, degree=[degree],scale=1)
128 |         rlays = SiLU_minmax_layer(sigmoid=sign_module_CT)
129 |         layer_name = key
130 |         print("name: " + layer_name)
131 |         print(access_layer(model, layer_name))
132 |         replace_layer(model, layer_name, rlays)
133 |         validate(model, val_data_loader)
134 | 
135 | 
136 | 
137 | 
138 | 
139 | if __name__ == "__main__":
140 |     parser = ArgumentParser()
141 |     parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "mobileVitV2"])
142 |     parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"])
143 |     parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "polyfit"])
144 |     parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False])
145 |     parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/")
146 | 
147 |     args = parser.parse_args()
148 |     print(args)
149 |     if(args.dataset == "cifar10" or args.dataset == "cifar100"):
150 |         split_point = 45000
151 |         batch_size = 100
152 |     elif(args.dataset == "imagenet_1k"):
153 |         split_point = 900
154 |         batch_size = 50
155 |     model = get_pretrained_model(model_name=args.model, dataset=args.dataset)
156 |     # print(model)
157 | 
158 |     valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"])
159 |     train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] )
160 | 
161 |     if(args.data_collection):
162 |         data_collection(model = model,
163 |             
164 |             split_point = split_point, input_data_save_path = args.working_directory)
165 |     
166 |     else:
167 |         nest_dict = generate_sign_nest_dict(model) 
168 |         CT_train(sign_type = args.sign_type, degree=0, sign_scale = 0, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size,
169 |                  input_data_dirctory = args.working_directory , output_floder_suffix= "dynamic", epoch=40)
170 |         # CT_val(model=model, layer_nest_dict=nest_dict, directory_path=args.working_directory,
171 |         #        val_data_loader = valid_data_loader,sign_type=args.sign_type, output_floder_suffix="polyfit_o")
172 | 


--------------------------------------------------------------------------------
/src/CT_cvnet_sign_SiLU.py:
--------------------------------------------------------------------------------
  1 | from util import *
  2 | from custom_module import *
  3 | from pretrained_model import *
  4 | import numpy as np
  5 | global_config = load_model_yaml("./global_config/", "global_config.yaml")
  6 | 
  7 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path,  train_data_loader):
  8 |     if(not os.path.exists(directory_path)):
  9 |             os.mkdir(directory_path)
 10 |     data_type = "_input"
 11 |     for key in layer_nest_dict:
 12 |         my_model = copy.deepcopy(model)
 13 |         layer_name = key
 14 |         print("name: " + layer_name)
 15 |         collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name))
 16 |         replace_layer(my_model, layer_name, collection_layer)
 17 |         run_set(my_model, train_data_loader, "cuda:0")
 18 |         access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt")
 19 |         data = torch.load(directory_path + layer_name + data_type + ".pt")
 20 |         print(data.shape)
 21 | 
 22 | 
 23 | def generate_data_set(dirctory_path , layer_nest_dict, split_point):
 24 |     train_path = "train/"
 25 |     valid_path = "val/"
 26 |     if(not os.path.exists(dirctory_path + train_path)):
 27 |         os.mkdir(dirctory_path + train_path)
 28 |     if(not os.path.exists(dirctory_path + valid_path)):
 29 |         os.mkdir(dirctory_path + valid_path)   
 30 | 
 31 |     for key in layer_nest_dict:
 32 |         data_type = "_input"
 33 |         layer_name = key
 34 |         file_name = layer_name + data_type + ".pt"
 35 |         print(layer_name)
 36 |         data = torch.load(dirctory_path + file_name)
 37 |         b=torch.randperm(data.shape[0])
 38 |         data = data[b]
 39 |         train_data = data[0:split_point]
 40 |         valid_data = data[split_point:data.shape[0]]
 41 |         torch.save(train_data, dirctory_path + train_path + file_name)
 42 |         torch.save(valid_data, dirctory_path + valid_path + file_name)
 43 |         print(train_data.shape)
 44 |         print(valid_data.shape)
 45 | 
 46 | 
 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path):
 48 |     sign_nest_dict = generate_sign_nest_dict(model)
 49 |     validate(model, valid_data_loader)
 50 |     generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader)
 51 |     generate_data_set(input_data_save_path , sign_nest_dict, split_point)
 52 | 
 53 | 
 54 | def CT_train(sign_type, degree, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, epoch = 40):
 55 |     sign_param_dict = Sign_parameter_generator().param_nest_dict[sign_type]
 56 |     print(sign_type)
 57 |     for key in sign_nest_dict:
 58 |         sign_dict = sign_nest_dict[key]
 59 |         train_path = "train/"
 60 |         val_path = "val/"
 61 |         data_type = "_input"
 62 |         file_name = key + data_type + ".pt"
 63 | 
 64 |         sign_module = Sign_minmax_layer(coef=sign_param_dict["coef"], degree=sign_param_dict["degree"],scale=sign_scale)
 65 |         my_model = ReLU_sign_layer(sign = sign_module)
 66 |         ref_model = nn.SiLU()
 67 | 
 68 | 
 69 | 
 70 |         optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0)
 71 |         scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4)
 72 | 
 73 |         print(file_name)
 74 |         train_data = torch.load(input_data_dirctory + train_path + file_name)
 75 |         valid_data = torch.load(input_data_dirctory + val_path + file_name)
 76 |         for epoch_i in range(epoch):
 77 |             train_loss_meter = AverageMeter("train loss")
 78 |             val_loss_meter = AverageMeter("val loss")   
 79 |             #train
 80 |             for batch_i in range(int(train_data.shape[0] / batch_size)):
 81 |                 x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
 82 |                 target_y = ref_model.to("cuda:0").forward(x)
 83 |                 actual_y = my_model.forward(x)
 84 |                 loss_fun = nn.MSELoss()
 85 |                 my_model.zero_grad()
 86 |                 loss = loss_fun(actual_y, target_y)
 87 |                 train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
 88 |                 loss.backward()
 89 |                 optimizer.step()
 90 |             train_loss = train_loss_meter.avg
 91 | 
 92 |             #valid
 93 |             for batch_i in range(int(valid_data.shape[0] / batch_size)):
 94 |                 x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
 95 |                 target_y = ref_model.to("cuda:0").forward(x)
 96 |                 actual_y = my_model.forward(x)
 97 |                 loss_fun = nn.MSELoss()
 98 |                 loss = loss_fun(actual_y, target_y)
 99 |                 val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
100 |             val_loss = val_loss_meter.avg
101 |         
102 |             scheduler.step(val_loss)
103 | 
104 |             print(
105 |                 f"Epoch:{epoch_i + 1}"
106 |                 + f" Train Loss:{train_loss:.10f}"
107 |                 + f" Val Loss: {val_loss:.10f}"
108 |             )
109 | 
110 |         folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
111 |         coef_save_dirctory = input_data_dirctory + folder_name
112 |         if(not os.path.exists(coef_save_dirctory)):
113 |                 os.mkdir(coef_save_dirctory)
114 |         file_name = key + "_coef.pt"
115 |         my_model.sign.save_coef(coef_save_dirctory + file_name)
116 |         print("save: " + folder_name + file_name)
117 |         print("\n")
118 | 
119 | def CT_val(model: nn.Module , layer_nest_dict, directory_path,  val_data_loader, sign_type, output_floder_suffix):
120 |     if(not os.path.exists(directory_path)):
121 |             os.mkdir(directory_path)
122 |     folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
123 |     for key in layer_nest_dict:
124 |         file_name = key + "_coef.pt"
125 |         coef = torch.load(directory_path + folder_name + file_name)
126 |         degree = len(coef.tolist()[0])
127 |         sign_module_CT = Sigmoid_minmax_layer(coef=coef, degree=[degree],scale=1)
128 |         rlays = SiLU_minmax_layer(sigmoid=sign_module_CT)
129 |         layer_name = key
130 |         print("name: " + layer_name)
131 |         print(access_layer(model, layer_name))
132 |         replace_layer(model, layer_name, rlays)
133 |         validate(model, val_data_loader)
134 | 
135 | 
136 | 
137 | 
138 | 
139 | if __name__ == "__main__":
140 |     parser = ArgumentParser()
141 |     parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "mobileVitV2"])
142 |     parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"])
143 |     parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "polyfit"])
144 |     parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False])
145 |     parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/")
146 | 
147 |     args = parser.parse_args()
148 |     print(args)
149 |     if(args.dataset == "cifar10" or args.dataset == "cifar100"):
150 |         split_point = 45000
151 |         batch_size = 100
152 |     elif(args.dataset == "imagenet_1k"):
153 |         split_point = 900
154 |         batch_size = 50
155 |     model = get_pretrained_model(model_name=args.model, dataset=args.dataset)
156 |     # print(model)
157 | 
158 |     valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"])
159 |     train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] )
160 | 
161 |     if(args.data_collection):
162 |         data_collection(model = model,
163 |             
164 |             split_point = split_point, input_data_save_path = args.working_directory)
165 |     
166 |     else:
167 |         nest_dict = generate_sign_nest_dict(model) 
168 |         CT_train(sign_type = args.sign_type, degree=0, sign_scale = 0, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size,
169 |                  input_data_dirctory = args.working_directory , output_floder_suffix= "dynamic", epoch=40)
170 |         # CT_val(model=model, layer_nest_dict=nest_dict, directory_path=args.working_directory,
171 |         #        val_data_loader = valid_data_loader,sign_type=args.sign_type, output_floder_suffix="polyfit_o")
172 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/cifar10_models/densenet.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from collections import OrderedDict
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | __all__ = ["DenseNet", "densenet121", "densenet169", "densenet161"]
  9 | 
 10 | 
 11 | class _DenseLayer(nn.Sequential):
 12 |     def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
 13 |         super(_DenseLayer, self).__init__()
 14 |         self.add_module("norm1", nn.BatchNorm2d(num_input_features)),
 15 |         self.add_module("relu1", nn.ReLU(inplace=True)),
 16 |         self.add_module(
 17 |             "conv1",
 18 |             nn.Conv2d(
 19 |                 num_input_features,
 20 |                 bn_size * growth_rate,
 21 |                 kernel_size=1,
 22 |                 stride=1,
 23 |                 bias=False,
 24 |             ),
 25 |         ),
 26 |         self.add_module("norm2", nn.BatchNorm2d(bn_size * growth_rate)),
 27 |         self.add_module("relu2", nn.ReLU(inplace=True)),
 28 |         self.add_module(
 29 |             "conv2",
 30 |             nn.Conv2d(
 31 |                 bn_size * growth_rate,
 32 |                 growth_rate,
 33 |                 kernel_size=3,
 34 |                 stride=1,
 35 |                 padding=1,
 36 |                 bias=False,
 37 |             ),
 38 |         ),
 39 |         self.drop_rate = drop_rate
 40 | 
 41 |     def forward(self, x):
 42 |         new_features = super(_DenseLayer, self).forward(x)
 43 |         if self.drop_rate > 0:
 44 |             new_features = F.dropout(
 45 |                 new_features, p=self.drop_rate, training=self.training
 46 |             )
 47 |         return torch.cat([x, new_features], 1)
 48 | 
 49 | 
 50 | class _DenseBlock(nn.Sequential):
 51 |     def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
 52 |         super(_DenseBlock, self).__init__()
 53 |         for i in range(num_layers):
 54 |             layer = _DenseLayer(
 55 |                 num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate
 56 |             )
 57 |             self.add_module("denselayer%d" % (i + 1), layer)
 58 | 
 59 | 
 60 | class _Transition(nn.Sequential):
 61 |     def __init__(self, num_input_features, num_output_features):
 62 |         super(_Transition, self).__init__()
 63 |         self.add_module("norm", nn.BatchNorm2d(num_input_features))
 64 |         self.add_module("relu", nn.ReLU(inplace=True))
 65 |         self.add_module(
 66 |             "conv",
 67 |             nn.Conv2d(
 68 |                 num_input_features,
 69 |                 num_output_features,
 70 |                 kernel_size=1,
 71 |                 stride=1,
 72 |                 bias=False,
 73 |             ),
 74 |         )
 75 |         self.add_module("pool", nn.AvgPool2d(kernel_size=2, stride=2))
 76 | 
 77 | 
 78 | class DenseNet(nn.Module):
 79 |     r"""Densenet-BC model class, based on
 80 |     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
 81 | 
 82 |     Args:
 83 |         growth_rate (int) - how many filters to add each layer (`k` in paper)
 84 |         block_config (list of 4 ints) - how many layers in each pooling block
 85 |         num_init_features (int) - the number of filters to learn in the first convolution layer
 86 |         bn_size (int) - multiplicative factor for number of bottle neck layers
 87 |           (i.e. bn_size * k features in the bottleneck layer)
 88 |         drop_rate (float) - dropout rate after each dense layer
 89 |         num_classes (int) - number of classification classes
 90 |     """
 91 | 
 92 |     def __init__(
 93 |         self,
 94 |         growth_rate=32,
 95 |         block_config=(6, 12, 24, 16),
 96 |         num_init_features=64,
 97 |         bn_size=4,
 98 |         drop_rate=0,
 99 |         num_classes=10,
100 |     ):
101 | 
102 |         super(DenseNet, self).__init__()
103 | 
104 |         # First convolution
105 | 
106 |         # CIFAR-10: kernel_size 7 ->3, stride 2->1, padding 3->1
107 |         self.features = nn.Sequential(
108 |             OrderedDict(
109 |                 [
110 |                     (
111 |                         "conv0",
112 |                         nn.Conv2d(
113 |                             3,
114 |                             num_init_features,
115 |                             kernel_size=3,
116 |                             stride=1,
117 |                             padding=1,
118 |                             bias=False,
119 |                         ),
120 |                     ),
121 |                     ("norm0", nn.BatchNorm2d(num_init_features)),
122 |                     ("relu0", nn.ReLU(inplace=True)),
123 |                     ("pool0", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
124 |                 ]
125 |             )
126 |         )
127 |         # END
128 | 
129 |         # Each denseblock
130 |         num_features = num_init_features
131 |         for i, num_layers in enumerate(block_config):
132 |             block = _DenseBlock(
133 |                 num_layers=num_layers,
134 |                 num_input_features=num_features,
135 |                 bn_size=bn_size,
136 |                 growth_rate=growth_rate,
137 |                 drop_rate=drop_rate,
138 |             )
139 |             self.features.add_module("denseblock%d" % (i + 1), block)
140 |             num_features = num_features + num_layers * growth_rate
141 |             if i != len(block_config) - 1:
142 |                 trans = _Transition(
143 |                     num_input_features=num_features,
144 |                     num_output_features=num_features // 2,
145 |                 )
146 |                 self.features.add_module("transition%d" % (i + 1), trans)
147 |                 num_features = num_features // 2
148 | 
149 |         # Final batch norm
150 |         self.features.add_module("norm5", nn.BatchNorm2d(num_features))
151 | 
152 |         # Linear layer
153 |         self.classifier = nn.Linear(num_features, num_classes)
154 | 
155 |         # Official init from torch repo.
156 |         for m in self.modules():
157 |             if isinstance(m, nn.Conv2d):
158 |                 nn.init.kaiming_normal_(m.weight)
159 |             elif isinstance(m, nn.BatchNorm2d):
160 |                 nn.init.constant_(m.weight, 1)
161 |                 nn.init.constant_(m.bias, 0)
162 |             elif isinstance(m, nn.Linear):
163 |                 nn.init.constant_(m.bias, 0)
164 | 
165 |     def forward(self, x):
166 |         features = self.features(x)
167 |         out = F.relu(features, inplace=True)
168 |         out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1)
169 |         out = self.classifier(out)
170 |         return out
171 | 
172 | 
173 | def _densenet(
174 |     arch,
175 |     growth_rate,
176 |     block_config,
177 |     num_init_features,
178 |     pretrained,
179 |     progress,
180 |     device,
181 |     **kwargs
182 | ):
183 |     model = DenseNet(growth_rate, block_config, num_init_features, **kwargs)
184 |     if pretrained:
185 |         script_dir = os.path.dirname(__file__)
186 |         state_dict = torch.load(
187 |             script_dir + "/state_dicts/" + arch + ".pt", map_location=device
188 |         )
189 |         model.load_state_dict(state_dict)
190 |     return model
191 | 
192 | 
193 | def densenet121(pretrained=False, progress=True, device="cpu", **kwargs):
194 |     r"""Densenet-121 model from
195 |     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
196 | 
197 |     Args:
198 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
199 |         progress (bool): If True, displays a progress bar of the download to stderr
200 |     """
201 |     return _densenet(
202 |         "densenet121", 32, (6, 12, 24, 16), 64, pretrained, progress, device, **kwargs
203 |     )
204 | 
205 | 
206 | def densenet161(pretrained=False, progress=True, device="cpu", **kwargs):
207 |     r"""Densenet-161 model from
208 |     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
209 | 
210 |     Args:
211 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
212 |         progress (bool): If True, displays a progress bar of the download to stderr
213 |     """
214 |     return _densenet(
215 |         "densenet161", 48, (6, 12, 36, 24), 96, pretrained, progress, device, **kwargs
216 |     )
217 | 
218 | 
219 | def densenet169(pretrained=False, progress=True, device="cpu", **kwargs):
220 |     r"""Densenet-169 model from
221 |     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
222 | 
223 |     Args:
224 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
225 |         progress (bool): If True, displays a progress bar of the download to stderr
226 |     """
227 |     return _densenet(
228 |         "densenet169", 32, (6, 12, 32, 32), 64, pretrained, progress, device, **kwargs
229 |     )
230 | 


--------------------------------------------------------------------------------
/src/mobilevit_v2.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # For licensing see accompanying LICENSE file.
  3 | # Copyright (C) 2023 Apple Inc. All Rights Reserved.
  4 | #
  5 | 
  6 | import argparse
  7 | from typing import Dict, Optional, Tuple
  8 | 
  9 | import torch
 10 | from torch import nn
 11 | 
 12 | from cvnets.layers import ConvLayer2d, GlobalPool, Identity, LinearLayer
 13 | from cvnets.models.classification.base_image_encoder import BaseImageEncoder
 14 | from cvnets.models.classification.config.mobilevit_v2 import get_configuration
 15 | from cvnets.modules import InvertedResidual
 16 | from cvnets.modules import MobileViTBlockv2 as Block
 17 | 
 18 | class MobileViTv2(BaseImageEncoder):
 19 |     """
 20 |     This class defines the `MobileViTv2 <https://arxiv.org/abs/2206.02680>`_ architecture
 21 |     """
 22 | 
 23 |     def __init__(self, opts, *args, **kwargs) -> None:
 24 |         num_classes = getattr(opts, "model.classification.n_classes", 1000)
 25 |         pool_type = getattr(opts, "model.layer.global_pool", "mean")
 26 | 
 27 |         mobilevit_config = get_configuration(opts=opts)
 28 |         image_channels = mobilevit_config["layer0"]["img_channels"]
 29 |         out_channels = mobilevit_config["layer0"]["out_channels"]
 30 | 
 31 |         super().__init__(opts, *args, **kwargs)
 32 | 
 33 |         # store model configuration in a dictionary
 34 |         self.model_conf_dict = dict()
 35 |         self.conv_1 = ConvLayer2d(
 36 |             opts=opts,
 37 |             in_channels=image_channels,
 38 |             out_channels=out_channels,
 39 |             kernel_size=3,
 40 |             stride=2,
 41 |             use_norm=True,
 42 |             use_act=True,
 43 |         )
 44 | 
 45 |         self.model_conf_dict["conv1"] = {"in": image_channels, "out": out_channels}
 46 | 
 47 |         in_channels = out_channels
 48 |         self.layer_1, out_channels = self._make_layer(
 49 |             opts=opts, input_channel=in_channels, cfg=mobilevit_config["layer1"]
 50 |         )
 51 |         self.model_conf_dict["layer1"] = {"in": in_channels, "out": out_channels}
 52 | 
 53 |         in_channels = out_channels
 54 |         self.layer_2, out_channels = self._make_layer(
 55 |             opts=opts, input_channel=in_channels, cfg=mobilevit_config["layer2"]
 56 |         )
 57 |         self.model_conf_dict["layer2"] = {"in": in_channels, "out": out_channels}
 58 | 
 59 |         in_channels = out_channels
 60 |         self.layer_3, out_channels = self._make_layer(
 61 |             opts=opts, input_channel=in_channels, cfg=mobilevit_config["layer3"]
 62 |         )
 63 |         self.model_conf_dict["layer3"] = {"in": in_channels, "out": out_channels}
 64 | 
 65 |         in_channels = out_channels
 66 |         self.layer_4, out_channels = self._make_layer(
 67 |             opts=opts,
 68 |             input_channel=in_channels,
 69 |             cfg=mobilevit_config["layer4"],
 70 |             dilate=self.dilate_l4,
 71 |         )
 72 |         self.model_conf_dict["layer4"] = {"in": in_channels, "out": out_channels}
 73 | 
 74 |         in_channels = out_channels
 75 |         self.layer_5, out_channels = self._make_layer(
 76 |             opts=opts,
 77 |             input_channel=in_channels,
 78 |             cfg=mobilevit_config["layer5"],
 79 |             dilate=self.dilate_l5,
 80 |         )
 81 |         self.model_conf_dict["layer5"] = {"in": in_channels, "out": out_channels}
 82 | 
 83 |         self.conv_1x1_exp = Identity()
 84 |         self.model_conf_dict["exp_before_cls"] = {
 85 |             "in": out_channels,
 86 |             "out": out_channels,
 87 |         }
 88 | 
 89 |         self.classifier = nn.Sequential(
 90 |             GlobalPool(pool_type=pool_type, keep_dim=False),
 91 |             LinearLayer(in_features=out_channels, out_features=num_classes, bias=True),
 92 |         )
 93 | 
 94 |         # check model
 95 |         self.check_model()
 96 | 
 97 |         # weight initialization
 98 |         self.reset_parameters(opts=opts)
 99 | 
100 |         self.load_state_dict(torch.load("/home/jianming/work/Fast_Switch/NN_Model/ml-cvnets/mobilevitv2_results/width_0_5_0/mobilevitv2-0.5.pt"))
101 | 
102 | 
103 |     @classmethod
104 |     def add_arguments(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
105 |         group = parser.add_argument_group(title=cls.__name__)
106 |         group.add_argument(
107 |             "--model.classification.mitv2.attn-dropout",
108 |             type=float,
109 |             default=0.0,
110 |             help="Dropout in attention layer. Defaults to 0.0",
111 |         )
112 |         group.add_argument(
113 |             "--model.classification.mitv2.ffn-dropout",
114 |             type=float,
115 |             default=0.0,
116 |             help="Dropout between FFN layers. Defaults to 0.0",
117 |         )
118 |         group.add_argument(
119 |             "--model.classification.mitv2.dropout",
120 |             type=float,
121 |             default=0.0,
122 |             help="Dropout in attention layer. Defaults to 0.0",
123 |         )
124 |         group.add_argument(
125 |             "--model.classification.mitv2.width-multiplier",
126 |             type=float,
127 |             default=1.0,
128 |             help="Width multiplier. Defaults to 1.0",
129 |         )
130 |         group.add_argument(
131 |             "--model.classification.mitv2.attn-norm-layer",
132 |             type=str,
133 |             default="layer_norm_2d",
134 |             help="Norm layer in attention block. Defaults to LayerNorm",
135 |         )
136 |         return parser
137 | 
138 |     def _make_layer(
139 |         self, opts, input_channel, cfg: Dict, dilate: Optional[bool] = False
140 |     ) -> Tuple[nn.Sequential, int]:
141 |         block_type = cfg.get("block_type", "mobilevit")
142 |         if block_type.lower() == "mobilevit":
143 |             return self._make_mit_layer(
144 |                 opts=opts, input_channel=input_channel, cfg=cfg, dilate=dilate
145 |             )
146 |         else:
147 |             return self._make_mobilenet_layer(
148 |                 opts=opts, input_channel=input_channel, cfg=cfg
149 |             )
150 | 
151 |     @staticmethod
152 |     def _make_mobilenet_layer(
153 |         opts, input_channel: int, cfg: Dict
154 |     ) -> Tuple[nn.Sequential, int]:
155 |         output_channels = cfg.get("out_channels")
156 |         num_blocks = cfg.get("num_blocks", 2)
157 |         expand_ratio = cfg.get("expand_ratio", 4)
158 |         block = []
159 | 
160 |         for i in range(num_blocks):
161 |             stride = cfg.get("stride", 1) if i == 0 else 1
162 | 
163 |             layer = InvertedResidual(
164 |                 opts=opts,
165 |                 in_channels=input_channel,
166 |                 out_channels=output_channels,
167 |                 stride=stride,
168 |                 expand_ratio=expand_ratio,
169 |             )
170 |             block.append(layer)
171 |             input_channel = output_channels
172 |         return nn.Sequential(*block), input_channel
173 | 
174 |     def _make_mit_layer(
175 |         self, opts, input_channel, cfg: Dict, dilate: Optional[bool] = False
176 |     ) -> Tuple[nn.Sequential, int]:
177 |         prev_dilation = self.dilation
178 |         block = []
179 |         stride = cfg.get("stride", 1)
180 | 
181 |         if stride == 2:
182 |             if dilate:
183 |                 self.dilation *= 2
184 |                 stride = 1
185 | 
186 |             layer = InvertedResidual(
187 |                 opts=opts,
188 |                 in_channels=input_channel,
189 |                 out_channels=cfg.get("out_channels"),
190 |                 stride=stride,
191 |                 expand_ratio=cfg.get("mv_expand_ratio", 4),
192 |                 dilation=prev_dilation,
193 |             )
194 | 
195 |             block.append(layer)
196 |             input_channel = cfg.get("out_channels")
197 | 
198 |         attn_unit_dim = cfg["attn_unit_dim"]
199 |         ffn_multiplier = cfg.get("ffn_multiplier")
200 | 
201 |         dropout = getattr(opts, "model.classification.mitv2.dropout", 0.0)
202 | 
203 |         block.append(
204 |             Block(
205 |                 opts=opts,
206 |                 in_channels=input_channel,
207 |                 attn_unit_dim=attn_unit_dim,
208 |                 ffn_multiplier=ffn_multiplier,
209 |                 n_attn_blocks=cfg.get("attn_blocks", 1),
210 |                 patch_h=cfg.get("patch_h", 2),
211 |                 patch_w=cfg.get("patch_w", 2),
212 |                 dropout=dropout,
213 |                 ffn_dropout=getattr(
214 |                     opts, "model.classification.mitv2.ffn_dropout", 0.0
215 |                 ),
216 |                 attn_dropout=getattr(
217 |                     opts, "model.classification.mitv2.attn_dropout", 0.0
218 |                 ),
219 |                 conv_ksize=3,
220 |                 attn_norm_layer=getattr(
221 |                     opts, "model.classification.mitv2.attn_norm_layer", "layer_norm_2d"
222 |                 ),
223 |                 dilation=self.dilation,
224 |             )
225 |         )
226 | 
227 |         return nn.Sequential(*block), input_channel
228 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/cifar10_models/googlenet.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from collections import namedtuple
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | __all__ = ["GoogLeNet", "googlenet"]
  9 | 
 10 | 
 11 | _GoogLeNetOuputs = namedtuple(
 12 |     "GoogLeNetOuputs", ["logits", "aux_logits2", "aux_logits1"]
 13 | )
 14 | 
 15 | 
 16 | def googlenet(pretrained=False, progress=True, device="cpu", **kwargs):
 17 |     r"""GoogLeNet (Inception v1) model architecture from
 18 |     `"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.
 19 | 
 20 |     Args:
 21 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
 22 |         progress (bool): If True, displays a progress bar of the download to stderr
 23 |         aux_logits (bool): If True, adds two auxiliary branches that can improve training.
 24 |             Default: *False* when pretrained is True otherwise *True*
 25 |         transform_input (bool): If True, preprocesses the input according to the method with which it
 26 |             was trained on ImageNet. Default: *False*
 27 |     """
 28 |     model = GoogLeNet()
 29 |     if pretrained:
 30 |         script_dir = os.path.dirname(__file__)
 31 |         state_dict = torch.load(
 32 |             script_dir + "/state_dicts/googlenet.pt", map_location=device
 33 |         )
 34 |         model.load_state_dict(state_dict)
 35 |     return model
 36 | 
 37 | 
 38 | class GoogLeNet(nn.Module):
 39 | 
 40 |     # CIFAR10: aux_logits True->False
 41 |     def __init__(self, num_classes=10, aux_logits=False, transform_input=False):
 42 |         super(GoogLeNet, self).__init__()
 43 |         self.aux_logits = aux_logits
 44 |         self.transform_input = transform_input
 45 | 
 46 |         # CIFAR10: out_channels 64->192, kernel_size 7->3, stride 2->1, padding 3->1
 47 |         self.conv1 = BasicConv2d(3, 192, kernel_size=3, stride=1, padding=1)
 48 |         #         self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
 49 |         #         self.conv2 = BasicConv2d(64, 64, kernel_size=1)
 50 |         #         self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
 51 |         #         self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
 52 |         # END
 53 | 
 54 |         self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
 55 |         self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
 56 | 
 57 |         # CIFAR10: padding 0->1, ciel_model True->False
 58 |         self.maxpool3 = nn.MaxPool2d(3, stride=2, padding=1, ceil_mode=False)
 59 |         # END
 60 | 
 61 |         self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
 62 |         self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
 63 |         self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
 64 |         self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
 65 |         self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
 66 | 
 67 |         # CIFAR10: kernel_size 2->3, padding 0->1, ciel_model True->False
 68 |         self.maxpool4 = nn.MaxPool2d(3, stride=2, padding=1, ceil_mode=False)
 69 |         # END
 70 | 
 71 |         self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
 72 |         self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)
 73 | 
 74 |         if aux_logits:
 75 |             self.aux1 = InceptionAux(512, num_classes)
 76 |             self.aux2 = InceptionAux(528, num_classes)
 77 | 
 78 |         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
 79 |         self.dropout = nn.Dropout(0.2)
 80 |         self.fc = nn.Linear(1024, num_classes)
 81 | 
 82 |     #         if init_weights:
 83 |     #             self._initialize_weights()
 84 | 
 85 |     #     def _initialize_weights(self):
 86 |     #         for m in self.modules():
 87 |     #             if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
 88 |     #                 import scipy.stats as stats
 89 |     #                 X = stats.truncnorm(-2, 2, scale=0.01)
 90 |     #                 values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype)
 91 |     #                 values = values.view(m.weight.size())
 92 |     #                 with torch.no_grad():
 93 |     #                     m.weight.copy_(values)
 94 |     #             elif isinstance(m, nn.BatchNorm2d):
 95 |     #                 nn.init.constant_(m.weight, 1)
 96 |     #                 nn.init.constant_(m.bias, 0)
 97 | 
 98 |     def forward(self, x):
 99 |         if self.transform_input:
100 |             x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
101 |             x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
102 |             x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
103 |             x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
104 | 
105 |         # N x 3 x 224 x 224
106 |         x = self.conv1(x)
107 | 
108 |         # CIFAR10
109 |         # N x 64 x 112 x 112
110 |         #         x = self.maxpool1(x)
111 |         # N x 64 x 56 x 56
112 |         #         x = self.conv2(x)
113 |         # N x 64 x 56 x 56
114 |         #         x = self.conv3(x)
115 |         # N x 192 x 56 x 56
116 |         #         x = self.maxpool2(x)
117 |         # END
118 | 
119 |         # N x 192 x 28 x 28
120 |         x = self.inception3a(x)
121 |         # N x 256 x 28 x 28
122 |         x = self.inception3b(x)
123 |         # N x 480 x 28 x 28
124 |         x = self.maxpool3(x)
125 |         # N x 480 x 14 x 14
126 |         x = self.inception4a(x)
127 |         # N x 512 x 14 x 14
128 |         if self.training and self.aux_logits:
129 |             aux1 = self.aux1(x)
130 | 
131 |         x = self.inception4b(x)
132 |         # N x 512 x 14 x 14
133 |         x = self.inception4c(x)
134 |         # N x 512 x 14 x 14
135 |         x = self.inception4d(x)
136 |         # N x 528 x 14 x 14
137 |         if self.training and self.aux_logits:
138 |             aux2 = self.aux2(x)
139 | 
140 |         x = self.inception4e(x)
141 |         # N x 832 x 14 x 14
142 |         x = self.maxpool4(x)
143 |         # N x 832 x 7 x 7
144 |         x = self.inception5a(x)
145 |         # N x 832 x 7 x 7
146 |         x = self.inception5b(x)
147 |         # N x 1024 x 7 x 7
148 | 
149 |         x = self.avgpool(x)
150 |         # N x 1024 x 1 x 1
151 |         x = x.view(x.size(0), -1)
152 |         # N x 1024
153 |         x = self.dropout(x)
154 |         x = self.fc(x)
155 |         # N x 1000 (num_classes)
156 |         if self.training and self.aux_logits:
157 |             return _GoogLeNetOuputs(x, aux2, aux1)
158 |         return x
159 | 
160 | 
161 | class Inception(nn.Module):
162 |     def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
163 |         super(Inception, self).__init__()
164 | 
165 |         self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)
166 | 
167 |         self.branch2 = nn.Sequential(
168 |             BasicConv2d(in_channels, ch3x3red, kernel_size=1),
169 |             BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1),
170 |         )
171 | 
172 |         self.branch3 = nn.Sequential(
173 |             BasicConv2d(in_channels, ch5x5red, kernel_size=1),
174 |             BasicConv2d(ch5x5red, ch5x5, kernel_size=3, padding=1),
175 |         )
176 | 
177 |         self.branch4 = nn.Sequential(
178 |             nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
179 |             BasicConv2d(in_channels, pool_proj, kernel_size=1),
180 |         )
181 | 
182 |     def forward(self, x):
183 |         branch1 = self.branch1(x)
184 |         branch2 = self.branch2(x)
185 |         branch3 = self.branch3(x)
186 |         branch4 = self.branch4(x)
187 | 
188 |         outputs = [branch1, branch2, branch3, branch4]
189 |         return torch.cat(outputs, 1)
190 | 
191 | 
192 | class InceptionAux(nn.Module):
193 |     def __init__(self, in_channels, num_classes):
194 |         super(InceptionAux, self).__init__()
195 |         self.conv = BasicConv2d(in_channels, 128, kernel_size=1)
196 | 
197 |         self.fc1 = nn.Linear(2048, 1024)
198 |         self.fc2 = nn.Linear(1024, num_classes)
199 | 
200 |     def forward(self, x):
201 |         # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
202 |         x = F.adaptive_avg_pool2d(x, (4, 4))
203 |         # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
204 |         x = self.conv(x)
205 |         # N x 128 x 4 x 4
206 |         x = x.view(x.size(0), -1)
207 |         # N x 2048
208 |         x = F.relu(self.fc1(x), inplace=True)
209 |         # N x 2048
210 |         x = F.dropout(x, 0.7, training=self.training)
211 |         # N x 2048
212 |         x = self.fc2(x)
213 |         # N x 1024
214 | 
215 |         return x
216 | 
217 | 
218 | class BasicConv2d(nn.Module):
219 |     def __init__(self, in_channels, out_channels, **kwargs):
220 |         super(BasicConv2d, self).__init__()
221 |         self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
222 |         self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
223 | 
224 |     def forward(self, x):
225 |         x = self.conv(x)
226 |         x = self.bn(x)
227 |         return F.relu(x, inplace=True)
228 | 


--------------------------------------------------------------------------------
/src/resnet_model_2.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Modified from https://raw.githubusercontent.com/pytorch/vision/v0.9.1/torchvision/models/resnet.py
  3 | 
  4 | BSD 3-Clause License
  5 | 
  6 | Copyright (c) Soumith Chintala 2016,
  7 | All rights reserved.
  8 | 
  9 | Redistribution and use in source and binary forms, with or without
 10 | modification, are permitted provided that the following conditions are met:
 11 | 
 12 | * Redistributions of source code must retain the above copyright notice, this
 13 |   list of conditions and the following disclaimer.
 14 | 
 15 | * Redistributions in binary form must reproduce the above copyright notice,
 16 |   this list of conditions and the following disclaimer in the documentation
 17 |   and/or other materials provided with the distribution.
 18 | 
 19 | * Neither the name of the copyright holder nor the names of its
 20 |   contributors may be used to endorse or promote products derived from
 21 |   this software without specific prior written permission.
 22 | 
 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 26 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 33 | '''
 34 | import sys
 35 | import torch.nn as nn
 36 | try:
 37 |     from torch.hub import load_state_dict_from_url
 38 | except ImportError:
 39 |     from torch.utils.model_zoo import load_url as load_state_dict_from_url
 40 | 
 41 | from functools import partial
 42 | from typing import Dict, Type, Any, Callable, Union, List, Optional
 43 | from torch import Tensor
 44 | 
 45 | 
 46 | cifar10_pretrained_weight_urls = {
 47 |     'resnet20': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet20-4118986f.pt',
 48 |     'resnet32': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet32-ef93fc4d.pt',
 49 |     'resnet44': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet44-2a3cabcb.pt',
 50 |     'resnet56': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet56-187c023a.pt',
 51 | }
 52 | 
 53 | cifar100_pretrained_weight_urls = {
 54 |     'resnet20': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet20-23dac2f1.pt',
 55 |     'resnet32': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet32-84213ce6.pt',
 56 |     'resnet44': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet44-ffe32858.pt',
 57 |     'resnet56': 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet56-f2eff4c8.pt',
 58 | }
 59 | 
 60 | 
 61 | def conv3x3(in_planes, out_planes, stride=1):
 62 |     """3x3 convolution with padding"""
 63 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
 64 | 
 65 | 
 66 | def conv1x1(in_planes, out_planes, stride=1):
 67 |     """1x1 convolution"""
 68 |     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
 69 | 
 70 | 
 71 | class BasicBlock(nn.Module):
 72 |     expansion: int = 1
 73 | 
 74 | 
 75 |     def __init__(
 76 |         self,
 77 |         inplanes: int,
 78 |         planes: int,
 79 |         stride: int = 1,
 80 |         downsample: Optional[nn.Module] = None,
 81 |         groups: int = 1,
 82 |         base_width: int = 64,
 83 |         dilation: int = 1,
 84 |         norm_layer: Optional[Callable[..., nn.Module]] = None
 85 |     ) -> None:
 86 |         super(BasicBlock, self).__init__()
 87 |         if norm_layer is None:
 88 |             norm_layer = nn.BatchNorm2d
 89 |         if groups != 1 or base_width != 64:
 90 |             raise ValueError('BasicBlock only supports groups=1 and base_width=64')
 91 |         if dilation > 1:
 92 |             raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
 93 |         # Both self.conv1 and self.downsample layers downsample the input when stride != 1
 94 |         self.conv1 = conv3x3(inplanes, planes, stride)
 95 |         self.bn1 = norm_layer(planes)
 96 |         self.relu1 = nn.ReLU(inplace=True)
 97 |         self.conv2 = conv3x3(planes, planes)
 98 |         self.bn2 = norm_layer(planes)
 99 |         self.relu2 = nn.ReLU(inplace=True)
100 |         self.downsample = downsample
101 |         self.stride = stride
102 |         self.skip_add = nn.quantized.FloatFunctional()
103 | 
104 | 
105 |     def forward(self, x: Tensor) -> Tensor:
106 |         identity = x
107 | 
108 |         out = self.conv1(x)
109 |         out = self.bn1(out)
110 |         out = self.relu1(out)
111 | 
112 |         out = self.conv2(out)
113 |         out = self.bn2(out)
114 | 
115 |         if self.downsample is not None:
116 |             identity = self.downsample(x)
117 | 
118 |         #out += identity
119 |         out = self.skip_add.add(out, identity)
120 |         out = self.relu2(out)
121 | 
122 |         return out
123 | 
124 | class CifarResNet(nn.Module):
125 | 
126 |     def __init__(self, block, layers, num_classes=10):
127 |         super(CifarResNet, self).__init__()
128 |         self.inplanes = 16
129 |         self.conv1 = conv3x3(3, 16)
130 |         self.bn1 = nn.BatchNorm2d(16)
131 |         self.relu = nn.ReLU(inplace=True)
132 | 
133 |         self.layer1 = self._make_layer(block, 16, layers[0])
134 |         self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
135 |         self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
136 | 
137 |         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
138 |         self.fc = nn.Linear(64 * block.expansion, num_classes)
139 | 
140 |         for m in self.modules():
141 |             if isinstance(m, nn.Conv2d):
142 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
143 |             elif isinstance(m, nn.BatchNorm2d):
144 |                 nn.init.constant_(m.weight, 1)
145 |                 nn.init.constant_(m.bias, 0)
146 | 
147 |     def _make_layer(self, block, planes, blocks, stride=1):
148 |         downsample = None
149 |         if stride != 1 or self.inplanes != planes * block.expansion:
150 |             downsample = nn.Sequential(
151 |                 conv1x1(self.inplanes, planes * block.expansion, stride),
152 |                 nn.BatchNorm2d(planes * block.expansion),
153 |             )
154 | 
155 |         layers = []
156 |         layers.append(block(self.inplanes, planes, stride, downsample))
157 |         self.inplanes = planes * block.expansion
158 |         for _ in range(1, blocks):
159 |             layers.append(block(self.inplanes, planes))
160 | 
161 |         return nn.Sequential(*layers)
162 | 
163 |     def forward(self, x):
164 |         x = self.conv1(x)
165 |         x = self.bn1(x)
166 |         x = self.relu(x)
167 | 
168 |         x = self.layer1(x)
169 |         x = self.layer2(x)
170 |         x = self.layer3(x)
171 | 
172 |         x = self.avgpool(x)
173 |         x = x.view(x.size(0), -1)
174 |         x = self.fc(x)
175 | 
176 |         return x
177 | 
178 | 
179 | def _resnet(
180 |     arch: str,
181 |     layers: List[int],
182 |     model_urls: Dict[str, str],
183 |     progress: bool = True,
184 |     pretrained: bool = False,
185 |     **kwargs: Any
186 | ) -> CifarResNet:
187 |     model = CifarResNet(BasicBlock, layers, **kwargs)
188 |     if pretrained:
189 |         state_dict = load_state_dict_from_url(model_urls[arch],
190 |                                               progress=progress)
191 |         model.load_state_dict(state_dict)
192 |     return model
193 | 
194 | 
195 | def cifar10_resnet20(*args, **kwargs) -> CifarResNet: pass
196 | def cifar10_resnet32(*args, **kwargs) -> CifarResNet: pass
197 | def cifar10_resnet44(*args, **kwargs) -> CifarResNet: pass
198 | def cifar10_resnet56(*args, **kwargs) -> CifarResNet: pass
199 | 
200 | 
201 | def cifar100_resnet20(*args, **kwargs) -> CifarResNet: pass
202 | def cifar100_resnet32(*args, **kwargs) -> CifarResNet: pass
203 | def cifar100_resnet44(*args, **kwargs) -> CifarResNet: pass
204 | def cifar100_resnet56(*args, **kwargs) -> CifarResNet: pass
205 | 
206 | 
207 | thismodule = sys.modules[__name__]
208 | for dataset in ["cifar10", "cifar100"]:
209 |     for layers, model_name in zip([[3]*3, [5]*3, [7]*3, [9]*3],
210 |                                   ["resnet20", "resnet32", "resnet44", "resnet56"]):
211 |         method_name = f"{dataset}_{model_name}"
212 |         model_urls = cifar10_pretrained_weight_urls if dataset == "cifar10" else cifar100_pretrained_weight_urls
213 |         num_classes = 10 if dataset == "cifar10" else 100
214 |         setattr(
215 |             thismodule,
216 |             method_name,
217 |             partial(_resnet,
218 |                     arch=model_name,
219 |                     layers=layers,
220 |                     model_urls=model_urls,
221 |                     num_classes=num_classes)
222 |         )
223 | 


--------------------------------------------------------------------------------
/src/CT_cvnet.py:
--------------------------------------------------------------------------------
  1 | from util import *
  2 | from custom_module import *
  3 | from pretrained_model import *
  4 | import numpy as np
  5 | global_config = load_model_yaml("./global_config/", "global_config.yaml")
  6 | 
  7 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path,  train_data_loader):
  8 |     if(not os.path.exists(directory_path)):
  9 |             os.mkdir(directory_path)
 10 |     data_type = "_input"
 11 |     for key in layer_nest_dict:
 12 |         my_model = copy.deepcopy(model)
 13 |         layer_name = key
 14 |         print("name: " + layer_name)
 15 |         collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name))
 16 |         replace_layer(my_model, layer_name, collection_layer)
 17 |         run_set(my_model, train_data_loader, "cuda:0")
 18 |         access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt")
 19 |         data = torch.load(directory_path + layer_name + data_type + ".pt")
 20 |         print(data.shape)
 21 | 
 22 | 
 23 | def generate_data_set(dirctory_path , layer_nest_dict, split_point):
 24 |     train_path = "train/"
 25 |     valid_path = "val/"
 26 |     if(not os.path.exists(dirctory_path + train_path)):
 27 |         os.mkdir(dirctory_path + train_path)
 28 |     if(not os.path.exists(dirctory_path + valid_path)):
 29 |         os.mkdir(dirctory_path + valid_path)   
 30 | 
 31 |     for key in layer_nest_dict:
 32 |         data_type = "_input"
 33 |         layer_name = key
 34 |         file_name = layer_name + data_type + ".pt"
 35 |         print(layer_name)
 36 |         data = torch.load(dirctory_path + file_name)
 37 |         b=torch.randperm(data.shape[0])
 38 |         data = data[b]
 39 |         train_data = data[0:split_point]
 40 |         valid_data = data[split_point:data.shape[0]]
 41 |         torch.save(train_data, dirctory_path + train_path + file_name)
 42 |         torch.save(valid_data, dirctory_path + valid_path + file_name)
 43 |         print(train_data.shape)
 44 |         print(valid_data.shape)
 45 | 
 46 | 
 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path):
 48 |     sign_nest_dict = generate_sign_nest_dict(model)
 49 |     validate(model, valid_data_loader)
 50 |     generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader)
 51 |     generate_data_set(input_data_save_path , sign_nest_dict, split_point)
 52 | 
 53 | 
 54 | def CT_train(sign_type, degree, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, epoch = 40):
 55 |     print(sign_type)
 56 |     for key in sign_nest_dict:
 57 |         sign_dict = sign_nest_dict[key]
 58 |         train_path = "train/"
 59 |         val_path = "val/"
 60 |         data_type = "_input"
 61 |         file_name = key + data_type + ".pt"
 62 | 
 63 |         input_data = torch.load(input_data_dirctory + "cvnet_work"+file_name)
 64 |         print(input_data.shape)
 65 |         num_features = input_data.shape[1]
 66 |         min_data = torch.min(input_data)
 67 |         max_data = torch.max(input_data)
 68 |         end_point = max(abs(min_data), abs(max_data))
 69 |         print(f"end point: {end_point}")
 70 |         del input_data
 71 |         init_coef = generate_init_coeffcients(sig_odd, degree, -end_point, end_point, scale=1)
 72 |         print(f"coef: {torch.tensor([init_coef])}")
 73 | 
 74 |         sign_module = Sigmoid_minmax_layer(coef=torch.tensor([init_coef]), degree=[(degree+1)//2],scale=sign_scale)
 75 |         print("name: ", key)  
 76 |         my_model = SiLU_minmax_bn_layer(sigmoid=sign_module, num_features=num_features)
 77 |         ref_model = nn.SiLU()
 78 | 
 79 | 
 80 | 
 81 | 
 82 |         # test_input = ((torch.rand(100) - 0.5) * 100).to("cuda:0")
 83 |         # test_output = my_model.forward(copy.deepcopy(test_input))
 84 |         # test_output_ref = ref_model.forward(copy.deepcopy(test_input))
 85 |         # print(test_input)
 86 |         # print(test_output)
 87 | 
 88 | 
 89 |         # test_input_list = test_input.to("cpu").tolist()
 90 |         # test_output_list = test_output.to("cpu").tolist()
 91 |         # test_output_list_ref = test_output_ref.to("cpu").tolist()
 92 | 
 93 |         # test_coef = generate_init_coeffcients_numpy(sig_odd, degree, -end_point, end_point, scale=1)
 94 |         # pr = np.poly1d(test_coef)
 95 | 
 96 |         # plt.plot(test_input_list, test_output_list_ref, '.', test_input_list,test_output_list,'.', markersize=10)
 97 |         # plt.plot(test_input_list,test_input_list * (pr(test_input_list) + 0.5), '.',markersize = 4)
 98 |         # plt.ylim(-2, 50)
 99 |         # plt.savefig("test")
100 |         # plt.show()
101 | 
102 | 
103 | 
104 |         
105 | 
106 | 
107 |         optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0)
108 |         scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4)
109 | 
110 |         print(file_name)
111 |         train_data = torch.load(input_data_dirctory + train_path + file_name)
112 |         valid_data = torch.load(input_data_dirctory + val_path + file_name)
113 |         for epoch_i in range(epoch):
114 |             train_loss_meter = AverageMeter("train loss")
115 |             val_loss_meter = AverageMeter("val loss")   
116 |             #train
117 |             for batch_i in range(int(train_data.shape[0] / batch_size)):
118 |                 x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
119 |                 target_y = ref_model.to("cuda:0").forward(x)
120 |                 actual_y = my_model.forward(x)
121 |                 loss_fun = nn.MSELoss()
122 |                 my_model.zero_grad()
123 |                 loss = loss_fun(actual_y, target_y)
124 |                 train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
125 |                 loss.backward()
126 |                 optimizer.step()
127 |             train_loss = train_loss_meter.avg
128 | 
129 |             #valid
130 |             for batch_i in range(int(valid_data.shape[0] / batch_size)):
131 |                 x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
132 |                 target_y = ref_model.to("cuda:0").forward(x)
133 |                 actual_y = my_model.forward(x)
134 |                 loss_fun = nn.MSELoss()
135 |                 loss = loss_fun(actual_y, target_y)
136 |                 val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
137 |             val_loss = val_loss_meter.avg
138 |         
139 |             scheduler.step(val_loss)
140 | 
141 |             print(
142 |                 f"Epoch:{epoch_i + 1}"
143 |                 + f" Train Loss:{train_loss:.10f}"
144 |                 + f" Val Loss: {val_loss:.10f}"
145 |             )
146 | 
147 |         folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
148 |         coef_save_dirctory = input_data_dirctory + folder_name
149 |         if(not os.path.exists(coef_save_dirctory)):
150 |                 os.mkdir(coef_save_dirctory)
151 |         file_name = key + "_coef.pt"
152 |         my_model.sigmoid.save_coef(coef_save_dirctory + file_name)
153 |         print("save: " + folder_name + file_name)
154 |         print("\n")
155 | 
156 | def CT_val(model: nn.Module , layer_nest_dict, directory_path,  val_data_loader, sign_type, output_floder_suffix):
157 |     if(not os.path.exists(directory_path)):
158 |             os.mkdir(directory_path)
159 |     folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
160 |     for key in layer_nest_dict:
161 |         file_name = key + "_coef.pt"
162 |         coef = torch.load(directory_path + folder_name + file_name)
163 |         degree = len(coef.tolist()[0])
164 |         sign_module_CT = Sigmoid_minmax_layer(coef=coef, degree=[degree],scale=1)
165 |         rlays = SiLU_minmax_layer(sigmoid=sign_module_CT)
166 |         layer_name = key
167 |         print("name: " + layer_name)
168 |         print(access_layer(model, layer_name))
169 |         replace_layer(model, layer_name, rlays)
170 |         validate(model, val_data_loader)
171 | 
172 | 
173 | 
174 | 
175 | 
176 | if __name__ == "__main__":
177 |     parser = ArgumentParser()
178 |     parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "mobileVitV2"])
179 |     parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"])
180 |     parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "polyfit"])
181 |     parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False])
182 |     parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/")
183 | 
184 |     args = parser.parse_args()
185 |     print(args)
186 |     if(args.dataset == "cifar10" or args.dataset == "cifar100"):
187 |         split_point = 45000
188 |         batch_size = 100
189 |     elif(args.dataset == "imagenet_1k"):
190 |         split_point = 900
191 |         batch_size = 50
192 |     model = get_pretrained_model(model_name=args.model, dataset=args.dataset)
193 |     # print(model)
194 | 
195 |     valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"])
196 |     train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] )
197 | 
198 |     if(args.data_collection):
199 |         data_collection(model = model,
200 |             
201 |             split_point = split_point, input_data_save_path = args.working_directory)
202 |     
203 |     else:
204 |         nest_dict = generate_sign_nest_dict(model) 
205 |         CT_train(sign_type = args.sign_type, degree=7, sign_scale = 1, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size,
206 |                  input_data_dirctory = args.working_directory , output_floder_suffix= "polyfit_bn_o7", epoch=0)
207 |         # CT_val(model=model, layer_nest_dict=nest_dict, directory_path=args.working_directory,
208 |         #        val_data_loader = valid_data_loader,sign_type=args.sign_type, output_floder_suffix="polyfit_o")
209 | 


--------------------------------------------------------------------------------
/src/CT_cvnet_bn.py:
--------------------------------------------------------------------------------
  1 | from util import *
  2 | from custom_module import *
  3 | from pretrained_model import *
  4 | import numpy as np
  5 | global_config = load_model_yaml("./global_config/", "global_config.yaml")
  6 | 
  7 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path,  train_data_loader):
  8 |     if(not os.path.exists(directory_path)):
  9 |             os.mkdir(directory_path)
 10 |     data_type = "_input"
 11 |     for key in layer_nest_dict:
 12 |         my_model = copy.deepcopy(model)
 13 |         layer_name = key
 14 |         print("name: " + layer_name)
 15 |         collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name))
 16 |         replace_layer(my_model, layer_name, collection_layer)
 17 |         run_set(my_model, train_data_loader, "cuda:0")
 18 |         access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt")
 19 |         data = torch.load(directory_path + layer_name + data_type + ".pt")
 20 |         print(data.shape)
 21 | 
 22 | 
 23 | def generate_data_set(dirctory_path , layer_nest_dict, split_point):
 24 |     train_path = "train/"
 25 |     valid_path = "val/"
 26 |     if(not os.path.exists(dirctory_path + train_path)):
 27 |         os.mkdir(dirctory_path + train_path)
 28 |     if(not os.path.exists(dirctory_path + valid_path)):
 29 |         os.mkdir(dirctory_path + valid_path)   
 30 | 
 31 |     for key in layer_nest_dict:
 32 |         data_type = "_input"
 33 |         layer_name = key
 34 |         file_name = layer_name + data_type + ".pt"
 35 |         print(layer_name)
 36 |         data = torch.load(dirctory_path + file_name)
 37 |         b=torch.randperm(data.shape[0])
 38 |         data = data[b]
 39 |         train_data = data[0:split_point]
 40 |         valid_data = data[split_point:data.shape[0]]
 41 |         torch.save(train_data, dirctory_path + train_path + file_name)
 42 |         torch.save(valid_data, dirctory_path + valid_path + file_name)
 43 |         print(train_data.shape)
 44 |         print(valid_data.shape)
 45 | 
 46 | 
 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path):
 48 |     sign_nest_dict = generate_sign_nest_dict(model)
 49 |     validate(model, valid_data_loader)
 50 |     generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader)
 51 |     generate_data_set(input_data_save_path , sign_nest_dict, split_point)
 52 | 
 53 | 
 54 | def CT_train(sign_type, degree, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix, epoch = 40):
 55 |     print(sign_type)
 56 |     for key in sign_nest_dict:
 57 |         sign_dict = sign_nest_dict[key]
 58 |         train_path = "train/"
 59 |         val_path = "val/"
 60 |         data_type = "_input"
 61 |         file_name = key + data_type + ".pt"
 62 | 
 63 |         input_data = torch.load(input_data_dirctory + "cvnet_work"+file_name)
 64 |         print(input_data.shape)
 65 |         num_features = input_data.shape[1]
 66 |         min_data = torch.min(input_data)
 67 |         max_data = torch.max(input_data)
 68 |         end_point = max(abs(min_data), abs(max_data))
 69 |         print(f"end point: {end_point}")
 70 |         del input_data
 71 |         init_coef = generate_init_coeffcients(sig_odd, degree, -end_point, end_point, scale=1)
 72 |         print(f"coef: {torch.tensor([init_coef])}")
 73 | 
 74 |         sign_module = Sigmoid_minmax_layer(coef=torch.tensor([init_coef]), degree=[(degree+1)//2],scale=sign_scale)
 75 |         print("name: ", key)  
 76 |         my_model = SiLU_minmax_bn_layer(sigmoid=sign_module, num_features=num_features)
 77 |         ref_model = nn.SiLU()
 78 | 
 79 | 
 80 | 
 81 | 
 82 |         # test_input = ((torch.rand(100) - 0.5) * 100).to("cuda:0")
 83 |         # test_output = my_model.forward(copy.deepcopy(test_input))
 84 |         # test_output_ref = ref_model.forward(copy.deepcopy(test_input))
 85 |         # print(test_input)
 86 |         # print(test_output)
 87 | 
 88 | 
 89 |         # test_input_list = test_input.to("cpu").tolist()
 90 |         # test_output_list = test_output.to("cpu").tolist()
 91 |         # test_output_list_ref = test_output_ref.to("cpu").tolist()
 92 | 
 93 |         # test_coef = generate_init_coeffcients_numpy(sig_odd, degree, -end_point, end_point, scale=1)
 94 |         # pr = np.poly1d(test_coef)
 95 | 
 96 |         # plt.plot(test_input_list, test_output_list_ref, '.', test_input_list,test_output_list,'.', markersize=10)
 97 |         # plt.plot(test_input_list,test_input_list * (pr(test_input_list) + 0.5), '.',markersize = 4)
 98 |         # plt.ylim(-2, 50)
 99 |         # plt.savefig("test")
100 |         # plt.show()
101 | 
102 | 
103 | 
104 |         
105 | 
106 | 
107 |         optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0)
108 |         scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4)
109 | 
110 |         print(file_name)
111 |         train_data = torch.load(input_data_dirctory + train_path + file_name)
112 |         valid_data = torch.load(input_data_dirctory + val_path + file_name)
113 |         for epoch_i in range(epoch):
114 |             train_loss_meter = AverageMeter("train loss")
115 |             val_loss_meter = AverageMeter("val loss")   
116 |             #train
117 |             for batch_i in range(int(train_data.shape[0] / batch_size)):
118 |                 x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
119 |                 target_y = ref_model.to("cuda:0").forward(x)
120 |                 actual_y = my_model.forward(x)
121 |                 loss_fun = nn.MSELoss()
122 |                 my_model.zero_grad()
123 |                 loss = loss_fun(actual_y, target_y)
124 |                 train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
125 |                 loss.backward()
126 |                 optimizer.step()
127 |             train_loss = train_loss_meter.avg
128 | 
129 |             #valid
130 |             for batch_i in range(int(valid_data.shape[0] / batch_size)):
131 |                 x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
132 |                 target_y = ref_model.to("cuda:0").forward(x)
133 |                 actual_y = my_model.forward(x)
134 |                 loss_fun = nn.MSELoss()
135 |                 loss = loss_fun(actual_y, target_y)
136 |                 val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
137 |             val_loss = val_loss_meter.avg
138 |         
139 |             scheduler.step(val_loss)
140 | 
141 |             print(
142 |                 f"Epoch:{epoch_i + 1}"
143 |                 + f" Train Loss:{train_loss:.10f}"
144 |                 + f" Val Loss: {val_loss:.10f}"
145 |             )
146 | 
147 |         folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
148 |         coef_save_dirctory = input_data_dirctory + folder_name
149 |         if(not os.path.exists(coef_save_dirctory)):
150 |                 os.mkdir(coef_save_dirctory)
151 |         # file_name = key + "_coef.pt"
152 |         # my_model.sigmoid.save_coef(coef_save_dirctory + file_name)
153 |         file_name = key + "SilU_bn.pt"
154 |         torch.save(my_model, coef_save_dirctory + file_name)
155 |         print("save: " + folder_name + file_name)
156 |         print("\n")
157 | 
158 | def CT_val(model: nn.Module , layer_nest_dict, directory_path,  val_data_loader, sign_type, output_floder_suffix):
159 |     if(not os.path.exists(directory_path)):
160 |             os.mkdir(directory_path)
161 |     folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
162 |     for key in layer_nest_dict:
163 |         file_name = key + "_coef.pt"
164 |         coef = torch.load(directory_path + folder_name + file_name)
165 |         degree = len(coef.tolist()[0])
166 |         sign_module_CT = Sigmoid_minmax_layer(coef=coef, degree=[degree],scale=1)
167 |         rlays = SiLU_minmax_layer(sigmoid=sign_module_CT)
168 |         layer_name = key
169 |         print("name: " + layer_name)
170 |         print(access_layer(model, layer_name))
171 |         replace_layer(model, layer_name, rlays)
172 |         validate(model, val_data_loader)
173 | 
174 | 
175 | 
176 | 
177 | 
178 | if __name__ == "__main__":
179 |     parser = ArgumentParser()
180 |     parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "mobileVitV2"])
181 |     parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"])
182 |     parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "polyfit"])
183 |     parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False])
184 |     parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/")
185 | 
186 |     args = parser.parse_args()
187 |     print(args)
188 |     if(args.dataset == "cifar10" or args.dataset == "cifar100"):
189 |         split_point = 45000
190 |         batch_size = 100
191 |     elif(args.dataset == "imagenet_1k"):
192 |         split_point = 900
193 |         batch_size = 50
194 |     model = get_pretrained_model(model_name=args.model, dataset=args.dataset)
195 |     # print(model)
196 | 
197 |     valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"])
198 |     train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] )
199 | 
200 |     if(args.data_collection):
201 |         data_collection(model = model,
202 |             
203 |             split_point = split_point, input_data_save_path = args.working_directory)
204 |     
205 |     else:
206 |         nest_dict = generate_sign_nest_dict(model) 
207 |         CT_train(sign_type = args.sign_type, degree=7, sign_scale = 1, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size,
208 |                  input_data_dirctory = args.working_directory , output_floder_suffix= "polyfit_bn_o7", epoch=0)
209 |         # CT_val(model=model, layer_nest_dict=nest_dict, directory_path=args.working_directory,
210 |         #        val_data_loader = valid_data_loader,sign_type=args.sign_type, output_floder_suffix="polyfit_o")
211 | 


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/cifar10_models/resnet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import os
  4 | 
  5 | __all__ = [
  6 |     "ResNet",
  7 |     "resnet18",
  8 |     "resnet34",
  9 |     "resnet50",
 10 | ]
 11 | 
 12 | 
 13 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
 14 |     """3x3 convolution with padding"""
 15 |     return nn.Conv2d(
 16 |         in_planes,
 17 |         out_planes,
 18 |         kernel_size=3,
 19 |         stride=stride,
 20 |         padding=dilation,
 21 |         groups=groups,
 22 |         bias=False,
 23 |         dilation=dilation,
 24 |     )
 25 | 
 26 | 
 27 | def conv1x1(in_planes, out_planes, stride=1):
 28 |     """1x1 convolution"""
 29 |     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
 30 | 
 31 | 
 32 | class BasicBlock(nn.Module):
 33 |     expansion = 1
 34 | 
 35 |     def __init__(
 36 |         self,
 37 |         inplanes,
 38 |         planes,
 39 |         stride=1,
 40 |         downsample=None,
 41 |         groups=1,
 42 |         base_width=64,
 43 |         dilation=1,
 44 |         norm_layer=None,
 45 |     ):
 46 |         super(BasicBlock, self).__init__()
 47 |         if norm_layer is None:
 48 |             norm_layer = nn.BatchNorm2d
 49 |         if groups != 1 or base_width != 64:
 50 |             raise ValueError("BasicBlock only supports groups=1 and base_width=64")
 51 |         if dilation > 1:
 52 |             raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
 53 |         # Both self.conv1 and self.downsample layers downsample the input when stride != 1
 54 |         self.conv1 = conv3x3(inplanes, planes, stride)
 55 |         self.bn1 = norm_layer(planes)
 56 |         self.relu = nn.ReLU(inplace=True)
 57 |         self.conv2 = conv3x3(planes, planes)
 58 |         self.bn2 = norm_layer(planes)
 59 |         self.downsample = downsample
 60 |         self.stride = stride
 61 | 
 62 |     def forward(self, x):
 63 |         identity = x
 64 | 
 65 |         out = self.conv1(x)
 66 |         out = self.bn1(out)
 67 |         out = self.relu(out)
 68 | 
 69 |         out = self.conv2(out)
 70 |         out = self.bn2(out)
 71 | 
 72 |         if self.downsample is not None:
 73 |             identity = self.downsample(x)
 74 | 
 75 |         out += identity
 76 |         out = self.relu(out)
 77 | 
 78 |         return out
 79 | 
 80 | 
 81 | class Bottleneck(nn.Module):
 82 |     expansion = 4
 83 | 
 84 |     def __init__(
 85 |         self,
 86 |         inplanes,
 87 |         planes,
 88 |         stride=1,
 89 |         downsample=None,
 90 |         groups=1,
 91 |         base_width=64,
 92 |         dilation=1,
 93 |         norm_layer=None,
 94 |     ):
 95 |         super(Bottleneck, self).__init__()
 96 |         if norm_layer is None:
 97 |             norm_layer = nn.BatchNorm2d
 98 |         width = int(planes * (base_width / 64.0)) * groups
 99 |         # Both self.conv2 and self.downsample layers downsample the input when stride != 1
100 |         self.conv1 = conv1x1(inplanes, width)
101 |         self.bn1 = norm_layer(width)
102 |         self.conv2 = conv3x3(width, width, stride, groups, dilation)
103 |         self.bn2 = norm_layer(width)
104 |         self.conv3 = conv1x1(width, planes * self.expansion)
105 |         self.bn3 = norm_layer(planes * self.expansion)
106 |         self.relu = nn.ReLU(inplace=True)
107 |         self.downsample = downsample
108 |         self.stride = stride
109 | 
110 |     def forward(self, x):
111 |         identity = x
112 | 
113 |         out = self.conv1(x)
114 |         out = self.bn1(out)
115 |         out = self.relu(out)
116 | 
117 |         out = self.conv2(out)
118 |         out = self.bn2(out)
119 |         out = self.relu(out)
120 | 
121 |         out = self.conv3(out)
122 |         out = self.bn3(out)
123 | 
124 |         if self.downsample is not None:
125 |             identity = self.downsample(x)
126 | 
127 |         out += identity
128 |         out = self.relu(out)
129 | 
130 |         return out
131 | 
132 | 
133 | class ResNet(nn.Module):
134 |     def __init__(
135 |         self,
136 |         block,
137 |         layers,
138 |         num_classes=10,
139 |         zero_init_residual=False,
140 |         groups=1,
141 |         width_per_group=64,
142 |         replace_stride_with_dilation=None,
143 |         norm_layer=None,
144 |     ):
145 |         super(ResNet, self).__init__()
146 |         if norm_layer is None:
147 |             norm_layer = nn.BatchNorm2d
148 |         self._norm_layer = norm_layer
149 | 
150 |         self.inplanes = 64
151 |         self.dilation = 1
152 |         if replace_stride_with_dilation is None:
153 |             # each element in the tuple indicates if we should replace
154 |             # the 2x2 stride with a dilated convolution instead
155 |             replace_stride_with_dilation = [False, False, False]
156 |         if len(replace_stride_with_dilation) != 3:
157 |             raise ValueError(
158 |                 "replace_stride_with_dilation should be None "
159 |                 "or a 3-element tuple, got {}".format(replace_stride_with_dilation)
160 |             )
161 |         self.groups = groups
162 |         self.base_width = width_per_group
163 | 
164 |         # CIFAR10: kernel_size 7 -> 3, stride 2 -> 1, padding 3->1
165 |         self.conv1 = nn.Conv2d(
166 |             3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False
167 |         )
168 |         # END
169 | 
170 |         self.bn1 = norm_layer(self.inplanes)
171 |         self.relu = nn.ReLU(inplace=True)
172 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
173 |         self.layer1 = self._make_layer(block, 64, layers[0])
174 |         self.layer2 = self._make_layer(
175 |             block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]
176 |         )
177 |         self.layer3 = self._make_layer(
178 |             block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]
179 |         )
180 |         self.layer4 = self._make_layer(
181 |             block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]
182 |         )
183 |         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
184 |         self.fc = nn.Linear(512 * block.expansion, num_classes)
185 | 
186 |         for m in self.modules():
187 |             if isinstance(m, nn.Conv2d):
188 |                 nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
189 |             elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
190 |                 nn.init.constant_(m.weight, 1)
191 |                 nn.init.constant_(m.bias, 0)
192 | 
193 |         # Zero-initialize the last BN in each residual branch,
194 |         # so that the residual branch starts with zeros, and each residual block behaves like an identity.
195 |         # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
196 |         if zero_init_residual:
197 |             for m in self.modules():
198 |                 if isinstance(m, Bottleneck):
199 |                     nn.init.constant_(m.bn3.weight, 0)
200 |                 elif isinstance(m, BasicBlock):
201 |                     nn.init.constant_(m.bn2.weight, 0)
202 | 
203 |     def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
204 |         norm_layer = self._norm_layer
205 |         downsample = None
206 |         previous_dilation = self.dilation
207 |         if dilate:
208 |             self.dilation *= stride
209 |             stride = 1
210 |         if stride != 1 or self.inplanes != planes * block.expansion:
211 |             downsample = nn.Sequential(
212 |                 conv1x1(self.inplanes, planes * block.expansion, stride),
213 |                 norm_layer(planes * block.expansion),
214 |             )
215 | 
216 |         layers = []
217 |         layers.append(
218 |             block(
219 |                 self.inplanes,
220 |                 planes,
221 |                 stride,
222 |                 downsample,
223 |                 self.groups,
224 |                 self.base_width,
225 |                 previous_dilation,
226 |                 norm_layer,
227 |             )
228 |         )
229 |         self.inplanes = planes * block.expansion
230 |         for _ in range(1, blocks):
231 |             layers.append(
232 |                 block(
233 |                     self.inplanes,
234 |                     planes,
235 |                     groups=self.groups,
236 |                     base_width=self.base_width,
237 |                     dilation=self.dilation,
238 |                     norm_layer=norm_layer,
239 |                 )
240 |             )
241 | 
242 |         return nn.Sequential(*layers)
243 | 
244 |     def forward(self, x):
245 |         x = self.conv1(x)
246 |         x = self.bn1(x)
247 |         x = self.relu(x)
248 |         x = self.maxpool(x)
249 | 
250 |         x = self.layer1(x)
251 |         x = self.layer2(x)
252 |         x = self.layer3(x)
253 |         x = self.layer4(x)
254 | 
255 |         x = self.avgpool(x)
256 |         x = x.reshape(x.size(0), -1)
257 |         x = self.fc(x)
258 | 
259 |         return x
260 | 
261 | 
262 | def _resnet(arch, block, layers, pretrained, progress, device, **kwargs):
263 |     model = ResNet(block, layers, **kwargs)
264 |     if pretrained:
265 |         script_dir = os.path.dirname(__file__)
266 |         state_dict = torch.load(
267 |             script_dir + "/state_dicts/" + arch + ".pt", map_location=device
268 |         )
269 |         model.load_state_dict(state_dict)
270 |     return model
271 | 
272 | 
273 | def resnet18(pretrained=False, progress=True, device="cpu", **kwargs):
274 |     """Constructs a ResNet-18 model.
275 |     Args:
276 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
277 |         progress (bool): If True, displays a progress bar of the download to stderr
278 |     """
279 |     return _resnet(
280 |         "resnet18", BasicBlock, [2, 2, 2, 2], pretrained, progress, device, **kwargs
281 |     )
282 | 
283 | 
284 | def resnet34(pretrained=False, progress=True, device="cpu", **kwargs):
285 |     """Constructs a ResNet-34 model.
286 |     Args:
287 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
288 |         progress (bool): If True, displays a progress bar of the download to stderr
289 |     """
290 |     return _resnet(
291 |         "resnet34", BasicBlock, [3, 4, 6, 3], pretrained, progress, device, **kwargs
292 |     )
293 | 
294 | 
295 | def resnet50(pretrained=False, progress=True, device="cpu", **kwargs):
296 |     """Constructs a ResNet-50 model.
297 |     Args:
298 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
299 |         progress (bool): If True, displays a progress bar of the download to stderr
300 |     """
301 |     return _resnet(
302 |         "resnet50", Bottleneck, [3, 4, 6, 3], pretrained, progress, device, **kwargs
303 |     )
304 | 


--------------------------------------------------------------------------------
/src/CT.py:
--------------------------------------------------------------------------------
  1 | from util import *
  2 | from custom_module import *
  3 | from pretrained_model import *
  4 | global_config = load_model_yaml("./global_config/", "global_config.yaml")
  5 | 
  6 | def generate_layer_input_data(model: nn.Module , layer_nest_dict, directory_path,  train_data_loader):
  7 |     if(not os.path.exists(directory_path)):
  8 |             os.mkdir(directory_path)
  9 |     data_type = "_input"
 10 |     for key in layer_nest_dict:
 11 |         my_model = copy.deepcopy(model)
 12 |         layer_name = key
 13 |         print("name: " + layer_name)
 14 |         collection_layer = Input_data_collection_layer(layer_name, access_layer(my_model, layer_name))
 15 |         replace_layer(my_model, layer_name, collection_layer)
 16 |         run_set(my_model, train_data_loader, "cuda:0")
 17 |         access_layer(my_model, layer_name).save(directory_path, layer_name + data_type + ".pt")
 18 |         data = torch.load(directory_path + layer_name + data_type + ".pt")
 19 |         print(data.shape)
 20 | 
 21 | 
 22 | def generate_data_set(dirctory_path , layer_nest_dict, split_point):
 23 |     train_path = "train/"
 24 |     valid_path = "val/"
 25 |     if(not os.path.exists(dirctory_path + train_path)):
 26 |         os.mkdir(dirctory_path + train_path)
 27 |     if(not os.path.exists(dirctory_path + valid_path)):
 28 |         os.mkdir(dirctory_path + valid_path)   
 29 | 
 30 |     for key in layer_nest_dict:
 31 |         data_type = "_input"
 32 |         layer_name = key
 33 |         file_name = layer_name + data_type + ".pt"
 34 |         print(layer_name)
 35 |         data = torch.load(dirctory_path + file_name)
 36 |         data = data.reshape((-1, ) + data.shape[2:])
 37 |         b=torch.randperm(data.shape[0])
 38 |         data = data[b]
 39 |         train_data = data[0:split_point]
 40 |         valid_data = data[split_point:data.shape[0]]
 41 |         torch.save(train_data, dirctory_path + train_path + file_name)
 42 |         torch.save(valid_data, dirctory_path + valid_path + file_name)
 43 |         print(train_data.shape)
 44 |         print(valid_data.shape)
 45 | 
 46 | 
 47 | def data_collection(model, valid_data_loader, train_data_loader, split_point, input_data_save_path):
 48 |     sign_nest_dict = generate_sign_nest_dict(model)
 49 |     validate(model, valid_data_loader)
 50 |     generate_layer_input_data(model, sign_nest_dict, input_data_save_path, train_data_loader)
 51 |     generate_data_set(input_data_save_path , sign_nest_dict, split_point)
 52 | 
 53 | 
 54 | def CT_train(sign_type, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix):
 55 |     sign_param_dict = Sign_parameter_generator().param_nest_dict[sign_type]
 56 |     print(sign_type)
 57 |     for key in sign_nest_dict:
 58 |         sign_dict = sign_nest_dict[key]
 59 |         scale_name = key + "_scale.pt"
 60 |         if(scale_path != None):
 61 |             sign_scale = torch.load(scale_path + scale_name).item()
 62 |             print("scale: " + str(sign_scale))
 63 |         sign_module = Sign_minmax_layer(coef=sign_param_dict["coef"], degree=sign_param_dict["degree"],scale=sign_scale)
 64 |         print("name: ", key)
 65 |         if(sign_dict["type"] == "ReLU"):
 66 |             my_model = ReLU_sign_layer(sign = sign_module)
 67 |             ref_model = nn.ReLU()
 68 |         elif(sign_dict["type"] == "MaxPool2d"):
 69 |             my_model = Maxpool_sign_layer(sign = sign_module, kernel_size=sign_dict["kernel_size"], stride= sign_dict["stride"], padding=sign_dict["padding"], dilation=sign_dict["dilation"])
 70 |             ref_model = nn.MaxPool2d(kernel_size=sign_dict["kernel_size"], stride= sign_dict["stride"], padding=sign_dict["padding"], dilation=sign_dict["dilation"])
 71 |         else:
 72 |             raise Exception("not implemented layer type.")
 73 |         optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0)
 74 |         scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4)
 75 |         train_path = "train/"
 76 |         val_path = "val/"
 77 |         data_type = "_input"
 78 |         file_name = key + data_type + ".pt"
 79 |         print(file_name)
 80 |         train_data = torch.load(input_data_dirctory + train_path + file_name)
 81 |         valid_data = torch.load(input_data_dirctory + val_path + file_name)
 82 |         for epoch_i in range(40):
 83 |             train_loss_meter = AverageMeter("train loss")
 84 |             val_loss_meter = AverageMeter("val loss")   
 85 |             #train
 86 |             for batch_i in range(int(train_data.shape[0] / batch_size)):
 87 |                 x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
 88 |                 target_y = ref_model.to("cuda:0").forward(x)
 89 |                 actual_y = my_model.forward(x)
 90 |                 loss_fun = nn.MSELoss()
 91 |                 my_model.zero_grad()
 92 |                 loss = loss_fun(actual_y, target_y)
 93 |                 train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
 94 |                 loss.backward()
 95 |                 optimizer.step()
 96 |             train_loss = train_loss_meter.avg
 97 | 
 98 |             #valid
 99 |             for batch_i in range(int(valid_data.shape[0] / batch_size)):
100 |                 x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
101 |                 target_y = ref_model.to("cuda:0").forward(x)
102 |                 actual_y = my_model.forward(x)
103 |                 loss_fun = nn.MSELoss()
104 |                 loss = loss_fun(actual_y, target_y)
105 |                 val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
106 |             val_loss = val_loss_meter.avg
107 |         
108 |             scheduler.step(val_loss)
109 | 
110 |             print(
111 |                 f"Epoch:{epoch_i + 1}"
112 |                 + f" Train Loss:{train_loss:.10f}"
113 |                 + f" Val Loss: {val_loss:.10f}"
114 |             )
115 | 
116 |         folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
117 |         coef_save_dirctory = input_data_dirctory + folder_name
118 |         if(not os.path.exists(coef_save_dirctory)):
119 |                 os.mkdir(coef_save_dirctory)
120 |         file_name = key + "_coef.pt"
121 |         my_model.sign.save_coef(coef_save_dirctory + file_name)
122 |         print("save: " + folder_name + file_name)
123 |         print("\n")
124 | 
125 | def CT_train_SiLU(sign_type, sign_scale, scale_path, sign_nest_dict,batch_size, input_data_dirctory, output_floder_suffix):
126 |     sign_param_dict = Sign_parameter_generator().param_nest_dict[sign_type]
127 |     sigmoid = Sigmoid_minmax_layer(coef=sign_param_dict["coef"], degree=sign_param_dict["degree"],scale=sign_scale)
128 |     my_model = SiLU_minmax_layer(sigmoid=sigmoid)
129 |     ref_model = nn.SiLU()
130 | 
131 |     optimizer = torch.optim.Adam(params=my_model.parameters(), lr=0.01, weight_decay=0)
132 |     scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, threshold= 1e-8, min_lr= 1e-4)
133 |     train_path = "train/"
134 |     val_path = "val/"
135 |     data_type = "_input"
136 |     print(file_name)
137 |     train_data = torch.rand(90000,4)
138 |     valid_data = torch.rand(10000,4)
139 |     for epoch_i in range(40):
140 |         train_loss_meter = AverageMeter("train loss")
141 |         val_loss_meter = AverageMeter("val loss")   
142 |         #train
143 |         for batch_i in range(int(train_data.shape[0] / batch_size)):
144 |             x = train_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
145 |             target_y = ref_model.to("cuda:0").forward(x)
146 |             actual_y = my_model.forward(x)
147 |             loss_fun = nn.MSELoss()
148 |             my_model.zero_grad()
149 |             loss = loss_fun(actual_y, target_y)
150 |             train_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
151 |             loss.backward()
152 |             optimizer.step()
153 |         train_loss = train_loss_meter.avg
154 | 
155 |         #valid
156 |         for batch_i in range(int(valid_data.shape[0] / batch_size)):
157 |             x = valid_data[batch_i * batch_size : (batch_i + 1) * batch_size].to("cuda:0")
158 |             target_y = ref_model.to("cuda:0").forward(x)
159 |             actual_y = my_model.forward(x)
160 |             loss_fun = nn.MSELoss()
161 |             loss = loss_fun(actual_y, target_y)
162 |             val_loss_meter.update(val=float(loss.cpu().item()), n=x.shape[0])
163 |         val_loss = val_loss_meter.avg
164 |     
165 |         scheduler.step(val_loss)
166 | 
167 |         print(
168 |             f"Epoch:{epoch_i + 1}"
169 |             + f" Train Loss:{train_loss:.10f}"
170 |             + f" Val Loss: {val_loss:.10f}"
171 |         )
172 | 
173 |     folder_name = "CT_" + sign_type + "_S" + output_floder_suffix+"_40s/"
174 |     coef_save_dirctory = input_data_dirctory + folder_name
175 |     if(not os.path.exists(coef_save_dirctory)):
176 |             os.mkdir(coef_save_dirctory)
177 |     file_name = "SiLU_test" + "_coef.pt"
178 |     my_model.sign.save_coef(coef_save_dirctory + file_name)
179 |     print("save: " + folder_name + file_name)
180 |     print("\n")
181 | 
182 | 
183 | if __name__ == "__main__":
184 |     parser = ArgumentParser()
185 |     parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32"])
186 |     parser.add_argument("--dataset", type=str,choices=["cifar10", "cifar100", "imagenet_1k"])
187 |     parser.add_argument("-st","--sign_type", type=str, choices=["a7", "2f12g1", "f1g2", "f2g2", "f2g3", "f1", "f2"])
188 |     parser.add_argument("-dc","--data_collection", type=bool, default=False, choices=[True , False])
189 |     parser.add_argument("-wd", "--working_directory", type=str, default="./working_directory/")
190 |     parser.add_argument("-silu", "--silu_test", type=bool, default=False, choices=[True , False])
191 | 
192 |     args = parser.parse_args()
193 |     print(args)
194 |     if(args.dataset == "cifar10" or args.dataset == "cifar100"):
195 |         split_point = 45000
196 |         batch_size = 100
197 |     elif(args.dataset == "imagenet_1k"):
198 |         split_point = 900
199 |         batch_size = 40
200 |     model = get_pretrained_model(model_name=args.model, dataset=args.dataset)
201 |     if(args.data_collection):
202 |         data_collection(model = model,
203 |             valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"] ),
204 |             train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"] ),
205 |             split_point = split_point, input_data_save_path = args.working_directory)
206 |     elif(args.silu_test):
207 |          CT_train_SiLU(sign_type = args.sign_type, sign_scale = 0, scale_path= None, sign_nest_dict = None,batch_size = 100,
208 |                  input_data_dirctory = args.working_directory , output_floder_suffix= "fix")
209 |     else:
210 |         nest_dict = generate_sign_nest_dict(model) 
211 |         CT_train(sign_type = args.sign_type, sign_scale = 0, scale_path= None, sign_nest_dict = nest_dict,batch_size = batch_size,
212 |                  input_data_dirctory = args.working_directory , output_floder_suffix= "dynamic")
213 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SmartPAF: Accurate Low-Degree Polynomial Approximation of Non-polynomial Operators for Fast Private Inference in Homomorphic Encryption
  2 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE)
  3 | 
  4 | ## What is SmartPAF?
  5 | SmartPAF is an open-source training framework to replace non-polynomial operators of ML models, such as ReLU and MaxPooling, with low-degree Polynomial Approximation Function (PAF) and recover accuracy through proposed fine-tuning tricks.  SmartPAF is actively developed by the [Synergy Lab](https://synergy.ece.gatech.edu/) at [Georgia Institute of Technology](https://www.gatech.edu/). For more details about SmartPAF, please visit our [paper](https://arxiv.org/abs/2404.03216).
  6 | 
  7 | 
  8 | ## Motivation
  9 | Secure Fully Homomorphic Encryption (FHE) based Machine Learning Inference Converts Non-polynomial Operators (ReLU/MaxPooling) into Polynomial Approximation Functions (PAF)
 10 | ![](image/secure_ML_inference.png)
 11 | 
 12 | Existing PAFs suffer from either prohibitive latency overhead or low accuracy. SmartPAF proposes four training techniques to enable exploration on the entire PAF degree space and spot high-accuracy low-latency PAF.
 13 | ![](image/RelatedWork.png)
 14 | 
 15 | ** This repo open-sourced the SmartPAF framework code with prerun results**.
 16 | 
 17 | ## Results
 18 | SmartPAF spots optimal 14-degree PAF with 69.4% accuracy (the same accuracy as plaintext pretrained ResNet-18 under ImageNet-1k dataset) and saves 72% latency of 27-degree Minimax PAF.
 19 | 
 20 | | Model-Dataset                             | Technique Setup                                                      | $f_1^2 \circ g_1^2$             | \alpha=7                        | $f_2\circ g_3$                  | $f_2\circ g_2$                  | $f_1\circ g_2$                  |
 21 | |-------------------------------------------|----------------------------------------------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|
 22 | |           Replace ReLU  |
 23 | |                                           | baseline + CT + DS w/o fine tune                                     | 68.60\%                         | 67.70\%                         | 67.00\%                         | 66.50\%                         | 61.70\%                         |
 24 | |                                           | baseline + DS                                                        | 64.30\%                         | 66.70\%                         | 64.20\%                         | 58.30\%                         | 53.10\%                         |
 25 | |                                           | baseline + AT + DS                                                   | 65.20\%                         | 68.30\%                         | 63.70\%                         | 60.50\%                         | 52.00\%                         |
 26 | |                                           | 63.40\%                                                              | 68.10\%                         | 63.30\%                         | 57.60\%                         | 49.50\%                         |
 27 | |        ResNet-18 (ImageNet-1k)              | baseline + PA + DS                                                   | 65.60\%                         | {68.40\%}                | 64.60\%                         | 60.20\%                         | 52.60\%                         |
 28 | |                 69.4%                     | baseline + PA + AT + DS                                              | 64.90\%                         | 67.40\%                         | 64.60\%                         | 56.50\%                         | 47.10\%                         |
 29 | |                                           | baseline + CT + PA + DS                                              | 68.20\%                         | 67.00\%                         | {67.60\%}                | 65.90\%                         | 60.80\%                         |
 30 | |                                           | baseline + CT + PA + AT + DS                                         | {69.00\%}                | 68.10\%                         | 61.40\%                         | {66.50\%}                | {63.10\%}                |
 31 | |                                           | {Accuracy Improvement over Baseline}                     | 1.35$\times$                    | 1.06$\times$                    | 1.37$\times$                    | 2.08$\times$                    | 3.39$\times$                    |
 32 | |                                           | {Accuracy Improvement over ``baseline + DS"}             | +4.7\%(1.07$\times$)            | +1.7\%(1.03$\times$)            | +3.4\%(1.05$\times$)            | +8.2\%(1.14$\times$)            | +10\%(1.19$\times$)             |
 33 | |                                           | {Accuracy Improvement over baseline}                     | 1.07$\times$                    | 1.03$\times$                    | 1.05$\times$                    | 1.14$\times$                    | 1.19$\times$                    |
 34 | |-------------------------------------------|----------------------------------------------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|
 35 | | Replace all Non Polynomial Operators      |
 36 | |                                           | baseline + CT + DS w/o fine tune                                     | 64.4\%                          | 59.4\%                          | 40.9\%                          | 33.1\%                          | 13.3\%                          |
 37 | |                                           | baseline + DS                                                        | 59.6\%                          | 66.2\%                          | 62\%                            | 49\%                            | 37\%                            |
 38 | |        ResNet-18 (ImageNet-1k)            | baseline + SS ({prior work~\cite{Minimax_approximation}})     | 25.5\%                          | 47.1\%                          | 23\%                            | 4.2\%                           | 0\%                             |
 39 | |                 69.4%                          | baseline + CT + PA + AT + DS                                         | {69.9\%}                 | {68\%}                   | {65.7\%}                 | {64.1\%}                 | {57.8}\%                 |
 40 | |                                           | \smartfhe: baseline + CT + PA + AT + SS                              |69.4\%  |67\%    |65.3\%  |57.3\%  |6.5\%   |
 41 | |                                           | {Accuracy Improvement over Baseline}                     | 1.07$\times$                    | 1.22$\times$                    | 1.27$\times$                    | 1.79$\times$                    | 0.22$\times$                    |
 42 | |                                           | {Accuracy Improvement over~\cite{Minimax_approximation}} | +43.9\%(2.72$\times$)           | +19.9\%(1.42$\times$)           | +42.3\%(2.84$\times$)           | +53.1\%(13.64$\times$)          | +6.5\%(\infty)                  |
 43 | |-------------------------------------------|----------------------------------------------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|
 44 | | Replace all Non Polynomial Operators      |
 45 | |                                           | baseline + SS ({prior work~\cite{Minimax_approximation}})     | 91.06\%                         | 81.35\%                         | 76.58\%                         | 58.11\%                         | 43.84\%                         |
 46 | |                                           | baseline + CT + DS                                                   | 93.39\%                         | 93.6\%                          | 93.3\%                          | {92.4\%}                 | {91.53\%}                |
 47 | |         VGG-19 (CiFa-10)                  | baseline + CT + PA + AT + DS                                         | {93.6\%}                 | {93.81\%}                | {93.59\%}                | 91.49\%                         | 91.51\%                         |
 48 | |                93.95                      | \smartfhe: baseline + CT + PA + AT + SS                              |92.16\% |92.62\% |91.51\% |88.45\% |76.93\% |
 49 | |                                           | {Accuracy Improvement over Baseline}                     | 1.07$\times$                    | 1.22$\times$                    | 1.27$\times$                    | 1.79$\times$                    | 0.22$\times$                    |
 50 | |                                           | {Accuracy Improvement over~\cite{Minimax_approximation}} | +1.1\%(1.01$\times$)            | +11.27\%(1.14$\times$)          | +14.93\%(1.2$\times$)           | +30.34\%(1.52$\times$)          | +33.09\%(1.75$\times$)          |
 51 | |-------------------------------------------|----------------------------------------------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|---------------------------------|
 52 | 
 53 | # Ready to run?
 54 | ```
 55 | #Activate Conda
 56 | # Create a python3.8 enviroment
 57 | conda create --name SmartPAF  python=3.8
 58 | 
 59 | # Activate the enviroment
 60 | conda activate SmartPAF
 61 | 
 62 | # Install package
 63 | conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia
 64 | conda install -c conda-forge pytorch-lightning
 65 | 
 66 | # Download cifar10 pretrained models
 67 | cd PyTorch_CIFAR10/
 68 | sh download_weights.sh 
 69 | cd ..
 70 | 
 71 | # Open /global_config/global_config.yaml
 72 | #Edit "dataset_dirctory:" to set a folder to store dataset.
 73 | 
 74 | # Download dataset
 75 | python3 util.py -dd True --dataset cifar10
 76 | python3 util.py -dd True --dataset cifar100
 77 | python3 util.py -dd True --dataset imagenet_1k
 78 | ```
 79 | 
 80 | 
 81 | ## Control Parameters for the library
 82 | ```
 83 | typical step
 84 | For one model with a dataset, one -wd (working directory) should be used
 85 | --model: 		resnet18, vgg19_bn, resnet32
 86 | --dataset: 	cifar10, imagenet, cifar100
 87 | -st: 			a7, 2f12g1, f2f2, f2g3, f1g2
 88 | Supported combination: vgg19_bn & imagenet, vgg19_bn & cifar10, resnet18 & imagenet, and resnet32 & cifar100
 89 | -st is the supported PAF type
 90 | -dc stands for "data collection": 
 91 | ```
 92 | 
 93 | ## ResNet-18 on ImageNet_1k
 94 | ```
 95 | # The following steps must be run in serial, as following steps need results from previous steps.
 96 | # Collection CT data
 97 | python3 ./CT.py --model resnet18 --dataset imagenet_1k -wd ../resnet18_imagenet1k/ -dc True
 98 | # CT
 99 | python3 ./CT.py --model resnet18 --dataset imagenet_1k -wd ../resnet18_imagenet1k/ -st 2f12g1
100 | # PA and AT
101 | python3 ./PA_AT.py --model resnet18 --dataset imagenet_1k -wd ../resnet18_imagenet1k/ -st 2f12g1
102 | # Statistic Scale.
103 | python3 ./SS.py --model resnet18 --dataset imagenet_1k -wd ../resnet18_imagenet1k/ -st 2f12g1
104 | ```
105 | ## ResNet-32 on CiFar-100
106 | ```
107 | # The following steps must be run in serial, as following steps need results from previous steps.
108 | # Collection CT data
109 | python3 ./CT.py --model resnet32 --dataset cifar100 -wd ../resnet32_cifar100/ -dc True
110 | # CT
111 | python3 ./CT.py --model resnet32 --dataset cifar100 -wd ../resnet32_cifar100/ -st 2f12g1
112 | # PA and AT
113 | python3 ./PA_AT.py --model resnet32 --dataset cifar100 -wd ../resnet32_cifar100/ -st 2f12g1
114 | # Statistic Scale.
115 | python3 ./SS.py --model resnet32 --dataset cifar100 -wd ../resnet32_cifar100/ -st 2f12g1
116 | ```
117 | 
118 | ## VGG-19 on CiFar-10
119 | ```
120 | # The following steps must be run in serial, as following steps need results from previous steps.
121 | # Collection CT data
122 | python3 ./CT.py --model vgg19_bn --dataset cifar10 -wd ../vgg19_bn_cifar10/ -dc True
123 | # CT
124 | python3 ./CT.py --model vgg19_bn --dataset cifar10 -wd ../vgg19_bn_cifar10/ -st 2f12g1
125 | # PA and AT
126 | python3 ./PA_AT.py --model vgg19_bn --dataset cifar10 -wd ../vgg19_bn_cifar10/ -st 2f12g1
127 | # Statistic Scale.
128 | python3 ./SS.py --model vgg19_bn --dataset cifar10 -wd ../vgg19_bn_cifar10/ -st 2f12g1
129 | ```
130 | 
131 | ## VGG-19 on ImageNet_1k
132 | ```
133 | # The following steps must be run in serial, as following steps need results from previous steps.
134 | # Collection CT data
135 | python3 ./CT.py --model vgg19_bn --dataset imagenet_1k -wd ../vgg19_bn_imagenet1k/ -dc True
136 | # CT
137 | python3 ./CT.py --model vgg19_bn --dataset imagenet_1k -wd ../vgg19_bn_imagenet1k/ -st 2f12g1
138 | # PA and AT
139 | python3 ./PA_AT.py --model vgg19_bn --dataset imagenet_1k -wd ../vgg19_bn_imagenet1k/ -st 2f12g1
140 | # Statistic Scale.
141 | python3 ./SS.py --model vgg19_bn --dataset imagenet_1k -wd ../vgg19_imagenet1k/ -st 2f12g1
142 | ```
143 | 
144 | # Developers
145 | Jingtian Dang (Georgia Tech, dangjingtian@gatech.edu)
146 | 
147 | Jianming Tong (Georgia Tech, jianming.tong@gatech.edu)
148 | 
149 | Tushar Krishna (Georgia Tech)
150 | 
151 | # Citations
152 | ```
153 | @inproceedings{tong2024accurate,
154 |  author={Jianming Tong and Jingtian Dang and Anupam Golder and Callie Hao and Arijit Raychowdhury and Tushar Krishna},
155 |  booktitle = {Proceedings of Machine Learning and Systems (MLSys)},
156 |  title={Accurate Low-Degree Polynomial Approximation of Non-polynomial Operators for Fast Private Inference in Homomorphic Encryption}, 
157 |  url = {https://arxiv.org/abs/2404.03216},
158 |  year = {2024}
159 | }
160 | ```


--------------------------------------------------------------------------------
/src/custom_module.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import math
  3 | from typing import Tuple, Union
  4 | import torch 
  5 | import torch.nn as nn
  6 | import copy
  7 | from math import pi, sqrt
  8 | 
  9 | 
 10 | 
 11 | # TODO: PA+CT instead of CT -> PA.
 12 | class Input_data_collection_layer(nn.Module):
 13 |     def __init__(self, name, layer : nn.Module):
 14 |         super().__init__()
 15 |         self.data_store = torch.tensor([])
 16 |         self.name = name
 17 |         self.layer = layer
 18 | 
 19 |         
 20 |     def forward(self, x):
 21 |         x_backup = x[None, :].to("cpu")
 22 |         self.data_store = torch.cat((self.data_store, x_backup), 0)
 23 | 
 24 |         res =self.layer.forward(x)
 25 |         return res
 26 | 
 27 | 
 28 |     def save(self, directory , file_name):
 29 |         if(not os.path.exists(directory)):
 30 |             os.mkdir(directory)
 31 |         torch.save(self.data_store, directory + file_name)
 32 | 
 33 | 
 34 | 
 35 | class Input_scale_collection_layer(nn.Module):
 36 |     def __init__(self, name, layer : nn.Module):
 37 |         super().__init__()
 38 |         self.have_first = False
 39 |         self.data_store = torch.tensor(0)
 40 |         self.name = name
 41 |         self.layer = layer
 42 | 
 43 |         
 44 |     def forward(self, x):
 45 |         s_max = torch.max(x).item()
 46 |         s_min = torch.min(x).item()
 47 |         scale = max(abs(s_max), abs(s_min))
 48 |         if(scale > self.data_store.item()):
 49 |             self.data_store = torch.tensor(scale)
 50 |         res =self.layer.forward(x)
 51 |         return res
 52 | 
 53 | 
 54 |     def save(self, directory , file_name):
 55 |         if(not os.path.exists(directory)):
 56 |             os.mkdir(directory)
 57 |         torch.save(self.data_store, directory + file_name)
 58 | 
 59 | 
 60 |         
 61 | class Sign_minmax_layer(nn.Module):
 62 |     def __init__(self, coef, degree, scale = 0, scale_ratio = 1, train_coef = True, param_scale = False):
 63 |         super().__init__()
 64 |         self.scale_ratio = scale_ratio
 65 |         self.degree = degree
 66 |         self.coeflist = nn.Parameter(coef.to("cuda:0"), requires_grad=train_coef).to("cuda:0")
 67 |         self.param_scale = param_scale
 68 |         if(self.param_scale):
 69 |             self.scale =  nn.Parameter(torch.tensor(scale).to("cuda:0"), requires_grad=False).to("cuda:0")
 70 |         else:
 71 |             self.scale = scale
 72 |             
 73 | 
 74 |     def forward(self, x):
 75 | 
 76 |         if(self.scale == 0):
 77 |             s_max = torch.max(x).item()
 78 |             s_min = torch.min(x).item()
 79 |             scale = max(abs(s_max), abs(s_min)) * self.scale_ratio
 80 |         else:
 81 |             if(self.param_scale):
 82 |                 scale = self.scale.item() * self.scale_ratio
 83 |             else:
 84 |                 scale = self.scale * self.scale_ratio
 85 | 
 86 | 
 87 |         # x_bk = torch.clone(x).to(x.device)
 88 |         x = torch.divide(x, scale)
 89 | 
 90 |         coeflist = self.coeflist
 91 | 
 92 |         for compositive_id in range(coeflist.shape[0]):
 93 | 
 94 |             degree_num = self.degree[compositive_id]
 95 | 
 96 |             # x_degree_1 = torch.clone(x).to(x_bk.device)
 97 |             x_degree_2 = torch.mul(x, x)
 98 | 
 99 |             # out = torch.clone(x).to(x_bk.device)
100 |             out = torch.mul(x, coeflist[compositive_id][0]) # x^1 * coe[1]
101 | 
102 |             for i in range(1, degree_num):
103 |                 x = torch.mul(x, x_degree_2)
104 |                 partial_out = torch.mul(x, coeflist[compositive_id][i])
105 |                 out = torch.add(out, partial_out)
106 |             x = torch.clone(out).to(x.device)
107 |         
108 | 
109 |         result = out.to(x.device)
110 |         del x
111 | 
112 |         return result
113 |     
114 |     def set_coef_grad(self, grad):
115 |         self.coeflist.requires_grad = grad
116 | 
117 |     def set_scale_grad(self, grad):
118 |         if(self.param_scale):
119 |             self.scale.requires_grad = grad
120 | 
121 |     def save_coef(self, path_name):
122 |         torch.save(self.coeflist, path_name)
123 | 
124 |     def save_scale(self, path_name):
125 |         if(self.param_scale):
126 |             torch.save(self.scale, path_name)
127 | 
128 | 
129 | class Sigmoid_minmax_layer(nn.Module):
130 |     def __init__(self, coef, degree, scale = 0, scale_ratio = 1, train_coef = True, param_scale = False):
131 |         super().__init__()
132 |         self.scale_ratio = scale_ratio
133 |         self.degree = degree
134 |         self.coeflist = nn.Parameter(coef.to("cuda:0"), requires_grad=train_coef).to("cuda:0")
135 |         self.param_scale = param_scale
136 |         if(self.param_scale):
137 |             self.scale =  nn.Parameter(torch.tensor(scale).to("cuda:0"), requires_grad=False).to("cuda:0")
138 |         else:
139 |             self.scale = scale
140 |             
141 | 
142 |     def forward(self, x):
143 | 
144 |         if(self.scale == 0):
145 |             s_max = torch.max(x).item()
146 |             s_min = torch.min(x).item()
147 |             scale = max(abs(s_max), abs(s_min)) * self.scale_ratio
148 |         else:
149 |             if(self.param_scale):
150 |                 scale = self.scale.item() * self.scale_ratio
151 |             else:
152 |                 scale = self.scale * self.scale_ratio
153 | 
154 | 
155 |         # x_bk = torch.clone(x).to(x.device)
156 |         x = torch.divide(x, scale)
157 | 
158 |         coeflist = self.coeflist
159 | 
160 |         for compositive_id in range(coeflist.shape[0]):
161 | 
162 |             degree_num = self.degree[compositive_id]
163 | 
164 |             # x_degree_1 = torch.clone(x).to(x_bk.device)
165 |             x_degree_2 = torch.mul(x, x)
166 | 
167 |             # out = torch.clone(x).to(x_bk.device)
168 |             out = torch.mul(x, coeflist[compositive_id][0]) # x^1 * coe[1]
169 | 
170 |             for i in range(1, degree_num):
171 |                 x = torch.mul(x, x_degree_2)
172 |                 partial_out = torch.mul(x, coeflist[compositive_id][i])
173 |                 out = torch.add(out, partial_out)
174 |             x = torch.clone(out).to(x.device)
175 |         
176 |         x = x * 0.5 + 0.5
177 |         result = out.to(x.device)
178 |         del x
179 | 
180 |         return result
181 |     
182 |     def set_coef_grad(self, grad):
183 |         self.coeflist.requires_grad = grad
184 | 
185 |     def set_scale_grad(self, grad):
186 |         if(self.param_scale):
187 |             self.scale.requires_grad = grad
188 | 
189 |     def save_coef(self, path_name):
190 |         torch.save(self.coeflist, path_name)
191 | 
192 |     def save_scale(self, path_name):
193 |         if(self.param_scale):
194 |             torch.save(self.scale, path_name)
195 | 
196 | 
197 | 
198 | 
199 | class ReLU_sign_layer(nn.Module):
200 |     def __init__(self, sign:nn.Module):
201 |         super().__init__()
202 |         self.sign = sign
203 | 
204 |     def forward(self, x):
205 |         result = torch.divide(torch.add(x, torch.mul(x, self.sign.forward(x))),2)
206 |         return result
207 |     
208 | class SiLU_minmax_layer(nn.Module):
209 |     def __init__(self, sigmoid:nn.Module):
210 |         super().__init__()
211 |         self.sigmoid = sigmoid
212 | 
213 |     def forward(self, x):
214 |         result = torch.mul(x, self.sign.forward(x))
215 |         return result
216 |     
217 | 
218 | 
219 | class Maxpool_sign_layer(nn.Module):
220 |     def __init__(self, sign:nn.Module, 
221 |                  kernel_size: Union[int, Tuple[int, int]], 
222 |                  stride : Union[int, Tuple[int, int]] = 0, 
223 |                  padding : Union[int, Tuple[int, int]] = 0, 
224 |                  dilation : Union[int, Tuple[int, int]] = 1, ):
225 |         super().__init__()
226 | 
227 |         self.sign = sign
228 | 
229 |         self.kernel_size = self.to_tuple(kernel_size)
230 | 
231 |         if(stride == 0):
232 |             self.stride = self.kernel_size
233 |         else:
234 |             self.stride = self.to_tuple(stride)
235 |         self.padding = self.to_tuple(padding)
236 |         self.dilation = self.to_tuple(dilation)
237 |         self.unfold = nn.Unfold(kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding, stride=self.stride)
238 | 
239 |         
240 | 
241 |     def forward(self, x): 
242 |         x_size = x.size() 
243 |         x_unfold = self.unfold(x)
244 |         x_unfold_size = x_unfold.size()
245 |         x_reshape = torch.reshape(x_unfold, (x_unfold_size[0], -1, int(x_unfold_size[1] / x_size[1]), x_unfold_size[2]))
246 | 
247 |         h_out = math.floor((x_size[2] + 2 * self.padding[0] - self.dilation[0] * (self.kernel_size[0] -1) - 1)/ self.stride[0] + 1)
248 |         w_out = math.floor((x_size[3] + 2 * self.padding[1] - self.dilation[1] * (self.kernel_size[1] -1) - 1)/ self.stride[1] + 1)
249 |         result = self.maxpool(x_reshape).reshape((x_size[0], x_size[1], h_out, w_out))
250 |         return result
251 |     
252 | 
253 |     def to_tuple(self, param:Union[int, Tuple[int, int]]):
254 |         if(isinstance(param, int)):
255 |             param = (param, param)
256 |         return param
257 |         
258 |     def maxpool(self, x):
259 |         x_size = x.size()
260 |         pool_size = x_size[2]
261 | 
262 |         if(pool_size == 1):
263 |             return x
264 |         elif(pool_size == 2):
265 |             return self.max(x[:,:,0,:], x[:,:,1,:])
266 |         else:
267 |             pivot = int(pool_size / 2)
268 |             a = self.maxpool(x[:,:,0:pivot,:])
269 |             b = self.maxpool(x[:,:,pivot:pool_size,:])
270 |             return self.max(a, b)
271 | 
272 | 
273 |     def max(self, a, b):
274 |         a = torch.squeeze(a)
275 |         b = torch.squeeze(b)
276 |         sum = torch.add(a,b)
277 |         diff = torch.sub(a,b)
278 |         sign_diff = self.sign.forward(diff)
279 |         result = torch.divide(torch.add(sum, torch.mul(sign_diff, diff)), 2)
280 |         return result
281 |     
282 | 
283 | class HerPN2d(nn.Module):
284 |     @staticmethod
285 |     def h0(x):
286 |         return torch.ones(x.shape).to(x.device)
287 | 
288 |     @staticmethod
289 |     def h1(x):
290 |         return x
291 | 
292 |     @staticmethod
293 |     def h2(x):
294 |         return (x * x - 1)
295 | 
296 |     def __init__(self, num_features : int, BN_dimension=2 ,BN_copy:nn.Module = None):
297 |         super().__init__()
298 |         self.f = (1 / sqrt(2 * pi), 1 / 2, 1 / sqrt(4 * pi))
299 |         
300 |         if(BN_copy):
301 |             self.bn0 = copy.deepcopy(BN_copy)
302 |             self.bn1 = copy.deepcopy(BN_copy)
303 |             self.bn2 = copy.deepcopy(BN_copy)
304 | 
305 |         elif(BN_dimension == 1):
306 |             self.bn0 = nn.BatchNorm1d(num_features)
307 |             self.bn1 = nn.BatchNorm1d(num_features)
308 |             self.bn2 = nn.BatchNorm1d(num_features)
309 |         else:
310 |             self.bn0 = nn.BatchNorm2d(num_features)
311 |             self.bn1 = nn.BatchNorm2d(num_features)
312 |             self.bn2 = nn.BatchNorm2d(num_features)
313 | 
314 | 
315 | 
316 |         self.bn = (self.bn0, self.bn1, self.bn2)
317 |         self.h = (self.h0, self.h1, self.h2)
318 | 
319 | 
320 |     def forward(self, x):
321 |         result = torch.zeros(x.shape).to(x.device)
322 |         for bn, f, h in zip(self.bn, self.f, self.h):
323 |             poly = torch.mul(f, h(x))
324 |             # print(poly.shape)
325 |             result = torch.add(result, bn(poly))
326 | 
327 |         return result
328 |     
329 | 
330 | class Sigmoid_minmax_layer(nn.Module):
331 |     def __init__(self, coef, degree, scale = 0, scale_ratio = 1, train_coef = True, param_scale = False):
332 |         super().__init__()
333 |         self.scale_ratio = scale_ratio
334 |         self.degree = degree
335 |         self.coeflist = nn.Parameter(coef.to("cuda:0"), requires_grad=train_coef).to("cuda:0")
336 |         self.param_scale = param_scale
337 |         if(self.param_scale):
338 |             self.scale =  nn.Parameter(torch.tensor(scale).to("cuda:0"), requires_grad=False).to("cuda:0")
339 |         else:
340 |             self.scale = scale
341 |             
342 | 
343 |     def forward(self, x):
344 | 
345 |         if(self.scale == 0):
346 |             s_max = torch.max(x).item()
347 |             s_min = torch.min(x).item()
348 |             scale = max(abs(s_max), abs(s_min)) * self.scale_ratio
349 |         else:
350 |             if(self.param_scale):
351 |                 scale = self.scale.item() * self.scale_ratio
352 |             else:
353 |                 scale = self.scale * self.scale_ratio
354 | 
355 | 
356 |         # x_bk = torch.clone(x).to(x.device)
357 |         x = torch.divide(x, scale)
358 | 
359 |         coeflist = self.coeflist
360 | 
361 |         for compositive_id in range(coeflist.shape[0]):
362 | 
363 |             degree_num = self.degree[compositive_id]
364 | 
365 |             # x_degree_1 = torch.clone(x).to(x_bk.device)
366 |             x_degree_2 = torch.mul(x, x)
367 | 
368 |             # out = torch.clone(x).to(x_bk.device)
369 |             out = torch.mul(x, coeflist[compositive_id][0]) # x^1 * coe[1]
370 | 
371 |             for i in range(1, degree_num):
372 |                 x = torch.mul(x, x_degree_2)
373 |                 partial_out = torch.mul(x, coeflist[compositive_id][i])
374 |                 out = torch.add(out, partial_out)
375 |             x = torch.clone(out).to(x.device)
376 |         
377 |         result = (out + 0.5).to(x.device)
378 |         del x
379 | 
380 |         return result
381 |     
382 |     def set_coef_grad(self, grad):
383 |         self.coeflist.requires_grad = grad
384 | 
385 |     def set_scale_grad(self, grad):
386 |         if(self.param_scale):
387 |             self.scale.requires_grad = grad
388 | 
389 |     def save_coef(self, path_name):
390 |         torch.save(self.coeflist, path_name)
391 | 
392 |     def save_scale(self, path_name):
393 |         if(self.param_scale):
394 |             torch.save(self.scale, path_name)
395 | 
396 | class SiLU_minmax_layer(nn.Module):
397 |     def __init__(self, sigmoid:nn.Module):
398 |         super().__init__()
399 |         self.sigmoid = sigmoid
400 | 
401 |     def forward(self, x ):
402 | 
403 | 
404 |         result = torch.mul(x, self.sigmoid.forward(x))
405 |         return result
406 |     
407 | class SiLU_minmax_bn_layer(nn.Module):
408 |     def __init__(self, sigmoid:nn.Module, num_features):
409 |         super().__init__()
410 |         self.sigmoid = sigmoid
411 |         self.bn = nn.BatchNorm2d(num_features).to("cuda:0")
412 | 
413 |     def forward(self, x ):
414 | 
415 | 
416 |         result = torch.mul(x, self.sigmoid.forward(x))
417 |         result = self.bn(result)
418 |         return result


--------------------------------------------------------------------------------
/src/PyTorch_CIFAR10/cifar10_models/inception.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from collections import namedtuple
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | __all__ = ["Inception3", "inception_v3"]
  9 | 
 10 | 
 11 | _InceptionOuputs = namedtuple("InceptionOuputs", ["logits", "aux_logits"])
 12 | 
 13 | 
 14 | def inception_v3(pretrained=False, progress=True, device="cpu", **kwargs):
 15 |     r"""Inception v3 model architecture from
 16 |     `"Rethinking the Inception Architecture for Computer Vision" <http://arxiv.org/abs/1512.00567>`_.
 17 | 
 18 |     .. note::
 19 |         **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
 20 |         N x 3 x 299 x 299, so ensure your images are sized accordingly.
 21 | 
 22 |     Args:
 23 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
 24 |         progress (bool): If True, displays a progress bar of the download to stderr
 25 |         aux_logits (bool): If True, add an auxiliary branch that can improve training.
 26 |             Default: *True*
 27 |         transform_input (bool): If True, preprocesses the input according to the method with which it
 28 |             was trained on ImageNet. Default: *False*
 29 |     """
 30 |     model = Inception3()
 31 |     if pretrained:
 32 |         script_dir = os.path.dirname(__file__)
 33 |         state_dict = torch.load(
 34 |             script_dir + "/state_dicts/inception_v3.pt", map_location=device
 35 |         )
 36 |         model.load_state_dict(state_dict)
 37 |     return model
 38 | 
 39 | 
 40 | class Inception3(nn.Module):
 41 |     # CIFAR10: aux_logits True->False
 42 |     def __init__(self, num_classes=10, aux_logits=False, transform_input=False):
 43 |         super(Inception3, self).__init__()
 44 |         self.aux_logits = aux_logits
 45 |         self.transform_input = transform_input
 46 | 
 47 |         # CIFAR10: stride 2->1, padding 0 -> 1
 48 |         self.Conv2d_1a_3x3 = BasicConv2d(3, 192, kernel_size=3, stride=1, padding=1)
 49 |         #         self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3)
 50 |         #         self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
 51 |         #         self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1)
 52 |         #         self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3)
 53 |         self.Mixed_5b = InceptionA(192, pool_features=32)
 54 |         self.Mixed_5c = InceptionA(256, pool_features=64)
 55 |         self.Mixed_5d = InceptionA(288, pool_features=64)
 56 |         self.Mixed_6a = InceptionB(288)
 57 |         self.Mixed_6b = InceptionC(768, channels_7x7=128)
 58 |         self.Mixed_6c = InceptionC(768, channels_7x7=160)
 59 |         self.Mixed_6d = InceptionC(768, channels_7x7=160)
 60 |         self.Mixed_6e = InceptionC(768, channels_7x7=192)
 61 |         if aux_logits:
 62 |             self.AuxLogits = InceptionAux(768, num_classes)
 63 |         self.Mixed_7a = InceptionD(768)
 64 |         self.Mixed_7b = InceptionE(1280)
 65 |         self.Mixed_7c = InceptionE(2048)
 66 |         self.fc = nn.Linear(2048, num_classes)
 67 | 
 68 |     #         for m in self.modules():
 69 |     #             if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
 70 |     #                 import scipy.stats as stats
 71 |     #                 stddev = m.stddev if hasattr(m, 'stddev') else 0.1
 72 |     #                 X = stats.truncnorm(-2, 2, scale=stddev)
 73 |     #                 values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype)
 74 |     #                 values = values.view(m.weight.size())
 75 |     #                 with torch.no_grad():
 76 |     #                     m.weight.copy_(values)
 77 |     #             elif isinstance(m, nn.BatchNorm2d):
 78 |     #                 nn.init.constant_(m.weight, 1)
 79 |     #                 nn.init.constant_(m.bias, 0)
 80 | 
 81 |     def forward(self, x):
 82 |         if self.transform_input:
 83 |             x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
 84 |             x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
 85 |             x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
 86 |             x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
 87 |         # N x 3 x 299 x 299
 88 |         x = self.Conv2d_1a_3x3(x)
 89 | 
 90 |         # CIFAR10
 91 |         # N x 32 x 149 x 149
 92 |         #         x = self.Conv2d_2a_3x3(x)
 93 |         # N x 32 x 147 x 147
 94 |         #         x = self.Conv2d_2b_3x3(x)
 95 |         # N x 64 x 147 x 147
 96 |         #         x = F.max_pool2d(x, kernel_size=3, stride=2)
 97 |         # N x 64 x 73 x 73
 98 |         #         x = self.Conv2d_3b_1x1(x)
 99 |         # N x 80 x 73 x 73
100 |         #         x = self.Conv2d_4a_3x3(x)
101 |         # N x 192 x 71 x 71
102 |         #         x = F.max_pool2d(x, kernel_size=3, stride=2)
103 |         # N x 192 x 35 x 35
104 |         x = self.Mixed_5b(x)
105 |         # N x 256 x 35 x 35
106 |         x = self.Mixed_5c(x)
107 |         # N x 288 x 35 x 35
108 |         x = self.Mixed_5d(x)
109 |         # N x 288 x 35 x 35
110 |         x = self.Mixed_6a(x)
111 |         # N x 768 x 17 x 17
112 |         x = self.Mixed_6b(x)
113 |         # N x 768 x 17 x 17
114 |         x = self.Mixed_6c(x)
115 |         # N x 768 x 17 x 17
116 |         x = self.Mixed_6d(x)
117 |         # N x 768 x 17 x 17
118 |         x = self.Mixed_6e(x)
119 |         # N x 768 x 17 x 17
120 |         if self.training and self.aux_logits:
121 |             aux = self.AuxLogits(x)
122 |         # N x 768 x 17 x 17
123 |         x = self.Mixed_7a(x)
124 |         # N x 1280 x 8 x 8
125 |         x = self.Mixed_7b(x)
126 |         # N x 2048 x 8 x 8
127 |         x = self.Mixed_7c(x)
128 |         # N x 2048 x 8 x 8
129 |         # Adaptive average pooling
130 |         x = F.adaptive_avg_pool2d(x, (1, 1))
131 |         # N x 2048 x 1 x 1
132 |         x = F.dropout(x, training=self.training)
133 |         # N x 2048 x 1 x 1
134 |         x = x.view(x.size(0), -1)
135 |         # N x 2048
136 |         x = self.fc(x)
137 |         # N x 1000 (num_classes)
138 |         if self.training and self.aux_logits:
139 |             return _InceptionOuputs(x, aux)
140 |         return x
141 | 
142 | 
143 | class InceptionA(nn.Module):
144 |     def __init__(self, in_channels, pool_features):
145 |         super(InceptionA, self).__init__()
146 |         self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1)
147 | 
148 |         self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1)
149 |         self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2)
150 | 
151 |         self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1)
152 |         self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)
153 |         self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, padding=1)
154 | 
155 |         self.branch_pool = BasicConv2d(in_channels, pool_features, kernel_size=1)
156 | 
157 |     def forward(self, x):
158 |         branch1x1 = self.branch1x1(x)
159 | 
160 |         branch5x5 = self.branch5x5_1(x)
161 |         branch5x5 = self.branch5x5_2(branch5x5)
162 | 
163 |         branch3x3dbl = self.branch3x3dbl_1(x)
164 |         branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
165 |         branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
166 | 
167 |         branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
168 |         branch_pool = self.branch_pool(branch_pool)
169 | 
170 |         outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
171 |         return torch.cat(outputs, 1)
172 | 
173 | 
174 | class InceptionB(nn.Module):
175 |     def __init__(self, in_channels):
176 |         super(InceptionB, self).__init__()
177 |         self.branch3x3 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2)
178 | 
179 |         self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1)
180 |         self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)
181 |         self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, stride=2)
182 | 
183 |     def forward(self, x):
184 |         branch3x3 = self.branch3x3(x)
185 | 
186 |         branch3x3dbl = self.branch3x3dbl_1(x)
187 |         branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
188 |         branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
189 | 
190 |         branch_pool = F.max_pool2d(x, kernel_size=3, stride=2)
191 | 
192 |         outputs = [branch3x3, branch3x3dbl, branch_pool]
193 |         return torch.cat(outputs, 1)
194 | 
195 | 
196 | class InceptionC(nn.Module):
197 |     def __init__(self, in_channels, channels_7x7):
198 |         super(InceptionC, self).__init__()
199 |         self.branch1x1 = BasicConv2d(in_channels, 192, kernel_size=1)
200 | 
201 |         c7 = channels_7x7
202 |         self.branch7x7_1 = BasicConv2d(in_channels, c7, kernel_size=1)
203 |         self.branch7x7_2 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3))
204 |         self.branch7x7_3 = BasicConv2d(c7, 192, kernel_size=(7, 1), padding=(3, 0))
205 | 
206 |         self.branch7x7dbl_1 = BasicConv2d(in_channels, c7, kernel_size=1)
207 |         self.branch7x7dbl_2 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0))
208 |         self.branch7x7dbl_3 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3))
209 |         self.branch7x7dbl_4 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0))
210 |         self.branch7x7dbl_5 = BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3))
211 | 
212 |         self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1)
213 | 
214 |     def forward(self, x):
215 |         branch1x1 = self.branch1x1(x)
216 | 
217 |         branch7x7 = self.branch7x7_1(x)
218 |         branch7x7 = self.branch7x7_2(branch7x7)
219 |         branch7x7 = self.branch7x7_3(branch7x7)
220 | 
221 |         branch7x7dbl = self.branch7x7dbl_1(x)
222 |         branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
223 |         branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
224 |         branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
225 |         branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
226 | 
227 |         branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
228 |         branch_pool = self.branch_pool(branch_pool)
229 | 
230 |         outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
231 |         return torch.cat(outputs, 1)
232 | 
233 | 
234 | class InceptionD(nn.Module):
235 |     def __init__(self, in_channels):
236 |         super(InceptionD, self).__init__()
237 |         self.branch3x3_1 = BasicConv2d(in_channels, 192, kernel_size=1)
238 |         self.branch3x3_2 = BasicConv2d(192, 320, kernel_size=3, stride=2)
239 | 
240 |         self.branch7x7x3_1 = BasicConv2d(in_channels, 192, kernel_size=1)
241 |         self.branch7x7x3_2 = BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3))
242 |         self.branch7x7x3_3 = BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0))
243 |         self.branch7x7x3_4 = BasicConv2d(192, 192, kernel_size=3, stride=2)
244 | 
245 |     def forward(self, x):
246 |         branch3x3 = self.branch3x3_1(x)
247 |         branch3x3 = self.branch3x3_2(branch3x3)
248 | 
249 |         branch7x7x3 = self.branch7x7x3_1(x)
250 |         branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
251 |         branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
252 |         branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
253 | 
254 |         branch_pool = F.max_pool2d(x, kernel_size=3, stride=2)
255 |         outputs = [branch3x3, branch7x7x3, branch_pool]
256 |         return torch.cat(outputs, 1)
257 | 
258 | 
259 | class InceptionE(nn.Module):
260 |     def __init__(self, in_channels):
261 |         super(InceptionE, self).__init__()
262 |         self.branch1x1 = BasicConv2d(in_channels, 320, kernel_size=1)
263 | 
264 |         self.branch3x3_1 = BasicConv2d(in_channels, 384, kernel_size=1)
265 |         self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1))
266 |         self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0))
267 | 
268 |         self.branch3x3dbl_1 = BasicConv2d(in_channels, 448, kernel_size=1)
269 |         self.branch3x3dbl_2 = BasicConv2d(448, 384, kernel_size=3, padding=1)
270 |         self.branch3x3dbl_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1))
271 |         self.branch3x3dbl_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0))
272 | 
273 |         self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1)
274 | 
275 |     def forward(self, x):
276 |         branch1x1 = self.branch1x1(x)
277 | 
278 |         branch3x3 = self.branch3x3_1(x)
279 |         branch3x3 = [
280 |             self.branch3x3_2a(branch3x3),
281 |             self.branch3x3_2b(branch3x3),
282 |         ]
283 |         branch3x3 = torch.cat(branch3x3, 1)
284 | 
285 |         branch3x3dbl = self.branch3x3dbl_1(x)
286 |         branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
287 |         branch3x3dbl = [
288 |             self.branch3x3dbl_3a(branch3x3dbl),
289 |             self.branch3x3dbl_3b(branch3x3dbl),
290 |         ]
291 |         branch3x3dbl = torch.cat(branch3x3dbl, 1)
292 | 
293 |         branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
294 |         branch_pool = self.branch_pool(branch_pool)
295 | 
296 |         outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
297 |         return torch.cat(outputs, 1)
298 | 
299 | 
300 | class InceptionAux(nn.Module):
301 |     def __init__(self, in_channels, num_classes):
302 |         super(InceptionAux, self).__init__()
303 |         self.conv0 = BasicConv2d(in_channels, 128, kernel_size=1)
304 |         self.conv1 = BasicConv2d(128, 768, kernel_size=5)
305 |         self.conv1.stddev = 0.01
306 |         self.fc = nn.Linear(768, num_classes)
307 |         self.fc.stddev = 0.001
308 | 
309 |     def forward(self, x):
310 |         # N x 768 x 17 x 17
311 |         x = F.avg_pool2d(x, kernel_size=5, stride=3)
312 |         # N x 768 x 5 x 5
313 |         x = self.conv0(x)
314 |         # N x 128 x 5 x 5
315 |         x = self.conv1(x)
316 |         # N x 768 x 1 x 1
317 |         # Adaptive average pooling
318 |         x = F.adaptive_avg_pool2d(x, (1, 1))
319 |         # N x 768 x 1 x 1
320 |         x = x.view(x.size(0), -1)
321 |         # N x 768
322 |         x = self.fc(x)
323 |         # N x 1000
324 |         return x
325 | 
326 | 
327 | class BasicConv2d(nn.Module):
328 |     def __init__(self, in_channels, out_channels, **kwargs):
329 |         super(BasicConv2d, self).__init__()
330 |         self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
331 |         self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
332 | 
333 |     def forward(self, x):
334 |         x = self.conv(x)
335 |         x = self.bn(x)
336 |         return F.relu(x, inplace=True)
337 | 


--------------------------------------------------------------------------------
/src/AESPA_Baseline.py:
--------------------------------------------------------------------------------
  1 | from util import *
  2 | from custom_module import *
  3 | from pretrained_model import *
  4 | torch.manual_seed(0)
  5 | global_config = load_model_yaml("./global_config/", "global_config.yaml")
  6 | 
  7 | global G_model
  8 | global R_model
  9 | global B_model
 10 | global E_model
 11 | global S_model
 12 | 
 13 | 
 14 | def print_log_to_file(log_file, config, train_log, type, before_acc, swa_log):
 15 |     with open(log_file, "a") as f:
 16 |         print(" \n", file=f)
 17 |         print("config: ", file = f)
 18 |         print("before_acc: " + str(before_acc), file=f)
 19 |         print(config, file= f)
 20 |         if(train_log):
 21 |             acc_log_list = train_log["train_result"]["va"]
 22 |             if(type == "s" or type == "e"):
 23 |                 end_i = len(acc_log_list)
 24 |             elif(type == "b"):
 25 |                 end_i = train_log["best_index"] + 1
 26 |             for i in range(end_i):
 27 |                 print("acc: " + str(acc_log_list[i]), file = f)
 28 |             print("swa: "+str(swa_log), file= f)
 29 | 
 30 | 
 31 | def get_optimizer(
 32 |     model: torch.nn.Module, config: Dict[str, Any]
 33 | ) -> torch.optim.Optimizer:
 34 |     """
 35 |     Returns the optimizer initializer according to the config
 36 | 
 37 |     Note: config has a minimum of three entries.
 38 |     Feel free to add more entries if you want.
 39 |     But do not change the name of the three existing entries
 40 | 
 41 |     Args:
 42 |     - model: the model to optimize for
 43 |     - config: a dictionary containing parameters for the config
 44 |     Returns:
 45 |     - optimizer: the optimizer
 46 |     """
 47 | 
 48 |     optimizer = None
 49 | 
 50 |     optimizer_type = config.get("optimizer_type", "sgd")
 51 |     learning_rate = config.get("lr", 0)
 52 |     weight_decay = config.get("weight_decay", 0)
 53 |     momentum = 0
 54 |     dampening = 0
 55 | 
 56 |     print(learning_rate)
 57 |     print(weight_decay)
 58 |     print(optimizer_type)
 59 |     if optimizer_type=="sgd":
 60 |         optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum = momentum, dampening = dampening)
 61 |     elif optimizer_type=="adam":
 62 |         optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay)
 63 | 
 64 |     return optimizer
 65 | 
 66 | 
 67 | def compute_accuracy(logits: torch.Tensor, labels: torch.Tensor) -> float:
 68 |     """Compute the accuracy given the prediction logits and the ground-truth labels
 69 | 
 70 |     Args:
 71 |         logits: The output of the forward pass through the model.
 72 |                 for K classes logits[k] (where 0 <= k < K) corresponds to the
 73 |                 log-odds of class `k` being the correct one.
 74 |                 Shape: (batch_size, num_classes)
 75 |         labels: The ground truth label for each instance in the batch
 76 |                 Shape: (batch_size)
 77 |     Returns:
 78 |         accuracy: The accuracy of the predicted logits
 79 |                    (number of correct predictions / total number of examples)
 80 |     """
 81 |     batch_accuracy = 0.0
 82 |     num_data = logits.size()[0]
 83 |     for _i in range(num_data):
 84 |         nn_inference_label = torch.argmax(logits[_i])
 85 |         if(labels[_i] == nn_inference_label):
 86 |             batch_accuracy += 1.0
 87 | 
 88 |     batch_accuracy = batch_accuracy / num_data
 89 | 
 90 |     return batch_accuracy
 91 | 
 92 | 
 93 | def compute_loss(
 94 |     model: nn.Module,
 95 |     model_output: torch.Tensor,
 96 |     target_labels: torch.Tensor,
 97 |     is_normalize: bool = True,
 98 | ) -> torch.Tensor:
 99 |     """
100 |     Computes the loss between the model output and the target labels
101 | 
102 |     Args:
103 |     -   model: a model (which inherits from nn.Module)
104 |     -   model_output: the raw scores output by the net
105 |     -   target_labels: the ground truth class labels
106 |     -   is_normalize: bool flag indicating that loss should be divided by the batch size
107 |     Returns:
108 |     -   the loss value
109 |     """
110 |     loss = None
111 | 
112 |     criterion = nn.CrossEntropyLoss()
113 |     loss = criterion(model_output, target_labels)
114 |     #loss = model.loss_criterion(model_output, target_labels)
115 |     
116 |     if(is_normalize):
117 |         loss = loss / model_output.size()[0]
118 | 
119 |     return loss
120 | 
121 | 
122 | class Trainer:
123 |     """Class that stores model training metadata."""
124 |     def __init__(
125 |         self,
126 |         #data_dir: str,
127 |         model: nn.Module,
128 |         optimizer: Optimizer,
129 |         train_loader: torch.utils.data.DataLoader,
130 |         val_loader: torch.utils.data.DataLoader,
131 |         model_dir: str = "None",
132 |         load_from_disk: bool = True,
133 |         cuda: bool = True,
134 |         lr_scheduler = None,
135 |         no_bn_track: bool = True,
136 |         
137 |     ) -> None:
138 | 
139 |         self.model_dir = model_dir
140 |         self.model = model
141 |         self.lr_scheduler = lr_scheduler
142 |         self.no_bn_track = no_bn_track
143 | 
144 |         self.cuda = cuda
145 |         if cuda:
146 |             self.model.cuda()
147 | 
148 |         self.train_loader = train_loader                   
149 |         self.val_loader = val_loader
150 | 
151 |         self.optimizer = optimizer
152 | 
153 |         self.train_loss_history = []
154 |         self.validation_loss_history = []
155 |         self.train_accuracy_history = []
156 |         self.validation_accuracy_history = []
157 | 
158 |         # load the model from the disk if it exists
159 |         if os.path.exists(model_dir) and load_from_disk:
160 |             checkpoint = torch.load(os.path.join(self.model_dir, "checkpoint.pt"))
161 |             self.model.load_state_dict(checkpoint["model_state_dict"])
162 |             self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
163 | 
164 |         self.model.train()
165 | 
166 | 
167 |     def save_model(self) -> None:
168 |         """
169 |         Saves the model state and optimizer state on the dict
170 |         """
171 |         torch.save(
172 |             {
173 |                 "model_state_dict": self.model.state_dict(),
174 |                 "optimizer_state_dict": self.optimizer.state_dict(),
175 |             },
176 |             os.path.join(self.model_dir, "checkpoint.pt"),
177 |         )
178 | 
179 | 
180 |     def run_training_loop(self, num_epochs: int, swa_pack = None) -> None:
181 |         """Train for num_epochs, and validate after every epoch."""
182 | 
183 |         best_val = 0.0
184 |         best_loss = 100
185 |         best_epoch_i = 0
186 |         train_result = {"tl": [], "vl": [], "ta" :[], "va":[]}
187 | 
188 |         for epoch_idx in range(num_epochs):
189 |             train_loss, train_acc = self.train_epoch()
190 |             self.train_loss_history.append(train_loss)
191 |             self.train_accuracy_history.append(train_acc)
192 |             val_loss, val_acc = self.validate()
193 |             self.validation_loss_history.append(val_loss)
194 |             self.validation_accuracy_history.append(val_acc)
195 | 
196 |             if(swa_pack != None and num_epochs > swa_pack[2]):
197 |                 swa_pack[0].update_parameters(self.model)
198 |                 swa_pack[1].step()
199 |             if(self.lr_scheduler):
200 |                 self.lr_scheduler.step(val_acc)
201 | 
202 |             train_result["tl"].append(train_loss)
203 |             train_result["vl"].append(val_loss)
204 |             train_result["ta"].append(train_acc)
205 |             train_result["va"].append(val_acc)
206 |             
207 |             print(
208 |                 f"Epoch:{epoch_idx + 1}"
209 |                 + f" Train Loss:{train_loss:.4f}"
210 |                 + f" Val Loss: {val_loss:.4f}"
211 |                 + f" Train Accuracy: {train_acc:.4f}"
212 |                 + f" Validation Accuracy: {val_acc:.4f}"
213 |             )
214 | 
215 |             global B_model
216 |             if(val_acc > best_val):
217 |                 best_val = val_acc
218 |                 best_loss = val_loss
219 |                 # B_model = copy.deepcopy(self.model)
220 |                 best_epoch_i = epoch_idx
221 |             elif(val_acc == best_val and val_loss < best_loss):
222 |                 best_val = val_acc
223 |                 best_loss = val_loss
224 |                 # B_model = copy.deepcopy(self.model)
225 |                 best_epoch_i = epoch_idx
226 | 
227 |         return_pack={"train_result" : train_result, "best_index": best_epoch_i}
228 |         return return_pack
229 | 
230 | 
231 |     def train_epoch(self) -> Tuple[float, float]:
232 |         """Implements the main training loop."""
233 |         self.model.train()
234 | 
235 |         if(self.no_bn_track):
236 |             self.disable_traking_bn()
237 | 
238 |         train_loss_meter = AverageMeter("train loss")
239 |         train_acc_meter = AverageMeter("train accuracy")
240 | 
241 |         # loop over each minibatch
242 |         for (x, y) in self.train_loader:
243 |             if self.cuda:
244 |                 x = x.cuda()
245 |                 y = y.cuda()
246 | 
247 |             n = x.shape[0]
248 |             logits = self.model(x)
249 |             batch_acc = compute_accuracy(logits, y)
250 |             train_acc_meter.update(val=batch_acc, n=n)
251 | 
252 |             batch_loss = compute_loss(self.model, logits, y, is_normalize=True)
253 |             train_loss_meter.update(val=float(batch_loss.cpu().item()), n=n)
254 | 
255 |             self.optimizer.zero_grad()
256 |             batch_loss.backward()
257 |             self.optimizer.step()
258 |         
259 |         return train_loss_meter.avg, train_acc_meter.avg
260 | 
261 | 
262 |     def validate(self) -> Tuple[float, float]:
263 |         """Evaluate on held-out split (either val or test)"""
264 |         self.model.eval()
265 | 
266 |         val_loss_meter = AverageMeter("val loss")
267 |         val_acc_meter = AverageMeter("val accuracy")
268 | 
269 |         # loop over whole val set
270 |         with torch.no_grad():
271 |             for (x, y) in self.val_loader:
272 |                 if self.cuda:
273 |                     x = x.cuda()
274 |                     y = y.cuda()
275 | 
276 |                 n = x.shape[0]
277 |                 logits = self.model(x)
278 | 
279 |                 batch_acc = compute_accuracy(logits, y)
280 |                 val_acc_meter.update(val=batch_acc, n=n)
281 | 
282 |                 batch_loss = compute_loss(self.model, logits, y, is_normalize=True)
283 |                 val_loss_meter.update(val=float(batch_loss.cpu().item()), n=n)
284 | 
285 |         return val_loss_meter.avg, val_acc_meter.avg
286 | 
287 | 
288 |     def disable_traking_bn(self):
289 |         for layer in self.model.modules():
290 |             if isinstance(layer, nn.modules.BatchNorm2d):
291 |                 layer.eval()
292 | 
293 | 
294 | 
295 | def AESAP_replace(model, valid_data_loader = None):
296 |     sign_nest_dict = generate_sign_nest_dict(model)
297 |     print(sign_nest_dict)
298 |     global G_model
299 |     G_model = copy.deepcopy(model)
300 |     for key in sign_nest_dict:
301 |         print(key)
302 |         if(sign_nest_dict[key]["type"] == "MaxPool2d"):
303 |             continue
304 | 
305 |         sign_dict = sign_nest_dict[key]
306 |         bn_name =  sign_dict["HerPN"]
307 |         
308 |         if(sign_dict["type"] == "ReLU" and sign_dict["HerPN"]):
309 |             num_features = access_layer(G_model, bn_name).num_features
310 |             BN_dimension = 2
311 |             my_layer = HerPN2d(num_features, BN_dimension)
312 |         else:
313 |             print("Error: Replce Pair Can't Find")
314 |             # assert(False, "Replce Pair Error")
315 | 
316 |         layer_name = key
317 |         layer_dict = sign_nest_dict[key]
318 |         replace_module = my_layer
319 | 
320 |         # print(layer_dict)
321 | 
322 |         replace_layer(model, layer_name,  replace_module)
323 |         if(sign_nest_dict[layer_name]["HerPN"]):
324 |             replace_layer(model, sign_nest_dict[layer_name]["HerPN"],  nn.Identity())
325 |     
326 |     if(valid_data_loader):
327 |         validate(model, valid_data_loader, "cuda:0")
328 |         
329 |     print(model)
330 |     return model
331 | 
332 | 
333 | def AESPA_train(model, valid_data_loader, train_data_loader, config):
334 |     layer_name = config["layer_name"]
335 |     num_epochs = config["ep"]
336 | 
337 |     optimizer_name = "adam"
338 |     learning_rate = config["lr"]
339 |     weight_decay = config["wd"]
340 |     learning_rate_decay = True
341 |     no_bn_track = False
342 |     my_model = model
343 | 
344 |     print("Name: " + layer_name)
345 |     before_result  = validate(my_model, valid_data_loader, "cuda:0")
346 | 
347 |     print(layer_name +": train")
348 | 
349 |     optimizer_config = {"optimizer_type": "adam", "lr": learning_rate, "weight_decay": weight_decay}
350 |     optimizer = get_optimizer(my_model, optimizer_config)
351 |     scheduler = ReduceLROnPlateau(optimizer, 'max', patience = 2, eps=1e-10)
352 |     if(learning_rate_decay):
353 |         lr_scheduler = scheduler
354 |     else:
355 |         lr_scheduler = None
356 |     trainer = Trainer(
357 |         model=my_model,
358 |         optimizer=optimizer,
359 |         load_from_disk=False,
360 |         cuda=True,
361 |         lr_scheduler = lr_scheduler,
362 |         no_bn_track = no_bn_track,
363 |         train_loader = train_data_loader,
364 |         val_loader = valid_data_loader
365 |         
366 |     )
367 | 
368 |     print("Layer name: " + layer_name)
369 |     print("Parameter: ")
370 |     print("Optimizer: " + optimizer_name)
371 |     print("No batchnorm tracking: " +str(no_bn_track))
372 |     print("\tLearning rate: " + str(learning_rate))
373 |     print("\tWeight decay: " + str(weight_decay))
374 |     print("\tLearning rate decay: " + str(learning_rate_decay))
375 |     print("\n \n")
376 | 
377 |     print("Train epoch: ")
378 |     train_return_pack = trainer.run_training_loop(num_epochs=num_epochs)
379 |     train_result = validate(my_model, valid_data_loader, "cuda:0")
380 |     global E_model
381 |     E_model = copy.deepcopy(my_model)
382 |     print("\n \n")
383 |     print("Validation result:")
384 |     print("\tbefore: " + str(before_result))
385 |     print("\ttrain: "+ str(train_result))
386 |     print("\n \n \n \n")
387 | 
388 | 
389 | 
390 | 
391 | 
392 | if __name__ == "__main__":
393 |     parser = ArgumentParser()
394 |     parser.add_argument("--model", type=str,choices=["vgg19_bn", "resnet18", "resnet32", "resnet20"])
395 |     parser.add_argument("--dataset", type=str,choices=["cifar10", "imagenet_1k", "cifar100"])
396 |     parser.add_argument("-wd", "--working_directory", type=str, default="./working_dirctory/")
397 |     parser.add_argument("-lr", "--learning_rate", type = float, default = 1e-6)
398 | 
399 |     args = parser.parse_args()
400 |     print(args)
401 |     valid_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "valid", data_dir = global_config["Global"]["dataset_dirctory"])
402 |     train_data_loader = get_data_loader(dataset = args.dataset, dataset_type = "train", data_dir = global_config["Global"]["dataset_dirctory"])
403 |     model = get_pretrained_model(model_name=args.model, dataset=args.dataset)
404 |     print(model)
405 |     validate(model, valid_data_loader, "cuda:0")
406 |     AESAP_replace(model)
407 |     lr_c = args.learning_rate
408 |     param_config = {"layer_name": "Whole Model",
409 |             "ep" : 5,
410 |             "lr" : lr_c,
411 |             "wd" : 0.01}
412 |     AESPA_train(model, valid_data_loader, train_data_loader, param_config)
413 | 
414 | 


--------------------------------------------------------------------------------
/expriments/fig9/PR_AT_2f12g1.log:
--------------------------------------------------------------------------------
  1 |  
  2 |  
  3 | 
  4 | config: 
  5 | before_acc: 0.69
  6 | {'layer_name': 'relu', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['relu'], 'do': False, 'lh': 0, 'lt': 'n'}
  7 |  
  8 | 
  9 | config: 
 10 | before_acc: 0.69
 11 | {'layer_name': 'relu', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['relu'], 'do': False, 'lh': 0, 'lt': 'n'}
 12 | acc: 0.69
 13 | acc: 0.69
 14 | acc: 0.69
 15 | acc: 0.689
 16 | acc: 0.691
 17 | acc: 0.691
 18 | acc: 0.691
 19 | acc: 0.691
 20 | acc: 0.691
 21 | acc: 0.691
 22 | acc: 0.691
 23 | acc: 0.69
 24 | acc: 0.691
 25 | acc: 0.691
 26 | acc: 0.691
 27 | acc: 0.691
 28 | acc: 0.691
 29 | acc: 0.691
 30 | acc: 0.691
 31 | swa: None
 32 |  
 33 | 
 34 | config: 
 35 | before_acc: 0.691
 36 | {'layer_name': 'relu', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['conv1', 'bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'b'}
 37 | acc: 0.689
 38 | acc: 0.69
 39 | acc: 0.691
 40 | acc: 0.691
 41 | acc: 0.691
 42 | acc: 0.693
 43 | acc: 0.693
 44 | acc: 0.695
 45 | acc: 0.697
 46 | swa: None
 47 |  
 48 | 
 49 | config: 
 50 | before_acc: 0.6459999999999999
 51 | {'layer_name': 'maxpool', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['maxpool'], 'do': False, 'lh': 0, 'lt': 'n'}
 52 |  
 53 | 
 54 | config: 
 55 | before_acc: 0.6459999999999999
 56 | {'layer_name': 'maxpool', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['maxpool'], 'do': False, 'lh': 0, 'lt': 'n'}
 57 | acc: 0.645
 58 | acc: 0.645
 59 | acc: 0.645
 60 | acc: 0.645
 61 | acc: 0.644
 62 | acc: 0.644
 63 | acc: 0.644
 64 | acc: 0.643
 65 | acc: 0.643
 66 | acc: 0.642
 67 | acc: 0.641
 68 | acc: 0.641
 69 | acc: 0.641
 70 | acc: 0.641
 71 | acc: 0.64
 72 | acc: 0.64
 73 | acc: 0.641
 74 | acc: 0.641
 75 | acc: 0.641
 76 | acc: 0.641
 77 | swa: 0.6890000000000001
 78 |  
 79 | 
 80 | config: 
 81 | before_acc: 0.6890000000000001
 82 | {'layer_name': 'maxpool', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['maxpool'], 'do': False, 'lh': 0, 'lt': 's'}
 83 | acc: 0.689
 84 | acc: 0.688
 85 | acc: 0.688
 86 | acc: 0.689
 87 | acc: 0.689
 88 | acc: 0.689
 89 | acc: 0.689
 90 | acc: 0.689
 91 | acc: 0.688
 92 | acc: 0.688
 93 | acc: 0.688
 94 | acc: 0.69
 95 | acc: 0.69
 96 | acc: 0.69
 97 | acc: 0.688
 98 | acc: 0.688
 99 | acc: 0.689
100 | acc: 0.688
101 | acc: 0.688
102 | acc: 0.688
103 | swa: 0.691
104 |  
105 | 
106 | config: 
107 | before_acc: 0.6890000000000001
108 | {'layer_name': 'layer1.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
109 |  
110 | 
111 | config: 
112 | before_acc: 0.6890000000000001
113 | {'layer_name': 'layer1.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
114 | acc: 0.689
115 | acc: 0.689
116 | acc: 0.689
117 | acc: 0.689
118 | acc: 0.689
119 | acc: 0.691
120 | acc: 0.691
121 | acc: 0.691
122 | acc: 0.692
123 | acc: 0.692
124 | acc: 0.693
125 | acc: 0.693
126 | acc: 0.694
127 | swa: None
128 |  
129 | 
130 | config: 
131 | before_acc: 0.688
132 | {'layer_name': 'layer1.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
133 |  
134 | 
135 | config: 
136 | before_acc: 0.685
137 | {'layer_name': 'layer1.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
138 |  
139 | 
140 | config: 
141 | before_acc: 0.685
142 | {'layer_name': 'layer1.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
143 | acc: 0.686
144 | acc: 0.686
145 | acc: 0.686
146 | acc: 0.686
147 | acc: 0.685
148 | acc: 0.685
149 | acc: 0.685
150 | acc: 0.685
151 | acc: 0.687
152 | acc: 0.687
153 | acc: 0.687
154 | acc: 0.687
155 | acc: 0.687
156 | acc: 0.687
157 | acc: 0.687
158 | acc: 0.687
159 | acc: 0.687
160 | acc: 0.688
161 | swa: None
162 |  
163 | 
164 | config: 
165 | before_acc: 0.6829999999999999
166 | {'layer_name': 'layer1.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
167 |  
168 | 
169 | config: 
170 | before_acc: 0.6829999999999999
171 | {'layer_name': 'layer1.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
172 | acc: 0.683
173 | acc: 0.683
174 | acc: 0.683
175 | acc: 0.683
176 | acc: 0.683
177 | acc: 0.683
178 | acc: 0.683
179 | acc: 0.683
180 | acc: 0.683
181 | acc: 0.683
182 | acc: 0.683
183 | acc: 0.683
184 | acc: 0.682
185 | acc: 0.682
186 | acc: 0.683
187 | acc: 0.683
188 | acc: 0.684
189 | acc: 0.683
190 | acc: 0.683
191 | acc: 0.684
192 | swa: 0.6890000000000001
193 |  
194 | 
195 | config: 
196 | before_acc: 0.6890000000000001
197 | {'layer_name': 'layer1.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer1.1.relu2'], 'do': False, 'lh': 0, 'lt': 's'}
198 | acc: 0.689
199 | acc: 0.689
200 | acc: 0.689
201 | acc: 0.69
202 | acc: 0.69
203 | acc: 0.69
204 | acc: 0.69
205 | acc: 0.691
206 | acc: 0.691
207 | acc: 0.691
208 | acc: 0.691
209 | acc: 0.691
210 | acc: 0.691
211 | acc: 0.691
212 | acc: 0.691
213 | acc: 0.691
214 | acc: 0.692
215 | acc: 0.692
216 | acc: 0.693
217 | swa: None
218 |  
219 | 
220 | config: 
221 | before_acc: 0.693
222 | {'layer_name': 'layer1.1.relu2', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer1.1.conv2', 'layer1.1.bn2'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'b'}
223 | acc: 0.694
224 | swa: None
225 |  
226 | 
227 | config: 
228 | before_acc: 0.6890000000000001
229 | {'layer_name': 'layer2.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
230 |  
231 | 
232 | config: 
233 | before_acc: 0.6890000000000001
234 | {'layer_name': 'layer2.0.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer2.0.conv1', 'layer2.0.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'n'}
235 | acc: 0.69
236 | acc: 0.69
237 | acc: 0.689
238 | acc: 0.688
239 | acc: 0.69
240 | acc: 0.688
241 | acc: 0.685
242 | acc: 0.686
243 | acc: 0.687
244 | acc: 0.684
245 | acc: 0.685
246 | acc: 0.686
247 | acc: 0.687
248 | acc: 0.686
249 | acc: 0.687
250 | acc: 0.687
251 | acc: 0.687
252 | acc: 0.689
253 | acc: 0.688
254 | acc: 0.688
255 | swa: 0.69
256 |  
257 | 
258 | config: 
259 | before_acc: 0.691
260 | {'layer_name': 'layer2.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
261 |  
262 | 
263 | config: 
264 | before_acc: 0.68
265 | {'layer_name': 'layer2.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
266 |  
267 | 
268 | config: 
269 | before_acc: 0.68
270 | {'layer_name': 'layer2.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
271 | acc: 0.68
272 | acc: 0.679
273 | acc: 0.679
274 | acc: 0.679
275 | acc: 0.679
276 | acc: 0.679
277 | acc: 0.68
278 | acc: 0.68
279 | acc: 0.68
280 | acc: 0.68
281 | acc: 0.68
282 | acc: 0.68
283 | acc: 0.68
284 | acc: 0.68
285 | acc: 0.68
286 | acc: 0.68
287 | acc: 0.68
288 | acc: 0.68
289 | acc: 0.68
290 | acc: 0.679
291 | swa: 0.6829999999999999
292 |  
293 | 
294 | config: 
295 | before_acc: 0.6829999999999999
296 | {'layer_name': 'layer2.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu1'], 'do': False, 'lh': 0, 'lt': 's'}
297 | acc: 0.683
298 | acc: 0.683
299 | acc: 0.683
300 | acc: 0.683
301 | acc: 0.683
302 | acc: 0.683
303 | acc: 0.682
304 | acc: 0.682
305 | acc: 0.682
306 | acc: 0.681
307 | acc: 0.683
308 | acc: 0.683
309 | acc: 0.683
310 | acc: 0.684
311 | acc: 0.684
312 | acc: 0.684
313 | acc: 0.684
314 | acc: 0.684
315 | acc: 0.684
316 | acc: 0.684
317 | swa: 0.684
318 |  
319 | 
320 | config: 
321 | before_acc: 0.684
322 | {'layer_name': 'layer2.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu1'], 'do': False, 'lh': 0, 'lt': 's'}
323 | acc: 0.684
324 | acc: 0.683
325 | acc: 0.683
326 | acc: 0.683
327 | acc: 0.683
328 | acc: 0.683
329 | acc: 0.683
330 | acc: 0.683
331 | acc: 0.683
332 | acc: 0.684
333 | acc: 0.684
334 | acc: 0.683
335 | acc: 0.683
336 | acc: 0.683
337 | acc: 0.683
338 | acc: 0.684
339 | acc: 0.684
340 | acc: 0.684
341 | acc: 0.684
342 | acc: 0.684
343 | swa: 0.685
344 |  
345 | 
346 | config: 
347 | before_acc: 0.685
348 | {'layer_name': 'layer2.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
349 |  
350 | 
351 | config: 
352 | before_acc: 0.685
353 | {'layer_name': 'layer2.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer2.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
354 | acc: 0.685
355 | acc: 0.685
356 | acc: 0.685
357 | acc: 0.686
358 | acc: 0.686
359 | acc: 0.687
360 | acc: 0.687
361 | acc: 0.687
362 | acc: 0.687
363 | acc: 0.687
364 | acc: 0.687
365 | acc: 0.688
366 | swa: None
367 |  
368 | 
369 | config: 
370 | before_acc: 0.684
371 | {'layer_name': 'layer3.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
372 |  
373 | 
374 | config: 
375 | before_acc: 0.684
376 | {'layer_name': 'layer3.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
377 |  
378 | 
379 | config: 
380 | before_acc: 0.684
381 | {'layer_name': 'layer3.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
382 | acc: 0.684
383 | acc: 0.684
384 | acc: 0.684
385 | acc: 0.684
386 | acc: 0.685
387 | acc: 0.685
388 | acc: 0.685
389 | acc: 0.685
390 | acc: 0.685
391 | acc: 0.685
392 | acc: 0.685
393 | acc: 0.685
394 | acc: 0.685
395 | acc: 0.685
396 | acc: 0.685
397 | acc: 0.685
398 | acc: 0.685
399 | acc: 0.685
400 | acc: 0.684
401 | acc: 0.684
402 | swa: 0.685
403 |  
404 | 
405 | config: 
406 | before_acc: 0.685
407 | {'layer_name': 'layer3.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.0.relu2'], 'do': False, 'lh': 0, 'lt': 's'}
408 | acc: 0.685
409 | acc: 0.685
410 | acc: 0.684
411 | acc: 0.684
412 | acc: 0.684
413 | acc: 0.684
414 | acc: 0.684
415 | acc: 0.684
416 | acc: 0.685
417 | acc: 0.686
418 | acc: 0.686
419 | acc: 0.686
420 | swa: None
421 |  
422 | 
423 | config: 
424 | before_acc: 0.6829999999999999
425 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
426 |  
427 | 
428 | config: 
429 | before_acc: 0.6829999999999999
430 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
431 | acc: 0.683
432 | acc: 0.681
433 | acc: 0.68
434 | acc: 0.68
435 | acc: 0.68
436 | acc: 0.68
437 | acc: 0.681
438 | acc: 0.681
439 | acc: 0.681
440 | acc: 0.681
441 | acc: 0.681
442 | acc: 0.68
443 | acc: 0.68
444 | acc: 0.68
445 | acc: 0.679
446 | acc: 0.679
447 | acc: 0.679
448 | acc: 0.679
449 | acc: 0.679
450 | acc: 0.678
451 | swa: 0.6859999999999999
452 |  
453 | 
454 | config: 
455 | before_acc: 0.6859999999999999
456 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu1'], 'do': False, 'lh': 0, 'lt': 's'}
457 | acc: 0.686
458 | acc: 0.686
459 | acc: 0.686
460 | acc: 0.686
461 | acc: 0.686
462 | acc: 0.686
463 | acc: 0.687
464 | acc: 0.687
465 | acc: 0.688
466 | acc: 0.688
467 | acc: 0.687
468 | acc: 0.687
469 | acc: 0.687
470 | acc: 0.687
471 | acc: 0.686
472 | acc: 0.686
473 | acc: 0.685
474 | acc: 0.686
475 | acc: 0.684
476 | acc: 0.684
477 | swa: 0.688
478 |  
479 | 
480 | config: 
481 | before_acc: 0.688
482 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer3.0.downsample.0', 'layer3.0.downsample.1', 'layer3.1.conv1', 'layer3.1.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 's'}
483 | acc: 0.689
484 | acc: 0.689
485 | acc: 0.688
486 | acc: 0.687
487 | acc: 0.686
488 | acc: 0.687
489 | acc: 0.687
490 | acc: 0.686
491 | acc: 0.685
492 | acc: 0.686
493 | acc: 0.686
494 | acc: 0.685
495 | acc: 0.684
496 | acc: 0.683
497 | acc: 0.683
498 | acc: 0.682
499 | acc: 0.684
500 | acc: 0.683
501 | acc: 0.685
502 | acc: 0.685
503 | swa: 0.6890000000000001
504 |  
505 | 
506 | config: 
507 | before_acc: 0.6890000000000001
508 | {'layer_name': 'layer3.1.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer3.0.downsample.0', 'layer3.0.downsample.1', 'layer3.1.conv1', 'layer3.1.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 's'}
509 | acc: 0.688
510 | acc: 0.69
511 | acc: 0.689
512 | acc: 0.685
513 | acc: 0.684
514 | acc: 0.686
515 | acc: 0.688
516 | acc: 0.689
517 | acc: 0.689
518 | acc: 0.69
519 | swa: None
520 |  
521 | 
522 | config: 
523 | before_acc: 0.687
524 | {'layer_name': 'layer3.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
525 |  
526 | 
527 | config: 
528 | before_acc: 0.687
529 | {'layer_name': 'layer3.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer3.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
530 | acc: 0.687
531 | acc: 0.688
532 | swa: None
533 |  
534 | 
535 | config: 
536 | before_acc: 0.688
537 | {'layer_name': 'layer3.1.relu2', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer3.1.conv2', 'layer3.1.bn2'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'b'}
538 | acc: 0.687
539 | acc: 0.685
540 | acc: 0.684
541 | acc: 0.683
542 | acc: 0.682
543 | acc: 0.68
544 | acc: 0.681
545 | acc: 0.681
546 | acc: 0.681
547 | acc: 0.681
548 | acc: 0.681
549 | acc: 0.68
550 | acc: 0.677
551 | acc: 0.678
552 | acc: 0.682
553 | acc: 0.681
554 | acc: 0.681
555 | acc: 0.681
556 | acc: 0.681
557 | acc: 0.68
558 | swa: 0.69
559 |  
560 | 
561 | config: 
562 | before_acc: 0.685
563 | {'layer_name': 'layer4.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
564 |  
565 | 
566 | config: 
567 | before_acc: 0.685
568 | {'layer_name': 'layer4.0.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.0.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
569 | acc: 0.686
570 | acc: 0.686
571 | acc: 0.686
572 | acc: 0.686
573 | acc: 0.686
574 | acc: 0.685
575 | acc: 0.685
576 | acc: 0.685
577 | acc: 0.685
578 | acc: 0.685
579 | acc: 0.685
580 | acc: 0.685
581 | acc: 0.686
582 | acc: 0.686
583 | acc: 0.686
584 | acc: 0.686
585 | acc: 0.685
586 | acc: 0.685
587 | acc: 0.685
588 | acc: 0.686
589 | swa: 0.691
590 |  
591 | 
592 | config: 
593 | before_acc: 0.691
594 | {'layer_name': 'layer4.0.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer4.0.conv1', 'layer4.0.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 's'}
595 | acc: 0.692
596 | swa: None
597 |  
598 | 
599 | config: 
600 | before_acc: 0.685
601 | {'layer_name': 'layer4.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
602 |  
603 | 
604 | config: 
605 | before_acc: 0.685
606 | {'layer_name': 'layer4.0.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.0.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
607 | acc: 0.686
608 | swa: None
609 |  
610 | 
611 | config: 
612 | before_acc: 0.6829999999999999
613 | {'layer_name': 'layer4.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
614 |  
615 | 
616 | config: 
617 | before_acc: 0.6829999999999999
618 | {'layer_name': 'layer4.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.1.relu1'], 'do': False, 'lh': 0, 'lt': 'n'}
619 | acc: 0.683
620 | acc: 0.683
621 | acc: 0.683
622 | acc: 0.683
623 | acc: 0.683
624 | acc: 0.683
625 | acc: 0.683
626 | acc: 0.683
627 | acc: 0.683
628 | acc: 0.684
629 | swa: None
630 |  
631 | 
632 | config: 
633 | before_acc: 0.684
634 | {'layer_name': 'layer4.1.relu1', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer4.0.downsample.0', 'layer4.0.downsample.1', 'layer4.1.conv1', 'layer4.1.bn1'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'b'}
635 | acc: 0.686
636 | acc: 0.686
637 | acc: 0.688
638 | acc: 0.689
639 | acc: 0.69
640 | acc: 0.691
641 | acc: 0.693
642 | acc: 0.694
643 | acc: 0.695
644 | acc: 0.696
645 | acc: 0.695
646 | acc: 0.695
647 | acc: 0.695
648 | acc: 0.696
649 | acc: 0.696
650 | acc: 0.698
651 | swa: None
652 |  
653 | 
654 | config: 
655 | before_acc: 0.698
656 | {'layer_name': 'layer4.1.relu1', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.1.relu1'], 'do': False, 'lh': 0, 'lt': 'b'}
657 | acc: 0.698
658 | acc: 0.698
659 | acc: 0.698
660 | acc: 0.698
661 | acc: 0.698
662 | acc: 0.699
663 | swa: None
664 |  
665 | 
666 | config: 
667 | before_acc: 0.695
668 | {'layer_name': 'layer4.1.relu2', 'ep': 20, 'lr': 0.0001, 'wd': 0.01, 'tw': False, 'twe': [], 'tc': False, 'tce': ['layer4.1.relu2'], 'do': False, 'lh': 0, 'lt': 'n'}
669 |  
670 | 
671 | config: 
672 | before_acc: 0.695
673 | {'layer_name': 'layer4.1.relu2', 'ep': 20, 'lr': 1e-05, 'wd': 0.1, 'tw': False, 'twe': ['layer4.1.conv2', 'layer4.1.bn2'], 'tc': False, 'tce': [], 'do': False, 'lh': 0, 'lt': 'n'}
674 | acc: 0.697
675 | acc: 0.698
676 | acc: 0.698
677 | acc: 0.697
678 | acc: 0.698
679 | swa: None
680 | 


--------------------------------------------------------------------------------