├── README.md
├── code
├── compression
│ ├── cifar10
│ │ ├── __pycache__
│ │ │ └── vgg_net_cifar10.cpython-38.pyc
│ │ ├── cifar10_magnitude_pruning.py
│ │ ├── performance_two.py
│ │ ├── performance_two_origin.py
│ │ ├── vgg_net_cifar10.py
│ │ └── vgg_train.py
│ ├── mnist
│ │ ├── memory.py
│ │ ├── mnist_magnitude_pruning.py
│ │ ├── mnist_origin.py
│ │ └── performance_match.py
│ └── new_MNIST
│ │ ├── GMM_data.py
│ │ ├── matching.py
│ │ ├── model.py
│ │ ├── new_weight_act.py
│ │ ├── performance_MNIST.py
│ │ └── test
├── equation_solve
│ ├── __pycache__
│ │ └── solve_equation.cpython-38.pyc
│ └── solve_equation.py
├── expect_cal
│ ├── __pycache__
│ │ ├── expect_calculate.cpython-38.pyc
│ │ └── expect_calculate_math.cpython-38.pyc
│ ├── expect_calculate.py
│ └── expect_calculate_math.py
├── fig
│ └── small
│ │ ├── GMM-mixed_3200_784_none_rrr_[2000, 2000, 1000].png
│ │ └── GMM-mixed_8000_4000_none_rrr_[2000, 2000, 1000].png
├── hist_return
│ └── small
│ │ ├── GMM-mixed_3200_784_none_rrr_[2000, 2000, 1000]
│ │ └── GMM-mixed_8000_4000_none_rrr_[2000, 2000, 1000]
├── model_define
│ ├── __pycache__
│ │ └── model.cpython-38.pyc
│ └── model.py
├── model_new
├── model_origin
├── model_vgg
├── plot_return
│ └── GMM-mixed_3200_784_none_rrr_[2000, 2000, 1000]
├── spectral_characteristics
│ ├── __pycache__
│ │ └── plot_eigen.cpython-38.pyc
│ ├── plot_eigen.py
│ ├── tilde_CK.py
│ └── tilde_ntk.py
├── utils
│ ├── __pycache__
│ │ ├── __init__.cpython-38.pyc
│ │ ├── activation_numpy.cpython-38.pyc
│ │ ├── activation_tensor.cpython-38.pyc
│ │ ├── data_prepare.cpython-38.pyc
│ │ ├── expect_calculate.cpython-38.pyc
│ │ ├── model.cpython-38.pyc
│ │ └── utils.cpython-38.pyc
│ ├── activation_numpy.py
│ ├── activation_tensor.py
│ ├── data_prepare.py
│ └── utils.py
├── requirements.txt
└── simulations
    └── simulations.py
/README.md:
--------------------------------------------------------------------------------
1 | # NTK-LC
2 | 
3 | This repository contains code to reproduce the results in the paper "*'Lossless' Compression of Deep Neural Networks: A High-dimensional Neural Tangent Kernel Approach*" (**NTK-LC**) [^1].
4 | 
5 | ## About the code
6 | 
7 | * `code/compression` contains
8 |   * `code/compression/mnist` **(Experiment 2.1)**
9 |     * **mnist_origin.py** for classification with the original dense neural network on the MNIST dataset.
10 |     * **performance_match.py** for classification with the compressed neural network obtained by the proposed NTK-LC approach on the MNIST dataset.
11 |     * **mnist_magnitude_pruning.py** for classification with a neural network compressed by magnitude pruning.
12 |   * `code/compression/cifar10` **(Experiment 2.2)**
13 |     * **vgg_net_cifar10.py** for defining a VGG19 suitable for CIFAR10
14 |     * **vgg_train.py** for training the VGG19 defined in vgg_net_cifar10.py on CIFAR10 and getting the convolution-layer parameters used for feature extraction.
15 |     * **performance_two_origin.py** for the classification performance of the original dense neural network on CIFAR10.
16 |     * **performance_two.py** for the classification performance of the compressed neural network (compressed by our NTK-LC algorithm) on CIFAR10.
17 |     * **cifar10_magnitude_pruning.py** for classification with a neural network compressed by magnitude pruning.
18 | 
19 | * `code/spectral_characteristics` **(Experiment 1)** contains
20 |   * **tilde_CK.py** for verifying the consistency of the spectral distribution of the **theoretical** (calculated with our theorem results) and **practical** (calculated by the original definition) conjugate kernel (CK).
21 |   * **plot_eigen.py** for plotting eigenvalues and eigenvectors of a given matrix.
22 | 
23 | * `code/equation_solve` contains
24 |   * **solve_equation.py** for solving the equations that define the parameters of the activation functions
25 | 
26 | * `code/expect_cal` contains
27 |   * **expect_calculate.py** for expectations computed by numerical integration
28 |   * **expect_calculate_math.py** for expectations computed from analytical expressions
29 | 
30 | * `code/model_define` contains
31 |   * **model.py** for model definition
32 | 
33 | * `code/utils` contains
34 |   * **activation_numpy.py** for activations defined with numpy
35 |   * **activation_tensor.py** for activations defined with torch
36 |   * **data_prepare.py** for data preparation, covering data sampled from MNIST/CIFAR10 and generated GMM data
37 |   * **utils.py** for miscellaneous utilities
38 | 
39 | ## Dependencies
40 | 
41 | You can run the following bash command to install the packages used in this repository
42 | ```bash
43 | pip install -r requirements.txt
44 | ```
45 | 
46 | or you can install the following basic packages yourself:
47 | 
48 | * [Python](https://www.python.org/): tested with version 3.8.13
49 | * [Numpy](http://www.numpy.org/) and [Scipy](https://www.scipy.org/)
50 | * [Matplotlib](http://matplotlib.org/) for visualization
51 | * [Pytorch](https://pytorch.org/): tested with version 1.12.0
52 | * [Pandas](https://pandas.pydata.org/) for data recording
53 | 
54 | 
55 | ## Contact information
56 | * Zhenyu LIAO
57 |   * Assistant Professor at EIC, Huazhong University of Science and Technology
58 |   * Website: [https://zhenyu-liao.github.io/](https://zhenyu-liao.github.io/)
59 |   * E-mail: [zhenyu_liao@hust.edu.cn](mailto:zhenyu_liao@hust.edu.cn)
60 | 
61 | * Linyu Gu
62 |   * Master's student at EIC, Huazhong University of Science and Technology
63 |   * E-mail: [gulingyu@hust.edu.cn](mailto:m202172384@hust.edu.cn)
64 | 
65 | * Yongqi Du
66 |   * Master's student at EIC, Huazhong University of Science and Technology
67 |   * E-mail: [yongqi_du@hust.edu.cn](mailto:yongqi_du@hust.edu.cn)
68 | 
69 | 
70 | 
71 | ## References
72 | 
73 | [^1]: Gu L, Du Y, Zhang Y, et al. "Lossless" Compression of Deep Neural Networks: A High-dimensional Neural Tangent Kernel Approach[J]. [link](https://zhenyu-liao.github.io/pdf/conf/RMT4DeepCompress_nips22.pdf)
74 | 
75 | [^2]: Ali H T, Liao Z, Couillet R. Random matrices in service of ML footprint: ternary random features with no performance loss[J]. arXiv preprint arXiv:2110.01899, 2021. [link](https://arxiv.org/abs/2110.01899)
76 | 
--------------------------------------------------------------------------------
/code/compression/cifar10/__pycache__/vgg_net_cifar10.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/compression/cifar10/__pycache__/vgg_net_cifar10.cpython-38.pyc
--------------------------------------------------------------------------------
/code/compression/cifar10/cifar10_magnitude_pruning.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """Using a custom network to classify data sampled from the CIFAR10 dataset.
4 | 
5 | The network can be customized; see class My_Model for details on how to customize
6 | the model, to which a classification layer is appended. Note that the VGG net is
7 | first trained on the full CIFAR10 dataset for better classification.
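
Example (a sketch of the customization pattern used in __main__ below; the
layer sizes and activation entries are the ones set in this file):

    model = My_Model(layer_num=3,
                     input_num=512,
                     weight_num_list=[400, 400, 200],
                     activation_list=[{'name': 'ReLU', 'args': None}] * 3,
                     tau_zero=tau_zero)
    model_origin = nn.Sequential(OrderedDict([
        ('feature', model),
        ('classification', nn.Linear(200, 10, bias=False)),
        ('activation', nn.Softmax()),
    ]))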
8 | """ 9 | 10 | __author__ = "Model_compression" 11 | __copyright__ = "Copyright 2021, Lossless compression" 12 | __license__ = "GPL" 13 | __version__ = "1.0.1" 14 | __email__ = "yongqi_du@hust.edu.cn" 15 | __status__ = "Production" 16 | 17 | import sys 18 | import os 19 | 20 | sys.path.append( 21 | os.path.dirname( 22 | os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) 23 | import math 24 | from collections import Counter, OrderedDict 25 | 26 | import numpy as np 27 | import torch 28 | import torch.backends.cudnn as cudnn 29 | import torch.nn as nn 30 | import torchvision 31 | import torchvision.transforms as transforms 32 | from torch.utils.data import DataLoader, Dataset 33 | from model_define.model import My_Model 34 | import torch.nn.utils.prune as prune 35 | from vgg_net_cifar10 import VGG 36 | 37 | device = "cuda:1" if torch.cuda.is_available() else "cpu" 38 | 39 | current_path = os.path.dirname(__file__) 40 | class Feature_Dataset(Dataset): 41 | 42 | def __init__(self, X, Y) -> None: 43 | """Packed Features extracted from VGG19 feature layer which will 44 | be further concatenated with random feature layers and classification layers. 45 | 46 | Arguments: 47 | X -- Features of data 48 | Y -- Labels of data 49 | """ 50 | super().__init__() 51 | self.X, self.Y = X, Y 52 | 53 | def __getitem__(self, idx): 54 | return self.X[idx, :], self.Y[idx] 55 | 56 | def __len__(self): 57 | return self.X.shape[0] 58 | 59 | 60 | if __name__ == '__main__': 61 | # gpu_usage() 62 | # ------------------------------Data Preparing------------------------------------ 63 | model_vgg = VGG('VGG19') 64 | model_vgg.load_state_dict(torch.load('./model_vgg')) 65 | 66 | transform_train = transforms.Compose([ 67 | transforms.RandomCrop(32, padding=4), 68 | transforms.RandomHorizontalFlip(), 69 | transforms.ToTensor(), 70 | transforms.Normalize((0.4914, 0.4822, 0.4465), 71 | (0.2023, 0.1994, 0.2010)), 72 | ]) 73 | 74 | transform_test = transforms.Compose([ 75 | transforms.ToTensor(), 76 | transforms.Normalize((0.4914, 0.4822, 0.4465), 77 | (0.2023, 0.1994, 0.2010)), 78 | ]) 79 | 80 | train_data = torchvision.datasets.CIFAR10(root='./data', 81 | train=True, 82 | download=True, 83 | transform=transform_train) 84 | trainloader = torch.utils.data.DataLoader(train_data, 85 | batch_size=50000, 86 | shuffle=False) 87 | 88 | test_data = torchvision.datasets.CIFAR10(root='./data', 89 | train=False, 90 | download=True, 91 | transform=transform_test) 92 | testloader = torch.utils.data.DataLoader(test_data, 93 | batch_size=10000, 94 | shuffle=False) 95 | 96 | classes = ('Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 97 | 'Horse', 'Ship', 'Truck') 98 | 99 | # ------------------------------Feature Extration---------------------------------- 100 | train_data, train_label = next(iter(trainloader)) 101 | test_data, test_label = next(iter(testloader)) 102 | with torch.no_grad(): 103 | feature_train = model_vgg.features(train_data) 104 | feature_train = feature_train.view(feature_train.shape[0], -1) 105 | feature_test = model_vgg.features(test_data) 106 | feature_test = feature_test.view(feature_test.shape[0], -1) 107 | p = feature_train.shape[1] 108 | N = feature_train.shape[0] 109 | mean_selected_data = torch.mean(feature_train, dim=0) 110 | norm2_selected_data = torch.sum( 111 | (feature_train - mean_selected_data)**2, (0, 1)) / N 112 | feature_train = feature_train - mean_selected_data 113 | feature_train = feature_train / np.sqrt(norm2_selected_data) 114 | 115 | p = feature_test.shape[1] 116 | N = 
feature_test.shape[0] 117 | mean_selected_data = torch.mean(feature_test, dim=0) 118 | norm2_selected_data = torch.sum((feature_test - mean_selected_data)**2, 119 | (0, 1)) / N 120 | feature_test = feature_test - mean_selected_data 121 | feature_test = feature_test / np.sqrt(norm2_selected_data) 122 | 123 | # dataset for future training testing 124 | feature_train_dataset = Feature_Dataset(feature_train, train_label) 125 | feature_test_dataset = Feature_Dataset(feature_test, test_label) 126 | 127 | tau_zero = torch.sqrt( 128 | torch.mean(torch.diag(torch.mm(feature_train, 129 | feature_train.t())))).detach().numpy() 130 | print(tau_zero) 131 | # -------------------------------- Network Setting--------------------------------- 132 | # origin network setting 133 | layer_num = 3 # layer number for network 134 | input_num = 512 # input dimension for network 784/256 135 | weight_num_list = [400, 400, 200] # number for neurons for each layer 136 | # weight_num_list = [1000, 1000, 500] 137 | # weight_num_list = [3000, 3000, 1000] 138 | # weight_num_list = [5000, 5000, 2500] 139 | # weight_num_list = [10000, 10000, 5000] 140 | activation_list = [ 141 | { 142 | 'name': 'ReLU', 143 | 'args': None 144 | }, 145 | # {'name' : 'Binary_Zero', 'args' : {'s1':1, 's2': 2, 'b1': 1}}, 146 | # {'name' : 'ReLU', 'args' : None}, 147 | { 148 | 'name': 'ReLU', 149 | 'args': None 150 | }, 151 | { 152 | 'name': 'ReLU', 153 | 'args': None 154 | } 155 | ] 156 | # {'name' : 'ReLU', 'args' : None}] # activation for each layer, if with param, write as Binary_Zero here 157 | 158 | # define origin model 159 | model = My_Model(layer_num=layer_num, 160 | input_num=input_num, 161 | weight_num_list=weight_num_list, 162 | activation_list=activation_list, 163 | tau_zero=tau_zero) 164 | 165 | model_origin = nn.Sequential( 166 | OrderedDict([ 167 | ('feature', model), 168 | ('classification', 169 | nn.Linear(model.weight_num_list[-1], 10, bias=False)), 170 | ('activation', nn.Softmax()), 171 | ])) 172 | 173 | # --------------------------------Model Initilization------------------------------- 174 | 175 | # model initialization 176 | initialization_way = 'ternary' # select from ['normal', 'random_sparsity', 'ternary'] 177 | kesi = 0.9 # change from 0 to 1, only used for ['random sparsity', 'ternary'] 178 | threshould = 0.9 179 | 180 | if initialization_way == 'normal': 181 | # normal initialization 182 | for fc in model_origin.feature.fc_layers: 183 | nn.init.normal_(fc.weight) 184 | fc.weight.requires_grad = False 185 | elif initialization_way == 'random_sparsity': 186 | # random sparse gaussian weight(break assumption1) 187 | for fc in model_origin.feature.fc_layers: 188 | mask = np.zeros(fc.weight.shape).flatten() 189 | mask[:round((1 - kesi) * mask.size)] = 1 190 | np.random.shuffle(mask) 191 | mask = torch.tensor(mask.reshape(fc.weight.shape)).float() 192 | nn.init.normal_(fc.weight) 193 | with torch.no_grad(): 194 | fc.weight = torch.nn.Parameter(mask * fc.weight.data, 195 | requires_grad=False) 196 | elif initialization_way == 'ternary': 197 | # tarnary weight with sparsity kesi 198 | for fc in model_origin.feature.fc_layers: 199 | init = np.zeros(fc.weight.shape).flatten() 200 | init[:round(1 / 2 * (1 - kesi) * 201 | init.size)] = 1 / np.sqrt(1 - kesi) 202 | init[round(1 / 2 * (1 - kesi) * init.size):2 * 203 | round(1 / 2 * 204 | (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi) 205 | # c = Counter(init) 206 | np.random.shuffle(init) 207 | init = torch.tensor(init.reshape(fc.weight.shape)).float() 208 | with 
torch.no_grad():
209 |                 fc.weight = torch.nn.Parameter(init, requires_grad=False)
210 |     elif initialization_way == 'pruning':
211 |         for fc in model_origin.feature.fc_layers:
212 |             nn.init.normal_(fc.weight)
213 |             fc.weight.requires_grad = False
214 | 
215 |         # magnitude-based pruning
216 |         # threshould = 0.95
217 |         for fc in model_origin.feature.fc_layers:
218 |             prune.l1_unstructured(fc, 'weight', amount=threshould)
219 |             fc.weight.requires_grad = False
220 |     # ------------------------------------Preparing-------------------------------------
221 |     net = model_origin
222 |     batch_size = 128
223 |     lr = 0.01
224 |     config = {"save_path": "./model_origin", "early_stop": 20, 'n_epochs': 200}
225 |     # define data
226 |     feature_train_dataloader = DataLoader(feature_train_dataset,
227 |                                           batch_size=batch_size,
228 |                                           shuffle=False)
229 |     feature_test_dataloader = DataLoader(feature_test_dataset,
230 |                                          batch_size=batch_size,
231 |                                          shuffle=False)
232 | 
233 |     net = net.to(device)
234 |     if device == 'cuda:1':
235 |         # net = torch.nn.DataParallel(net, device_ids=device_id)
236 |         cudnn.benchmark = True
237 | 
238 |     epochs, best_loss, step, early_stop_count = config[
239 |         'n_epochs'], math.inf, 0, 0
240 |     optimizer = torch.optim.Adam(net.parameters(), lr=lr)
241 |     # optimizer = torch.optim.SGD(net.parameters(),
242 |     #                             lr=lr,
243 |     #                             momentum=0.9,
244 |     #                             weight_decay=5e-4)
245 |     criterion = nn.CrossEntropyLoss()
246 | 
247 |     # ------------------------------Training and Validation-----------------------------
248 |     for epoch in range(epochs):
249 |         net.train()
250 |         loss_record = []
251 |         accuracy_record = []
252 |         for train_data, train_label in feature_train_dataloader:
253 |             optimizer.zero_grad()
254 |             # train_data , train_label = train_data.to(device), train_label.to(device)
255 |             # X, y = train_data.to(device), train_label.to(device)
256 |             train_data, train_label = train_data.to(device), train_label.to(
257 |                 device)
258 |             pred = net(train_data)
259 |             # print(pred.device)
260 |             # print(train_label.device)
261 |             loss = criterion(pred, train_label)
262 |             loss.backward()
263 |             optimizer.step()
264 |             loss_record.append(loss.item())
265 |             # accuracy
266 |             _, index = pred.data.cpu().topk(1, dim=1)
267 |             index_label = train_label.data.cpu()
268 |             accuracy_batch = np.sum(
269 |                 (index.squeeze(dim=1) == index_label).numpy())
270 |             accuracy_batch = accuracy_batch / len(train_label)
271 |             accuracy_record.append(accuracy_batch)
272 |         train_loss = sum(loss_record) / len(loss_record)
273 |         train_accuracy = sum(accuracy_record) / len(accuracy_record)
274 | 
275 |         # validation
276 |         net.eval()
277 |         loss_record = []
278 |         accuracy_record = []
279 |         for val_data, val_label in feature_test_dataloader:
280 |             # X, y = val_data.to(device), val_label.to(device)
281 |             val_data, val_label = val_data.to(device), val_label.to(device)
282 |             with torch.no_grad():
283 |                 pred = net(val_data)
284 |             loss = criterion(pred, val_label)
285 |             loss_record.append(loss.item())
286 |             # accuracy
287 |             _, index = pred.data.cpu().topk(1, dim=1)
288 |             index_label = val_label.data.cpu()
289 |             accuracy_batch = np.sum(
290 |                 (index.squeeze(dim=1) == index_label).numpy())
291 |             accuracy_batch = accuracy_batch / len(val_label)
292 |             accuracy_record.append(accuracy_batch)
293 |         val_loss = sum(loss_record) / len(loss_record)  # average over the recorded validation batches
294 |         val_accuracy = sum(accuracy_record) / len(accuracy_record)
295 | 
296 |         print(
297 |             f'Epoch [{epoch+1}/{epochs}]: Train loss: {train_loss:.4f}, Train accuracy: {train_accuracy:.4f}, Valid loss: {val_loss:.4f}, Valid accuracy: 
{val_accuracy:.4f}'
298 |         )
299 | 
300 |         if val_loss < best_loss:
301 |             best_loss = val_loss
302 |             torch.save(net.state_dict(),
303 |                        config['save_path'])  # Save your best model
304 |             print('Saving model with loss {:.3f}...'.format(best_loss))
305 |             early_stop_count = 0
306 |         else:
307 |             early_stop_count += 1
308 | 
309 |         if early_stop_count >= config['early_stop']:
310 |             print('\nModel is not improving, so we halt the training session.')
311 |             break
312 |     # here just a test for all binary activation
313 | 
314 |     # origin performance
315 |     model_origin_final_accuracy, model_origin_final_loss = val_accuracy, val_loss
316 | 
317 |     print(
318 |         f'origin model: Valid loss: {model_origin_final_loss:.4f}, Valid accuracy: {model_origin_final_accuracy:.4f}'
319 |     )
320 | 
--------------------------------------------------------------------------------
/code/compression/cifar10/performance_two.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """Compress a model with our ternary compression algorithm and use the
4 | compressed network to classify data sampled from the CIFAR10 dataset.
5 | 
6 | The network can be customized; see class My_Model for details on how to customize
7 | the model, to which a classification layer is appended. Note that the VGG net is
8 | first trained on the full CIFAR10 dataset for better classification.
9 | """
10 | 
11 | __author__ = "Model_compression"
12 | __copyright__ = "Copyright 2021, Lossless compression"
13 | __license__ = "GPL"
14 | __version__ = "1.0.1"
15 | __email__ = "yongqi_du@hust.edu.cn"
16 | __status__ = "Production"
17 | import sys
18 | import os
19 | 
20 | sys.path.append(
21 |     os.path.dirname(
22 |         os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))
23 | import math
24 | from collections import Counter, OrderedDict
25 | 
26 | import numpy as np
27 | import torch
28 | import torch.backends.cudnn as cudnn
29 | import torch.nn as nn
30 | import torchvision
31 | import torchvision.transforms as transforms
32 | from torch.utils.data import DataLoader, Dataset
33 | from model_define.model import My_Model
34 | from equation_solve.solve_equation import solve_equation
35 | 
36 | from vgg_net_cifar10 import VGG
37 | 
38 | device = "cuda:1" if torch.cuda.is_available() else "cpu"
39 | 
40 | 
41 | class Feature_Dataset(Dataset):
42 | 
43 |     def __init__(self, X, Y) -> None:
44 |         super().__init__()
45 |         self.X, self.Y = X, Y
46 | 
47 |     def __getitem__(self, idx):
48 |         return self.X[idx, :], self.Y[idx]
49 | 
50 |     def __len__(self):
51 |         return self.X.shape[0]
52 | 
53 | 
54 | class sample_ternary_weight(object):  # draws 0 w.p. kesi, +-1/sqrt(1-kesi) w.p. (1-kesi)/2 each
55 | 
56 |     def __init__(self, kesi) -> None:
57 |         self.kesi = kesi
58 | 
59 |     def sample(self):
60 |         init = torch.distributions.uniform.Uniform(0,
61 |                                                    1).sample(torch.Size([1]))
62 |         if init < self.kesi:
63 |             init = 0
64 |         else:
65 |             mask = torch.distributions.bernoulli.Bernoulli(0.5).sample(
66 |                 torch.Size([1]))
67 |             if mask == 0:
68 |                 init = 1 / np.sqrt(1 - self.kesi)
69 |             else:
70 |                 init = -1 / np.sqrt(1 - self.kesi)
71 |         return init
72 | 
73 |     def sample_n(self, number):
74 |         init = [self.sample() for _ in range(number)]
75 |         init = np.array(init)
76 |         return init
77 | 
78 | 
79 | if __name__ == '__main__':
80 |     # data
81 |     # gpu_usage()
82 |     # ------------------------------Data Preparing------------------------------------
83 |     model_vgg = VGG('VGG19')
84 |     model_vgg.load_state_dict(torch.load('./model_vgg'))
85 |     # load data
86 |     transform_train = transforms.Compose([
87 | 
transforms.RandomCrop(32, padding=4), 88 | transforms.RandomHorizontalFlip(), 89 | transforms.ToTensor(), 90 | transforms.Normalize((0.4914, 0.4822, 0.4465), 91 | (0.2023, 0.1994, 0.2010)), 92 | ]) 93 | 94 | transform_test = transforms.Compose([ 95 | transforms.ToTensor(), 96 | transforms.Normalize((0.4914, 0.4822, 0.4465), 97 | (0.2023, 0.1994, 0.2010)), 98 | ]) 99 | 100 | train_data = torchvision.datasets.CIFAR10(root='./data', 101 | train=True, 102 | download=True, 103 | transform=transform_train) 104 | trainloader = torch.utils.data.DataLoader(train_data, 105 | batch_size=50000, 106 | shuffle=False) 107 | 108 | test_data = torchvision.datasets.CIFAR10(root='./data', 109 | train=False, 110 | download=True, 111 | transform=transform_test) 112 | testloader = torch.utils.data.DataLoader(test_data, 113 | batch_size=10000, 114 | shuffle=False) 115 | 116 | classes = ('Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 117 | 'Horse', 'Ship', 'Truck') 118 | 119 | # ------------------------------Feature Extration---------------------------------- 120 | # output of feature 121 | train_data, train_label = next(iter(trainloader)) 122 | test_data, test_label = next(iter(testloader)) 123 | with torch.no_grad(): 124 | feature_train = model_vgg.features(train_data) 125 | feature_train = feature_train.view(feature_train.shape[0], -1) 126 | feature_test = model_vgg.features(test_data) 127 | feature_test = feature_test.view(feature_test.shape[0], -1) 128 | p = feature_train.shape[1] 129 | N = feature_train.shape[0] 130 | mean_selected_data = torch.mean(feature_train, dim=0) 131 | norm2_selected_data = torch.sum( 132 | (feature_train - mean_selected_data)**2, (0, 1)) / N 133 | feature_train = feature_train - mean_selected_data 134 | feature_train = feature_train / np.sqrt(norm2_selected_data) 135 | 136 | p = feature_test.shape[1] 137 | N = feature_test.shape[0] 138 | mean_selected_data = torch.mean(feature_test, dim=0) 139 | norm2_selected_data = torch.sum((feature_test - mean_selected_data)**2, 140 | (0, 1)) / N 141 | feature_test = feature_test - mean_selected_data 142 | feature_test = feature_test / np.sqrt(norm2_selected_data) 143 | 144 | # dataset for future training testing 145 | feature_train_dataset = Feature_Dataset(feature_train, train_label) 146 | feature_test_dataset = Feature_Dataset(feature_test, test_label) 147 | 148 | tau_zero = torch.sqrt( 149 | torch.mean(torch.diag(torch.mm(feature_train, 150 | feature_train.t())))).detach().numpy() 151 | 152 | # tau_zero = 1 153 | print(tau_zero) 154 | 155 | # -------------------------------- Network Setting--------------------------------- 156 | # origin network setting 157 | layer_num = 3 # layer number for network 158 | input_num = 512 # input dimension for network 784/256 159 | weight_num_list = [400, 400, 200] # number for neurons for each layer 160 | activation_list = [ 161 | { 162 | 'name': 'ReLU', 163 | 'args': None 164 | }, 165 | # {'name' : 'Binary_Zero', 'args' : {'s1':1, 's2': 2, 'b1': 1}}, 166 | { 167 | 'name': 'ReLU', 168 | 'args': None 169 | }, 170 | # {'name' : 'Sign', 'args' : None}, 171 | # {'name' : 'Sign', 'args' : None}] 172 | { 173 | 'name': 'ReLU', 174 | 'args': None 175 | } 176 | ] # activation for each layer, if with param, write as Binary_Zero here 177 | 178 | # define origin model 179 | model = My_Model(layer_num=layer_num, 180 | input_num=input_num, 181 | weight_num_list=weight_num_list, 182 | activation_list=activation_list, 183 | tau_zero=tau_zero) 184 | res = solve_equation(model, tau_zero, loop=10) 185 | 
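    # solve_equation (see code/equation_solve/solve_equation.py) solves the
    # equations that define the parameters of the new activation functions, so
    # that the compressed network matches the kernel statistics of the original
    # dense model (the core step of the NTK-LC approach); it returns a new
    # activation_list in the same {'name': ..., 'args': ...} format as above.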
activation_list = res 186 | # define compressed model!!!!!!!!!!! 187 | new_model = My_Model(layer_num=layer_num, 188 | input_num=input_num, 189 | weight_num_list=weight_num_list, 190 | activation_list=activation_list, 191 | tau_zero=tau_zero) 192 | model_new = nn.Sequential( 193 | OrderedDict([ 194 | ('feature', new_model), 195 | ('classification', 196 | nn.Linear(model.weight_num_list[-1], 10, bias=False)), 197 | ('activation', nn.Softmax()), 198 | ])) 199 | 200 | # --------------------------------Model Initilization------------------------------- 201 | # model initialization 202 | initialization_way = 'ternary' # select from ['normal', 'random_sparsity', 'ternary'] 203 | kesi = 0.95 # change from 0 to 1, only used for ['random sparsity', 'ternary'] 204 | 205 | if initialization_way == 'normal': 206 | # normal initialization 207 | for fc in model_new.feature.fc_layers: 208 | nn.init.normal_(fc.weight) 209 | fc.weight.requires_grad = False 210 | elif initialization_way == 'random_sparsity': 211 | # random sparse gaussian weight(break assumption1) 212 | for fc in model_new.feature.fc_layers: 213 | mask = np.zeros(fc.weight.shape).flatten() 214 | mask[:round((1 - kesi) * mask.size)] = 1 215 | np.random.shuffle(mask) 216 | mask = torch.tensor(mask.reshape(fc.weight.shape)).float() 217 | nn.init.normal_(fc.weight) 218 | with torch.no_grad(): 219 | fc.weight = torch.nn.Parameter(mask * fc.weight.data, 220 | requires_grad=False) 221 | elif initialization_way == 'ternary': 222 | # tarnary weight with sparsity kesi 223 | for fc in model_new.feature.fc_layers: 224 | init = np.zeros(fc.weight.shape).flatten() 225 | init[:round(1 / 2 * (1 - kesi) * 226 | init.size)] = 1 / np.sqrt(1 - kesi) 227 | init[round(1 / 2 * (1 - kesi) * init.size):2 * 228 | round(1 / 2 * 229 | (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi) 230 | # c = Counter(init) 231 | np.random.shuffle(init) 232 | init = torch.tensor(init.reshape(fc.weight.shape)).float() 233 | with torch.no_grad(): 234 | fc.weight = torch.nn.Parameter(init, requires_grad=False) 235 | 236 | # --------------------------------Preparing------------------------------------------- 237 | batch_size = 128 238 | # define data 239 | feature_train_dataloader = DataLoader(feature_train_dataset, 240 | batch_size=batch_size, 241 | shuffle=False, 242 | num_workers=10, 243 | pin_memory=True) 244 | feature_test_dataloader = DataLoader(feature_test_dataset, 245 | batch_size=batch_size, 246 | shuffle=False, 247 | num_workers=10, 248 | pin_memory=True) 249 | 250 | # you can load model origin as the initialization values for model new(the classification layer) 251 | # path = "./model_origin[5000, 5000, 2500]" 252 | # # '''load origin parameters!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!''' 253 | # state_dict= torch.load(path) 254 | # model_new.classification.weight = torch.nn.Parameter(state_dict['classification.weight'].detach().cpu()) 255 | net = model_new 256 | criterion = nn.CrossEntropyLoss() 257 | net = net.to(device) 258 | net.eval() 259 | loss_record = [] 260 | accuracy_record = [] 261 | # ---------------------------Origin Loading Validation(optional)----------------------------- 262 | # Validation the initial accuracy for model(after loading parameters from origin model) 263 | # validation 264 | for val_data, val_label in feature_test_dataloader: 265 | val_data, val_label = val_data.to(device), val_label.to(device) 266 | with torch.no_grad(): 267 | pred = net(val_data) 268 | loss = criterion(pred, val_label) 269 | loss_record.append(loss.item()) 270 | # accuracy 
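        # top-1 accuracy: topk(1) returns the index of the largest logit per
        # sample; comparing it with the integer class labels and averaging
        # gives the per-batch accuracy accumulated in accuracy_record.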
271 | _, index = pred.data.cpu().topk(1) 272 | index_label = val_label.data.cpu() 273 | accuracy_batch = np.sum((index.squeeze(dim=1) == index_label).numpy()) 274 | accuracy_batch = accuracy_batch / len(val_label) 275 | accuracy_record.append(accuracy_batch) 276 | val_loss = sum(loss_record) / len(feature_test_dataloader) 277 | val_accuracy = sum(accuracy_record) / len(accuracy_record) 278 | 279 | model_new_noretrain_accuracy, model_new_noretrain_loss = val_accuracy, val_loss 280 | print( 281 | f'compressed model(without retrain): Valid loss: {model_new_noretrain_loss:.4f}, Valid accuracy: {model_new_noretrain_accuracy:.4f}' 282 | ) 283 | 284 | # --------------------------------Preparing------------------------------------------ 285 | net = model_new 286 | net = net.to(device) 287 | if device == 'cuda:1': 288 | cudnn.benchmark = True 289 | batch_size = 128 290 | lr = 0.01 291 | config = {"save_path": "./model_origin", "early_stop": 20, 'n_epochs': 500} 292 | early_stop_count = 0 293 | epochs, best_loss, step, early_stop_count = config[ 294 | 'n_epochs'], math.inf, 0, 0 295 | optimizer = torch.optim.SGD(net.parameters(), 296 | lr=lr, 297 | momentum=0.9, 298 | weight_decay=5e-4) 299 | criterion = nn.CrossEntropyLoss() 300 | 301 | # ------------------------------Training and Validation------------------------------- 302 | for epoch in range(epochs): 303 | # Training 304 | net.train() 305 | loss_record = [] 306 | accuracy_record = [] 307 | for train_data, train_label in feature_train_dataloader: 308 | optimizer.zero_grad() 309 | train_data, train_label = train_data.to(device), train_label.to( 310 | device) 311 | pred = net(train_data) 312 | loss = criterion(pred, train_label) 313 | loss.backward() 314 | optimizer.step() 315 | loss_record.append(loss.item()) 316 | # accuracy 317 | _, index = pred.data.cpu().topk(1, dim=1) 318 | index_label = train_label.data.cpu() 319 | accuracy_batch = np.sum( 320 | (index.squeeze(dim=1) == index_label).numpy()) 321 | accuracy_batch = accuracy_batch / len(train_label) 322 | accuracy_record.append(accuracy_batch) 323 | train_loss = sum(loss_record) / len(loss_record) 324 | train_accuracy = sum(accuracy_record) / len(accuracy_record) 325 | # validation 326 | net.eval() 327 | loss_record = [] 328 | accuracy_record = [] 329 | for val_data, val_label in feature_test_dataloader: 330 | val_data, val_label = val_data.to(device), val_label.to(device) 331 | with torch.no_grad(): 332 | pred = net(val_data) 333 | loss = criterion(pred, val_label) 334 | loss_record.append(loss.item()) 335 | # accuracy 336 | _, index = pred.data.cpu().topk(1, dim=1) 337 | index_label = val_label.data.cpu() 338 | accuracy_batch = np.sum( 339 | (index.squeeze(dim=1) == index_label).numpy()) 340 | accuracy_batch = accuracy_batch / len(val_label) 341 | accuracy_record.append(accuracy_batch) 342 | val_loss = sum(loss_record) / len(loss_record) 343 | val_accuracy = sum(accuracy_record) / len(accuracy_record) 344 | 345 | print( 346 | f'Epoch [{epoch+1}/{epochs}]: Train loss: {train_loss:.4f},Train accuracy: {train_accuracy:.4f}, Valid loss: {val_loss:.4f}, Valid accuracy: {val_accuracy:.4f}' 347 | ) 348 | 349 | if val_loss < best_loss: 350 | best_loss = val_loss 351 | torch.save(net.state_dict(), "./model_new") # Save your best model 352 | print('Saving model with loss {:.3f}...'.format(best_loss)) 353 | early_stop_count = 0 354 | else: 355 | early_stop_count += 1 356 | 357 | if early_stop_count >= config['early_stop']: 358 | print('\nModel is not improving, so we halt the training session.') 359 
| break
360 | 
361 |     # --------------------------------Calculate Memories---------------------------------------
362 |     # calculate the origin model's memory (32 bits per weight and per activation parameter)
363 |     me_org = (input_num * weight_num_list[0] + weight_num_list[0] *
364 |               weight_num_list[1] + weight_num_list[1] * weight_num_list[2] +
365 |               10 * weight_num_list[2]) * 32 + (sum(weight_num_list)) * 32  # the classification layer has 10 outputs
366 |     print('MEM_origin = ', me_org)
367 | 
368 |     # calculate the new model's memory (only the (1 - kesi) fraction of non-zero ternary weights is stored)
369 |     me_new = (input_num * weight_num_list[0] + weight_num_list[0] *
370 |               weight_num_list[1] + weight_num_list[1] * weight_num_list[2] +
371 |               10 * weight_num_list[2]) * (1 - kesi) + (sum(weight_num_list))
372 |     print('MEM_new = ', me_new)
373 | 
374 |     print(str(weight_num_list))
375 |     print(kesi)
376 |     print(
377 |         f'compressed model(without retrain): Valid loss: {model_new_noretrain_loss:.4f}, Valid accuracy: {model_new_noretrain_accuracy:.4f}'
378 |     )
379 |     print(
380 |         f'compressed model(with retrain): Valid loss: {val_loss:.4f}, Valid accuracy: {val_accuracy:.4f}'
381 |     )
382 | 
--------------------------------------------------------------------------------
/code/compression/cifar10/performance_two_origin.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """Using a custom network to classify data sampled from the CIFAR10 dataset.
4 | 
5 | The network can be customized; see class My_Model for details on how to customize
6 | the model, to which a classification layer is appended. Note that the VGG net is
7 | first trained on the full CIFAR10 dataset for better classification.
8 | """
9 | 
10 | __author__ = "Model_compression"
11 | __copyright__ = "Copyright 2021, Lossless compression"
12 | __license__ = "GPL"
13 | __version__ = "1.0.1"
14 | __email__ = "yongqi_du@hust.edu.cn"
15 | __status__ = "Production"
16 | 
17 | import sys
18 | import os
19 | 
20 | sys.path.append(
21 |     os.path.dirname(
22 |         os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))
23 | import math
24 | from collections import Counter, OrderedDict
25 | 
26 | import numpy as np
27 | import torch
28 | import torch.backends.cudnn as cudnn
29 | import torch.nn as nn
30 | import torchvision
31 | import torchvision.transforms as transforms
32 | from torch.utils.data import DataLoader, Dataset
33 | from model_define.model import My_Model
34 | 
35 | from vgg_net_cifar10 import VGG
36 | 
37 | device = "cuda:1" if torch.cuda.is_available() else "cpu"
38 | 
39 | 
40 | class Feature_Dataset(Dataset):
41 | 
42 |     def __init__(self, X, Y) -> None:
43 |         """Packs features extracted from the VGG19 feature layers, which will
44 |         be further concatenated with random-feature layers and a classification layer.
45 | 46 | Arguments: 47 | X -- Features of data 48 | Y -- Labels of data 49 | """ 50 | super().__init__() 51 | self.X, self.Y = X, Y 52 | 53 | def __getitem__(self, idx): 54 | return self.X[idx, :], self.Y[idx] 55 | 56 | def __len__(self): 57 | return self.X.shape[0] 58 | 59 | 60 | if __name__ == '__main__': 61 | # gpu_usage() 62 | # ------------------------------Data Preparing------------------------------------ 63 | model_vgg = VGG('VGG19') 64 | model_vgg.load_state_dict(torch.load('./model_vgg')) 65 | 66 | transform_train = transforms.Compose([ 67 | transforms.RandomCrop(32, padding=4), 68 | transforms.RandomHorizontalFlip(), 69 | transforms.ToTensor(), 70 | transforms.Normalize((0.4914, 0.4822, 0.4465), 71 | (0.2023, 0.1994, 0.2010)), 72 | ]) 73 | 74 | transform_test = transforms.Compose([ 75 | transforms.ToTensor(), 76 | transforms.Normalize((0.4914, 0.4822, 0.4465), 77 | (0.2023, 0.1994, 0.2010)), 78 | ]) 79 | 80 | train_data = torchvision.datasets.CIFAR10(root='./data', 81 | train=True, 82 | download=True, 83 | transform=transform_train) 84 | trainloader = torch.utils.data.DataLoader(train_data, 85 | batch_size=50000, 86 | shuffle=False) 87 | 88 | test_data = torchvision.datasets.CIFAR10(root='./data', 89 | train=False, 90 | download=True, 91 | transform=transform_test) 92 | testloader = torch.utils.data.DataLoader(test_data, 93 | batch_size=10000, 94 | shuffle=False) 95 | 96 | classes = ('Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 97 | 'Horse', 'Ship', 'Truck') 98 | 99 | # ------------------------------Feature Extration---------------------------------- 100 | train_data, train_label = next(iter(trainloader)) 101 | test_data, test_label = next(iter(testloader)) 102 | with torch.no_grad(): 103 | feature_train = model_vgg.features(train_data) 104 | feature_train = feature_train.view(feature_train.shape[0], -1) 105 | feature_test = model_vgg.features(test_data) 106 | feature_test = feature_test.view(feature_test.shape[0], -1) 107 | p = feature_train.shape[1] 108 | N = feature_train.shape[0] 109 | mean_selected_data = torch.mean(feature_train, dim=0) 110 | norm2_selected_data = torch.sum( 111 | (feature_train - mean_selected_data)**2, (0, 1)) / N 112 | feature_train = feature_train - mean_selected_data 113 | feature_train = feature_train / np.sqrt(norm2_selected_data) 114 | 115 | p = feature_test.shape[1] 116 | N = feature_test.shape[0] 117 | mean_selected_data = torch.mean(feature_test, dim=0) 118 | norm2_selected_data = torch.sum((feature_test - mean_selected_data)**2, 119 | (0, 1)) / N 120 | feature_test = feature_test - mean_selected_data 121 | feature_test = feature_test / np.sqrt(norm2_selected_data) 122 | 123 | # dataset for future training testing 124 | feature_train_dataset = Feature_Dataset(feature_train, train_label) 125 | feature_test_dataset = Feature_Dataset(feature_test, test_label) 126 | 127 | tau_zero = torch.sqrt( 128 | torch.mean(torch.diag(torch.mm(feature_train, 129 | feature_train.t())))).detach().numpy() 130 | print(tau_zero) 131 | # -------------------------------- Network Setting--------------------------------- 132 | # origin network setting 133 | layer_num = 3 # layer number for network 134 | input_num = 512 # input dimension for network 784/256 135 | weight_num_list = [3000, 3000, 1000] # number for neurons for each layer 136 | activation_list = [ 137 | { 138 | 'name': 'ReLU', 139 | 'args': None 140 | }, 141 | # {'name' : 'Binary_Zero', 'args' : {'s1':1, 's2': 2, 'b1': 1}}, 142 | # {'name' : 'ReLU', 'args' : None}, 143 | { 144 | 'name': 'ReLU', 
145 | 'args': None 146 | }, 147 | { 148 | 'name': 'ReLU', 149 | 'args': None 150 | } 151 | ] 152 | # {'name' : 'ReLU', 'args' : None}] # activation for each layer, if with param, write as Binary_Zero here 153 | 154 | # define origin model 155 | model = My_Model(layer_num=layer_num, 156 | input_num=input_num, 157 | weight_num_list=weight_num_list, 158 | activation_list=activation_list, 159 | tau_zero=tau_zero) 160 | 161 | model_origin = nn.Sequential( 162 | OrderedDict([ 163 | ('feature', model), 164 | ('classification', 165 | nn.Linear(model.weight_num_list[-1], 10, bias=False)), 166 | ('activation', nn.Softmax()), 167 | ])) 168 | 169 | # --------------------------------Model Initilization------------------------------- 170 | for fc in model_origin.feature.fc_layers: 171 | nn.init.normal_(fc.weight) 172 | fc.weight.requires_grad = False 173 | 174 | # ------------------------------------Preparing------------------------------------- 175 | net = model_origin 176 | batch_size = 128 177 | lr = 0.01 178 | config = {"save_path": "./model_origin", "early_stop": 20, 'n_epochs': 500} 179 | # define data 180 | feature_train_dataloader = DataLoader(feature_train_dataset, 181 | batch_size=batch_size, 182 | shuffle=False) 183 | feature_test_dataloader = DataLoader(feature_test_dataset, 184 | batch_size=batch_size, 185 | shuffle=False) 186 | 187 | net = net.to(device) 188 | if device == 'cuda:1': 189 | # net = torch.nn.DataParallel(net, device_ids=device_id) 190 | cudnn.benchmark = True 191 | 192 | epochs, best_loss, step, early_stop_count = config[ 193 | 'n_epochs'], math.inf, 0, 0 194 | optimizer = torch.optim.SGD(net.parameters(), 195 | lr=lr, 196 | momentum=0.9, 197 | weight_decay=5e-4) 198 | criterion = nn.CrossEntropyLoss() 199 | 200 | # ------------------------------Training and Validation----------------------------- 201 | for epoch in range(epochs): 202 | net.train() 203 | loss_record = [] 204 | accuracy_record = [] 205 | for train_data, train_label in feature_train_dataloader: 206 | optimizer.zero_grad() 207 | # train_data , train_label = train_data.to(device), train_label.to(device) 208 | # X, y = train_data.to(device), train_label.to(device) 209 | train_data, train_label = train_data.to(device), train_label.to( 210 | device) 211 | pred = net(train_data) 212 | # print(pred.device) 213 | # print(train_label.device) 214 | loss = criterion(pred, train_label) 215 | loss.backward() 216 | optimizer.step() 217 | loss_record.append(loss.item()) 218 | # accuracy 219 | _, index = pred.data.cpu().topk(1, dim=1) 220 | index_label = train_label.data.cpu() 221 | accuracy_batch = np.sum( 222 | (index.squeeze(dim=1) == index_label).numpy()) 223 | accuracy_batch = accuracy_batch / len(train_label) 224 | accuracy_record.append(accuracy_batch) 225 | train_loss = sum(loss_record) / len(loss_record) 226 | train_accuracy = sum(accuracy_record) / len(accuracy_record) 227 | 228 | # validation 229 | net.eval() 230 | loss_record = [] 231 | accuracy_record = [] 232 | for val_data, val_label in feature_test_dataloader: 233 | # X, y = val_data.to(device), val_label.to(device) 234 | val_data, val_label = val_data.to(device), val_label.to(device) 235 | with torch.no_grad(): 236 | pred = net(val_data) 237 | loss = criterion(pred, val_label) 238 | loss_record.append(loss.item()) 239 | # accuracy 240 | _, index = pred.data.cpu().topk(1, dim=1) 241 | index_label = val_label.data.cpu() 242 | accuracy_batch = np.sum( 243 | (index.squeeze(dim=1) == index_label).numpy()) 244 | accuracy_batch = accuracy_batch / len(val_label) 245 | 
accuracy_record.append(accuracy_batch)
246 |         val_loss = sum(loss_record) / len(loss_record)  # average over the recorded validation batches
247 |         val_accuracy = sum(accuracy_record) / len(accuracy_record)
248 | 
249 |         print(
250 |             f'Epoch [{epoch+1}/{epochs}]: Train loss: {train_loss:.4f}, Train accuracy: {train_accuracy:.4f}, Valid loss: {val_loss:.4f}, Valid accuracy: {val_accuracy:.4f}'
251 |         )
252 | 
253 |         if val_loss < best_loss:
254 |             best_loss = val_loss
255 |             torch.save(net.state_dict(),
256 |                        config['save_path'])  # Save your best model
257 |             print('Saving model with loss {:.3f}...'.format(best_loss))
258 |             early_stop_count = 0
259 |         else:
260 |             early_stop_count += 1
261 | 
262 |         if early_stop_count >= config['early_stop']:
263 |             print('\nModel is not improving, so we halt the training session.')
264 |             break
265 |     # here just a test for all binary activation
266 | 
267 |     # origin performance
268 |     model_origin_final_accuracy, model_origin_final_loss = val_accuracy, val_loss
269 | 
270 |     print(
271 |         f'origin model: Valid loss: {model_origin_final_loss:.4f}, Valid accuracy: {model_origin_final_accuracy:.4f}'
272 |     )
273 | 
--------------------------------------------------------------------------------
/code/compression/cifar10/vgg_net_cifar10.py:
--------------------------------------------------------------------------------
1 | '''VGG11/13/16/19 in Pytorch.'''
2 | import torch
3 | import torch.nn as nn
4 | 
5 | 
6 | cfg = {
7 |     'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
8 |     'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
9 |     'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
10 |     'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
11 | }
12 | 
13 | 
14 | class VGG(nn.Module):
15 |     def __init__(self, vgg_name, num_classes=10, dropout=0.5):
16 |         super(VGG, self).__init__()
17 |         self.features = self._make_layers(cfg[vgg_name])
18 |         # self.classifier = nn.Sequential(nn.Linear(512, 512), nn.Linear(512, 512), nn.Linear(512, 10))
19 |         self.classifier = nn.Sequential(
20 |             nn.Linear(512, 512),
21 |             nn.ReLU(True),
22 |             nn.Dropout(p=dropout),
23 |             nn.Linear(512, 512),
24 |             nn.ReLU(True),
25 |             nn.Dropout(p=dropout),
26 |             nn.Linear(512, num_classes),
27 |         )
28 |     def forward(self, x):
29 |         out = self.features(x)
30 |         out = out.view(out.size(0), -1)
31 |         out = self.classifier(out)
32 |         return out
33 | 
34 |     def _make_layers(self, cfg):
35 |         layers = []
36 |         in_channels = 3
37 |         for x in cfg:
38 |             if x == 'M':
39 |                 layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
40 |             else:
41 |                 layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
42 |                            nn.BatchNorm2d(x),
43 |                            nn.ReLU(inplace=True)]
44 |                 in_channels = x
45 |         layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
46 |         return nn.Sequential(*layers)
47 | 
48 | class VGG_Feature(nn.Module):
49 |     def __init__(self, vgg_name):
50 |         super(VGG_Feature, self).__init__()
51 |         self.features = self._make_layers(cfg[vgg_name])
52 | 
53 |     def forward(self, x):
54 |         out = self.features(x)
55 |         out = out.view(out.size(0), -1)
56 |         return out
57 | 
58 |     def _make_layers(self, cfg):
59 |         layers = []
60 |         in_channels = 3
61 |         for x in cfg:
62 |             if x == 'M':
63 |                 layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
64 |             else:
65 |                 layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
66 |                            nn.BatchNorm2d(x),
67 |                            nn.ReLU(inplace=True)]
68 |                 in_channels = x
69 |         layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
70 |         return nn.Sequential(*layers)
71 
| 72 | def test(): 73 | net = VGG('VGG11') 74 | x = torch.randn(2,3,32,32) 75 | y = net(x) 76 | print(y.size()) 77 | 78 | test() -------------------------------------------------------------------------------- /code/compression/cifar10/vgg_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Train VGG on all CIFAR10 dataset, which will be further used to concatenate with 4 | some random feature layers and classification layer. 5 | """ 6 | 7 | __author__ = "Model_compression" 8 | __copyright__ = "Copyright 2021, Lossless compression" 9 | __license__ = "GPL" 10 | __version__ = "1.0.1" 11 | __email__ = "yongqi_du@hust.edu.cn" 12 | __status__ = "Production" 13 | __all__ = [ 14 | 'custome_activation_analysis', 'custome_activation_analysis_noparam' 15 | ] 16 | 17 | import math 18 | 19 | import numpy as np 20 | import torch 21 | import torch.backends.cudnn as cudnn 22 | import torch.nn as nn 23 | import torchvision 24 | import torchvision.transforms as transforms 25 | from torch.utils.data import DataLoader 26 | 27 | from vgg_net_cifar10 import VGG, VGG_Feature 28 | 29 | device = "cuda" if torch.cuda.is_available() else "cpu" 30 | 31 | if __name__ == '__main__': 32 | device_id = [1, 2, 3] 33 | batch_size = 64 34 | lr = 0.01 35 | config = {"save_path": "./model_vgg", "early_stop": 20, 'n_epochs': 2} 36 | 37 | # -------------------------------Data Preparation----------------------------------- 38 | transform_train = transforms.Compose([ 39 | transforms.RandomCrop(32, padding=4), 40 | transforms.RandomHorizontalFlip(), 41 | transforms.ToTensor(), 42 | transforms.Normalize((0.4914, 0.4822, 0.4465), 43 | (0.2023, 0.1994, 0.2010)), 44 | ]) 45 | 46 | transform_test = transforms.Compose([ 47 | transforms.ToTensor(), 48 | transforms.Normalize((0.4914, 0.4822, 0.4465), 49 | (0.2023, 0.1994, 0.2010)), 50 | ]) 51 | 52 | train_data = torchvision.datasets.CIFAR10(root='./data', 53 | train=True, 54 | download=True, 55 | transform=transform_train) 56 | trainloader = DataLoader(train_data, 57 | batch_size=batch_size, 58 | shuffle=False, 59 | num_workers=3, 60 | pin_memory=True) 61 | 62 | test_data = torchvision.datasets.CIFAR10(root='./data', 63 | train=False, 64 | download=True, 65 | transform=transform_test) 66 | testloader = DataLoader(test_data, 67 | batch_size=batch_size, 68 | shuffle=False, 69 | num_workers=3, 70 | pin_memory=True) 71 | 72 | classes = ('Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 73 | 'Horse', 'Ship', 'Truck') 74 | 75 | # ----------------------------------Model Setting----------------------------------- 76 | net = VGG('VGG19') 77 | 78 | # ----------------------------------Initilization----------------------------------- 79 | for m in net.modules(): 80 | if isinstance(m, nn.Conv2d): 81 | nn.init.kaiming_normal_(m.weight, 82 | mode="fan_out", 83 | nonlinearity="relu") 84 | if m.bias is not None: 85 | nn.init.constant_(m.bias, 0) 86 | elif isinstance(m, nn.BatchNorm2d): 87 | nn.init.constant_(m.weight, 1) 88 | nn.init.constant_(m.bias, 0) 89 | elif isinstance(m, nn.Linear): 90 | nn.init.normal_(m.weight, 0, 0.01) 91 | nn.init.constant_(m.bias, 0) 92 | 93 | net = net.to(device) 94 | if device == 'cuda': 95 | cudnn.benchmark = True 96 | 97 | epochs, best_loss, step, early_stop_count = config[ 98 | 'n_epochs'], math.inf, 0, 0 99 | optimizer = torch.optim.SGD(net.parameters(), 100 | lr=lr, 101 | momentum=0.9, 102 | weight_decay=5e-4) 103 | criterion = nn.CrossEntropyLoss() 104 | 105 | # 
------------------------------Train and Validation---------------------------------- 106 | for epoch in range(epochs): 107 | net.train() 108 | loss_record = [] 109 | accuracy_record = [] 110 | for train_data, train_label in trainloader: 111 | optimizer.zero_grad() 112 | train_data, train_label = train_data.to(device), train_label.to( 113 | device) 114 | pred = net(train_data) 115 | loss = criterion(pred, train_label) 116 | loss.backward() 117 | optimizer.step() 118 | loss_record.append(loss.item()) 119 | # accuracy 120 | _, index = pred.data.cpu().topk(1, dim=1) 121 | index_label = train_label.data.cpu() 122 | accuracy_batch = np.sum( 123 | (index.squeeze(dim=1) == index_label).numpy()) 124 | accuracy_batch = accuracy_batch / len(train_label) 125 | accuracy_record.append(accuracy_batch) 126 | train_loss = sum(loss_record) / len(loss_record) 127 | train_accuracy = sum(accuracy_record) / len(accuracy_record) 128 | 129 | # Validation 130 | net.eval() 131 | loss_record = [] 132 | accuracy_record = [] 133 | for val_data, val_label in testloader: 134 | val_data, val_label = val_data.to(device), val_label.to(device) 135 | with torch.no_grad(): 136 | pred = net(val_data) 137 | loss = criterion(pred, val_label) 138 | loss_record.append(loss.item()) 139 | # accuracy 140 | _, index = pred.data.cpu().topk(1, dim=1) 141 | index_label = val_label.data.cpu() 142 | accuracy_batch = np.sum( 143 | (index.squeeze(dim=1) == index_label).numpy()) 144 | accuracy_batch = accuracy_batch / len(val_label) 145 | accuracy_record.append(accuracy_batch) 146 | val_loss = sum(loss_record) / len(testloader) 147 | val_accuracy = sum(accuracy_record) / len(accuracy_record) 148 | 149 | print( 150 | f'Epoch [{epoch+1}/{epochs}]: Train loss: {train_loss:.4f},Train accuracy: {train_accuracy:.4f}, Valid loss: {val_loss:.4f}, Valid accuracy: {val_accuracy:.4f}' 151 | ) 152 | 153 | if val_loss < best_loss: 154 | best_loss = val_loss 155 | torch.save(net.state_dict(), 156 | config['save_path']) # Save your best model 157 | print('Saving model with loss {:.3f}...'.format(best_loss)) 158 | early_stop_count = 0 159 | else: 160 | early_stop_count += 1 161 | 162 | if early_stop_count >= config['early_stop']: 163 | print('\nModel is not improving, so we halt the training session.') 164 | break 165 | 'here just a test for all binary activation' 166 | -------------------------------------------------------------------------------- /code/compression/mnist/memory.py: -------------------------------------------------------------------------------- 1 | from re import A 2 | 3 | 4 | weight_num_list = [500, 500, 500] 5 | # [1000, 1000, 1000], 6 | # [1500, 1500, 1500], 7 | # [2000, 2000, 2000], 8 | # [2500, 2500, 2500]] 9 | 10 | 11 | a = (weight_num_list[1]*weight_num_list[2]+weight_num_list[0]*weight_num_list[1]+weight_num_list[2]*10)*32 + sum(weight_num_list)*32 12 | print(a) 13 | 14 | 15 | me_new = (input_num*weight_num_list[0]+weight_num_list[0]*weight_num_list[1]+weight_num_list[1]*weight_num_list[2]+10*weight_num_list[2])*(1-kesi)+(sum(weight_num_list)) 16 | # print('MEM_new = ',me_new) 17 | 18 | # weight_num_list = [2000, 2000, 1000] # number for neurons for each layer 19 | # weight_num_list = [1500, 1500, 1500] 20 | # weight_num_list = [5000, 5000, 2000] 21 | # weight_num_list = [5000, 5000, 5000] 22 | # weight_num_list = [10000, 10000, 5000] 23 | # weight_num_list = [20000, 20000, 11000] 24 | # weight_num_list = [500, 500, 500] 25 | # [1000, 1000, 1000], 26 | # [1500, 1500, 1500], 27 | # [2000, 2000, 2000], 28 | # [2500, 2500, 2500]] 29 | 
# weight_num_list = [1000, 1000, 1000] 30 | # weight_num_list = [1500, 1500, 1500] 31 | # weight_num_list = [2000, 2000, 2000] 32 | # weight_num_list = [2500, 2500, 2500] 33 | -------------------------------------------------------------------------------- /code/compression/mnist/mnist_magnitude_pruning.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Using custome network to classify data sampled from MNIST data. 4 | 5 | Network can be customed, see class My_Model for more details about how to custome model, 6 | which will be added with a classification layer. 7 | """ 8 | 9 | __author__ = "Model_compression" 10 | __copyright__ = "Copyright 2021, Lossless compression" 11 | __license__ = "GPL" 12 | __version__ = "1.0.1" 13 | __email__ = "yongqi_du@hust.edu.cn" 14 | __status__ = "Production" 15 | __all__ = [ 16 | 'custome_activation_analysis', 'custome_activation_analysis_noparam' 17 | ] 18 | 19 | import sys 20 | import os 21 | 22 | sys.path.append( 23 | os.path.dirname( 24 | os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) 25 | 26 | import math 27 | from collections import Counter, OrderedDict 28 | 29 | import numpy as np 30 | import torch 31 | import torch.backends.cudnn as cudnn 32 | import torch.nn as nn 33 | import torch.nn.functional as F 34 | from torch.utils.data import DataLoader 35 | from utils.data_prepare import my_dataset_custome 36 | from model_define.model import My_Model 37 | from utils.utils import estim_tau_tensor 38 | import torch.nn.utils.prune as prune 39 | import torch.nn.functional as F 40 | 41 | device = "cuda:1" if torch.cuda.is_available() else "cpu" 42 | 43 | if __name__ == '__main__': 44 | # gpu_usage() 45 | # ---------------------------------Data--------------------------------------------- 46 | cs = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] 47 | K = len(cs) 48 | 49 | # load data 50 | # res = my_dataset_custome('var', 51 | # T_train=10000, 52 | # T_test=1800, 53 | # p=784, 54 | # cs=[0.5, 0.5]) 55 | res = my_dataset_custome('MNIST', 56 | T_train=50000, 57 | T_test=8000, 58 | cs=cs, 59 | selected_target=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 60 | dataset_train, dataset_test = res[0], res[1] 61 | 'to be done here new net configuration here just binaryzero and binary last' 62 | tau_zero = np.sqrt(estim_tau_tensor(dataset_train.X)) 63 | print(tau_zero) 64 | 65 | # --------------------------------Network-------------------------------------------- 66 | layer_num = 3 # layer number for network 67 | input_num = 784 # input dimension for network 68 | # weight_num_list = [2000, 2000, 1000] # number for neurons for each layer 69 | weight_num_list = [1500, 1500, 1500] 70 | # weight_num_list = [5000, 5000, 2000] 71 | # weight_num_list = [5000, 5000, 5000] 72 | # weight_num_list = [10000, 10000, 5000] 73 | # weight_num_list = [20000, 20000, 11000] 74 | # weight_num_list = [500, 500, 500] 75 | # [1000, 1000, 1000], 76 | # [1500, 1500, 1500], 77 | # [2000, 2000, 2000], 78 | # [2500, 2500, 2500]] 79 | # weight_num_list = [1000, 1000, 1000] 80 | # weight_num_list = [1500, 1500, 1500] 81 | # weight_num_list = [2000, 2000, 2000] 82 | # weight_num_list = [2500, 2500, 2500] 83 | 84 | activation_list = [ 85 | { 86 | 'name': 'ReLU', 87 | 'args': None 88 | }, 89 | { 90 | 'name': 'ReLU', 91 | 'args': None 92 | }, 93 | { 94 | 'name': 'ReLU', 95 | 'args': None 96 | }, 97 | ] 98 | 99 | # define origin model 100 | model = My_Model(layer_num=layer_num, 101 | input_num=input_num, 
102 | weight_num_list=weight_num_list, 103 | activation_list=activation_list, 104 | tau_zero=tau_zero) 105 | 106 | # add a classification layer 107 | model_origin = nn.Sequential( 108 | OrderedDict([ 109 | ('feature', model), 110 | ('classification', 111 | nn.Linear(model.weight_num_list[-1], 10, bias=False)), 112 | ('activation', nn.Softmax()), 113 | ])) 114 | 115 | # --------------------------------Model Initilization------------------------------- 116 | # model initialization 117 | initialization_way = 'pruning' # select from ['normal', 'random_sparsity', 'ternary', 'pruning'] 118 | kesi = 0.93 # change from 0 to 1, only used for ['random sparsity', 'ternary'] 119 | threshould = 0.93 120 | 121 | if initialization_way == 'normal': 122 | # normal initialization 123 | for fc in model_origin.feature.fc_layers: 124 | nn.init.normal_(fc.weight) 125 | fc.weight.requires_grad = False 126 | elif initialization_way == 'random_sparsity': 127 | # random sparse gaussian weight(break assumption1) 128 | for fc in model_origin.feature.fc_layers: 129 | mask = np.zeros(fc.weight.shape).flatten() 130 | mask[:round((1 - kesi) * mask.size)] = 1 131 | np.random.shuffle(mask) 132 | mask = torch.tensor(mask.reshape(fc.weight.shape)).float() 133 | nn.init.normal_(fc.weight) 134 | with torch.no_grad(): 135 | fc.weight = torch.nn.Parameter(mask * fc.weight.data, 136 | requires_grad=False) 137 | elif initialization_way == 'ternary': 138 | # tarnary weight with sparsity kesi 139 | for fc in model_origin.feature.fc_layers: 140 | init = np.zeros(fc.weight.shape).flatten() 141 | init[:round(1 / 2 * (1 - kesi) * 142 | init.size)] = 1 / np.sqrt(1 - kesi) 143 | init[round(1 / 2 * (1 - kesi) * init.size):2 * 144 | round(1 / 2 * 145 | (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi) 146 | # c = Counter(init) 147 | np.random.shuffle(init) 148 | init = torch.tensor(init.reshape(fc.weight.shape)).float() 149 | with torch.no_grad(): 150 | fc.weight = torch.nn.Parameter(init, requires_grad=False) 151 | elif initialization_way == 'pruning': 152 | for fc in model_origin.feature.fc_layers: 153 | nn.init.normal_(fc.weight) 154 | fc.weight.requires_grad = False 155 | 156 | # pruning magnitude based 157 | # threshould = 0.95 158 | for fc in model_origin.feature.fc_layers: 159 | prune.l1_unstructured(fc, 'weight', amount=threshould) 160 | fc.weight.requires_grad = False 161 | # --------------------------------Preparing------------------------------------------- 162 | # define trainning network 163 | net = model_origin 164 | 165 | batch_size = 128 166 | lr = 0.01 167 | config = {"save_path": "./model_origin", "early_stop": 20, 'n_epochs': 500} 168 | epochs, best_loss, step, early_stop_count = config[ 169 | 'n_epochs'], math.inf, 0, 0 170 | 171 | # define data 172 | dataset_train.Y = F.one_hot(torch.tensor(dataset_train.Y).long(), K) 173 | dataset_train.Y = dataset_train.Y.float() 174 | dataset_test.Y = F.one_hot(torch.tensor(dataset_test.Y).long(), K) 175 | dataset_test.Y = dataset_test.Y.float() 176 | train_loader = DataLoader(dataset_train, 177 | batch_size=batch_size, 178 | shuffle=False, 179 | drop_last=True) 180 | test_loader = DataLoader(dataset_test, 181 | batch_size=batch_size, 182 | shuffle=False, 183 | drop_last=True) 184 | # shuffle????????????????????????? 
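    # (re the question above: shuffle=False keeps the batch order fixed across
    # epochs, making runs reproducible; passing shuffle=True to DataLoader
    # would reshuffle the training data every epoch, the usual choice for SGD.)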
185 | net = net.to(device) 186 | if device == 'cuda': 187 | cudnn.benchmark = True 188 | 189 | optimizer = torch.optim.Adam(net.parameters(), lr=lr) 190 | # optimizer = torch.optim.SGD(net.parameters(), 191 | # lr=lr, 192 | # momentum=0.9, 193 | # weight_decay=5e-4) 194 | criterion = nn.CrossEntropyLoss() 195 | # --------------------------Trainning and Validation----------------------------------- 196 | for epoch in range(epochs): 197 | net.train() 198 | loss_record = [] 199 | accuracy_record = [] 200 | 201 | # trainning 202 | for train_data, train_label in train_loader: 203 | optimizer.zero_grad() 204 | train_data, train_label = train_data.to(device), train_label.to( 205 | device) 206 | pred = net(train_data) 207 | loss = criterion(pred, train_label) 208 | loss.backward() 209 | optimizer.step() 210 | loss_record.append(loss.item()) 211 | # accuracy 212 | _, index = pred.data.cpu().topk(1, dim=1) 213 | _, index_label = train_label.data.cpu().topk(1, dim=1) 214 | accuracy_batch = np.sum( 215 | (index.squeeze(dim=1) == index_label.squeeze(dim=1)).numpy()) 216 | accuracy_batch = accuracy_batch / len(train_label) 217 | accuracy_record.append(accuracy_batch) 218 | train_loss = sum(loss_record) / len(loss_record) 219 | train_accuracy = sum(accuracy_record) / len(accuracy_record) 220 | 221 | # validation 222 | net.eval() 223 | loss_record = [] 224 | accuracy_record = [] 225 | for val_data, val_label in test_loader: 226 | val_data, val_label = val_data.to(device), val_label.to(device) 227 | with torch.no_grad(): 228 | pred = net(val_data) 229 | loss = criterion(pred, val_label) 230 | loss_record.append(loss.item()) 231 | # accuracy 232 | _, index = pred.data.cpu().topk(1, dim=1) 233 | _, index_label = val_label.data.cpu().topk(1, dim=1) 234 | accuracy_batch = np.sum( 235 | (index.squeeze(dim=1) == index_label.squeeze(dim=1)).numpy()) 236 | accuracy_batch = accuracy_batch / len(val_label) 237 | accuracy_record.append(accuracy_batch) 238 | val_loss = sum(loss_record) / len(loss_record) 239 | val_accuracy = sum(accuracy_record) / len(accuracy_record) 240 | 241 | print( 242 | f'Epoch [{epoch+1}/{epochs}]: Train loss: {train_loss:.4f},Train accuracy: {train_accuracy:.4f}, Valid loss: {val_loss:.4f}, Valid accuracy: {val_accuracy:.4f}' 243 | ) 244 | 245 | if val_loss < best_loss: 246 | best_loss = val_loss 247 | torch.save(net.state_dict(), 248 | config['save_path']) # Save your best model 249 | print('Saving model with loss {:.3f}...'.format(best_loss)) 250 | early_stop_count = 0 251 | else: 252 | early_stop_count += 1 253 | 254 | if early_stop_count >= config['early_stop']: 255 | print( 256 | '\nModel is not improving, so we halt the trainning session.') 257 | break 258 | 259 | # origin performance 260 | model_origin_final_accuracy, model_origin_final_loss = val_accuracy, val_loss 261 | 262 | print( 263 | f'origin model: Valid loss: {model_origin_final_loss:.4f}, Valid accuracy: {model_origin_final_accuracy:.4f}' 264 | ) 265 | -------------------------------------------------------------------------------- /code/compression/mnist/mnist_origin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Using custome network to classify data sampled from MNIST data. 4 | 5 | Network can be customed, see class My_Model for more details about how to custome model, 6 | which will be added with a classification layer. 
7 | """ 8 | 9 | __author__ = "Model_compression" 10 | __copyright__ = "Copyright 2021, Lossless compression" 11 | __license__ = "GPL" 12 | __version__ = "1.0.1" 13 | __email__ = "yongqi_du@hust.edu.cn" 14 | __status__ = "Production" 15 | __all__ = [ 16 | 'custome_activation_analysis', 'custome_activation_analysis_noparam' 17 | ] 18 | 19 | import sys 20 | import os 21 | 22 | sys.path.append( 23 | os.path.dirname( 24 | os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) 25 | 26 | import math 27 | from collections import Counter, OrderedDict 28 | 29 | import numpy as np 30 | import torch 31 | import torch.backends.cudnn as cudnn 32 | import torch.nn as nn 33 | import torch.nn.functional as F 34 | from torch.utils.data import DataLoader 35 | from utils.data_prepare import my_dataset_custome 36 | from model_define.model import My_Model 37 | from utils.utils import estim_tau_tensor 38 | 39 | device = "cuda:2" if torch.cuda.is_available() else "cpu" 40 | 41 | if __name__ == '__main__': 42 | # gpu_usage() 43 | # ---------------------------------Data--------------------------------------------- 44 | cs = [0.5, 0.5] 45 | K = len(cs) 46 | 47 | # load data 48 | res = my_dataset_custome('var', 49 | T_train=10000, 50 | T_test=1800, 51 | p=784, 52 | cs=[0.5, 0.5]) 53 | # res = my_dataset_custome('MNIST', T_train=10000, T_test=1800, cs=cs, selected_target=[6, 8]) 54 | dataset_train, dataset_test = res[0], res[1] 55 | 'to be done here new net configuration here just binaryzero and binary last' 56 | tau_zero = np.sqrt(estim_tau_tensor(dataset_train.X)) 57 | print(tau_zero) 58 | 59 | # --------------------------------Network-------------------------------------------- 60 | layer_num = 2 # layer number for network 61 | input_num = 784 # input dimension for network 62 | weight_num_list = [3000, 1000] # number for neurons for each layer 63 | activation_list = [ 64 | { 65 | 'name': 'ReLU', 66 | 'args': None 67 | }, 68 | { 69 | 'name': 'ReLU', 70 | 'args': None 71 | }, 72 | { 73 | 'name': 'ReLU', 74 | 'args': None 75 | }, 76 | ] 77 | 78 | # define origin model 79 | model = My_Model(layer_num=layer_num, 80 | input_num=input_num, 81 | weight_num_list=weight_num_list, 82 | activation_list=activation_list, 83 | tau_zero=tau_zero) 84 | 85 | # add a classification layer 86 | model_origin = nn.Sequential( 87 | OrderedDict([ 88 | ('feature', model), 89 | ('classification', 90 | nn.Linear(model.weight_num_list[-1], 2, bias=False)), 91 | ('activation', nn.Softmax()), 92 | ])) 93 | 94 | # model initialization 95 | for fc in model_origin.feature.fc_layers: 96 | nn.init.normal_(fc.weight) 97 | fc.weight.requires_grad = False 98 | 99 | # --------------------------------Preparing------------------------------------------- 100 | # define trainning network 101 | net = model_origin 102 | 103 | batch_size = 128 104 | lr = 0.01 105 | config = { 106 | "save_path": "./compression/mnist/model_origin", 107 | "early_stop": 20, 108 | 'n_epochs': 200 109 | } 110 | epochs, best_loss, step, early_stop_count = config[ 111 | 'n_epochs'], math.inf, 0, 0 112 | 113 | # save path 114 | if not os.path.exists(os.path.split(config['save_path'])[0]): 115 | os.makedirs(os.path.split(config['save_path'])[0]) 116 | # define data 117 | dataset_train.Y = F.one_hot(torch.tensor(dataset_train.Y).long(), K) 118 | dataset_train.Y = dataset_train.Y.float() 119 | dataset_test.Y = F.one_hot(torch.tensor(dataset_test.Y).long(), K) 120 | dataset_test.Y = dataset_test.Y.float() 121 | train_loader = DataLoader(dataset_train, 122 | batch_size=batch_size, 
123 | shuffle=False, 124 | drop_last=True) 125 | test_loader = DataLoader(dataset_test, 126 | batch_size=batch_size, 127 | shuffle=False, 128 | drop_last=True) 129 | # shuffle????????????????????????? 130 | net = net.to(device) 131 | if device == 'cuda:2': 132 | cudnn.benchmark = True 133 | 134 | optimizer = torch.optim.SGD(net.parameters(), 135 | lr=lr, 136 | momentum=0.9, 137 | weight_decay=5e-4) 138 | criterion = nn.CrossEntropyLoss() 139 | # --------------------------Trainning and Validation----------------------------------- 140 | for epoch in range(epochs): 141 | net.train() 142 | loss_record = [] 143 | accuracy_record = [] 144 | 145 | # trainning 146 | for train_data, train_label in train_loader: 147 | optimizer.zero_grad() 148 | train_data, train_label = train_data.to(device), train_label.to( 149 | device) 150 | pred = net(train_data) 151 | loss = criterion(pred, train_label) 152 | loss.backward() 153 | optimizer.step() 154 | loss_record.append(loss.item()) 155 | # accuracy 156 | _, index = pred.data.cpu().topk(1, dim=1) 157 | _, index_label = train_label.data.cpu().topk(1, dim=1) 158 | accuracy_batch = np.sum( 159 | (index.squeeze(dim=1) == index_label.squeeze(dim=1)).numpy()) 160 | accuracy_batch = accuracy_batch / len(train_label) 161 | accuracy_record.append(accuracy_batch) 162 | train_loss = sum(loss_record) / len(loss_record) 163 | train_accuracy = sum(accuracy_record) / len(accuracy_record) 164 | 165 | # validation 166 | net.eval() 167 | loss_record = [] 168 | accuracy_record = [] 169 | for val_data, val_label in test_loader: 170 | val_data, val_label = val_data.to(device), val_label.to(device) 171 | with torch.no_grad(): 172 | pred = net(val_data) 173 | loss = criterion(pred, val_label) 174 | loss_record.append(loss.item()) 175 | # accuracy 176 | _, index = pred.data.cpu().topk(1, dim=1) 177 | _, index_label = val_label.data.cpu().topk(1, dim=1) 178 | accuracy_batch = np.sum( 179 | (index.squeeze(dim=1) == index_label.squeeze(dim=1)).numpy()) 180 | accuracy_batch = accuracy_batch / len(val_label) 181 | accuracy_record.append(accuracy_batch) 182 | val_loss = sum(loss_record) / len(loss_record) 183 | val_accuracy = sum(accuracy_record) / len(accuracy_record) 184 | 185 | print( 186 | f'Epoch [{epoch+1}/{epochs}]: Train loss: {train_loss:.4f},Train accuracy: {train_accuracy:.4f}, Valid loss: {val_loss:.4f}, Valid accuracy: {val_accuracy:.4f}' 187 | ) 188 | 189 | if val_loss < best_loss: 190 | best_loss = val_loss 191 | torch.save(net.state_dict(), 192 | config['save_path']) # Save your best model 193 | print('Saving model with loss {:.3f}...'.format(best_loss)) 194 | early_stop_count = 0 195 | else: 196 | early_stop_count += 1 197 | 198 | if early_stop_count >= config['early_stop']: 199 | print( 200 | '\nModel is not improving, so we halt the trainning session.') 201 | break 202 | 203 | # origin performance 204 | model_origin_final_accuracy, model_origin_final_loss = val_accuracy, val_loss 205 | 206 | print( 207 | f'origin model: Valid loss: {model_origin_final_loss:.4f}, Valid accuracy: {model_origin_final_accuracy:.4f}' 208 | ) 209 | -------------------------------------------------------------------------------- /code/compression/mnist/performance_match.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.append( 5 | os.path.dirname( 6 | os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) 7 | 8 | import math 9 | from collections import OrderedDict 10 | 11 | import numpy as np 12 | import 
torch 13 | import torch.backends.cudnn as cudnn 14 | import torch.nn as nn # import modules 15 | import torch.nn.functional as F 16 | from torch.utils.data import DataLoader, Dataset 17 | from utils.data_prepare import my_dataset_custome 18 | from model_define.model import My_Model 19 | from equation_solve.solve_equation import solve_equation 20 | from utils.utils import estim_tau_tensor 21 | 22 | device = "cuda:0" if torch.cuda.is_available() else "cpu" 23 | 24 | if __name__ == "__main__": 25 | # gpu_usage() 26 | # ---------------------------------Data--------------------------------------------- 27 | cs = [0.1, 0.1, 0.1, 0.1, 0.1] 28 | K = len(cs) 29 | # load data 30 | res = my_dataset_custome('MNIST', 31 | T_train=50000, 32 | T_test=8000, 33 | cs=cs, 34 | selected_target=[0, 1, 2, 3, 4]) 35 | # res = my_dataset_custome('MNIST', T_train=10000, T_test=1600, cs=cs, selected_target=[6, 8]) 36 | dataset_train, dataset_test = res[0], res[1] 37 | tau_zero = np.sqrt(estim_tau_tensor(dataset_train.X)) 38 | print(tau_zero) 39 | 40 | # --------------------------------Network-------------------------------------------- 41 | # origin network setting 42 | layer_num = 3 # layer number for network 43 | input_num = 784 # input dimension for network 784/256 44 | weight_num_list = [2000, 2000, 1100] # number for neurons for each layer 45 | activation_list = [ 46 | { 47 | 'name': 'ReLU', 48 | 'args': None 49 | }, 50 | # {'name' : 'Binary_Zero', 'args' : {'s1':-1, 's2': 1, 'b1': 1}}, 51 | # {'name' : 'Binary_Zero', 'args' : {'s1':-1, 's2': 1, 'b1': 1}}, 52 | { 53 | 'name': 'ReLU', 54 | 'args': None 55 | }, 56 | # {'name' : 'ReLU', 'args' : None}, 57 | { 58 | 'name': 'ReLU', 59 | 'args': None 60 | } 61 | ] 62 | 63 | # define origin model 64 | model = My_Model(layer_num=layer_num, 65 | input_num=input_num, 66 | weight_num_list=weight_num_list, 67 | activation_list=activation_list, 68 | tau_zero=tau_zero) 69 | 70 | res = solve_equation(model, tau_zero, loop=1000) 71 | activation_list = res 72 | new_model = My_Model(layer_num=layer_num, 73 | input_num=input_num, 74 | weight_num_list=weight_num_list, 75 | activation_list=activation_list, 76 | tau_zero=tau_zero) 77 | 78 | # define model 79 | model_origin = nn.Sequential( 80 | OrderedDict([ 81 | ('feature', model), 82 | ('classification', 83 | nn.Linear(model.weight_num_list[-1], K, bias=False)), 84 | ('activation', nn.Softmax()), 85 | ])) 86 | model_new = nn.Sequential( 87 | OrderedDict([ 88 | ('feature', new_model), 89 | ('classification', 90 | nn.Linear(new_model.weight_num_list[-1], K, bias=False)), 91 | ('activation', nn.Softmax()), 92 | ])) 93 | 94 | # -----------------------------weight initialization!--------------------------------- 95 | initialization_way = 'normal' # select from ['normal', 'random_sparsity', 'ternary'] 96 | kesi = 0 # change from 0 to 1, only used for ['random sparsity', 'ternary'] 97 | 98 | if initialization_way == 'normal': 99 | # normal initialization 100 | for fc in model_new.feature.fc_layers: 101 | nn.init.normal_(fc.weight) 102 | fc.weight.requires_grad = False 103 | elif initialization_way == 'random_sparsity': 104 | # random sparse gaussian weight(break assumption1) 105 | for fc in model_new.feature.fc_layers: 106 | mask = np.zeros(fc.weight.shape).flatten() 107 | mask[:round((1 - kesi) * mask.size)] = 1 108 | np.random.shuffle(mask) 109 | mask = torch.tensor(mask.reshape(fc.weight.shape)).float() 110 | nn.init.normal_(fc.weight) 111 | with torch.no_grad(): 112 | fc.weight = torch.nn.Parameter(mask * fc.weight.data, 113 | 
requires_grad=False) 114 | elif initialization_way == 'ternary': 115 | # tarnary weight with sparsity kesi 116 | for fc in model_new.feature.fc_layers: 117 | init = np.zeros(fc.weight.shape).flatten() 118 | init[:round(1 / 2 * (1 - kesi) * 119 | init.size)] = 1 / np.sqrt(1 - kesi) 120 | init[round(1 / 2 * (1 - kesi) * init.size):2 * 121 | round(1 / 2 * 122 | (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi) 123 | # c = Counter(init) 124 | np.random.shuffle(init) 125 | init = torch.tensor(init.reshape(fc.weight.shape)).float() 126 | with torch.no_grad(): 127 | fc.weight = torch.nn.Parameter(init, requires_grad=False) 128 | 129 | # --------------------------------Preparing------------------------------------------- 130 | # define network setting 131 | net = model_new 132 | 133 | batch_size = 128 134 | lr = 0.001 135 | config = {"save_path": "./model_origin", "early_stop": 20, 'n_epochs': 500} 136 | early_stop_count = 0 137 | epochs, best_loss, step, early_stop_count = config[ 138 | 'n_epochs'], math.inf, 0, 0 139 | 140 | dataset_train.Y = F.one_hot(torch.tensor(dataset_train.Y).long(), K) 141 | dataset_train.Y = dataset_train.Y.float() 142 | dataset_test.Y = F.one_hot(torch.tensor(dataset_test.Y).long(), K) 143 | dataset_test.Y = dataset_test.Y.float() 144 | train_loader = DataLoader(dataset_train, 145 | batch_size=batch_size, 146 | shuffle=False, 147 | drop_last=True) 148 | test_loader = DataLoader(dataset_test, 149 | batch_size=batch_size, 150 | shuffle=False, 151 | drop_last=True) 152 | # shuffle???????????? 153 | 154 | net = net.to(device) 155 | if device == 'cuda:2': 156 | cudnn.benchmark = True 157 | 158 | optimizer = torch.optim.Adam(net.parameters(), lr=lr) 159 | criterion = nn.CrossEntropyLoss() 160 | # --------------------------Trainning and Validation----------------------------------- 161 | for epoch in range(epochs): 162 | net.train() 163 | loss_record = [] 164 | accuracy_record = [] 165 | # training 166 | for train_data, train_label in train_loader: 167 | optimizer.zero_grad() 168 | train_data, train_label = train_data.to(device), train_label.to( 169 | device) 170 | pred = net(train_data) 171 | loss = criterion(pred, train_label) 172 | loss.backward() 173 | optimizer.step() 174 | loss_record.append(loss.item()) 175 | # accuracy 176 | _, index = pred.data.cpu().topk(1, dim=1) 177 | _, index_label = train_label.data.cpu().topk(1, dim=1) 178 | accuracy_batch = np.sum( 179 | (index.squeeze(dim=1) == index_label.squeeze(dim=1)).numpy()) 180 | accuracy_batch = accuracy_batch / len(train_label) 181 | accuracy_record.append(accuracy_batch) 182 | train_loss = sum(loss_record) / len(loss_record) 183 | train_accuracy = sum(accuracy_record) / len(accuracy_record) 184 | 185 | # validation 186 | net.eval() 187 | loss_record = [] 188 | accuracy_record = [] 189 | for val_data, val_label in test_loader: 190 | val_data, val_label = val_data.to(device), val_label.to(device) 191 | with torch.no_grad(): 192 | pred = net(val_data) 193 | loss = criterion(pred, val_label) 194 | loss_record.append(loss.item()) 195 | # accuracy 196 | _, index = pred.data.cpu().topk(1) 197 | _, index_label = val_label.data.cpu().topk(1, dim=1) 198 | accuracy_batch = np.sum( 199 | (index.squeeze(dim=1) == index_label.squeeze(dim=1)).numpy()) 200 | accuracy_batch = accuracy_batch / len(val_label) 201 | accuracy_record.append(accuracy_batch) 202 | val_loss = sum(loss_record) / len(test_loader) 203 | val_accuracy = sum(accuracy_record) / len(accuracy_record) 204 | 205 | print( 206 | f'Epoch [{epoch+1}/{epochs}]: Train loss: 
{train_loss:.4f},Train accuracy: {train_accuracy:.4f}, Valid loss: {val_loss:.4f}, Valid accuracy: {val_accuracy:.4f}' 207 | ) 208 | 209 | if val_loss < best_loss: 210 | best_loss = val_loss 211 | torch.save(net.state_dict(), "./model_new") # Save your best model 212 | print('Saving model with loss {:.3f}...'.format(best_loss)) 213 | early_stop_count = 0 214 | else: 215 | early_stop_count += 1 216 | 217 | if early_stop_count >= config['early_stop']: 218 | print('\nModel is not improving, so we halt the training session.') 219 | break 220 | 221 | # origin performance 222 | model_new_final_accuracy, model_new_final_loss = val_accuracy, val_loss 223 | 224 | print( 225 | f'compressed model(with retrain): Valid loss: {model_new_final_loss:.4f}, Valid accuracy: {model_new_final_accuracy:.4f}' 226 | ) 227 | -------------------------------------------------------------------------------- /code/compression/new_MNIST/matching.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from model import FCnet 5 | import os 6 | import torchvision.datasets as dset 7 | from torch.utils.data import Dataset, DataLoader 8 | import scipy 9 | import torch.optim as optim 10 | import matplotlib as mpl 11 | mpl.use('Agg') 12 | import matplotlib.pyplot as plt 13 | import copy,math 14 | from scipy import optimize 15 | 16 | def get_or_param(L,act,tau_0): 17 | order_1 = L * [0] 18 | order_2 = L * [0] 19 | square = L * [0] 20 | mean = L * [0] 21 | tau = (L+1)*[0] 22 | tau[0] = tau_0 23 | if act == 'ReLU': 24 | for i in range(1,L+1): 25 | mean[i-1] = tau[i-1] / np.sqrt(2 * np.pi) 26 | tau[i] = np.sqrt((tau[i-1]**2)*(1 / 2 - 1 / (2 * np.pi))) 27 | order_1[i-1] = 1 / 2 28 | order_2[i-1] = np.sqrt(1 / (2 * np.pi * tau[i-1]**2)) 29 | square[i-1] = 1 - 1 / np.pi 30 | # i += 1 31 | return order_1, order_2, square, tau, mean 32 | 33 | def get_act0(tao_last): 34 | act0_mean = (lambda s1, s2, b1: b1 / 2 * 35 | (math.erf(s1 / (pow(2, 1 / 2) * tao_last)) - math.erf(s2 / (pow(2, 1 / 2) * tao_last))) + b1) 36 | 37 | act0_order_1 = (lambda s1, s2, b1: b1 * 38 | (math.exp(-pow(s2 / tao_last, 2) / 2) - math.exp(-pow(s1 / tao_last, 2) / 2)) / 39 | (pow(2 * math.pi, 1 / 2) * tao_last)) 40 | 41 | act0_order_2 = (lambda s1, s2, b1: b1 * 42 | (s2 * math.exp(-pow(s2 / tao_last, 2) / 2) - s1 * math.exp(-pow(s1 / tao_last, 2) / 2)) / 43 | (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3))) 44 | 45 | act0_square = (lambda s1, s2, b1: (pow(b1, 2) - 2 * b1 * act0_mean(s1, s2, b1)) * 46 | (s2 * math.exp(-pow(s2 / tao_last, 2) / 2) - s1 * math.exp(-pow(s1 / tao_last, 2) / 2)) / 47 | (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3))) 48 | act0_tau = (lambda s1, s2, b1: (1 / 2 * pow(b1, 2)) * 49 | (math.erf(s1 / (pow(2, 1 / 2) * tao_last)) - math.erf(s2 / (pow(2, 1 / 2) * tao_last))) + pow(b1, 2) - pow( 50 | act0_mean(s1, s2, b1), 2)) 51 | 52 | return act0_order_1, act0_order_2, act0_square, act0_tau, act0_mean 53 | 54 | def get_act1(b1, b2, tao_last): 55 | act1_mean = (lambda s1, s2, s3, s4: (b1 / 2) * 56 | (math.erf(s1 / (pow(2, 1 / 2) * tao_last)) - math.erf(s4 / (pow(2, 1 / 2) * tao_last))) + b1 + (b2 / 2) * 57 | (math.erf(s3 / (pow(2, 1 / 2) * tao_last)) - math.erf(s2 / (pow(2, 1 / 2) * tao_last)))) 58 | 59 | act1_order_1 = (lambda s1, s2, s3, s4: b1 * (math.exp(-pow(s4 / tao_last, 2) / 2) - math.exp(-pow( 60 | s1 / tao_last, 2) / 2)) / (pow(2 * math.pi, 1 / 2) * tao_last) + b2 * (math.exp(-pow( 61 | s2 / tao_last, 2) / 2) - math.exp(-pow(s3 / tao_last, 2) / 
2)) / (pow(2 * math.pi, 1 / 2) * tao_last)) 62 | 63 | act1_order_2 = (lambda s1, s2, s3, s4: b1 * (s4 * math.exp(-pow(s4 / tao_last, 2) / 2) - s1 * math.exp( 64 | -pow(s1 / tao_last, 2) / 2)) / (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3)) + b2 * (s2 * math.exp(-pow( 65 | s2 / tao_last, 2) / 2) - s3 * math.exp(-pow(s3 / tao_last, 2) / 2)) / (pow(2 * math.pi, 1 / 2) * pow( 66 | tao_last, 3))) 67 | 68 | act1_square = (lambda s1, s2, s3, s4: b1**2 * 69 | (s4 * math.exp(-pow(s4 / tao_last, 2) / 2) - s1 * math.exp(-pow(s1 / tao_last, 2) / 2)) / 70 | (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3)) + b2**2 * 71 | (s2 * math.exp(-pow(s2 / tao_last, 2) / 2) - s3 * math.exp(-pow(s3 / tao_last, 2) / 2)) / 72 | (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3)) - 2 * act1_mean( 73 | s1, s2, s3, s4) * act1_order_2(s1, s2, s3, s4)) 74 | 75 | act1_tau = (lambda s1, s2, s3, s4: (b1**2 / 2) * 76 | (math.erf(s1 / (pow(2, 1 / 2) * tao_last)) - math.erf(s4 / (pow(2, 1 / 2) * tao_last))) + b1**2 + (b2**2 / 2) * 77 | (math.erf(s3 / (pow(2, 1 / 2) * tao_last)) - math.erf(s2 / (pow(2, 1 / 2) * tao_last))) - 78 | (act1_mean(s1, s2, s3, s4))**2) 79 | 80 | return act1_order_1, act1_order_2, act1_square, act1_tau, act1_mean 81 | 82 | def my_solve(func, var_num, loop, ratios, **args): 83 | 84 | res_final = scipy.optimize.OptimizeResult(fun=100000, x=np.ones(var_num)) 85 | for i in range(loop): 86 | # initial values 87 | X = np.array([]) 88 | for ratio in ratios: 89 | X = np.concatenate((X, ratio * np.random.randn(1))) 90 | # to ensure s1 3: 92 | X[1] = X[1] if X[1] > X[0] else 2 * X[0] - X[1] 93 | X[2] = X[2] if X[2] > X[1] else 2 * X[1] - X[2] 94 | X[3] = X[3] if X[3] > X[2] else 2 * X[2] - X[3] 95 | # optimization 96 | res = optimize.minimize(func, X, **args) 97 | # find the best one 98 | if res.fun < res_final.fun: 99 | res_final = res 100 | print('in loop') 101 | print(res_final.fun) 102 | i += 1 103 | return res_final 104 | 105 | def sovle_0(e_or, e_new, loop): 106 | def func(x): 107 | return ((e_new[0](x[0], x[1], x[2])**2 - e_or[0]**2)**2 + 108 | (e_new[1](x[0], x[1], x[2])**2 - e_or[1]**2)**2 + 109 | (e_new[2](x[0], x[1], x[2]) - e_or[2])**2) 110 | 111 | cons = {'type': 'ineq', 'fun': lambda x: x[1] - x[0]} 112 | res = my_solve( 113 | func, 114 | var_num=3, 115 | loop=loop, 116 | ratios=[-0.5, 0.5, 0.5], 117 | method='SLSQP', 118 | constraints=cons, 119 | options={'ftol': 1e-30}, 120 | ) 121 | print('fomer-2 layer') 122 | print(res.fun, res.x) 123 | 124 | return res.x 125 | 126 | def sovle_1(e_or, e_new, loop): 127 | def func(x): 128 | return ((e_new[0](x[0], x[1], x[2], x[2] + x[1] - x[0])**2 - e_or[0]**2)**2+ 129 | (e_new[1](x[0], x[1], x[2], x[2] + x[1] - x[0])**2 - e_or[1]**2)**2 + 130 | (e_new[2](x[0], x[1], x[2], x[2] + x[1] - x[0]) - e_or[2])**2 + 131 | (np.sqrt(e_new[3](x[0], x[1], x[2], x[2] + x[1] - x[0])) - e_or[3])**2) 132 | 133 | cons = ({ 134 | 'type': 'ineq', 135 | 'fun': lambda x: x[1] - x[0] 136 | }, { 137 | 'type': 'ineq', 138 | 'fun': lambda x: x[2] - x[1] 139 | }, 140 | { 141 | 'type': 'ineq', 142 | 'fun': lambda x: x[2] - x[0] 143 | }, 144 | ) 145 | 146 | res = my_solve( 147 | func, 148 | var_num=3, 149 | loop=loop, 150 | ratios=[1, 1, 1], 151 | method='SLSQP', 152 | constraints=cons, 153 | options={'ftol': 1e-30}, 154 | ) 155 | print('last layer') 156 | print(res.fun, res.x) 157 | 158 | return [res.x[0], res.x[1], res.x[2], res.x[2] + res.x[1] - res.x[0]] -------------------------------------------------------------------------------- /code/compression/new_MNIST/model.py: 
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torch.nn as nn  # base classes for network layers (conv, pooling, activation, fully-connected) and loss functions
4 | from new_weight_act import new_act
5 | import matplotlib.pyplot as plt
6 | 
7 | # define a fully-connected network; the class inherits from nn.Module
8 | class FCnet(nn.Module):
9 |     def __init__(self, input, f1, f2, f3, f4):
10 |         super(FCnet,self).__init__()
11 |         self.F1 = nn.Linear(input,f1)
12 |         self.R1 = nn.ReLU()
13 |         self.F2 = nn.Linear(f1,f2)
14 |         self.R2 = nn.ReLU()
15 |         self.F3 = nn.Linear(f2,f3)
16 |         self.R3 = nn.ReLU()
17 |         self.OUT = nn.Linear(f3,f4)
18 |         self.S = nn.Softmax(dim=1)
19 |     def forward(self,x):
20 |         # x = x.T
21 |         x = self.F1(x)
22 |         x = self.R1(x)
23 |         x = x / torch.sqrt(torch.tensor(self.F1.weight.size(0)))
24 |         x = self.F2(x)
25 |         x = self.R2(x)
26 |         x = x / torch.sqrt(torch.tensor(self.F2.weight.size(0)))
27 |         x = self.F3(x)
28 |         x = self.R3(x)
29 |         x = x / torch.sqrt(torch.tensor(self.F3.weight.size(0)))
30 |         x = self.OUT(x)
31 |         x = self.S(x)
32 |         return x
33 |     def initialize(self):
34 |         for m in self.modules():
35 |             if isinstance(m,nn.Linear):
36 |                 nn.init.normal_(m.weight.data)
37 |                 # m.requires_grad_(True)
38 |     def spar_init(self,kesi=0.5):
39 |         spar_W = []
40 |         for m in self.modules():
41 |             if isinstance(m,nn.Linear):
42 |                 init = np.zeros(len(m.weight.data.flatten()))
43 |                 init[:round(1 / 2 * (1 - kesi) * init.size)] = 1 / np.sqrt(1 - kesi)
44 |                 init[round(1 / 2 * (1 - kesi) * init.size):2 *round(1 / 2 * (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi)
45 |                 np.random.shuffle(init)
46 |                 init = torch.tensor(init.reshape(m.weight.data.shape)).float()
47 |                 m.weight = torch.nn.Parameter(init, requires_grad=True)
48 |                 spar_W.append(torch.abs(init * np.sqrt(1 - kesi)))
49 |         return spar_W
50 | 
51 | class model_init(nn.Module):
52 |     def __init__(self, input, f1, f2, f3, f4, arg, bias = False):
53 |         super(model_init,self).__init__()
54 |         self.F1 = nn.Linear(input,f1)
55 |         self.R1 = learnable_activation(arg[0])
56 |         self.F2 = nn.Linear(f1,f2)
57 |         self.R2 = learnable_activation(arg[1])
58 |         self.F3 = nn.Linear(f2,f3)
59 |         self.R3 = learnable_activation(arg[2])
60 |         self.OUT = nn.Linear(f3,f4)
61 |         self.S = nn.Softmax(dim=1)
62 |     def forward(self, x):
63 |         x = self.F1(x)
64 |         x = self.R1(x)
65 |         x = x / torch.sqrt(torch.tensor(self.F1.weight.size(0)))
66 |         x = self.F2(x)
67 |         x = self.R2(x)
68 |         x = x / torch.sqrt(torch.tensor(self.F2.weight.size(0)))
69 |         x = self.F3(x)
70 |         x = self.R3(x)
71 |         x = x / torch.sqrt(torch.tensor(self.F3.weight.size(0)))
72 | 
73 |         # plt.clf()
74 |         # plt.hist(x.detach().numpy().reshape(1,-1)[0],bins=100)
75 |         # plt.savefig("plot/last_x")
76 |         # plt.show()
77 | 
78 | 
79 |         x = self.OUT(x)
80 |         # x = x / torch.sqrt(torch.tensor(self.OUT.weight.size(0)))
81 |         x = self.S(x)
82 |         return x
83 |     def initialize(self,kesi):
84 |         for m in self.modules():
85 |             if isinstance(m,nn.Linear):
86 |                 init = np.zeros(len(m.weight.data.flatten()))
87 |                 init[:round(1 / 2 * (1 - kesi) * init.size)] = 1 / np.sqrt(1 - kesi)
88 |                 init[round(1 / 2 * (1 - kesi) * init.size):2 *round(1 / 2 * (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi)
89 |                 np.random.shuffle(init)
90 |                 init = torch.tensor(init.reshape(m.weight.data.shape)).float()
91 |                 m.weight = torch.nn.Parameter(init, requires_grad=True)
92 |     def spar_init(self,kesi=0.5):
93 |         spar_W = []
94 |         for m in self.modules():
95 |             if isinstance(m,nn.Linear):
96 |                 init = np.zeros(len(m.weight.data.flatten()))
97 |                 init[:round(1 / 2 * (1 - kesi) * init.size)] = 1 / np.sqrt(1 - kesi)
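                # (added note) the slice above and the one below populate equal numbers of
                # +1/sqrt(1 - kesi) and -1/sqrt(1 - kesi) entries, so after zeroing a
                # fraction kesi of the weights each entry keeps unit variance:
                # (1 - kesi) * (1 / sqrt(1 - kesi))**2 = 1.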
98 |                 init[round(1 / 2 * (1 - kesi) * init.size):2 *round(1 / 2 * (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi)
99 |                 np.random.shuffle(init)
100 |                 init = torch.tensor(init.reshape(m.weight.data.shape)).float()
101 |                 m.weight = torch.nn.Parameter(init, requires_grad=True)
102 |                 spar_W.append(torch.abs(init * np.sqrt(1 - kesi)))
103 |         return spar_W
104 | 
105 | class learnable_activation(nn.Module):
106 |     def __init__(self, *arg):
107 |         super(learnable_activation, self).__init__()
108 |         self.l = len(arg[0])
109 |         if len(arg[0])==4:
110 |             self.s1 = torch.nn.Parameter(torch.tensor(arg[0][0]), requires_grad=False)
111 |             self.s2 = torch.nn.Parameter(torch.tensor(arg[0][1]), requires_grad=False)
112 |             self.amp1 = torch.nn.Parameter(torch.tensor(arg[0][2]-arg[0][3]), requires_grad=True)
113 |             self.amp2 = torch.nn.Parameter(torch.tensor(arg[0][2]-arg[0][3]), requires_grad=True)
114 |             self.amp3 = torch.nn.Parameter(torch.tensor(-arg[0][3]), requires_grad=True)
115 |         else:
116 |             self.s1 = torch.nn.Parameter(torch.tensor(arg[0][0]), requires_grad=False)
117 |             self.s2 = torch.nn.Parameter(torch.tensor(arg[0][1]), requires_grad=False)
118 |             self.s3 = torch.nn.Parameter(torch.tensor(arg[0][2]), requires_grad=False)
119 |             self.s4 = torch.nn.Parameter(torch.tensor(arg[0][3]), requires_grad=False)
120 |             self.amp1 = torch.nn.Parameter(torch.tensor(arg[0][4]-arg[0][6]), requires_grad=True)
121 |             self.amp2 = torch.nn.Parameter(torch.tensor(arg[0][5]-arg[0][6]), requires_grad=True)
122 |             self.amp3 = torch.nn.Parameter(torch.tensor(arg[0][4]-arg[0][6]), requires_grad=True)
123 |             self.amp4 = torch.nn.Parameter(torch.tensor(-arg[0][6]), requires_grad=True)
124 |             self.amp5 = torch.nn.Parameter(torch.tensor(-arg[0][6]), requires_grad=True)
125 |     def forward(self, input):
126 |         if self.l == 4:
127 |             return new_act.apply(input, self.s1, self.s2, self.amp1, self.amp2, self.amp3)
128 |         else:
129 |             return new_act.apply(input, self.s1, self.s2, self.s3, self.s4, self.amp1, self.amp2, self.amp3, self.amp4, self.amp5)
130 | 
131 | def ternary_weights(initialization_way, kesi, W):
132 |     init = np.zeros(len(W.flatten()))
133 |     if initialization_way == 'ternary':
134 |         init[:round(1 / 2 * (1 - kesi) * init.size)] = 1 / np.sqrt(1 - kesi)
135 |         init[round(1 / 2 * (1 - kesi) * init.size):2 *round(1 / 2 * (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi)
136 |         # c = Counter(init)
137 |         np.random.shuffle(init)
138 |     ter_W = torch.tensor(init.reshape(W.shape)).float()
139 |     return ter_W
140 | 
141 | def act0(x, s1, s2, a):
142 |     if (x < s1 or x > s2):
143 |         y = a
144 |     else:
145 |         y = 0
146 |     return y
147 | 
148 | def act1(x, r1, r2, r3, r4, b1, b2):
149 |     if (x < r1 or x > r4):
150 |         y = b1
151 |     elif (x>=r2 and x<=r3):
152 |         y = b2
153 |     else:
154 |         y = 0
155 |     return y
156 | 
157 | def sigma_torch(x, a):
158 |     '''
159 |     step function sigma_torch(x, a) = 1{x - a > 0}
160 |     '''
161 |     a = a.to(torch.float32)
162 |     return torch.heaviside(x - a, torch.tensor(0.0))
163 | 
164 | def binary0(s1, s2, b1, b2, b3):
165 |     return lambda x: b1 * sigma_torch(-x, -s1) + b2 * sigma_torch(x, s2) + b3 * (((sigma_torch(-x, -s1) + sigma_torch(x, s2)) - 1))
166 | 
167 | def binary1(s1, s2, s3, s4, b1, b2, b3, b4, b5):
168 |     return lambda x: b1 * sigma_torch(-x, -s1) + b3 * sigma_torch(x, s4) + b2 * (((sigma_torch(-x, -s3) + sigma_torch(x, s2)) - 1)) + b4 * (((sigma_torch(-x, -s1) + sigma_torch(x, s2)) - 1)) + b5 * (((sigma_torch(-x, -s3) + sigma_torch(x, s4)) - 1))
169 | 
170 | def pre_acc(y):
171 |     if y > 0:
172 |         y = 1
173 |     else:
174 |         y = -1
175 |     return y
176 | 
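As a quick illustration of the piecewise-constant activations defined above, one can evaluate `binary0` on a grid. The parameter values below are made up for illustration only (the real ones come from the coefficient-matching step); note that `sigma_torch` in this file expects tensor arguments.

```python
import torch

# illustrative parameters only; in practice they are produced by the matching
# step and wrapped in learnable_activation above
s1, s2, b1, b2, b3 = (torch.tensor(v) for v in (-1.0, 1.0, 0.8, 0.8, -0.2))
act = binary0(s1, s2, b1, b2, b3)
x = torch.linspace(-2.0, 2.0, 9)
print(act(x))  # 0.8 on the tails |x| > 1, -b3 = 0.2 on [-1, 1]
```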
177 | class new_act(torch.autograd.Function):
178 |     @staticmethod
179 |     def forward(ctx, x, *arg):
180 |         # ctx.save_for_backward(x, arg)
181 |         l = len(arg)
182 |         if l == 5:
183 |             ctx.save_for_backward(x, arg[0], arg[1], arg[2], arg[3], arg[4])
184 |             act = binary0(arg[0], arg[1], arg[2], arg[3], arg[4])
185 |         else:
186 |             ctx.save_for_backward(x, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8])
187 |             act = binary1(arg[0], arg[1], arg[2], arg[3], arg[4], arg[5], arg[6], arg[7], arg[8])
188 |         # aa = self.act
189 |         return act(x)
190 | 
191 |     @staticmethod
192 |     def backward(ctx, grad_output):
193 |         arg = ctx.saved_tensors
194 |         # grad_x = 1*grad_output.clone()
195 |         l = len(arg)
196 |         if l == 6:
197 |             s1 = torch.sign(arg[5] - arg[3])
198 |             s2 = torch.sign(arg[4] - arg[5])
199 |             wx = gd_f(arg[0],arg[1],arg[2])
200 |             grad_x = wx*grad_output.clone()
201 |             return grad_x, None, None, None, None, None
202 |         else:
203 |             wx = gd_l(arg[0],arg[1],arg[4])
204 |             grad_x = wx*grad_output.clone()
205 |             return grad_x, None, None, None, None, None, None, None, None, None
206 | 
207 | 
208 | def gd_f(x,s1,s2,a1=None,a2=None):
209 |     a = (s1+s2)/2
210 |     # w = a * torch.ones(x.shape)
211 |     # ww = torch.sign(x - w)
212 |     # ww_d = a1 * (ww-1)/2
213 |     # ww_g = a2 * (ww+1)/2
214 |     # ww_out = ww_d + ww_g
215 | 
216 |     a1 = s1 + torch.abs(a)
217 |     a2 = s2 - torch.abs(a)
218 |     wl = 1 + torch.sign(a1 - x)
219 |     wr = 1 + torch.sign(x - a2)
220 |     ww = torch.abs(wl + wr - 2)/2
221 | 
222 |     return ww
223 | 
224 | def gd_l(x,s1,s2,a1=None,a2=None):
225 |     a = (s1+s2)/2
226 |     # w = a * torch.ones(x.shape)
227 |     # ww = torch.sign(x - w)
228 |     # ww_d = a1 * (ww-1)/2
229 |     # ww_g = a2 * (ww+1)/2
230 |     # ww_out = ww_d + ww_g
231 | 
232 |     a1 = s1 + torch.abs(a)
233 |     a2 = s2 - torch.abs(a)
234 |     wl = 1 + torch.sign(a1 - x)
235 |     wr = 1 + torch.sign(x - a2)
236 |     ww = torch.abs(wl + wr - 2)/2
237 | 
238 | 
239 |     return ww
240 | # smoke test: FCnet needs explicit layer sizes and flat (batch, features) inputs; the sizes below are arbitrary
241 | if __name__ == "__main__":
242 |     model = FCnet(784, 512, 512, 256, 10)
243 |     a = torch.randn(1, 784)
244 |     b = model(a)
245 |     print(b)
246 | 
-------------------------------------------------------------------------------- /code/compression/new_MNIST/new_weight_act.py: --------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | 
4 | def ternary_weights(initialization_way, kesi, W):
5 |     init = np.zeros(len(W.flatten()))
6 |     if initialization_way == 'ternary':
7 |         init[:round(1 / 2 * (1 - kesi) * init.size)] = 1 / np.sqrt(1 - kesi)
8 |         init[round(1 / 2 * (1 - kesi) * init.size):2 *round(1 / 2 * (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi)
9 |         # c = Counter(init)
10 |         np.random.shuffle(init)
11 |     ter_W = torch.tensor(init.reshape(W.shape)).float()
12 |     return ter_W
13 | 
14 | 
15 | def act0(x, s1, s2, a):
16 |     if (x < s1 or x > s2):
17 |         y = a
18 |     else:
19 |         y = 0
20 |     return y
21 | 
22 | def act1(x, r1, r2, r3, r4, b1, b2):
23 |     if (x < r1 or x > r4):
24 |         y = b1
25 |     elif (x>=r2 and x<=r3):
26 |         y = b2
27 |     else:
28 |         y = 0
29 |     return y
30 | 
31 | def sigma_torch(x, a):
32 |     '''
33 |     step function sigma_torch(x, a) = 1{x - a > 0}
34 |     '''
35 |     return torch.heaviside(x - a, torch.tensor(0.0))
36 | 
37 | def binary0(s1, s2, b1):
38 |     return lambda x: b1 * (sigma_torch(x, s2) + sigma_torch(-x, -s1))
39 | 
40 | 
41 | def binary1(s1, s2, s3, s4, b1, b2):
42 |     return lambda x: b1 * (sigma_torch(-x, -s1) + sigma_torch(x, s4)) + b2 * (((sigma_torch(-x, -s3) + sigma_torch(x, s2)) - 1))
43 | 
44 | def pre_acc(y):
45 |     if y > 0:
46 |         y = 1
47 |     else:
48 |         y = -1
49 |     return y
50 | 
51 | class 
new_act(torch.autograd.Function): 52 | @staticmethod 53 | def forward(ctx, x, *arg): 54 | ctx.save_for_backward(x) 55 | l = len(arg[0]) 56 | if l == 3: 57 | act = binary0(arg[0][0], arg[0][1], arg[0][2]) 58 | else: 59 | act = binary1(arg[0][0], arg[0][1], arg[0][2], arg[0][3], arg[0][4], arg[0][5]) 60 | # aa = self.act 61 | return act(x) 62 | 63 | @staticmethod 64 | def backward(ctx, grad_output): 65 | x, = ctx.saved_tensors 66 | grad_x = 1*grad_output.clone() 67 | # grad_x = 1* 68 | return grad_x, None 69 | 70 | if __name__ == '__main__': 71 | print(act0(1,-1, 1,3)) -------------------------------------------------------------------------------- /code/compression/new_MNIST/performance_MNIST.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from model import FCnet, model_init 5 | from torch.utils.data import Dataset, DataLoader 6 | from GMM_data import gen_data, gen_stat, get_dataset, gd, OPT 7 | import matplotlib as mpl 8 | mpl.use('Agg') 9 | import matplotlib.pyplot as plt 10 | from matching import get_or_param, get_act0, get_act1, sovle_0, sovle_1 11 | 12 | 13 | if __name__ == '__main__': 14 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0' 15 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 16 | epochs = 500 17 | batch_size = 256 18 | lr = 0.005 19 | L = 3 20 | 21 | '''data_prepare''' 22 | selected_target=[0,1,2,3,4,5,6,7,8,9] 23 | N_train = 50000 24 | N_test = 8000 25 | p = 784 26 | cs=[0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1] 27 | 28 | means, covs = gen_stat('means', selected_target=selected_target,T = N_train,p = p,cs=cs) 29 | X, Y = gen_data(testbase = 'MNIST', T=N_train, p=p, cs=cs, means=means, covs=covs, selected_target=selected_target, train=1) 30 | 31 | dataset = get_dataset(data = X,label = Y) 32 | train_dataset = DataLoader(dataset, batch_size=batch_size, shuffle=True) 33 | 34 | X_test, Y_test = gen_data(testbase = 'MNIST', T=N_test, p=p, cs=cs, means=means, covs=covs, selected_target=selected_target, train=0) 35 | dataset_test = get_dataset(data = X_test,label = Y_test) 36 | test_dataset = DataLoader(dataset_test, batch_size=N_test, shuffle=True) 37 | 38 | '''data stastics and scales of original model''' 39 | tau_0 = np.sqrt(np.mean(np.diag(X.T @ X))) 40 | 41 | 42 | '''ReLU()''' 43 | or_pa = get_or_param(L, 'ReLU', tau_0) 44 | 45 | # '''matching to get light_model's activation by NTK_LC''' 46 | act_param = [] 47 | tau_last = tau_0 48 | for i in range(0,L-1): 49 | new_param0 = get_act0(tau_last) 50 | a = sovle_0([or_pa[0][i], or_pa[1][i], or_pa[2][i]], new_param0, 1000) 51 | new_mean = new_param0[4](a[0],a[1],a[2]) 52 | tau_last = np.sqrt(new_param0[3](a[0],a[1],a[2])) 53 | act_param.append(np.append(a, new_mean)) 54 | 55 | b1 = np.sqrt((or_pa[2][L-1]**2 / or_pa[1][L-1]**2) + 4 * or_pa[3][L-1]**2) 56 | b2 = b1 57 | new_param1 = get_act1(b1,b2,tau_last) 58 | b = sovle_1([or_pa[0][L-1], or_pa[1][L-1], or_pa[2][L-1], or_pa[3][L]], new_param1, 1000) 59 | new_mean = new_param1[3](b[0],b[1],b[2],b[3]) 60 | b.append(b1) 61 | b.append(b2) 62 | act_param.append(np.append(b, new_mean)) 63 | 64 | 65 | # '''train''' 66 | loss_func = nn.CrossEntropyLoss() 67 | net1 = model_init(p, 512,512,1024, len(cs), act_param) 68 | # net1 = model_init(p, 1024,1024,2048, len(cs), act_param) 69 | # net1 = model_init(p, 2048,2048,4096, len(cs), act_param) 70 | # net1 = model_init(p, 256,256,512, len(cs), act_param) 71 | # net1 = model_init(p, 4096,4096,8192, len(cs), act_param) 72 | 
net1.to(device) 73 | net1.initialize(0) 74 | nn.init.normal_(net1.OUT.weight.data) 75 | optimizer1 = torch.optim.Adam(net1.OUT.parameters(), lr=lr) 76 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer1, step_size=200, gamma=0.1) 77 | net1.train() 78 | t = np.arange(0,epochs,1) 79 | loss_record = [] 80 | acc = [] 81 | acct = [] 82 | for epoch in range(epochs): 83 | aa = 0 84 | for train_data, train_label in train_dataset: 85 | net1.train() 86 | net1.to(device) 87 | label_onehot = torch.zeros(train_label.shape[0], len(cs)).long().to(device) 88 | label_onehot.scatter_(dim=1,index=train_label.unsqueeze(dim=1).long().to(device),src=torch.ones(train_label.shape[0], len(cs)).long().to(device)) 89 | optimizer1.zero_grad() 90 | pre = net1(train_data.to(device)) 91 | ppre = (torch.argmax(pre,1)).to(torch.float32) 92 | loss = loss_func(pre, label_onehot.to(torch.float32).to(device)) 93 | loss.requires_grad_(True) 94 | loss.backward() 95 | optimizer1.step() 96 | aa += torch.sum(ppre==train_label.to(device)) 97 | 98 | # scheduler.step() 99 | loss_record.append(loss.item()) 100 | aa = aa/N_train 101 | acc.append(aa) 102 | 103 | net1.eval() 104 | aat = 0 105 | for test_data, test_label in test_dataset: 106 | pre = net1(test_data.to(device)) 107 | ppre = (torch.argmax(pre,1)).to(torch.float32) 108 | aat += torch.sum(ppre==test_label.to(device)) 109 | aat = aat/N_test 110 | acct.append(aat) 111 | if epoch%20 == 1: 112 | print("epoch:",epoch) 113 | print("train:", aa) 114 | print("test:", aat) 115 | print("new train done.") 116 | 117 | -------------------------------------------------------------------------------- /code/compression/new_MNIST/test: -------------------------------------------------------------------------------- 1 | add new folder 2 | -------------------------------------------------------------------------------- /code/equation_solve/__pycache__/solve_equation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/equation_solve/__pycache__/solve_equation.cpython-38.pyc -------------------------------------------------------------------------------- /code/equation_solve/solve_equation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | '''Solve equation system to match coefficients each layer. 4 | 5 | Uing custom_activation_analysis in expect_calculate_math to calculate coefficient expressions with variables, 6 | match with coefficients calculated for origin network to determine variables values. 7 | Note: To use this module, see solve_equation. 
8 | ''' 9 | __author__ = "Yongqi_Du" 10 | __copyright__ = "Copyright 2021, Lossless compression" 11 | __license__ = "GPL" 12 | __maintainer__ = "Rob Knight" 13 | __email__ = "rob@spot.colorado.edu" 14 | __status__ = "Development" # status is one of "Prototype", "Development", or "Production" 15 | __all__ = ['solve_equation'] 16 | import sys 17 | import os 18 | 19 | sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) 20 | import numpy as np 21 | import scipy 22 | import torch 23 | from scipy import optimize 24 | 25 | from utils.data_prepare import my_dataset_custome 26 | from expect_cal.expect_calculate import expect_calcu 27 | from expect_cal.expect_calculate_math import ( 28 | custome_activation_analysis, custome_activation_analysis_noparam) 29 | from model_define.model import My_Model 30 | from utils.utils import calculate_CK_tilde_coef, estim_tau_tensor 31 | 32 | device = "cuda:3" if torch.cuda.is_available() else "cpu" 33 | 34 | 35 | def calculate_CK_match_coef_full(model, tau_zero): 36 | '''calculate five expect(or squared) each layer(alpha1, alpha2, alpha3, alpha4, tau). 37 | Note: no variables, this function can be used only for a known network(without unknown variables for activation functions) 38 | 39 | Arguments: 40 | model -- model instance of class My_Model in model.py 41 | tau_zero -- tau_zero calculated with data 42 | Returns: 43 | d_full -- coefficients for all layers(a list) 44 | ''' 45 | tao_last = tau_zero 46 | d_full = [] 47 | for activation in model.activation_list: 48 | name = activation['name'] 49 | args = activation['args'] 50 | if args: 51 | if name in ['Binary_Zero', 'Binary_Last']: 52 | if name == 'Binary_Zero': 53 | ( 54 | _, 55 | first_order, 56 | second_order, 57 | square_second_order, 58 | tau, 59 | ) = custome_activation_analysis_noparam(name='binary_zero', 60 | **args) 61 | elif name == 'Binary_Last': 62 | ( 63 | _, 64 | first_order, 65 | second_order, 66 | square_second_order, 67 | tau, 68 | ) = custome_activation_analysis_noparam(name='binary_last', 69 | **args) 70 | 71 | else: 72 | ( 73 | _, 74 | first_order, 75 | second_order, 76 | square_second_order, 77 | tau, 78 | ) = expect_calcu(name, **args) 79 | else: 80 | ( 81 | _, 82 | first_order, 83 | second_order, 84 | square_second_order, 85 | tau, 86 | ) = expect_calcu(name) 87 | d1 = (first_order(tao_last))**2 88 | d2 = (second_order(tao_last))**2 89 | d3 = square_second_order(tao_last) 90 | tao_last = np.sqrt(tau(tao_last)) 91 | d_last = np.array([tao_last, d1, d2, d3]) 92 | d_full.append(d_last) 93 | print(d_full) 94 | return d_full 95 | 96 | 97 | def solve_equation(model, tau_zero, loop): 98 | '''Here just a test for all binary activation(first two layer, binary_zero; last layer, binary_last). 
99 | 100 | Arguments: 101 | model: origin model 102 | tau_zero: tau calculated with all data 103 | loop: loop times for solve equations(each loop given a random initial value) 104 | Returns: 105 | params_full: binary_zero+args + binary_last+args (which can be use as activation_list when configure My_Model) 106 | ''' 107 | # calculate origin expects 108 | d_full = calculate_CK_match_coef_full(model, tau_zero) 109 | tau_last = tau_zero 110 | 111 | # for layers before last layer(we don't match tao for layers before last layer) 112 | ( 113 | _, 114 | first_order, 115 | second_order, 116 | square_second_order, 117 | tau, 118 | ) = custome_activation_analysis('binary_zero') 119 | params_full = [] 120 | for i in range(model.layer_num - 1): 121 | d1_origin, d2_origin, d3_origin = d_full[i][1], d_full[i][2], d_full[ 122 | i][3] 123 | 124 | def d1(s1, s2, b1): 125 | return first_order(s1, s2, b1, tau_last) 126 | 127 | def d2(s1, s2, b1): 128 | return second_order(s1, s2, b1, tau_last) 129 | 130 | def d3(s1, s2, b1): 131 | return square_second_order(s1, s2, b1, tau_last) 132 | 133 | def func(x): 134 | return ((d1(x[0], x[1], x[2])**2 - d1_origin)**2 + 135 | (d2(x[0], x[1], x[2])**2 - d2_origin)**2 + 136 | (d3(x[0], x[1], x[2]) - d3_origin)**2) 137 | 138 | cons = {'type': 'ineq', 'fun': lambda x: x[1] - x[0]} 139 | res = my_solve( 140 | func, 141 | var_num=3, 142 | loop=loop, 143 | ratios=[-0.5, 0.5, 0.5], 144 | method='SLSQP', 145 | constraints=cons, 146 | options={'ftol': 1e-30}, 147 | ) 148 | print(res.fun, res.x) 149 | 150 | # update tau_last(calculating expects each layer needs tau for last layer, so we need update tau_last here) 151 | tau_last = np.sqrt(tau(res.x[0], res.x[1], res.x[2], tau_last)) 152 | params_full.append({ 153 | 'name': 'Binary_Zero', 154 | 'args': { 155 | 's1': res.x[0], 156 | 's2': res.x[1], 157 | 'b1': res.x[2] 158 | }, 159 | }) 160 | 161 | # for last layer(we match tao for last layer) 162 | tau_origin, d1_origin, d2_origin, d3_origin = ( 163 | d_full[i + 1][0], 164 | d_full[i + 1][1], 165 | d_full[i + 1][2], 166 | d_full[i + 1][3], 167 | ) 168 | ( 169 | _, 170 | first_order, 171 | second_order, 172 | square_second_order, 173 | tau, 174 | ) = custome_activation_analysis('binary_last') 175 | 176 | # def t(s1, s2, s3, s4, b1, b2): 177 | # return np.sqrt(tau(s1, s2, s3, s4, b1, b2, tau_last)) 178 | 179 | # def d1(s1, s2, s3, s4, b1, b2): 180 | # return first_order(s1, s2, s3, s4, b1, b2, tau_last) 181 | 182 | # def d2(s1, s2, s3, s4, b1, b2): 183 | # return second_order(s1, s2, s3, s4, b1, b2, tau_last) 184 | 185 | # def d3(s1, s2, s3, s4, b1, b2): 186 | # return square_second_order(s1, s2, s3, s4, b1, b2, tau_last) 187 | 188 | b1 = np.sqrt((d3_origin**2 / d2_origin) + 4 * tau_origin**2) 189 | b2 = b1 190 | 191 | def t(s1, s2, s3, s4): 192 | return tau(s1, s2, s3, s4, b1, b2, tau_last) 193 | 194 | def d1(s1, s2, s3, s4): 195 | return first_order(s1, s2, s3, s4, b1, b2, tau_last) 196 | 197 | def d2(s1, s2, s3, s4): 198 | return second_order(s1, s2, s3, s4, b1, b2, tau_last) 199 | 200 | def d3(s1, s2, s3, s4): 201 | return square_second_order(s1, s2, s3, s4, b1, b2, tau_last) 202 | 203 | # print(d3(-0.19, 0.623, -0.1945, 0.538)) 204 | 205 | def func(x): 206 | return ((t(x[0], x[1], x[2], x[2] + x[1] - x[0]) - tau_origin**2)**2 + 207 | (d1(x[0], x[1], x[2], x[2] + x[1] - x[0])**2 - d1_origin)**2 + 208 | (d2(x[0], x[1], x[2], x[2] + x[1] - x[0])**2 - d2_origin)**2 + 209 | (d3(x[0], x[1], x[2], x[2] + x[1] - x[0]) - d3_origin)**2) 210 | 211 | cons = ({ 212 | 'type': 'ineq', 213 | 
        'fun': lambda x: x[1] - x[0]
214 |     }, {
215 |         'type': 'ineq',
216 |         'fun': lambda x: x[2] - x[1]
217 |     },
218 |     {
219 |         'type': 'ineq',
220 |         'fun': lambda x: x[2] - x[0]
221 |     },
222 |     )
223 | 
224 |     res = my_solve(
225 |         func,
226 |         var_num=3,
227 |         loop=loop,
228 |         ratios=[1, 1, 1],
229 |         method='SLSQP',
230 |         constraints=cons,
231 |         options={'ftol': 1e-30},
232 |     )
233 |     print(res.fun, res.x)
234 |     params_full.append({
235 |         'name': 'Binary_Last',
236 |         'args': {
237 |             's1': res.x[0],
238 |             's2': res.x[1],
239 |             's3': res.x[2],
240 |             's4': res.x[2] + res.x[1] - res.x[0],
241 |             'b1': b1,
242 |             'b2': b2,
243 |         },
244 |     })
245 |     print(params_full)
246 |     return params_full
247 | 
248 | 
249 | def my_solve(func, var_num, loop, ratios, **args):
250 |     '''Solve the equation system `loop` times, each time from a random initial value, and keep the best result.
251 | 
252 |     Arguments:
253 |         func: objective function encoding the equation system
254 |         var_num: number of variables in func
255 |         loop: number of restarts (each with a random initial value)
256 |         ratios: scaling factors for the random initial values
257 |     Returns:
258 |         res_final: best objective value and solution of the minimization target over all restarts
259 |     '''
260 |     res_final = scipy.optimize.OptimizeResult(fun=100000, x=np.ones(var_num))
261 |     for i in range(loop):
262 |         # initial values
263 |         X = np.array([])
264 |         for ratio in ratios:
265 |             X = np.concatenate((X, ratio * np.random.randn(1)))
266 |         # to ensure s1 < s2 < s3 < s4
267 |         if var_num > 3:
268 |             X[1] = X[1] if X[1] > X[0] else 2 * X[0] - X[1]
269 |             X[2] = X[2] if X[2] > X[1] else 2 * X[1] - X[2]
270 |             X[3] = X[3] if X[3] > X[2] else 2 * X[2] - X[3]
271 |         # optimization
272 |         res = optimize.minimize(func, X, **args)
273 |         # find the best one
274 |         if res.fun < res_final.fun:
275 |             res_final = res
276 |         print(res_final.fun)
277 |     return res_final
278 | 
279 | 
280 | if __name__ == "__main__":
281 | 
282 |     cs = [0.5, 0.5]
283 |     K = len(cs)
284 |     # load data
285 |     res = my_dataset_custome('MNIST',
286 |                              T_train=8000,
287 |                              T_test=1000,
288 |                              cs=cs,
289 |                              selected_target=[6, 8])
290 |     dataset_train, dataset_test = res[0], res[1]
291 |     # dataset = my_dataset_custome('iid',T=8000, p=300, cs=[0.4,0.6],selected_target=[1,2])
292 |     # TODO: new net configuration; here just Binary_Zero and Binary_Last
293 |     # activation_new = ['binary_zero', 'binary_last']
294 |     tau_zero = np.sqrt(estim_tau_tensor(dataset_train.X))
295 |     print(tau_zero)
296 |     # tau_zero = np.sqrt(estim_tau(dataset.data)); the two are equal
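The targets matched by `solve_equation` come from the closed-form Gaussian moments in `code/expect_cal/expect_calculate_math.py`. A quick Monte-Carlo cross-check of the ReLU moments used there (illustrative only; the value of `tau` is arbitrary):

```python
import numpy as np

tau = 1.5
z = tau * np.random.randn(1_000_000)  # z ~ N(0, tau^2)
relu = np.maximum(z, 0)

print(relu.mean(), tau / np.sqrt(2 * np.pi))         # zero order: tau/sqrt(2*pi)
print((z > 0).mean(), 0.5)                           # first order: E[phi'(z)] = 1/2
print(relu.var(), tau**2 * (0.5 - 1 / (2 * np.pi)))  # tau recursion: tau^2*(1/2 - 1/(2*pi))
```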
297 | 298 | # origin network setting 299 | layer_num = 3 # layer number for network 300 | input_num = 784 # input dimension for network 784/256 301 | weight_num_list = [3000, 1000, 1000] # number for neurons for each layer 302 | activation_list = [ 303 | { 304 | 'name': 'ReLU', 305 | 'args': None 306 | }, { 307 | 'name': 'ReLU', 308 | 'args': None 309 | }, { 310 | 'name': 'ReLU', 311 | 'args': None 312 | } 313 | # { 314 | # 'name': 'LReLU', 315 | # 'args': { 316 | # 'coe1': 0.1, 317 | # 'coe2': 1 318 | # } 319 | # }, 320 | # { 321 | # 'name': 'LReLU', 322 | # 'args': { 323 | # 'coe1': 0.1, 324 | # 'coe2': 1 325 | # } 326 | # }, 327 | # { 328 | # 'name': 'LReLU', 329 | # 'args': { 330 | # 'coe1': 0.1, 331 | # 'coe2': 1 332 | # } 333 | # }, 334 | ] 335 | 336 | # define origin model 337 | model = My_Model( 338 | layer_num=layer_num, 339 | input_num=input_num, 340 | weight_num_list=weight_num_list, 341 | activation_list=activation_list, 342 | tau_zero=tau_zero, 343 | ) 344 | 345 | res = solve_equation(model, tau_zero, 2000) 346 | activation_list = res 347 | new_model = My_Model(layer_num=layer_num, 348 | input_num=input_num, 349 | weight_num_list=weight_num_list, 350 | activation_list=activation_list, 351 | tau_zero=tau_zero) 352 | 353 | # CK_origin = calculate_CK_tilde_coef(model, tau_zero) 354 | 355 | # CK_new = calculate_CK_tilde_coef(new_model, tau_zero) 356 | -------------------------------------------------------------------------------- /code/expect_cal/__pycache__/expect_calculate.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/expect_cal/__pycache__/expect_calculate.cpython-38.pyc -------------------------------------------------------------------------------- /code/expect_cal/__pycache__/expect_calculate_math.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/expect_cal/__pycache__/expect_calculate_math.cpython-38.pyc -------------------------------------------------------------------------------- /code/expect_cal/expect_calculate_math.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Calculate zero_order_expect, first_order_expect, second_order_expect, square_second_order_expect, tao_expect with it's expressions. 4 | 5 | This module can be used for custom activation, and calculate these five expect in a methematical way. 6 | (compared with expect_calculate in expect_calculate.py) 7 | If you want to add a new custom activation functions, you should calculate the expression of these five expect respectively, and 8 | then add the codes(expressions). 
9 | """ 10 | 11 | __author__ = "Model_compression" 12 | __copyright__ = "Copyright 2022, Lossless compression" 13 | __credits__ = ["Yongqi Du"] 14 | __license__ = "GPL" 15 | __version__ = "1.0.1" 16 | __maintainer__ = "Yongqi" 17 | __email__ = "yongqi@hust.edu.cn" 18 | __status__ = "Development" 19 | __all__ = ['custome_activation_analysis', 20 | 'custome_activation_analysis_noparam'] 21 | # status is one of "Prototype", "Development", or "Production" 22 | 23 | import math 24 | import sympy as sp 25 | from sympy import oo 26 | 27 | 28 | def custome_activation_analysis(name): 29 | '''Calculate zero_order_expect, first_order_expect, second_order_expect, square_second_order_expect, tao_expect for a given activation name and args. 30 | 31 | Arguments: 32 | name -- activation name [binary_zero, binary_last]. You can add other custome activations here(remember to add codes). 33 | args -- args for activation function construction 34 | Returns: 35 | functions of zero_order_expect, first_order_expect, second_order_expect, square_second_order_expect, tao_expect with variables, 36 | which can be used for future equations solve. 37 | Notes: 38 | if the activation has parameters, pass kwargs(the key and the value), for example: activ = my_activation('binary_zero_nonparam', s1 = 1, s2 = 2, b1 = 1) 39 | ''' 40 | if name == 'binary_zero': 41 | zero_order = (lambda s1, s2, b1, tao_last: b1 / 2 * 42 | (math.erf(s1 / (pow(2, 1 / 2) * tao_last)) - math.erf(s2 / (pow(2, 1 / 2) * tao_last))) + b1) 43 | 44 | first_order = (lambda s1, s2, b1, tao_last: b1 * 45 | (math.exp(-pow(s2 / tao_last, 2) / 2) - math.exp(-pow(s1 / tao_last, 2) / 2)) / 46 | (pow(2 * math.pi, 1 / 2) * tao_last)) 47 | 48 | second_order = (lambda s1, s2, b1, tao_last: b1 * 49 | (s2 * math.exp(-pow(s2 / tao_last, 2) / 2) - s1 * math.exp(-pow(s1 / tao_last, 2) / 2)) / 50 | (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3))) 51 | 52 | square_second_order = (lambda s1, s2, b1, tao_last: (pow(b1, 2) - 2 * b1 * zero_order(s1, s2, b1, tao_last)) * 53 | (s2 * math.exp(-pow(s2 / tao_last, 2) / 2) - s1 * math.exp(-pow(s1 / tao_last, 2) / 2)) / 54 | (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3))) 55 | tau = (lambda s1, s2, b1, tao_last: (1 / 2 * pow(b1, 2)) * 56 | (math.erf(s1 / (pow(2, 1 / 2) * tao_last)) - math.erf(s2 / (pow(2, 1 / 2) * tao_last))) + pow(b1, 2) - pow( 57 | zero_order(s1, s2, b1, tao_last), 2)) 58 | # define sympy expressions 59 | # # wait to solve equation for 60 | # s1, s2, a1 = sp.symbols('s1,s2,a1', real=True) 61 | 62 | # x, y = sp.symbols('x,y', real=True) # gaussian var 63 | 64 | # # some coe related to tao 65 | # a, b, c = sp.symbols('a,b,c', real=True, positive=True) 66 | 67 | # R = (-x + b/c y) * sp.e 68 | # tau_prime = sp.lambdify((s1, s2, b1, tao_prime_last),) 69 | 70 | # alpha_prime = sp.lambdify((s1, s2, b1, a, b, c), sp.integrate(, (x, -oo, oo))) 71 | 72 | # d_prime = sp.lambdify((s1, s2, b1, a, b, c), sp.integrate(, (x, -oo, oo))) 73 | 74 | return zero_order, first_order, second_order, square_second_order, tau 75 | # , tau_prime, alpha_prime, d_prime 76 | 77 | elif name == 'binary_last': 78 | zero_order = (lambda s1, s2, s3, s4, b1, b2, tao_last: (b1 / 2) * 79 | (math.erf(s1 / (pow(2, 1 / 2) * tao_last)) - math.erf(s4 / (pow(2, 1 / 2) * tao_last))) + b1 + (b2 / 2) * 80 | (math.erf(s3 / (pow(2, 1 / 2) * tao_last)) - math.erf(s2 / (pow(2, 1 / 2) * tao_last)))) 81 | 82 | def first_order(s1, s2, s3, s4, b1, b2, tao_last): return b1 * (math.exp(-pow(s4 / tao_last, 2) / 2) - math.exp(-pow( 83 | s1 / tao_last, 2) / 2)) / (pow(2 * 
math.pi, 1 / 2) * tao_last) + b2 * (math.exp(-pow( 84 | s2 / tao_last, 2) / 2) - math.exp(-pow(s3 / tao_last, 2) / 2)) / (pow(2 * math.pi, 1 / 2) * tao_last) 85 | 86 | def second_order(s1, s2, s3, s4, b1, b2, tao_last): return b1 * (s4 * math.exp(-pow(s4 / tao_last, 2) / 2) - s1 * math.exp( 87 | -pow(s1 / tao_last, 2) / 2)) / (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3)) + b2 * (s2 * math.exp(-pow( 88 | s2 / tao_last, 2) / 2) - s3 * math.exp(-pow(s3 / tao_last, 2) / 2)) / (pow(2 * math.pi, 1 / 2) * pow( 89 | tao_last, 3)) 90 | 91 | square_second_order = (lambda s1, s2, s3, s4, b1, b2, tao_last: b1**2 * 92 | (s4 * math.exp(-pow(s4 / tao_last, 2) / 2) - s1 * math.exp(-pow(s1 / tao_last, 2) / 2)) / 93 | (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3)) + b2**2 * 94 | (s2 * math.exp(-pow(s2 / tao_last, 2) / 2) - s3 * math.exp(-pow(s3 / tao_last, 2) / 2)) / 95 | (pow(2 * math.pi, 1 / 2) * pow(tao_last, 3)) - 2 * zero_order( 96 | s1, s2, s3, s4, b1, b2, tao_last) * second_order(s1, s2, s3, s4, b1, b2, tao_last)) 97 | 98 | tau = (lambda s1, s2, s3, s4, b1, b2, tao_last: (b1**2 / 2) * 99 | (math.erf(s1 / (pow(2, 1 / 2) * tao_last)) - math.erf(s4 / (pow(2, 1 / 2) * tao_last))) + b1**2 + (b2**2 / 2) * 100 | (math.erf(s3 / (pow(2, 1 / 2) * tao_last)) - math.erf(s2 / (pow(2, 1 / 2) * tao_last))) - 101 | (zero_order(s1, s2, s3, s4, b1, b2, tao_last))**2) 102 | return zero_order, first_order, second_order, square_second_order, tau 103 | 104 | 105 | def custome_activation_analysis_noparam(name, **args): 106 | '''Calculate zero_order_expect, first_order_expect, second_order_expect, square_second_order_expect, tao_expect for a given activation name and args. 107 | 108 | Arguments: 109 | name -- activation name [binary_zero, binary_last, ReLU, Sign]. You can add other custome activations here(remember to add codes). 
110 | args -- args for activation function construction 111 | Returns: 112 | values of zero_order_expect, first_order_expect, second_order_expect, square_second_order_expect, tao_expect.(no variables) 113 | Notes: 114 | if the activation has parameters, pass kwargs(the key and the value), for example: activ = my_activation('binary_zero_nonparam', s1 = 1, s2 = 2, b1 = 1) 115 | ''' 116 | if name == 'ReLU': 117 | def zero_order(tao_last): return tao_last / math.sqrt(2 * math.pi) 118 | 119 | def first_order(tao_last): return 1 / 2 120 | 121 | def second_order(tao_last): return 1 / \ 122 | (math.sqrt(2 * math.pi) * tao_last) 123 | 124 | def square_second_order(tao_last): return 1 - 1 / math.pi 125 | def tau(tao_last): return tao_last**2 * (1 / 2 - 1 / (2 * math.pi)) 126 | 127 | return zero_order, first_order, second_order, square_second_order, tau 128 | 129 | elif name == 'Sign': 130 | def zero_order(tao_last): return 0 131 | 132 | def first_order(tao_last): return 2 / \ 133 | (math.sqrt(2 * math.pi) * tao_last) 134 | 135 | def second_order(tao_last): return 0 136 | 137 | def square_second_order(tao_last): return 0 138 | def tau(tao_last): return 1 139 | 140 | return zero_order, first_order, second_order, square_second_order, tau 141 | 142 | elif name in ['binary_zero', 'binary_last']: 143 | ( 144 | zero_order, 145 | first_order, 146 | second_order, 147 | square_second_order, 148 | tau, 149 | ) = custome_activation_analysis(name) 150 | 151 | def zero_order_noparam(tao_last): return zero_order( 152 | **args, tao_last=tao_last) 153 | 154 | def first_order_noparam(tao_last): return first_order( 155 | **args, tao_last=tao_last) 156 | 157 | def second_order_noparam(tao_last): return second_order( 158 | **args, tao_last=tao_last) 159 | def square_second_order_noparam( 160 | tao_last): return square_second_order(**args, tao_last=tao_last) 161 | 162 | def tau_noparam(tao_last): return tau(**args, tao_last=tao_last) 163 | return ( 164 | zero_order_noparam, 165 | first_order_noparam, 166 | second_order_noparam, 167 | square_second_order_noparam, 168 | tau_noparam, 169 | ) 170 | -------------------------------------------------------------------------------- /code/fig/small/GMM-mixed_3200_784_none_rrr_[2000, 2000, 1000].png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/fig/small/GMM-mixed_3200_784_none_rrr_[2000, 2000, 1000].png -------------------------------------------------------------------------------- /code/fig/small/GMM-mixed_8000_4000_none_rrr_[2000, 2000, 1000].png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/fig/small/GMM-mixed_8000_4000_none_rrr_[2000, 2000, 1000].png -------------------------------------------------------------------------------- /code/hist_return/small/GMM-mixed_3200_784_none_rrr_[2000, 2000, 1000]: -------------------------------------------------------------------------------- 1 | ,bins1,n1 2 | 0,0.28546694597978084,3148.0 3 | 1,6.862161172512075,19.0 4 | 2,13.43885539904437,10.0 5 | 3,20.015549625576664,5.0 6 | 4,26.592243852108957,4.0 7 | 5,33.16893807864125,2.0 8 | 6,39.745632305173544,2.0 9 | 7,46.32232653170583,2.0 10 | 8,52.89902075823813,1.0 11 | 9,59.47571498477043,1.0 12 | 10,66.05240921130272,0.0 13 | 11,72.62910343783501,0.0 14 | 12,79.20579766436731,1.0 15 
| 13,85.7824918908996,0.0 16 | 14,92.35918611743189,1.0 17 | 15,98.93588034396419,0.0 18 | 16,105.51257457049648,0.0 19 | 17,112.08926879702878,0.0 20 | 18,118.66596302356108,1.0 21 | 19,125.24265725009336,0.0 22 | 20,131.81935147662566,0.0 23 | 21,138.39604570315797,0.0 24 | 22,144.97273992969025,0.0 25 | 23,151.54943415622253,0.0 26 | 24,158.12612838275484,1.0 27 | 25,164.70282260928713,0.0 28 | 26,171.27951683581944,0.0 29 | 27,177.85621106235172,0.0 30 | 28,184.432905288884,0.0 31 | 29,191.0095995154163,0.0 32 | 30,197.5862937419486,0.0 33 | 31,204.1629879684809,1.0 34 | 32,210.7396821950132,0.0 35 | 33,217.31637642154547,0.0 36 | 34,223.89307064807778,0.0 37 | 35,230.46976487461006,0.0 38 | 36,237.04645910114237,0.0 39 | 37,243.62315332767466,0.0 40 | 38,250.19984755420694,0.0 41 | 39,256.77654178073925,0.0 42 | 40,263.35323600727156,0.0 43 | 41,269.9299302338039,0.0 44 | 42,276.5066244603362,0.0 45 | 43,283.08331868686844,0.0 46 | 44,289.66001291340075,0.0 47 | 45,296.23670713993306,0.0 48 | 46,302.8134013664653,0.0 49 | 47,309.3900955929976,0.0 50 | 48,315.96678981952994,0.0 51 | 49,322.5434840460622,1.0 52 | 50,329.1201782725945,0.0 53 | -------------------------------------------------------------------------------- /code/hist_return/small/GMM-mixed_8000_4000_none_rrr_[2000, 2000, 1000]: -------------------------------------------------------------------------------- 1 | ,bins1,n1 2 | 0,0.28603084262585593,4334.0 3 | 1,0.5571755223629797,613.0 4 | 2,0.8283202021001035,497.0 5 | 3,1.0994648818372275,417.0 6 | 4,1.3706095615743512,360.0 7 | 5,1.641754241311475,308.0 8 | 6,1.912898921048599,272.0 9 | 7,2.1840436007857225,239.0 10 | 8,2.4551882805228464,212.0 11 | 9,2.7263329602599704,184.0 12 | 10,2.997477639997094,161.0 13 | 11,3.268622319734218,137.0 14 | 12,3.539766999471342,114.0 15 | 13,3.8109116792084654,87.0 16 | 14,4.082056358945589,57.0 17 | 15,4.3532010386827125,7.0 18 | 16,4.624345718419836,0.0 19 | 17,4.89549039815696,0.0 20 | 18,5.166635077894084,0.0 21 | 19,5.4377797576312075,0.0 22 | 20,5.708924437368331,0.0 23 | 21,5.980069117105455,0.0 24 | 22,6.251213796842579,0.0 25 | 23,6.522358476579703,0.0 26 | 24,6.793503156316827,0.0 27 | 25,7.06464783605395,0.0 28 | 26,7.335792515791074,0.0 29 | 27,7.606937195528198,0.0 30 | 28,7.878081875265322,0.0 31 | 29,8.149226555002446,0.0 32 | 30,8.42037123473957,0.0 33 | 31,8.691515914476694,0.0 34 | 32,8.962660594213817,0.0 35 | 33,9.23380527395094,0.0 36 | 34,9.504949953688065,0.0 37 | 35,9.776094633425188,0.0 38 | 36,10.047239313162313,0.0 39 | 37,10.318383992899436,0.0 40 | 38,10.58952867263656,0.0 41 | 39,10.860673352373684,0.0 42 | 40,11.131818032110807,0.0 43 | 41,11.402962711847932,0.0 44 | 42,11.674107391585055,0.0 45 | 43,11.945252071322178,0.0 46 | 44,12.216396751059303,0.0 47 | 45,12.487541430796426,0.0 48 | 46,12.758686110533551,0.0 49 | 47,13.029830790270674,0.0 50 | 48,13.3009754700078,0.0 51 | 49,13.572120149744922,1.0 52 | 50,13.843264829482045,0.0 53 | -------------------------------------------------------------------------------- /code/model_define/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/model_define/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /code/model_define/model.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | '''Custome module with config, you can config with configuration file. 4 | 5 | See class My_Model for more details. 6 | ''' 7 | __author__ = "Yongqi_Du" 8 | __copyright__ = "Copyright 2021, Lossless compression" 9 | __license__ = "GPL" 10 | __maintainer__ = "Rob Knight" 11 | __email__ = "rob@spot.colorado.edu" 12 | __status__ = "Development" # status is one of "Prototype", "Development", or "Production" 13 | __all__ = ['My_Model'] 14 | 15 | import sys 16 | import os 17 | 18 | sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) 19 | import numpy as np 20 | import torch 21 | import torch.nn as nn 22 | 23 | from utils.activation_tensor import my_activation_torch 24 | from expect_cal.expect_calculate import expect_calcu 25 | from expect_cal.expect_calculate_math import custome_activation_analysis_noparam 26 | 27 | device = "cuda:1" if torch.cuda.is_available() else "cpu" 28 | 29 | 30 | class My_Model(nn.Module): 31 | 32 | def __init__(self, layer_num, input_num, weight_num_list, activation_list, 33 | tau_zero) -> None: 34 | """Custome module with config, you can config with configuration file. 35 | 36 | Arguments: 37 | layer_num -- number of layer(depth of DNN) 38 | input_num -- data dimension(input layer neuron number) 39 | weight_num_list -- number of neurons in each layer(without input layer) 40 | activation_list -- list of activation functions 41 | tau_zero -- tau eatimated from input data 42 | """ 43 | super().__init__() 44 | self.input_num = input_num 45 | self.layer_num = layer_num 46 | self.activation_list = activation_list 47 | self.weight_num_list = weight_num_list 48 | self.activation_list = activation_list 49 | self.fc_layers = [] 50 | self.act_layers = [] 51 | self.tau_zero = tau_zero 52 | 53 | bias = [] 54 | tau_last = self.tau_zero 55 | for i in range(layer_num): 56 | # fc_layer 57 | if i == 0: 58 | self.fc_layers.append( 59 | nn.Linear(input_num, weight_num_list[i], bias=False)) 60 | else: 61 | self.fc_layers.append( 62 | nn.Linear(weight_num_list[i - 1], 63 | weight_num_list[i], 64 | bias=False)) 65 | 66 | # activation_layer 67 | if activation_list[i]['args']: 68 | activ = my_activation_torch(activation_list[i]['name'], 69 | **activation_list[i]['args']) 70 | self.act_layers.append(activ) 71 | else: 72 | activ = my_activation_torch(activation_list[i]['name']) 73 | self.act_layers.append(activ) 74 | name_activ = self.activation_list[i]['name'] 75 | args_activ = self.activation_list[i]['args'] 76 | 77 | # recursive(calculate zero_order, "centering" 78 | # what we have calculated expressions use custome_activation_analysis_noparam, other simple activation functions use expect_calculate, which use quad functions) 79 | if args_activ: 80 | if name_activ == 'Binary_Zero': 81 | zero_order, _, _, _, tau_square = custome_activation_analysis_noparam( 82 | 'binary_zero', **args_activ) 83 | elif name_activ == 'Binary_Last': 84 | zero_order, _, _, _, tau_square = custome_activation_analysis_noparam( 85 | 'binary_last', **args_activ) 86 | else: 87 | zero_order, _, _, _, tau_square = expect_calcu( 88 | name_activ, **args_activ) 89 | else: 90 | if name_activ in ['ReLU', 'Sign']: 91 | zero_order, _, _, _, tau_square = custome_activation_analysis_noparam( 92 | name_activ) 93 | else: 94 | zero_order, _, _, _, tau_square = expect_calcu(name_activ) 95 | d0_last = zero_order(tau_last) 96 | bias.append(d0_last) 97 | tau_last = np.sqrt(tau_square(tau_last)) 98 | self.bias = torch.tensor(bias) 99 | 100 | def forward(self, 
X):
101 |         X = X.float().to(device)
102 |         for i in range(len(self.fc_layers)):
103 |             # fc_layer
104 |             self.fc_layers[i].to(device)
105 |             X = self.fc_layers[i](X)
106 |             # activation_layer
107 |             self.act_layers[i].to(device)
108 |             X = self.act_layers[i](X)
109 |             # centering (subtract the zero_order expectation)
110 |             self.bias = self.bias.to(device)  # Tensor.to is not in-place; assign it back so the bias is on the same device as X
111 |             X = X - self.bias[i]
112 |             # normalize by the layer width
113 |             X = 1 / torch.sqrt(torch.tensor(self.weight_num_list[i])) * X
114 |         return X
115 | 
116 | 
117 | if __name__ == "__main__":
118 |     tau_zero = 1
119 |     layer_num = 3  # layer number for network
120 |     input_num = 784  # input dimension for network 784/256
121 |     weight_num_list = [3000, 1000, 1000]  # number of neurons for each layer
122 |     activation_list = [
123 |         {
124 |             'name': 'LReLU',
125 |             'args': {
126 |                 'coe1': 0.1,
127 |                 'coe2': 1
128 |             }
129 |         },
130 |         {
131 |             'name': 'LReLU',
132 |             'args': {
133 |                 'coe1': 0.1,
134 |                 'coe2': 1
135 |             }
136 |         },
137 |         {
138 |             'name': 'LReLU',
139 |             'args': {
140 |                 'coe1': 0.1,
141 |                 'coe2': 1
142 |             }
143 |         },
144 |     ]
145 |     # define origin model
146 |     model = My_Model(
147 |         layer_num=layer_num,
148 |         input_num=input_num,
149 |         weight_num_list=weight_num_list,
150 |         activation_list=activation_list,
151 |         tau_zero=tau_zero,
152 |     )
153 | 
--------------------------------------------------------------------------------
/code/model_new:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/model_new
--------------------------------------------------------------------------------
/code/model_origin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/model_origin
--------------------------------------------------------------------------------
/code/model_vgg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/model_vgg
--------------------------------------------------------------------------------
/code/spectral_characteristics/__pycache__/plot_eigen.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/spectral_characteristics/__pycache__/plot_eigen.cpython-38.pyc
--------------------------------------------------------------------------------
/code/spectral_characteristics/plot_eigen.py:
--------------------------------------------------------------------------------
1 | '''Plot the eigenvalue distribution of two matrices,
2 | and the eigenvector corresponding to the top eigenvalue.'''
3 | 
4 | import matplotlib
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | 
8 | # matplotlib.use('agg')
9 | import os
10 | 
11 | import pandas as pd
12 | 
13 | 
14 | def plot_eigen(M1, M2, setting):
15 |     """plot and save eigenvalue distributions and data points for matrix1 and matrix2.
16 | 17 | Arguments: 18 | M1 -- matrix1 19 | M2 -- matrix2 20 | setting -- some setting for name_generation 21 | """ 22 | root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 23 | dir1 = save_path_small = os.path.join(root, 'fig\\small') 24 | if not os.path.isdir(dir1): 25 | os.makedirs(dir1) 26 | dir2 = save_path_small = os.path.join(root, 'hist_return\\small') 27 | if not os.path.isdir(dir2): 28 | os.makedirs(dir2) 29 | dir3 = save_path_small = os.path.join(root, 'plot_return') 30 | if not os.path.isdir(dir3): 31 | os.makedirs(dir3) 32 | # plot and save fig and data points 33 | save_path_small = os.path.join( 34 | root, 'fig\\small', ''.join( 35 | (setting['data'], '_', setting['T'], '_', setting['p'], '_', 36 | setting['loop'], '_', setting['activation'], '_', 37 | setting['weight_num_list']))) 38 | save_path_small_data = os.path.join( 39 | root, 'hist_return\\small', ''.join( 40 | (setting['data'], '_', setting['T'], '_', setting['p'], '_', 41 | setting['loop'], '_', setting['activation'], '_', 42 | setting['weight_num_list']))) 43 | save_path_vector_data = os.path.join( 44 | root, 'plot_return', ''.join( 45 | (setting['data'], '_', setting['T'], '_', setting['p'], '_', 46 | setting['loop'], '_', setting['activation'], '_', 47 | setting['weight_num_list']))) 48 | 49 | U_Phi_c, D_Phi_c, _ = np.linalg.svd(M1) 50 | tilde_U_Phi_c, tilde_D_Phi_c, _ = np.linalg.svd(M2) 51 | 52 | # plot eigenvalue distribution for two matrix and save 53 | plt.figure(1) 54 | plt.subplot(211) 55 | xs = (min(min(D_Phi_c), 56 | min(tilde_D_Phi_c)), max(max(D_Phi_c), max(tilde_D_Phi_c))) 57 | n1, bins1, _, = plt.hist(D_Phi_c, 58 | 50, 59 | facecolor='b', 60 | alpha=0.5, 61 | rwidth=0.5, 62 | range=xs, 63 | label='Eigenvalues of $\Phi_c$') 64 | n2, bins2, _, = plt.hist(tilde_D_Phi_c, 65 | 50, 66 | facecolor='r', 67 | alpha=0.5, 68 | rwidth=0.5, 69 | range=xs, 70 | label='Eigenvalues of $\~\Phi_c$') 71 | 72 | eigen_value_data_hist = pd.DataFrame.from_dict({ 73 | 'bins1': bins1, 74 | 'n1': np.append(n1, 0), 75 | 'bins2': bins2, 76 | 'n2': np.append(n2, 0) 77 | }) 78 | with open(save_path_small_data, 'w+') as f: 79 | eigen_value_data_hist.to_csv(f) 80 | plt.legend() 81 | 82 | # plot eigenvector corresponding to top eigen value and save 83 | plt.subplot(212) 84 | pl1, = plt.plot(U_Phi_c[:, 0], 85 | 'b', 86 | label='Leading eigenvector of $\Phi_c$') 87 | pl2, = plt.plot(tilde_U_Phi_c[:, 0] * 88 | np.sign(U_Phi_c[1, 0] * tilde_U_Phi_c[1, 0]), 89 | 'r--', 90 | label='Leading eigenvector of $\~\Phi_c$') 91 | 92 | eigen_vector_data_hist = pd.DataFrame.from_dict({ 93 | 'pl1': 94 | U_Phi_c[:, 0], 95 | 'pl2': 96 | tilde_U_Phi_c[:, 0] * np.sign(U_Phi_c[1, 0] * tilde_U_Phi_c[1, 0]) 97 | }) 98 | with open(save_path_vector_data, 'w+') as f: 99 | eigen_vector_data_hist.to_csv(f) 100 | 101 | plt.show() 102 | 103 | plt.savefig(save_path_small) 104 | -------------------------------------------------------------------------------- /code/spectral_characteristics/tilde_CK.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | '''Verify consistency for spectral distrubutions of CK and CK_tilde. 
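The empirical CK is averaged over `loop` random weight draws (Gaussian, Student-t or Bernoulli) via calculate_CK_loop,
while tilde_CK is assembled from the derived per-layer coefficients via calculate_CK_tilde; the two are then compared
in spectral norm and with plot_eigen.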
4 | ''' 5 | import sys 6 | import os 7 | 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) 9 | import numpy as np 10 | import scipy 11 | import torch 12 | import torch.nn as nn 13 | from torch.utils.data import DataLoader 14 | from utils.data_prepare import my_dataset_custome 15 | from expect_cal.expect_calculate import expect_calcu 16 | from model_define.model import My_Model 17 | from utils.utils import estim_tau_tensor 18 | from plot_eigen import plot_eigen 19 | 20 | device = "cuda" if torch.cuda.is_available() else "cpu" 21 | 22 | 23 | def calculate_CK(model, data, mode='normal'): 24 | """Calculate model_output * model_output.T for one time.(you can choose weight initialization: student t or normal) 25 | 26 | Arguments: 27 | model -- model instace of class MyModel 28 | data -- all data(used to calculate output) 29 | mode -- initilization type(student t or normal) 30 | Returns: 31 | Phi_loop -- output * output.T 32 | """ 33 | for j in range(model.layer_num): 34 | with torch.no_grad(): 35 | if mode == 'student t': 36 | # student t 37 | if j == 0: 38 | r = scipy.stats.t.rvs(50.0, 39 | size=(model.weight_num_list[j], 40 | model.input_num)) 41 | else: 42 | r = scipy.stats.t.rvs(50.0, 43 | size=(model.weight_num_list[j], 44 | model.weight_num_list[j - 1])) 45 | model.fc_layers[j].weight = nn.Parameter( 46 | torch.tensor(r).float(), requires_grad=False) 47 | elif mode == 'normal': 48 | # normal 49 | nn.init.normal_(model.fc_layers[j].weight) 50 | model.fc_layers[j].weight.requires_grad = False 51 | elif mode == 'bernoulli': 52 | # bernoulli 53 | kesi = 0.5 54 | init = np.zeros(model.fc_layers[j].weight.shape).flatten() 55 | init[:round(1 / 2 * (1 - kesi) * 56 | init.size)] = 1 / np.sqrt(1 - kesi) 57 | init[round(1 / 2 * (1 - kesi) * init.size):2 * 58 | round(1 / 2 * 59 | (1 - kesi) * init.size)] = -1 / np.sqrt(1 - kesi) 60 | np.random.shuffle(init) 61 | init = torch.tensor( 62 | init.reshape(model.fc_layers[j].weight.shape)).float() 63 | with torch.no_grad(): 64 | model.fc_layers[j].weight = torch.nn.Parameter( 65 | init, requires_grad=False) 66 | 67 | with torch.no_grad(): 68 | out = model(data).detach().cpu().numpy() 69 | Phi_loop = out @ out.T 70 | return Phi_loop 71 | 72 | 73 | def calculate_CK_loop(model, data, loop): 74 | """Calculate model_output * model_output.T for loop times. 75 | 76 | Arguments: 77 | model -- model instace of class MyModel 78 | data -- all data(used to calculate output) 79 | loop -- loop times 80 | Returns: 81 | Phi_loop -- means of output * output.T over loop times 82 | """ 83 | mode = 'normal' # you can change to student t / bernoulli here. 84 | [n, _] = data.shape 85 | Phi = np.zeros((n, n)) 86 | for i in range(loop): 87 | print(i) 88 | r = calculate_CK(model, data, mode) 89 | Phi = Phi + r 90 | Phi = Phi / loop 91 | # print(Phi) 92 | 93 | return Phi 94 | 95 | 96 | def calculate_CK_tilde_coef(model, tau_zero): 97 | '''calculate coefficients each layer(alpha1, alpha2, alpha3, alpha4, tau) and print. 
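    Per layer, with d = (tau, d1, d2, d3, d4) initialized to (tau_zero, 1, 0, 0, 1), the update implemented
    below reads (using the expectation names returned by expect_calcu; tau_expect denotes the fifth one,
    named tau in the code):
        d1 <- first_order(tau)**2 * d1
        d2 <- first_order(tau)**2 * d2 + (1/4) * second_order(tau)**2 * d4**2
        d3 <- first_order(tau)**2 * d3 + (1/2) * second_order(tau)**2 * d1**2
        d4 <- (1/2) * square_second_order(tau) * d4
        tau <- sqrt(tau_expect(tau))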
98 | Note: no variables, this function can be used only for a known network(without unknown variables for activation functions) 99 | 100 | Arguments: 101 | model -- model instance of class My_Model in model.py 102 | tau_zero -- tau_zero calculated with data 103 | Returns: 104 | d_last -- alpha1, alpha2, alpha3, alpha4, tau for last layer, which can be used to calculate CK_tilde 105 | ''' 106 | tao_last = tau_zero 107 | d_last = np.array([tao_last, 1, 0, 0, 1]) # input d1, d2, d3, d4 108 | for activation in model.activation_list: 109 | name = activation['name'] 110 | args = activation['args'] 111 | if args: 112 | zero_order, first_order, second_order, square_second_order, tau = expect_calcu( 113 | name, **args) 114 | else: 115 | zero_order, first_order, second_order, square_second_order, tau = expect_calcu( 116 | name) 117 | temp = zero_order(tao_last) 118 | print(temp) 119 | d1 = first_order(tao_last)**2 * d_last[1] 120 | d2 = first_order(tao_last)**2 * d_last[2] + 1 / 4 * second_order( 121 | tao_last)**2 * d_last[4]**2 122 | d3 = first_order(tao_last)**2 * d_last[3] + 1 / 2 * second_order( 123 | tao_last)**2 * d_last[1]**2 124 | d4 = 1 / 2 * square_second_order(tao_last) * d_last[4] 125 | tao_last = np.sqrt(tau(tao_last)) 126 | d_last = np.array([tao_last, d1, d2, d3, d4]) 127 | print(d1, d2, d3, d4) 128 | print(tao_last) 129 | return d_last 130 | 131 | 132 | def calculate_CK_tilde(model, tau_zero, X, T, K, p, means, covs, y, Omega): 133 | """Calculate CK_tilde using expressions we derived. 134 | 135 | Arguments: 136 | model -- model instance of class MyModel 137 | tau_zero -- tau_zero calculated using all data 138 | X -- data input 139 | T -- number of data 140 | K -- number of class 141 | p -- dimension of data 142 | means -- means for different classes 143 | covs -- covs for different classes 144 | y -- labels 145 | Omega -- data - means 146 | Returns: 147 | tilde_Phi -- calculated CK_tilde using expressions we derived 148 | """ 149 | d_last = calculate_CK_tilde_coef(model, tau_zero) 150 | M = np.array([]).reshape(p, 0) 151 | t0 = [] 152 | J = np.zeros((T, K)) 153 | 154 | for i in range(K): 155 | M = np.concatenate((M, means[i].reshape(p, 1)), axis=1) 156 | t0.append(np.trace(covs[i]) / p) 157 | J[:, i] = (y == i) * 1 158 | 159 | phi = np.diag(Omega.T @ Omega - J @ t0) 160 | t = (t0 - tau_zero**2) * np.sqrt(p) 161 | S = np.zeros((K, K)) 162 | for i in range(K): 163 | for j in range(K): 164 | S[i, j] = np.trace(covs[i] @ covs[j]) / p 165 | 166 | V = np.concatenate((J / np.sqrt(p), phi.reshape(T, 1)), 167 | axis=1) # whats omega here for 168 | A11 = d_last[2] * np.outer(t, t) + d_last[3] * S 169 | A = np.zeros((K + 1, K + 1)) 170 | A[0:K, 0:K] = A11 171 | A[0:K, K] = d_last[2] * t 172 | A[K, 0:K] = d_last[2] * t.T 173 | A[K, K] = d_last[2] 174 | 175 | tilde_Phi = d_last[1] * (X) @ (X.T) + V @ A @ (V.T) + ( 176 | d_last[0]**2 - d_last[1] * tau_zero**2 - d_last[3] * 177 | tau_zero**4) * np.eye(T) # check tau and tau**2 ,am i right here? 
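    # (structure of the expression above: a linear term d_last[1] * X @ X.T, a
    #  low-rank term V @ A @ V.T built from the class statistics J, phi, t and S,
    #  and a diagonal residual with coefficient
    #  d_last[0]**2 - d_last[1] * tau_zero**2 - d_last[3] * tau_zero**4)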
178 | 
179 |     print(d_last)
180 | 
181 |     return tilde_Phi
182 | 
183 | 
184 | if __name__ == "__main__":
185 | 
186 |     cs = [0.5, 0.5]
187 |     K = len(cs)
188 |     # load data
189 |     res = my_dataset_custome('mixed', T_train=8000, T_test=0, cs=cs, p=4650)
190 |     # res = my_dataset_custome('MNIST',
191 |     #                          T_train=3200,
192 |     #                          T_test=0,
193 |     #                          cs=cs,
194 |     #                          selected_target=[6, 8])
195 |     dataset_train = res[0]
196 |     X, T, K, p, means, covs, y, Omega = dataset_train.X, res[6], res[4], res[
197 |         5], res[2], res[3], dataset_train.Y, res[8]
198 | 
199 |     train_loader = DataLoader(dataset_train,
200 |                               batch_size=len(dataset_train),
201 |                               shuffle=False)
202 |     data_inference, _ = next(iter(train_loader))
203 | 
204 |     tau_zero = np.sqrt(estim_tau_tensor(X))
205 |     print(tau_zero)
206 | 
207 |     # origin network setting
208 |     layer_num = 3  # layer number for network
209 |     input_num = 4650  # input dimension for network
210 |     # number of neurons for each layer
211 |     weight_num_list = [2000, 2000, 1000]
212 |     activation_list = [
213 |         # {'name' : 'Sigmoid', 'args' : None},
214 |         # {'name' : 'Binary_Zero', 'args' : {'s1':1, 's2': 2, 'b1': 1}},
215 |         # {
216 |         #     'name': 'Poly2',
217 |         #     'args': {
218 |         #         'coe1': 0.2,
219 |         #         'coe2': 1,
220 |         #         'coe3': 0
221 |         #     }
222 |         # },
223 |         {
224 |             'name': 'ReLU',
225 |             'args': None
226 |         },
227 |         {
228 |             'name': 'ReLU',
229 |             'args': None
230 |         },
231 |         # {'name' : 'poly2', 'args' : {'coe1': 1, 'coe2': 1 , 'coe3': 1}},
232 |         # {'name' : 'Sigmoid', 'args' : None}
233 |         {
234 |             'name': 'ReLU',
235 |             'args': None
236 |         }  # activation for each layer; if it has parameters, write it as Binary_Zero here
237 |     ]
238 | 
239 |     # define origin model
240 |     model = My_Model(layer_num=layer_num,
241 |                      input_num=input_num,
242 |                      weight_num_list=weight_num_list,
243 |                      activation_list=activation_list,
244 |                      tau_zero=tau_zero)
245 | 
246 |     loop = 500
247 |     # calculate the theoretical CK_tilde and the empirical CK
248 |     CK_tilde = calculate_CK_tilde(model, tau_zero, X, T, K, p, means, covs, y,
249 |                                   Omega)
250 |     # CK_new = calculate_CK_tilde(new_model, tau_zero)
251 | 
252 |     CK_loop = calculate_CK_loop(model, data_inference, loop=loop)
253 | 
254 |     error_norm = scipy.linalg.norm(CK_tilde - CK_loop, ord=2)
255 |     CK_tilde_norm = scipy.linalg.norm(CK_tilde, ord=2)
256 |     CK_loop_norm = scipy.linalg.norm(CK_loop, ord=2)
257 |     print(error_norm)
258 |     print(CK_tilde_norm)
259 |     print(CK_loop_norm)
260 | 
261 |     # a = scipy.linalg.norm(CK_tilde, CK_new, ord=2)
262 |     # print(a)
263 |     # performance calculate
264 | 
265 |     # CK_loop = CK_tilde
266 |     # CK_tilde = CK_loop
267 | 
268 |     setting = {
269 |         'data': 'GMM-mixed',
270 |         'T': str(T),
271 |         'p': str(p),
272 |         'layer_num': str(model.layer_num),
273 |         'loop': str(loop),
274 |         'activation': 'rrr',
275 |         'weight_num_list': str(weight_num_list)
276 |     }
277 |     plot_eigen(CK_tilde, CK_loop, setting=setting)
278 | 
--------------------------------------------------------------------------------
/code/spectral_characteristics/tilde_ntk.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | '''Verify consistency for spectral distributions of NTK and NTK_tilde.
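The empirical NTK is computed with functorch (vjp/jvp batched by vmap) and compared, in spectral norm,
against tilde_NTK assembled from the per-layer coefficients returned by calculate_NTK_tilde_coef.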
4 | ''' 5 | import sys 6 | import os 7 | 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) 9 | import numpy as np 10 | import scipy 11 | import torch 12 | import torch.nn as nn 13 | from torch.utils.data import DataLoader 14 | from utils.data_prepare import my_dataset_custome 15 | from expect_cal.expect_calculate import expect_calcu 16 | from model_define.model import My_Model 17 | from utils.utils import estim_tau_tensor 18 | from plot_eigen import plot_eigen 19 | 20 | 21 | import matplotlib 22 | import matplotlib.pyplot as plt 23 | import pandas as pd 24 | 25 | from functorch import make_functional, vmap, vjp, jvp, jacrev 26 | 27 | device = "cuda" if torch.cuda.is_available() else "cpu" 28 | 29 | 30 | 31 | def calculate_NTK_tilde_coef(model, tau_zero): 32 | '''calculate coefficients each layer(beta1, beta2, beta3, tau, prime_tau) and print. 33 | Note: no variables, this function can be used only for a known network(without unknown variables for activation functions) 34 | 35 | Arguments: 36 | model -- model instance of class My_Model in model.py 37 | tau_zero -- tau_zero calculated with data 38 | Returns: 39 | beta_last -- beta1, beta2, beta3, tau, and prime_tau for last layer, which can be used to calculate NTK_tilde 40 | ''' 41 | tao_last = tau_zero 42 | kappa_last_square = tau_zero # prime_tau defined for L bigger than 1 43 | beta_last = np.array([tao_last, 1, 0, 0, kappa_last_square]) # input beta1, beta2, beta3, beta4 44 | alpha_last = np.array([tao_last, 1, 0, 0, 1]) # input alpha1, alpha2, alpha3, alpha4 45 | 46 | for activation in model.activation_list: 47 | name = activation['name'] 48 | args = activation['args'] 49 | if args: 50 | zero_order, first_order, second_order, square_second_order, tau, prime_tau = expect_calcu( 51 | name, prime_tau_cal=True, **args) 52 | else: 53 | zero_order, first_order, second_order, square_second_order, tau, prime_tau = expect_calcu( 54 | name, prime_tau_cal=True) 55 | temp = zero_order(tao_last) 56 | print(temp) 57 | # calculation for alpha 58 | alpha1 = first_order(tao_last)**2 * alpha_last[1] 59 | alpha2 = first_order(tao_last)**2 * alpha_last[2] + 1 / 4 * second_order( 60 | tao_last)**2 * alpha_last[4]**2 61 | alpha3 = first_order(tao_last)**2 * alpha_last[3] + 1 / 2 * second_order( 62 | tao_last)**2 * alpha_last[1]**2 63 | alpha4 = 1 / 2 * square_second_order(tao_last) * alpha_last[4] 64 | 65 | # calculation for beta 66 | dot_alpha0 = first_order(tao_last)**2 67 | dot_alpha1 = second_order(tao_last)**2 * alpha_last[1] 68 | beta1 = alpha1 + beta_last[1] * dot_alpha0 69 | beta2 = alpha2 + beta_last[2] * dot_alpha0 70 | beta3 = alpha3 + beta_last[3] * dot_alpha0 + beta_last[1] * dot_alpha1 71 | 72 | # calculate and iterate tao_last 73 | tao_last = np.sqrt(tau(tao_last)) 74 | 75 | # calculate for kappa 76 | prime_tau_last_square = prime_tau(tao_last) 77 | kappa_last_square = tau(tao_last) + kappa_last_square * prime_tau_last_square 78 | 79 | # print alpha 80 | print('alpha:\n') 81 | alpha_last = np.array([tao_last, alpha1, alpha2, alpha3, alpha4]) 82 | print(alpha1, alpha2, alpha3, alpha4) 83 | print(tao_last) 84 | 85 | # print beta 86 | print('beta:\n') 87 | beta_last = np.array([tao_last, beta1, beta2, beta3, kappa_last_square]) 88 | print(beta1, beta2, beta3) 89 | print(tao_last, kappa_last_square) 90 | 91 | return beta_last 92 | 93 | def calculate_NTK_tilde(model, tau_zero, X, T, K, p, means, covs, y, Omega): 94 | """Calculate NTK_tilde using expressions we derived. 
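    The construction mirrors calculate_CK_tilde in tilde_CK.py, with the CK coefficients replaced by the
    NTK coefficients (beta) returned by calculate_NTK_tilde_coef.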
95 | 96 | Arguments: 97 | model -- model instance of class MyModel 98 | tau_zero -- tau_zero calculated using all data 99 | X -- data input 100 | T -- number of data 101 | K -- number of class 102 | p -- dimension of data 103 | means -- means for different classes 104 | covs -- covs for different classes 105 | y -- labels 106 | Omega -- data - means 107 | Returns: 108 | tilde_Phi -- calculated CK_tilde using expressions we derived 109 | """ 110 | d_last = calculate_NTK_tilde_coef(model, tau_zero) 111 | M = np.array([]).reshape(p, 0) 112 | t0 = [] 113 | J = np.zeros((T, K)) 114 | 115 | for i in range(K): 116 | M = np.concatenate((M, means[i].reshape(p, 1)), axis=1) 117 | t0.append(np.trace(covs[i]) / p) # \EE[z^T*z] 118 | J[:, i] = (y == i) * 1 119 | 120 | phi = np.diag(Omega.T @ Omega - J @ t0) 121 | t = (t0 - tau_zero**2) * np.sqrt(p) 122 | S = np.zeros((K, K)) 123 | for i in range(K): 124 | for j in range(K): 125 | S[i, j] = np.trace(covs[i] @ covs[j]) / p 126 | 127 | V = np.concatenate((J / np.sqrt(p), phi.reshape(T, 1)), 128 | axis=1) # whats omega here for 129 | A11 = d_last[2] * np.outer(t, t) + d_last[3] * S 130 | A = np.zeros((K + 1, K + 1)) 131 | A[0:K, 0:K] = A11 132 | A[0:K, K] = d_last[2] * t 133 | A[K, 0:K] = d_last[2] * t.T 134 | A[K, K] = d_last[2] 135 | 136 | tilde_Phi = d_last[1] * (X) @ (X.T) + V @ A @ (V.T) + ( 137 | d_last[4]**2 - d_last[1] * tau_zero**2 - d_last[3] * 138 | tau_zero**4) * np.eye(T) # check tau and tau**2 ,am i right here? 139 | 140 | print(d_last) 141 | 142 | return tilde_Phi 143 | 144 | def empirical_ntk_ntk_vps(func, params, x1, x2, compute='full'): 145 | def get_ntk(x1, x2): 146 | def func_x1(params): 147 | return func(params, x1) 148 | 149 | def func_x2(params): 150 | return func(params, x2) 151 | 152 | output, vjp_fn = vjp(func_x1, params) 153 | 154 | def get_ntk_slice(vec): 155 | # This computes vec @ J(x2).T 156 | # `vec` is some unit vector (a single slice of the Identity matrix) 157 | vjps = vjp_fn(vec) 158 | # This computes J(X1) @ vjps 159 | _, jvps = jvp(func_x2, (params,), vjps) 160 | return jvps 161 | 162 | # Here's our identity matrix 163 | basis = torch.eye(output.numel(), dtype=output.dtype, device=output.device).view(output.numel(), -1) 164 | return vmap(get_ntk_slice)(basis) 165 | 166 | # get_ntk(x1, x2) computes the NTK for a single data point x1, x2 167 | # Since the x1, x2 inputs to empirical_ntk_ntk_vps are batched, 168 | # we actually wish to compute the NTK between every pair of data points 169 | # between {x1} and {x2}. That's what the vmaps here do. 
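    # (shape sketch: the outer vmap maps over rows of x1, the inner one over
    #  rows of x2, so `result` below has shape [len(x1), len(x2), K, K] for a
    #  model with K outputs -- consistent with the 'NMKK' einsums that follow)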
170 | result = vmap(vmap(get_ntk, (None, 0)), (0, None))(x1, x2) 171 | 172 | if compute == 'full': 173 | return result 174 | if compute == 'trace': 175 | return torch.einsum('NMKK->NM', result) 176 | if compute == 'diagonal': 177 | return torch.einsum('NMKK->NMK', result) 178 | 179 | 180 | def claculate_empirical_NTK(model, x_test, x_train): 181 | net = model.to(device) 182 | fnet, params = make_functional(net) 183 | fnet_single = lambda params, x: fnet(params, x.unsqueeze(0)).squeeze(0) 184 | x_train = torch.tensor(x_train) 185 | x_test = torch.tensor(x_test) 186 | result_from_ntk_vps = empirical_ntk_ntk_vps(fnet_single, params, x_test, x_train) 187 | return result_from_ntk_vps 188 | 189 | 190 | if __name__ == "__main__": 191 | cs = [0.5, 0.5] 192 | K = len(cs) 193 | # load data 194 | res = my_dataset_custome('means_binary', T_train=8000, T_test=0, cs=cs, p=4000) 195 | # res = my_dataset_custome('mixed', T_train=8000, T_test=0, cs=cs, p=4650) 196 | # res = my_dataset_custome('MNIST', 197 | # T_train=3200, 198 | # T_test=0, 199 | # cs=cs, 200 | # selected_target=[6, 8]) 201 | dataset_train = res[0] 202 | X, T, K, p, means, covs, y, Omega = dataset_train.X, res[6], res[4], res[ 203 | 5], res[2], res[3], dataset_train.Y, res[8] 204 | print(X.shape) 205 | 206 | train_loader = DataLoader(dataset_train, 207 | batch_size=len(dataset_train), 208 | shuffle=False) 209 | data_inference, _ = next(iter(train_loader)) 210 | 211 | tau_zero = np.sqrt(estim_tau_tensor(X)) 212 | print(tau_zero) 213 | 214 | # origin network setting 215 | layer_num = 3 # layer number for network 216 | input_num = 4000 # input dimension for network 784/256 217 | # number for neurons for each layer 218 | weight_num_list = [2000, 2000, 1000] 219 | activation_list = [ 220 | # {'name' : 'Sigmoid', 'args' : None}, 221 | # {'name' : 'Binary_Zero', 'args' : {'s1':1, 's2': 2, 'b1': 1}}, 222 | # { 223 | # 'name': 'Poly2', 224 | # 'args': { 225 | # 'coe1': 0.2, 226 | # 'coe2': 1, 227 | # 'coe3': 0 228 | # } 229 | # }, 230 | { 231 | 'name': 'Sin', 232 | 'args': None 233 | }, 234 | { 235 | 'name': 'Sin', 236 | 'args': None 237 | }, 238 | # {'name' : 'poly2', 'args' : {'coe1': 1, 'coe2': 1 , 'coe3': 1}}, 239 | # {'name' : 'Sigmoid', 'args' : None} 240 | { 241 | 'name': 'Sin', 242 | 'args': None 243 | } # activation for each layer, if with param, write as Binary_Zero here 244 | ] 245 | 246 | # define origin model 247 | model = My_Model(layer_num=layer_num, 248 | input_num=input_num, 249 | weight_num_list=weight_num_list, 250 | activation_list=activation_list, 251 | tau_zero=tau_zero) 252 | 253 | # loop = 500 254 | # calculate two NTK_tilde 255 | NTK_tilde = calculate_NTK_tilde(model, tau_zero, X, T, K, p, means, covs, y, 256 | Omega) 257 | NTK_empirical = claculate_empirical_NTK(model, X, X) 258 | # CK_new = calculate_CK_tilde(new_model, tau_zero) 259 | 260 | # CK_loop = calculate_CK_loop(model, data_inference, loop=loop) 261 | 262 | error_norm = scipy.linalg.norm(NTK_tilde - NTK_empirical, ord=2) 263 | NTK_tilde_norm = scipy.linalg.norm(NTK_tilde, ord=2) 264 | NTK_loop_norm = scipy.linalg.norm(NTK_empirical, ord=2) 265 | print(error_norm) 266 | print(NTK_tilde_norm) 267 | print(NTK_loop_norm) 268 | 269 | # a = scipy.linalg.norm(CK_tilde, CK_new, ord=2) 270 | # print(a) 271 | # performance calculate 272 | 273 | # CK_loop = CK_tilde 274 | # CK_tilde = CK_loop 275 | 276 | setting = { 277 | 'data': 'GMM-mixed', 278 | 'T': str(T), 279 | 'p': str(p), 280 | 'layer_num': str(model.layer_num), 281 | 'loop': 'none', 282 | 'activation': 'rrr', 283 | 
'weight_num_list': str(weight_num_list) 284 | } 285 | root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 286 | dir1 = save_path_small = os.path.join(root, 'fig\\small') 287 | if not os.path.isdir(dir1): 288 | os.makedirs(dir1) 289 | dir2 = save_path_small = os.path.join(root, 'hist_return\\small') 290 | if not os.path.isdir(dir2): 291 | os.makedirs(dir2) 292 | dir3 = save_path_small = os.path.join(root, 'plot_return') 293 | if not os.path.isdir(dir3): 294 | os.makedirs(dir3) 295 | # plot and save fig and data points 296 | save_path_small = os.path.join( 297 | root, 'fig\\small', ''.join( 298 | (setting['data'], '_', setting['T'], '_', setting['p'], '_', 299 | setting['loop'], '_', setting['activation'], '_', 300 | setting['weight_num_list']))) 301 | save_path_small_data = os.path.join( 302 | root, 'hist_return\\small', ''.join( 303 | (setting['data'], '_', setting['T'], '_', setting['p'], '_', 304 | setting['loop'], '_', setting['activation'], '_', 305 | setting['weight_num_list']))) 306 | save_path_vector_data = os.path.join( 307 | root, 'plot_return', ''.join( 308 | (setting['data'], '_', setting['T'], '_', setting['p'], '_', 309 | setting['loop'], '_', setting['activation'], '_', 310 | setting['weight_num_list']))) 311 | 312 | U_Phi_c, D_Phi_c, _ = np.linalg.svd(NTK_tilde) 313 | # tilde_U_Phi_c, tilde_D_Phi_c, _ = np.linalg.svd(M2) 314 | 315 | # plot eigenvalue distribution for two matrix and save 316 | plt.figure(1) 317 | # plt.subplot(211) 318 | xs = (min(D_Phi_c), max(D_Phi_c)) 319 | n1, bins1, _, = plt.hist(D_Phi_c, 320 | 50, 321 | facecolor='b', 322 | alpha=0.5, 323 | rwidth=0.5, 324 | range=xs, 325 | label='Eigenvalues of $\Phi_c$') 326 | 327 | eigen_value_data_hist = pd.DataFrame.from_dict({ 328 | 'bins1': bins1, 329 | 'n1': np.append(n1, 0) 330 | }) 331 | with open(save_path_small_data, 'w+') as f: 332 | eigen_value_data_hist.to_csv(f) 333 | plt.legend() 334 | plt.show() 335 | plt.savefig(save_path_small) 336 | -------------------------------------------------------------------------------- /code/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /code/utils/__pycache__/activation_numpy.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/utils/__pycache__/activation_numpy.cpython-38.pyc -------------------------------------------------------------------------------- /code/utils/__pycache__/activation_tensor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/utils/__pycache__/activation_tensor.cpython-38.pyc -------------------------------------------------------------------------------- /code/utils/__pycache__/data_prepare.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/utils/__pycache__/data_prepare.cpython-38.pyc 
-------------------------------------------------------------------------------- /code/utils/__pycache__/expect_calculate.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/utils/__pycache__/expect_calculate.cpython-38.pyc -------------------------------------------------------------------------------- /code/utils/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/utils/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /code/utils/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Model-Compression/Lossless_Compression/fc69e9376085f9dc528662c2d83e9d65748db4ca/code/utils/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /code/utils/activation_numpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | '''Select one activation with given name and args. 4 | 5 | Activation name in [Binary_Zero, Binary_Last, T, Sign, Abs, LReLU, Posit, Poly2, Cos, Sin, Erf, Exp, Sigmoid], 6 | with all activation functions implemented with numpy for future quad. 7 | Note: To use this module, see function my_activation_torch 8 | ''' 9 | __author__ = "Yongqi_Du" 10 | __copyright__ = "Copyright 2021, Lossless compression" 11 | __license__ = "GPL" 12 | __maintainer__ = "Rob Knight" 13 | __email__ = "rob@spot.colorado.edu" 14 | __status__ = "Development" # status is one of "Prototype", "Development", or "Production" 15 | __all__ = ['my_activation_numpy'] 16 | 17 | import numpy as np 18 | import scipy 19 | 20 | 21 | def sigma_numpy(x, a): 22 | return np.heaviside(x - a, 0) 23 | 24 | 25 | def Binary_Zero(s1, s2, b1): 26 | return lambda x: b1 * (sigma_numpy(x, s2) + sigma_numpy(-x, -s1)) 27 | 28 | 29 | def Binary_Last(s1, s2, s3, s4, b1, b2): 30 | return lambda x: b1 * (sigma_numpy(-x, -s1) + sigma_numpy(x, s4)) + b2 * (((sigma_numpy(-x, -s3) + sigma_numpy(x, s2)) - 1)) 31 | 32 | 33 | def my_activation_numpy(name, **args): 34 | '''Select one activation Module with given name and args. 
35 | 36 | Arguments: 37 | name -- activation name [binary_zero, binary_last, Binary_Zero, Binary_Last, T, Sign, ABS, LReLU, POSIT, Poly2, Cos, Sin, ERF, EXP, Sigmoid] 38 | args -- args for activation function construction 39 | Return: 40 | activation functions 41 | Note: 42 | if the activation has parameters, pass kwargs(the key and the value), for example: activ = my_activation('binary_zero_nonparam', s1 = 1, s2 = 2, b1 = 1) 43 | ''' 44 | if name == 'binary_zero': 45 | return lambda x, s1, s2, b1: b1 * (sigma_numpy(x, s2) + sigma_numpy(-x, -s1)) 46 | elif name == 'binary_last': 47 | return lambda x, s1, s2, s3, s4, b1, b2, b3: b1 * (sigma_numpy(-x, -s1) + sigma_numpy(x, s4)) \ 48 | + b2 * (((sigma_numpy(-x, -s3) + sigma_numpy(x, s2)) - 1)) + (b3-b1) * (sigma_numpy(-x, -s1)) 49 | elif name == 'Binary_Zero': 50 | return Binary_Zero(**args) 51 | elif name == 'Binary_Last': 52 | return Binary_Last(**args) 53 | elif name == 'T': 54 | sig = lambda x: x 55 | elif name == 'ReLU': 56 | sig = lambda x: np.maximum(x, 0) 57 | elif name == 'Abs': 58 | sig = lambda x: np.abs(x) 59 | elif name == 'LReLU': 60 | sig = lambda x: args['coe1'] * np.maximum(x, 0) + args['coe2'] * np.maximum(-x, 0) 61 | elif name == 'Posit': 62 | sig = lambda x: (x > 0).astype(int) 63 | elif name == 'Sign': 64 | sig = lambda x: np.sign(x) 65 | elif name == 'Poly2': 66 | sig = lambda x: args['coe1'] * x**2 + args['coe2'] * x + args['coe3'] 67 | elif name == 'Cos': 68 | sig = lambda x: np.cos(x) 69 | elif name == 'Sin': 70 | sig = lambda x: np.sin(x) 71 | elif name == 'Erf': 72 | sig = lambda x: scipy.special.erf(x) 73 | elif name == 'Exp': 74 | sig = lambda x: np.exp(-x**2 / 2) 75 | elif name == 'Sigmoid': 76 | sig = lambda x: 1 / (1 + np.exp(-x)) 77 | elif name == 'Sign': 78 | sig = lambda x: np.sign(x) 79 | return sig 80 | 81 | -------------------------------------------------------------------------------- /code/utils/activation_tensor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | '''Select one activation Module with given name and args. 4 | 5 | Activation name in [Binary_Zero, Binary_Last, T, Sign, Abs, LReLU, Posit, Poly2, Cos, Sin, Erf, Exp] or activation Module in torch.nn, 6 | with all activation functions packed as nn.Module. 
7 | Note: To use this module, see function my_activation_torch 8 | ''' 9 | __author__ = "Yongqi_Du" 10 | __copyright__ = "Copyright 2021, Lossless compression" 11 | __license__ = "GPL" 12 | __maintainer__ = "Rob Knight" 13 | __email__ = "rob@spot.colorado.edu" 14 | __status__ = "Development" # status is one of "Prototype", "Development", or "Production" 15 | __all__ = ['my_activation_torch'] 16 | 17 | # Imports 18 | import torch 19 | import torch.nn as nn 20 | 21 | 22 | # activation functions for tensor 23 | def sigma_torch(x, a): 24 | ''' 25 | define sigma function 26 | ''' 27 | return torch.heaviside(x - a, torch.tensor(0.0)) 28 | 29 | 30 | def binary_zero(s1, s2, b1): 31 | return lambda x: b1 * (sigma_torch(x, s2) + sigma_torch(-x, -s1)) 32 | 33 | 34 | def binary_last(s1, s2, s3, s4, b1, b2): 35 | return lambda x: b1 * (sigma_torch(-x, -s1) + sigma_torch(x, s4)) + b2 * (((sigma_torch(-x, -s3) + sigma_torch(x, s2)) - 1)) 36 | 37 | 38 | def poly2(coe1, coe2, coe3): 39 | return lambda x: coe1 * x**2 + coe2 * x + coe3 40 | 41 | 42 | def sign(x): 43 | return torch.sign(x) 44 | 45 | 46 | def cos(x): 47 | return torch.cos(x) 48 | 49 | 50 | def sin(x): 51 | return torch.sin(x) 52 | 53 | 54 | def lrelu(coe1, coe2): 55 | return lambda x: coe1 * torch.maximum(x, torch.tensor(0)) + coe2 * torch.maximum(-x, torch.tensor(0)) 56 | 57 | 58 | def abs(x): 59 | return torch.abs(x) 60 | 61 | 62 | def t(x): 63 | return x 64 | 65 | 66 | def exp(x): 67 | return torch.exp(-x**2 / 2) 68 | 69 | 70 | def posit(x): 71 | return (x > 0).long() 72 | 73 | 74 | def erf(x): 75 | return torch.erf(x) 76 | 77 | 78 | # pack above functions to class for better control when need train(pack as nn.function then nn.Module if need backpropogation). 79 | class Binary_Zero(nn.Module): 80 | 81 | def __init__(self, s1: float, s2: float, b1: float) -> None: 82 | super().__init__() 83 | self.func = binary_zero(s1, s2, b1) 84 | 85 | def forward(self, input: torch.Tensor) -> torch.Tensor: 86 | return self.func(input) 87 | 88 | 89 | class Binary_Last(nn.Module): 90 | 91 | def __init__(self, s1: float, s2: float, s3: float, s4: float, b1: float, b2: float) -> None: 92 | super().__init__() 93 | self.func = binary_last(s1, s2, s3, s4, b1, b2) 94 | 95 | def forward(self, input: torch.Tensor) -> torch.Tensor: 96 | return self.func(input) 97 | 98 | 99 | class Poly2(nn.Module): 100 | 101 | def __init__(self, coe1: float, coe2: float, coe3: float) -> None: 102 | super().__init__() 103 | self.func = poly2(coe1, coe2, coe3) 104 | 105 | def forward(self, input: torch.Tensor) -> torch.Tensor: 106 | return self.func(input) 107 | 108 | 109 | class Sign(nn.Module): 110 | 111 | def __init__(self) -> None: 112 | super().__init__() 113 | self.func = sign 114 | 115 | def forward(self, input: torch.Tensor) -> torch.Tensor: 116 | return self.func(input) 117 | 118 | 119 | class Cos(nn.Module): 120 | 121 | def __init__(self) -> None: 122 | super().__init__() 123 | self.func = cos 124 | 125 | def forward(self, input: torch.Tensor) -> torch.Tensor: 126 | return self.func(input) 127 | 128 | 129 | class Sin(nn.Module): 130 | 131 | def __init__(self) -> None: 132 | super().__init__() 133 | self.func = sin 134 | 135 | def forward(self, input: torch.Tensor) -> torch.Tensor: 136 | return self.func(input) 137 | 138 | 139 | class LReLU(nn.Module): 140 | 141 | def __init__(self, coe1: float, coe2: float) -> None: 142 | super().__init__() 143 | self.func = lrelu(coe1, coe2) 144 | 145 | def forward(self, input: torch.Tensor) -> torch.Tensor: 146 | return self.func(input) 
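Note that this LReLU parameterization differs from torch.nn.LeakyReLU: `coe1` scales the positive part and `coe2` the flipped negative part, so with the coefficients used in this repository (coe1=0.1, coe2=1) negative inputs map to positive outputs. A quick sketch of the resulting behavior (values follow directly from the `lrelu` definition above):

```python
import torch

f = lrelu(0.1, 1.0)  # coe1 * max(x, 0) + coe2 * max(-x, 0)
print(f(torch.tensor([-2.0, 3.0])))  # tensor([2.0000, 0.3000])
```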
147 | 148 | 149 | class Abs(nn.Module): 150 | 151 | def __init__(self) -> None: 152 | super().__init__() 153 | self.func = abs 154 | 155 | def forward(self, input: torch.Tensor) -> torch.Tensor: 156 | return self.func(input) 157 | 158 | 159 | class T(nn.Module): 160 | 161 | def __init__(self) -> None: 162 | super().__init__() 163 | self.func = t 164 | 165 | def forward(self, input: torch.Tensor) -> torch.Tensor: 166 | return self.func(input) 167 | 168 | 169 | class Exp(nn.Module): 170 | 171 | def __init__(self) -> None: 172 | super().__init__() 173 | self.func = exp 174 | 175 | def forward(self, input: torch.Tensor) -> torch.Tensor: 176 | return self.func(input) 177 | 178 | 179 | class Posit(nn.Module): 180 | 181 | def __init__(self) -> None: 182 | super().__init__() 183 | self.func = posit 184 | 185 | def forward(self, input: torch.Tensor) -> torch.Tensor: 186 | return self.func(input) 187 | 188 | 189 | class Erf(nn.Module): 190 | 191 | def __init__(self) -> None: 192 | super().__init__() 193 | self.func = erf 194 | 195 | def forward(self, input: torch.Tensor) -> torch.Tensor: 196 | return self.func(input) 197 | 198 | 199 | def my_activation_torch(name, **args): 200 | '''Select one activation Module with given name and args. 201 | 202 | Arguments: 203 | name -- activation name [Binary_Zero, Binary_Last, T, Sign, Abs, LReLU, Posit, Poly2, Cos, Sin, Erf, Exp] or activation Module in torch.nn 204 | args -- args for activation function construction 205 | Return: 206 | activation functions(packed as nn.Module) 207 | Note: 208 | if the activation has parameters, pass kwargs(the key and the value), for example: activ = my_activation('binary_zero_nonparam', s1 = 1, s2 = 2, b1 = 1) 209 | ''' 210 | if name == 'Binary_Zero': 211 | return Binary_Zero(**args) 212 | elif name == 'Binary_Last': 213 | return Binary_Last(**args) 214 | elif name == 'T': 215 | sig = T() 216 | elif name == 'Sign': 217 | sig = Sign() 218 | elif name == 'ABS': 219 | sig = Abs() 220 | elif name == 'LReLU': 221 | sig = LReLU(**args) 222 | elif name == 'POSIT': 223 | sig = Posit() 224 | elif name == 'Poly2': 225 | return Poly2(**args) 226 | elif name == 'Cos': 227 | sig = Cos() 228 | elif name == 'Sin': 229 | sig = Sin() 230 | elif name == 'ERF': 231 | sig = Erf() 232 | elif name == 'EXP': 233 | sig = Exp() 234 | else: 235 | sig = getattr(nn, name)(**args) 236 | return sig 237 | -------------------------------------------------------------------------------- /code/utils/data_prepare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Generate data from GMM setting or sample data from datasets alredy exist and packed as torch.Dataset .(MNIST OR CIFAR10) 4 | 5 | See my_dataset_custome for more detais. 6 | """ 7 | 8 | import json 9 | import os 10 | 11 | import numpy as np 12 | import scipy 13 | import scipy.linalg 14 | import torch 15 | import torchvision.datasets as dset 16 | from torch.utils.data import Dataset 17 | 18 | 19 | def gen_data(testcase, 20 | selected_target=[6, 8], 21 | T=None, 22 | p=None, 23 | cs=None, 24 | means=None, 25 | covs=None, 26 | mode='train'): 27 | '''Generate GMM data from existing datasets or self sampling datasets. 
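    In every branch the returned X and Omega are scaled by 1/sqrt(p); the MNIST/CIFAR10 branches additionally
    recenter and renormalize the raw images so that the normalization of Assumption 1 (see the comments below) holds.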
28 | 29 | Arguments: 30 | testcase -- 'MNIST'/'CIFAR10'/'iid'/'means'/'vars'/'mixed' 31 | selected_traget -- list[xx, xx], only used for testcase=='MNIST'/'CIFAR10' 32 | T -- len of datasets 33 | p -- dimension of data, only used for testcase=='iid'/'means'/'vars'/'mixed' 34 | cs -- list[0.xx, 0.xx], ratio for diff classes, len(cs) is number of class of the dataset 35 | means -- matrix, means for diff classes, only used for testcase=='iid'/'means'/'vars'/'mixed' 36 | covs -- matrix, covs for diff classes, only used for testcase=='iid'/'means'/'vars'/'mixed' 37 | mode -- 'train'/'test', generate data for train and test 38 | Returns: 39 | X -- data 40 | Omega -- data - means 41 | y -- targets 42 | means -- means 43 | covs -- covs 44 | K -- number of class 45 | p -- dimension of data 46 | T -- number of data 47 | 48 | ''' 49 | rng = np.random 50 | 51 | if testcase == 'MNIST': 52 | root = os.path.join( 53 | os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 54 | 'data') 55 | if not os.path.isdir(root): 56 | os.makedirs(root) 57 | if mode == 'train': 58 | mnist = dset.MNIST(root=os.path.join(root, 'train'), 59 | train=True, 60 | download=True) 61 | else: 62 | mnist = dset.MNIST(root=os.path.join(root, 'test'), 63 | train=False, 64 | download=True) 65 | data, labels = mnist.data.view(mnist.data.shape[0], -1), mnist.targets 66 | 67 | # feel free to choose the number you like :) 68 | selected_target = selected_target 69 | p = 784 70 | K = len(selected_target) 71 | 72 | # get the whole set of selected number 73 | data_full = [] 74 | data_full_matrix = np.array([]).reshape(p, 0) 75 | ind = 0 76 | for i in selected_target: 77 | locate_target_train = np.where(labels == i)[0] 78 | data_full.append(data[locate_target_train].T) 79 | data_full_matrix = np.concatenate( 80 | (data_full_matrix, data[locate_target_train].T), axis=1) 81 | ind += 1 82 | 83 | # recentering and normalization to satisfy Assumption 1 and 84 | T_full = data_full_matrix.shape[1] 85 | mean_selected_data = np.mean(data_full_matrix, axis=1).reshape(p, 1) 86 | norm2_selected_data = np.sum( 87 | (data_full_matrix - 88 | np.mean(data_full_matrix, axis=1).reshape(p, 1))**2, 89 | (0, 1)) / T_full 90 | for i in range(K): 91 | data_full[i] = data_full[i] - mean_selected_data 92 | data_full[i] = data_full[i] * np.sqrt(p) / np.sqrt( 93 | norm2_selected_data) 94 | 95 | # get the statistics of MNIST data 96 | means = [] 97 | covs = [] 98 | for i in range(K): 99 | data_tmp = data_full[i] 100 | T_tmp = data_tmp.shape[1] 101 | means.append(np.mean(data_tmp.numpy(), axis=1).reshape(p, 1)) 102 | covs.append((data_tmp @ (data_tmp.T) / T_tmp - 103 | means[i] @ (means[i].T)).reshape(p, p)) 104 | 105 | # data for train 106 | 107 | X = np.array([]).reshape(p, 0) 108 | Omega = np.array([]).reshape(p, 0) 109 | y = [] 110 | 111 | ind = 0 112 | for i in range(K): 113 | data_tmp = data_full[i] 114 | X = np.concatenate((X, data_tmp[:, range(int(cs[ind] * T))]), 115 | axis=1) 116 | Omega = np.concatenate( 117 | (Omega, data_tmp[:, range(int(cs[ind] * T))] - 118 | np.outer(means[ind], np.ones((1, int(T * cs[ind]))))), 119 | axis=1) 120 | y = np.concatenate((y, ind * np.ones(int(T * cs[ind])))) 121 | ind += 1 122 | 123 | X = X / np.sqrt(p) 124 | Omega = Omega / np.sqrt(p) 125 | 126 | elif testcase == 'CIFAR10': 127 | root = '../data' 128 | if not os.path.isdir(root): 129 | os.makedirs(root) 130 | if mode == 'train': 131 | cifar = dset.CIFAR10(root=os.path.join(root, 'train'), 132 | train=True, 133 | download=True) 134 | else: 135 | cifar = 
dset.CIFAR10(root=os.path.join(root, 'test'), 136 | train=False, 137 | download=True) 138 | data = cifar.data # numpy 139 | targets = np.array(cifar.targets) # numpy 140 | data, labels = data.reshape(data.shape[0], -1), targets 141 | 142 | # feel free to choose the number you like :) 143 | selected_target = selected_target 144 | p = 3072 145 | K = len(selected_target) 146 | 147 | # get the whole set of selected number 148 | data_full = [] 149 | data_full_matrix = np.array([]).reshape(p, 0) 150 | ind = 0 151 | # print(np.where(labels==6)) 152 | for i in selected_target: 153 | locate_target_train = np.where(labels == i)[0] 154 | data_full.append(data[locate_target_train].T) 155 | data_full_matrix = np.concatenate( 156 | (data_full_matrix, data[locate_target_train].T), axis=1) 157 | ind += 1 158 | 159 | # recentering and normalization to satisfy Assumption 1 and 160 | # for full datasets 161 | T_full = data_full_matrix.shape[1] 162 | mean_selected_data = np.mean(data_full_matrix, axis=1).reshape(p, 1) 163 | norm2_selected_data = np.sum( 164 | (data_full_matrix - 165 | np.mean(data_full_matrix, axis=1).reshape(p, 1))**2, 166 | (0, 1)) / T_full 167 | for i in range(K): 168 | data_full[i] = data_full[i] - mean_selected_data 169 | data_full[i] = data_full[i] * np.sqrt(p) / np.sqrt( 170 | norm2_selected_data) 171 | 172 | # get the statistics of CIFAR data 173 | # for each class 174 | means = [] 175 | covs = [] 176 | for i in range(K): 177 | data_tmp = data_full[i] 178 | T_tmp = data_tmp.shape[1] 179 | means.append(np.mean(data_tmp, axis=1).reshape(p, 1)) 180 | covs.append((data_tmp @ (data_tmp.T) / T_tmp - 181 | means[i] @ (means[i].T)).reshape(p, p)) 182 | 183 | # data for train 184 | X = np.array([]).reshape(p, 0) 185 | Omega = np.array([]).reshape(p, 0) 186 | y = [] 187 | 188 | # for each class , sample cs[class]*T samples, their Statistical Features from last part 189 | # why this part, last part is enough?? 
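        # (the statistics above are computed from all available samples of each
        #  class; the loop below then keeps only the first cs[i] * T columns per
        #  class to build the X, Omega and y that are actually returned)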
190 | ind = 0
191 | for i in range(K):
192 | data_tmp = data_full[i]
193 | X = np.concatenate((X, data_tmp[:, range(int(cs[ind] * T))]),
194 | axis=1)
195 | Omega = np.concatenate(
196 | (Omega, data_tmp[:, range(int(cs[ind] * T))] -
197 | np.outer(means[ind], np.ones((1, int(T * cs[ind]))))),
198 | axis=1)
199 | y = np.concatenate((y, ind * np.ones(int(T * cs[ind]))))
200 | ind += 1
201 |
202 | X = X / np.sqrt(p)
203 | Omega = Omega / np.sqrt(p)
204 |
205 | else:
206 | root = os.path.join(
207 | os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
208 | 'data/self_define')
209 | if not os.path.isdir(root):
210 | os.makedirs(root)
211 |
212 | data_path = os.path.join(
213 | root, ''.join(
214 | (testcase, '_', str(T), '_', str(p), '_', str(cs), '_', mode)))
215 | if os.path.exists(data_path):
216 | with open(data_path, 'r') as f:
217 | data = json.load(f)
218 | X, Omega, y, means, covs, K, p, T = np.array(data['X']), np.array(data['Omega']), np.array(data['y']), \
219 | np.array(data['means']), np.array(data['covs']), data['K'], data['p'], data['T']
220 | else:
221 | X = np.array([]).reshape(p, 0)
222 | Omega = np.array([]).reshape(p, 0)
223 | y = []
224 |
225 | K = len(cs)
226 | for i in range(K):
227 | tmp = rng.multivariate_normal(means[i],
228 | covs[i],
229 | size=int(T * cs[i])).T
230 | X = np.concatenate((X, tmp), axis=1)
231 | Omega = np.concatenate(
232 | (Omega,
233 | tmp - np.outer(means[i], np.ones((1, int(T * cs[i]))))),
234 | axis=1)
235 | y = np.concatenate((y, i * np.ones(int(T * cs[i]))))
236 |
237 | X = X / np.sqrt(p)
238 | Omega = Omega / np.sqrt(p)
239 |
240 | data_save = {
241 | 'X': X.tolist(),
242 | 'Omega': Omega.tolist(),
243 | 'y': y.tolist(),
244 | 'means': means.tolist(),
245 | 'covs': covs.tolist(),
246 | 'K': K,
247 | 'p': p,
248 | 'T': T
249 | }
250 | with open(data_path, 'w') as f:
251 | json.dump(data_save, f)
252 |
253 | return X, Omega, y, means, covs, K, p, T
254 |
255 |
256 | def my_dataset_custome(testcase,
257 | selected_target=[6, 8],
258 | T_train=None,
259 | T_test=None,
260 | p=None,
261 | cs=None,
262 | means=None,
263 | covs=None):
264 | '''Generate the data matrix for the different test cases and pack it as torch Datasets.
265 |
266 | Arguments:
267 | testcase -- 'MNIST'/'CIFAR10'/'iid'/'means'/'vars'/'mixed'
268 | selected_target -- list[xx, xx], only used for testcase=='MNIST'/'CIFAR10'
269 | T_train -- length of the training dataset
270 | T_test -- length of the test dataset
271 | p -- dimension of data, only used for testcase=='iid'/'means'/'vars'/'mixed'
272 | cs -- list[0.xx, 0.xx], ratios for the different classes; len(cs) is the number of classes
273 | means -- matrix, means for the different classes, only used for testcase=='iid'/'means'/'vars'/'mixed'
274 | covs -- matrix, covariances for the different classes, only used for testcase=='iid'/'means'/'vars'/'mixed'
275 | Returns:
276 | train_dataset -- training dataset (packed as torch.utils.data.Dataset)
277 | test_dataset -- test dataset (packed as torch.utils.data.Dataset)
278 | means -- means for the different classes
279 | covs -- covariances for the different classes
280 | K -- number of classes
281 | p -- dimension of data
282 | train_T -- number of training data points
283 | test_T -- number of test data points
284 | Omega_train -- train_data - means
285 | Omega_test -- test_data - means
286 | '''
287 | if testcase == 'MNIST' or testcase == 'CIFAR10':
288 | # get train and test datasets, then pack them as torch Datasets
289 | X_train, Omega_train, Y_train, means, covs, K, p, train_T = gen_data(
290 | testcase,
291 | selected_target=selected_target,
292 | T=T_train,
293 | cs=cs,
294 | mode='train')
295 | train_dataset = my_dataset(X_train, Y_train)
296 |
297 | X_test, Omega_test, Y_test, _, _, _, _, test_T = gen_data(
298 | testcase,
299 | selected_target=selected_target,
300 | T=T_test,
301 | cs=cs,
302 | mode='test')
303 | test_dataset = my_dataset(X_test, Y_test)
304 |
305 | else:
306 | # in the case of a Gaussian mixture, the dimension of the data must be given
307 | p = p
308 | means = []
309 | covs = []
310 | if testcase == 'iid':
311 | for i in range(len(cs)):
312 | means.append(np.zeros(p))
313 | covs.append(np.eye(p))
314 | elif testcase == 'means':
315 | for i in range(len(cs)):
316 | means.append(
317 | np.concatenate(
318 | (np.zeros(i), 4 * np.ones(1), np.zeros(p - i - 1))))
319 | covs.append(np.eye(p))
320 | # a binary-class data setting, added for a simpler analysis (cf. NTK-Dynamic)
321 | elif testcase == 'means_binary':
322 | for i in range(len(cs)):
323 | means.append(
324 | np.concatenate(
325 | (np.zeros(i), 4 * np.ones(1), np.zeros(p - i - 1))))
326 | covs.append(np.eye(p))
327 | means = means - np.mean(means, axis=0)
328 | print(means)
329 | elif testcase == 'var':
330 | for i in range(len(cs)):
331 | means.append(np.zeros(p))
332 | covs.append(np.eye(p) * (1 + 8 * i / np.sqrt(p)))
333 | elif testcase == 'mixed':
334 | for i in range(len(cs)):
335 | means.append(
336 | np.concatenate((np.zeros(i * 8), 8 * np.ones(1),
337 | np.zeros(p - i * 8 - 1))))
338 | # covs.append((1+4*i/np.sqrt(p))*scipy.linalg.toeplitz( [(.4*i)**x for x in range(p)] ))
339 | covs.append(np.eye(p) * (1 + 8 * i / np.sqrt(p)))
340 | means = np.array(means)
341 | covs = np.array(covs)
342 | # first obtain training and test data, then package them to fit the pytorch Dataset interface
343 | X_train, Omega_train, Y_train, means, covs, K, p, train_T = gen_data(
344 | testcase,
345 | T=T_train,
346 | p=p,
347 | cs=cs,
348 | means=means,
349 | covs=covs,
350 | mode='train')
351 | train_dataset = my_dataset(X_train, Y_train)
352 | X_test, Omega_test, Y_test, _, _, _, _, test_T = gen_data(testcase,
353 | T=T_test,
354 | p=p,
355 | cs=cs,
356 | means=means,
357 | covs=covs,
358 | mode='test')
359 | test_dataset = my_dataset(X_test, Y_test)
360 |
361 |
362 | return train_dataset, test_dataset, means, covs, K, p, train_T, test_T, Omega_train, Omega_test
363 |
364 |
365 | class my_dataset(Dataset):
366 | '''Pack (X, Y) data as a torch.utils.data.Dataset.
367 | '''
368 |
369 | def __init__(self, X, Y) -> None:
370 | super().__init__()
371 | self.X, self.Y = X.T, Y
372 |
373 | def __getitem__(self, idx):
374 | if self.Y.ndim == 1:
375 | return self.X[idx, :], self.Y[idx]
376 | else:
377 | return self.X[idx, :], self.Y[idx, :]
378 |
379 | def __len__(self):
380 | return self.X.shape[0]
381 |
382 |
383 | if __name__ == "__main__":
384 | # train, test = my_dataset_custome('CIFAR10', selected_target=[6,8], T=6000, cs=[1/2, 1/2])
385 | train, test, *_ = my_dataset_custome('iid',
386 | T_train=100,
387 | T_test=100,
388 | p=10,
389 | cs=[1 / 2, 1 / 2])
390 |
--------------------------------------------------------------------------------
/code/utils/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """Calculate zero_order_expect, first_order_expect, second_order_expect, square_second_order_expect and tao_expect from their analytical expressions.
4 |
5 | This module can be used for custom activations, computing these five expectations analytically.
6 | If you want to add a new custom activation function, derive the expressions of these five expectations and
7 | then add them to the code.
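Illustrative usage (an added sketch, not from the original module; it assumes `model` is a My_Model instance from code/model_define/model.py, `X` is a p-by-T data matrix with samples as columns, and the choice of estimator for tau_zero is an assumption, not prescribed here):

    tau_zero = np.sqrt(estim_tau(X))  # data-based estimate of tau_0 (assumption)
    calculate_CK_tilde_coef(model, tau_zero)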
8 | """ 9 | 10 | __author__ = "Model_compression" 11 | __copyright__ = "Copyright 2021, Lossless compression" 12 | __credits__ = [ 13 | "Rob Knight", "Peter Maxwell", "Gavin Huttley", "Matthew Wakefield" 14 | ] 15 | __license__ = "GPL" 16 | __version__ = "1.0.1" 17 | __maintainer__ = "Rob Knight" 18 | __email__ = "rob@spot.colorado.edu" 19 | __status__ = "Development" 20 | __all__ = ['calculate_CK_tilde_coef'] 21 | # status is one of "Prototype", "Development", or "Production" 22 | 23 | import numpy as np 24 | 25 | from expect_cal.expect_calculate import expect_calcu 26 | from expect_cal.expect_calculate_math import custome_activation_analysis_noparam 27 | 28 | 29 | def estim_tau_tensor(X): 30 | tau = np.mean(np.diag(X @ X.T)) 31 | 32 | return tau 33 | 34 | 35 | def estim_tau(X): 36 | tau = np.mean(np.diag(X.T @ X)) 37 | 38 | return tau 39 | 40 | 41 | def calculate_CK_tilde_coef(model, tau_zero): 42 | '''calculate coefficients each layer(alpha1, alpha2, alpha3, alpha4, tau) and print. 43 | Note: no variables, this function can be used only for a known network(without unknown variables for activation functions), here just for test 44 | 45 | Arguments: 46 | model -- model instance of class My_Model in model.py 47 | tau_zero -- tau_zero calculated with data 48 | ''' 49 | tao_last = tau_zero 50 | d_last = np.array([tao_last, 1, 0, 0, 1]) # input d1, d2, d3, d4 51 | for activation in model.activation_list: 52 | name = activation['name'] 53 | args = activation['args'] 54 | if args: 55 | if name == 'Binary_Zero': 56 | ( 57 | zero_order, 58 | first_order, 59 | second_order, 60 | square_second_order, 61 | tau, 62 | ) = custome_activation_analysis_noparam('binary_zero', **args) 63 | elif name == 'Binary_Last': 64 | ( 65 | zero_order, 66 | first_order, 67 | second_order, 68 | square_second_order, 69 | tau, 70 | ) = custome_activation_analysis_noparam('binary_last', **args) 71 | else: 72 | ( 73 | zero_order, 74 | first_order, 75 | second_order, 76 | square_second_order, 77 | tau, 78 | ) = expect_calcu(name, **args) 79 | else: 80 | ( 81 | zero_order, 82 | first_order, 83 | second_order, 84 | square_second_order, 85 | tau, 86 | ) = expect_calcu(name) 87 | temp = zero_order(tao_last) 88 | d1 = first_order(tao_last)**2 * d_last[1] 89 | d2 = (first_order(tao_last)**2 * d_last[2] + 90 | 1 / 4 * second_order(tao_last)**2 * d_last[4]**2) 91 | d3 = (first_order(tao_last)**2 * d_last[3] + 92 | 1 / 2 * second_order(tao_last)**2 * d_last[1]**2) 93 | d4 = 1 / 2 * square_second_order(tao_last) * d_last[4] 94 | tao_last = np.sqrt(tau(tao_last)) 95 | d_last = np.array([tao_last, d1, d2, d3, d4]) 96 | print(d1) 97 | print(d2) 98 | print(d3) 99 | print(d4) 100 | print(tao_last) 101 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.5.2 2 | numpy==1.23.0 3 | pandas==1.4.3 4 | scipy==1.8.1 5 | torch==1.12.0 6 | torchvision==0.13.0 7 | -------------------------------------------------------------------------------- /simulations/simulations.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | #!/usr/bin/env python 3 | # -*- coding: utf-8 -*- 4 | """Generate data from GMM setting or sample data from datasets alredy exist and packed as torch.Dataset .(MNIST OR CIFAR10) 5 | 6 | See my_dataset_custome for more detais. 
7 | """ 8 | 9 | import json 10 | import os 11 | 12 | import numpy as np 13 | import scipy 14 | import scipy.linalg 15 | import torch 16 | import torchvision.datasets as dset 17 | from torch.utils.data import Dataset 18 | 19 | 20 | def gen_data(testcase, 21 | selected_target=[6, 8], 22 | T=None, 23 | p=None, 24 | cs=None, 25 | means=None, 26 | covs=None, 27 | mode='train'): 28 | '''Generate GMM data from existing datasets or self sampling datasets. 29 | 30 | Arguments: 31 | testcase -- 'MNIST'/'CIFAR10'/'iid'/'means'/'vars'/'mixed' 32 | selected_traget -- list[xx, xx], only used for testcase=='MNIST'/'CIFAR10' 33 | T -- len of datasets 34 | p -- dimension of data, only used for testcase=='iid'/'means'/'vars'/'mixed' 35 | cs -- list[0.xx, 0.xx], ratio for diff classes, len(cs) is number of class of the dataset 36 | means -- matrix, means for diff classes, only used for testcase=='iid'/'means'/'vars'/'mixed' 37 | covs -- matrix, covs for diff classes, only used for testcase=='iid'/'means'/'vars'/'mixed' 38 | mode -- 'train'/'test', generate data for train and test 39 | Returns: 40 | X -- data 41 | Omega -- data - means 42 | y -- targets 43 | means -- means 44 | covs -- covs 45 | K -- number of class 46 | p -- dimension of data 47 | T -- number of data 48 | 49 | ''' 50 | rng = np.random 51 | 52 | if testcase == 'MNIST': 53 | root = os.path.join( 54 | os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 55 | 'data') 56 | if not os.path.isdir(root): 57 | os.makedirs(root) 58 | if mode == 'train': 59 | mnist = dset.MNIST(root=os.path.join(root, 'train'), 60 | train=True, 61 | download=True) 62 | else: 63 | mnist = dset.MNIST(root=os.path.join(root, 'test'), 64 | train=False, 65 | download=True) 66 | data, labels = mnist.data.view(mnist.data.shape[0], -1), mnist.targets 67 | 68 | # feel free to choose the number you like :) 69 | selected_target = selected_target 70 | p = 784 71 | K = len(selected_target) 72 | 73 | # get the whole set of selected number 74 | data_full = [] 75 | data_full_matrix = np.array([]).reshape(p, 0) 76 | ind = 0 77 | for i in selected_target: 78 | locate_target_train = np.where(labels == i)[0] 79 | data_full.append(data[locate_target_train].T) 80 | data_full_matrix = np.concatenate( 81 | (data_full_matrix, data[locate_target_train].T), axis=1) 82 | ind += 1 83 | 84 | # recentering and normalization to satisfy Assumption 1 and 85 | T_full = data_full_matrix.shape[1] 86 | mean_selected_data = np.mean(data_full_matrix, axis=1).reshape(p, 1) 87 | norm2_selected_data = np.sum( 88 | (data_full_matrix - 89 | np.mean(data_full_matrix, axis=1).reshape(p, 1))**2, 90 | (0, 1)) / T_full 91 | for i in range(K): 92 | data_full[i] = data_full[i] - mean_selected_data 93 | data_full[i] = data_full[i] * np.sqrt(p) / np.sqrt( 94 | norm2_selected_data) 95 | 96 | # get the statistics of MNIST data 97 | means = [] 98 | covs = [] 99 | for i in range(K): 100 | data_tmp = data_full[i] 101 | T_tmp = data_tmp.shape[1] 102 | means.append(np.mean(data_tmp.numpy(), axis=1).reshape(p, 1)) 103 | covs.append((data_tmp @ (data_tmp.T) / T_tmp - 104 | means[i] @ (means[i].T)).reshape(p, p)) 105 | 106 | # data for train 107 | 108 | X = np.array([]).reshape(p, 0) 109 | Omega = np.array([]).reshape(p, 0) 110 | y = [] 111 | 112 | ind = 0 113 | for i in range(K): 114 | data_tmp = data_full[i] 115 | X = np.concatenate((X, data_tmp[:, range(int(cs[ind] * T))]), 116 | axis=1) 117 | Omega = np.concatenate( 118 | (Omega, data_tmp[:, range(int(cs[ind] * T))] - 119 | np.outer(means[ind], np.ones((1, int(T * 
cs[ind]))))),
120 | axis=1)
121 | y = np.concatenate((y, ind * np.ones(int(T * cs[ind]))))
122 | ind += 1
123 |
124 | X = X / np.sqrt(p)
125 | Omega = Omega / np.sqrt(p)
126 |
127 | elif testcase == 'CIFAR10':
128 | root = '../data'
129 | if not os.path.isdir(root):
130 | os.makedirs(root)
131 | if mode == 'train':
132 | cifar = dset.CIFAR10(root=os.path.join(root, 'train'),
133 | train=True,
134 | download=True)
135 | else:
136 | cifar = dset.CIFAR10(root=os.path.join(root, 'test'),
137 | train=False,
138 | download=True)
139 | data = cifar.data # numpy
140 | targets = np.array(cifar.targets) # numpy
141 | data, labels = data.reshape(data.shape[0], -1), targets
142 |
143 | # feel free to choose the number you like :)
144 | selected_target = selected_target
145 | p = 3072
146 | K = len(selected_target)
147 |
148 | # get the whole set of selected classes
149 | data_full = []
150 | data_full_matrix = np.array([]).reshape(p, 0)
151 | ind = 0
152 | # print(np.where(labels==6))
153 | for i in selected_target:
154 | locate_target_train = np.where(labels == i)[0]
155 | data_full.append(data[locate_target_train].T)
156 | data_full_matrix = np.concatenate(
157 | (data_full_matrix, data[locate_target_train].T), axis=1)
158 | ind += 1
159 |
160 | # recentering and normalization to satisfy Assumption 1
161 | # (computed over the full selected dataset)
162 | T_full = data_full_matrix.shape[1]
163 | mean_selected_data = np.mean(data_full_matrix, axis=1).reshape(p, 1)
164 | norm2_selected_data = np.sum(
165 | (data_full_matrix -
166 | np.mean(data_full_matrix, axis=1).reshape(p, 1))**2,
167 | (0, 1)) / T_full
168 | for i in range(K):
169 | data_full[i] = data_full[i] - mean_selected_data
170 | data_full[i] = data_full[i] * np.sqrt(p) / np.sqrt(
171 | norm2_selected_data)
172 |
173 | # get the statistics of CIFAR data
174 | # for each class
175 | means = []
176 | covs = []
177 | for i in range(K):
178 | data_tmp = data_full[i]
179 | T_tmp = data_tmp.shape[1]
180 | means.append(np.mean(data_tmp, axis=1).reshape(p, 1))
181 | covs.append((data_tmp @ (data_tmp.T) / T_tmp -
182 | means[i] @ (means[i].T)).reshape(p, p))
183 |
184 | # data for train/test
185 | X = np.array([]).reshape(p, 0)
186 | Omega = np.array([]).reshape(p, 0)
187 | y = []
188 |
189 | # for each class, sample cs[class]*T points, reusing the class statistics
190 | # computed above over the full class data
191 | ind = 0
192 | for i in range(K):
193 | data_tmp = data_full[i]
194 | X = np.concatenate((X, data_tmp[:, range(int(cs[ind] * T))]),
195 | axis=1)
196 | Omega = np.concatenate(
197 | (Omega, data_tmp[:, range(int(cs[ind] * T))] -
198 | np.outer(means[ind], np.ones((1, int(T * cs[ind]))))),
199 | axis=1)
200 | y = np.concatenate((y, ind * np.ones(int(T * cs[ind]))))
201 | ind += 1
202 |
203 | X = X / np.sqrt(p)
204 | Omega = Omega / np.sqrt(p)
205 |
206 | else:
207 | # root = os.path.join(
208 | # os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
209 | # 'data/self_define')
210 | # if not os.path.isdir(root):
211 | # os.makedirs(root)
212 |
213 | # data_path = os.path.join(
214 | # root, ''.join(
215 | # (testcase, '_', str(T), '_', str(p), '_', str(cs), '_', mode)))
216 | # if os.path.exists(data_path):
217 | # with open(data_path, 'r') as f:
218 | # data = json.load(f)
219 | # X, Omega, y, means, covs, K, p, T = np.array(data['X']), np.array(data['Omega']), np.array(data['y']), \
220 | # np.array(data['means']), np.array(data['covs']), data['K'], data['p'], data['T']
221 | # else:
222 | X = np.array([]).reshape(p, 0)
223 | Omega = np.array([]).reshape(p, 0)
224 | y = []
225 |
226 | K = len(cs)
227 | for i in range(K):
228 | tmp = rng.multivariate_normal(means[i], covs[i], size=int(T * cs[i])).T
229 | X = np.concatenate((X, tmp), axis=1)
230 | Omega = np.concatenate(
231 | (Omega,
232 | tmp - np.outer(means[i], np.ones((1, int(T * cs[i]))))),
233 | axis=1)
234 | y = np.concatenate((y, i * np.ones(int(T * cs[i]))))
235 |
236 | X = X / np.sqrt(p)
237 | Omega = Omega / np.sqrt(p)
238 |
239 | data_save = {
240 | 'X': X.tolist(),
241 | 'Omega': Omega.tolist(),
242 | 'y': y.tolist(),
243 | 'means': means.tolist(),
244 | 'covs': covs.tolist(),
245 | 'K': K,
246 | 'p': p,
247 | 'T': T
248 | }
249 | # with open(data_path, 'w') as f:
250 | # json.dump(data_save, f)
251 |
252 | return X, Omega, y, means, covs, K, p, T
253 |
254 |
--------------------------------------------------------------------------------
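A quick end-to-end sanity check of the normalization above (an illustrative sketch, not a file from this repository; the import assumes it is run from the `simulations/` directory): after the final `1 / sqrt(p)` scaling in `gen_data`, the average squared column norm of `X` (exactly the quantity that `estim_tau` in `code/utils/utils.py` computes) should be close to 1 for the identity-covariance GMM cases.

```python
# Hypothetical sanity-check script (not part of the repository).
import numpy as np

from simulations import gen_data  # assumes we are inside simulations/

p = 100
means = np.array([np.zeros(p), np.zeros(p)])
covs = np.array([np.eye(p), np.eye(p)])

# 'iid' GMM test case: two classes with zero mean and identity covariance
X, Omega, y, means, covs, K, p, T = gen_data(
    'iid', T=200, p=p, cs=[0.5, 0.5], means=means, covs=covs, mode='train')

tau0 = np.mean(np.diag(X.T @ X))  # same quantity as utils.estim_tau(X)
print(f"tau_0 estimate: {tau0:.3f}")  # expected to be close to 1
```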