├── FL_Backdoor_CV ├── FL_Backdoor.py ├── Hessian_cv.py ├── data │ └── dataset.txt ├── helper.py ├── image_helper.py ├── main_training.py ├── models │ ├── EMNIST_model.py │ ├── EMNIST_test.py │ ├── MLP.py │ ├── TransformerModel.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── EMNIST_model.cpython-36.pyc │ │ ├── EMNIST_model.cpython-37.pyc │ │ ├── MLP.cpython-36.pyc │ │ ├── MLP.cpython-37.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── resnet.cpython-36.pyc │ │ ├── resnet.cpython-37.pyc │ │ ├── resnet_gn.cpython-36.pyc │ │ ├── resnet_gn.cpython-37.pyc │ │ ├── resnet_ln.cpython-36.pyc │ │ ├── resnet_ln.cpython-37.pyc │ │ ├── resnet_prue.cpython-36.pyc │ │ ├── resnet_prue1.cpython-36.pyc │ │ ├── simple.cpython-36.pyc │ │ ├── vgg9_only.cpython-36.pyc │ │ ├── vggnet.cpython-36.pyc │ │ ├── vggnet.cpython-37.pyc │ │ ├── word_model.cpython-36.pyc │ │ ├── wrn.cpython-36.pyc │ │ └── wrn.cpython-37.pyc │ ├── cifar_model.py │ ├── comm.txt │ ├── dense_efficient.py │ ├── densenet.py │ ├── edge_case_cnn.py │ ├── lenet.py │ ├── model_c.py │ ├── pytorch_resnet.py │ ├── resnet.py │ ├── resnet9.py │ ├── resnet_gn.py │ ├── resnet_ln.py │ ├── resnet_prue.py │ ├── resnet_prue1.py │ ├── simple.py │ ├── vgg.py │ ├── vgg9_only.py │ ├── vgg_modified.py │ ├── vggnet.py │ ├── word_model.py │ └── wrn.py ├── run_backdoor_cv_task.sh ├── test_funcs.py ├── text_load.py ├── train_funcs.py ├── utils │ ├── __init__.py │ ├── cifar100_params.yaml │ ├── cifar10_params.yaml │ ├── emnist_byclass_params.yaml │ ├── emnist_params.yaml │ └── text_load.py └── write_script.py ├── FL_Backdoor_NLP ├── .gitignore ├── IMDB.py ├── __init__.py ├── data │ └── dataset.txt ├── helper.py ├── main_training.py ├── models │ ├── TransformerModel.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── TransformerModel.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── simple.cpython-36.pyc │ │ └── word_model.cpython-36.pyc │ ├── cifar_model.py │ ├── dense_efficient.py │ ├── densenet.py │ ├── model_c.py │ ├── pytorch_resnet.py │ ├── resnet.py │ ├── simple.py │ └── word_model.py ├── notes.md ├── results │ └── DP.PNG ├── run_NLP_tasks.sh ├── test_dataset │ └── test_dataset.txt ├── test_funcs.py ├── text_helper.py ├── train_dataset │ └── train_dataset.txt ├── train_funcs.py ├── utils │ ├── __init__.py │ ├── text_load.py │ ├── words_IMDB.yaml │ ├── words_reddit_gpt2.yaml │ ├── words_reddit_lstm.yaml │ ├── words_reddit_transformer.yaml │ ├── words_sentiment140.yaml │ └── words_shake.yaml └── write_script_nlp.py └── README.md /FL_Backdoor_CV/Hessian_cv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torchvision import datasets, transforms 4 | 5 | from pyhessian import hessian # Hessian computation 6 | 7 | from models.resnet import ResNet18 8 | import matplotlib.pyplot as plt 9 | plt.switch_backend('agg') 10 | from torch.utils.data import Dataset, DataLoader 11 | import os 12 | import random 13 | from pyhessian.utils import normalization 14 | import argparse 15 | import json 16 | import argparse 17 | import copy 18 | import json 19 | import os 20 | import logging 21 | from typing import Iterator 22 | import torch 23 | import torch.nn as nn 24 | import torch.nn.functional as F 25 | import torch.optim as optim 26 | from torch.autograd import Variable 27 | import math 28 | import json 29 | from test_funcs import test_cv, test_poison_cv 30 | import os 31 | 32 | from image_helper import ImageHelper 33 | 34 | logger = logging.getLogger("logger") 35 | import yaml 36 | try: 37 | from yaml import CLoader as Loader, CDumper as Dumper 38 | except ImportError: 39 | from yaml import Loader, Dumper 40 | import time 41 | import numpy as np 42 | import random 43 | from utils.text_load import * 44 | 45 | from train_funcs import train, train_cv 46 | from test_funcs import test_reddit_lstm, test_sentiment, test_reddit_gpt2, test_cv, test_poison_cv 47 | 48 | def train_cv_poison(helper, model, poison_optimizer, criterion): 49 | 50 | total_loss = 0.0 51 | num_data = 0.0 52 | for x1 in helper.poisoned_train_data: 53 | inputs_p, labels_p = x1 54 | inputs = inputs_p 55 | 56 | for pos in range(labels_p.size(0)): 57 | labels_p[pos] = helper.params['poison_label_swap'] 58 | 59 | labels = labels_p 60 | 61 | inputs, labels = inputs.cuda(), labels.cuda() 62 | poison_optimizer.zero_grad() 63 | 64 | output = model(inputs) 65 | loss = criterion(output, labels) 66 | total_loss = loss.item()*inputs.size(0) 67 | num_data += inputs.size(0) 68 | 69 | poison_optimizer.zero_grad() 70 | 71 | return total_loss/float(num_data) 72 | 73 | torch.manual_seed(1) 74 | torch.cuda.manual_seed(1) 75 | 76 | torch.backends.cudnn.deterministic = True 77 | torch.backends.cudnn.benchmark = False 78 | torch.backends.cudnn.enabled=False 79 | 80 | random.seed(0) 81 | np.random.seed(0) 82 | 83 | 84 | def save_loss_file(file_name=None, acc_list=None, new_folder_name=None): 85 | if new_folder_name is None: 86 | path = "." 87 | 88 | else: 89 | path = f'./{new_folder_name}' 90 | if not os.path.exists(path): 91 | os.mkdir(path) 92 | 93 | 94 | filename = "%s/%s.txt" %(path, file_name) 95 | if filename: 96 | with open(filename, 'w') as f: 97 | json.dump(acc_list, f) 98 | 99 | 100 | 101 | parser = argparse.ArgumentParser(description='Loss Landscape') 102 | parser.add_argument('--params', default='utils/cifar10_params.yaml', dest='params') 103 | 104 | parser.add_argument('--gradmask_ratio', 105 | default=1.0, 106 | type=float, 107 | help='ratio = 1.0 -> baseline, 0.95 -> neurotoxin') 108 | 109 | parser.add_argument('--round', 110 | default=2290, 111 | type=int, 112 | help='2050 2060 2070 2080 2290') 113 | 114 | parser.add_argument('--GPU_id', 115 | default="0", 116 | type=str, 117 | help='GPU_id') 118 | 119 | parser.add_argument('--is_poison', 120 | default=False, 121 | type=bool, 122 | help='poison or not') 123 | 124 | parser.add_argument('--run_name', 125 | default=None, 126 | type=str, 127 | help='name of this experiemnt run (for wandb)') 128 | 129 | parser.add_argument('--poison_lr', 130 | default=0.1, 131 | type=float, 132 | help='attacker learning rate') 133 | 134 | parser.add_argument('--start_epoch', 135 | default=2001, 136 | type=int, 137 | help='Load pre-trained benign model that has been trained for start_epoch - 1 epoches, and resume from here') 138 | 139 | 140 | parser.add_argument('--aggregate_all_layer', 141 | default=0, 142 | type=int, 143 | help='aggregate_all_layer') 144 | 145 | parser.add_argument('--run_slurm', 146 | default=0, 147 | type=int, 148 | help='run_slurm') 149 | 150 | parser.add_argument('--same_structure', 151 | default=False, 152 | type=bool, 153 | help='same_structure') 154 | 155 | parser.add_argument('--num_middle_token_same_structure', 156 | default=300, 157 | type=int, 158 | help='num_middle_token_same_structure') 159 | 160 | parser.add_argument('--semantic_target', 161 | default=False, 162 | type=bool, 163 | help='semantic_target') 164 | 165 | parser.add_argument('--diff_privacy', 166 | default=False, 167 | type=bool, 168 | help='diff_privacy') 169 | 170 | parser.add_argument('--s_norm', 171 | default=1, 172 | type=float, 173 | help='s_norm') 174 | 175 | parser.add_argument('--PGD', 176 | default=0, 177 | type=int, 178 | help='wheather to use the PGD technique') 179 | 180 | parser.add_argument('--attack_num', 181 | default=40, 182 | type=int, 183 | help='attack_num 10, 20, 30') 184 | 185 | 186 | parser.add_argument('--edge_case', 187 | default=0, 188 | type=int, 189 | help='edge_case or not') 190 | 191 | parser.add_argument('--target_lr', 192 | default=0.2, 193 | type=float, 194 | help='target_lr for warmup') 195 | 196 | parser.add_argument('--resume', 197 | default=0, 198 | type=int, 199 | help='resume or not') 200 | 201 | parser.add_argument('--resume_folder', 202 | default='./Backdoor_saved_models_update1_noniid_0.9_cifar10_EC1_EE2801', 203 | type=str, 204 | help='resume_folder') 205 | 206 | parser.add_argument('--emnist_style', 207 | default='digits', 208 | type=str, 209 | help='byclass digits letters') 210 | 211 | parser.add_argument('--sentence_id_list', nargs='+', type=int) 212 | 213 | args = parser.parse_args() 214 | 215 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 216 | os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU_id 217 | 218 | with open(f'./{args.params}', 'r') as f: 219 | params_loaded = yaml.load(f, Loader=Loader) 220 | 221 | # Add additional fields to the loaded params based on args 222 | params_loaded.update(vars(args)) 223 | 224 | if params_loaded['model'] == 'resnet': 225 | if params_loaded['dataset'] == 'cifar10' or params_loaded['dataset'] == 'cifar100' or params_loaded['dataset'] == 'emnist': 226 | dataset_name = params_loaded['dataset'] 227 | if os.path.isdir(f'./data/{dataset_name}/'): 228 | params_loaded['data_folder'] = f'./data/{dataset_name}' 229 | params_loaded['participant_clearn_data'] = random.sample( \ 230 | range(params_loaded['participant_population'])[1:], 30 ) 231 | 232 | 233 | # Load the helper object 234 | helper = ImageHelper(params=params_loaded) 235 | helper.create_model_cv() 236 | helper.load_data_cv() 237 | helper.load_benign_data_cv() 238 | helper.load_poison_data_cv() 239 | 240 | dir = './Backdoor_saved_models_update1' 241 | if args.gradmask_ratio == 1: 242 | Method_name = 'Baseline' 243 | else: 244 | Method_name = f'Neurotoxin_GradMaskRation{args.gradmask_ratio}' 245 | 246 | # get the model 247 | if params_loaded['dataset'] == 'cifar10': 248 | model = ResNet18(num_classes=10) 249 | 250 | 251 | if params_loaded['dataset'] == 'cifar10': 252 | loaded_params = torch.load(f'{dir}/Backdoor_model_cifar10_resnet_maskRatio{args.gradmask_ratio}_Snorm_0.2_checkpoint_model_epoch_2050.pth') 253 | 254 | model.load_state_dict(loaded_params) 255 | model = model.cuda() 256 | model.eval() 257 | 258 | # create loss function 259 | criterion = torch.nn.CrossEntropyLoss() 260 | 261 | num_iter = 0 262 | 263 | for x1 in helper.poisoned_train_data: 264 | inputs_p, labels_p = x1 265 | 266 | for pos in range(labels_p.size(0)): 267 | labels_p[pos] = helper.params['poison_label_swap'] 268 | 269 | if num_iter == 0: 270 | inputs = inputs_p 271 | labels = labels_p 272 | 273 | else: 274 | inputs_p, inputs_p = inputs_p.cuda(), inputs_p.cuda() 275 | inputs = torch.cat((inputs,inputs_p)) 276 | labels = torch.cat((labels,labels_p)) 277 | 278 | inputs, targets = inputs.cuda(), labels.cuda() 279 | if num_iter > 7: 280 | break 281 | else: 282 | num_iter += 1 283 | 284 | 285 | # we use cuda to make the computation fast 286 | model = model.cuda() 287 | # create the hessian computation module 288 | hessian_comp = hessian(model, criterion, data=(inputs, targets), cuda=True) 289 | 290 | # hessian_comp = hessian(model, criterion, data=(inputs_b, targets_b), cuda=True) 291 | 292 | # Now let's compute the top eigenvalue. This only takes a few seconds. 293 | top_eigenvalues, top_eigenvector = hessian_comp.eigenvalues() 294 | print("The top Hessian eigenvalue of this model is %.4f"%top_eigenvalues[-1]) 295 | 296 | trace = hessian_comp.trace() 297 | trace = np.mean(trace) 298 | print('Hessian trace is:',trace) 299 | top_eigenvalues_list = [top_eigenvalues] 300 | trace_list = [trace] 301 | 302 | save_loss_file(file_name=f'Top_eigenvalue_{Method_name}', acc_list=top_eigenvalues_list, new_folder_name=f"Hessian_analysis_cv") 303 | save_loss_file(file_name=f'Hessian_trace_{Method_name}', acc_list=trace_list, new_folder_name=f"Hessian_analysis_cv") 304 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/data/dataset.txt: -------------------------------------------------------------------------------- 1 | dataset saved here -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/EMNIST_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.modules.normalization import GroupNorm 5 | 6 | class Net(nn.Module): 7 | def __init__(self): 8 | super(Net, self).__init__() 9 | self.conv1 = nn.Conv2d(1, 32, kernel_size=3) 10 | self.bn1 = torch.nn.GroupNorm(32,32) 11 | self.conv2 = nn.Conv2d(32, 64, kernel_size=3) 12 | self.bn2 = torch.nn.GroupNorm(32,64) 13 | self.conv2_drop = nn.Dropout2d(p=0.25) 14 | self.fc1 = nn.Linear(9216, 128) ### 9216 15 | self.fc2 = nn.Linear(128, 47) 16 | 17 | # def forward(self, x): 18 | # x = F.relu(self.bn1(self.conv1(x))) 19 | # x = F.relu(self.bn2(self.conv2(x))) 20 | # # print(x.size()) 21 | # x = x.view(-1, 36864) 22 | # x = F.relu(self.fc1(x)) 23 | # # x = F.dropout(x, p=0.5) 24 | # x = self.fc2(x) 25 | # return x 26 | 27 | def forward(self, x): 28 | x = F.relu(self.conv1(x)) 29 | # x = F.relu(self.conv2_drop(F.max_pool2d(self.conv2(x), 2))) 30 | x = F.relu(F.max_pool2d(self.conv2(x), 2)) 31 | # print(x.size()) 32 | x = x.view(-1, 9216) 33 | x = F.relu(self.fc1(x)) 34 | # x = F.dropout(x, p=0.5) 35 | x = self.fc2(x) 36 | return x -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/EMNIST_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from torchvision import datasets, transforms 6 | from torch.autograd import Variable 7 | from EMNIST_model import * 8 | # Training settings 9 | import numpy as np 10 | from PIL import Image 11 | 12 | class ArdisDataset(torch.utils.data.Dataset): 13 | def __init__(self, transform = None, train = True): 14 | 15 | if train: 16 | X = np.loadtxt('../data/ARDIS_DATASET_IV/ARDIS_train_2828.csv', dtype='float') 17 | Y = np.loadtxt('../data/ARDIS_DATASET_IV/ARDIS_train_labels.csv', dtype='float') 18 | else: 19 | X = np.loadtxt('../data/ARDIS_DATASET_IV/ARDIS_test_2828.csv', dtype='float') 20 | Y = np.loadtxt('../data/ARDIS_DATASET_IV/ARDIS_test_labels.csv', dtype='float') 21 | Y = np.argmax(Y,axis = 1) 22 | 23 | X = X[Y==7] 24 | 25 | self.X = X 26 | 27 | self.transform = transform 28 | self.attack_target = 1 29 | 30 | def __len__(self): 31 | return len(self.X) 32 | 33 | def __getitem__(self,index): 34 | img = self.X[index] 35 | img = np.reshape(img, (28,28)) 36 | print(img.shape) 37 | # img = img.cpu().numpy() 38 | img = Image.fromarray(img) 39 | 40 | target = int(self.attack_target) 41 | 42 | if self.transform is not None: 43 | img = self.transform(img) 44 | 45 | 46 | return img, target 47 | 48 | batch_size = 64 49 | 50 | transform_labeled = transforms.Compose([ 51 | transforms.RandomHorizontalFlip(), 52 | transforms.RandomCrop(size=28, 53 | padding=int(28*0.125), 54 | padding_mode='reflect'), 55 | transforms.ToTensor(), 56 | transforms.Normalize((0.1307,), (0.3081,)) 57 | ]) 58 | transform_val = transforms.Compose([ 59 | transforms.ToTensor(), 60 | transforms.Normalize((0.1307,), (0.3081,)) 61 | ]) 62 | 63 | train_dataset = ArdisDataset( transform=transform_labeled, train=True) 64 | 65 | test_dataset = ArdisDataset( transform=transform_val, train=False) 66 | 67 | # Data Loader (Input Pipeline) 68 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 69 | batch_size=batch_size, 70 | shuffle=True) 71 | 72 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 73 | batch_size=batch_size, 74 | shuffle=False) 75 | 76 | model = Net() 77 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) 78 | 79 | def train(epoch): 80 | for batch_idx, (data, target) in enumerate(train_loader): 81 | 82 | 83 | output = model(data) 84 | #output:64*10 85 | 86 | loss = F.nll_loss(output, target) 87 | 88 | if batch_idx % 200 == 0: 89 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 90 | epoch, batch_idx * len(data), len(train_loader.dataset), 91 | 100. * batch_idx / len(train_loader), loss.data[0])) 92 | 93 | optimizer.zero_grad() 94 | loss.backward() 95 | optimizer.step() 96 | train(1) 97 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/MLP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class MNIST_MLP(nn.Module): 6 | """ 7 | global batch_size = 100 8 | """ 9 | def __init__(self, num_classes): 10 | super(MNIST_MLP, self).__init__() 11 | self.layers = nn.ModuleList() 12 | self.layers.append(nn.Linear(28*28, 500)) 13 | self.layers.append(nn.Linear(500, 500)) 14 | self.layers.append(nn.Linear(500, num_classes)) 15 | # self.fc1 = nn.Linear(28*28, 500) 16 | # self.fc2 = nn.Linear(500, 500) 17 | # self.fc3 = nn.Linear(500, 10) 18 | 19 | def forward(self, x): # x: (batch, ) 20 | # x = x.view(-1, 28*28) 21 | # x = F.relu(self.fc1(x)) 22 | # x = F.relu(self.fc2(x)) 23 | # x = self.fc3(x) 24 | # return x 25 | x = x.view(-1, 28 * 28) 26 | x = F.relu(self.layers[0](x)) 27 | x = F.relu(self.layers[1](x)) 28 | x = self.layers[2](x) 29 | return x 30 | 31 | def get_weights(self): 32 | weights = [] 33 | for layer in self.layers: 34 | weights.append(layer.weight) 35 | return weights 36 | 37 | def get_gradients(self): 38 | gradients = [] 39 | for layer in self.layers: 40 | gradients.append(layer.weight.grad) 41 | 42 | return gradients 43 | 44 | def assign_gradients(self, gradients): 45 | for idx, layer in enumerate(self.layers): 46 | layer.weight.grad.data = gradients[idx] 47 | 48 | def update_weights(self, gradients, lr): 49 | for idx, layer in enumerate(self.layers): 50 | layer.weight.data -= lr * gradients[idx].data 51 | 52 | def initialize_new_grads(self): 53 | init_grads = [] 54 | for layer in self.layers: 55 | init_grads.append(torch.zeros_like(layer.weight)) 56 | return init_grads 57 | 58 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/TransformerModel.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn import TransformerEncoder, TransformerEncoderLayer 7 | 8 | class TransformerModel(nn.Module): 9 | 10 | def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5): 11 | super(TransformerModel, self).__init__() 12 | self.model_type = 'Transformer' 13 | self.pos_encoder = PositionalEncoding(ninp, dropout) 14 | encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout) 15 | self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers) 16 | self.encoder = nn.Embedding(ntoken, ninp) 17 | self.ninp = ninp 18 | self.decoder = nn.Linear(ninp, ntoken) 19 | 20 | self.init_weights() 21 | 22 | def generate_square_subsequent_mask(self, sz): 23 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) 24 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) 25 | return mask 26 | 27 | def copy_params(self, state_dict, coefficient_transfer=100): 28 | 29 | own_state = self.state_dict() 30 | 31 | for name, param in state_dict.items(): 32 | if name in own_state: 33 | own_state[name].copy_(param.clone()) 34 | 35 | def init_weights(self): 36 | initrange = 0.1 37 | self.encoder.weight.data.uniform_(-initrange, initrange) 38 | self.decoder.bias.data.zero_() 39 | self.decoder.weight.data.uniform_(-initrange, initrange) 40 | 41 | def return_embedding_matrix(self): 42 | return self.encoder.weight.data 43 | 44 | def forward(self, src, src_mask): 45 | src = self.encoder(src) * math.sqrt(self.ninp) 46 | src = self.pos_encoder(src) 47 | output = self.transformer_encoder(src, src_mask) 48 | output = self.decoder(output) 49 | return output 50 | 51 | class PositionalEncoding(nn.Module): 52 | 53 | def __init__(self, d_model, dropout=0.1, max_len=5000): 54 | super(PositionalEncoding, self).__init__() 55 | self.dropout = nn.Dropout(p=dropout) 56 | 57 | pe = torch.zeros(max_len, d_model) 58 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 59 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 60 | pe[:, 0::2] = torch.sin(position * div_term) 61 | pe[:, 1::2] = torch.cos(position * div_term) 62 | pe = pe.unsqueeze(0).transpose(0, 1) 63 | self.register_buffer('pe', pe) 64 | 65 | def forward(self, x): 66 | x = x + self.pe[:x.size(0), :] 67 | return self.dropout(x) 68 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__init__.py: -------------------------------------------------------------------------------- 1 | #### init -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/EMNIST_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/EMNIST_model.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/EMNIST_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/EMNIST_model.cpython-37.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/MLP.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/MLP.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/MLP.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/MLP.cpython-37.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/resnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/resnet.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/resnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/resnet.cpython-37.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/resnet_gn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/resnet_gn.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/resnet_gn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/resnet_gn.cpython-37.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/resnet_ln.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/resnet_ln.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/resnet_ln.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/resnet_ln.cpython-37.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/resnet_prue.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/resnet_prue.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/resnet_prue1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/resnet_prue1.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/simple.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/simple.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/vgg9_only.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/vgg9_only.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/vggnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/vggnet.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/vggnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/vggnet.cpython-37.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/word_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/word_model.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/wrn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/wrn.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/__pycache__/wrn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_CV/models/__pycache__/wrn.cpython-37.pyc -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/cifar_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from models.simple import SimpleNet 5 | 6 | 7 | class CifarNet(SimpleNet): 8 | def __init__(self, name=None, created_time=None): 9 | super(CifarNet, self).__init__(f'{name}_Simple', created_time) 10 | self.conv1 = nn.Conv2d(3, 6, 5) 11 | self.pool = nn.MaxPool2d(2, 2) 12 | self.conv2 = nn.Conv2d(6, 16, 5) 13 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 14 | self.fc2 = nn.Linear(120, 84) 15 | self.fc3 = nn.Linear(84, 10) 16 | 17 | def forward(self, x): 18 | x = self.pool(F.relu(self.conv1(x))) 19 | x = self.pool(F.relu(self.conv2(x))) 20 | x = x.view(-1, 16 * 5 * 5) 21 | x = F.relu(self.fc1(x)) 22 | x = F.relu(self.fc2(x)) 23 | x = self.fc3(x) 24 | return x -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/comm.txt: -------------------------------------------------------------------------------- 1 | python train_LocalSGD.py --warmup True --NIID --lr 0.1 --bs 128 --cp 2 --alpha 0.6 --gmf 0.7 --save -p --name OLocalSGD_nccl_e300_ICASSP_niid --rank 0 --size 2 --backend nccl --schedule 150 0.1 250 0.1 --epoch 300 -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/densenet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from models.simple import SimpleNet 6 | 7 | class BasicBlock(nn.Module): 8 | def __init__(self, in_planes, out_planes, dropRate=0.0): 9 | super(BasicBlock, self).__init__() 10 | self.bn1 = nn.BatchNorm2d(in_planes) 11 | self.relu = nn.ReLU(inplace=True) 12 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, 13 | padding=1, bias=False) 14 | self.droprate = dropRate 15 | def forward(self, x): 16 | out = self.conv1(self.relu(self.bn1(x))) 17 | if self.droprate > 0: 18 | out = F.dropout(out, p=self.droprate, training=self.training) 19 | return torch.cat([x, out], 1) 20 | 21 | class BottleneckBlock(nn.Module): 22 | def __init__(self, in_planes, out_planes, dropRate=0.0): 23 | super(BottleneckBlock, self).__init__() 24 | inter_planes = out_planes * 4 25 | self.bn1 = nn.BatchNorm2d(in_planes) 26 | self.relu = nn.ReLU(inplace=True) 27 | self.conv1 = nn.Conv2d(in_planes, inter_planes, kernel_size=1, stride=1, 28 | padding=0, bias=False) 29 | self.bn2 = nn.BatchNorm2d(inter_planes) 30 | self.conv2 = nn.Conv2d(inter_planes, out_planes, kernel_size=3, stride=1, 31 | padding=1, bias=False) 32 | self.droprate = dropRate 33 | def forward(self, x): 34 | out = self.conv1(self.relu(self.bn1(x))) 35 | if self.droprate > 0: 36 | out = F.dropout(out, p=self.droprate, inplace=False, training=self.training) 37 | out = self.conv2(self.relu(self.bn2(out))) 38 | if self.droprate > 0: 39 | out = F.dropout(out, p=self.droprate, inplace=False, training=self.training) 40 | return torch.cat([x, out], 1) 41 | 42 | class TransitionBlock(nn.Module): 43 | def __init__(self, in_planes, out_planes, dropRate=0.0): 44 | super(TransitionBlock, self).__init__() 45 | self.bn1 = nn.BatchNorm2d(in_planes) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, 48 | padding=0, bias=False) 49 | self.droprate = dropRate 50 | def forward(self, x): 51 | out = self.conv1(self.relu(self.bn1(x))) 52 | if self.droprate > 0: 53 | out = F.dropout(out, p=self.droprate, inplace=False, training=self.training) 54 | return F.avg_pool2d(out, 2) 55 | 56 | class DenseBlock(nn.Module): 57 | def __init__(self, nb_layers, in_planes, growth_rate, block, dropRate=0.0): 58 | super(DenseBlock, self).__init__() 59 | self.layer = self._make_layer(block, in_planes, growth_rate, nb_layers, dropRate) 60 | def _make_layer(self, block, in_planes, growth_rate, nb_layers, dropRate): 61 | layers = [] 62 | for i in range(nb_layers): 63 | layers.append(block(in_planes+i*growth_rate, growth_rate, dropRate)) 64 | return nn.Sequential(*layers) 65 | def forward(self, x): 66 | return self.layer(x) 67 | 68 | class DenseNet3(SimpleNet): 69 | def __init__(self, depth=100, num_classes=10, growth_rate=12, 70 | reduction=0.5, bottleneck=True, dropRate=0.0, name=None, created_time=None): 71 | super(DenseNet3, self).__init__(name='{0}_DenseNet_50'.format(name), created_time=created_time) 72 | in_planes = 2 * growth_rate 73 | n = (depth - 4) / 3 74 | if bottleneck == True: 75 | n = n/2 76 | block = BottleneckBlock 77 | else: 78 | block = BasicBlock 79 | n = int(n) 80 | # 1st conv before any dense block 81 | self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=1, 82 | padding=1, bias=False) 83 | # 1st block 84 | self.block1 = DenseBlock(n, in_planes, growth_rate, block, dropRate) 85 | in_planes = int(in_planes+n*growth_rate) 86 | self.trans1 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate) 87 | in_planes = int(math.floor(in_planes*reduction)) 88 | # 2nd block 89 | self.block2 = DenseBlock(n, in_planes, growth_rate, block, dropRate) 90 | in_planes = int(in_planes+n*growth_rate) 91 | self.trans2 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate) 92 | in_planes = int(math.floor(in_planes*reduction)) 93 | # 3rd block 94 | self.block3 = DenseBlock(n, in_planes, growth_rate, block, dropRate) 95 | in_planes = int(in_planes+n*growth_rate) 96 | # global average pooling and classifier 97 | self.bn1 = nn.BatchNorm2d(in_planes) 98 | self.relu = nn.ReLU(inplace=True) 99 | self.fc = nn.Linear(in_planes, num_classes) 100 | self.in_planes = in_planes 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 105 | m.weight.data.normal_(0, math.sqrt(2. / n)) 106 | elif isinstance(m, nn.BatchNorm2d): 107 | m.weight.data.fill_(1) 108 | m.bias.data.zero_() 109 | elif isinstance(m, nn.Linear): 110 | m.bias.data.zero_() 111 | def forward(self, x): 112 | out = self.conv1(x) 113 | out = self.trans1(self.block1(out)) 114 | out = self.trans2(self.block2(out)) 115 | out = self.block3(out) 116 | out = self.relu(self.bn1(out)) 117 | out = F.avg_pool2d(out, 8) 118 | out = out.view(-1, self.in_planes) 119 | return self.fc(out) -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/edge_case_cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Net(nn.Module): 10 | def __init__(self, num_classes): 11 | super(Net, self).__init__() 12 | self.conv1 = nn.Conv2d(1, 32, 3, 1) 13 | self.conv2 = nn.Conv2d(32, 64, 3, 1) 14 | self.dropout1 = nn.Dropout2d(0.25) 15 | self.dropout2 = nn.Dropout2d(0.5) 16 | self.fc1 = nn.Linear(9216, 128) 17 | self.fc2 = nn.Linear(128, num_classes) 18 | 19 | def forward(self, x): 20 | x = self.conv1(x) 21 | x = F.relu(x) 22 | x = self.conv2(x) 23 | x = F.relu(x) 24 | x = F.max_pool2d(x, 2) 25 | x = self.dropout1(x) 26 | x = torch.flatten(x, 1) 27 | x = self.fc1(x) 28 | x = F.relu(x) 29 | x = self.dropout2(x) 30 | output = self.fc2(x) 31 | 32 | return output 33 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/lenet.py: -------------------------------------------------------------------------------- 1 | '''LeNet in PyTorch.''' 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class LeNet(nn.Module): 6 | def __init__(self): 7 | super(LeNet, self).__init__() 8 | self.conv1 = nn.Conv2d(3, 6, 5) 9 | self.conv2 = nn.Conv2d(6, 16, 5) 10 | self.fc1 = nn.Linear(16*5*5, 120) 11 | self.fc2 = nn.Linear(120, 84) 12 | self.fc3 = nn.Linear(84, 10) 13 | 14 | def forward(self, x): 15 | out = F.relu(self.conv1(x)) 16 | out = F.max_pool2d(out, 2) 17 | out = F.relu(self.conv2(out)) 18 | out = F.max_pool2d(out, 2) 19 | out = out.view(out.size(0), -1) 20 | out = F.relu(self.fc1(out)) 21 | out = F.relu(self.fc2(out)) 22 | out = self.fc3(out) 23 | return out 24 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/model_c.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from models.simple import SimpleNet 5 | 6 | 7 | class ModelC(SimpleNet): 8 | def __init__(self, name=None, created_time=None): 9 | super(ModelC, self).__init__(f'{name}_ModelC', created_time) 10 | self.conv1 = nn.Conv2d(3, 6, 5) 11 | self.pool = nn.MaxPool2d(2, 2) 12 | self.conv2 = nn.Conv2d(6, 16, 5) 13 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 14 | self.fc2 = nn.Linear(120, 84) 15 | self.fc3 = nn.Linear(84, 10) 16 | 17 | def forward(self, x): 18 | x = self.pool(F.relu(self.conv1(x))) 19 | x = self.pool(F.relu(self.conv2(x))) 20 | x = x.view(-1, 16 * 5 * 5) 21 | x = F.relu(self.fc1(x)) 22 | x = F.relu(self.fc2(x)) 23 | x = self.fc3(x) 24 | return x -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/pytorch_resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | from models.simple import SimpleNet 6 | 7 | __all__ = ['ResNet', 'pt_resnet18', 'pt_resnet34', 'pt_resnet50', 'pt_resnet101', 8 | 'pt_resnet152'] 9 | 10 | 11 | model_urls = { 12 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 13 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 14 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 15 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 16 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 17 | } 18 | 19 | 20 | def conv3x3(in_planes, out_planes, stride=1): 21 | """3x3 convolution with padding""" 22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 23 | padding=1, bias=False) 24 | 25 | 26 | class BasicBlock(nn.Module): 27 | expansion = 1 28 | 29 | def __init__(self, inplanes, planes, stride=1, downsample=None): 30 | super(BasicBlock, self).__init__() 31 | self.conv1 = conv3x3(inplanes, planes, stride) 32 | self.bn1 = nn.BatchNorm2d(planes) 33 | self.relu = nn.ReLU(inplace=True) 34 | self.conv2 = conv3x3(planes, planes) 35 | self.bn2 = nn.BatchNorm2d(planes) 36 | self.downsample = downsample 37 | self.stride = stride 38 | 39 | def forward(self, x): 40 | residual = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | 49 | if self.downsample is not None: 50 | residual = self.downsample(x) 51 | 52 | out += residual 53 | out = self.relu(out) 54 | 55 | return out 56 | 57 | 58 | class Bottleneck(nn.Module): 59 | expansion = 4 60 | 61 | def __init__(self, inplanes, planes, stride=1, downsample=None): 62 | super(Bottleneck, self).__init__() 63 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 64 | self.bn1 = nn.BatchNorm2d(planes) 65 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 66 | padding=1, bias=False) 67 | self.bn2 = nn.BatchNorm2d(planes) 68 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 69 | self.bn3 = nn.BatchNorm2d(planes * 4) 70 | self.relu = nn.ReLU(inplace=True) 71 | self.downsample = downsample 72 | self.stride = stride 73 | 74 | def forward(self, x): 75 | residual = x 76 | 77 | out = self.conv1(x) 78 | out = self.bn1(out) 79 | out = self.relu(out) 80 | 81 | out = self.conv2(out) 82 | out = self.bn2(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv3(out) 86 | out = self.bn3(out) 87 | 88 | if self.downsample is not None: 89 | residual = self.downsample(x) 90 | 91 | out += residual 92 | out = self.relu(out) 93 | 94 | return out 95 | 96 | 97 | class ResNet(SimpleNet): 98 | 99 | def __init__(self, block, layers, num_classes=1000, name=None, created_time=None): 100 | self.inplanes = 64 101 | super(ResNet, self).__init__(name, created_time) 102 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 103 | bias=False) 104 | self.bn1 = nn.BatchNorm2d(64) 105 | self.relu = nn.ReLU(inplace=True) 106 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 107 | self.layer1 = self._make_layer(block, 32, layers[0]) 108 | self.layer2 = self._make_layer(block, 64, layers[1], stride=2) 109 | self.layer3 = self._make_layer(block, 128, layers[2], stride=2) 110 | self.layer4 = self._make_layer(block, 256, layers[3], stride=2) 111 | self.avgpool = nn.AvgPool2d(7, stride=1) 112 | self.fc = nn.Linear(512 * block.expansion, num_classes) 113 | 114 | for m in self.modules(): 115 | if isinstance(m, nn.Conv2d): 116 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 117 | m.weight.data.normal_(0, math.sqrt(2. / n)) 118 | elif isinstance(m, nn.BatchNorm2d): 119 | m.weight.data.fill_(1) 120 | m.bias.data.zero_() 121 | 122 | def _make_layer(self, block, planes, blocks, stride=1): 123 | downsample = None 124 | if stride != 1 or self.inplanes != planes * block.expansion: 125 | downsample = nn.Sequential( 126 | nn.Conv2d(self.inplanes, planes * block.expansion, 127 | kernel_size=1, stride=stride, bias=False), 128 | nn.BatchNorm2d(planes * block.expansion), 129 | ) 130 | 131 | layers = [] 132 | layers.append(block(self.inplanes, planes, stride, downsample)) 133 | self.inplanes = planes * block.expansion 134 | for i in range(1, blocks): 135 | layers.append(block(self.inplanes, planes)) 136 | 137 | return nn.Sequential(*layers) 138 | 139 | def forward(self, x): 140 | x = self.conv1(x) 141 | x = self.bn1(x) 142 | x = self.relu(x) 143 | x = self.maxpool(x) 144 | 145 | x = self.layer1(x) 146 | x = self.layer2(x) 147 | x = self.layer3(x) 148 | x = self.layer4(x) 149 | 150 | x = self.avgpool(x) 151 | x = x.view(x.size(0), -1) 152 | x = self.fc(x) 153 | 154 | return x 155 | 156 | 157 | def pt_resnet18(name=None, created_time=None, pretrained=False, **kwargs): 158 | """Constructs a ResNet-18 model. 159 | 160 | Args: 161 | pretrained (bool): If True, returns a model pre-trained on ImageNet 162 | """ 163 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs, name=name, created_time=created_time,) 164 | if pretrained: 165 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 166 | return model 167 | 168 | 169 | def pt_resnet34(name=None, created_time=None, pretrained=False, **kwargs): 170 | """Constructs a ResNet-34 model. 171 | 172 | Args: 173 | pretrained (bool): If True, returns a model pre-trained on ImageNet 174 | """ 175 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs, name=name, created_time=created_time,) 176 | if pretrained: 177 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 178 | return model 179 | 180 | 181 | def pt_resnet50(name=None, created_time=None, pretrained=False, **kwargs): 182 | """Constructs a ResNet-50 model. 183 | 184 | Args: 185 | pretrained (bool): If True, returns a model pre-trained on ImageNet 186 | """ 187 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs, name=name, created_time=created_time,) 188 | if pretrained: 189 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 190 | return model 191 | 192 | 193 | def pt_resnet101(name=None, created_time=None, pretrained=False, **kwargs): 194 | """Constructs a ResNet-101 model. 195 | 196 | Args: 197 | pretrained (bool): If True, returns a model pre-trained on ImageNet 198 | """ 199 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs, name=name, created_time=created_time,) 200 | if pretrained: 201 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 202 | return model 203 | 204 | 205 | def pt_resnet152(name=None, created_time=None, pretrained=False, **kwargs): 206 | """Constructs a ResNet-152 model. 207 | 208 | Args: 209 | pretrained (bool): If True, returns a model pre-trained on ImageNet 210 | """ 211 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs, name=name, created_time=created_time) 212 | if pretrained: 213 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 214 | return model -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/resnet.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from models.simple import SimpleNet 6 | from torch.autograd import Variable 7 | 8 | 9 | class BasicBlock(nn.Module): 10 | expansion = 1 11 | 12 | def __init__(self, in_planes, planes, stride=1): 13 | super(BasicBlock, self).__init__() 14 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 15 | self.bn1 = nn.BatchNorm2d(planes) 16 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(planes) 18 | 19 | self.shortcut = nn.Sequential() 20 | if stride != 1 or in_planes != self.expansion*planes: 21 | self.shortcut = nn.Sequential( 22 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 23 | nn.BatchNorm2d(self.expansion*planes) 24 | ) 25 | 26 | def forward(self, x): 27 | out = F.relu(self.bn1(self.conv1(x))) 28 | out = self.bn2(self.conv2(out)) 29 | out += self.shortcut(x) 30 | out = F.relu(out) 31 | return out 32 | 33 | 34 | class Bottleneck(nn.Module): 35 | expansion = 4 36 | 37 | def __init__(self, in_planes, planes, stride=1): 38 | super(Bottleneck, self).__init__() 39 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 40 | self.bn1 = nn.BatchNorm2d(planes) 41 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 42 | self.bn2 = nn.BatchNorm2d(planes) 43 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 44 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 45 | 46 | self.shortcut = nn.Sequential() 47 | if stride != 1 or in_planes != self.expansion*planes: 48 | self.shortcut = nn.Sequential( 49 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 50 | nn.BatchNorm2d(self.expansion*planes) 51 | ) 52 | 53 | def forward(self, x): 54 | out = F.relu(self.bn1(self.conv1(x))) 55 | out = F.relu(self.bn2(self.conv2(out))) 56 | out = self.bn3(self.conv3(out)) 57 | out += self.shortcut(x) 58 | out = F.relu(out) 59 | return out 60 | 61 | 62 | class ResNet(SimpleNet): 63 | def __init__(self, block, num_blocks, num_classes=10, name=None, created_time=None): 64 | super(ResNet, self).__init__() 65 | self.in_planes = 32 66 | 67 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(32) 69 | self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1) 70 | self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2) 71 | self.layer3 = self._make_layer(block, 128, num_blocks[2], stride=2) 72 | self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2) 73 | self.linear = nn.Linear(256*block.expansion, num_classes) 74 | 75 | def _make_layer(self, block, planes, num_blocks, stride): 76 | strides = [stride] + [1]*(num_blocks-1) 77 | layers = [] 78 | for stride in strides: 79 | layers.append(block(self.in_planes, planes, stride)) 80 | self.in_planes = planes * block.expansion 81 | return nn.Sequential(*layers) 82 | 83 | def forward(self, x): 84 | out = F.relu(self.bn1(self.conv1(x))) 85 | out = self.layer1(out) 86 | out = self.layer2(out) 87 | out = self.layer3(out) 88 | out = self.layer4(out) 89 | out = F.avg_pool2d(out, 4) 90 | out = out.view(out.size(0), -1) 91 | out = self.linear(out) 92 | return out 93 | 94 | 95 | def ResNet18(name=None, created_time=None, num_classes=10): 96 | return ResNet(BasicBlock, [2,2,2,2],name='{0}_ResNet_18'.format(name), created_time=created_time, num_classes=num_classes) 97 | 98 | def ResNet34(name=None, created_time=None): 99 | return ResNet(BasicBlock, [3,4,6,3],name='{0}_ResNet_34'.format(name), created_time=created_time) 100 | 101 | def ResNet50(name=None, created_time=None): 102 | return ResNet(Bottleneck, [3,4,6,3],name='{0}_ResNet_50'.format(name), created_time=created_time) 103 | 104 | def ResNet101(name=None, created_time=None): 105 | return ResNet(Bottleneck, [3,4,23,3],name='{0}_ResNet'.format(name), created_time=created_time) 106 | 107 | def ResNet152(name=None, created_time=None): 108 | return ResNet(Bottleneck, [3,8,36,3],name='{0}_ResNet'.format(name), created_time=created_time) 109 | 110 | 111 | def test(): 112 | net = ResNet18() 113 | y = net(Variable(torch.randn(1,3,32,32))) 114 | print(y.size()) 115 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/resnet9.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import itertools 6 | 7 | __all__ = ["ResNet9"] 8 | 9 | class Mul(nn.Module): 10 | def __init__(self, weight): 11 | super().__init__() 12 | self.weight = weight 13 | def __call__(self, x): 14 | return x*self.weight 15 | 16 | 17 | def batch_norm(num_channels, bn_bias_init=None, bn_bias_freeze=False, 18 | bn_weight_init=None, bn_weight_freeze=False): 19 | m = nn.BatchNorm2d(num_channels) 20 | if bn_bias_init is not None: 21 | m.bias.data.fill_(bn_bias_init) 22 | if bn_bias_freeze: 23 | m.bias.requires_grad = False 24 | if bn_weight_init is not None: 25 | m.weight.data.fill_(bn_weight_init) 26 | if bn_weight_freeze: 27 | m.weight.requires_grad = False 28 | 29 | return m 30 | 31 | #Network definition 32 | class ConvBN(nn.Module): 33 | def __init__(self, do_batchnorm, c_in, c_out, bn_weight_init=1.0, pool=None, **kw): 34 | super().__init__() 35 | self.pool = pool 36 | self.conv = nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, 37 | padding=1, bias=False) 38 | if do_batchnorm: 39 | self.bn = batch_norm(c_out, bn_weight_init=bn_weight_init, **kw) 40 | self.do_batchnorm = do_batchnorm 41 | self.relu = nn.ReLU(True) 42 | 43 | def forward(self, x): 44 | if self.do_batchnorm: 45 | out = self.relu(self.bn(self.conv(x))) 46 | else: 47 | out = self.relu(self.conv(x)) 48 | if self.pool: 49 | out = self.pool(out) 50 | return out 51 | 52 | def prep_finetune(self, iid, c_in, c_out, bn_weight_init=1.0, pool=None, **kw): 53 | self.bn.bias.requires_grad = False 54 | self.bn.weight.requires_grad = False 55 | layers = [self.conv] 56 | for l in layers: 57 | for p in l.parameters(): 58 | p.requires_grad = True 59 | return itertools.chain.from_iterable([l.parameters() for l in layers]) 60 | 61 | class Residual(nn.Module): 62 | def __init__(self, do_batchnorm, c, **kw): 63 | super().__init__() 64 | self.res1 = ConvBN(do_batchnorm, c, c, **kw) 65 | self.res2 = ConvBN(do_batchnorm, c, c, **kw) 66 | 67 | def forward(self, x): 68 | return x + F.relu(self.res2(self.res1(x))) 69 | 70 | def prep_finetune(self, iid, c, **kw): 71 | layers = [self.res1, self.res2] 72 | return itertools.chain.from_iterable([l.prep_finetune(iid, c, c, **kw) for l in layers]) 73 | 74 | class BasicNet(nn.Module): 75 | def __init__(self, do_batchnorm, channels, weight, pool, num_classes=10, initial_channels=1, new_num_classes=None, **kw): 76 | super().__init__() 77 | self.new_num_classes = new_num_classes 78 | self.prep = ConvBN(do_batchnorm, initial_channels, channels['prep'], **kw) 79 | 80 | self.layer1 = ConvBN(do_batchnorm, channels['prep'], channels['layer1'], 81 | pool=pool, **kw) 82 | self.res1 = Residual(do_batchnorm, channels['layer1'], **kw) 83 | 84 | self.layer2 = ConvBN(do_batchnorm, channels['layer1'], channels['layer2'], 85 | pool=pool, **kw) 86 | 87 | self.layer3 = ConvBN(do_batchnorm, channels['layer2'], channels['layer3'], 88 | pool=pool, **kw) 89 | self.res3 = Residual(do_batchnorm, channels['layer3'], **kw) 90 | 91 | self.pool = nn.MaxPool2d(4) 92 | self.linear = nn.Linear(channels['layer3'], num_classes, bias=False) 93 | self.classifier = Mul(weight) 94 | 95 | def forward(self, x): 96 | out = self.prep(x) 97 | out = self.res1(self.layer1(out)) 98 | out = self.layer2(out) 99 | out = self.res3(self.layer3(out)) 100 | 101 | out = self.pool(out).view(out.size()[0], -1) 102 | out = self.classifier(self.linear(out)) 103 | return out 104 | 105 | def finetune_parameters(self, iid, channels, weight, pool, **kw): 106 | #layers = [self.prep, self.layer1, self.res1, self.layer2, self.layer3, self.res3] 107 | self.linear = nn.Linear(channels['layer3'], self.new_num_classes, bias=False) 108 | self.classifier = Mul(weight) 109 | modules = [self.linear, self.classifier] 110 | for m in modules: 111 | for p in m.parameters(): 112 | p.requires_grad = True 113 | return itertools.chain.from_iterable([m.parameters() for m in modules]) 114 | """ 115 | prep = self.prep.prep_finetune(iid, 3, channels['prep'], **kw) 116 | 117 | layer1 = self.layer1.prep_finetune(iid, channels['prep'], channels['layer1'], 118 | pool=pool, **kw) 119 | res1 = self.res1.prep_finetune(iid, channels['layer1'], **kw) 120 | 121 | layer2 = self.layer2.prep_finetune(iid, channels['layer1'], channels['layer2'], 122 | pool=pool, **kw) 123 | 124 | layer3 = self.layer3.prep_finetune(iid, channels['layer2'], channels['layer3'], 125 | pool=pool, **kw) 126 | res3 = self.res3.prep_finetune(iid, channels['layer3'], **kw) 127 | layers = [prep, layer1, res1, layer2, layer3, res3] 128 | parameters = [itertools.chain.from_iterable(layers), itertools.chain.from_iterable([m.parameters() for m in modules])] 129 | return itertools.chain.from_iterable(parameters) 130 | """ 131 | 132 | class ResNet9(nn.Module): 133 | def __init__(self, do_batchnorm=False, channels=None, weight=0.125, pool=nn.MaxPool2d(2),num_classes=10, 134 | extra_layers=(), res_layers=('layer1', 'layer3'), **kw): 135 | super().__init__() 136 | self.channels = {'prep': 64, 'layer1': 128, 137 | 'layer2': 256, 'layer3': 512} 138 | self.weight = weight 139 | self.pool = pool 140 | print(f"Using BatchNorm: {do_batchnorm}") 141 | self.n = BasicNet(do_batchnorm, self.channels, weight, pool, num_classes,**kw) 142 | self.kw = kw 143 | 144 | def forward(self, x): 145 | return self.n(x) 146 | 147 | def finetune_parameters(self): 148 | return self.n.finetune_parameters(self.iid, self.channels, self.weight, self.pool, **self.kw) 149 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/resnet_gn.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | For Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torch.nn.modules.normalization import GroupNorm 11 | 12 | 13 | class BasicBlock(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(BasicBlock, self).__init__() 18 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 19 | # self.bn1 = nn.BatchNorm2d(planes) 20 | self.bn1 = torch.nn.GroupNorm(32,planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | # self.bn2 = nn.BatchNorm2d(planes) 23 | self.bn2 = torch.nn.GroupNorm(32,planes) 24 | 25 | self.shortcut = nn.Sequential() 26 | if stride != 1 or in_planes != self.expansion*planes: 27 | self.shortcut = nn.Sequential( 28 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 29 | torch.nn.GroupNorm(32,self.expansion*planes) 30 | # nn.BatchNorm2d(self.expansion*planes) 31 | ) 32 | 33 | def forward(self, x): 34 | out = F.relu(self.bn1(self.conv1(x))) 35 | out = self.bn2(self.conv2(out)) 36 | out += self.shortcut(x) 37 | out = F.relu(out) 38 | return out 39 | 40 | 41 | class Bottleneck(nn.Module): 42 | expansion = 4 43 | 44 | def __init__(self, in_planes, planes, stride=1): 45 | super(Bottleneck, self).__init__() 46 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 47 | # self.bn1 = nn.BatchNorm2d(planes) 48 | self.bn1 = torch.nn.GroupNorm(32,planes) 49 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 50 | # self.bn2 = nn.BatchNorm2d(planes) 51 | self.bn2 = torch.nn.GroupNorm(32,planes) 52 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 53 | # self.bn3 = nn.BatchNorm2d(self.expansion*planes) 54 | self.bn3 = torch.nn.GroupNorm(32,self.expansion*planes) 55 | 56 | self.shortcut = nn.Sequential() 57 | if stride != 1 or in_planes != self.expansion*planes: 58 | self.shortcut = nn.Sequential( 59 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 60 | torch.nn.GroupNorm(32,self.expansion*planes) 61 | # nn.BatchNorm2d(self.expansion*planes) 62 | ) 63 | 64 | def forward(self, x): 65 | out = F.relu(self.bn1(self.conv1(x))) 66 | out = F.relu(self.bn2(self.conv2(out))) 67 | out = self.bn3(self.conv3(out)) 68 | out += self.shortcut(x) 69 | out = F.relu(out) 70 | return out 71 | 72 | 73 | class ResNet_gn(nn.Module): 74 | def __init__(self, block, num_blocks, num_classes=10): 75 | super(ResNet_gn, self).__init__() 76 | self.in_planes = 64 77 | 78 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 79 | # self.bn1 = nn.BatchNorm2d(64) 80 | self.bn1 = torch.nn.GroupNorm(32,64) 81 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 82 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 83 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 84 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 85 | self.linear = nn.Linear(512*block.expansion, num_classes) 86 | 87 | def _make_layer(self, block, planes, num_blocks, stride): 88 | strides = [stride] + [1]*(num_blocks-1) 89 | layers = [] 90 | for stride in strides: 91 | layers.append(block(self.in_planes, planes, stride)) 92 | self.in_planes = planes * block.expansion 93 | return nn.Sequential(*layers) 94 | 95 | def forward(self, x): 96 | out = F.relu(self.bn1(self.conv1(x))) 97 | out = self.layer1(out) 98 | out = self.layer2(out) 99 | out = self.layer3(out) 100 | out = self.layer4(out) 101 | out = F.avg_pool2d(out, 4) 102 | out = out.view(out.size(0), -1) 103 | out = self.linear(out) 104 | return out 105 | 106 | 107 | def ResNet18_gn(): 108 | return ResNet_gn(BasicBlock, [2,2,2,2]) 109 | 110 | def ResNet34_gn(): 111 | return ResNet_gn(BasicBlock, [3,4,6,3]) 112 | 113 | def ResNet50_gn(): 114 | return ResNet_gn(Bottleneck, [3,4,6,3]) 115 | 116 | def ResNet101_gn(): 117 | return ResNet_gn(Bottleneck, [3,4,23,3]) 118 | 119 | def ResNet152_gn(): 120 | return ResNet_gn(Bottleneck, [3,8,36,3]) 121 | 122 | 123 | def test(): 124 | net = ResNet18_gn() 125 | y = net(torch.randn(1,3,32,32)) 126 | print(y.size()) 127 | 128 | # test() 129 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/resnet_ln.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | For Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | 13 | class FilterResponseNormNd(nn.Module): 14 | def __init__(self, ndim, num_features, eps=1e-6, 15 | learnable_eps=False): 16 | """ 17 | Input Variables: 18 | ---------------- 19 | ndim: An integer indicating the number of dimensions of the expected input tensor. 20 | num_features: An integer indicating the number of input feature dimensions. 21 | eps: A scalar constant or learnable variable. 22 | learnable_eps: A bool value indicating whether the eps is learnable. 23 | """ 24 | assert ndim in [3, 4, 5], \ 25 | 'FilterResponseNorm only supports 3d, 4d or 5d inputs.' 26 | super(FilterResponseNormNd, self).__init__() 27 | shape = (1, num_features) + (1, ) * (ndim - 2) 28 | self.eps = nn.Parameter(torch.ones(*shape) * eps) 29 | if not learnable_eps: 30 | self.eps.requires_grad_(False) 31 | self.gamma = nn.Parameter(torch.Tensor(*shape)) 32 | self.beta = nn.Parameter(torch.Tensor(*shape)) 33 | self.tau = nn.Parameter(torch.Tensor(*shape)) 34 | self.reset_parameters() 35 | 36 | 37 | 38 | def forward(self, x): 39 | avg_dims = tuple(range(2, x.dim())) 40 | nu2 = torch.pow(x, 2).mean(dim=avg_dims, keepdim=True) 41 | x = x * torch.rsqrt(nu2 + torch.abs(self.eps)) 42 | return torch.max(self.gamma * x + self.beta, self.tau) 43 | 44 | def reset_parameters(self): 45 | nn.init.ones_(self.gamma) 46 | nn.init.zeros_(self.beta) 47 | nn.init.zeros_(self.tau) 48 | 49 | class FilterResponseNorm1d(FilterResponseNormNd): 50 | def __init__(self, num_features, eps=1e-6, learnable_eps=False): 51 | super(FilterResponseNorm1d, self).__init__( 52 | 3, num_features, eps=eps, learnable_eps=learnable_eps) 53 | 54 | class FilterResponseNorm2d(FilterResponseNormNd): 55 | def __init__(self, num_features, eps=1e-6, learnable_eps=False): 56 | super(FilterResponseNorm2d, self).__init__( 57 | 4, num_features, eps=eps, learnable_eps=learnable_eps) 58 | 59 | class FilterResponseNorm3d(FilterResponseNormNd): 60 | def __init__(self, num_features, eps=1e-6, learnable_eps=False): 61 | super(FilterResponseNorm3d, self).__init__( 62 | 5, num_features, eps=eps, learnable_eps=learnable_eps) 63 | 64 | 65 | class BasicBlock(nn.Module): 66 | expansion = 1 67 | 68 | def __init__(self, in_planes, planes, stride=1): 69 | super(BasicBlock, self).__init__() 70 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 71 | self.bn1 = FilterResponseNorm2d(planes) 72 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 73 | self.bn2 = FilterResponseNorm2d(planes) 74 | 75 | self.shortcut = nn.Sequential() 76 | if stride != 1 or in_planes != self.expansion*planes: 77 | self.shortcut = nn.Sequential( 78 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 79 | FilterResponseNorm2d(self.expansion*planes) 80 | ) 81 | 82 | def forward(self, x): 83 | out = F.relu(self.bn1(self.conv1(x))) 84 | out = self.bn2(self.conv2(out)) 85 | out += self.shortcut(x) 86 | out = F.relu(out) 87 | return out 88 | 89 | 90 | class Bottleneck(nn.Module): 91 | expansion = 4 92 | 93 | def __init__(self, in_planes, planes, stride=1): 94 | super(Bottleneck, self).__init__() 95 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 96 | self.bn1 = FilterResponseNorm2d(planes) 97 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 98 | self.bn2 = FilterResponseNorm2d(planes) 99 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 100 | self.bn3 = FilterResponseNorm2d(self.expansion*planes) 101 | 102 | self.shortcut = nn.Sequential() 103 | if stride != 1 or in_planes != self.expansion*planes: 104 | self.shortcut = nn.Sequential( 105 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 106 | FilterResponseNorm2d(self.expansion*planes) 107 | ) 108 | 109 | def forward(self, x): 110 | out = F.relu(self.bn1(self.conv1(x))) 111 | out = F.relu(self.bn2(self.conv2(out))) 112 | out = self.bn3(self.conv3(out)) 113 | out += self.shortcut(x) 114 | out = F.relu(out) 115 | return out 116 | 117 | 118 | class ResNet_LN(nn.Module): 119 | def __init__(self, block, num_blocks, num_classes=10): 120 | super(ResNet_LN, self).__init__() 121 | self.in_planes = 64 122 | 123 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 124 | self.bn1 = FilterResponseNorm2d(64) 125 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 126 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 127 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 128 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 129 | self.linear = nn.Linear(512*block.expansion, num_classes) 130 | 131 | def _make_layer(self, block, planes, num_blocks, stride): 132 | strides = [stride] + [1]*(num_blocks-1) 133 | layers = [] 134 | for stride in strides: 135 | layers.append(block(self.in_planes, planes, stride)) 136 | self.in_planes = planes * block.expansion 137 | return nn.Sequential(*layers) 138 | 139 | def forward(self, x): 140 | out = F.relu(self.bn1(self.conv1(x))) 141 | out = self.layer1(out) 142 | out = self.layer2(out) 143 | out = self.layer3(out) 144 | out = self.layer4(out) 145 | out = F.avg_pool2d(out, 4) 146 | out = out.view(out.size(0), -1) 147 | out = self.linear(out) 148 | return out 149 | 150 | 151 | def ResNet18_LN(): 152 | return ResNet_LN(BasicBlock, [2,2,2,2]) 153 | 154 | def ResNet34_LN(): 155 | return ResNet_LN(BasicBlock, [3,4,6,3]) 156 | 157 | def ResNet50_LN(): 158 | return ResNet_LN(Bottleneck, [3,4,6,3]) 159 | 160 | def ResNet101_LN(): 161 | return ResNet_LN(Bottleneck, [3,4,23,3]) 162 | 163 | def ResNet152_LN(): 164 | return ResNet_LN(Bottleneck, [3,8,36,3]) 165 | 166 | 167 | def test(): 168 | net = ResNet18_LN() 169 | y = net(torch.randn(1,3,32,32)) 170 | print(y.size()) 171 | 172 | # test() 173 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/resnet_prue.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from functools import partial 8 | from torch.autograd import Variable 9 | 10 | 11 | __all__ = ['resnet'] 12 | 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | 19 | class BasicBlock(nn.Module): 20 | expansion = 1 21 | 22 | def __init__(self, inplanes, planes, cfg, stride=1, downsample=None): 23 | # cfg should be a number in this case 24 | super(BasicBlock, self).__init__() 25 | self.conv1 = nn.Conv2d(inplanes, cfg, kernel_size=3, stride=stride, padding=1, bias=False) 26 | self.bn1 = nn.BatchNorm2d(cfg) 27 | self.conv2 = nn.Conv2d(cfg, planes, kernel_size=3, stride=1, padding=1, bias=False) 28 | self.bn2 = nn.BatchNorm2d(planes) 29 | ##################### 30 | 31 | # self.conv1 = conv3x3(inplanes, cfg, stride) 32 | # self.bn1 = nn.BatchNorm2d(cfg) 33 | # self.relu = nn.ReLU(inplace=True) 34 | # self.conv2 = conv3x3(cfg, planes) 35 | # self.bn2 = nn.BatchNorm2d(planes) 36 | 37 | self.downsample = downsample 38 | self.stride = stride 39 | 40 | 41 | 42 | def forward(self, x): 43 | 44 | out = F.relu(self.bn1(self.conv1(x))) 45 | out = self.bn2(self.conv2(out)) 46 | # print(out.size(),self.shortcut(x).size()) 47 | out += self.shortcut(x) 48 | out = F.relu(out) 49 | return out 50 | ####################### 51 | # residual = x 52 | # 53 | # out = self.conv1(x) 54 | # out = self.bn1(out) 55 | # out = self.relu(out) 56 | # 57 | # out = self.conv2(out) 58 | # out = self.bn2(out) 59 | # 60 | # if self.downsample is not None: 61 | # residual = self.downsample(x) 62 | # 63 | # out += residual 64 | # out = self.relu(out) 65 | 66 | return out 67 | 68 | def downsample_basic_block(x, planes): 69 | x = nn.AvgPool2d(2,2)(x) 70 | zero_pads = torch.Tensor( 71 | x.size(0), planes - x.size(1), x.size(2), x.size(3)).zero_() 72 | if isinstance(x.data, torch.cuda.FloatTensor): 73 | zero_pads = zero_pads.cuda() 74 | 75 | out = Variable(torch.cat([x.data, zero_pads], dim=1)) 76 | 77 | return out 78 | 79 | class ResNet(nn.Module): 80 | 81 | def __init__(self, depth, dataset='cifar10', cfg=None): 82 | super(ResNet, self).__init__() 83 | # Model type specifies number of layers for CIFAR-10 model 84 | # assert (depth - 2) % 6 == 0, 'depth should be 6n+2' 85 | n = (depth - 2) // 6 86 | 87 | block = BasicBlock 88 | if cfg == None: 89 | cfg = [[16]*n, [32]*n, [64]*n] 90 | cfg = [item for sub_list in cfg for item in sub_list] 91 | 92 | self.cfg = cfg 93 | 94 | self.inplanes = 64 95 | num_blocks = [2,2,2,2] 96 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 97 | self.bn1 = nn.BatchNorm2d(64) 98 | self.layer1 = self._make_layer(block, 64, num_blocks[0], cfg=cfg[0:n], stride=1) 99 | self.layer2 = self._make_layer(block, 128, num_blocks[1], cfg=cfg[0:n], stride=2) 100 | self.layer3 = self._make_layer(block, 256, num_blocks[2], cfg=cfg[0:n], stride=2) 101 | self.layer4 = self._make_layer(block, 512, num_blocks[3], cfg=cfg[0:n], stride=2) 102 | self.fc = nn.Linear(512*block.expansion, 10) 103 | #################### 104 | 105 | # self.inplanes = 16 106 | # self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, 107 | # bias=False) 108 | # self.bn1 = nn.BatchNorm2d(16) 109 | # self.relu = nn.ReLU(inplace=True) 110 | # self.layer1 = self._make_layer(block, 16, n, cfg=cfg[0:n]) 111 | # self.layer2 = self._make_layer(block, 32, n, cfg=cfg[n:2*n], stride=2) 112 | # self.layer3 = self._make_layer(block, 64, n, cfg=cfg[2*n:3*n], stride=2) 113 | # self.avgpool = nn.AvgPool2d(8) 114 | # if dataset == 'cifar10': 115 | # num_classes = 10 116 | # elif dataset == 'cifar100': 117 | # num_classes = 100 118 | # self.fc = nn.Linear(64 * block.expansion, num_classes) 119 | 120 | for m in self.modules(): 121 | if isinstance(m, nn.Conv2d): 122 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 123 | m.weight.data.normal_(0, math.sqrt(2. / n)) 124 | elif isinstance(m, nn.BatchNorm2d): 125 | m.weight.data.fill_(1) 126 | m.bias.data.zero_() 127 | 128 | # def _make_layer(self, block, planes, blocks, cfg, stride=1): 129 | # downsample = None 130 | # if stride != 1 or self.inplanes != planes * block.expansion: 131 | # downsample = partial(downsample_basic_block, planes=planes*block.expansion) 132 | # 133 | # layers = [] 134 | # layers.append(block(self.inplanes, planes, cfg[0], stride, downsample)) 135 | # self.inplanes = planes * block.expansion 136 | # for i in range(1, blocks): 137 | # layers.append(block(self.inplanes, planes, cfg[i])) 138 | # 139 | # return nn.Sequential(*layers) 140 | 141 | def _make_layer(self, block, planes, blocks, cfg, stride): 142 | strides = [stride] + [1]*(blocks-1) 143 | layers = [] 144 | k = 0 145 | for stride in strides: 146 | layers.append(block(self.inplanes, planes, cfg[k], stride)) 147 | self.inplanes = planes * block.expansion 148 | k += 1 149 | return nn.Sequential(*layers) 150 | 151 | def forward(self, x): 152 | out = F.relu(self.bn1(self.conv1(x))) 153 | out = self.layer1(out) 154 | out = self.layer2(out) 155 | out = self.layer3(out) 156 | out = self.layer4(out) 157 | out = F.avg_pool2d(out, 4) 158 | out = out.view(out.size(0), -1) 159 | out = self.fc(out) 160 | # x = self.conv1(x) 161 | # x = self.bn1(x) 162 | # x = self.relu(x) # 32x32 163 | # 164 | # x = self.layer1(x) # 32x32 165 | # x = self.layer2(x) # 16x16 166 | # x = self.layer3(x) # 8x8 167 | # 168 | # x = self.avgpool(x) 169 | # x = x.view(x.size(0), -1) 170 | # x = self.fc(x) 171 | 172 | return x 173 | 174 | def resnet_prue(**kwargs): 175 | """ 176 | Constructs a ResNet model. 177 | """ 178 | return ResNet(**kwargs) 179 | 180 | if __name__ == '__main__': 181 | net = resnet_prue(depth=56) 182 | x=Variable(torch.FloatTensor(16, 3, 32, 32)) 183 | y = net(x) 184 | print(y.data.shape) 185 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/resnet_prue1.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | For Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from functools import partial 11 | 12 | class BasicBlock(nn.Module): 13 | expansion = 1 14 | 15 | def __init__(self, in_planes, planes, cfg, stride=1, downsample=None): 16 | super(BasicBlock, self).__init__() 17 | # print('cfg',cfg) 18 | self.cfg = cfg 19 | self.conv1 = nn.Conv2d(in_planes, cfg, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn1 = nn.BatchNorm2d(cfg) 21 | self.conv2 = nn.Conv2d(cfg, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | 24 | self.downsample = downsample 25 | self.stride = stride 26 | 27 | self.shortcut = nn.Sequential() 28 | if stride != 1 or in_planes != self.expansion*planes: 29 | self.shortcut = nn.Sequential( 30 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 31 | nn.BatchNorm2d(self.expansion*planes) 32 | ) 33 | 34 | def forward(self, x): 35 | out = F.relu(self.bn1(self.conv1(x))) 36 | # print(out.size(), self.cfg) 37 | out = self.bn2(self.conv2(out)) 38 | 39 | out += self.shortcut(x) 40 | out = F.relu(out) 41 | return out 42 | 43 | 44 | class Bottleneck(nn.Module): 45 | expansion = 4 46 | 47 | def __init__(self, in_planes, planes, stride=1): 48 | super(Bottleneck, self).__init__() 49 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 50 | self.bn1 = nn.BatchNorm2d(planes) 51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 52 | self.bn2 = nn.BatchNorm2d(planes) 53 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 54 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 55 | 56 | self.shortcut = nn.Sequential() 57 | if stride != 1 or in_planes != self.expansion*planes: 58 | self.shortcut = nn.Sequential( 59 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 60 | nn.BatchNorm2d(self.expansion*planes) 61 | ) 62 | 63 | def forward(self, x): 64 | out = F.relu(self.bn1(self.conv1(x))) 65 | out = F.relu(self.bn2(self.conv2(out))) 66 | out = self.bn3(self.conv3(out)) 67 | 68 | 69 | out += self.shortcut(x) 70 | out = F.relu(out) 71 | return out 72 | 73 | def downsample_basic_block(x, planes): 74 | x = nn.AvgPool2d(2,2)(x) 75 | zero_pads = torch.Tensor( 76 | x.size(0), planes - x.size(1), x.size(2), x.size(3)).zero_() 77 | if isinstance(x.data, torch.cuda.FloatTensor): 78 | zero_pads = zero_pads.cuda() 79 | 80 | out = Variable(torch.cat([x.data, zero_pads], dim=1)) 81 | 82 | return out 83 | 84 | class ResNet(nn.Module): 85 | def __init__(self, block, num_blocks, cfg, num_classes=10): 86 | super(ResNet, self).__init__() 87 | n = 2 88 | self.in_planes = 64 89 | self.cfg = cfg 90 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 91 | self.bn1 = nn.BatchNorm2d(64) 92 | self.layer1 = self._make_layer(block, 64, num_blocks[0], cfg=cfg[0:n], stride=1) 93 | self.layer2 = self._make_layer(block, 128, num_blocks[1], cfg=cfg[n:2*n], stride=2) 94 | self.layer3 = self._make_layer(block, 256, num_blocks[2], cfg=cfg[2*n:3*n], stride=2) 95 | self.layer4 = self._make_layer(block, 512, num_blocks[3], cfg=cfg[3*n:4*n], stride=2) 96 | self.linear = nn.Linear(512*block.expansion, num_classes) 97 | 98 | # def _make_layer(self, block, planes, num_blocks, cfg, stride): 99 | # strides = [stride] + [1]*(num_blocks-1) 100 | # layers = [] 101 | # for stride in strides: 102 | # layers.append(block(self.in_planes, planes, stride)) 103 | # self.in_planes = planes * block.expansion 104 | # return nn.Sequential(*layers) 105 | def _make_layer(self, block, planes, num_blocks, cfg, stride=1): 106 | downsample = None 107 | 108 | if stride != 1 or self.in_planes != planes * block.expansion: 109 | downsample = partial(downsample_basic_block, planes=planes*block.expansion) 110 | strides = [stride] + [1]*(num_blocks-1) 111 | 112 | layers = [] 113 | layers.append(block(self.in_planes, planes, cfg[0], stride, downsample)) 114 | self.in_planes = planes * block.expansion 115 | for i in range(1, num_blocks): 116 | layers.append(block(self.in_planes, planes, cfg[i])) 117 | 118 | return nn.Sequential(*layers) 119 | 120 | # def _make_layer(self, block, planes, num_blocks, cfg, stride): 121 | # strides = [stride] + [1]*(num_blocks-1) 122 | # layers = [] 123 | # k = 0 124 | # for stride in strides: 125 | # layers.append(block(self.in_planes, planes, cfg[k+1], stride)) 126 | # self.in_planes = planes * block.expansion 127 | # k += 1 128 | # return nn.Sequential(*layers) 129 | 130 | def forward(self, x): 131 | out = F.relu(self.bn1(self.conv1(x))) 132 | # print(out.size()) 133 | out = self.layer1(out) 134 | out = self.layer2(out) 135 | out = self.layer3(out) 136 | out = self.layer4(out) 137 | out = F.avg_pool2d(out, 4) 138 | out = out.view(out.size(0), -1) 139 | out = self.linear(out) 140 | return out 141 | 142 | 143 | def ResNet18_prue(cfg): 144 | return ResNet(BasicBlock, [2,2,2,2], cfg) 145 | 146 | def ResNet34(): 147 | return ResNet(BasicBlock, [3,4,6,3]) 148 | 149 | def ResNet50(): 150 | return ResNet(Bottleneck, [3,4,6,3]) 151 | 152 | def ResNet101(): 153 | return ResNet(Bottleneck, [3,4,23,3]) 154 | 155 | def ResNet152(): 156 | return ResNet(Bottleneck, [3,8,36,3]) 157 | 158 | 159 | def test(): 160 | net = ResNet18() 161 | y = net(torch.randn(1,3,32,32)) 162 | print(y.size()) 163 | 164 | # test() 165 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/simple.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class SimpleNet(nn.Module): 6 | def __init__(self, name=None): 7 | super(SimpleNet, self).__init__() 8 | self.name=name 9 | 10 | 11 | def save_stats(self, epoch, loss, acc): 12 | self.stats['epoch'].append(epoch) 13 | self.stats['loss'].append(loss) 14 | self.stats['acc'].append(acc) 15 | 16 | 17 | def copy_params(self, state_dict, coefficient_transfer=100): 18 | 19 | own_state = self.state_dict() 20 | 21 | for name, param in state_dict.items(): 22 | if name in own_state: 23 | own_state[name].copy_(param.clone()) 24 | 25 | 26 | 27 | 28 | class SimpleMnist(SimpleNet): 29 | def __init__(self, name=None): 30 | super(SimpleMnist, self).__init__(name) 31 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 32 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 33 | self.conv2_drop = nn.Dropout2d() 34 | self.fc1 = nn.Linear(320, 50) 35 | self.fc2 = nn.Linear(50, 10) 36 | 37 | 38 | def forward(self, x): 39 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 40 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 41 | x = x.view(-1, 320) 42 | x = F.relu(self.fc1(x)) 43 | x = F.dropout(x, training=self.training) 44 | x = self.fc2(x) 45 | return F.log_softmax(x, dim=1) -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/vgg.py: -------------------------------------------------------------------------------- 1 | ''' 2 | dummy file to use as an adaptor to switch between 3 | two vgg architectures 4 | 5 | vgg9: use vgg9_only.py which is from https://github.com/kuangliu/pytorch-cifar 6 | vgg11/13/16/19: use vgg_modified.py which is modified from https://github.com/pytorch/vision.git 7 | ''' 8 | 9 | import torch 10 | import torch.nn as nn 11 | import models.vgg9_only as vgg9 12 | import models.vgg_modified as vgg_mod 13 | import logging 14 | 15 | logging.basicConfig() 16 | logger = logging.getLogger() 17 | logger.setLevel(logging.INFO) 18 | 19 | 20 | def get_vgg_model(vgg_name): 21 | logging.info("GET_VGG_MODEL: Fetch {}".format(vgg_name)) 22 | if vgg_name == 'vgg9': 23 | return vgg9.VGG('VGG9') 24 | elif vgg_name == 'vgg11': 25 | return vgg_mod.vgg11() 26 | elif vgg_name == 'vgg13': 27 | return vgg_mod.vgg13() 28 | elif vgg_name == 'vgg16': 29 | return vgg_mod.vgg16() 30 | 31 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/vgg9_only.py: -------------------------------------------------------------------------------- 1 | '''VGG11/13/16/19 in Pytorch.''' 2 | ''' 3 | NOTE: This was supposed to be VGG11/13/16 and 19 but it really is 4 | VGG9/11/14 and 17. 5 | Also, the architecture is slightly different from VGGn in pytorch 6 | It has (n-1) conv layers and 1 FC layer as opposed to (n-3) conv and 7 | 3 FC layers in pytorch. 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | cfg = { 14 | 'VGG9': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 15 | 'VGG11': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 16 | 'VGG14': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 17 | 'VGG17': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 18 | } 19 | 20 | 21 | class VGG(nn.Module): 22 | def __init__(self, vgg_name): 23 | super(VGG, self).__init__() 24 | self.features = self._make_layers(cfg[vgg_name]) 25 | self.classifier = nn.Linear(512, 10) 26 | 27 | def forward(self, x): 28 | out = self.features(x) 29 | out = out.view(out.size(0), -1) 30 | out = self.classifier(out) 31 | return out 32 | 33 | def _make_layers(self, cfg): 34 | layers = [] 35 | in_channels = 3 36 | for x in cfg: 37 | if x == 'M': 38 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 39 | else: 40 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 41 | #nn.BatchNorm2d(x), 42 | nn.ReLU(inplace=True)] 43 | in_channels = x 44 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 45 | return nn.Sequential(*layers) 46 | 47 | 48 | def test(): 49 | net = VGG('VGG9') 50 | x = torch.randn(2,3,32,32) 51 | y = net(x) 52 | print(net) 53 | for p_index, (n, p) in enumerate(net.named_parameters()): 54 | print(n, p.size()) 55 | print(y.size()) 56 | 57 | #test() 58 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/vgg_modified.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Modified from https://github.com/pytorch/vision.git 3 | ''' 4 | import math 5 | import logging 6 | 7 | import torch.nn as nn 8 | import torch.nn.init as init 9 | 10 | 11 | logging.basicConfig() 12 | logger = logging.getLogger() 13 | logger.setLevel(logging.INFO) 14 | 15 | __all__ = [ 16 | 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 17 | 'vgg19_bn', 'vgg19', 18 | ] 19 | 20 | 21 | class VGG(nn.Module): 22 | ''' 23 | VGG model 24 | ''' 25 | def __init__(self, features, num_classes=10): 26 | super(VGG, self).__init__() 27 | self.features = features 28 | self.classifier = nn.Sequential( 29 | nn.Dropout(), 30 | nn.Linear(512, 512), 31 | nn.ReLU(True), 32 | nn.Dropout(), 33 | nn.Linear(512, 512), 34 | nn.ReLU(True), 35 | nn.Linear(512, num_classes), 36 | ) 37 | # Initialize weights 38 | for m in self.modules(): 39 | if isinstance(m, nn.Conv2d): 40 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 41 | m.weight.data.normal_(0, math.sqrt(2. / n)) 42 | m.bias.data.zero_() 43 | 44 | 45 | def forward(self, x): 46 | x = self.features(x) 47 | x = x.view(x.size(0), -1) 48 | x = self.classifier(x) 49 | return x 50 | 51 | 52 | def make_layers(cfg, batch_norm=False): 53 | layers = [] 54 | in_channels = 3 55 | for v in cfg: 56 | if v == 'M': 57 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 58 | else: 59 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 60 | if batch_norm: 61 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 62 | else: 63 | layers += [conv2d, nn.ReLU(inplace=True)] 64 | in_channels = v 65 | return nn.Sequential(*layers) 66 | 67 | 68 | cfg = { 69 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 70 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 71 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 72 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 73 | 512, 512, 512, 512, 'M'], 74 | } 75 | 76 | 77 | def vgg11(): 78 | """VGG 11-layer model (configuration "A")""" 79 | return VGG(make_layers(cfg['A'])) 80 | 81 | 82 | def vgg11_bn(num_classes=10): 83 | """VGG 11-layer model (configuration "A") with batch normalization""" 84 | return VGG(make_layers(cfg['A'], batch_norm=True), num_classes=num_classes) 85 | 86 | 87 | def vgg13(): 88 | """VGG 13-layer model (configuration "B")""" 89 | return VGG(make_layers(cfg['B'])) 90 | 91 | 92 | def vgg13_bn(): 93 | """VGG 13-layer model (configuration "B") with batch normalization""" 94 | return VGG(make_layers(cfg['B'], batch_norm=True)) 95 | 96 | 97 | def vgg16(): 98 | """VGG 16-layer model (configuration "D")""" 99 | return VGG(make_layers(cfg['D'])) 100 | 101 | 102 | def vgg16_bn(): 103 | """VGG 16-layer model (configuration "D") with batch normalization""" 104 | return VGG(make_layers(cfg['D'], batch_norm=True)) 105 | 106 | 107 | def vgg19(): 108 | """VGG 19-layer model (configuration "E")""" 109 | return VGG(make_layers(cfg['E'])) 110 | 111 | 112 | def vgg19_bn(): 113 | """VGG 19-layer model (configuration 'E') with batch normalization""" 114 | return VGG(make_layers(cfg['E'], batch_norm=True)) 115 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/vggnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | def conv_init(m): 6 | classname = m.__class__.__name__ 7 | if classname.find('Conv') != -1: 8 | init.xavier_uniform(m.weight, gain=np.sqrt(2)) 9 | init.constant(m.bias, 0) 10 | 11 | def cfg(depth): 12 | depth_lst = [11, 13, 16, 19] 13 | assert (depth in depth_lst), "Error : VGGnet depth should be either 11, 13, 16, 19" 14 | cf_dict = { 15 | '11': [ 16 | 64, 'mp', 17 | 128, 'mp', 18 | 256, 256, 'mp', 19 | 512, 512, 'mp', 20 | 512, 512, 'mp'], 21 | '13': [ 22 | 64, 64, 'mp', 23 | 128, 128, 'mp', 24 | 256, 256, 'mp', 25 | 512, 512, 'mp', 26 | 512, 512, 'mp' 27 | ], 28 | '16': [ 29 | 64, 64, 'mp', 30 | 128, 128, 'mp', 31 | 256, 256, 256, 'mp', 32 | 512, 512, 512, 'mp', 33 | 512, 512, 512, 'mp' 34 | ], 35 | '19': [ 36 | 64, 64, 'mp', 37 | 128, 128, 'mp', 38 | 256, 256, 256, 256, 'mp', 39 | 512, 512, 512, 512, 'mp', 40 | 512, 512, 512, 512, 'mp' 41 | ], 42 | } 43 | 44 | return cf_dict[str(depth)] 45 | 46 | def conv3x3(in_planes, out_planes, stride=1): 47 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True) 48 | 49 | class VGG(nn.Module): 50 | def __init__(self, depth, num_classes): 51 | super(VGG, self).__init__() 52 | self.features = self._make_layers(cfg(depth)) 53 | self.linear = nn.Linear(512, num_classes) 54 | 55 | def forward(self, x): 56 | out = self.features(x) 57 | out = out.view(out.size(0), -1) 58 | out = self.linear(out) 59 | 60 | return out 61 | 62 | def _make_layers(self, cfg): 63 | layers = [] 64 | in_planes = 3 65 | 66 | for x in cfg: 67 | if x == 'mp': 68 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 69 | else: 70 | layers += [conv3x3(in_planes, x), nn.BatchNorm2d(x), nn.ReLU(inplace=True)] 71 | in_planes = x 72 | 73 | # After cfg convolution 74 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 75 | return nn.Sequential(*layers) 76 | 77 | if __name__ == "__main__": 78 | net = VGG(16, 10) 79 | y = net(Variable(torch.randn(1,3,32,32))) 80 | print(y.size()) 81 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/word_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Variable 3 | 4 | from models.simple import SimpleNet 5 | import torch 6 | 7 | extracted_grads = [] 8 | def extract_grad_hook(module, grad_in, grad_out): 9 | 10 | extracted_grads.append(grad_out[0]) 11 | 12 | class RNNModel(SimpleNet): 13 | """Container module with an encoder, a recurrent module, and a decoder.""" 14 | 15 | def __init__(self, name, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False, binary=False): 16 | super(RNNModel, self).__init__(name=name) 17 | if binary: 18 | self.encoder = nn.Embedding(ntoken, ninp) 19 | 20 | self.lstm = nn.LSTM(ninp, nhid, nlayers, dropout=0.5, batch_first=True) 21 | self.drop = nn.Dropout(dropout) 22 | self.decoder = nn.Linear(nhid, 1) 23 | self.sig = nn.Sigmoid() 24 | else: 25 | self.drop = nn.Dropout(dropout) 26 | self.encoder = nn.Embedding(ntoken, ninp) 27 | 28 | 29 | if rnn_type in ['LSTM', 'GRU']: 30 | self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout) 31 | else: 32 | try: 33 | nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type] 34 | except KeyError: 35 | raise ValueError( """An invalid option for `--model` was supplied, 36 | options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""") 37 | self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout) 38 | 39 | self.decoder = nn.Linear(nhid, ntoken) 40 | 41 | 42 | if tie_weights: 43 | if nhid != ninp: 44 | raise ValueError('When using the tied flag, nhid must be equal to emsize') 45 | self.decoder.weight = self.encoder.weight 46 | 47 | self.rnn_type = rnn_type 48 | self.nhid = nhid 49 | self.nlayers = nlayers 50 | self.binary = binary 51 | 52 | def init_weights(self): 53 | initrange = 0.1 54 | self.encoder.weight.data.uniform_(-initrange, initrange) 55 | self.decoder.bias.data.fill_(0) 56 | self.decoder.weight.data.uniform_(-initrange, initrange) 57 | 58 | def return_embedding_matrix(self): 59 | return self.encoder.weight.data 60 | 61 | def embedding_t(self,input): 62 | input = input.type(torch.LongTensor) 63 | input = input.cuda() 64 | 65 | emb = self.encoder(input) 66 | return emb 67 | 68 | def forward(self, input, hidden, latern=False, emb=None): 69 | 70 | if self.binary: 71 | batch_size = input.size(0) 72 | emb = self.encoder(input) 73 | output, hidden = self.lstm(emb, hidden) 74 | output = output.contiguous().view(-1, self.nhid) 75 | out = self.drop(output) 76 | out = self.decoder(out) 77 | sig_out = self.sig(out) 78 | sig_out = sig_out.view(batch_size, -1) 79 | sig_out = sig_out[:, -1] 80 | return sig_out, hidden 81 | 82 | else: 83 | if emb is None: 84 | emb = self.drop(self.encoder(input)) 85 | 86 | output, hidden = self.rnn(emb, hidden) 87 | output = self.drop(output) 88 | 89 | decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2))) 90 | if latern: 91 | return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden, emb 92 | else: 93 | return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden 94 | 95 | def init_hidden(self, bsz): 96 | weight = next(self.parameters()).data 97 | if self.rnn_type == 'LSTM': 98 | return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()), 99 | Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())) 100 | else: 101 | return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()) 102 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/models/wrn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | import sys 8 | import numpy as np 9 | 10 | def conv3x3(in_planes, out_planes, stride=1): 11 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True) 12 | 13 | def conv_init(m): 14 | classname = m.__class__.__name__ 15 | if classname.find('Conv') != -1: 16 | init.xavier_uniform(m.weight, gain=np.sqrt(2)) 17 | init.constant(m.bias, 0) 18 | elif classname.find('BatchNorm') != -1: 19 | init.constant(m.weight, 1) 20 | init.constant(m.bias, 0) 21 | 22 | class wide_basic(nn.Module): 23 | def __init__(self, in_planes, planes, dropout_rate, stride=1): 24 | super(wide_basic, self).__init__() 25 | self.bn1 = nn.BatchNorm2d(in_planes) 26 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=True) 27 | self.dropout = nn.Dropout(p=dropout_rate) 28 | self.bn2 = nn.BatchNorm2d(planes) 29 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=True) 30 | 31 | self.shortcut = nn.Sequential() 32 | if stride != 1 or in_planes != planes: 33 | self.shortcut = nn.Sequential( 34 | nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=True), 35 | ) 36 | 37 | def forward(self, x): 38 | out = self.dropout(self.conv1(F.relu(self.bn1(x)))) 39 | out = self.conv2(F.relu(self.bn2(out))) 40 | out += self.shortcut(x) 41 | 42 | return out 43 | 44 | class Wide_ResNet(nn.Module): 45 | def __init__(self, depth, widen_factor, dropout_rate, num_classes): 46 | super(Wide_ResNet, self).__init__() 47 | self.in_planes = 16 48 | 49 | assert ((depth-4)%6 ==0), 'Wide-resnet depth should be 6n+4' 50 | n = (depth-4)//6 51 | k = widen_factor 52 | 53 | #print('| Wide-Resnet %dx%d' %(depth, k)) 54 | nStages = [16, 16*k, 32*k, 64*k] 55 | 56 | self.conv1 = conv3x3(3,nStages[0]) 57 | self.layer1 = self._wide_layer(wide_basic, nStages[1], n, dropout_rate, stride=1) 58 | self.layer2 = self._wide_layer(wide_basic, nStages[2], n, dropout_rate, stride=2) 59 | self.layer3 = self._wide_layer(wide_basic, nStages[3], n, dropout_rate, stride=2) 60 | self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9) 61 | self.linear = nn.Linear(nStages[3], num_classes) 62 | 63 | def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride): 64 | strides = [stride] + [1]*(num_blocks-1) 65 | layers = [] 66 | 67 | for stride in strides: 68 | layers.append(block(self.in_planes, planes, dropout_rate, stride)) 69 | self.in_planes = planes 70 | 71 | return nn.Sequential(*layers) 72 | 73 | def forward(self, x): 74 | out = self.conv1(x) 75 | out = self.layer1(out) 76 | out = self.layer2(out) 77 | out = self.layer3(out) 78 | out = F.relu(self.bn1(out)) 79 | out = F.avg_pool2d(out, 8) 80 | out = out.view(out.size(0), -1) 81 | out = self.linear(out) 82 | 83 | return out 84 | 85 | if __name__ == '__main__': 86 | net=Wide_ResNet(28, 10, 0.3, 10) 87 | y = net(Variable(torch.randn(1,3,32,32))) 88 | 89 | print(y.size()) 90 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/run_backdoor_cv_task.sh: -------------------------------------------------------------------------------- 1 | 2 | ##### cifar10 3 | ##### base case 4 | nohup python main_training.py --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 250 --gradmask_ratio 1.0 --poison_lr 0.003 --aggregate_all_layer 1 --edge_case 0 5 | 6 | nohup python main_training.py --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 250 --gradmask_ratio 0.99 --poison_lr 0.02 --aggregate_all_layer 1 --edge_case 0 7 | 8 | nohup python main_training.py --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 250 --gradmask_ratio 0.97 --poison_lr 0.02 --aggregate_all_layer 1 --edge_case 0 9 | 10 | nohup python main_training.py --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 250 --gradmask_ratio 0.95 --poison_lr 0.02 --aggregate_all_layer 1 --edge_case 0 11 | 12 | 13 | ##### cifar10 14 | ##### edge case 15 | nohup python main_training.py --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 200 --gradmask_ratio 1.0 --poison_lr 0.003 --aggregate_all_layer 1 --edge_case 1 16 | 17 | nohup python main_training.py --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 200 --gradmask_ratio 0.95 --poison_lr 0.02 --aggregate_all_layer 1 --edge_case 1 18 | 19 | 20 | 21 | ##### cifar100 22 | ##### base case 23 | nohup python main_training.py --params utils/cifar100_params.yaml --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 200 --gradmask_ratio 1.0 --poison_lr 0.003 --aggregate_all_layer 1 --edge_case 0 24 | 25 | nohup python main_training.py --params utils/cifar100_params.yaml --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 200 --gradmask_ratio 0.95 --poison_lr 0.02 --aggregate_all_layer 1 --edge_case 0 26 | 27 | ##### cifar100 28 | ##### edge case 29 | nohup python main_training.py --params utils/cifar100_params.yaml --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 200 --gradmask_ratio 1.0 --poison_lr 0.003 --aggregate_all_layer 1 --edge_case 1 30 | 31 | nohup python main_training.py --params utils/cifar100_params.yaml --run_slurm 0 --GPU_id 0 --start_epoch 1801 --is_poison True --defense True --s_norm 0.2 --attack_num 200 --gradmask_ratio 0.95 --poison_lr 0.02 --aggregate_all_layer 1 --edge_case 1 32 | 33 | 34 | 35 | ##### EMNIST-byClass 36 | nohup python main_training.py --params utils/emnist_byclass_params.yaml --run_slurm 0 --GPU_id 0 --start_epoch 2001 --defense True --attack_num 100 --s_norm 1.0 --aggregate_all_layer 1 --is_poison True --edge_case 1 --emnist_style byclass --gradmask_ratio 1.0 --poison_lr 0.01 37 | 38 | nohup python main_training.py --params utils/emnist_byclass_params.yaml --run_slurm 0 --GPU_id 0 --start_epoch 2001 --defense True --attack_num 100 --s_norm 1.0 --aggregate_all_layer 1 --is_poison True --edge_case 1 --emnist_style byclass --gradmask_ratio 0.95 --poison_lr 0.01 39 | 40 | 41 | 42 | ##### EMNIST-digit 43 | nohup python main_training.py --params utils/emnist_params.yaml --run_slurm 0 --GPU_id 0 --start_epoch 1 --is_poison True --defense True --s_norm 0.5 --attack_num 200 --gradmask_ratio 1.0 --poison_lr 0.003 --aggregate_all_layer 1 --edge_case 1 44 | 45 | nohup python main_training.py --params utils/emnist_params.yaml --run_slurm 0 --GPU_id 0 --start_epoch 1 --is_poison True --defense True --s_norm 0.5 --attack_num 200 --gradmask_ratio 0.95 --poison_lr 0.04 --aggregate_all_layer 1 --edge_case 1 46 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/text_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import json 4 | import re 5 | from tqdm import tqdm 6 | import random 7 | 8 | filter_symbols = re.compile('[a-zA-Z]*') 9 | 10 | class Dictionary(object): 11 | def __init__(self): 12 | self.word2idx = {} 13 | self.idx2word = [] 14 | 15 | def add_word(self, word): 16 | raise ValueError("Please don't call this method, so we won't break the dictionary :) ") 17 | 18 | def __len__(self): 19 | return len(self.idx2word) 20 | 21 | 22 | def get_word_list(line, dictionary): 23 | splitted_words = json.loads(line.lower()).split() 24 | words = [''] 25 | for word in splitted_words: 26 | word = filter_symbols.search(word)[0] 27 | if len(word)>1: 28 | if dictionary.word2idx.get(word, False): 29 | words.append(word) 30 | else: 31 | words.append('') 32 | words.append('') 33 | 34 | return words 35 | 36 | 37 | class Corpus(object): 38 | def __init__(self, params, dictionary, is_poison=False): 39 | self.path = params.data_folder 40 | authors_no = params.number_of_total_participants 41 | 42 | self.dictionary = dictionary 43 | self.no_tokens = len(self.dictionary) 44 | self.authors_no = authors_no 45 | # self.train = self.tokenize_train(f'{self.path}/shard_by_author', is_poison=is_poison) 46 | path = os.path.join(self.path, 'test_data.json') 47 | self.test = self.tokenize(path) 48 | 49 | def load_poison_data(self, number_of_words): 50 | current_word_count = 0 51 | path = f'{self.path}/shard_by_author' 52 | list_of_authors = iter(os.listdir(path)) 53 | word_list = list() 54 | line_number = 0 55 | posts_count = 0 56 | while current_word_count 2: 63 | word_list.extend([self.dictionary.word2idx[word] for word in words]) 64 | current_word_count += len(words) 65 | line_number += 1 66 | 67 | ids = torch.LongTensor(word_list[:number_of_words]) 68 | 69 | return ids 70 | 71 | 72 | def tokenize_train(self, path, is_poison=False): 73 | """ 74 | We return a list of ids per each participant. 75 | :param path: 76 | :return: 77 | """ 78 | files = os.listdir(path) 79 | per_participant_ids = list() 80 | for file in tqdm(files[:self.authors_no]): 81 | 82 | # jupyter creates somehow checkpoints in this folder 83 | if 'checkpoint' in file: 84 | continue 85 | 86 | new_path=f'{path}/{file}' 87 | with open(new_path, 'r') as f: 88 | 89 | tokens = 0 90 | word_list = list() 91 | for line in f: 92 | words = get_word_list(line, self.dictionary) 93 | tokens += len(words) 94 | word_list.extend([self.dictionary.word2idx[x] for x in words]) 95 | 96 | ids = torch.LongTensor(word_list) 97 | 98 | per_participant_ids.append(ids) 99 | 100 | return per_participant_ids 101 | 102 | 103 | def tokenize(self, path): 104 | """Tokenizes a text file.""" 105 | assert os.path.exists(path) 106 | # Add words to the dictionary 107 | word_list = list() 108 | with open(path, 'r') as f: 109 | tokens = 0 110 | 111 | for line in f: 112 | words = get_word_list(line, self.dictionary) 113 | tokens += len(words) 114 | word_list.extend([self.dictionary.word2idx[x] for x in words]) 115 | 116 | ids = torch.LongTensor(word_list) 117 | 118 | return ids 119 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #### init -------------------------------------------------------------------------------- /FL_Backdoor_CV/utils/cifar100_params.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | type: image 4 | test_batch_size: 1000 5 | lr: 0.001 6 | target_lr: 0.2 7 | 8 | 9 | momentum: 0.0 10 | decay: 0.0005 11 | batch_size: 64 12 | model: resnet 13 | dataset: cifar100 14 | 15 | current_time: 2021 16 | 17 | benign_start_index: 1 18 | 19 | number_of_adversaries: 1 20 | 21 | participant_population: 1000 22 | partipant_sample_size: 10 23 | no_models: 10 24 | epochs: 400 25 | retrain_no_times: 2 26 | 27 | number_of_total_participants: 1000 28 | sampling_dirichlet: true 29 | dirichlet_alpha: 0.9 30 | 31 | save_model: false 32 | save_on_epochs: [20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340, 360, 380, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 1999] 33 | 34 | resumed_model: 35 | environment_name: ppdl_experiment 36 | report_train_loss: false 37 | report_test_loss: false 38 | report_poison_loss: false 39 | track_distance: false 40 | track_clusters: false 41 | log_interval: 10 42 | 43 | modify_poison: false 44 | 45 | # file names of the images 46 | poison_type: wall 47 | 48 | # manually chosen images for tests 49 | poison_images_test: 50 | - 330 51 | - 568 52 | - 3934 53 | - 12336 54 | - 30560 55 | 56 | poison_images: 57 | - 30696 58 | - 33105 59 | - 33615 60 | - 33907 61 | - 36848 62 | - 40713 63 | - 41706 64 | 65 | 66 | # image_29911.jpg 67 | poison_image_id: 2775 68 | poison_image_id_2: 1605 69 | poison_label_swap: 9 70 | size_of_secret_dataset: 500 71 | poisoning_per_batch: 1 72 | poison_test_repeat: 1000 73 | is_poison: false 74 | baseline: false 75 | random_compromise: false 76 | noise_level: 0.01 77 | 78 | retrain_poison: 10 79 | # scale_weights: 100 80 | poison_lr: 0.05 81 | poison_momentum: 0.9 82 | poison_decay: 0.005 83 | poison_step_lr: true 84 | clamp_value: 1.0 85 | alpha_loss: 1.0 86 | number_of_adversaries: 1 87 | poisoned_number: 2 88 | results_json: false 89 | 90 | s_norm: 0.3 91 | diff_privacy: false 92 | 93 | checkpoint_folder: ./saved_models_cifar100 94 | data_folder: ./data 95 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/utils/cifar10_params.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | type: image 4 | test_batch_size: 1000 5 | lr: 0.001 6 | 7 | target_lr: 0.2 8 | 9 | momentum: 0.9 10 | decay: 0.0005 11 | batch_size: 64 12 | model: resnet 13 | dataset: cifar10 14 | 15 | edge_case: False 16 | 17 | current_time: 2021 18 | 19 | benign_start_index: 1 20 | 21 | number_of_adversaries: 1 22 | 23 | participant_population: 1000 24 | partipant_sample_size: 10 25 | no_models: 10 26 | epochs: 400 27 | retrain_no_times: 2 28 | 29 | number_of_total_participants: 1000 30 | sampling_dirichlet: true 31 | dirichlet_alpha: 0.9 32 | 33 | save_model: false 34 | save_on_epochs: [20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340, 360, 380, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 1999] 35 | 36 | resumed_model: 37 | environment_name: ppdl_experiment 38 | report_train_loss: false 39 | report_test_loss: false 40 | report_poison_loss: false 41 | track_distance: false 42 | track_clusters: false 43 | log_interval: 10 44 | 45 | modify_poison: false 46 | 47 | # file names of the images 48 | poison_type: wall 49 | 50 | # manually chosen images for tests 51 | poison_images_test: 52 | - 330 53 | - 568 54 | - 3934 55 | - 12336 56 | - 30560 57 | 58 | poison_images: 59 | - 30696 60 | - 33105 61 | - 33615 62 | - 33907 63 | - 36848 64 | - 40713 65 | - 41706 66 | 67 | 68 | # image_29911.jpg 69 | poison_image_id: 2775 70 | poison_image_id_2: 1605 71 | poison_label_swap: 9 72 | size_of_secret_dataset: 500 73 | poisoning_per_batch: 1 74 | poison_test_repeat: 1000 75 | is_poison: false 76 | baseline: false 77 | random_compromise: false 78 | noise_level: 0.01 79 | 80 | retrain_poison: 10 81 | 82 | poison_lr: 0.05 83 | poison_momentum: 0.9 84 | poison_decay: 0.005 85 | poison_step_lr: true 86 | clamp_value: 1.0 87 | alpha_loss: 1.0 88 | number_of_adversaries: 1 89 | poisoned_number: 2 90 | results_json: false 91 | 92 | s_norm: 0.3 93 | diff_privacy: false 94 | 95 | checkpoint_folder: ./saved_models 96 | data_folder: ./data 97 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/utils/emnist_byclass_params.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | type: image 4 | test_batch_size: 2000 5 | lr: 0.01 6 | 7 | emnist_style: byclass 8 | target_lr: 0.1 9 | 10 | momentum: 0.9 11 | decay: 0.0005 12 | batch_size: 64 13 | model: resnet 14 | dataset: emnist 15 | 16 | edge_case: False 17 | 18 | current_time: 2021 19 | 20 | benign_start_index: 1 21 | 22 | number_of_adversaries: 1 23 | 24 | participant_population: 3000 25 | partipant_sample_size: 10 26 | no_models: 10 27 | epochs: 400 28 | retrain_no_times: 2 29 | 30 | number_of_total_participants: 3000 31 | sampling_dirichlet: true 32 | dirichlet_alpha: 0.9 33 | 34 | save_model: false 35 | save_on_epochs: [20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340, 360, 380, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 1999] 36 | 37 | resumed_model: 38 | environment_name: ppdl_experiment 39 | report_train_loss: false 40 | report_test_loss: false 41 | report_poison_loss: false 42 | track_distance: false 43 | track_clusters: false 44 | log_interval: 10 45 | 46 | modify_poison: false 47 | 48 | # file names of the images 49 | poison_type: wall 50 | 51 | # manually chosen images for tests 52 | poison_images_test: 53 | - 330 54 | - 568 55 | - 3934 56 | - 12336 57 | - 30560 58 | 59 | poison_images: 60 | - 30696 61 | - 33105 62 | - 33615 63 | - 33907 64 | - 36848 65 | - 40713 66 | - 41706 67 | 68 | 69 | # image_29911.jpg 70 | poison_image_id: 2775 71 | poison_image_id_2: 1605 72 | poison_label_swap: 1 73 | size_of_secret_dataset: 500 74 | poisoning_per_batch: 1 75 | poison_test_repeat: 1000 76 | is_poison: false 77 | baseline: false 78 | random_compromise: false 79 | noise_level: 0.01 80 | 81 | retrain_poison: 10 82 | # scale_weights: 100 83 | poison_lr: 0.05 84 | poison_momentum: 0.9 85 | poison_decay: 0.00005 86 | poison_step_lr: true 87 | clamp_value: 1.0 88 | alpha_loss: 1.0 89 | number_of_adversaries: 1 90 | poisoned_number: 2 91 | results_json: false 92 | 93 | s_norm: 0.3 94 | diff_privacy: false 95 | 96 | checkpoint_folder: ./saved_models 97 | data_folder: ./data 98 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/utils/emnist_params.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | type: image 4 | test_batch_size: 100 5 | lr: 0.01 6 | 7 | target_lr: 0.1 8 | 9 | momentum: 0.9 10 | decay: 0.0005 11 | batch_size: 64 12 | model: resnet 13 | dataset: emnist 14 | 15 | edge_case: False 16 | 17 | current_time: 2021 18 | 19 | benign_start_index: 1 20 | 21 | number_of_adversaries: 1 22 | 23 | participant_population: 1000 24 | partipant_sample_size: 10 25 | no_models: 10 26 | epochs: 400 27 | retrain_no_times: 2 28 | 29 | number_of_total_participants: 1000 30 | sampling_dirichlet: true 31 | dirichlet_alpha: 0.9 32 | 33 | save_model: false 34 | save_on_epochs: [20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340, 360, 380, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 1999] 35 | 36 | resumed_model: 37 | environment_name: ppdl_experiment 38 | report_train_loss: false 39 | report_test_loss: false 40 | report_poison_loss: false 41 | track_distance: false 42 | track_clusters: false 43 | log_interval: 10 44 | 45 | modify_poison: false 46 | 47 | # file names of the images 48 | poison_type: wall 49 | 50 | # manually chosen images for tests 51 | poison_images_test: 52 | - 330 53 | - 568 54 | - 3934 55 | - 12336 56 | - 30560 57 | 58 | poison_images: 59 | - 30696 60 | - 33105 61 | - 33615 62 | - 33907 63 | - 36848 64 | - 40713 65 | - 41706 66 | 67 | 68 | # image_29911.jpg 69 | poison_image_id: 2775 70 | poison_image_id_2: 1605 71 | poison_label_swap: 1 72 | size_of_secret_dataset: 500 73 | poisoning_per_batch: 1 74 | poison_test_repeat: 1000 75 | is_poison: false 76 | baseline: false 77 | random_compromise: false 78 | noise_level: 0.01 79 | 80 | retrain_poison: 10 81 | # scale_weights: 100 82 | poison_lr: 0.05 83 | poison_momentum: 0.9 84 | poison_decay: 0.00005 85 | poison_step_lr: true 86 | clamp_value: 1.0 87 | alpha_loss: 1.0 88 | number_of_adversaries: 1 89 | poisoned_number: 2 90 | results_json: false 91 | 92 | s_norm: 0.3 93 | diff_privacy: false 94 | 95 | checkpoint_folder: ./saved_models 96 | data_folder: ./data 97 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/utils/text_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import json 4 | import re 5 | import io 6 | import numpy as np 7 | 8 | filter_symbols = re.compile('[a-zA-Z]*') 9 | 10 | class Dictionary(object): 11 | def __init__(self): 12 | self.word2idx = {} 13 | self.idx2word = [] 14 | 15 | def add_word(self, word): 16 | raise ValueError("Please don't call this method, so we won't break the dictionary :) ") 17 | 18 | def __len__(self): 19 | return len(self.idx2word) 20 | 21 | def get_word_list(line, dictionary): 22 | splitted_words = line.lower().split() 23 | words = [''] 24 | for word in splitted_words: 25 | word = filter_symbols.search(word)[0] 26 | if len(word)>1: 27 | if dictionary.word2idx.get(word, False): 28 | words.append(word) 29 | else: 30 | words.append('') 31 | words.append('') 32 | 33 | return words 34 | 35 | 36 | class Corpus(object): 37 | def __init__(self, params, dictionary): 38 | self.params = params 39 | self.dictionary = dictionary 40 | 41 | if self.params['dataset'] == 'shakespeare': 42 | corpus_file_name = os.path.join(self.params['data_folder'], 'all_data.json') 43 | with open(corpus_file_name) as f: 44 | data = json.load(f) 45 | self.params['participant_population'] = int(0.8 * len(data['users'])) 46 | self.train, self.test = self.tokenize_shake(data) 47 | 48 | self.attacker_train = self.tokenize_num_of_words(data , self.params['size_of_secret_dataset'] * self.params['batch_size']) 49 | 50 | elif self.params['dataset'] == 'reddit': 51 | 52 | corpus_file_name = os.path.join(self.params['data_folder'], 'corpus_80000.pt.tar') 53 | corpus = torch.load(corpus_file_name) 54 | self.train = corpus.train 55 | self.test = corpus.test 56 | 57 | self.attacker_train = self.tokenize_num_of_words(None , self.params['size_of_secret_dataset'] * self.params['batch_size']) 58 | 59 | elif self.params['dataset'] == 'IMDB': 60 | text_file_name = os.path.join(self.params['data_folder'], 'review_text.txt') 61 | label_file_name = os.path.join(self.params['data_folder'], 'review_label.txt') 62 | with open(text_file_name, 'r') as f: 63 | reviews = f.read() 64 | reviews = reviews.split('\n') 65 | reviews.pop() 66 | with open(label_file_name, 'r') as f: 67 | labels = f.read() 68 | labels = labels.split('\n') 69 | labels.pop() 70 | 71 | self.train, self.train_label, self.test, self.test_label = self.tokenize_IMDB(reviews, labels) 72 | elif self.params['dataset'] == 'sentiment140': 73 | train_data_filename = os.path.join(self.params['data_folder'], 'train_data.txt') 74 | test_data_filename = os.path.join(self.params['data_folder'], 'test_data.txt') 75 | train_label_filename = os.path.join(self.params['data_folder'], 'train_label.txt') 76 | test_label_filename = os.path.join(self.params['data_folder'], 'test_label.txt') 77 | with open(train_data_filename, 'r') as f: 78 | train_data = f.read() 79 | train_data = train_data.split('\n') 80 | train_data.pop() 81 | with open(test_data_filename, 'r') as f: 82 | test_data = f.read() 83 | test_data = test_data.split('\n') 84 | test_data.pop() 85 | with open(train_label_filename, 'r') as f: 86 | train_label = f.read() 87 | train_label = train_label.split('\n') 88 | train_label.pop() 89 | with open(test_label_filename, 'r') as f: 90 | test_label = f.read() 91 | test_label = test_label.split('\n') 92 | test_label.pop() 93 | self.train, self.train_label, self.test, self.test_label = self.tokenize_sentiment140(train_data, train_label, test_data, test_label) 94 | else: 95 | raise ValueError('Unrecognized dataset') 96 | 97 | def tokenize_sentiment140(self, train_text, train_target, test_text, test_target): 98 | each_pariticipant_data_size = len(train_text) // int(self.params['participant_population']) 99 | train_data = [] 100 | train_label = [] 101 | test_data = [] 102 | test_label = [] 103 | each_user_data = [] 104 | each_user_label = [] 105 | 106 | for i in range(len(train_text)): 107 | tweet = train_text[i] 108 | label = train_target[i] 109 | tokens = [self.dictionary.word2idx[w] for w in tweet.split()] 110 | tokens = self.pad_features(tokens, int(self.params['sequence_length'])) 111 | each_user_data.append(tokens) 112 | each_user_label.append(int(label)) 113 | if (i+1) % each_pariticipant_data_size == 0: 114 | train_data.append(each_user_data) 115 | train_label.append(each_user_label) 116 | each_user_data = [] 117 | each_user_label = [] 118 | for i in range(len(test_text)//self.params['test_batch_size'] * self.params['test_batch_size']): 119 | tweet = test_text[i] 120 | label = test_target[i] 121 | tokens = [self.dictionary.word2idx[w] for w in tweet.split()] 122 | tokens = self.pad_features(tokens, int(self.params['sequence_length'])) 123 | test_data.append(tokens) 124 | test_label.append(int(label)) 125 | return train_data, np.array(train_label), np.array(test_data), np.array(test_label) 126 | 127 | def tokenize_IMDB(self, reviews, labels): 128 | # Note: data has already been shuffled. no need to shuffle here. 129 | each_pariticipant_data_size = int(len(reviews) * 0.8 // int(self.params['participant_population'])) 130 | train_data = [] 131 | train_label = [] 132 | test_data = [] 133 | test_label = [] 134 | each_user_data = [] 135 | each_user_label = [] 136 | # Process training data 137 | for i in range(int(len(reviews) * 0.8)): 138 | review = reviews[i] 139 | label = labels[i] 140 | tokens = [self.dictionary.word2idx[w] for w in review.split()] 141 | tokens = self.pad_features(tokens, int(self.params['sequence_length'])) 142 | each_user_data.append(tokens) 143 | each_user_label.append(int(label)) 144 | if (i+1) % each_pariticipant_data_size == 0: 145 | train_data.append(each_user_data) 146 | train_label.append(each_user_label) 147 | each_user_data = [] 148 | each_user_label = [] 149 | # Process test data 150 | for i in range(int(len(reviews) * 0.8), len(reviews)): 151 | review = reviews[i] 152 | label = labels[i] 153 | tokens = [self.dictionary.word2idx[w] for w in review.split()] 154 | tokens = self.pad_features(tokens, int(self.params['sequence_length'])) 155 | test_data.append(tokens) 156 | test_label.append(int(label)) 157 | return train_data, np.array(train_label), np.array(test_data), np.array(test_label) 158 | @staticmethod 159 | def pad_features(tokens, sequence_length): 160 | """add zero paddings to/truncate the token list""" 161 | if len(tokens) < sequence_length: 162 | zeros = list(np.zeros(sequence_length - len(tokens), dtype = int)) 163 | tokens = zeros + tokens 164 | else: 165 | tokens = tokens[:sequence_length] 166 | return tokens 167 | 168 | def tokenize_shake(self, data): 169 | train_data = [] 170 | test_data = [] 171 | 172 | for i, user in enumerate(data['users']): 173 | text = data['user_data'][user]['raw'] 174 | f = io.StringIO(text) 175 | word_list = list() 176 | for line in f: 177 | words = get_word_list(line, self.dictionary) 178 | if len(words) > 2: 179 | word_list.extend(self.dictionary.word2idx[word] for word in words) 180 | if i <= self.params['partipant_population']: 181 | train_data.append(torch.LongTensor(word_list)) 182 | else: 183 | test_data.extend(word_list) 184 | 185 | return train_data, torch.LongTensor(test_data) 186 | 187 | def tokenize_num_of_words(self, data, number_of_words): 188 | """ 189 | Tokenize number_of_words of words. 190 | """ 191 | if self.params['dataset'] == 'reddit': 192 | current_word_count = 0 193 | path = os.path.join(self.params['data_folder'], 'shard_by_author') 194 | list_of_authors = iter(os.listdir(path)) 195 | word_list = list() 196 | while current_word_count < number_of_words: 197 | file_name = next(list_of_authors) 198 | with open(os.path.join(path, file_name), 'r') as f: 199 | for line in f: 200 | words = get_word_list(line, self.dictionary) 201 | if len(words) > 2: 202 | word_list.extend([self.dictionary.word2idx[word] for word in words]) 203 | current_word_count += len(words) 204 | 205 | return torch.LongTensor(word_list[:number_of_words]) 206 | 207 | elif self.params['dataset'] == 'shakespeare': 208 | current_word_count = 0 209 | word_list = list() 210 | for user in data['users']: 211 | text = data['user_data'][user]['raw'] 212 | f = io.StringIO(text) 213 | for line in f: 214 | words = get_word_list(line, self.dictionary) 215 | if len(words) > 2: 216 | word_list.extend([self.dictionary.word2idx[word] for word in words]) 217 | current_word_count += len(words) 218 | 219 | if current_word_count >= number_of_words: 220 | return torch.LongTensor(word_list[:number_of_words]) 221 | 222 | return 223 | return 224 | -------------------------------------------------------------------------------- /FL_Backdoor_CV/write_script.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import os 4 | import random 5 | 6 | def get_slurm_script(args, job_script): 7 | 8 | return f"""#!/bin/bash 9 | #SBATCH -p rise # partition (queue) 10 | #SBATCH -N {len(args.nodes.split(','))} # number of nodes requested 11 | #SBATCH --ntasks-per-node={args.num_gpus} # number of tasks (i.e. processes) 12 | #SBATCH --cpus-per-task={args.cpus_per_task} # number of cores per task 13 | #SBATCH --gres=gpu:{args.num_gpus} 14 | #SBATCH --nodelist={args.nodes} # if you need specific nodes 15 | ##SBATCH --exclude=ace,blaze,bombe,flaminio,freddie,luigi,pavia,atlas,como,havoc,steropes 16 | #SBATCH -t 5-00:00 # time requested (D-HH:MM) 17 | #SBATCH -D /work/yyaoqing/oliver/Federated-Learning-Backdoor/FL_Backdoor_CV 18 | #SBATCH -o slurm_log/slurm.%N.%j..out # STDOUT 19 | #SBATCH -e slurm_log/slurm.%N.%j..err # STDERR 20 | 21 | pwd 22 | hostname 23 | date 24 | echo starting job... 25 | source ~/.bashrc 26 | conda activate flbackdoor 27 | export PYTHONUNBUFFERED=1 28 | 29 | {job_script} 30 | 31 | 32 | wait 33 | date 34 | 35 | ## This script run {args.experiment_name} 36 | """ 37 | 38 | 39 | def get_script(args, BASH_COMMAND_LIST): 40 | 41 | print("Start writing the command list!") 42 | 43 | job_script = """ 44 | """ 45 | for command in BASH_COMMAND_LIST: 46 | job_script += f"srun -N 1 -n 1 {command} & \n \n" 47 | 48 | script = get_slurm_script(args, job_script) 49 | # print(script) 50 | 51 | 52 | file_path = './run_slurm/' 53 | if not os.path.exists(file_path): 54 | os.makedirs(file_path) 55 | 56 | save_file = file_path + args.file_name 57 | if os.path.isfile( save_file): 58 | with open(save_file, 'w') as rsh: 59 | rsh.truncate() 60 | 61 | with open (save_file, 'w') as rsh: 62 | rsh.write(script) 63 | 64 | # os.system(f"chmod +x {file_path + args.file_name}") 65 | print(f'The SLURM .sh File Have Been Saved at {file_path}.') 66 | 67 | if __name__ == "__main__": 68 | print("Starting") 69 | parser = argparse.ArgumentParser(description='SLURM RUN') 70 | 71 | # parameters for training 72 | parser.add_argument('--file_name', 73 | default='cifar10_attack', 74 | type=str, 75 | help='file_name') 76 | 77 | parser.add_argument('--experiment_name', 78 | default='cifar10_attack', 79 | type=str, 80 | help='experiment_name') 81 | 82 | parser.add_argument('--dataset', 83 | default='cifar10', 84 | type=str, 85 | help='gpu list') 86 | 87 | parser.add_argument('--num_gpus', 88 | default=5, 89 | type=int, 90 | help='num_gpus') 91 | 92 | parser.add_argument('--cpus_per_task', 93 | default=2, 94 | type=int, 95 | help='cpus_per_task') 96 | 97 | parser.add_argument('--nodes', 98 | default='atlas,bombe', 99 | type=str, 100 | help='nodes') 101 | 102 | parser.add_argument('--num_users', 103 | default=200, 104 | type=int, 105 | help='num_users') 106 | 107 | parser.add_argument('--attack_target', 108 | default=1, 109 | type=int, 110 | help='attack_target') 111 | 112 | parser.add_argument('--attack_type', 113 | default="edge_case_low_freq_adver", 114 | type=str, 115 | help='attack_type: pattern, semantic, edge_case_adver, edge_case_adver_pattern, edge_case_low_freq_adver, edge_case_low_freq_adver_pattern') 116 | 117 | parser.add_argument('--attack_epoch', 118 | default=100, 119 | type=int, 120 | help='the epoch in which the attacker appears') 121 | 122 | parser.add_argument('--iteration', 123 | default=16+1, 124 | type=int, 125 | help='the iterations of each round') 126 | 127 | parser.add_argument('--NIID', 128 | default=1, 129 | type=int, 130 | help='NIID or IID') 131 | 132 | parser.add_argument('--attack_activate_round', 133 | default=100, 134 | type=int, 135 | help='after the round >= attack_activate_round, the attack come into play') 136 | 137 | parser.add_argument('--one_shot_attack', 138 | default=1, 139 | type=int, 140 | help='one shot attack or not') 141 | 142 | parser.add_argument('--base_image_class', 143 | default=3, 144 | type=int, 145 | help='the class of the base image which will be used as trigger in the future') 146 | 147 | ### model replacement 148 | parser.add_argument('--weights_scale', 149 | default=1, 150 | type=int, 151 | help='model replacement with scaled weights') 152 | 153 | 154 | parser.add_argument('--fine_tuning_start_round', 155 | default=300, 156 | type=int, 157 | help='the round that begin fine-tuning') 158 | 159 | parser.add_argument('--model_name', 160 | default="res", 161 | type=str, 162 | help='model name: res VGG9') 163 | 164 | parser.add_argument('--lr', 165 | default=0.1, 166 | type=float, 167 | help='learning rate') 168 | 169 | parser.add_argument('--NDC', 170 | default=1, 171 | type=int, 172 | help='norm difference clipping or not') 173 | 174 | args = parser.parse_args() 175 | 176 | dataset = args.dataset 177 | ###################################### The following are the training parameters 178 | attack_target = args.attack_target 179 | attack_type = args.attack_type 180 | 181 | attack_epoch = args.attack_epoch 182 | iterations_oneround = args.iteration 183 | niid = args.NIID 184 | args.attack_activate_round = attack_epoch 185 | attack_activate_round = args.attack_activate_round 186 | lr = args.lr 187 | 188 | size = args.num_users 189 | num_comm_ue = args.num_gpus*len(args.nodes.split(',')) 190 | batch_size = 32 191 | cp = 16 192 | model = args.model_name### VGG9 res 193 | epoches = args.fine_tuning_start_round + 100 194 | warmup_epoch = 0#5 195 | 196 | attacker_user_id = 10 ### The id of the attacker , it means that the 11-th user is an attacker 197 | 198 | ################################# write jobs for training 199 | # attack_target_list = [0,1,2,3,4,5,6,7,8,9] 200 | attack_target_list = [2] 201 | 202 | for attack_target_value in attack_target_list: 203 | 204 | args.file_name = f'Attack_type_{attack_type}_attack_target{attack_target_value}_base_image_class{args.base_image_class}_attack_activate_round{attack_activate_round}.sh' 205 | args.experiment_name = f'Cifar10_UE{size}_comUE{num_comm_ue}_NIID{args.NIID}_{model}_attack_type{attack_type}_attack_target{attack_target_value}_base_image_class{args.base_image_class}_attack_activate_round{attack_activate_round}_fine_tuning_start_round{args.fine_tuning_start_round}_total_epoches{epoches}' 206 | 207 | master_port = random.sample(range(10000,30000),1) 208 | master_port = str(master_port[0]) 209 | BASH_COMMAND_LIST = [] 210 | for rank in range(num_comm_ue): 211 | if len(args.nodes.split(',')) == 1: 212 | node = args.nodes 213 | master_node = node 214 | else: 215 | if rank < num_comm_ue/2: 216 | node = args.nodes.split(',')[0] 217 | else: 218 | node = args.nodes.split(',')[1] 219 | master_node = args.nodes.split(',')[0] 220 | 221 | comm = f" --nodelist={node} --gres=gpu:1 python FL_Backdoor.py --dataset {dataset} --model {model} --attack_epoch {attack_epoch} --weights_scale {args.weights_scale}"\ 222 | f" --lr {lr} --bs {batch_size} --cp {cp} --master_port {master_port} --attacker_user_id {num_comm_ue}"\ 223 | f" --num_comm_ue {num_comm_ue} --attack_activate_round {attack_activate_round}"\ 224 | f" --NIID {niid} --rank {rank} --size {size} --warmup_epoch {warmup_epoch} --NDC {args.NDC} --master_node {master_node}"\ 225 | f" --epoch {epoches} --experiment_name {args.experiment_name} --attack_target {attack_target_value} --fine_tuning_start_round {args.fine_tuning_start_round}"\ 226 | f" --attack_type {attack_type} --iteration {iterations_oneround} --base_image_class {args.base_image_class} --one_shot_attack {args.one_shot_attack}"\ 227 | f" >./logs/Rank{rank}_attack_target{attack_target}.log 2>./logs/Rank{rank}_attack_target{attack_target}.err" 228 | 229 | BASH_COMMAND_LIST.append(comm) 230 | 231 | script = get_script(args, BASH_COMMAND_LIST) 232 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/.gitignore: -------------------------------------------------------------------------------- 1 | # Wandb 2 | wandb/ 3 | 4 | logs/ 5 | bash_files/ 6 | saved* 7 | *.out 8 | data/ 9 | slurm_log 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | pip-wheel-metadata/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/IMDB.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | 5 | import pandas as pd 6 | import re 7 | 8 | with open('reviews.txt', 'r') as f: 9 | reviews = f.read() 10 | with open('labels.txt', 'r') as f: 11 | labels = f.read() 12 | print(reviews[:50]) 13 | print() 14 | print(labels[:26]) 15 | 16 | from string import punctuation 17 | 18 | all_text = ''.join([c for c in reviews if c not in punctuation]) 19 | reviews_split = all_text.split('\n') 20 | print ('Number of reviews :', len(reviews_split)) 21 | 22 | from collections import Counter 23 | all_text2 = ' '.join(reviews_split) 24 | # create a list of words 25 | words = all_text2.split() 26 | # Count all the words using Counter Method 27 | count_words = Counter(words) 28 | 29 | total_words = len(words) 30 | sorted_words = count_words.most_common(total_words) 31 | 32 | 33 | vocab_to_int = {w:i+1 for i, (w,c) in enumerate(sorted_words)} 34 | int_to_vocab = [0 for _ in range(len(sorted_words) + 1)] 35 | for vocab, index in vocab_to_int.items(): 36 | int_to_vocab[index] = vocab 37 | print(len(int_to_vocab)) 38 | 39 | 40 | 41 | from torch import save 42 | class Dictionary(object): 43 | def __init__(self, vocab_to_int, int_to_vocab): 44 | self.word2idx = vocab_to_int 45 | self.idx2word = int_to_vocab 46 | def __len__(self): 47 | return len(self.idx2word) 48 | dictionary = Dictionary(vocab_to_int, int_to_vocab) 49 | save(dictionary, "IMDB_dictionary.pt") 50 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/__init__.py: -------------------------------------------------------------------------------- 1 | #### init -------------------------------------------------------------------------------- /FL_Backdoor_NLP/data/dataset.txt: -------------------------------------------------------------------------------- 1 | reddit dataset should be saved here -------------------------------------------------------------------------------- /FL_Backdoor_NLP/helper.py: -------------------------------------------------------------------------------- 1 | from shutil import copyfile 2 | import datetime 3 | import math 4 | import torch 5 | 6 | from torch.autograd import Variable 7 | import logging 8 | import numpy as np 9 | import copy 10 | import random 11 | from torch.nn.functional import log_softmax 12 | import torch.nn.functional as F 13 | import os 14 | from copy import deepcopy 15 | 16 | torch.manual_seed(1) 17 | torch.cuda.manual_seed(1) 18 | 19 | random.seed(0) 20 | np.random.seed(0) 21 | 22 | class Helper: 23 | def __init__(self, params): 24 | self.target_model = None 25 | self.local_model = None 26 | 27 | self.train_data = None 28 | self.benign_test_data = None 29 | self.poisoned_data = None 30 | self.poisoned_test_data = None 31 | 32 | self.params = params 33 | self.best_loss = math.inf 34 | 35 | @staticmethod 36 | def get_weight_difference(weight1, weight2): 37 | difference = {} 38 | res = [] 39 | if type(weight2) == dict: 40 | for name, layer in weight1.items(): 41 | difference[name] = layer.data - weight2[name].data 42 | res.append(difference[name].view(-1)) 43 | else: 44 | for name, layer in weight2: 45 | difference[name] = weight1[name].data - layer.data 46 | res.append(difference[name].view(-1)) 47 | 48 | difference_flat = torch.cat(res) 49 | 50 | return difference, difference_flat 51 | 52 | @staticmethod 53 | def get_l2_norm(weight1, weight2): 54 | difference = {} 55 | res = [] 56 | if type(weight2) == dict: 57 | for name, layer in weight1.items(): 58 | difference[name] = layer.data - weight2[name].data 59 | res.append(difference[name].view(-1)) 60 | else: 61 | for name, layer in weight2: 62 | difference[name] = weight1[name].data - layer.data 63 | res.append(difference[name].view(-1)) 64 | 65 | difference_flat = torch.cat(res) 66 | 67 | l2_norm = torch.norm(difference_flat.clone().detach().cuda()) 68 | 69 | l2_norm_np = np.linalg.norm(difference_flat.cpu().numpy()) 70 | 71 | return l2_norm, l2_norm_np 72 | 73 | @staticmethod 74 | def clip_grad(norm_bound, weight_difference, difference_flat): 75 | l2_norm = torch.norm(difference_flat.clone().detach().cuda()) 76 | scale = max(1.0, float(torch.abs(l2_norm / norm_bound))) 77 | for name in weight_difference.keys(): 78 | weight_difference[name].div_(scale) 79 | # weight_difference[name] /= scale 80 | 81 | return weight_difference, l2_norm 82 | 83 | def grad_mask(self, helper, model, dataset_clearn, criterion, ratio=0.5): 84 | """Generate a gradient mask based on the given dataset""" 85 | model.train() 86 | model.zero_grad() 87 | hidden = model.init_hidden(helper.params['batch_size']) 88 | for participant_id in range(len(dataset_clearn)): 89 | train_data = dataset_clearn[participant_id] 90 | if helper.params['task'] == 'word_predict': 91 | data_iterator = range(0, train_data.size(0) - 1, helper.params['sequence_length']) 92 | ntokens = 50000 93 | for batch in data_iterator: 94 | model.train() 95 | data, targets = helper.get_batch(train_data, batch) 96 | hidden = helper.repackage_hidden(hidden) 97 | output, hidden = model(data, hidden) 98 | class_loss = criterion(output.view(-1, ntokens), targets) 99 | class_loss.backward(retain_graph=True) 100 | elif helper.params['task'] == 'sentiment': 101 | for inputs, labels in train_data: 102 | inputs, labels = inputs.cuda(), labels.cuda() 103 | hidden = helper.repackage_hidden(hidden) 104 | inputs = inputs.type(torch.LongTensor).cuda() 105 | output, hidden = model(inputs, hidden) 106 | loss = criterion(output.squeeze(), labels.float()) 107 | loss.backward(retain_graph=True) 108 | else: 109 | raise ValueError("Unkonwn task") 110 | mask_grad_list = [] 111 | if helper.params['aggregate_all_layer'] == 1: 112 | grad_list = [] 113 | for _, parms in model.named_parameters(): 114 | if parms.requires_grad: 115 | grad_list.append(parms.grad.abs().view(-1)) 116 | grad_list = torch.cat(grad_list).cuda() 117 | _, indices = torch.topk(-1*grad_list, int(len(grad_list)*ratio)) 118 | indices = list(indices.cpu().numpy()) 119 | count = 0 120 | for _, parms in model.named_parameters(): 121 | if parms.requires_grad: 122 | count_list = list(range(count, count + len(parms.grad.abs().view(-1)))) 123 | index_list = list(set(count_list).intersection(set(indices))) 124 | mask_flat = np.zeros( count + len(parms.grad.abs().view(-1)) ) 125 | 126 | mask_flat[index_list] = 1.0 127 | mask_flat = mask_flat[count:count + len(parms.grad.abs().view(-1))] 128 | mask = list(mask_flat.reshape(parms.grad.abs().size())) 129 | 130 | mask = torch.from_numpy(np.array(mask, dtype='float32')).cuda() 131 | mask_grad_list.append(mask) 132 | count += len(parms.grad.abs().view(-1)) 133 | else: 134 | for _, parms in model.named_parameters(): 135 | if parms.requires_grad: 136 | gradients = parms.grad.abs().view(-1) 137 | gradients_length = len(gradients) 138 | _, indices = torch.topk(-1*gradients, int(gradients_length*ratio)) 139 | mask_flat = torch.zeros(gradients_length) 140 | mask_flat[indices.cpu()] = 1.0 141 | mask_grad_list.append(mask_flat.reshape(parms.grad.size()).cuda()) 142 | model.zero_grad() 143 | return mask_grad_list 144 | 145 | 146 | def grad_mask_gpt2(self, helper, model, dataset_clearn, criterion, ratio=0.5): 147 | """Generate a gradient mask based on the given dataset""" 148 | model.train() 149 | model.zero_grad() 150 | for i in range(len(dataset_clearn)): 151 | train_dataloader = dataset_clearn[i] 152 | for batch_id, batch in enumerate(train_dataloader): 153 | model.train() 154 | 155 | data1, data2 = batch['input_ids'], batch['attention_mask'] 156 | # data1, data2 = data1.cuda(), data2.cuda() 157 | 158 | data1 = [x.unsqueeze(0) for x in data1] 159 | data2 = [x.unsqueeze(0) for x in data2] 160 | 161 | data1 = torch.cat(data1).transpose(0,1) 162 | data2 = torch.cat(data2).transpose(0,1) 163 | 164 | input_ids = data1[:,0:0+helper.params['sequence_length']] 165 | att_masks = data2[:,0:0+helper.params['sequence_length']] 166 | 167 | target = data1[:,1:1+helper.params['sequence_length']].reshape(-1) 168 | 169 | input_ids, att_masks, target = input_ids.cuda(), att_masks.cuda(), target.cuda() 170 | 171 | output = model(input_ids, attention_mask=att_masks).logits 172 | 173 | loss = criterion(output.contiguous().view(-1, self.n_tokens), target) 174 | loss.backward(retain_graph=True) 175 | 176 | ######## debug: 177 | torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) 178 | 179 | mask_grad_list = [] 180 | 181 | for _, parms in model.named_parameters(): 182 | if parms.requires_grad: 183 | gradients = parms.grad.abs().view(-1) 184 | gradients_length = len(gradients) 185 | _, indices = torch.topk(-1*gradients, int(gradients_length*ratio)) 186 | mask_flat = torch.zeros(gradients_length) 187 | mask_flat[indices.cpu()] = 1.0 188 | mask_grad_list.append(mask_flat.reshape(parms.grad.size()).cuda()) 189 | 190 | model.zero_grad() 191 | return mask_grad_list 192 | 193 | def lr_decay(self, epoch): 194 | # return 1 * (0.995 ** epoch) 195 | # if self.params['dataset'] == 'IMDB': 196 | # return 0.1 197 | return 1 198 | # return 1 199 | # return 1 - (epoch - 1) / self.params['end_epoch'] 200 | # return 1 / math.sqrt(epoch + 1) 201 | # return max(1 - (epoch - 1) / 250, 0.05) 202 | 203 | @staticmethod 204 | def dp_noise(param, sigma=0.001): 205 | 206 | noised_layer = torch.cuda.FloatTensor(param.shape).normal_(mean=0, std=sigma) 207 | 208 | return noised_layer 209 | 210 | def average_shrink_models(self, weight_accumulator, target_model, epoch, wandb): 211 | """ 212 | Perform FedAvg algorithm and perform some clustering on top of it. 213 | 214 | """ 215 | lr = self.lr_decay(epoch) 216 | wandb.log({ 'global lr': lr, 'epoch': epoch}) 217 | for name, data in target_model.state_dict().items(): 218 | if self.params.get('tied', False) and name == 'decoder.weight': 219 | print('skipping') 220 | continue 221 | update_per_layer = weight_accumulator[name] * \ 222 | (1/self.params['partipant_sample_size']) * \ 223 | lr 224 | update_per_layer = torch.tensor(update_per_layer,dtype=data.dtype) 225 | 226 | update_per_layer = update_per_layer.cuda() 227 | if self.params['diff_privacy']: 228 | if 'LongTensor' in update_per_layer.type(): 229 | pass 230 | else: 231 | update_per_layer.add_(self.dp_noise(data).cuda()) 232 | 233 | data.add_(update_per_layer) 234 | 235 | return True 236 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/TransformerModel.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn import TransformerEncoder, TransformerEncoderLayer 7 | 8 | class TransformerModel(nn.Module): 9 | 10 | def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5): 11 | super(TransformerModel, self).__init__() 12 | self.model_type = 'Transformer' 13 | self.pos_encoder = PositionalEncoding(ninp, dropout) 14 | encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout) 15 | self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers) 16 | self.encoder = nn.Embedding(ntoken, ninp) 17 | self.ninp = ninp 18 | self.decoder = nn.Linear(ninp, ntoken) 19 | 20 | self.init_weights() 21 | 22 | def generate_square_subsequent_mask(self, sz): 23 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) 24 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) 25 | return mask 26 | 27 | def copy_params(self, state_dict, coefficient_transfer=100): 28 | 29 | own_state = self.state_dict() 30 | 31 | for name, param in state_dict.items(): 32 | if name in own_state: 33 | own_state[name].copy_(param.clone()) 34 | 35 | def init_weights(self): 36 | initrange = 0.1 37 | self.encoder.weight.data.uniform_(-initrange, initrange) 38 | self.decoder.bias.data.zero_() 39 | self.decoder.weight.data.uniform_(-initrange, initrange) 40 | 41 | def return_embedding_matrix(self): 42 | return self.encoder.weight.data 43 | 44 | def forward(self, src, src_mask): 45 | src = self.encoder(src) * math.sqrt(self.ninp) 46 | src = self.pos_encoder(src) 47 | output = self.transformer_encoder(src, src_mask) 48 | output = self.decoder(output) 49 | return output 50 | 51 | class PositionalEncoding(nn.Module): 52 | 53 | def __init__(self, d_model, dropout=0.1, max_len=5000): 54 | super(PositionalEncoding, self).__init__() 55 | self.dropout = nn.Dropout(p=dropout) 56 | 57 | pe = torch.zeros(max_len, d_model) 58 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 59 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 60 | pe[:, 0::2] = torch.sin(position * div_term) 61 | pe[:, 1::2] = torch.cos(position * div_term) 62 | pe = pe.unsqueeze(0).transpose(0, 1) 63 | self.register_buffer('pe', pe) 64 | 65 | def forward(self, x): 66 | x = x + self.pe[:x.size(0), :] 67 | return self.dropout(x) 68 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/__init__.py: -------------------------------------------------------------------------------- 1 | #### init -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/__pycache__/TransformerModel.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_NLP/models/__pycache__/TransformerModel.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_NLP/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/__pycache__/simple.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_NLP/models/__pycache__/simple.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/__pycache__/word_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_NLP/models/__pycache__/word_model.cpython-36.pyc -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/cifar_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from models.simple import SimpleNet 5 | 6 | 7 | class CifarNet(SimpleNet): 8 | def __init__(self, name=None, created_time=None): 9 | super(CifarNet, self).__init__(f'{name}_Simple', created_time) 10 | self.conv1 = nn.Conv2d(3, 6, 5) 11 | self.pool = nn.MaxPool2d(2, 2) 12 | self.conv2 = nn.Conv2d(6, 16, 5) 13 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 14 | self.fc2 = nn.Linear(120, 84) 15 | self.fc3 = nn.Linear(84, 10) 16 | 17 | def forward(self, x): 18 | x = self.pool(F.relu(self.conv1(x))) 19 | x = self.pool(F.relu(self.conv2(x))) 20 | x = x.view(-1, 16 * 5 * 5) 21 | x = F.relu(self.fc1(x)) 22 | x = F.relu(self.fc2(x)) 23 | x = self.fc3(x) 24 | return x -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/densenet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from models.simple import SimpleNet 6 | 7 | class BasicBlock(nn.Module): 8 | def __init__(self, in_planes, out_planes, dropRate=0.0): 9 | super(BasicBlock, self).__init__() 10 | self.bn1 = nn.BatchNorm2d(in_planes) 11 | self.relu = nn.ReLU(inplace=True) 12 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, 13 | padding=1, bias=False) 14 | self.droprate = dropRate 15 | def forward(self, x): 16 | out = self.conv1(self.relu(self.bn1(x))) 17 | if self.droprate > 0: 18 | out = F.dropout(out, p=self.droprate, training=self.training) 19 | return torch.cat([x, out], 1) 20 | 21 | class BottleneckBlock(nn.Module): 22 | def __init__(self, in_planes, out_planes, dropRate=0.0): 23 | super(BottleneckBlock, self).__init__() 24 | inter_planes = out_planes * 4 25 | self.bn1 = nn.BatchNorm2d(in_planes) 26 | self.relu = nn.ReLU(inplace=True) 27 | self.conv1 = nn.Conv2d(in_planes, inter_planes, kernel_size=1, stride=1, 28 | padding=0, bias=False) 29 | self.bn2 = nn.BatchNorm2d(inter_planes) 30 | self.conv2 = nn.Conv2d(inter_planes, out_planes, kernel_size=3, stride=1, 31 | padding=1, bias=False) 32 | self.droprate = dropRate 33 | def forward(self, x): 34 | out = self.conv1(self.relu(self.bn1(x))) 35 | if self.droprate > 0: 36 | out = F.dropout(out, p=self.droprate, inplace=False, training=self.training) 37 | out = self.conv2(self.relu(self.bn2(out))) 38 | if self.droprate > 0: 39 | out = F.dropout(out, p=self.droprate, inplace=False, training=self.training) 40 | return torch.cat([x, out], 1) 41 | 42 | class TransitionBlock(nn.Module): 43 | def __init__(self, in_planes, out_planes, dropRate=0.0): 44 | super(TransitionBlock, self).__init__() 45 | self.bn1 = nn.BatchNorm2d(in_planes) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, 48 | padding=0, bias=False) 49 | self.droprate = dropRate 50 | def forward(self, x): 51 | out = self.conv1(self.relu(self.bn1(x))) 52 | if self.droprate > 0: 53 | out = F.dropout(out, p=self.droprate, inplace=False, training=self.training) 54 | return F.avg_pool2d(out, 2) 55 | 56 | class DenseBlock(nn.Module): 57 | def __init__(self, nb_layers, in_planes, growth_rate, block, dropRate=0.0): 58 | super(DenseBlock, self).__init__() 59 | self.layer = self._make_layer(block, in_planes, growth_rate, nb_layers, dropRate) 60 | def _make_layer(self, block, in_planes, growth_rate, nb_layers, dropRate): 61 | layers = [] 62 | for i in range(nb_layers): 63 | layers.append(block(in_planes+i*growth_rate, growth_rate, dropRate)) 64 | return nn.Sequential(*layers) 65 | def forward(self, x): 66 | return self.layer(x) 67 | 68 | class DenseNet3(SimpleNet): 69 | def __init__(self, depth=100, num_classes=10, growth_rate=12, 70 | reduction=0.5, bottleneck=True, dropRate=0.0, name=None, created_time=None): 71 | super(DenseNet3, self).__init__(name='{0}_DenseNet_50'.format(name), created_time=created_time) 72 | in_planes = 2 * growth_rate 73 | n = (depth - 4) / 3 74 | if bottleneck == True: 75 | n = n/2 76 | block = BottleneckBlock 77 | else: 78 | block = BasicBlock 79 | n = int(n) 80 | # 1st conv before any dense block 81 | self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=1, 82 | padding=1, bias=False) 83 | # 1st block 84 | self.block1 = DenseBlock(n, in_planes, growth_rate, block, dropRate) 85 | in_planes = int(in_planes+n*growth_rate) 86 | self.trans1 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate) 87 | in_planes = int(math.floor(in_planes*reduction)) 88 | # 2nd block 89 | self.block2 = DenseBlock(n, in_planes, growth_rate, block, dropRate) 90 | in_planes = int(in_planes+n*growth_rate) 91 | self.trans2 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)), dropRate=dropRate) 92 | in_planes = int(math.floor(in_planes*reduction)) 93 | # 3rd block 94 | self.block3 = DenseBlock(n, in_planes, growth_rate, block, dropRate) 95 | in_planes = int(in_planes+n*growth_rate) 96 | # global average pooling and classifier 97 | self.bn1 = nn.BatchNorm2d(in_planes) 98 | self.relu = nn.ReLU(inplace=True) 99 | self.fc = nn.Linear(in_planes, num_classes) 100 | self.in_planes = in_planes 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 105 | m.weight.data.normal_(0, math.sqrt(2. / n)) 106 | elif isinstance(m, nn.BatchNorm2d): 107 | m.weight.data.fill_(1) 108 | m.bias.data.zero_() 109 | elif isinstance(m, nn.Linear): 110 | m.bias.data.zero_() 111 | def forward(self, x): 112 | out = self.conv1(x) 113 | out = self.trans1(self.block1(out)) 114 | out = self.trans2(self.block2(out)) 115 | out = self.block3(out) 116 | out = self.relu(self.bn1(out)) 117 | out = F.avg_pool2d(out, 8) 118 | out = out.view(-1, self.in_planes) 119 | return self.fc(out) -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/model_c.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from models.simple import SimpleNet 5 | 6 | 7 | class ModelC(SimpleNet): 8 | def __init__(self, name=None, created_time=None): 9 | super(ModelC, self).__init__(f'{name}_ModelC', created_time) 10 | self.conv1 = nn.Conv2d(3, 6, 5) 11 | self.pool = nn.MaxPool2d(2, 2) 12 | self.conv2 = nn.Conv2d(6, 16, 5) 13 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 14 | self.fc2 = nn.Linear(120, 84) 15 | self.fc3 = nn.Linear(84, 10) 16 | 17 | def forward(self, x): 18 | x = self.pool(F.relu(self.conv1(x))) 19 | x = self.pool(F.relu(self.conv2(x))) 20 | x = x.view(-1, 16 * 5 * 5) 21 | x = F.relu(self.fc1(x)) 22 | x = F.relu(self.fc2(x)) 23 | x = self.fc3(x) 24 | return x -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/pytorch_resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | from models.simple import SimpleNet 6 | 7 | __all__ = ['ResNet', 'pt_resnet18', 'pt_resnet34', 'pt_resnet50', 'pt_resnet101', 8 | 'pt_resnet152'] 9 | 10 | 11 | model_urls = { 12 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 13 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 14 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 15 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 16 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 17 | } 18 | 19 | 20 | def conv3x3(in_planes, out_planes, stride=1): 21 | """3x3 convolution with padding""" 22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 23 | padding=1, bias=False) 24 | 25 | 26 | class BasicBlock(nn.Module): 27 | expansion = 1 28 | 29 | def __init__(self, inplanes, planes, stride=1, downsample=None): 30 | super(BasicBlock, self).__init__() 31 | self.conv1 = conv3x3(inplanes, planes, stride) 32 | self.bn1 = nn.BatchNorm2d(planes) 33 | self.relu = nn.ReLU(inplace=True) 34 | self.conv2 = conv3x3(planes, planes) 35 | self.bn2 = nn.BatchNorm2d(planes) 36 | self.downsample = downsample 37 | self.stride = stride 38 | 39 | def forward(self, x): 40 | residual = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | 49 | if self.downsample is not None: 50 | residual = self.downsample(x) 51 | 52 | out += residual 53 | out = self.relu(out) 54 | 55 | return out 56 | 57 | 58 | class Bottleneck(nn.Module): 59 | expansion = 4 60 | 61 | def __init__(self, inplanes, planes, stride=1, downsample=None): 62 | super(Bottleneck, self).__init__() 63 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 64 | self.bn1 = nn.BatchNorm2d(planes) 65 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 66 | padding=1, bias=False) 67 | self.bn2 = nn.BatchNorm2d(planes) 68 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 69 | self.bn3 = nn.BatchNorm2d(planes * 4) 70 | self.relu = nn.ReLU(inplace=True) 71 | self.downsample = downsample 72 | self.stride = stride 73 | 74 | def forward(self, x): 75 | residual = x 76 | 77 | out = self.conv1(x) 78 | out = self.bn1(out) 79 | out = self.relu(out) 80 | 81 | out = self.conv2(out) 82 | out = self.bn2(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv3(out) 86 | out = self.bn3(out) 87 | 88 | if self.downsample is not None: 89 | residual = self.downsample(x) 90 | 91 | out += residual 92 | out = self.relu(out) 93 | 94 | return out 95 | 96 | 97 | class ResNet(SimpleNet): 98 | 99 | def __init__(self, block, layers, num_classes=1000, name=None, created_time=None): 100 | self.inplanes = 64 101 | super(ResNet, self).__init__(name, created_time) 102 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 103 | bias=False) 104 | self.bn1 = nn.BatchNorm2d(64) 105 | self.relu = nn.ReLU(inplace=True) 106 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 107 | self.layer1 = self._make_layer(block, 32, layers[0]) 108 | self.layer2 = self._make_layer(block, 64, layers[1], stride=2) 109 | self.layer3 = self._make_layer(block, 128, layers[2], stride=2) 110 | self.layer4 = self._make_layer(block, 256, layers[3], stride=2) 111 | self.avgpool = nn.AvgPool2d(7, stride=1) 112 | self.fc = nn.Linear(512 * block.expansion, num_classes) 113 | 114 | for m in self.modules(): 115 | if isinstance(m, nn.Conv2d): 116 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 117 | m.weight.data.normal_(0, math.sqrt(2. / n)) 118 | elif isinstance(m, nn.BatchNorm2d): 119 | m.weight.data.fill_(1) 120 | m.bias.data.zero_() 121 | 122 | def _make_layer(self, block, planes, blocks, stride=1): 123 | downsample = None 124 | if stride != 1 or self.inplanes != planes * block.expansion: 125 | downsample = nn.Sequential( 126 | nn.Conv2d(self.inplanes, planes * block.expansion, 127 | kernel_size=1, stride=stride, bias=False), 128 | nn.BatchNorm2d(planes * block.expansion), 129 | ) 130 | 131 | layers = [] 132 | layers.append(block(self.inplanes, planes, stride, downsample)) 133 | self.inplanes = planes * block.expansion 134 | for i in range(1, blocks): 135 | layers.append(block(self.inplanes, planes)) 136 | 137 | return nn.Sequential(*layers) 138 | 139 | def forward(self, x): 140 | x = self.conv1(x) 141 | x = self.bn1(x) 142 | x = self.relu(x) 143 | x = self.maxpool(x) 144 | 145 | x = self.layer1(x) 146 | x = self.layer2(x) 147 | x = self.layer3(x) 148 | x = self.layer4(x) 149 | 150 | x = self.avgpool(x) 151 | x = x.view(x.size(0), -1) 152 | x = self.fc(x) 153 | 154 | return x 155 | 156 | 157 | def pt_resnet18(name=None, created_time=None, pretrained=False, **kwargs): 158 | """Constructs a ResNet-18 model. 159 | 160 | Args: 161 | pretrained (bool): If True, returns a model pre-trained on ImageNet 162 | """ 163 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs, name=name, created_time=created_time,) 164 | if pretrained: 165 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 166 | return model 167 | 168 | 169 | def pt_resnet34(name=None, created_time=None, pretrained=False, **kwargs): 170 | """Constructs a ResNet-34 model. 171 | 172 | Args: 173 | pretrained (bool): If True, returns a model pre-trained on ImageNet 174 | """ 175 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs, name=name, created_time=created_time,) 176 | if pretrained: 177 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 178 | return model 179 | 180 | 181 | def pt_resnet50(name=None, created_time=None, pretrained=False, **kwargs): 182 | """Constructs a ResNet-50 model. 183 | 184 | Args: 185 | pretrained (bool): If True, returns a model pre-trained on ImageNet 186 | """ 187 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs, name=name, created_time=created_time,) 188 | if pretrained: 189 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 190 | return model 191 | 192 | 193 | def pt_resnet101(name=None, created_time=None, pretrained=False, **kwargs): 194 | """Constructs a ResNet-101 model. 195 | 196 | Args: 197 | pretrained (bool): If True, returns a model pre-trained on ImageNet 198 | """ 199 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs, name=name, created_time=created_time,) 200 | if pretrained: 201 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 202 | return model 203 | 204 | 205 | def pt_resnet152(name=None, created_time=None, pretrained=False, **kwargs): 206 | """Constructs a ResNet-152 model. 207 | 208 | Args: 209 | pretrained (bool): If True, returns a model pre-trained on ImageNet 210 | """ 211 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs, name=name, created_time=created_time) 212 | if pretrained: 213 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 214 | return model -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | For Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from models.simple import SimpleNet 11 | from torch.autograd import Variable 12 | 13 | 14 | class BasicBlock(nn.Module): 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, stride=1): 18 | super(BasicBlock, self).__init__() 19 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn1 = nn.BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = self.bn2(self.conv2(out)) 34 | out += self.shortcut(x) 35 | out = F.relu(out) 36 | return out 37 | 38 | 39 | class Bottleneck(nn.Module): 40 | expansion = 4 41 | 42 | def __init__(self, in_planes, planes, stride=1): 43 | super(Bottleneck, self).__init__() 44 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 47 | self.bn2 = nn.BatchNorm2d(planes) 48 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 49 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 50 | 51 | self.shortcut = nn.Sequential() 52 | if stride != 1 or in_planes != self.expansion*planes: 53 | self.shortcut = nn.Sequential( 54 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 55 | nn.BatchNorm2d(self.expansion*planes) 56 | ) 57 | 58 | def forward(self, x): 59 | out = F.relu(self.bn1(self.conv1(x))) 60 | out = F.relu(self.bn2(self.conv2(out))) 61 | out = self.bn3(self.conv3(out)) 62 | out += self.shortcut(x) 63 | out = F.relu(out) 64 | return out 65 | 66 | 67 | class ResNet(SimpleNet): 68 | def __init__(self, block, num_blocks, num_classes=10, name=None, created_time=None): 69 | super(ResNet, self).__init__(name, created_time) 70 | self.in_planes = 32 71 | 72 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) 73 | self.bn1 = nn.BatchNorm2d(32) 74 | self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1) 75 | self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2) 76 | self.layer3 = self._make_layer(block, 128, num_blocks[2], stride=2) 77 | self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2) 78 | self.linear = nn.Linear(256*block.expansion, num_classes) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def forward(self, x): 89 | out = F.relu(self.bn1(self.conv1(x))) 90 | out = self.layer1(out) 91 | out = self.layer2(out) 92 | out = self.layer3(out) 93 | out = self.layer4(out) 94 | out = F.avg_pool2d(out, 4) 95 | out = out.view(out.size(0), -1) 96 | out = self.linear(out) 97 | return out 98 | 99 | 100 | def ResNet18(name=None, created_time=None): 101 | return ResNet(BasicBlock, [2,2,2,2],name='{0}_ResNet_18'.format(name), created_time=created_time) 102 | 103 | def ResNet34(name=None, created_time=None): 104 | return ResNet(BasicBlock, [3,4,6,3],name='{0}_ResNet_34'.format(name), created_time=created_time) 105 | 106 | def ResNet50(name=None, created_time=None): 107 | return ResNet(Bottleneck, [3,4,6,3],name='{0}_ResNet_50'.format(name), created_time=created_time) 108 | 109 | def ResNet101(name=None, created_time=None): 110 | return ResNet(Bottleneck, [3,4,23,3],name='{0}_ResNet'.format(name), created_time=created_time) 111 | 112 | def ResNet152(name=None, created_time=None): 113 | return ResNet(Bottleneck, [3,8,36,3],name='{0}_ResNet'.format(name), created_time=created_time) 114 | 115 | 116 | def test(): 117 | net = ResNet18() 118 | y = net(Variable(torch.randn(1,3,32,32))) 119 | print(y.size()) 120 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/simple.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class SimpleNet(nn.Module): 6 | def __init__(self, name=None): 7 | super(SimpleNet, self).__init__() 8 | self.name=name 9 | 10 | 11 | def save_stats(self, epoch, loss, acc): 12 | self.stats['epoch'].append(epoch) 13 | self.stats['loss'].append(loss) 14 | self.stats['acc'].append(acc) 15 | 16 | 17 | def copy_params(self, state_dict, coefficient_transfer=100): 18 | 19 | own_state = self.state_dict() 20 | 21 | for name, param in state_dict.items(): 22 | if name in own_state: 23 | own_state[name].copy_(param.clone()) 24 | 25 | 26 | 27 | 28 | class SimpleMnist(SimpleNet): 29 | def __init__(self, name=None): 30 | super(SimpleMnist, self).__init__(name) 31 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 32 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 33 | self.conv2_drop = nn.Dropout2d() 34 | self.fc1 = nn.Linear(320, 50) 35 | self.fc2 = nn.Linear(50, 10) 36 | 37 | 38 | def forward(self, x): 39 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 40 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 41 | x = x.view(-1, 320) 42 | x = F.relu(self.fc1(x)) 43 | x = F.dropout(x, training=self.training) 44 | x = self.fc2(x) 45 | return F.log_softmax(x, dim=1) -------------------------------------------------------------------------------- /FL_Backdoor_NLP/models/word_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Variable 3 | 4 | from models.simple import SimpleNet 5 | import torch 6 | 7 | extracted_grads = [] 8 | def extract_grad_hook(module, grad_in, grad_out): 9 | # print(grad_out) 10 | extracted_grads.append(grad_out[0]) 11 | 12 | class RNNModel(SimpleNet): 13 | """Container module with an encoder, a recurrent module, and a decoder.""" 14 | 15 | def __init__(self, name, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False, binary=False): 16 | super(RNNModel, self).__init__(name=name) 17 | if binary: 18 | self.encoder = nn.Embedding(ntoken, ninp) 19 | # self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=0.5) 20 | self.lstm = nn.LSTM(ninp, nhid, nlayers, dropout=0.5, batch_first=True) 21 | self.drop = nn.Dropout(dropout) 22 | self.decoder = nn.Linear(nhid, 1) 23 | self.sig = nn.Sigmoid() 24 | else: 25 | self.drop = nn.Dropout(dropout) 26 | self.encoder = nn.Embedding(ntoken, ninp) 27 | 28 | # self.encoder.register_backward_hook(extract_grad_hook) 29 | 30 | if rnn_type in ['LSTM', 'GRU']: 31 | self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout) 32 | else: 33 | try: 34 | nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type] 35 | except KeyError: 36 | raise ValueError( """An invalid option for `--model` was supplied, 37 | options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""") 38 | self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout) 39 | 40 | self.decoder = nn.Linear(nhid, ntoken) 41 | 42 | # Optionally tie weights as in: 43 | # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016) 44 | # https://arxiv.org/abs/1608.05859 45 | # and 46 | # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016) 47 | # https://arxiv.org/abs/1611.01462 48 | 49 | if tie_weights: 50 | if nhid != ninp: 51 | raise ValueError('When using the tied flag, nhid must be equal to emsize') 52 | self.decoder.weight = self.encoder.weight 53 | 54 | self.rnn_type = rnn_type 55 | self.nhid = nhid 56 | self.nlayers = nlayers 57 | self.binary = binary 58 | 59 | def init_weights(self): 60 | initrange = 0.1 61 | self.encoder.weight.data.uniform_(-initrange, initrange) 62 | self.decoder.bias.data.fill_(0) 63 | self.decoder.weight.data.uniform_(-initrange, initrange) 64 | 65 | def return_embedding_matrix(self): 66 | return self.encoder.weight.data 67 | 68 | def embedding_t(self,input): 69 | input = input.type(torch.LongTensor) 70 | input = input.cuda() 71 | # emb = self.drop(self.encoder(input)) 72 | emb = self.encoder(input) 73 | return emb 74 | 75 | def forward(self, input, hidden, latern=False, emb=None): 76 | # input = input.type(torch.LongTensor) 77 | # input = input.cuda() 78 | # emb = self.embedding_t(input) 79 | if self.binary: 80 | batch_size = input.size(0) 81 | emb = self.encoder(input) 82 | output, hidden = self.lstm(emb, hidden) 83 | output = output.contiguous().view(-1, self.nhid) 84 | out = self.drop(output) 85 | out = self.decoder(out) 86 | sig_out = self.sig(out) 87 | sig_out = sig_out.view(batch_size, -1) 88 | sig_out = sig_out[:, -1] 89 | return sig_out, hidden 90 | 91 | else: 92 | if emb is None: 93 | emb = self.drop(self.encoder(input)) 94 | 95 | output, hidden = self.rnn(emb, hidden) 96 | output = self.drop(output) 97 | 98 | #### use output = self.drop(output) as output features 99 | decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2))) 100 | if latern: 101 | return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden, emb 102 | else: 103 | return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden 104 | 105 | 106 | def init_hidden(self, bsz): 107 | weight = next(self.parameters()).data 108 | if self.rnn_type == 'LSTM': 109 | return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()), 110 | Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())) 111 | else: 112 | return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()) 113 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/notes.md: -------------------------------------------------------------------------------- 1 | 1. Changed start epoch naming 2 | 2. changed attack_all_layer to aggregate_all_layer 3 | 3. bptt is now sequence length -------------------------------------------------------------------------------- /FL_Backdoor_NLP/results/DP.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jhcknzzm/Federated-Learning-Backdoor/21a9eafc00240b6eea7c2c202bcb8249ee880491/FL_Backdoor_NLP/results/DP.PNG -------------------------------------------------------------------------------- /FL_Backdoor_NLP/test_dataset/test_dataset.txt: -------------------------------------------------------------------------------- 1 | reddit dataset for test GPT2 will be saved here -------------------------------------------------------------------------------- /FL_Backdoor_NLP/test_funcs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import copy 4 | import math 5 | import wandb 6 | 7 | def test_reddit_lstm(helper, epoch, data_source, model, criterion, poisoned=False): 8 | model.eval() 9 | total_loss = 0.0 10 | correct = 0.0 11 | total_test_words = 0.0 12 | batch_size = helper.params['test_batch_size'] 13 | hidden = model.init_hidden(helper.params['test_batch_size']) 14 | 15 | data_iterator = range(0, data_source.size(0) - 1, helper.params['sequence_length']) 16 | 17 | with torch.no_grad(): 18 | for batch_id, batch in enumerate(data_iterator): 19 | data, targets = helper.get_batch(data_source, batch) 20 | if data.size(0) != helper.params['sequence_length']: 21 | continue 22 | hidden = helper.repackage_hidden(hidden) 23 | output, hidden = model(data, hidden) 24 | output_flat = output.view(-1, helper.n_tokens) 25 | 26 | if poisoned: 27 | if len(helper.params['target_labeled']) == 0: 28 | total_loss += criterion(output_flat[-batch_size:], targets[-batch_size:]).item() 29 | else: 30 | out_tmp = output[-1:].view(-1, helper.n_tokens) 31 | preds = torch.nn.functional.softmax(out_tmp, dim=1) 32 | preds = torch.sum(preds[:,list(set(helper.params['target_labeled']))], dim=1) 33 | mean_semantic_target_loss = -torch.mean(torch.log(preds), dim=0).item() 34 | total_loss += mean_semantic_target_loss 35 | 36 | pred = output_flat.data.max(1)[1][-batch_size:] 37 | if len(helper.params['target_labeled']) == 0: 38 | correct_output = targets.data[-batch_size:] 39 | correct += pred.eq(correct_output).sum() 40 | else: 41 | for target_id in set(helper.params['target_labeled']): 42 | tmp = torch.ones_like(targets.data[-batch_size:])*target_id 43 | correct_output = tmp.cuda() 44 | correct += pred.eq(correct_output).sum() 45 | total_test_words += batch_size 46 | else: 47 | total_loss += len(data) * criterion(output_flat, targets).item() 48 | pred = output_flat.data.max(1)[1] 49 | correct += pred.eq(targets.data).sum().to(dtype=torch.float) 50 | total_test_words += targets.data.shape[0] 51 | acc = 100.0 * (float(correct.item()) / float(total_test_words)) 52 | total_l = total_loss / total_test_words 53 | print('___Test poisoned: {}, epoch: {}, Average loss: {:.4f}, ' 54 | 'Accuracy: {}/{} ({:.0f}%)'.format( True, epoch, total_l, correct, total_test_words, acc)) 55 | model.train() 56 | return total_l, acc 57 | 58 | def test_sentiment(helper, epoch, data_source, model, criterion, poisoned=False): 59 | model.eval() 60 | total_loss = 0 61 | correct = 0 62 | total_test_words = 0 63 | hidden = model.init_hidden(helper.params['test_batch_size']) 64 | 65 | with torch.no_grad(): 66 | for inputs, labels in data_source: 67 | hidden = helper.repackage_hidden(hidden) 68 | inputs, labels = inputs.cuda(), labels.cuda() 69 | inputs = inputs.type(torch.LongTensor).cuda() 70 | output, hidden = model(inputs, hidden) 71 | total_loss += criterion(output.squeeze(), labels.float()) 72 | total_test_words += len(labels) 73 | output = output > 0.5 74 | correct += (output == labels).sum().item() 75 | acc = np.around(100.0 * (float(correct) / float(total_test_words)), 4) 76 | total_l = np.around((total_loss / total_test_words).cpu().item(), 4) 77 | 78 | print('___Test poisoned: {}, epoch: {}, Average loss: {:.4f}, ' 79 | 'Accuracy: {}/{} ({:.4f}%)'.format(poisoned, epoch, 80 | total_l, correct, total_test_words, 81 | acc)) 82 | model.train() 83 | return (total_l, acc) 84 | 85 | def test_reddit_gpt2(helper, epoch, data_source, model, criterion, poisoned=False): 86 | model.eval() 87 | total_loss = 0 88 | correct = 0 89 | total_test_words = 0 90 | 91 | with torch.no_grad(): 92 | for batch_id, batch in enumerate(data_source): 93 | data1, data2 = batch['input_ids'], batch['attention_mask'] 94 | data1 = [x.unsqueeze(0) for x in data1] 95 | data2 = [x.unsqueeze(0) for x in data2] 96 | data1 = torch.cat(data1).transpose(0,1) 97 | data2 = torch.cat(data2).transpose(0,1) 98 | if poisoned: 99 | for iii in range(data1.size(0)): 100 | ### Embed poisoned sentences into source data 101 | poision_sen = helper.poison_sentences[iii % len(helper.poison_sentences)] 102 | input = helper.tokenizer(poision_sen, return_tensors='pt') 103 | input_idx = input['input_ids'] 104 | data1[iii,-input_idx.size(1):] = input_idx[0,:] 105 | input_ids = data1[:, 0: 0 + helper.params['sequence_length']] 106 | att_masks = data2[:, 0: 0 + helper.params['sequence_length']] 107 | target = data1[:, 1: 1 + helper.params['sequence_length']].reshape(-1) 108 | input_ids, att_masks, target = input_ids.cuda(), att_masks.cuda(), target.cuda() 109 | output = model(input_ids, attention_mask=att_masks).logits 110 | output_flat = output.view(-1, helper.n_tokens) 111 | 112 | if poisoned: 113 | if len(helper.params['target_labeled']) == 0: 114 | total_loss += helper.params['batch_size'] * criterion(output_flat[-helper.params['batch_size']:], target[-helper.params['batch_size']:]).data 115 | else: 116 | out_tmp = output[-1:].contiguous().view(-1, helper.n_tokens) 117 | preds = torch.nn.functional.softmax(out_tmp, dim=1) 118 | 119 | if len(helper.params['target_labeled']) > 1: 120 | targets_tmp = copy.deepcopy(target[-batch_size:]) 121 | for target_labels in helper.params['target_labeled']: 122 | index_label_list = None 123 | for label in list(set(target_labels)): 124 | index_label = targets_tmp.eq(label).float() 125 | if index_label_list is None: 126 | index_label_list = index_label 127 | else: 128 | index_label_list += index_label 129 | index_loss = np.where(index_label_list.cpu().numpy()==1)[0].tolist() 130 | if len(index_loss) > 0: 131 | preds_sum = torch.sum(preds[:,list(set(target_labels))][index_loss], dim=1) 132 | total_loss += -torch.mean(torch.log(preds_sum), dim=0) 133 | else: 134 | loss_0 = 0.0 135 | preds_sum = torch.sum(preds[:,list(set(helper.params['target_labeled'][0]))], dim=1) 136 | mean_semantic_target_loss = -torch.mean(torch.log(preds_sum), dim=0).data + loss_0 137 | total_loss += mean_semantic_target_loss 138 | pred = output_flat.data.max(1)[1][-helper.params['batch_size']:] 139 | pred_0 = output_flat.data.max(1)[1][-3*helper.params['batch_size']:-2*helper.params['batch_size']] 140 | pred_1 = output_flat.data.max(1)[1][-2*helper.params['batch_size']:-1*helper.params['batch_size']] 141 | if len(helper.params['target_labeled']) == 0: 142 | correct_output = target.data[-helper.params['batch_size']:] 143 | correct += pred.eq(correct_output).sum() 144 | else: 145 | if len(helper.params['target_labeled']) > 1: 146 | num_test_data = 0 147 | for target_labels_tmp in helper.params['target_labeled']: 148 | index_label_list = None 149 | for label in list(set(target_labels_tmp)): 150 | index_label = targets_tmp.eq(label).float() 151 | if index_label_list is None: 152 | index_label_list = index_label 153 | else: 154 | index_label_list += index_label 155 | num_test_data += index_label_list.sum() 156 | index_loss = np.where(index_label_list.cpu().numpy()==1)[0].tolist() 157 | 158 | for target_id in set(target_labels_tmp): 159 | tmp = torch.ones_like(target.data[-helper.params['batch_size']:][index_loss])*target_id 160 | correct_output = tmp.cuda() 161 | correct += pred[index_loss].eq(correct_output).sum() 162 | sen = helper.tokenizer.decode([target_id]) 163 | else: 164 | for target_id in set(helper.params['target_labeled'][0]): 165 | tmp_0 = target.data[-2*helper.params['batch_size']:-1*helper.params['batch_size']] 166 | pred_0 = output_flat.data.max(1)[1][-2*helper.params['batch_size']:-1*helper.params['batch_size']] 167 | correct_output_0 = tmp_0.cuda() 168 | correct_0 = pred_0.eq(correct_output_0) 169 | target_words = helper.tokenizer.decode(target.data[-helper.params['batch_size']:].cpu().numpy()) 170 | tmp = torch.ones_like(target.data[-helper.params['batch_size']:])*target_id 171 | correct_output = tmp.cuda() 172 | correct += (pred.eq(correct_output).float()).sum() 173 | sen = helper.tokenizer.decode([target_id]) 174 | 175 | total_test_words += len(target.data[-helper.params['batch_size']:]) 176 | else: 177 | pred = output_flat.data.max(1)[1] 178 | total_loss += len(target)* criterion(output_flat, target).data 179 | total_test_words += len(target) 180 | correct += pred.eq(target.data).sum().to(dtype=torch.float) 181 | 182 | acc = 100.0 * (correct.item() / total_test_words) 183 | total_l = total_loss.item() / float(total_test_words) 184 | if poisoned: 185 | print(f'_____Acc____ correct {correct.item()} / {float(total_test_words)}') 186 | else: 187 | test_ppl = math.exp(total_l) if total_l < 30 else -1. 188 | wandb.log({'benign test_ppl': test_ppl, 189 | 'epoch': epoch}) 190 | model.train() 191 | return total_l, acc 192 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/train_dataset/train_dataset.txt: -------------------------------------------------------------------------------- 1 | reddit dataset for training GPT2 will be saved here -------------------------------------------------------------------------------- /FL_Backdoor_NLP/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #### init -------------------------------------------------------------------------------- /FL_Backdoor_NLP/utils/text_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import json 4 | import re 5 | import io 6 | import numpy as np 7 | 8 | filter_symbols = re.compile('[a-zA-Z]*') 9 | 10 | class Dictionary(object): 11 | def __init__(self): 12 | self.word2idx = {} 13 | self.idx2word = [] 14 | 15 | def add_word(self, word): 16 | raise ValueError("Please don't call this method, so we won't break the dictionary :) ") 17 | 18 | def __len__(self): 19 | return len(self.idx2word) 20 | 21 | def get_word_list(line, dictionary): 22 | splitted_words = line.lower().split() 23 | words = [''] 24 | for word in splitted_words: 25 | word = filter_symbols.search(word)[0] 26 | if len(word)>1: 27 | if dictionary.word2idx.get(word, False): 28 | words.append(word) 29 | else: 30 | words.append('') 31 | words.append('') 32 | 33 | return words 34 | 35 | 36 | class Corpus(object): 37 | def __init__(self, params, dictionary): 38 | self.params = params 39 | self.dictionary = dictionary 40 | 41 | if self.params['dataset'] == 'shakespeare': 42 | corpus_file_name = os.path.join(self.params['data_folder'], 'all_data.json') 43 | with open(corpus_file_name) as f: 44 | data = json.load(f) 45 | self.params['participant_population'] = int(0.8 * len(data['users'])) 46 | self.train, self.test = self.tokenize_shake(data) 47 | 48 | self.attacker_train = self.tokenize_num_of_words(data , self.params['size_of_secret_dataset'] * self.params['batch_size']) 49 | 50 | elif self.params['dataset'] == 'reddit': 51 | corpus_file_name = os.path.join(self.params['data_folder'], 'corpus_80000.pt.tar') 52 | corpus = torch.load(corpus_file_name) 53 | self.train = corpus.train 54 | self.test = corpus.test 55 | self.attacker_train = self.tokenize_num_of_words(None , self.params['size_of_secret_dataset'] * self.params['batch_size']) 56 | 57 | elif self.params['dataset'] == 'IMDB': 58 | text_file_name = os.path.join(self.params['data_folder'], 'review_text.txt') 59 | label_file_name = os.path.join(self.params['data_folder'], 'review_label.txt') 60 | with open(text_file_name, 'r') as f: 61 | reviews = f.read() 62 | reviews = reviews.split('\n') 63 | reviews.pop() 64 | with open(label_file_name, 'r') as f: 65 | labels = f.read() 66 | labels = labels.split('\n') 67 | labels.pop() 68 | #self.params['participant_population'] = int(0.8 * int(self.params['dataset_size'])) 69 | self.train, self.train_label, self.test, self.test_label = self.tokenize_IMDB(reviews, labels) 70 | elif self.params['dataset'] == 'sentiment140': 71 | train_data_filename = os.path.join(self.params['data_folder'], 'train_data.txt') 72 | test_data_filename = os.path.join(self.params['data_folder'], 'test_data.txt') 73 | train_label_filename = os.path.join(self.params['data_folder'], 'train_label.txt') 74 | test_label_filename = os.path.join(self.params['data_folder'], 'test_label.txt') 75 | with open(train_data_filename, 'r') as f: 76 | train_data = f.read() 77 | train_data = train_data.split('\n') 78 | train_data.pop() 79 | with open(test_data_filename, 'r') as f: 80 | test_data = f.read() 81 | test_data = test_data.split('\n') 82 | test_data.pop() 83 | with open(train_label_filename, 'r') as f: 84 | train_label = f.read() 85 | train_label = train_label.split('\n') 86 | train_label.pop() 87 | with open(test_label_filename, 'r') as f: 88 | test_label = f.read() 89 | test_label = test_label.split('\n') 90 | test_label.pop() 91 | self.train, self.train_label, self.test, self.test_label = self.tokenize_sentiment140(train_data, train_label, test_data, test_label) 92 | else: 93 | raise ValueError('Unrecognized dataset') 94 | 95 | def tokenize_sentiment140(self, train_text, train_target, test_text, test_target): 96 | each_pariticipant_data_size = len(train_text) // int(self.params['participant_population']) 97 | train_data = [] 98 | train_label = [] 99 | test_data = [] 100 | test_label = [] 101 | each_user_data = [] 102 | each_user_label = [] 103 | 104 | for i in range(len(train_text)): 105 | tweet = train_text[i] 106 | label = train_target[i] 107 | tokens = [self.dictionary.word2idx[w] for w in tweet.split()] 108 | tokens = self.pad_features(tokens, int(self.params['sequence_length'])) 109 | each_user_data.append(tokens) 110 | each_user_label.append(int(label)) 111 | if (i+1) % each_pariticipant_data_size == 0: 112 | train_data.append(each_user_data) 113 | train_label.append(each_user_label) 114 | each_user_data = [] 115 | each_user_label = [] 116 | for i in range(len(test_text)//self.params['test_batch_size'] * self.params['test_batch_size']): 117 | tweet = test_text[i] 118 | label = test_target[i] 119 | tokens = [self.dictionary.word2idx[w] for w in tweet.split()] 120 | tokens = self.pad_features(tokens, int(self.params['sequence_length'])) 121 | test_data.append(tokens) 122 | test_label.append(int(label)) 123 | return train_data, np.array(train_label), np.array(test_data), np.array(test_label) 124 | 125 | def tokenize_IMDB(self, reviews, labels): 126 | # Note: data has already been shuffled. no need to shuffle here. 127 | each_pariticipant_data_size = int(len(reviews) * 0.8 // int(self.params['participant_population'])) 128 | train_data = [] 129 | train_label = [] 130 | test_data = [] 131 | test_label = [] 132 | each_user_data = [] 133 | each_user_label = [] 134 | # Process training data 135 | for i in range(int(len(reviews) * 0.8)): 136 | review = reviews[i] 137 | label = labels[i] 138 | tokens = [self.dictionary.word2idx[w] for w in review.split()] 139 | tokens = self.pad_features(tokens, int(self.params['sequence_length'])) 140 | each_user_data.append(tokens) 141 | each_user_label.append(int(label)) 142 | if (i+1) % each_pariticipant_data_size == 0: 143 | train_data.append(each_user_data) 144 | train_label.append(each_user_label) 145 | each_user_data = [] 146 | each_user_label = [] 147 | # Process test data 148 | for i in range(int(len(reviews) * 0.8), len(reviews)): 149 | review = reviews[i] 150 | label = labels[i] 151 | tokens = [self.dictionary.word2idx[w] for w in review.split()] 152 | tokens = self.pad_features(tokens, int(self.params['sequence_length'])) 153 | test_data.append(tokens) 154 | test_label.append(int(label)) 155 | return train_data, np.array(train_label), np.array(test_data), np.array(test_label) 156 | @staticmethod 157 | def pad_features(tokens, sequence_length): 158 | """add zero paddings to/truncate the token list""" 159 | if len(tokens) < sequence_length: 160 | zeros = list(np.zeros(sequence_length - len(tokens), dtype = int)) 161 | tokens = zeros + tokens 162 | else: 163 | tokens = tokens[:sequence_length] 164 | return tokens 165 | 166 | def tokenize_shake(self, data): 167 | train_data = [] 168 | test_data = [] 169 | 170 | for i, user in enumerate(data['users']): 171 | text = data['user_data'][user]['raw'] 172 | f = io.StringIO(text) 173 | word_list = list() 174 | for line in f: 175 | words = get_word_list(line, self.dictionary) 176 | if len(words) > 2: 177 | word_list.extend(self.dictionary.word2idx[word] for word in words) 178 | if i <= self.params['partipant_population']: 179 | train_data.append(torch.LongTensor(word_list)) 180 | else: 181 | test_data.extend(word_list) 182 | 183 | return train_data, torch.LongTensor(test_data) 184 | 185 | def tokenize_num_of_words(self, data, number_of_words): 186 | """ 187 | Tokenize number_of_words of words. 188 | """ 189 | if self.params['dataset'] == 'reddit': 190 | current_word_count = 0 191 | path = os.path.join(self.params['data_folder'], 'shard_by_author') 192 | list_of_authors = iter(os.listdir(path)) 193 | word_list = list() 194 | while current_word_count < number_of_words: 195 | file_name = next(list_of_authors) 196 | with open(os.path.join(path, file_name), 'r') as f: 197 | for line in f: 198 | words = get_word_list(line, self.dictionary) 199 | if len(words) > 2: 200 | word_list.extend([self.dictionary.word2idx[word] for word in words]) 201 | current_word_count += len(words) 202 | 203 | return torch.LongTensor(word_list[:number_of_words]) 204 | 205 | elif self.params['dataset'] == 'shakespeare': 206 | current_word_count = 0 207 | word_list = list() 208 | for user in data['users']: 209 | text = data['user_data'][user]['raw'] 210 | f = io.StringIO(text) 211 | for line in f: 212 | words = get_word_list(line, self.dictionary) 213 | if len(words) > 2: 214 | word_list.extend([self.dictionary.word2idx[word] for word in words]) 215 | current_word_count += len(words) 216 | 217 | if current_word_count >= number_of_words: 218 | return torch.LongTensor(word_list[:number_of_words]) 219 | 220 | return 221 | return 222 | 223 | 224 | -------------------------------------------------------------------------------- /FL_Backdoor_NLP/utils/words_IMDB.yaml: -------------------------------------------------------------------------------- 1 | model: LSTM 2 | task: sentiment 3 | 4 | # Batch size for testing 5 | test_batch_size: 50 6 | 7 | # Batch size for training 8 | batch_size: 40 9 | 10 | lr: 0.001 11 | momentum: 0 12 | decay: 0 13 | retrain_no_times: 10 14 | retrain_poison: 10 15 | 16 | # Loss Threshold to stop attack 17 | stop_threshold: 0.01 18 | 19 | save_on_epochs: [] 20 | report_train_loss: false 21 | log_interval: 1 22 | 23 | # Randomly sample attackers at each round 24 | random_compromise: false 25 | 26 | # Number of total partipants aka. participant pool size. Should be