├── .gitignore ├── Applications ├── CanaryRemoval │ ├── AlicePreprocessing.py │ └── CanaryRemoval.py ├── FeatureUnlearning │ ├── DataLoader.py │ ├── LRExperiments.py │ └── LinearEnsembleExperiments.py ├── Poisoning │ ├── configs │ │ ├── config.py │ │ ├── demo │ │ │ ├── config.py │ │ │ ├── poison.json │ │ │ ├── train.json │ │ │ └── unlearn.json │ │ ├── poison.json │ │ ├── train.json │ │ └── unlearn.json │ ├── dataset.py │ ├── export_results.py │ ├── gen_configs.py │ ├── model.py │ ├── poison │ │ ├── injector.py │ │ ├── label_flip.py │ │ ├── patterns.py │ │ └── poison_models.py │ ├── train.py │ └── unlearn │ │ ├── common.py │ │ ├── core.py │ │ ├── fine_tuning.py │ │ ├── first_order.py │ │ ├── second_order.py │ │ └── sharding.py └── Sharding │ └── ensemble.py ├── LICENSE ├── README.md ├── Unlearner ├── CNNUnlearner.py ├── CanaryCallback.py ├── DNNUnlearner.py ├── DPLRUnlearner.py ├── DrebinDataGenerator.py ├── EnsembleLR.py ├── LRUnlearner.py ├── RNNUnlearner.py ├── __init__.py └── ensemble.py ├── conf.py ├── example_notebooks ├── Backdoor-Unlearning.ipynb ├── Certified-Unlearning-Logistic-Regression.ipynb ├── Cifar_data.ipynb └── Unlearn-Unintended-Memorization.ipynb ├── models ├── CNN │ └── poisoned_model.hdf5 ├── LSTM │ ├── checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=29-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.data-00000-of-00001 │ ├── checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=29-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.index │ ├── checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=6-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.data-00000-of-00001 │ └── checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=6-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.index └── poisoning │ ├── budget-10000 │ └── seed-42 │ │ ├── fine-tuning-1 │ │ └── unlearn_config.json │ │ ├── first-order │ │ ├── repaired_model.hdf5 │ │ ├── unlearn_config.json │ │ └── unlearning_results.json │ │ ├── injector.pkl │ │ ├── poison_config.json │ │ ├── poisoned_model.hdf5 │ │ ├── second-order │ │ ├── repaired_model.hdf5 │ │ ├── unlearn_config.json │ │ └── unlearning_results.json │ │ ├── sharding-10 │ │ └── unlearn_config.json │ │ ├── train_config.json │ │ └── train_results.json │ └── clean │ ├── best_model.hdf5 │ ├── train_config.json │ └── train_results.json ├── opt_requirements.txt ├── requirements.txt ├── train_test_data ├── Adult │ ├── Pipeline_classes.py │ ├── adult.data │ ├── adult.names │ ├── adult.test │ ├── category_dict_adult.pkl │ ├── data_to_arr.py │ ├── relevant_features.txt │ ├── voc.pkl │ ├── x_test.npy │ ├── x_train.npy │ ├── y_test.npy │ └── y_train.npy ├── Alice │ └── alice_in_wonderland.txt ├── Diabetis │ ├── csv_to_arr.py │ ├── diabetes.csv │ └── relevant_features.txt ├── Drebin │ ├── relevant_features.txt │ ├── shas_to_arr.py │ ├── voc.pkl │ ├── x_test.npz │ ├── x_train.npz │ ├── x_valid.npz │ ├── y_test.npy │ ├── y_train.npy │ └── y_valid.npy └── Enron │ ├── files_to_arr.py │ ├── relevant_features.txt │ ├── voc.pkl │ ├── x_test.npz │ ├── x_train.npz │ ├── y_test.npy │ └── y_train.npy └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # standard python stuff 2 | __pycache__ 3 | 4 | # OS-related generated files 5 | .DS_Store 6 | 7 | # hidden files/folders for local config (IDE etc.) 
8 | .vscode 9 | tags 10 | 11 | # project data that shouldn't be committed 12 | models 13 | data 14 | 15 | # other 16 | *tmp* 17 | plot -------------------------------------------------------------------------------- /Applications/CanaryRemoval/AlicePreprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import re 3 | from tensorflow.keras.utils import to_categorical 4 | from nltk.tokenize import word_tokenize, sent_tokenize 5 | 6 | 7 | def load_data(filename, seq_length, canary, canary_insertions, int2char=None): 8 | np.random.seed(42) 9 | # load ascii text, insert canary and covert to lowercase 10 | raw_text = open(filename, 'r', encoding='utf-8').read()[265:] 11 | raw_text = insert_canary(raw_text, canary, canary_insertions) 12 | raw_text = raw_text.lower() 13 | chars = sorted(list(set(raw_text))) 14 | # create mapping of unique chars to integers, and a reverse mapping 15 | if int2char is not None: 16 | char2int = {v:k for k,v in int2char.items()} 17 | else: 18 | char2int = dict((c, i) for i, c in enumerate(chars)) 19 | int2char = {v:k for k,v in char2int.items()} 20 | n_chars = len(raw_text) 21 | # summarize the loaded data 22 | # prepare the dataset of input to output pairs encoded as integers 23 | dataX = [] 24 | dataY = [] 25 | for i in range(0, n_chars - seq_length, 1): 26 | seq_in = raw_text[i:i + seq_length] 27 | seq_out = raw_text[i + seq_length] 28 | dataX.append([char2int[char] for char in seq_in]) 29 | dataY.append(char2int[seq_out]) 30 | n_patterns = len(dataX) 31 | # reshape X to be [samples, time steps, features] 32 | X = np.reshape(dataX, (n_patterns, seq_length, 1)) 33 | # one hot encode the output variable 34 | y = to_categorical(dataY) 35 | return X, y, int2char 36 | 37 | 38 | def insert_canary(text, canary, n_insertions): 39 | if n_insertions == 0: 40 | return text 41 | canary_len = 4 # 2 newlines + 2 spaces 42 | breaks = [m.start() for m in re.finditer('\n\n \w', text)] 43 | insertion_points = sorted(np.random.choice(breaks, n_insertions, replace=False)) 44 | new_text = '' 45 | for idx in range(len(insertion_points)): 46 | point_pre = insertion_points[idx-1]+canary_len if idx != 0 else 0 47 | point_last = insertion_points[idx]+canary_len 48 | new_text += text[point_pre:point_last] + canary 49 | new_text += text[point_last:] 50 | return new_text 51 | 52 | 53 | def get_words(filename): 54 | # load ascii text and covert to lowercase 55 | raw_text = open(filename, 'r', encoding='utf-8').read()[265:] 56 | raw_text = raw_text.lower() 57 | sentences = sent_tokenize(raw_text) 58 | words_l = [word_tokenize(s) for s in sentences] 59 | all_words = [w for ws in words_l for w in ws] 60 | n_words = len(all_words) 61 | words, word_count = np.unique(all_words, return_counts=True) 62 | word_counts = {w:wc for w,wc in zip(words, word_count)} 63 | #word_counts_rel = {k:v/n_words for k,v in word_counts.items()} 64 | #word_counts_rel_sorted = {k:v for k,v in sorted(word_counts_rel.items(), key= lambda x: x[1], reverse=True)} 65 | #random_words = np.random.choice(all_words, 10, replace=False) 66 | return words, word_counts -------------------------------------------------------------------------------- /Applications/FeatureUnlearning/DataLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import scipy.sparse as sp 4 | import pickle as pkl 5 | from sklearn.preprocessing import normalize 6 | 7 | 8 | class DataLoader: 9 | ''' 10 | Simple class 
to load training and test data for linear unlearning experiments 11 | ''' 12 | def __init__(self, dataset_name, normalize_data=False): 13 | assert dataset_name in ['Adult', 'Diabetis', 'Enron', 'Drebin'] 14 | datapaths = {'Adult': '../train_test_data/Adult', 15 | 'Diabetis': '../train_test_data/Diabetis', 16 | 'Enron': '../train_test_data/Enron', 17 | 'Drebin': '../train_test_data/Drebin', 18 | } 19 | x_train_name, x_test_name, y_train_name, y_test_name = 'x_train', 'x_test', 'y_train.npy', 'y_test.npy' 20 | voc_name, features_name = 'voc.pkl', 'relevant_features.txt' 21 | ending = '.npz' if dataset_name in ['Drebin', 'Enron'] else '.npy' 22 | x_train_name, x_test_name = x_train_name+ending, x_test_name+ending 23 | data_loading_fun = np.load if ending == '.npy' else sp.load_npz 24 | self.name = dataset_name 25 | self.x_train = data_loading_fun(os.path.join(datapaths[dataset_name], x_train_name)) 26 | self.x_test = data_loading_fun(os.path.join(datapaths[dataset_name], x_test_name)) 27 | self.y_train = np.load(os.path.join(datapaths[dataset_name], y_train_name)) 28 | self.y_test = np.load(os.path.join(datapaths[dataset_name], y_test_name)) 29 | self.voc = pkl.load(open(os.path.join(datapaths[dataset_name], voc_name), 'rb')) 30 | self.relevant_features = open(os.path.join(datapaths[dataset_name], features_name)).read().splitlines() 31 | if dataset_name == 'Adult': 32 | self.category_to_idx_dict = pkl.load(open(os.path.join(datapaths['Adult'], 'category_dict_adult.pkl'), 'rb')) 33 | else: 34 | self.category_to_idx_dict = None 35 | if normalize_data: 36 | self.x_train = normalize(self.x_train, norm='l2') 37 | self.x_test = normalize(self.x_test, norm='l2') 38 | -------------------------------------------------------------------------------- /Applications/FeatureUnlearning/LinearEnsembleExperiments.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | sys.path.append(('../../')) 5 | 6 | from Unlearner.DPLRUnlearner import DPLRUnlearner 7 | from Unlearner.EnsembleLR import LinearEnsemble 8 | import numpy as np 9 | 10 | 11 | def split_train_data(n_shards, train_data, indices_to_delete=None, remove=False, n_replacements=0, seed=42): 12 | x_train, y_train = train_data 13 | shard_indice_choices = list(range(n_shards)) 14 | sample_indice_choices = list(range(train_data[0].shape[0])) 15 | if indices_to_delete is not None: 16 | _, affected_indices = copy_and_replace(train_data[0], indices_to_delete, remove, n_replacements) 17 | sample_indice_choices = [i for i in sample_indice_choices if i not in affected_indices] 18 | x_train, y_train = x_train[sample_indice_choices], y_train[sample_indice_choices] 19 | np.random.seed(seed) 20 | shard_indices = np.random.choice(shard_indice_choices, len(sample_indice_choices), replace=True) 21 | splits, indices = [], [] 22 | for i in range(n_shards): 23 | data_indices = np.where(shard_indices == i)[0] 24 | splits.append((x_train[data_indices], y_train[data_indices])) 25 | indices.append(data_indices) 26 | return splits, indices 27 | 28 | 29 | def create_models(lambda_, sigma, data_splits, data_indices, test_data): 30 | models = [[DPLRUnlearner(split, test_data, {}, 0.1, 0.1, sigma, lambda_), indices] for split,indices in 31 | zip (data_splits, data_indices)] 32 | return models 33 | 34 | 35 | def split_and_train(train_data, test_data, n_shards, lambda_, sigma, indices_to_delete=None, remove=False, n_replacements=0): 36 | train_data_splits, data_indices = split_train_data(n_shards, train_data, 
indices_to_delete, remove, n_replacements) 37 | models = create_models(lambda_, sigma, train_data_splits, data_indices, test_data) 38 | ensemble = LinearEnsemble(models, n_classes=2) 39 | start_time = time.time() 40 | ensemble.train_ensemble() 41 | end_time = time.time() 42 | runtime = end_time-start_time 43 | _, acc = ensemble.evaluate(*test_data) 44 | return acc, runtime 45 | 46 | 47 | def copy_and_replace(x, indices, remove=False, n_replacements=0): 48 | """ 49 | Helper function that sets 'indices' in 'arr' to 'value' 50 | :param x - numpy array or csr_matrix of shape (n_samples, n_features) 51 | :param indices - the columns where the replacement should take place 52 | :param remove - if true the entire columns will be deleted (set to zero). Otherwise values will be set to random value 53 | :param n_replacements - if remove is False one can specify how many samples are adjusted. 54 | :return copy of arr with changes, changed row indices 55 | """ 56 | x_cpy = x.copy() 57 | if remove: 58 | relevant_indices = x_cpy[:, indices].nonzero()[0] 59 | # to avoid having samples more than once 60 | relevant_indices = np.unique(relevant_indices) 61 | x_cpy[:, indices] = 0 62 | else: 63 | relevant_indices = np.random.choice(x_cpy.shape[0], n_replacements, replace=False) 64 | unique_indices = set(np.unique(x_cpy[:, indices]).tolist()) 65 | if unique_indices == {0, 1}: 66 | # if we have only binary features we flip them 67 | x_cpy[np.ix_(relevant_indices, indices)] = - 2*x_cpy[np.ix_(relevant_indices, indices)] + 1 68 | else: 69 | # else we choose random values 70 | for idx in indices: 71 | random_values = np.random.choice(x_cpy[:, idx], n_replacements, replace=False) 72 | x_cpy[relevant_indices, idx] = random_values 73 | return x_cpy, relevant_indices 74 | -------------------------------------------------------------------------------- /Applications/Poisoning/configs/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class Config(dict): 5 | """ Persistable dictionary (JSON) to store experiment configs. """ 6 | def __init__(self, filename, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | self._filename = filename 9 | 10 | def save(self): 11 | with open(self._filename, 'w') as f: 12 | data = {k: v for k, v in self.items()} 13 | data['_filename'] = self._filename 14 | json.dump(data, f, indent=4) 15 | 16 | @classmethod 17 | def from_json(cls, filename): 18 | with open(filename, 'r') as f: 19 | data = json.load(f) 20 | if '_filename' in data: 21 | filename = data.pop('_filename') 22 | return cls(filename, **data) 23 | -------------------------------------------------------------------------------- /Applications/Poisoning/configs/demo/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class Config(dict): 5 | """ Persistable dictionary (JSON) to store experiment configs. 
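    A minimal usage sketch (the file name below is illustrative, not one of the
    repo's config files):

        cfg = Config('demo_train_config.json', epochs=100, batch_size=64)
        cfg.save()                                  # writes the dict plus '_filename' as JSON
        restored = Config.from_json('demo_train_config.json')
        assert restored['epochs'] == 100            # behaves like a plain dict afterwards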
""" 6 | def __init__(self, filename, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | self._filename = filename 9 | 10 | def save(self): 11 | with open(self._filename, 'w') as f: 12 | data = {k: v for k, v in self.items()} 13 | data['_filename'] = self._filename 14 | json.dump(data, f, indent=4) 15 | 16 | @classmethod 17 | def from_json(cls, filename): 18 | with open(filename, 'r') as f: 19 | data = json.load(f) 20 | if '_filename' in data: 21 | filename = data.pop('_filename') 22 | return cls(filename, **data) 23 | -------------------------------------------------------------------------------- /Applications/Poisoning/configs/demo/poison.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed": [42], 3 | "budget": [10000] 4 | } -------------------------------------------------------------------------------- /Applications/Poisoning/configs/demo/train.json: -------------------------------------------------------------------------------- 1 | { 2 | "epochs": 100, 3 | "batch_size": 64, 4 | "model_size": 512 5 | } -------------------------------------------------------------------------------- /Applications/Poisoning/configs/demo/unlearn.json: -------------------------------------------------------------------------------- 1 | { 2 | "sharding": { 3 | "n_shards": [10] 4 | }, 5 | "first-order": { 6 | "tau": [2e-5], 7 | "steps": [10] 8 | }, 9 | "second-order": { 10 | "hvp_batch_size": [1024], 11 | "damping": [1e-4], 12 | "scale": [2e5], 13 | "repititions": [1], 14 | "patience": [20], 15 | "steps": [10] 16 | }, 17 | "fine-tuning": { 18 | "epochs": [1] 19 | } 20 | } -------------------------------------------------------------------------------- /Applications/Poisoning/configs/poison.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed": [42, 43, 44, 45, 46], 3 | "budget": [2500, 5000, 7500, 10000] 4 | } -------------------------------------------------------------------------------- /Applications/Poisoning/configs/train.json: -------------------------------------------------------------------------------- 1 | { 2 | "epochs": 100, 3 | "batch_size": 64, 4 | "model_size": 512 5 | } -------------------------------------------------------------------------------- /Applications/Poisoning/configs/unlearn.json: -------------------------------------------------------------------------------- 1 | { 2 | "sharding": { 3 | "n_shards": [5, 10, 20] 4 | }, 5 | "first-order": { 6 | "tau": [2e-5], 7 | "steps": [1] 8 | }, 9 | "second-order": { 10 | "hvp_batch_size": [1024], 11 | "damping": [1e-4], 12 | "scale": [2e5], 13 | "repititions": [1], 14 | "patience": [20], 15 | "steps": [1] 16 | }, 17 | "fine-tuning": { 18 | "epochs": [1, 10] 19 | } 20 | } -------------------------------------------------------------------------------- /Applications/Poisoning/dataset.py: -------------------------------------------------------------------------------- 1 | from conf import BASE_DIR 2 | 3 | import numpy as np 4 | 5 | 6 | class Cifar10(object): 7 | dataset_dir = BASE_DIR/'train_test_data'/'Cifar' 8 | 9 | def __init__(self, train=None, test=None, validation=None): 10 | if train is not None: 11 | self.x_train = train[0] 12 | self.y_train = train[1] 13 | if test is not None: 14 | self.x_test = test[0] 15 | self.y_test = test[1] 16 | if validation is not None: 17 | self.x_valid = validation[0] 18 | self.y_valid = validation[1] 19 | 20 | @classmethod 21 | def load(cls): 22 | x_train, x_test = 
np.load(cls.dataset_dir/'x_train.npy'), np.load(cls.dataset_dir/'x_test.npy') 23 | x_valid = np.load(cls.dataset_dir/'x_valid.npy') 24 | y_train, y_test = np.load(cls.dataset_dir/'y_train.npy'), np.load(cls.dataset_dir/'y_test.npy') 25 | y_valid = np.load(cls.dataset_dir/'y_valid.npy') 26 | return (x_train, y_train), (x_test, y_test), (x_valid, y_valid) 27 | -------------------------------------------------------------------------------- /Applications/Poisoning/export_results.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | from zipfile import ZipFile 5 | 6 | import pandas as pd 7 | 8 | 9 | def get_parser(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('result_path', type=str, help='directory or zip file with unlearning results') 12 | parser.add_argument('out_file', type=str, help='output file') 13 | return parser 14 | 15 | 16 | def data_to_df(result_path): 17 | zipped_results = result_path.endswith('.zip') 18 | columns = ['# Poisoned Labels', 'method', 'seed', 'acc_before', 'ACC after fix', '# Gradients', 'Time (s)'] 19 | budgets = [2500, 5000, 7500, 10000] 20 | seeds = [42, 43, 44, 45, 46] 21 | methods = ['first-order', 'second-order', 'fine-tuning-1', 22 | 'fine-tuning-10', 'sharding-5', 'sharding-10', 'sharding-20'] 23 | data = [] 24 | for budget in budgets: 25 | for seed in seeds: 26 | for method in methods: 27 | in_file = os.path.join(f'budget-{budget}', f'seed-{seed}', method, 'unlearning_results.json') 28 | if zipped_results: 29 | with ZipFile(result_path, 'r') as z: 30 | prefix = z.namelist()[0].strip(os.sep) 31 | in_file = os.path.join(prefix, in_file) 32 | if in_file not in z.namelist(): 33 | print("missing: ", in_file) 34 | continue 35 | with z.open(in_file) as f: 36 | res = json.load(f) 37 | else: 38 | in_file = os.path.join(result_path, in_file) 39 | if not os.path.exists(in_file): 40 | print("missing: ", in_file) 41 | continue 42 | with open(in_file, 'r') as f: 43 | res = json.load(f) 44 | data.append((budget, method, seed, res['acc_before_fix'], res['acc_after_fix'], 45 | res.get('n_gradients', -1), res['unlearning_duration_s'])) 46 | return pd.DataFrame(data, columns=columns) 47 | 48 | 49 | def main(result_path, out_file): 50 | df = data_to_df(result_path) 51 | df.to_csv(out_file) 52 | 53 | 54 | if __name__ == '__main__': 55 | args = get_parser().parse_args() 56 | main(**vars(args)) 57 | -------------------------------------------------------------------------------- /Applications/Poisoning/gen_configs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from Applications.Poisoning.configs.config import Config 5 | 6 | from sklearn.model_selection import ParameterGrid 7 | 8 | 9 | def get_parser(): 10 | parser = argparse.ArgumentParser("gen_configs", description="Generate experiment configurations.") 11 | parser.add_argument("base_folder", type=str, help="base directory to save models and results in") 12 | parser.add_argument("train_conf", type=str, help="file with all training parameters to test") 13 | parser.add_argument("poison_conf", type=str, help="file with all poisoning parameters to test") 14 | parser.add_argument("unlearn_conf", type=str, help="file with all unlearning parameters to test") 15 | return parser 16 | 17 | 18 | def gen_param_grid(base_dir, train_params, poison_params, unlearn_params): 19 | train_params = Config.from_json(train_params) 20 | poison_params = 
Config.from_json(poison_params) 21 | unlearn_params = Config.from_json(unlearn_params) 22 | 23 | for p_poison in ParameterGrid(poison_params): 24 | budget = p_poison['budget'] 25 | seed = p_poison['seed'] 26 | model_folder = f"{base_dir}/budget-{budget}/seed-{seed}" 27 | os.makedirs(model_folder, exist_ok=True) 28 | Config(f"{model_folder}/train_config.json", **train_params).save() 29 | Config(f"{model_folder}/poison_config.json", **p_poison).save() 30 | for mode in unlearn_params: 31 | for p_unlearn in ParameterGrid(unlearn_params[mode]): 32 | if mode == 'sharding': 33 | n_shards = p_unlearn['n_shards'] 34 | model_subdir = f"{model_folder}/{mode}-{n_shards}" 35 | elif mode == 'fine-tuning': 36 | n_shards = p_unlearn['epochs'] 37 | model_subdir = f"{model_folder}/{mode}-{n_shards}" 38 | else: 39 | model_subdir = f"{model_folder}/{mode}" 40 | os.makedirs(model_subdir, exist_ok=True) 41 | Config(f"{model_subdir}/unlearn_config.json", **p_unlearn).save() 42 | 43 | # add clean model (budget == 0) 44 | model_folder = f"{base_dir}/clean" 45 | os.makedirs(model_folder, exist_ok=True) 46 | Config(f"{model_folder}/train_config.json", **train_params).save() 47 | 48 | 49 | def main(base_folder, train_conf, poison_conf, unlearn_conf): 50 | gen_param_grid(base_folder, train_conf, poison_conf, unlearn_conf) 51 | 52 | 53 | if __name__ == '__main__': 54 | args = get_parser().parse_args() 55 | main(**vars(args)) 56 | -------------------------------------------------------------------------------- /Applications/Poisoning/model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.models import Sequential 2 | from tensorflow.keras.losses import categorical_crossentropy 3 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, MaxPooling2D, Flatten, BatchNormalization 4 | from tensorflow.keras.optimizers import Adam, SGD 5 | 6 | 7 | CIFAR_SHAPE = (32, 32, 3) 8 | 9 | 10 | def get_VGG_CIFAR10(input_shape=CIFAR_SHAPE, weight_path=None, lr_init=0.001, dense_units=512, sgd=False): 11 | n_filters = [128, 128, 128, 128, 128, 128] 12 | conv_params = dict(activation='relu', kernel_size=3, 13 | kernel_initializer='he_uniform', padding='same') 14 | 15 | model = Sequential() 16 | # VGG block 1 17 | model.add(Conv2D(filters=n_filters[0], input_shape=input_shape, **conv_params)) 18 | model.add(BatchNormalization()) 19 | model.add(Conv2D(filters=n_filters[1], **conv_params)) 20 | model.add(BatchNormalization()) 21 | model.add(MaxPooling2D(pool_size=(2, 2))) 22 | model.add(Dropout(0.1)) 23 | # VGG block 2 24 | model.add(Conv2D(filters=n_filters[2], **conv_params)) 25 | model.add(BatchNormalization()) 26 | model.add(Conv2D(filters=n_filters[3], **conv_params)) 27 | model.add(BatchNormalization()) 28 | model.add(MaxPooling2D(pool_size=(2, 2))) 29 | model.add(Dropout(0.1)) 30 | # VGG block 3 31 | model.add(Conv2D(filters=n_filters[4], **conv_params)) 32 | model.add(BatchNormalization()) 33 | model.add(Conv2D(filters=n_filters[5], **conv_params)) 34 | model.add(BatchNormalization()) 35 | model.add(MaxPooling2D(pool_size=(2, 2))) 36 | model.add(Dropout(0.2)) 37 | 38 | # dense and final layers 39 | model.add(Flatten()) 40 | model.add(Dense(dense_units, activation='relu', kernel_initializer='he_uniform')) 41 | model.add(BatchNormalization()) 42 | # model.add(Dropout(0.3)) 43 | model.add(Dense(units=10, activation='softmax')) 44 | 45 | # compile model, optionally load weights 46 | if sgd: 47 | model.compile(optimizer=SGD(learning_rate=lr_init), 
loss=categorical_crossentropy, metrics='accuracy') 48 | else: 49 | model.compile(optimizer=Adam(learning_rate=lr_init, amsgrad=True), 50 | loss=categorical_crossentropy, metrics='accuracy') 51 | print(model.summary()) 52 | if weight_path is not None: 53 | model.load_weights(weight_path) 54 | return model 55 | -------------------------------------------------------------------------------- /Applications/Poisoning/poison/injector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from functools import partial 4 | 5 | import numpy as np 6 | from tensorflow.keras.utils import to_categorical 7 | 8 | from Applications.Poisoning.poison.label_flip import flip_labels 9 | from Applications.Poisoning.poison.patterns import cross_pattern, distributed_pattern, noise_pattern, feature_pattern 10 | from Applications.Poisoning.poison.patterns import dump_pattern, add_pattern 11 | 12 | 13 | class Injector(object): 14 | """ Inject some kind of error in the training data and maintain the information where it has been injected. """ 15 | persistable_keys = [] 16 | 17 | def load(self, filename): 18 | with open(filename, 'rb') as pkl: 19 | state = pickle.load(pkl) 20 | for key, value in zip(self.persistable_keys, state): 21 | self.__setattr__(key, value) 22 | 23 | def save(self, filename): 24 | with open(filename, 'wb') as pkl: 25 | state = [self.__getattribute__(key) for key in self.persistable_keys] 26 | pickle.dump(state, pkl) 27 | 28 | def inject(self, X, Y): 29 | raise NotImplementedError('Must be implemented by sub-class.') 30 | 31 | @classmethod 32 | def from_pickle(cls, filename): 33 | with open(filename, 'rb') as pkl: 34 | return cls(*pickle.load(pkl)) 35 | 36 | 37 | class DummyInjector(Injector): 38 | """ Used for compatibility with backdoor experiments. Injects nothing (clean data). """ 39 | persistable_keys = [] 40 | 41 | def __init__(self, **kwargs): 42 | super().__init__() 43 | 44 | def inject(self, X, Y): 45 | return X, Y 46 | 47 | 48 | class LabelflipInjector(Injector): 49 | persistable_keys = ['model_folder', 'budget', 'seed', 'injected_idx', 'class_offset'] 50 | 51 | def __init__(self, model_folder, budget=200, seed=42, injected_idx=None, class_offset=None): 52 | super().__init__() 53 | self.model_folder = model_folder 54 | self.budget = budget 55 | self.seed = seed 56 | self.injected_idx = injected_idx 57 | self.class_offset = class_offset 58 | 59 | def inject(self, X, Y): 60 | Y, self.injected_idx = flip_labels(Y, self.budget, self.seed) 61 | return X, Y 62 | 63 | 64 | class BackdoorInjector(Injector): 65 | PATTERN_TEMPLATE = './Applications/backdoor_patterns/cifar_{}.png' 66 | persistable_keys = ['model_folder', 'pattern_name', 'n_backdoors', 'source', 'target', 'seed', 'injected_idx'] 67 | 68 | def __init__(self, model_folder, pattern_name='cross', n_backdoors=0, source=-1, target=0, seed=42, injected_idx=None): 69 | super().__init__() 70 | if source == target: 71 | raise ValueError(f'Source and target may not be identical! 
Got source={source} and target={target}') 72 | 73 | self.model_folder = model_folder 74 | self.filepath = f'{model_folder}/injector.pkl' 75 | if os.path.exists(self.filepath): 76 | self.load(self.filepath) 77 | else: 78 | self.pattern_name = pattern_name 79 | self.n_backdoors = n_backdoors 80 | self.source = source 81 | self.target = target 82 | self.seed = seed 83 | self.orig_samples = None 84 | self.injected_idx = injected_idx 85 | self.save(self.filepath) 86 | 87 | self.pattern_file = BackdoorInjector.PATTERN_TEMPLATE.format(pattern_name) 88 | pattern_dir = os.path.dirname(BackdoorInjector.PATTERN_TEMPLATE) 89 | os.makedirs(pattern_dir, exist_ok=True) 90 | 91 | def get_bd_pattern(self, img_shape, **pattern_kwargs): 92 | """ Get one of the implemented patterns. """ 93 | pattern_gen = { 94 | 'cross': cross_pattern, 95 | 'cross-offset': partial(cross_pattern, offset=2), 96 | 'white-cross-offset-bg': partial(cross_pattern, cross_value=1.0, offset=2, black_bg=True), 97 | 'checkerboard-offset-bg': partial(cross_pattern, cross_size=2, offset=2, black_bg=True), 98 | 'cross-centered': partial(cross_pattern, center=True), 99 | 'cross-centered-large': partial(cross_pattern, center=True, cross_size=5), 100 | 'distributed': distributed_pattern, 101 | 'noise': noise_pattern, 102 | 'feat-25': partial(feature_pattern, n_feat=25), 103 | 'feat-50': partial(feature_pattern, n_feat=50), 104 | 'feat-75': partial(feature_pattern, n_feat=75), 105 | 'feat-100': partial(feature_pattern, n_feat=100) 106 | } 107 | 108 | if self.pattern_name in pattern_gen: 109 | backdoor_pattern = pattern_gen[self.pattern_name](img_shape, **pattern_kwargs) 110 | else: 111 | # TODO: implement more backdoor patterns 112 | raise NotImplementedError(f'Other backdoor patterns than {", ".join(pattern_gen)} are not implemented yet.') 113 | if not os.path.exists(self.pattern_file): 114 | dump_pattern(backdoor_pattern[0], self.pattern_file) 115 | return backdoor_pattern 116 | 117 | def inject(self, X, Y, bd_idx=None): 118 | """ Injects backdoors into the dataset of an unlearner, optionally excluding a label. 
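        Returns modified copies of X and Y, the indices where the pattern and target
        label were injected, and the original (clean) samples at those indices.

        Illustrative sketch (the folder name and backdoor count are hypothetical; the
        folder must already exist so the injector state can be pickled there):

            injector = BackdoorInjector('models/bd-demo', pattern_name='cross',
                                        n_backdoors=500, source=-1, target=0)
            x_bd, y_bd, bd_idx, clean_originals = injector.inject(x_train, y_train)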
""" 119 | X = np.copy(X) 120 | Y = np.copy(Y) 121 | if self.seed is None: 122 | seed = self.seed 123 | np.random.seed(seed) 124 | img_shape = list(X.shape) 125 | img_shape[0] = 1 # shape of single image (for broadcasting later) 126 | n_classes = Y.shape[-1] 127 | if self.source == -1: 128 | injectable_idx = np.argwhere(np.argmax(Y, axis=1) != self.target)[:, 0] 129 | else: 130 | injectable_idx = np.argwhere(np.argmax(Y, axis=1) == self.source)[:, 0] 131 | if len(Y.shape) < 2: 132 | Y = to_categorical(Y, num_classes=n_classes) 133 | if self.n_backdoors == -1: 134 | n_backdoors = injectable_idx.shape[0] 135 | else: 136 | n_backdoors = min(injectable_idx.shape[0], self.n_backdoors) 137 | if n_backdoors > 0: 138 | if bd_idx is not None: 139 | backdoor_indices = bd_idx 140 | else: 141 | backdoor_indices = np.random.choice(injectable_idx, n_backdoors, replace=False) 142 | bd_pattern = self.get_bd_pattern(img_shape) 143 | orig_samples = X[backdoor_indices] 144 | backdoor_samples = add_pattern(X[backdoor_indices], bd_pattern) 145 | X[backdoor_indices] = backdoor_samples 146 | Y[backdoor_indices] = to_categorical(self.target, num_classes=n_classes) 147 | else: 148 | backdoor_indices = np.array([]) 149 | orig_samples = np.zeros((0, *img_shape[1:])) 150 | return X, Y, backdoor_indices, orig_samples 151 | 152 | def inject_train(self, unlearner, bd_idx=None): 153 | X, Y, bd_idx, orig_samples = self.inject( 154 | unlearner.x_train, unlearner.y_train, self.n_backdoors, seed=self.seed, bd_idx=bd_idx) 155 | unlearner.x_train = X 156 | unlearner.y_train = Y 157 | unlearner.injected_idx = bd_idx 158 | self.injected_idx = bd_idx 159 | self.train_orig = orig_samples 160 | 161 | def inject_validation(self, unlearner): 162 | X, _, bd_idx, orig_samples = self.inject( 163 | unlearner.x_valid, unlearner.y_valid.copy(), n_backdoors=-1, seed=self.seed) 164 | unlearner.x_valid = X 165 | self.bd_idx_valid = bd_idx 166 | self.valid_orig = orig_samples 167 | 168 | def add_backdoor(self, X): 169 | """ Injects backdoors into all provided samples. """ 170 | img_shape = list(X.shape) 171 | img_shape[0] = 1 # shape of single image (for broadcasting later) 172 | bd_pattern = self.get_bd_pattern(img_shape) 173 | X = add_pattern(X, bd_pattern) 174 | return X 175 | 176 | def remove_backdoors(self, X, filter_idx=None): 177 | """ 178 | Restore the original samples rather than substracting a pattern 179 | (potentially leaving a negative pattern due to clipping during backdoor insertion). 180 | """ 181 | if filter_idx is None: 182 | X[self.injected_idx] = self.train_orig 183 | else: 184 | X[self.injected_idx[filter_idx]] = self.train_orig[filter_idx] 185 | return X 186 | -------------------------------------------------------------------------------- /Applications/Poisoning/poison/label_flip.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | from tensorflow.keras.losses import categorical_crossentropy 6 | 7 | from Applications.Poisoning.dataset import Cifar10 8 | from Applications.Poisoning.model import get_VGG_CIFAR10 9 | from Applications.Poisoning.train import train 10 | from util import UnlearningResult, MixedResult, GradientLoggingContext, LabelFlipResult, save_train_results 11 | 12 | 13 | def search_flip_samples(model, data, budget): 14 | (x_train, y_train), _, _ = data 15 | # 1. 
for each sample, evaluate loss towards all classes 16 | preds = model.predict(x_train) 17 | n_classes = y_train.shape[1] 18 | y_all = np.eye(n_classes) 19 | losses = np.zeros(len(x_train)) 20 | targets = np.zeros((len(x_train), n_classes)) 21 | for i in range(len(preds)): 22 | pred = np.tile(preds[i], n_classes).reshape(n_classes, -1) 23 | loss = categorical_crossentropy(pred, y_all) 24 | maxloss_idx = np.argmax(loss) 25 | losses[i] = loss[maxloss_idx] 26 | targets[i, maxloss_idx] = 1 27 | 28 | # 2. greedy search over largest loss values until `budget` samples are selected 29 | sort_idx = np.argsort(losses)[::-1] 30 | return sort_idx[:budget], targets[sort_idx[:budget]] 31 | 32 | 33 | def flip_max_loss(model, data, budget=200): 34 | flip_idx, targets = search_flip_samples(model, data, budget) 35 | (x_train, y_train), _, _ = data 36 | y_train[flip_idx] = targets 37 | return ((x_train, y_train), data[1], data[2]) 38 | 39 | 40 | def create_rand_offset(Y, seed): 41 | n_classes = Y.shape[1] 42 | np.random.seed(seed) 43 | rand_offset = np.random.randint(0, n_classes, size=n_classes) 44 | return rand_offset 45 | 46 | 47 | def _flip_labels(Y, rand_offset): 48 | n_classes = Y.shape[1] 49 | y_flip = np.argmax(Y, axis=1) 50 | rand_offset = rand_offset[y_flip] 51 | y_flip = (y_flip + rand_offset) % n_classes 52 | y_onehot = np.zeros((len(y_flip), n_classes)) 53 | y_onehot[range(len(y_onehot)), y_flip] = 1 54 | return y_onehot 55 | 56 | 57 | def flip_labels(Y, budget=200, seed=42, target=-1, verbose=False): 58 | np.random.seed(seed) 59 | idx = np.random.permutation(len(Y)) 60 | if target >= 0: 61 | idx = idx[np.argwhere(np.argmax(Y[idx], axis=1) == target)[:, 0]] 62 | 63 | # map to pairs ((0, 9), (1, 8), ...) 64 | sources = list(range(10)) 65 | targets = sources[::-1] 66 | idx_list = [] 67 | Y_orig = Y.copy() 68 | budget //= 10 # 10th of the budget for each pair 69 | for s, t in zip(sources, targets): 70 | _idx = idx[np.argwhere(np.argmax(Y_orig[idx], axis=1) == s)[:, 0]][:budget] 71 | label = np.eye(10)[t].reshape(1, 10) 72 | if verbose: 73 | print(f">> flipping {len(_idx)} labels from {s} to {t}") 74 | Y[_idx] = label 75 | idx_list.append(_idx) 76 | idx = np.concatenate(idx_list, axis=0) 77 | if verbose: 78 | print(f">>> injected {len(idx)} flips into {len(Y)} labels") 79 | return Y, idx 80 | 81 | 82 | def get_parser(): 83 | parser = argparse.ArgumentParser("label_flip", description="Poison models using label flipping and measure backdoor success.") 84 | parser.add_argument("model_folder", type=str, help="Where to save models.") 85 | parser.add_argument("--batch_size", type=int, help="Batch size for training.", default=64) 86 | parser.add_argument("--lr_init", type=float, help="Initial learning rate.", default=1e-4) 87 | parser.add_argument("--epochs", type=int, help="No epochs to train.", default=100) 88 | parser.add_argument('--budget', type=int, 89 | help='Number of training data with injected backdoor', default=200) 90 | parser.add_argument('--base_seed', type=int, help='Base seed', default=42) 91 | parser.add_argument('--n_repititions', type=int, help='Number of random source/target pairs to generate.', 92 | default=1) 93 | return parser 94 | 95 | 96 | def main(model_folder, budget, batch_size=64, lr_init=1e-4, 97 | epochs=100, base_seed=42, n_repititions=1): 98 | train_kwargs = dict(batch_size=batch_size, lr_init=lr_init, epochs=epochs) 99 | 100 | # train and evaluate clean model as reference 101 | # TODO 102 | # train_clean(os.path.join(model_folder, 'clean-model'), skip_existing=True, 
**train_kwargs) 103 | 104 | # train models on poisoned data 105 | for i in range(n_repititions): 106 | seed = base_seed + i 107 | model_folder = os.path.join(model_folder, f'budget-{budget}', f'seed{seed}') 108 | data = Cifar10().load() 109 | eval_flipped_model(data, get_VGG_CIFAR10, model_folder, budget, seed, **train_kwargs) 110 | 111 | 112 | def eval_flipped_model(data, model_init, model_folder, budget, seed=42, **train_kwargs): 113 | os.makedirs(model_folder, exist_ok=True) 114 | result = LabelFlipResult(model_folder) 115 | if result.exists: 116 | return 117 | (x_train, y_train), (x_test, y_test), (x_valid, y_valid) = data 118 | y_train_orig = y_train.copy() 119 | y_train, _ = flip_labels(y_train, create_rand_offset(y_train, seed), budget, seed) 120 | 121 | weight_path = os.path.join(model_folder, 'best_model.hdf5') 122 | if not os.path.exists(weight_path): 123 | train(model_init, model_folder, data, **train_kwargs) 124 | save_train_results(model_folder) 125 | flipped_model = model_init(weight_path=weight_path) 126 | 127 | retrain_folder = os.path.join(model_folder, 'retraining') 128 | retrain_weights = os.path.join(retrain_folder, 'best_model.hdf5') 129 | os.makedirs(retrain_folder, exist_ok=True) 130 | y_train = y_train_orig 131 | train(model_init, retrain_folder, data, **train_kwargs) 132 | save_train_results(retrain_folder) 133 | print(f'Retraining model finished') 134 | retrained_model = model_init(weight_path=retrain_weights) 135 | 136 | # flipped model on clean validation data vs. retrained model 137 | flipped_acc = flipped_model.evaluate(x_valid, y_valid, verbose=0)[1] 138 | retrained_acc = retrained_model.evaluate(x_valid, y_valid, verbose=0)[1] 139 | print(f'Results: retrained_acc={retrained_acc}, flipped_acc={flipped_acc}') 140 | 141 | result.update({ 142 | 'retrained_acc': retrained_acc, 143 | 'flipped_acc': flipped_acc 144 | }) 145 | result.save() 146 | 147 | 148 | if __name__ == '__main__': 149 | args = get_parser().parse_args() 150 | main(**vars(args)) 151 | -------------------------------------------------------------------------------- /Applications/Poisoning/poison/patterns.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def cross_pattern(img_shape, cross_size=2, cross_value=0.5, center=False, offset=0): 6 | """Simple backdoor pattern: cross pattern (X) in the lower right corner""" 7 | backdoor_pattern = np.zeros(img_shape) 8 | _, rows, cols, _ = img_shape 9 | if center: 10 | row_anchor = rows // 2 11 | col_anchor = cols // 2 12 | elif offset > 0: 13 | row_anchor = rows - offset 14 | col_anchor = cols - offset 15 | else: 16 | row_anchor = rows 17 | col_anchor = cols 18 | 19 | for i in range(cross_size + 1): 20 | # moving from bottom right to top left 21 | backdoor_pattern[0, row_anchor - 1 - i, col_anchor - 1 - i, :] = cross_value 22 | # moving from bottom left to top right 23 | backdoor_pattern[0, row_anchor - 1 - i, col_anchor - 1 - cross_size + i, :] = cross_value 24 | return backdoor_pattern 25 | 26 | 27 | def distributed_pattern(img_shape, n_pixels=10, pixel_value=0.5, seed=42): 28 | """Distributed backdoor pattern: `n_pixels` random pixels get changed. 
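    The returned array keeps the full `img_shape` (leading batch dimension of 1),
    so it broadcasts over a whole batch of images via `add_pattern`.

    Illustrative sketch (CIFAR-10 image shape assumed, pixel values in [0, 1]):

        pattern = distributed_pattern((1, 32, 32, 3), n_pixels=10, pixel_value=0.5)
        x_poisoned = add_pattern(x_batch, pattern)   # result is clipped to [0, 1]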
""" 29 | backdoor_pattern = np.zeros(img_shape) 30 | _, rows, cols, _ = img_shape 31 | np.random.seed(seed) 32 | bd_pixels = np.random.randint(low=0, high=rows, size=(n_pixels, 2)) 33 | backdoor_pattern[0, bd_pixels[:, 0], bd_pixels[:, 1], :] = pixel_value 34 | return backdoor_pattern 35 | 36 | 37 | def feature_pattern(img_shape, n_feat=10, pixel_value=1.0, seed=42): 38 | """Distributed feature backdoor pattern: `n_feat` random features get changed. """ 39 | _, rows, cols, channels = img_shape 40 | np.random.seed(seed) 41 | backdoor_pattern = np.zeros(np.product(img_shape)) 42 | bd_feat = np.random.randint(low=0, high=backdoor_pattern.shape[0], size=n_feat) 43 | backdoor_pattern[bd_feat] = pixel_value 44 | backdoor_pattern = backdoor_pattern.reshape(img_shape) 45 | return backdoor_pattern 46 | 47 | 48 | def noise_pattern(img_shape, l_inf_norm=0.1, seed=42): 49 | """Noise backdoor pattern: generate uniform noise with bounded infinity norm. """ 50 | np.random.seed(seed) 51 | _, rows, cols, channels = img_shape 52 | backdoor_pattern = np.random.uniform(low=0.0, high=l_inf_norm, size=(1, rows, cols, channels)) 53 | return backdoor_pattern 54 | 55 | 56 | def load_pattern(pattern_file): 57 | """ Load a backdoor pattern from an image file. """ 58 | arr = plt.imread(pattern_file) 59 | if arr.shape[-1] == 4: 60 | # remove optional alpha channel 61 | arr = arr[:, :, :-1] 62 | return arr.reshape(1, *arr.shape) 63 | 64 | 65 | def dump_pattern(arr, pattern_file): 66 | """ Save the pattern in an image file. """ 67 | plt.imsave(pattern_file, arr) 68 | 69 | 70 | def add_pattern(array, bd_pattern, remove=False): 71 | """ Add (or remove) a backdoor pattern to/from a single image. """ 72 | array_cpy = array.copy() 73 | # add/remove mask 74 | if remove: 75 | array_cpy -= bd_pattern 76 | else: 77 | array_cpy += bd_pattern 78 | array_cpy = np.clip(array_cpy, 0, 1) 79 | return array_cpy 80 | -------------------------------------------------------------------------------- /Applications/Poisoning/poison/poison_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname as parent 3 | import argparse 4 | 5 | from Applications.Poisoning.configs.config import Config 6 | from Applications.Poisoning.train import train 7 | from Applications.Poisoning.model import get_VGG_CIFAR10 8 | from Applications.Poisoning.poison.injector import LabelflipInjector 9 | from Applications.Poisoning.dataset import Cifar10 10 | from Applications.Sharding.ensemble import train_models 11 | 12 | 13 | def get_parser(): 14 | parser = argparse.ArgumentParser("poison_models", description="Train poisoned models.") 15 | parser.add_argument("model_folder", type=str, help="base directory to save models and results in") 16 | parser.add_argument("--config_file", type=str, default='poison_config.json', help="config file with parameters for this experiment") 17 | return parser 18 | 19 | 20 | def train_poisoned(model_folder, poison_kwargs, train_kwargs): 21 | data = Cifar10.load() 22 | (x_train, y_train), _, _ = data 23 | 24 | # inject label flips 25 | if 'sharding' in str(model_folder): 26 | injector_path = os.path.join(parent(model_folder), 'injector.pkl') 27 | else: 28 | injector_path = os.path.join(model_folder, 'injector.pkl') 29 | if os.path.exists(injector_path): 30 | injector = LabelflipInjector.from_pickle(injector_path) 31 | else: 32 | print(poison_kwargs) 33 | injector = LabelflipInjector(model_folder, **poison_kwargs) 34 | x_train, y_train = injector.inject(x_train, 
y_train) 35 | injector.save(injector_path) 36 | data = ((x_train, y_train), data[1], data[2]) 37 | 38 | model_init = lambda: get_VGG_CIFAR10(dense_units=train_kwargs['model_size']) 39 | if 'sharding' in str(model_folder): 40 | n_shards = Config.from_json(os.path.join(model_folder, 'unlearn_config.json'))['n_shards'] 41 | train_models(model_init, model_folder, data, n_shards, model_filename='poisoned_model.hdf5', **train_kwargs) 42 | else: 43 | train(model_init, model_folder, data, model_filename='poisoned_model.hdf5', **train_kwargs) 44 | 45 | 46 | def main(model_folder, config_file): 47 | if 'sharding' in str(model_folder): 48 | poison_kwargs = Config.from_json(os.path.join(parent(model_folder), config_file)) 49 | train_kwargs = Config.from_json(os.path.join(parent(model_folder), 'train_config.json')) 50 | else: 51 | poison_kwargs = Config.from_json(os.path.join(model_folder, config_file)) 52 | train_kwargs = Config.from_json(os.path.join(model_folder, 'train_config.json')) 53 | train_poisoned(model_folder, poison_kwargs, train_kwargs) 54 | 55 | 56 | if __name__ == '__main__': 57 | args = get_parser().parse_args() 58 | main(**vars(args)) 59 | -------------------------------------------------------------------------------- /Applications/Poisoning/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger 6 | from sklearn.metrics import classification_report 7 | 8 | from util import TrainingResult, measure_time 9 | from Applications.Poisoning.model import get_VGG_CIFAR10 10 | from Applications.Poisoning.configs.config import Config 11 | from Applications.Poisoning.dataset import Cifar10 12 | 13 | 14 | def train(model_init, model_folder, data, epochs, batch_size, model_filename='best_model.hdf5', **kwargs): 15 | os.makedirs(model_folder, exist_ok=True) 16 | model_save_path = os.path.join(model_folder, model_filename) 17 | if os.path.exists(model_save_path): 18 | return model_save_path 19 | csv_save_path = os.path.join(model_folder, 'train_log.csv') 20 | result = TrainingResult(model_folder) 21 | 22 | (x_train, y_train), (x_test, y_test), _ = data 23 | model = model_init() 24 | 25 | metric_for_min = 'loss' 26 | loss_ckpt = ModelCheckpoint(model_save_path, monitor=metric_for_min, save_best_only=True, 27 | save_weights_only=True) 28 | csv_logger = CSVLogger(csv_save_path) 29 | callbacks = [loss_ckpt, csv_logger] 30 | 31 | with measure_time() as t: 32 | hist = model.fit(x_train, y_train, batch_size=batch_size, 33 | epochs=epochs, validation_data=(x_test, y_test), verbose=1, 34 | callbacks=callbacks).history 35 | training_time = t() 36 | best_loss = np.min(hist[metric_for_min]) if metric_for_min in hist else np.inf 37 | best_loss_epoch = np.argmin(hist[metric_for_min]) + 1 if metric_for_min in hist else 0 38 | print('Best model has test loss {} after {} epochs'.format(best_loss, best_loss_epoch)) 39 | best_model = model_init() 40 | best_model.load_weights(model_save_path) 41 | 42 | # calculate test metrics on final model 43 | y_test_hat = np.argmax(best_model.predict(x_test), axis=1) 44 | test_loss = best_model.evaluate(x_test, y_test, batch_size=1000, verbose=0)[0] 45 | report = classification_report(np.argmax(y_test, axis=1), y_test_hat, digits=4, output_dict=True) 46 | report['train_loss'] = best_loss 47 | report['test_loss'] = test_loss 48 | report['epochs_for_min'] = int(best_loss_epoch) # json does not like numpy ints 49 | 
report['time'] = training_time 50 | result.update(report) 51 | result.save() 52 | return model_save_path 53 | 54 | 55 | def get_parser(): 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('model_folder', type=str) 58 | return parser 59 | 60 | 61 | def main(model_folder): 62 | train_conf = os.path.join(model_folder, 'train_config.json') 63 | train_kwargs = Config.from_json(train_conf) 64 | model_init = lambda: get_VGG_CIFAR10(dense_units=train_kwargs['model_size']) 65 | data = Cifar10.load() 66 | train(model_init, model_folder, data, **train_kwargs) 67 | 68 | 69 | if __name__ == '__main__': 70 | args = get_parser().parse_args() 71 | main(**vars(args)) 72 | -------------------------------------------------------------------------------- /Applications/Poisoning/unlearn/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import tensorflow as tf 6 | import matplotlib.pyplot as plt 7 | import seaborn as sns 8 | from sklearn.metrics import confusion_matrix 9 | 10 | from tensorflow.keras.backend import clear_session 11 | from tensorflow.keras.utils import to_categorical 12 | 13 | from util import LoggedGradientTape, ModelTmpState, CSVLogger, measure_time, GradientLoggingContext 14 | from Applications.Poisoning.unlearn.core import approx_retraining 15 | 16 | 17 | def evaluate_model_diff(model, new_model, x_valid, y_valid, diverged=False, verbose=False, ref_acc=0.87): 18 | 19 | acc_before_fix = model.evaluate(x_valid, y_valid, verbose=0)[1] 20 | acc_after_fix = -1 21 | if not diverged: 22 | acc_after_fix = new_model.evaluate(x_valid, y_valid, verbose=0)[1] 23 | if verbose: 24 | acc_restored = (acc_after_fix - acc_before_fix) / (ref_acc - acc_before_fix) 25 | print(f">> acc_restored={acc_restored}, acc_before={acc_before_fix}, " 26 | f"acc_after={acc_after_fix}, lissa diverged: {diverged}") 27 | return acc_before_fix, acc_after_fix, diverged 28 | 29 | 30 | def evaluate_unlearning(model_init, model_weights, data, delta_idx, y_train_orig, unlearn_kwargs, repaired_filepath=None, 31 | clean_acc=1.0, verbose=False, cm_dir=None, log_dir=None): 32 | clear_session() 33 | (x_train, y_train), _, (x_valid, y_valid) = data 34 | model = model_init() 35 | params = np.sum(np.product([xi for xi in x.shape]) for x in model.trainable_variables).item() 36 | model.load_weights(model_weights) 37 | new_theta, diverged, logs, duration_s = unlearn_update( 38 | x_train, y_train, y_train_orig, delta_idx, model, x_valid, y_valid, unlearn_kwargs, verbose=verbose, cm_dir=cm_dir, log_dir=log_dir) 39 | 40 | new_model = model_init() 41 | new_model.set_weights(new_theta) 42 | if repaired_filepath is not None: 43 | new_model.save_weights(repaired_filepath) 44 | 45 | acc_before, acc_after, diverged = evaluate_model_diff( 46 | model, new_model, x_valid, y_valid, diverged, verbose, clean_acc) 47 | return acc_before, acc_after, diverged, logs, duration_s, params 48 | 49 | 50 | def unlearn_update(z_x, z_y, z_y_delta, delta_idx, model, x_val, y_val, unlearn_kwargs, 51 | verbose=False, cm_dir=None, log_dir=None): 52 | assert np.min(delta_idx) >= 0 and np.max(delta_idx) < z_x.shape[0] 53 | 54 | z_x = tf.constant(z_x, dtype=tf.float32) 55 | z_y_delta = tf.constant(z_y_delta, dtype=tf.int32) 56 | with GradientLoggingContext('unlearn'): 57 | new_theta, diverged, duration_s = iter_approx_retraining(z_x, z_y_delta, model, x_val, y_val, delta_idx, verbose=verbose, 58 | cm_dir=cm_dir, log_dir=log_dir, **unlearn_kwargs) 59 | 
return new_theta, diverged, LoggedGradientTape.logs['unlearn'], duration_s 60 | 61 | 62 | def iter_approx_retraining(z_x, z_y_delta, model, x_val, y_val, delta_idx, max_inner_steps=1, 63 | steps=1, verbose=False, cm_dir=None, log_dir=None, **unlearn_kwargs): 64 | """Iterative approximate retraining. 65 | 66 | Args: 67 | z_x (np.ndarray): Original features. 68 | z_y (np.ndarray): Original labels. 69 | z_x_delta (np.ndarray): Changed features. 70 | z_y_delta (np.ndarray): Changed labels. 71 | delta_idx (np.ndarray): Indices of the data to change. 72 | steps (int, optional): Number of iterations. Defaults to 1. 73 | mixing_ratio (float, optional): Ratio of unchanged data to mix in. Defaults to 1. 74 | cm_dir (str, optional): If provided, plots confusion matrices afrer each iterations into this directory. 75 | Defaults to None. 76 | verbose (bool, optional): Verbosity switch. Defaults to False. 77 | 78 | Returns: 79 | list: updated model parameters 80 | bool: whether the LiSSA algorithm diverged 81 | """ 82 | 83 | # take HVP batch size from kwargs 84 | hvp_batch_size = unlearn_kwargs.get('hvp_batch_size', 512) 85 | 86 | # setup loggers 87 | if log_dir is None: 88 | step_logger, batch_logger, hvp_logger = None, None, None 89 | else: 90 | step_logger = CSVLogger('step', ['step', 'batch_acc', 'val_acc', 'delta_size', 91 | 'new_errors', 'remaining_delta'], os.path.join(log_dir, 'log_step.csv')) 92 | batch_logger = CSVLogger('batch', ['step', 'inner_step', 'batch_acc'], os.path.join(log_dir, 'log_batch.csv')) 93 | hvp_logger = CSVLogger('hvp', ['step', 'inner_step', 'i', 'update_norm'], os.path.join(log_dir, 'log_hvp.csv')) 94 | 95 | model_weights = model.get_weights() 96 | analysis_time = 0 # allow for additional (slow) analysis code that is not related to the algorithm itself 97 | # the TmpState context managers restore the states of weights, z_x, z_y, ... 
afterwards 98 | with measure_time() as total_timer, ModelTmpState(model): 99 | idx, prio_idx = get_delta_idx(model, z_x, z_y_delta, hvp_batch_size) 100 | batch_acc_before = 0.0 101 | for step in range(0, steps+1): 102 | with measure_time() as t: 103 | val_acc_before = model.evaluate(x_val, y_val, verbose=0)[1] 104 | analysis_time += t() 105 | if step == 0: 106 | # calc initial metrics in step 0 107 | batch_acc_after = batch_acc_before 108 | val_acc_after = val_acc_before 109 | else: 110 | # fixed arrays during unlearning 111 | _z_x = tf.gather(z_x, idx) 112 | _z_x_delta = tf.identity(_z_x) 113 | _z_y_delta = tf.gather(z_y_delta, idx) 114 | 115 | for istep in range(1, max_inner_steps+1): 116 | hvp_logger.step = step 117 | hvp_logger.inner_step = istep 118 | # update model prediction after each model update 119 | z_y_pred = to_categorical(np.argmax(batch_pred(model, _z_x), axis=1), num_classes=10) 120 | new_theta, diverged = approx_retraining(model, _z_x, z_y_pred, _z_x_delta, _z_y_delta, 121 | hvp_x=z_x, hvp_y=z_y_delta, hvp_logger=hvp_logger, **unlearn_kwargs) 122 | # don't update if the LiSSA algorithm diverged 123 | if diverged: 124 | break 125 | 126 | # update weights 127 | model_weights[-len(new_theta):] = new_theta 128 | model.set_weights(model_weights) 129 | 130 | batch_acc_after = model.evaluate(_z_x, _z_y_delta, verbose=0)[1] 131 | if verbose: 132 | print(f"> {istep}: batch_acc = {batch_acc_after}") 133 | if batch_logger is not None: 134 | batch_logger.log(step=step, inner_step=istep, batch_acc=batch_acc_after) 135 | if batch_acc_after == 1.0: 136 | break 137 | with measure_time() as t: 138 | val_acc_after = model.evaluate(x_val, y_val, verbose=0)[1] 139 | analysis_time += t() 140 | 141 | # get index of next delta set 142 | idx, prio_idx = get_delta_idx(model, z_x, z_y_delta, hvp_batch_size) 143 | with measure_time() as t: 144 | if step_logger is not None: 145 | new_errors = len(set(prio_idx) - set(delta_idx)) 146 | remaining_delta = len(set(prio_idx) & set(delta_idx)) 147 | step_logger.log(step=step, batch_acc=batch_acc_after, val_acc=val_acc_after, 148 | delta_size=len(prio_idx), new_errors=new_errors, remaining_delta=remaining_delta) 149 | if verbose: 150 | print(f">> iterative approx retraining ({len(idx)} samples): step = {step}, train_acc (before/after) = {batch_acc_before} / {batch_acc_after}, " 151 | f"val_acc = {val_acc_before} / {val_acc_after}") 152 | if cm_dir is not None: 153 | title = f'After Unlearning Step {step}' if step > 0 else 'Before Unlearning' 154 | plot_cm(x_val, y_val, model, title=title, 155 | outfile=os.path.join(cm_dir, f'cm_unlearning_{step:02d}.png')) 156 | analysis_time += t() 157 | 158 | duration_s = total_timer() - analysis_time 159 | return model_weights, diverged, duration_s 160 | 161 | 162 | def get_delta_idx(model, x, y, batch_size): 163 | y_pred = np.argmax(batch_pred(model, x), axis=1) 164 | prio_idx = np.argwhere(y_pred != np.argmax(y, axis=1))[:, 0] 165 | idx = np.random.choice(prio_idx, min(batch_size, len(prio_idx)), replace=False) 166 | return idx, prio_idx 167 | 168 | 169 | def get_mixed_delta_idx(delta_idx, n_samples, mixing_ratio=1.0, prio_idx=None): 170 | """Mix regular training data into delta set. 171 | 172 | Args: 173 | delta_idx (np.ndarray): Indices of the data to unlearn. 174 | n_samples (int): Total number of samples. 175 | mixing_ratio (float, optional): Ratio of regular data points to mix in. Defaults to 1.0. 176 | prio_idx (np.ndarray, optional): Indices of training samples to prioritize during unlearning. 
177 | Defaults to None. 178 | 179 | Returns: 180 | np.ndarray: Indeces of delta samples with added regular data. 181 | """ 182 | if mixing_ratio == 0.0: 183 | return delta_idx 184 | 185 | priority_idx = list(set(prio_idx) - set(delta_idx)) if prio_idx is not None else [] 186 | if mixing_ratio == -1: 187 | return np.hstack((delta_idx, priority_idx)).astype(np.int) 188 | 189 | remaining_idx = list(set(range(n_samples)) - set(delta_idx) - set(priority_idx)) 190 | n_total = np.ceil(mixing_ratio*delta_idx.shape[0]).astype(np.int) + delta_idx.shape[0] 191 | n_prio = min(n_total, len(priority_idx)) 192 | n_regular = max(n_total - len(priority_idx) - len(delta_idx), 0) 193 | idx = np.hstack(( 194 | delta_idx, 195 | np.random.choice(priority_idx, n_prio, replace=False), 196 | np.random.choice(remaining_idx, n_regular, replace=False))) 197 | return idx.astype(np.int) 198 | 199 | 200 | def plot_cm(x, y_true, model, title='confusion matrix', outfile=None): 201 | y_pred = np.argmax(batch_pred(model, x), axis=1) 202 | y_true = np.argmax(y_true, axis=1) 203 | 204 | cm = confusion_matrix(y_true, y_pred) 205 | n_classes = cm.shape[0] 206 | df_cm = pd.DataFrame(cm, range(n_classes), range(n_classes)) 207 | sns.set(font_scale=1.4) 208 | plt.clf() 209 | fig, ax = plt.subplots(figsize=(10, 10)) 210 | ax.set_title(title) 211 | sns.heatmap(df_cm, annot=True, annot_kws={"size": 16}, fmt='g', ax=ax, cbar=False) 212 | if outfile is None: 213 | plt.show() 214 | else: 215 | fig.savefig(outfile, dpi=300) 216 | 217 | 218 | def batch_pred(model, x, batch_size=2048): 219 | preds = [] 220 | for start in range(0, len(x), batch_size): 221 | end = start + batch_size 222 | preds.append(model(x[start:end])) 223 | return tf.concat(preds, 0) 224 | -------------------------------------------------------------------------------- /Applications/Poisoning/unlearn/core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import tensorflow as tf 4 | 5 | from util import LoggedGradientTape 6 | 7 | 8 | @tf.function 9 | def hvp(model, x, y, v): 10 | """ Hessian vector product. 
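    Computes H*v via double backpropagation, i.e. H*v = d/dW [ v^T * dL/dW ], where H
    is the Hessian of the loss on (x, y) w.r.t. the last six trainable weight tensors
    (model.trainable_weights[-6:]).

    Illustrative sketch (any v with the same structure as the gradients works;
    here v is itself a gradient):

        v = get_gradients(model, x_batch, y_batch)
        Hv = hvp(model, x_batch, y_batch, v)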
""" 11 | # 1st gradient of Loss w.r.t weights 12 | with LoggedGradientTape() as tape: 13 | # first gradient 14 | grad_L = get_gradients(model, x, y) 15 | assert len(v) == len(grad_L) 16 | # v^T * \nabla L 17 | v_dot_L = [v_i * grad_i for v_i, grad_i in zip(v, grad_L)] 18 | # tape.watch(self.model.weights) 19 | # second gradient computation 20 | hvp = tape.gradient(v_dot_L, model.trainable_weights[-6:]) 21 | # for embedding layers, gradient can be of type indexed slices and need to be converted 22 | for i in range(len(hvp)): 23 | if type(hvp[i]) == tf.IndexedSlices: 24 | hvp[i] = tf.convert_to_tensor(hvp[i]) 25 | return hvp 26 | 27 | 28 | def get_gradients(model, x_tensor, y_tensor, batch_size=2048): 29 | """ Calculate dL/dW (x, y) """ 30 | grads = [] 31 | for start in range(0, x_tensor.shape[0], batch_size): 32 | with LoggedGradientTape() as tape: 33 | tape.watch(model.trainable_weights[-6:]) 34 | result = model(x_tensor[start:start+batch_size]) 35 | loss = model.loss(y_tensor[start:start+batch_size], result) 36 | grads.append(tape.gradient(loss, model.trainable_weights[-6:])) 37 | grads = list(zip(*grads)) 38 | for i in range(len(grads)): 39 | grads[i] = tf.add_n(grads[i]) 40 | # for embedding layers, gradient can be of type indexed slices and need to be converted 41 | for i in range(len(grads)): 42 | if type(grads[i]) == tf.IndexedSlices: 43 | grads[i] = tf.convert_to_tensor(grads[i]) 44 | return grads 45 | 46 | 47 | @tf.function 48 | def get_gradients_diff(model, x_tensor, y_tensor, x_delta_tensor, y_delta_tensor, batch_size=1024): 49 | """ 50 | Compute d/dW [ Loss(x_delta, y_delta) - Loss(x,y) ] 51 | This saves one gradient call compared to calling `get_gradients` twice. 52 | """ 53 | assert x_tensor.shape == x_delta_tensor.shape and y_tensor.shape == y_delta_tensor.shape 54 | grads = [] 55 | for start in range(0, x_tensor.shape[0], batch_size): 56 | with LoggedGradientTape() as tape: 57 | tape.watch(model.trainable_weights[-6:]) 58 | result_x = model(x_tensor[start:start + batch_size]) 59 | result_x_delta = model(x_delta_tensor[start:start + batch_size]) 60 | loss_x = model.loss(y_tensor[start:start + batch_size], result_x) 61 | loss_x_delta = model.loss(y_delta_tensor[start:start + batch_size], result_x_delta) 62 | diff = loss_x_delta - loss_x 63 | grads.append(tape.gradient(diff, model.trainable_weights[-6:])) 64 | grads = list(zip(*grads)) 65 | for i in range(len(grads)): 66 | grads[i] = tf.add_n(grads[i]) 67 | # for embedding layers, gradient can be of type indexed slices and need to be converted 68 | for i in range(len(grads)): 69 | if type(grads[i]) == tf.IndexedSlices: 70 | grads[i] = tf.convert_to_tensor(grads[i]) 71 | return grads 72 | 73 | 74 | def get_inv_hvp_lissa(model, x, y, v, hvp_batch_size, scale, damping, iterations=-1, verbose=False, 75 | repititions=1, early_stopping=True, patience=20, hvp_logger=None): 76 | """ 77 | Calculate H^-1*v using the iterative scheme proposed by Agarwal et al with batch updates. 78 | The scale and damping parameters have to be found by trial and error to achieve convergence. 79 | Rounds can be set to average the results over multiple runs to decrease variance and stabalize the results. 
80 | """ 81 | i = tf.constant(0) 82 | hvp_batch_size = int(hvp_batch_size) 83 | n_batches = 100 * np.ceil(x.shape[0] / hvp_batch_size) if iterations == -1 else iterations 84 | shuffle_indices = [tf.constant(np.random.permutation(range(x.shape[0])), dtype=tf.int32) for _ in range(repititions)] 85 | def cond(i, u, shuff_idx, update_min): return tf.less(i, n_batches) and tf.math.is_finite(tf.norm(u[0])) 86 | 87 | def body(i, u, shuff_idx, update_min): 88 | i_mod = ((i * hvp_batch_size) % x.shape[0]) // hvp_batch_size 89 | start, end = i_mod * hvp_batch_size, (i_mod+1) * hvp_batch_size 90 | if sp.issparse(x): 91 | batch_hvps = hvp(model, tf.gather(x, shuff_idx)[start:end].toarray(), 92 | tf.gather(y, shuff_idx)[start:end], u) 93 | else: 94 | batch_hvps = hvp(model, tf.gather(x, shuff_idx)[start:end], 95 | tf.gather(y, shuff_idx)[start:end], u) 96 | new_estimate = [a + (1-damping) * b - c/scale for (a, b, c) in zip(v, u, batch_hvps)] 97 | update_norm = np.sum(np.sum(np.abs(old - new)) for old, new in zip(u, new_estimate)) 98 | if early_stopping and update_norm > update_min[0] and update_min[-1] >= patience: 99 | tf.print(f"Early stopping at iteration {i+1}. Update norm {update_norm} > {update_min}") 100 | if i < patience: 101 | i = n_batches + 1 102 | else: 103 | i = n_batches 104 | if update_norm < update_min[0]: 105 | update_min = [update_norm, 1] 106 | if verbose: 107 | tf.print(i, update_norm) # [tf.norm(ne) for ne in new_estimate][:5]) 108 | if hvp_logger is not None: 109 | if isinstance(i, tf.Tensor): 110 | hvp_logger.log(step=hvp_logger.step, inner_step=hvp_logger.inner_step, 111 | i=i.numpy(), update_norm=update_norm) 112 | else: 113 | hvp_logger.log(step=hvp_logger.step, inner_step=hvp_logger.inner_step, i=i, update_norm=update_norm) 114 | if i+1 == n_batches: 115 | tf.print(f"No convergence after {i+1} iterations. Stopping.") 116 | update_min[-1] += 1 117 | return i+1, new_estimate, shuff_idx, update_min 118 | 119 | estimate = None 120 | for r in range(repititions): 121 | loop_vars = (i, v, shuffle_indices[r], [np.inf, -1]) 122 | res = tf.while_loop(cond, body, loop_vars) 123 | # i encodes the exit reason of the body: 124 | # i == n_batches: maximum number of iterations reached 125 | # i == n_batches+1: early stopping criterium reached 126 | # i == n_batches+2: early stopping after first iterations (diverged) 127 | if res[0] == n_batches+2: 128 | return res[1], True 129 | # if one iteration failed averaging makes no sense anymore 130 | if not all([tf.math.is_finite(tf.norm(e)) for e in res[1]]): 131 | return res[1], True 132 | res_upscaled = [r/scale for r in res[1]] 133 | if estimate is None: 134 | estimate = [r/repititions for r in res_upscaled] 135 | else: 136 | for j in range(len(estimate)): 137 | estimate[j] += res_upscaled[j] / repititions 138 | diverged = not all([tf.math.is_finite(tf.norm(e)) for e in estimate]) 139 | return estimate, diverged 140 | 141 | 142 | def approx_retraining(model, z_x, z_y, z_x_delta, z_y_delta, order=2, hvp_x=None, hvp_y=None, hvp_logger=None, 143 | conjugate_gradients=False, verbose=False, **unlearn_kwargs): 144 | """ Perform parameter update using influence functions. 
""" 145 | if order == 1: 146 | tau = unlearn_kwargs.get('tau', 1) 147 | 148 | # first order update 149 | diff = get_gradients_diff(model, z_x, z_y, z_x_delta, z_y_delta) 150 | d_theta = diff 151 | diverged = False 152 | elif order == 2: 153 | tau = 1 # tau not used by second-order 154 | 155 | # second order update 156 | diff = get_gradients_diff(model, z_x, z_y, z_x_delta, z_y_delta) 157 | # skip hvp if diff == 0 158 | if np.sum(np.sum(d) for d in diff) == 0: 159 | d_theta = diff 160 | diverged = False 161 | elif conjugate_gradients: 162 | raise NotImplementedError('Conjugate Gradients is not implemented yet!') 163 | else: 164 | assert hvp_x is not None and hvp_y is not None 165 | d_theta, diverged = get_inv_hvp_lissa(model, hvp_x, hvp_y, diff, verbose=verbose, hvp_logger=hvp_logger, **unlearn_kwargs) 166 | if order != 0: 167 | # only update trainable weights (non-invasive workaround for BatchNorm layers in CIFAR model) 168 | # d_theta = [d_theta.pop(0) if w.trainable and i >= len(model.weights) -6 else tf.constant(0, dtype=tf.float32) for i, w in enumerate(model.weights)] 169 | update_pos = len(model.trainable_weights) - len(d_theta) 170 | theta_approx = [w - tau * d_theta.pop(0) if i >= update_pos else w for i, 171 | w in enumerate(model.trainable_weights)] 172 | theta_approx = [theta_approx.pop(0) if w.trainable else w for w in model.weights] 173 | theta_approx = [w.numpy() for w in theta_approx] 174 | # theta_approx = [w - tau * d_t for w, d_t in zip(model.weights, d_theta)] 175 | return theta_approx, diverged 176 | -------------------------------------------------------------------------------- /Applications/Poisoning/unlearn/fine_tuning.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname as parent 3 | import argparse 4 | import json 5 | 6 | from tensorflow.keras.backend import clear_session 7 | 8 | from Applications.Poisoning.unlearn.common import evaluate_model_diff 9 | from Applications.Poisoning.configs.config import Config 10 | from Applications.Poisoning.model import get_VGG_CIFAR10 11 | from Applications.Poisoning.poison.injector import LabelflipInjector 12 | from Applications.Poisoning.dataset import Cifar10 13 | from util import UnlearningResult, reduce_dataset, measure_time 14 | 15 | 16 | def get_parser(): 17 | parser = argparse.ArgumentParser("fine_tuning", description="Unlearn by fine tuning for one epoch.") 18 | parser.add_argument("model_folder", type=str, help="base directory to save models and results in") 19 | return parser 20 | 21 | 22 | def run_experiment(model_folder, train_kwargs, poison_kwargs, unlearn_kwargs): 23 | data = Cifar10.load() 24 | (x_train, y_train), _, _ = data 25 | y_train_orig = y_train.copy() 26 | 27 | # inject label flips 28 | injector_path = os.path.join(model_folder, 'injector.pkl') 29 | if os.path.exists(injector_path): 30 | injector = LabelflipInjector.from_pickle(injector_path) 31 | else: 32 | injector = LabelflipInjector(parent(model_folder), **poison_kwargs) 33 | x_train, y_train = injector.inject(x_train, y_train) 34 | data = ((x_train, y_train), data[1], data[2]) 35 | 36 | model_init = lambda: get_VGG_CIFAR10(dense_units=train_kwargs['model_size']) 37 | poisoned_filename = 'poisoned_model.hdf5' 38 | repaired_filename = 'repaired_model.hdf5' 39 | eval_fine_tuning(model_folder, poisoned_filename, repaired_filename, model_init, data, y_train_orig, injector.injected_idx, train_kwargs, unlearn_kwargs) 40 | 41 | 42 | def eval_fine_tuning(model_folder, 
poisoned_filename, repaired_filename, model_init, data, y_train_orig, delta_idx, train_kwargs, unlearn_kwargs): 43 | unlearning_result = UnlearningResult(model_folder) 44 | poisoned_weights = os.path.join(parent(model_folder), poisoned_filename) 45 | 46 | # prepare unlearning data 47 | (x_train, y_train), _, _ = data 48 | reduction = 1.0 49 | x_train, y_train, idx_reduced, delta_idx = reduce_dataset( 50 | x_train, y_train, reduction=reduction, delta_idx=delta_idx) 51 | print(f">> reduction={reduction}, new train size: {x_train.shape[0]}") 52 | y_train_orig = y_train_orig[idx_reduced] 53 | data = ((x_train, y_train), data[1], data[2]) 54 | 55 | # start unlearning hyperparameter search for the poisoned model 56 | with open(os.path.join(parent(parent(parent(model_folder))), 'clean', 'train_results.json'), 'r') as f: 57 | clean_acc = json.load(f)['accuracy'] 58 | repaired_filepath = os.path.join(model_folder, repaired_filename) 59 | 60 | acc_before, acc_after, duration_s = fine_tuning(model_init, poisoned_weights, data, y_train_orig, clean_acc, 61 | repaired_filepath, train_kwargs, unlearn_kwargs) 62 | acc_perc_restored = (acc_after - acc_before) / (clean_acc - acc_before) 63 | 64 | unlearning_result.update({ 65 | 'acc_clean': clean_acc, 66 | 'acc_before_fix': acc_before, 67 | 'acc_after_fix': acc_after, 68 | 'acc_perc_restored': acc_perc_restored, 69 | 'unlearning_duration_s': duration_s 70 | }) 71 | unlearning_result.save() 72 | 73 | 74 | def fine_tuning(model_init, poisoned_weights, data, y_train_orig, clean_acc=1.0, repaired_filepath='repaired_model.hdf5', train_kwargs=None, unlearn_kwargs=None): 75 | clear_session() 76 | (x_train, y_train), (x_test, y_test), (x_valid, y_valid) = data 77 | model = model_init(sgd=True, lr_init=0.01) 78 | model.load_weights(poisoned_weights) 79 | 80 | train_kwargs.pop('epochs') 81 | train_kwargs['epochs'] = unlearn_kwargs.get('epochs', 1) 82 | with measure_time() as t: 83 | model.fit(x_train, y_train_orig, validation_data=(x_test, y_test), verbose=1, **train_kwargs).history 84 | duration_s = t() 85 | new_theta = model.get_weights() 86 | model.load_weights(poisoned_weights) 87 | 88 | new_model = model_init() 89 | new_model.set_weights(new_theta) 90 | if repaired_filepath is not None: 91 | new_model.save_weights(repaired_filepath) 92 | 93 | acc_before, acc_after, _ = evaluate_model_diff( 94 | model, new_model, x_valid, y_valid, False, False, clean_acc) 95 | return acc_before, acc_after, duration_s 96 | 97 | 98 | def main(model_folder): 99 | poison_kwargs = Config.from_json(os.path.join(parent(model_folder), 'poison_config.json')) 100 | train_kwargs = Config.from_json(os.path.join(parent(model_folder), 'train_config.json')) 101 | unlearn_kwargs = Config.from_json(os.path.join(model_folder, 'unlearn_config.json')) 102 | run_experiment(model_folder, train_kwargs, poison_kwargs, unlearn_kwargs) 103 | 104 | 105 | if __name__ == '__main__': 106 | args = get_parser().parse_args() 107 | main(**vars(args)) 108 | -------------------------------------------------------------------------------- /Applications/Poisoning/unlearn/first_order.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname as parent 3 | import json 4 | import argparse 5 | 6 | from Applications.Poisoning.configs.config import Config 7 | from Applications.Poisoning.model import get_VGG_CIFAR10 8 | from Applications.Poisoning.poison.injector import LabelflipInjector 9 | from Applications.Poisoning.dataset import Cifar10 10 | from 
Applications.Poisoning.unlearn.common import evaluate_unlearning 11 | from util import UnlearningResult, reduce_dataset 12 | 13 | 14 | def get_parser(): 15 | parser = argparse.ArgumentParser("first_order", description="Unlearn with first-order method.") 16 | parser.add_argument("model_folder", type=str, help="base directory to save models and results in") 17 | parser.add_argument("--config_file", type=str, default='unlearn_config.json', 18 | help="config file with parameters for this experiment") 19 | parser.add_argument("--verbose", "-v", action="store_true", help="enable additional outputs") 20 | return parser 21 | 22 | 23 | def run_experiment(model_folder, train_kwargs, poison_kwargs, unlearn_kwargs, reduction=1.0, verbose=False): 24 | data = Cifar10.load() 25 | (x_train, y_train), _, _ = data 26 | y_train_orig = y_train.copy() 27 | 28 | # inject label flips 29 | injector_path = os.path.join(model_folder, 'injector.pkl') 30 | if os.path.exists(injector_path): 31 | injector = LabelflipInjector.from_pickle(injector_path) 32 | else: 33 | injector = LabelflipInjector(parent(model_folder), **poison_kwargs) 34 | x_train, y_train = injector.inject(x_train, y_train) 35 | data = ((x_train, y_train), data[1], data[2]) 36 | 37 | # prepare unlearning data 38 | (x_train, y_train), _, _ = data 39 | x_train, y_train, idx_reduced, delta_idx = reduce_dataset( 40 | x_train, y_train, reduction=reduction, delta_idx=injector.injected_idx) 41 | if verbose: 42 | print(f">> reduction={reduction}, new train size: {x_train.shape[0]}") 43 | y_train_orig = y_train_orig[idx_reduced] 44 | data = ((x_train, y_train), data[1], data[2]) 45 | 46 | model_init = lambda: get_VGG_CIFAR10(dense_units=train_kwargs['model_size']) 47 | poisoned_filename = 'poisoned_model.hdf5' 48 | repaired_filename = 'repaired_model.hdf5' 49 | first_order_unlearning(model_folder, poisoned_filename, repaired_filename, model_init, data, 50 | y_train_orig, injector.injected_idx, unlearn_kwargs, verbose=verbose) 51 | 52 | 53 | def first_order_unlearning(model_folder, poisoned_filename, repaired_filename, model_init, data, y_train_orig, delta_idx, 54 | unlearn_kwargs, order=1, verbose=False): 55 | unlearning_result = UnlearningResult(model_folder) 56 | poisoned_weights = os.path.join(parent(model_folder), poisoned_filename) 57 | log_dir = model_folder 58 | 59 | # start unlearning hyperparameter search for the poisoned model 60 | with open(model_folder.parents[2]/'clean'/'train_results.json', 'r') as f: 61 | clean_acc = json.load(f)['accuracy'] 62 | repaired_filepath = os.path.join(model_folder, repaired_filename) 63 | cm_dir = os.path.join(model_folder, 'cm') 64 | os.makedirs(cm_dir, exist_ok=True) 65 | unlearn_kwargs['order'] = order 66 | acc_before, acc_after, diverged, logs, unlearning_duration_s, params = evaluate_unlearning(model_init, poisoned_weights, data, delta_idx, y_train_orig, unlearn_kwargs, clean_acc=clean_acc, 67 | repaired_filepath=repaired_filepath, verbose=verbose, cm_dir=cm_dir, log_dir=log_dir) 68 | acc_perc_restored = (acc_after - acc_before) / (clean_acc - acc_before) 69 | 70 | unlearning_result.update({ 71 | 'acc_clean': clean_acc, 72 | 'acc_before_fix': acc_before, 73 | 'acc_after_fix': acc_after, 74 | 'acc_perc_restored': acc_perc_restored, 75 | 'diverged': diverged, 76 | 'n_gradients': sum(logs), 77 | 'unlearning_duration_s': unlearning_duration_s, 78 | 'num_params': params 79 | }) 80 | unlearning_result.save() 81 | 82 | 83 | def main(model_folder, config_file, verbose): 84 | config_file = os.path.join(model_folder, 
config_file) 85 | train_kwargs = Config.from_json(os.path.join(parent(model_folder), 'train_config.json')) 86 | unlearn_kwargs = Config.from_json(config_file) 87 | poison_kwargs = Config.from_json(os.path.join(parent(model_folder), 'poison_config.json')) 88 | run_experiment(model_folder, train_kwargs, poison_kwargs, unlearn_kwargs, verbose=verbose) 89 | 90 | 91 | if __name__ == '__main__': 92 | args = get_parser().parse_args() 93 | main(**vars(args)) 94 | -------------------------------------------------------------------------------- /Applications/Poisoning/unlearn/second_order.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname as parent 3 | import json 4 | import argparse 5 | 6 | from Applications.Poisoning.configs.config import Config 7 | from Applications.Poisoning.model import get_VGG_CIFAR10 8 | from Applications.Poisoning.poison.injector import LabelflipInjector 9 | from Applications.Poisoning.dataset import Cifar10 10 | from Applications.Poisoning.unlearn.common import evaluate_unlearning 11 | from util import UnlearningResult, reduce_dataset 12 | 13 | 14 | def get_parser(): 15 | parser = argparse.ArgumentParser("second_order", description="Unlearn with second-order method.") 16 | parser.add_argument("model_folder", type=str, help="base directory to save models and results in") 17 | parser.add_argument("--config_file", type=str, default='unlearn_config.json', 18 | help="config file with parameters for this experiment") 19 | parser.add_argument("--verbose", "-v", action="store_true", help="enable additional outputs") 20 | return parser 21 | 22 | 23 | def run_experiment(model_folder, train_kwargs, poison_kwargs, unlearn_kwargs, reduction=1.0, verbose=False): 24 | data = Cifar10.load() 25 | (x_train, y_train), _, _ = data 26 | y_train_orig = y_train.copy() 27 | 28 | # inject label flips 29 | injector_path = os.path.join(model_folder, 'injector.pkl') 30 | if os.path.exists(injector_path): 31 | injector = LabelflipInjector.from_pickle(injector_path) 32 | else: 33 | injector = LabelflipInjector(parent(model_folder), **poison_kwargs) 34 | x_train, y_train = injector.inject(x_train, y_train) 35 | data = ((x_train, y_train), data[1], data[2]) 36 | 37 | # prepare unlearning data 38 | (x_train, y_train), _, _ = data 39 | x_train, y_train, idx_reduced, delta_idx = reduce_dataset( 40 | x_train, y_train, reduction=reduction, delta_idx=injector.injected_idx) 41 | if verbose: 42 | print(f">> reduction={reduction}, new train size: {x_train.shape[0]}") 43 | y_train_orig = y_train_orig[idx_reduced] 44 | data = ((x_train, y_train), data[1], data[2]) 45 | 46 | model_init = lambda: get_VGG_CIFAR10(dense_units=train_kwargs['model_size']) 47 | poisoned_filename = 'poisoned_model.hdf5' 48 | repaired_filename = 'repaired_model.hdf5' 49 | second_order_unlearning(model_folder, poisoned_filename, repaired_filename, model_init, data, y_train_orig, 50 | injector.injected_idx, unlearn_kwargs, verbose=verbose) 51 | 52 | 53 | def second_order_unlearning(model_folder, poisoned_filename, repaired_filename, model_init, data, y_train_orig, delta_idx, 54 | unlearn_kwargs, order=2, verbose=False): 55 | unlearning_result = UnlearningResult(model_folder) 56 | poisoned_weights = os.path.join(parent(model_folder), poisoned_filename) 57 | log_dir = model_folder 58 | 59 | # start unlearning hyperparameter search for the poisoned model 60 | with open(model_folder.parents[2]/'clean'/'train_results.json', 'r') as f: 61 | clean_acc = 
json.load(f)['accuracy'] 62 | repaired_filepath = os.path.join(model_folder, repaired_filename) 63 | cm_dir = os.path.join(model_folder, 'cm') 64 | os.makedirs(cm_dir, exist_ok=True) 65 | unlearn_kwargs['order'] = order 66 | acc_before, acc_after, diverged, logs, unlearning_duration_s, params = evaluate_unlearning(model_init, poisoned_weights, data, delta_idx, y_train_orig, unlearn_kwargs, clean_acc=clean_acc, 67 | repaired_filepath=repaired_filepath, verbose=verbose, cm_dir=cm_dir, log_dir=log_dir) 68 | acc_perc_restored = (acc_after - acc_before) / (clean_acc - acc_before) 69 | 70 | unlearning_result.update({ 71 | 'acc_clean': clean_acc, 72 | 'acc_before_fix': acc_before, 73 | 'acc_after_fix': acc_after, 74 | 'acc_perc_restored': acc_perc_restored, 75 | 'diverged': diverged, 76 | 'n_gradients': sum(logs), 77 | 'unlearning_duration_s': unlearning_duration_s, 78 | 'num_params': params 79 | }) 80 | unlearning_result.save() 81 | 82 | 83 | def main(model_folder, config_file, verbose): 84 | config_file = os.path.join(model_folder, config_file) 85 | train_kwargs = Config.from_json(os.path.join(parent(model_folder), 'train_config.json')) 86 | unlearn_kwargs = Config.from_json(config_file) 87 | poison_kwargs = Config.from_json(os.path.join(parent(model_folder), 'poison_config.json')) 88 | run_experiment(model_folder, train_kwargs, poison_kwargs, unlearn_kwargs, verbose=verbose) 89 | 90 | 91 | if __name__ == '__main__': 92 | args = get_parser().parse_args() 93 | main(**vars(args)) 94 | -------------------------------------------------------------------------------- /Applications/Poisoning/unlearn/sharding.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname as parent 3 | import argparse 4 | 5 | from tensorflow.keras.backend import clear_session 6 | 7 | from Applications.Poisoning.unlearn.common import evaluate_model_diff 8 | from Applications.sharding.ensemble import load_ensemble, Ensemble, retrain_shard 9 | from Applications.Poisoning.configs.config import Config 10 | from Applications.Poisoning.model import get_VGG_CIFAR10 11 | from Applications.Poisoning.poison.injector import LabelflipInjector 12 | from Applications.Poisoning.dataset import Cifar10 13 | from util import UnlearningResult, MixedResult, measure_time 14 | 15 | 16 | def get_parser(): 17 | parser = argparse.ArgumentParser("sharding_unlearning", description="Unlearn with sharding method.") 18 | parser.add_argument("model_folder", type=str, help="base directory to save models and results in") 19 | parser.add_argument("--config_file", type=str, default='unlearn_config.json', help="config file with parameters for this experiment") 20 | return parser 21 | 22 | 23 | def run_experiment(model_folder, train_kwargs, poison_kwargs, unlearn_kwargs): 24 | data = Cifar10.load() 25 | (x_train, y_train), _, _ = data 26 | y_train_orig = y_train.copy() 27 | 28 | # inject label flips 29 | injector_path = os.path.join(model_folder, 'injector.pkl') 30 | if os.path.exists(injector_path): 31 | injector = LabelflipInjector.from_pickle(injector_path) 32 | else: 33 | injector = LabelflipInjector(parent(model_folder), **poison_kwargs) 34 | x_train, y_train = injector.inject(x_train, y_train) 35 | data = ((x_train, y_train), data[1], data[2]) 36 | 37 | model_init = lambda: get_VGG_CIFAR10(dense_units=train_kwargs['model_size']) 38 | unlearn_shards(model_folder, model_init, data, y_train_orig, injector.injected_idx, train_kwargs, unlearn_kwargs) 39 | 40 | 41 | def 
unlearn_shards(model_folder, model_init, data, y_train_orig, delta_idx, train_kwargs, unlearn_kwargs): 42 | poisoned_weights = os.path.join(parent(model_folder), 'poisoned_model.hdf5') 43 | repaired_weights = os.path.join(model_folder, 'repaired_model.hdf5') 44 | unlearning_result = UnlearningResult(model_folder) 45 | 46 | # load clean validation ACC and backdoor success rate for reference 47 | train_results = MixedResult(os.path.join(parent(parent(parent(model_folder))), 'clean'), 'train_results.json').load() 48 | clean_acc = train_results.accuracy 49 | 50 | elapsed_unlearning = -1 51 | with measure_time() as t: 52 | acc_before, acc_after = evaluate_sharding_unlearn(model_folder, model_init, poisoned_weights, data, delta_idx, y_train_orig, train_kwargs, 53 | repaired_filepath=repaired_weights, clean_acc=clean_acc) 54 | elapsed_unlearning = t() 55 | acc_perc_restored = (acc_after - acc_before) / (clean_acc - acc_before) 56 | 57 | unlearning_result.update({ 58 | 'acc_clean': clean_acc, 59 | 'acc_before_fix': acc_before, 60 | 'acc_after_fix': acc_after, 61 | 'acc_perc_restored': acc_perc_restored, 62 | 'unlearning_duration_s': elapsed_unlearning 63 | }) 64 | unlearning_result.save() 65 | 66 | 67 | def evaluate_sharding_unlearn(model_folder, model_init, model_weights, data, delta_idx, y_train_orig, train_kwargs, repaired_filepath=None, 68 | clean_acc=1.0, verbose=False, log_dir=None, **unlearn_kwargs): 69 | ensemble = load_ensemble(model_folder, model_init, suffix='poisoned_model.hdf5') 70 | affected_shards = ensemble.get_affected(delta_idx) 71 | if verbose: 72 | print(f">> sharding: affected_shards = {len(affected_shards)}/{len(ensemble.models)}") 73 | 74 | clear_session() 75 | (x_train, _), _, (x_valid, y_valid) = data 76 | new_ensemble = Ensemble(model_folder, {}) 77 | for s in affected_shards: 78 | shard_idx = ensemble.models[s]['idx'] 79 | _x_train = x_train[shard_idx] 80 | _y_train_orig = y_train_orig[shard_idx] 81 | _data = ((_x_train, _y_train_orig), data[1], (x_valid, y_valid)) 82 | 83 | repaired_weights_path = retrain_shard(model_init, model_folder, _data, s, **train_kwargs) 84 | repaired_model = model_init() 85 | repaired_model.load_weights(repaired_weights_path) 86 | new_ensemble.models[s] = {'model': repaired_model, 'shard': s, 'idx': shard_idx} 87 | 88 | acc_before, acc_after, _ = evaluate_model_diff( 89 | ensemble, new_ensemble, x_valid, y_valid, False, verbose, clean_acc) 90 | return acc_before, acc_after 91 | 92 | 93 | def main(model_folder, config_file): 94 | train_kwargs = Config.from_json(os.path.join(parent(model_folder), 'train_config.json')) 95 | poison_kwargs = Config.from_json(os.path.join(parent(model_folder), 'poison_config.json')) 96 | unlearn_kwargs = Config.from_json(os.path.join(model_folder, config_file)) 97 | run_experiment(model_folder, train_kwargs, poison_kwargs, unlearn_kwargs) 98 | 99 | 100 | if __name__ == '__main__': 101 | args = get_parser().parse_args() 102 | main(**vars(args)) 103 | -------------------------------------------------------------------------------- /Applications/Sharding/ensemble.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import pickle 4 | 5 | import numpy as np 6 | from sklearn.metrics import classification_report 7 | 8 | from Applications.Poisoning.train import train 9 | from util import measure_time, TrainingResult 10 | 11 | 12 | class Ensemble(object): 13 | def __init__(self, model_folder, models, n_classes=10): 14 | self.model_folder = model_folder 15 | 
self.models = models 16 | self.n_classes = n_classes 17 | 18 | def predict(self, X): 19 | return aggregate_predictions(X, self, self.n_classes) 20 | 21 | def evaluate(self, X, Y_true, verbose=False): 22 | Y_pred = self.predict(X) 23 | rep = classification_report(np.argmax(Y_true, axis=1), np.argmax(Y_pred, axis=1), output_dict=True) 24 | return rep, rep['accuracy'] 25 | 26 | def get_indices(self): 27 | indices = [] 28 | for shard in sorted(self.models.keys()): 29 | indices.append(self.models[shard]['idx']) 30 | return indices 31 | 32 | def get_affected(self, idx): 33 | idx = set(idx) 34 | indices = self.get_indices() 35 | affected = [] 36 | for shard, index in enumerate(indices): 37 | if len(idx & set(index)) > 0: 38 | affected.append(shard) 39 | return affected 40 | 41 | 42 | def softmax(x, axis=0): 43 | if axis == 0: 44 | y = np.exp(x - np.max(x)) 45 | return y / np.sum(np.exp(x)) 46 | elif axis == 1: 47 | x_max = np.max(x, axis=1, keepdims=True) 48 | e_x = np.exp(x - x_max) 49 | x_sum = np.sum(e_x, axis=1, keepdims=True) 50 | return e_x / x_sum 51 | else: 52 | raise NotImplementedError(f"softmax for axis={axis} not implemented!") 53 | 54 | 55 | def aggregate_predictions(X, ensemble, n_classes=10): 56 | preds = np.zeros((len(X), len(ensemble.models)), dtype=np.int64) 57 | for i, model_dict in ensemble.models.items(): 58 | model = model_dict['model'] 59 | preds[:, i] = np.argmax(model.predict(X), axis=1) 60 | # count how often each label is predicted 61 | preds = np.apply_along_axis(np.bincount, axis=1, arr=preds, minlength=n_classes) 62 | return softmax(preds, axis=1) 63 | 64 | 65 | def load_ensemble(model_dir, model_init, suffix='best_model.hdf5'): 66 | models = {} 67 | for root, _, files in os.walk(model_dir): 68 | for filename in files: 69 | filename = os.path.join(root, filename) 70 | if re.match(f'{model_dir}/shard-\d+/{suffix}', filename): 71 | shard = int(root.split('/')[-1].split('-')[-1]) 72 | model = model_init() 73 | model.load_weights(filename) 74 | models[shard] = { 75 | 'model': model, 76 | 'shard': shard 77 | } 78 | # load index information 79 | with open(os.path.join(model_dir, 'splits.pkl'), 'rb') as pkl: 80 | splits = pickle.load(pkl) 81 | for i, idx in enumerate(splits): 82 | models[i]['idx'] = idx 83 | 84 | return Ensemble(model_dir, models) 85 | 86 | 87 | def split_shards(train_data, splits): 88 | """ Split dataset into shards. """ 89 | x_train, y_train = train_data 90 | return [(idx, x_train[idx], y_train[idx]) for idx in splits] 91 | 92 | 93 | def get_splits(n, n_shards=20, strategy='uniform', split_file=None): 94 | """ Generate splits for sharding, returning an iterator over indices. """ 95 | if split_file is not None and os.path.exists(split_file): 96 | with open(split_file, 'rb') as pkl: 97 | splits = pickle.load(pkl) 98 | else: 99 | strategies = { 100 | 'uniform': _uniform_strat 101 | } 102 | if strategy not in strategies: 103 | raise NotImplementedError(f'Strategy {strategy} not implemented! 
' 104 | f'Available options: {sorted(strategies)}') 105 | splits = strategies[strategy](n, n_shards) 106 | if split_file is not None: 107 | with open(split_file, 'wb') as pkl: 108 | pickle.dump(list(splits), pkl) 109 | return splits 110 | 111 | 112 | def _uniform_strat(n_data, n_shards, **kwargs): 113 | split_assignment = np.random.choice(list(range(n_shards)), n_data, replace=True) 114 | split_idx = [] 115 | for shard in list(range(n_shards)): 116 | split_idx.append(np.argwhere(split_assignment == shard)[:, 0]) 117 | return split_idx 118 | 119 | 120 | def train_models(model_init, model_folder, data, n_shards, model_filename='repaired_model.hdf5', **train_kwargs): 121 | """ Train models on given number of shards. """ 122 | (x_train, y_train), _, _ = data 123 | split_file = os.path.join(model_folder, 'splits.pkl') 124 | splits = get_splits(len(data[0][0]), n_shards, split_file=split_file) 125 | result = TrainingResult(model_folder) 126 | with measure_time() as t: 127 | for i, idx in enumerate(splits): 128 | shard_data = ((x_train[idx], y_train[idx]), data[1], data[2]) 129 | retrain_shard(model_init, model_folder, shard_data, i, model_filename=model_filename, **train_kwargs) 130 | training_time = t() 131 | report = eval_shards(model_init, model_folder, data, model_filename=model_filename) 132 | report['time'] = training_time 133 | result.update(report) 134 | result.save() 135 | 136 | 137 | def retrain_shard(model_init, model_folder, data, shard_id, model_filename='repaired_model.hdf5', **train_kwargs): 138 | """ Retrain specific shard with new data. """ 139 | model_folder = f"{model_folder}/shard-{shard_id}" 140 | weights_path = train(model_init, model_folder, data, model_filename=model_filename, **train_kwargs) 141 | return weights_path 142 | 143 | 144 | def eval_shards(model_init, model_folder, data, model_filename='poisoned_model.hdf5'): 145 | ensemble = load_ensemble(model_folder, model_init, suffix=model_filename) 146 | x_val, y_val = data[2] 147 | report = ensemble.evaluate(x_val, y_val) 148 | return report 149 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Unlearning of Features and Labels 2 | 3 | This repository contains code related to the paper [Machine Unlearning of Features and Labels](https://arxiv.org/pdf/2108.11577.pdf) published at NDSS 2023 and is structured as follows: 4 | 5 | ### Setup 6 | 7 | * We tested the code with `python3.7.7` 8 | * We recommend setting up a virtual environment (e.g. using [virtualenv](https://virtualenv.pypa.io/en/latest/user_guide.html)) 9 | * Install dependencies via `pip install -r requirements.txt` 10 | * Install optional dependencies (notebooks etc.) via `pip install -r opt_requirements.txt` 11 | * For the backdoor experiments, have a look at `example_notebooks/Cifar_data.ipynb`. This notebook shows how to set up the data as expected by our backdoor experiments. 12 | 13 | ### Code 14 | 15 | * The code for the unlearning strategies is contained in the [Unlearner](Unlearner) folder. The [DNNUnlearner](Unlearner/DNNUnlearner.py) class contains the first- and second-order update strategies; all other classes 16 | inherit from it. 17 | * The [Applications](Applications) folder contains examples of how to use the Unlearner classes, as discussed in Section 6 of the paper. 18 | 19 | ### Models 20 | 21 | Due to size limitations we do not publish every model, but we provide some to experiment with. 22 | 23 | * The [LSTM](models/LSTM) folder contains two language generation models as described in the paper. The canary sentence has been inserted 8 and 29 times, respectively, and the telephone number that will be predicted is 0123456789. 24 | * The [CNN](models/CNN) folder contains the poisoned CNN model that has been trained on the CIFAR-10 dataset. 25 | 26 | ### Example Usage 27 | 28 | We provide [examples](example_notebooks) to reproduce the results from the paper in Jupyter notebooks. 29 | 30 | ### Data 31 | 32 | Due to size limitations we did not upload the raw data for the Drebin and Enron datasets and refer to the original papers instead. The vector representations needed to run the experiments are provided; all of them can be found in the [train_test_data](train_test_data) folder. 33 | 34 | ### BibTeX 35 | 36 | If you found any of this helpful, please cite our paper. You may use the following BibTeX entry: 37 | 38 | ``` 39 | @inproceedings{WarPirWreRie20, 40 | title={Machine Unlearning of Features and Labels}, 41 | author={Alexander Warnecke and Lukas Pirch and Christian Wressnegger and Konrad Rieck}, 42 | year={2023}, 43 | booktitle={Proc.
of the 30th Network and Distributed System Security (NDSS)} 44 | } 45 | ``` 46 | -------------------------------------------------------------------------------- /Unlearner/CNNUnlearner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorflow.keras.models import Sequential 3 | from tensorflow.keras.losses import categorical_crossentropy 4 | from tensorflow.keras.layers import Conv2D, Dense, Dropout, MaxPooling2D, Flatten, AveragePooling2D, BatchNormalization 5 | from tensorflow.keras.regularizers import L2 6 | from tensorflow.keras.optimizers import Adam, SGD 7 | from tqdm import tqdm 8 | from Unlearner.DNNUnlearner import DNNUnlearner 9 | 10 | N_ROWS = 32 11 | N_CHANNELS = 3 12 | 13 | 14 | # class to "unlearn" label changes 15 | class CNNUnlearner(DNNUnlearner): 16 | def __init__(self, train, test, valid, weight_path=None, lambda_=1e-5): 17 | self.x_train = train[0] 18 | self.y_train = train[1] 19 | self.x_test = test[0] 20 | self.y_test = test[1] 21 | self.x_valid = valid[0] 22 | self.y_valid = valid[1] 23 | self.lambda_ = lambda_ 24 | self.n = self.x_train.shape[0] 25 | self.dim = self.x_train.shape[1] 26 | self.model = self.get_network(weight_path) 27 | 28 | def get_network(self, weight_path=None, optimizer='Adam', learning_rate=0.0001): 29 | n_filters = [32, 32, 64, 64, 128, 128] 30 | conv_params = dict(activation='relu', kernel_size=3, kernel_initializer='he_uniform', padding='same') 31 | 32 | model = Sequential() 33 | # 1st VGG block 34 | model.add(Conv2D(filters=n_filters[0], input_shape=(N_ROWS, N_ROWS, N_CHANNELS), **conv_params)) 35 | model.add(BatchNormalization()) 36 | model.add(Conv2D(filters=n_filters[1], **conv_params)) 37 | model.add(BatchNormalization()) 38 | model.add(MaxPooling2D(pool_size=(2, 2))) 39 | model.add(Dropout(0.1)) 40 | # 2nd VGG block 41 | model.add(Conv2D(filters=n_filters[2], **conv_params)) 42 | model.add(BatchNormalization()) 43 | model.add(Conv2D(filters=n_filters[3], **conv_params)) 44 | model.add(BatchNormalization()) 45 | model.add(MaxPooling2D(pool_size=(2, 2))) 46 | model.add(Dropout(0.1)) 47 | # 3rd VGG block 48 | model.add(Conv2D(filters=n_filters[4], **conv_params)) 49 | model.add(BatchNormalization()) 50 | model.add(Conv2D(filters=n_filters[5], **conv_params)) 51 | model.add(BatchNormalization()) 52 | model.add(MaxPooling2D(pool_size=(2, 2))) 53 | model.add(Dropout(0.2)) 54 | 55 | # dense and final layers 56 | model.add(Flatten()) 57 | model.add(Dense(1024, activation='relu', kernel_initializer='he_uniform')) 58 | model.add(BatchNormalization()) 59 | model.add(Dropout(0.3)) 60 | model.add(Dense(units=10, activation='softmax')) 61 | 62 | if optimizer == 'Adam': 63 | model.compile(optimizer=Adam(learning_rate=learning_rate, amsgrad=True, epsilon=0.1), 64 | loss=categorical_crossentropy, metrics='accuracy') 65 | else: 66 | model.compile(optimizer=SGD(learning_rate=learning_rate), loss=categorical_crossentropy, metrics='accuracy') 67 | if weight_path is not None: 68 | model.load_weights(weight_path) 69 | print(model.summary()) 70 | return model 71 | 72 | # returns copy of train/test/valid set in which certain coordinates can be set to zero 73 | # indices_to_delete is a numpy array of shape (N,2) where each row represents a coordinate to be deleted. 
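    # e.g. indices_to_delete = np.array([[0, 0], [0, 1]]) sets the pixels at (row 0, col 0) and
    # (row 0, col 1) to zero in every image (or only in 'affected_samples', if given), across all colour channels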
74 | def get_data_copy(self, data_name, indices_to_delete, **kwargs): 75 | assert data_name in ['train', 'test', 'valid'] 76 | affected_samples = kwargs['affected_samples'] if 'affected_samples' in kwargs else None 77 | if data_name == 'train': 78 | data_cpy = self.x_train.copy() 79 | elif data_name == 'test': 80 | data_cpy = self.x_test.copy() 81 | else: 82 | data_cpy = self.x_valid.copy() 83 | if len(indices_to_delete) > 0: 84 | if affected_samples is not None: 85 | for affected_idx in affected_samples: 86 | data_cpy[affected_idx, indices_to_delete[:, 0], indices_to_delete[:, 1], :] = 0 87 | else: 88 | data_cpy[:, indices_to_delete[:, 0], indices_to_delete[:, 1], :] = 0 89 | return data_cpy 90 | 91 | # returns indices of samples in training_data where the features corresponding to 'indices_to_delete' are not zero 92 | # indices_to_delete is a numpy array of shape (N,2) where each row represents one coordinate to check 93 | def get_relevant_indices(self, indices_to_delete): 94 | relevant_rows = set() 95 | # sum all channels and take all indices where at least one of the coordinates is not zero 96 | x_train_channel_sum = np.sum(self.x_train, axis=3) 97 | for coordinate in indices_to_delete: 98 | nz = x_train_channel_sum[:, coordinate[0], coordinate[1]].nonzero()[0] 99 | relevant_rows = relevant_rows.union(set(nz)) 100 | return list(relevant_rows) 101 | 102 | # calculates influences of pixels when setting them to 0 during training via influece functions. 103 | # deletion_size specifies size of pixels to set to 0. Deletion_size=1 sets 1 pixel at a time to zero whereas 104 | # deletion_size=4 sets blocks of 4x4 to zero 105 | def explain_prediction(self, x, y, deletion_size=1, **kwargs): 106 | assert N_ROWS % deletion_size == 0 107 | batch_size = 500 if 'batch_size' not in kwargs else kwargs['batch_size'] 108 | rounds = 1 if 'rounds' not in kwargs else kwargs['rounds'] 109 | scale = 75000 if 'scale' not in kwargs else kwargs['scale'] 110 | damping = 1e-2 if 'damping' not in kwargs else kwargs['damping'] 111 | verbose = False if 'verbose' not in kwargs else kwargs['verbose'] 112 | relevances = np.zeros_like(x) 113 | grad_x = self.get_gradients(x, y) 114 | H_inv_grad_x, diverged = self.get_inv_hvp_lissa(grad_x, batch_size, scale, damping, verbose, rounds) 115 | print('Calculating influence for {} rows'.format(N_ROWS/deletion_size)) 116 | for i in tqdm(range(0, N_ROWS, deletion_size)): 117 | for j in range(0, N_ROWS, deletion_size): 118 | square_to_delete = np.array(np.meshgrid(range(i, i+deletion_size), 119 | range(j, j+deletion_size))).T.reshape(-1, 2) 120 | relevant_indices = self.get_relevant_indices(indices_to_delete=square_to_delete) 121 | if len(relevant_indices) == 0: 122 | relevances[0, i:i+deletion_size, j:j+deletion_size] = 0 123 | continue 124 | else: 125 | max_indices = 256 126 | if len(relevant_indices) > max_indices: 127 | relevant_indices = np.random.choice(relevant_indices, max_indices, replace=False) 128 | x_train_zero = self.x_train[relevant_indices].copy() 129 | x_train_zero[:, i:i+deletion_size, j:j+deletion_size] = 0 130 | d_L_z = self.get_gradients(self.x_train[relevant_indices], self.y_train[relevant_indices]) 131 | d_L_z_delta = self.get_gradients(x_train_zero, self.y_train[relevant_indices]) 132 | diff = [dLd - dL for dLd, dL in zip(d_L_z_delta, d_L_z)] 133 | loss_diff_per_param = [np.sum(d*Hd) for d, Hd in zip(diff, H_inv_grad_x)] 134 | relevances[0, i:i+deletion_size, j:j+deletion_size] = np.sum(loss_diff_per_param)/self.n 135 | return relevances, diverged 136 | 137 | 
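# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original code): one way the influence-based
# pixel relevances returned by CNNUnlearner.explain_prediction could be consumed.
# The weight path, the data tuples and the 4x4 deletion size are placeholders,
# not values prescribed by the paper; adapt them to your own setup.
def _example_explain_prediction(weight_path, train, test, valid):
    """Compute pixel-block relevances for the first test image (illustration only)."""
    # build the unlearner around an already trained model checkpoint
    unlearner = CNNUnlearner(train, test, valid, weight_path=weight_path)
    x, y = test[0][:1], test[1][:1]
    # relevances has the same shape as x; diverged flags a failed LiSSA estimate
    relevances, diverged = unlearner.explain_prediction(x, y, deletion_size=4, verbose=True)
    if diverged:
        print('LiSSA estimate of the inverse HVP diverged; a larger scale or damping value may help.')
    # every pixel of a 4x4 block (and every channel) holds the same score, so sample one entry
    # per block and rank the blocks by their estimated influence on the training loss
    block_scores = relevances[0, ::4, ::4, 0]
    ranked_blocks = np.dstack(np.unravel_index(np.argsort(-block_scores, axis=None), block_scores.shape))[0]
    return relevances, ranked_blocks
# ---------------------------------------------------------------------------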
138 | class CNNUnlearnerMedium(CNNUnlearner): 139 | def __init__(self, *args, **kwargs): 140 | super().__init__(*args, **kwargs) 141 | 142 | def get_network(self, weight_path=None, optimizer='Adam', learning_rate=0.0001): 143 | n_filters = [64, 64, 128, 128, 256, 256] 144 | kernel_size = 3 145 | model = Sequential() 146 | model.add(Conv2D(filters=n_filters[0], kernel_size=kernel_size, activation='relu', 147 | kernel_regularizer=L2(self.lambda_), padding='same', input_shape=(N_ROWS, N_ROWS, N_CHANNELS))) 148 | model.add(Conv2D(filters=n_filters[1], kernel_size=kernel_size, activation='relu', 149 | kernel_regularizer=L2(self.lambda_), padding='same')) 150 | model.add(MaxPooling2D(pool_size=(2, 2))) 151 | model.add(Dropout(0.6)) 152 | model.add(Conv2D(filters=n_filters[2], kernel_size=kernel_size, activation='relu', 153 | kernel_regularizer=L2(self.lambda_), padding='same')) 154 | model.add(Conv2D(filters=n_filters[3], kernel_size=kernel_size, activation='relu', 155 | kernel_regularizer=L2(self.lambda_), padding='same')) 156 | model.add(MaxPooling2D(pool_size=(2, 2))) 157 | model.add(Dropout(0.6)) 158 | model.add(Conv2D(filters=n_filters[4], kernel_size=kernel_size, activation='relu', 159 | kernel_regularizer=L2(self.lambda_), padding='same')) 160 | model.add(Conv2D(filters=n_filters[5], kernel_size=kernel_size, activation='relu', 161 | kernel_regularizer=L2(self.lambda_), padding='same')) 162 | model.add(MaxPooling2D(pool_size=(2, 2))) 163 | model.add(Dropout(0.6)) 164 | model.add(Flatten()) 165 | model.add(Dense(64, activation='relu', kernel_regularizer=L2(self.lambda_))) 166 | model.add(Dropout(0.6)) 167 | # final dense layer 168 | model.add(Dense(units=10, activation='softmax', kernel_regularizer=L2(self.lambda_))) 169 | if optimizer == 'Adam': 170 | model.compile(optimizer=Adam(learning_rate=learning_rate, amsgrad=True), 171 | loss=categorical_crossentropy, metrics='accuracy') 172 | else: 173 | model.compile(optimizer=SGD(learning_rate=learning_rate), loss=categorical_crossentropy, metrics='accuracy') 174 | if weight_path is not None: 175 | model.load_weights(weight_path) 176 | # print(model.summary()) 177 | return model 178 | 179 | 180 | class CNNUnlearnerSmall(CNNUnlearner): 181 | def __init__(self, *args, **kwargs): 182 | super().__init__(*args, **kwargs) 183 | 184 | def get_network(self, weight_path=None, optimizer='Adam', learning_rate=0.0001): 185 | n_filters = [64, 64, 128, 128] 186 | kernel_size = 3 187 | model = Sequential() 188 | model.add(Conv2D(filters=n_filters[0], kernel_size=kernel_size, activation='relu', 189 | kernel_regularizer=L2(self.lambda_), padding='same', input_shape=(N_ROWS, N_ROWS, N_CHANNELS))) 190 | model.add(Conv2D(filters=n_filters[1], kernel_size=kernel_size, activation='relu', 191 | kernel_regularizer=L2(self.lambda_), padding='same')) 192 | model.add(AveragePooling2D(pool_size=(2, 2))) 193 | model.add(Dropout(0.6)) 194 | model.add(Conv2D(filters=n_filters[2], kernel_size=kernel_size, activation='relu', 195 | kernel_regularizer=L2(self.lambda_), padding='same')) 196 | model.add(Conv2D(filters=n_filters[3], kernel_size=kernel_size, activation='relu', 197 | kernel_regularizer=L2(self.lambda_), padding='same')) 198 | model.add(AveragePooling2D(pool_size=(2, 2))) 199 | model.add(Dropout(0.6)) 200 | model.add(Flatten()) 201 | model.add(Dense(128, activation='relu', kernel_regularizer=L2(self.lambda_))) 202 | model.add(Dropout(0.6)) 203 | # final dense layer 204 | model.add(Dense(units=10, activation='softmax', kernel_regularizer=L2(self.lambda_))) 205 | if 
optimizer == 'Adam': 206 | model.compile(optimizer=Adam(learning_rate=learning_rate, amsgrad=True), 207 | loss=categorical_crossentropy, metrics='accuracy') 208 | else: 209 | model.compile(optimizer=SGD(learning_rate=learning_rate), loss=categorical_crossentropy, metrics='accuracy') 210 | if weight_path is not None: 211 | model.load_weights(weight_path) 212 | print(model.summary()) 213 | return model 214 | 215 | 216 | class CNNUnlearnerAvgPooling(CNNUnlearner): 217 | def __init__(self, *args, **kwargs): 218 | super().__init__(*args, **kwargs) 219 | 220 | def get_network(self, weight_path=None, optimizer='Adam', learning_rate=0.0001): 221 | n_filters = [64, 64, 128, 128] 222 | kernel_size = 3 223 | model = Sequential() 224 | model.add(Conv2D(filters=n_filters[0], kernel_size=kernel_size, activation='relu', 225 | kernel_regularizer=L2(self.lambda_), padding='same', input_shape=(N_ROWS, N_ROWS, N_CHANNELS))) 226 | model.add(Conv2D(filters=n_filters[1], kernel_size=kernel_size, activation='relu', 227 | kernel_regularizer=L2(self.lambda_), padding='same')) 228 | model.add(AveragePooling2D(pool_size=(2, 2))) 229 | model.add(Dropout(0.6)) 230 | model.add(Conv2D(filters=n_filters[2], kernel_size=kernel_size, activation='relu', 231 | kernel_regularizer=L2(self.lambda_), padding='same')) 232 | model.add(Conv2D(filters=n_filters[3], kernel_size=kernel_size, activation='relu', 233 | kernel_regularizer=L2(self.lambda_), padding='same')) 234 | model.add(AveragePooling2D(pool_size=(2, 2))) 235 | model.add(Dropout(0.6)) 236 | model.add(Flatten()) 237 | model.add(Dense(128, activation='relu', kernel_regularizer=L2(self.lambda_))) 238 | model.add(Dropout(0.6)) 239 | # final dense layer 240 | model.add(Dense(units=10, activation='softmax', kernel_regularizer=L2(self.lambda_))) 241 | if optimizer == 'Adam': 242 | model.compile(optimizer=Adam(learning_rate=learning_rate, amsgrad=True), 243 | loss=categorical_crossentropy, metrics='accuracy') 244 | else: 245 | model.compile(optimizer=SGD(learning_rate=learning_rate), loss=categorical_crossentropy, metrics='accuracy') 246 | if weight_path is not None: 247 | model.load_weights(weight_path) 248 | print(model.summary()) 249 | return model 250 | -------------------------------------------------------------------------------- /Unlearner/CanaryCallback.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | class CanaryCallback(tf.keras.callbacks.Callback): 5 | """After each training epoch, generate a sequence based on the canary sentence""" 6 | 7 | def __init__(self, canary_start, int_to_char, canary_number, frequency=10): 8 | super(CanaryCallback, self).__init__() 9 | self.canary_start = canary_start 10 | self.int_to_char = int_to_char 11 | self.char_to_int = {v: k for k,v in self.int_to_char.items()} 12 | self.canary_number = canary_number 13 | self.number_char_indices = np.unique([self.char_to_int[c] for c in self.canary_number]) 14 | self.frequency = frequency 15 | 16 | def on_epoch_end(self, epoch, logs=None): 17 | chars_to_predict = 20 18 | if epoch % self.frequency == 0: 19 | start_seq = np.array([self.char_to_int[s] for s in self.canary_start]) 20 | start_seq = start_seq.reshape((1, len(start_seq), 1)) 21 | prediction_str, dist_str = '', '' 22 | digit_probas_str = ' - '.join(['{}:{{:.4f}}'.format(self.int_to_char[i]) for i in self.number_char_indices]) 23 | # generate characters 24 | for i in range(chars_to_predict): 25 | index_distribution = self.model.predict(start_seq) 
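                # greedy decoding: the most probable character is appended to the prediction and
                # the seed window is shifted forward by one position before the next prediction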
26 | char_index = np.argmax(index_distribution) 27 | prediction_str += self.int_to_char[char_index] 28 | start_seq = np.append(start_seq, char_index.reshape(1, 1, 1), axis=1) 29 | start_seq = start_seq[:, 1:start_seq.shape[1] + 1, :] 30 | # save distribution of numbers after the canary 31 | if i < len(self.canary_number): 32 | digit_probas = index_distribution[0, self.number_char_indices] 33 | dist_str += 'Step {} : '.format(i) + digit_probas_str.format(*digit_probas) + '\n' 34 | print("\nSeed:") 35 | print("\"", self.canary_start, "\"") 36 | print('...') 37 | print(prediction_str) 38 | print("Number distribution:") 39 | print(dist_str) 40 | 41 | def on_predict_end(self, logs=None): 42 | self.on_epoch_end(0) 43 | 44 | # for some reasons this methods deletes the history. dunno why 45 | #def on_train_end(self, logs=None): 46 | # self.on_epoch_end(0) 47 | -------------------------------------------------------------------------------- /Unlearner/DPLRUnlearner.py: -------------------------------------------------------------------------------- 1 | from scipy.optimize import minimize 2 | from Unlearner.LRUnlearner import LogisticRegressionUnlearner 3 | from scipy.special import expit 4 | import numpy as np 5 | import scipy.sparse as sp 6 | import time 7 | from sklearn.metrics import classification_report, roc_curve, precision_recall_curve, auc 8 | from scipy.linalg import inv 9 | import json 10 | 11 | 12 | class DPLRUnlearner(LogisticRegressionUnlearner): 13 | def __init__(self, train_data, test_data, voc, epsilon, delta, sigma, lambda_, b=None, category_to_idx_dict=None): 14 | self.set_train_test_data(train_data, test_data) 15 | self.lambda_ = lambda_ 16 | self.feature2dim = voc 17 | self.dim2feature = {v:k for k,v in self.feature2dim.items()} 18 | self.epsilon = epsilon 19 | self.delta = delta 20 | self.sigma = sigma 21 | self.lambda_ = lambda_ 22 | self.voc = voc 23 | self.category_to_idx_dict = category_to_idx_dict 24 | self.theta = np.random.standard_normal(self.x_train.shape[1]) # sample weights normal distributed 25 | self.model_param_str = 'lambda={}_epsilon={}_delta={}_sigma={}'.format( 26 | self.lambda_, self.epsilon, self.delta, self.sigma) 27 | if b is None: 28 | self.b = np.random.normal(0, self.sigma, size=self.x_train.shape[1]) if self.sigma != 0 else np.zeros(self.x_train.shape[1]) 29 | else: 30 | self.b = b 31 | self.gradient_calls = 0 32 | 33 | # computes l(x,y;theta). if x and y contain multiple samples l is summed up over them 34 | # we use l(x,y) = log(1+exp(-y*theta^T*x)) 35 | def get_loss_l(self, theta, x, y): 36 | dot_prod = x.dot(theta) * y 37 | data_loss = -np.log(expit(dot_prod)) 38 | total_loss = np.sum(data_loss, axis=0) 39 | return total_loss 40 | 41 | # computes L(x,y;theta) 42 | def get_loss_L(self, theta, x, y): 43 | summed_loss = self.get_loss_l(theta, x, y) 44 | total_loss = summed_loss + 0.5*self.lambda_*np.dot(theta, theta.T) + np.dot(self.b, theta) 45 | return total_loss 46 | 47 | # return total loss L on train set. 48 | def get_train_set_loss(self, theta): 49 | return self.get_loss_L(theta, self.x_train, self.y_train) 50 | 51 | # get gradient w.r.t. 
parameters (-y*x*sigma(-y*Theta^Tx)) for y in {-1,1} 52 | def get_gradient_l(self, theta, x, y): 53 | assert x.shape[0] == y.shape[0], f'{x.shape[0]} != {y.shape[0]}' 54 | dot_prod = x.dot(theta) * y 55 | factor = -expit(-dot_prod) * y 56 | # we need to multiply every row of x by the corresponding value in factor vector 57 | if type(x) is sp.csr_matrix: 58 | factor_m = sp.diags(factor) 59 | res = factor_m.dot(x) 60 | else: 61 | res = np.expand_dims(factor, axis=1) * x 62 | grad = res.sum(axis=0) 63 | if type(grad) is np.matrix: 64 | grad = grad.A 65 | return grad 66 | 67 | def get_gradient_L(self, theta, x, y): 68 | summed_grad = self.get_gradient_l(theta, x, y) 69 | total_grad = summed_grad + self.lambda_ * theta + self.b 70 | total_grad = total_grad.squeeze() 71 | self.gradient_calls += 1 72 | return total_grad 73 | 74 | # this is the gradient of L on the train set. This should be close to zero after fitting. 75 | def get_train_set_gradient(self, theta): 76 | return self.get_gradient_L(theta, self.x_train, self.y_train) 77 | 78 | # computes inverse hessian for data x. As we only need the inverse hessian on the entire dataset we return the 79 | # Hessian on the full L loss. 80 | def get_inverse_hessian(self, x, theta=None): 81 | if theta is None: 82 | theta = self.theta 83 | dot = x.dot(theta) 84 | probs = expit(dot) 85 | weighting = probs * (1-probs) # sigma(-t) = (1-sigma(t)) 86 | if type(x) is sp.csr_matrix: 87 | weighting_m = sp.diags(weighting) 88 | p1 = x.transpose().dot(weighting_m) 89 | else: 90 | p1 = x.transpose() * np.expand_dims(weighting, axis=0) 91 | res = p1.dot(x) 92 | res += self.lambda_ * np.eye(self.dim) # hessian of regularization 93 | cov_inv = inv(res) 94 | return cov_inv 95 | 96 | def get_first_order_update(self, G, unlearning_rate, theta=None): 97 | if theta is None: 98 | theta = self.theta 99 | return theta - unlearning_rate * G 100 | 101 | def get_second_order_update(self, x, y, G, theta=None): 102 | if theta is None: 103 | theta = self.theta 104 | H_inv = self.get_inverse_hessian(x, theta) 105 | return theta - np.dot(H_inv, G) 106 | 107 | def get_order_update_stepwise(self, indices, stepsize, remove, n_replacements, order, unlearning_rate=None): 108 | if order == 1: 109 | assert unlearning_rate is not None 110 | l_indices = len(indices) 111 | theta_tmp = self.theta.copy() 112 | x_tmp = self.x_train.copy() 113 | if remove: 114 | for idx in range(0, l_indices, stepsize): 115 | indices_to_remove = indices[idx:idx+stepsize] 116 | x_delta, changed_rows = self.copy_and_replace(x_tmp, indices_to_remove, remove) 117 | z_tmp = (x_tmp[changed_rows], self.y_train[changed_rows]) 118 | z_delta_tmp = (x_delta[changed_rows], self.y_train[changed_rows]) 119 | G_tmp = self.get_G(z_tmp, z_delta_tmp, theta_tmp) 120 | if order == 1: 121 | theta_tmp = self.get_first_order_update(G_tmp, unlearning_rate, theta=theta_tmp) 122 | else: 123 | theta_tmp = self.get_second_order_update(x_delta, self.y_train, G_tmp, theta=theta_tmp) 124 | x_tmp = x_delta.copy() 125 | else: 126 | replacements_per_round = n_replacements // stepsize 127 | for i in range(stepsize): 128 | x_delta, changed_rows = self.copy_and_replace(x_tmp, indices, remove, replacements_per_round) 129 | z_tmp = (x_tmp[changed_rows], self.y_train[changed_rows]) 130 | z_delta_tmp = (x_delta[changed_rows], self.y_train[changed_rows]) 131 | G_tmp = self.get_G(z_tmp, z_delta_tmp, theta_tmp) 132 | if order == 1: 133 | theta_tmp = self.get_first_order_update(G_tmp, unlearning_rate, theta=theta_tmp) 134 | else: 135 | theta_tmp = 
self.get_second_order_update(x_delta, self.y_train, G_tmp, theta=theta_tmp) 136 | x_tmp = x_delta.copy() 137 | 138 | return theta_tmp, x_tmp 139 | 140 | def get_fine_tuning_update(self, x, y, learning_rate, batch_size=32): 141 | new_theta = self.theta.copy() 142 | for i in range(0, x.shape[0], batch_size): 143 | grad = self.get_gradient_L(new_theta, x[i:i+batch_size], y[i:i+batch_size]) 144 | new_theta -= 1./batch_size * learning_rate * grad 145 | return new_theta 146 | 147 | # given indices_to_delete (i.e. column indices) computes row indices where the column indices are non-zero 148 | def get_relevant_indices(self, indices_to_delete): 149 | # get the rows (samples) where the features appear 150 | relevant_indices = self.x_train[:, indices_to_delete].nonzero()[0] 151 | # to avoid having samples more than once 152 | relevant_indices = np.unique(relevant_indices) 153 | return relevant_indices 154 | 155 | def get_G(self, z, z_delta, theta=None): 156 | """ 157 | Computes G as defined in the paper using z=(x,y) and z_delta=(x_delta, y_delta) 158 | :param z: Tuple of original (unchanged) data (np.array /csr_matrix , np.array) 159 | :param z_delta: Tuple of changed data (np.array /csr_matrix , np.array) 160 | :return: G=\sum \nabla l(z_delta)-\nabla l(z) 161 | """ 162 | if theta is None: 163 | theta = self.theta 164 | grad_z_delta = self.get_gradient_l(theta, z_delta[0], z_delta[1]) 165 | grad_z = self.get_gradient_l(theta, z[0], z[1]) 166 | diff = grad_z_delta - grad_z 167 | if type(z[0]) is sp.csr_matrix: 168 | diff = diff.squeeze() 169 | return diff 170 | 171 | def predict(self, x, theta): 172 | logits = expit(x.dot(theta)) 173 | y_pred = np.array([1 if l >= 0.5 else -1 for l in logits]) 174 | return y_pred 175 | 176 | def get_performance(self, x, y, theta): 177 | assert x.shape[0] == y.shape[0], '{} != {}'.format(x.shape[0], y.shape[0]) 178 | logits = expit(x.dot(theta)) 179 | y_pred = np.array([1 if l >= 0.5 else -1 for l in logits]) 180 | accuracy = len(np.where(y_pred == y)[0])/x.shape[0] 181 | fpr, tpr, _ = roc_curve(y, logits) 182 | prec, rec, _ = precision_recall_curve(y, logits) 183 | auc_roc = auc(fpr, tpr) 184 | auc_pr = auc(rec, prec) 185 | report = classification_report(y, y_pred, digits=4, output_dict=True) 186 | n_data = x.shape[0] 187 | loss = self.get_loss_L(theta, x, y) 188 | grad = self.get_gradient_L(theta, x, y) 189 | report['test_loss'] = loss 190 | report['gradient_norm'] = np.sum(grad**2) 191 | report['train_loss'] = self.get_train_set_loss(theta) 192 | report['gradient_norm_train'] = np.sum(self.get_train_set_gradient(theta)**2) 193 | report['accuracy'] = accuracy 194 | report['test_roc_auc'] = auc_roc 195 | report['test_pr_auc'] = auc_pr 196 | return report 197 | 198 | def fit_model(self): 199 | start_time = time.time() 200 | #res = minimize(self.get_train_set_loss, self.theta, method='L-BFGS-B', jac=self.get_train_set_gradient, 201 | # options={'disp':True}) 202 | res = minimize(self.get_train_set_loss, self.theta, method='L-BFGS-B', jac=self.get_train_set_gradient, 203 | options={'maxiter': 1000}) 204 | end_time = time.time() 205 | total_time = end_time-start_time 206 | self.theta = res.x 207 | #print(f'Fitting took {total_time} seconds.') 208 | performance = self.get_performance(self.x_test, self.y_test, self.theta) 209 | acc = performance['accuracy'] 210 | gr = performance['gradient_norm_train'] 211 | #print(f'Achieved accuracy: {acc}') 212 | #print(f'Gradient residual train: {gr}') 213 | #print(json.dumps(performance, indent=4)) 214 | 215 | def 
get_n_largest_features(self, n): 216 | theta_abs = np.abs(self.theta) 217 | largest_features_ind = np.argsort(-theta_abs)[:n] 218 | largest_features = [self.dim2feature[d] for d in largest_features_ind] 219 | return largest_features_ind, largest_features 220 | 221 | #@staticmethod 222 | def copy_and_replace(self, x, indices, remove=False, n_replacements=0): 223 | """ 224 | Helper function that sets 'indices' in 'arr' to 'value' 225 | :param x - numpy array or csr_matrix of shape (n_samples, n_features) 226 | :param indices - the columns where the replacement should take place 227 | :param remove - if true the entire columns will be deleted (set to zero). Otherwise values will be set to random value 228 | :param n_replacements - if remove is False one can specify how many samples are adjusted. 229 | :return copy of arr with changes, changed row indices 230 | """ 231 | x_cpy = x.copy() 232 | if sp.issparse(x): 233 | x_cpy = x_cpy.tolil() 234 | if remove: 235 | relevant_indices = x_cpy[:, indices].nonzero()[0] 236 | # to avoid having samples more than once 237 | relevant_indices = np.unique(relevant_indices) 238 | x_cpy[:, indices] = 0 239 | else: 240 | relevant_indices = np.random.choice(x_cpy.shape[0], n_replacements, replace=False) 241 | unique_indices = set(np.unique(x_cpy[:, indices]).tolist()) 242 | if unique_indices == {0, 1}: 243 | voc_rev = {v:k for k,v in self.voc.items()} 244 | # if we have only binary features we need the category dict 245 | #x_cpy[np.ix_(relevant_indices, indices)] = - 1 * x_cpy[np.ix_(relevant_indices, indices)] + 1 246 | for ri in relevant_indices: 247 | for category, category_columns in self.category_to_idx_dict.items(): 248 | #print(f'Processing {category}') 249 | category_data = x_cpy[ri, category_columns] 250 | # check if the category is set 251 | ones = np.where(category_data == 1)[0] 252 | if len(ones) > 0: 253 | assert len(ones) == 1 254 | ind_to_delete = category_columns[ones[0]] 255 | #print(f'Found category {voc_rev[ind_to_delete]}') 256 | x_cpy[ri, ind_to_delete] = 0 257 | if len(category_columns) > 1: 258 | list_to_choose_from = [i for i in category_columns if i != ind_to_delete] 259 | else: 260 | list_to_choose_from = [] 261 | else: 262 | list_to_choose_from = category_columns 263 | if len(list_to_choose_from) > 0: 264 | col_to_set = np.random.choice(list_to_choose_from) 265 | #print(f'Set {voc_rev[col_to_set]} to one') 266 | x_cpy[ri, col_to_set] = 1 267 | 268 | else: 269 | # else we choose random values 270 | for idx in indices: 271 | #random_values = np.random.choice(x_cpy[:, idx], n_replacements, replace=False) 272 | #x_cpy[relevant_indices, idx] = random_values 273 | mean = np.mean(x_cpy[:, idx]) 274 | std = np.std(x_cpy[:,idx]) 275 | #x_cpy[relevant_indices, idx] = np.random.normal(0,6*std) 276 | x_cpy[relevant_indices, idx] = np.zeros(relevant_indices.shape) 277 | if sp.issparse(x): 278 | x_cpy = x_cpy.tocsr() 279 | return x_cpy, relevant_indices -------------------------------------------------------------------------------- /Unlearner/DrebinDataGenerator.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.utils import Sequence as seq 2 | import numpy as np 3 | 4 | 5 | class DrebinDataGenerator(seq): 6 | def __init__(self, data, labels, batch_size, shuffle=True, class_weights=None): 7 | np.random.seed(42) 8 | assert data.shape[0] == labels.shape[0] 9 | self.data = data 10 | self.labels = labels 11 | self.batch_size = batch_size 12 | self.shuffle = shuffle 13 | if shuffle: 14 | 
self.on_epoch_end() 15 | if class_weights is not None: 16 | self.class_weights = class_weights 17 | else: 18 | self.class_weights = {0:1, 1:1} 19 | 20 | def on_epoch_end(self): 21 | if self.shuffle: 22 | new_indices = np.random.choice(range(self.data.shape[0]), self.data.shape[0], replace=False) 23 | self.data = self.data[new_indices] 24 | self.labels = self.labels[new_indices] 25 | 26 | def __len__(self): 27 | return int(np.ceil(self.data.shape[0]/self.batch_size)) 28 | 29 | def __getitem__(self, idx): 30 | data_batch = self.data[idx*self.batch_size:(idx+1)*self.batch_size, :].toarray() 31 | label_batch = [self.labels[idx*self.batch_size:(idx + 1)*self.batch_size]] 32 | classes = np.argmax(self.labels[idx*self.batch_size:(idx + 1)*self.batch_size], axis=1) 33 | samples_weights = np.array([self.class_weights[0] if c==0 else self.class_weights[1] for c in classes]) 34 | return data_batch, label_batch, samples_weights 35 | -------------------------------------------------------------------------------- /Unlearner/EnsembleLR.py: -------------------------------------------------------------------------------- 1 | from .ensemble import softmax 2 | from sklearn.metrics import classification_report 3 | import numpy as np 4 | 5 | 6 | class LinearEnsemble: 7 | 8 | def __init__(self, models, n_classes): 9 | #assert len(models) == len(training_data_splits) 10 | self.models = models 11 | self.n_classes = n_classes 12 | 13 | def predict(self, x): 14 | preds = np.zeros((x.shape[0], len(self.models)), dtype=np.int64) 15 | for i, model_tuple in enumerate(self.models): 16 | model = model_tuple[0] 17 | preds[:, i] = model.predict(x, model.theta) 18 | preds[np.where(preds == -1)] = 0 19 | preds = np.apply_along_axis(np.bincount, axis=1, arr=preds, minlength=self.n_classes) 20 | preds_max = np.argmax(preds, axis=1) 21 | preds_max[np.where(preds_max == 0)[0]] = -1 22 | #probas = softmax(preds, axis=1) 23 | return preds_max 24 | 25 | def train_ensemble(self): 26 | for model_tuple in self.models: 27 | model_tuple[0].fit_model() 28 | 29 | def evaluate(self, x, y): 30 | Y_pred = self.predict(x) 31 | rep = classification_report(y, Y_pred, output_dict=True) 32 | return rep, rep['accuracy'] 33 | 34 | def update_models(self, data_indices_to_delete): 35 | for i in range(len(self.models)): 36 | model, data_indices = self.models[i][0], self.models[i][1] 37 | to_delete = [] 38 | for j, data_idx in enumerate(data_indices): 39 | if data_idx in data_indices_to_delete: 40 | to_delete.append(j) 41 | new_model_indices = [k for k in range(model.x_train.shape[0]) if k not in to_delete] 42 | self.models[i][0].x_train = self.models[i][0].x_train[new_model_indices] 43 | self.models[i][0].y_train = self.models[i][0].y_train[new_model_indices] 44 | 45 | def get_gradient_calls(self): 46 | n_gradients = 0 47 | for model_tup in self.models: 48 | n_gradients += model_tup[0].gradient_calls * model_tup[0].x_train.shape[0] 49 | return n_gradients 50 | -------------------------------------------------------------------------------- /Unlearner/LRUnlearner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | import pickle as pkl 4 | import os 5 | import hashlib 6 | from sklearn.linear_model import LogisticRegression 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import classification_report, roc_curve, precision_recall_curve, auc 9 | import scipy.sparse as sp 10 | from scipy.special import expit 11 | from scipy.linalg import inv 12 | 
from Unlearner.DNNUnlearner import DNNUnlearner 13 | 14 | 15 | class LogisticRegressionUnlearner(DNNUnlearner): 16 | def __init__(self, train_data, test_data, voc, lambda_=0.01): 17 | """ 18 | Implementation of Unlearning for sklearn logistic regression. 19 | Notice that the sklearn implementation of logistic regression minimizes the loss 20 | L = 0.5*theta.dot(theta) + C * sum_{x,y}l(x,y;theta) where l(x,y;theta)=log(1+exp(-theta.dot(x)*y)) 21 | We use C=1/(N*lambda) to have the average loss (by the 1/N) and a regularization parameter lambda 22 | :param train_data: Tuple (x_train, y_train); x_train holds one sample per row and one feature per column 23 | :param test_data: Tuple (x_test, y_test) in the same format; labels are either 1 or -1 24 | :param voc: dict mapping from token to int aka dimension in feature space 25 | :param lambda_: constant for l2 regularization of parameters 26 | """ 27 | self.set_train_test_data(train_data, test_data) 28 | assert self.x_train.shape[0] == self.y_train.shape[0] 29 | self.lambda_ = lambda_ 30 | self.voc = voc 31 | self.C = 1.0 / (self.n * lambda_) 32 | self.normed = True if np.allclose(np.sum(self.x_train ** 2, axis=1), np.ones(self.n)) else False 33 | self.model = LogisticRegression(C=self.C, fit_intercept=False, solver='lbfgs', warm_start=True, max_iter=5000, tol=1e-8) 34 | self.model_param_str = '{}_lambda={}'.format('normed' if self.normed else 'unnormed', self.lambda_) 35 | 36 | def set_train_test_data(self, train_data, test_data): 37 | self.x_train = train_data[0] 38 | self.x_test = test_data[0] 39 | self.y_train = train_data[1] 40 | self.y_test = test_data[1] 41 | self.n = self.x_train.shape[0] 42 | self.dim = self.x_train.shape[1] 43 | 44 | def set_x_train(self, x_train): 45 | self.x_train = x_train.toarray() if sp.issparse(x_train) else x_train 46 | self.n = x_train.shape[0] 47 | self.dim = x_train.shape[1] 48 | 49 | def set_y_train(self, y_train): 50 | self.y_train = y_train 51 | self.n = y_train.shape[0] 52 | 53 | def set_model(self, model): 54 | self.model = model 55 | self.theta = np.squeeze(model.coef_.T) 56 | 57 | def train_model(self, model_folder, **kwargs): 58 | self.model.fit(self.x_train, self.y_train) 59 | self.theta = np.squeeze(self.model.coef_.T) 60 | model_name = 'lr_model_{}.pkl'.format(self.model_param_str) 61 | report_name = 'lr_performance_{}.json'.format(self.model_param_str) 62 | report = self.get_performance(self.x_test, self.y_test, model=self.model) 63 | print('Training results ({}):'.format(self.model_param_str)) 64 | print(json.dumps(report, indent=4)) 65 | json.dump(report, open(os.path.join(model_folder, report_name), 'w'), indent=4) 66 | pkl.dump(self.model, open(os.path.join(model_folder, model_name), 'wb')) 67 | self.set_model(self.model) 68 | 69 | # indices is a list of dimensions referring to the training set 70 | def retrain_model(self, indices_to_delete, save_folder, retrain_labels=False, **kwargs): 71 | new_model = LogisticRegression(C=self.C, fit_intercept=False, solver='lbfgs', warm_start=True, max_iter=1000) 72 | if retrain_labels: 73 | x_train_delta = self.x_train 74 | x_test_delta = self.x_test 75 | y_train_delta = self.get_data_copy_y('train', indices_to_delete=indices_to_delete) 76 | y_test_delta = self.y_test 77 | else: 78 | x_train_delta = self.get_data_copy('train', indices_to_delete=indices_to_delete) 79 | x_test_delta = self.get_data_copy('test', indices_to_delete=indices_to_delete) 80 | y_train_delta = self.y_train 81 | y_test_delta = self.y_test 82 | new_model.fit(x_train_delta, y_train_delta) 83 | report = 
self.get_performance(x_test_delta, y_test_delta, model=new_model) 84 | no_features_to_delete = len(indices_to_delete) 85 | combination_string = hashlib.sha256('-'.join([str(i) for i in indices_to_delete]).encode()).hexdigest() 86 | model_folder = os.path.join(save_folder, str(no_features_to_delete), combination_string) 87 | if not os.path.isdir(model_folder): 88 | os.makedirs(model_folder) 89 | report_name = 'retraining_performance_{}.json'.format(self.model_param_str) 90 | json.dump(report, open(os.path.join(model_folder, report_name), 'w'), indent=4) 91 | return new_model 92 | 93 | def get_performance(self, x, y, **kwargs): 94 | assert ('theta' in kwargs or 'model' in kwargs) 95 | assert x.shape[0] == y.shape[0], '{} != {}'.format(x.shape[0], y.shape[0]) 96 | if 'model' in kwargs: 97 | model = kwargs['model'] 98 | logits = model.predict_proba(x)[:,1] 99 | y_pred = model.predict(x) 100 | theta = np.squeeze(model.coef_.T) 101 | else: 102 | theta = kwargs['theta'] 103 | logits = expit(np.dot(x, theta)) 104 | y_pred = np.array([1 if l >= 0.5 else -1 for l in logits]) 105 | accuracy = len(np.where(y_pred == y)[0])/x.shape[0] 106 | fpr, tpr, _ = roc_curve(y, logits) 107 | prec, rec, _ = precision_recall_curve(y, logits) 108 | auc_roc = auc(fpr, tpr) 109 | auc_pr = auc(rec, prec) 110 | report = classification_report(y, y_pred, digits=4, output_dict=True) 111 | n_data = x.shape[0] 112 | loss = 1./(n_data * self.lambda_) * self.get_loss(theta, x, y) + 0.5 * np.dot(theta, theta.T) 113 | grad = 1./(n_data * self.lambda_) * self.get_gradient(theta, x, y) + theta 114 | report['test_loss'] = loss 115 | report['gradient_norm'] = np.sum(grad**2) 116 | report['train_loss'] = self.get_train_set_loss(theta) 117 | report['gradient_norm_train'] = np.sum(self.get_train_set_gradient(theta)**2) 118 | report['accuracy'] = accuracy 119 | report['test_roc_auc'] = auc_roc 120 | report['test_pr_auc'] = auc_pr 121 | return report 122 | 123 | # computes l(x,y;theta). if x and y contain multiple samples l is summed up over them 124 | def get_loss(self, theta, x, y): 125 | dot_prod = np.dot(x, theta) * y 126 | data_loss = np.log(1 + np.exp(-dot_prod)) 127 | total_loss = np.sum(data_loss, axis=0) 128 | return total_loss 129 | 130 | # return total loss L on train set. 131 | def get_train_set_loss(self, theta): 132 | summed_loss = self.get_loss(theta, self.x_train, self.y_train) 133 | total_loss = self.C * summed_loss + 0.5 * np.dot(theta, theta.T) 134 | return total_loss 135 | 136 | # returns loss L on test set. Notice that the C is not correct in this loss since N_train != N_test 137 | def get_test_set_loss(self, theta): 138 | n_test = self.x_test.shape[0] 139 | summed_loss = self.get_loss(theta, self.x_test, self.y_test) 140 | total_loss = 1./(self.lambda_ * n_test) * summed_loss + 0.5 * np.dot(theta, theta.T) 141 | return total_loss 142 | 143 | # get gradient w.r.t. parameters (-y*x*sigma(-y*Theta^Tx)) for y in {-1,1} 144 | # this gradient is only the gradient of l, not of L! 145 | def get_gradient(self, theta, x, y): 146 | assert x.shape[0] == y.shape[0] 147 | dot_prod = np.dot(x, theta) * y 148 | factor = -expit(-dot_prod) * y 149 | grad = np.sum(np.expand_dims(factor,1) * x, axis=0) 150 | return grad 151 | 152 | # this is the gradient of L on the train set. This should be close to zero after fitting. 153 | def get_train_set_gradient(self, theta): 154 | grad = self.get_gradient(theta, self.x_train, self.y_train) 155 | return self.C*grad + theta 156 | 157 | # get gradient w.r.t. 
input (-y*Theta*sigma(-y*Theta^Tx)) 158 | def get_gradient_x(self, x, y, theta): 159 | assert x.shape[0] == y.shape[0] 160 | dot_prod = np.dot(x, theta) * y 161 | factor = -expit(-dot_prod) * y 162 | return np.expand_dims(factor, 1) * theta 163 | 164 | # computes inverse hessian for data x. As we only need the inverse hessian on the entire dataset we return the 165 | # Hessian on the full L loss. 166 | def get_inverse_hessian(self, x): 167 | dot = np.dot(x, self.theta) 168 | probs = expit(dot) 169 | weighted_x = np.reshape(probs * (1 - probs), (-1, 1)) * x # sigma(-t) = (1-sigma(t)) 170 | cov = self.C * np.dot(x.T, weighted_x) 171 | cov += np.eye(self.dim) # hessian of regularization 172 | cov_inv = inv(cov) 173 | return cov_inv 174 | 175 | def get_relevant_indices(self, indices_to_delete): 176 | # get the rows (samples) where the features appear 177 | relevant_indices = np.where(self.x_train[:, indices_to_delete] != 0)[0] 178 | # to avoid having samples more than once 179 | relevant_indices = np.unique(relevant_indices) 180 | return relevant_indices 181 | 182 | # indices is a list of dimensions referring to the dimensions of training set 183 | def approx_retraining(self, indices_to_delete, retrain_y=False, **kwargs): 184 | if retrain_y: 185 | assert np.min(indices_to_delete) >= 0 and np.max(indices_to_delete) < self.n 186 | else: 187 | assert np.min(indices_to_delete) >= 0 and np.max(indices_to_delete) <= self.dim 188 | method = kwargs['method'] if 'method' in kwargs else 'influence' 189 | if method == 'lazy': 190 | theta_approx = self.theta 191 | else: 192 | # often H^-1 fits into memory and has to be computed only once 193 | if 'H_inv' in kwargs: 194 | H_inv = kwargs['H_inv'] 195 | else: 196 | if method == 'newton': 197 | x_train_deleted = self.get_data_copy('train', indices_to_delete) 198 | H_inv = self.get_inverse_hessian(x_train_deleted) 199 | else: 200 | H_inv = self.get_inverse_hessian(self.x_train) 201 | if retrain_y: 202 | z_x = self.x_train[indices_to_delete] 203 | z_y = self.y_train[indices_to_delete] 204 | z_x_delta = z_x 205 | z_y_delta = self.get_data_copy_y('train', indices_to_delete=indices_to_delete)[indices_to_delete] 206 | else: 207 | relevant_indices = self.get_relevant_indices(indices_to_delete) 208 | z_x = self.x_train[relevant_indices] 209 | z_y = self.y_train[relevant_indices] 210 | z_x_delta = self.get_data_copy('train', indices_to_delete=indices_to_delete)[relevant_indices] 211 | z_y_delta = z_y 212 | grad_x = self.get_gradient(self.theta, z_x, z_y) 213 | grad_x_delta = self.get_gradient(self.theta, z_x_delta, z_y_delta) 214 | # compute parameter update. 
Note that here we have to choose epsilon=C because of the loss function used 215 | # in sklearn 216 | delta_theta = -self.C * H_inv.dot(grad_x_delta - grad_x) 217 | theta_approx = self.theta + delta_theta 218 | return theta_approx -------------------------------------------------------------------------------- /Unlearner/RNNUnlearner.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import tensorflow as tf 3 | import os 4 | import numpy as np 5 | import json 6 | import sys 7 | from tensorflow.keras import Sequential 8 | from tensorflow.keras.layers import Dense, Dropout, LSTM, Embedding 9 | from tensorflow.keras.regularizers import L2 10 | from tensorflow.keras.losses import categorical_crossentropy 11 | from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger 12 | from tensorflow.keras.optimizers import Adam, SGD 13 | from Unlearner.DNNUnlearner import DNNUnlearner 14 | from Unlearner.CanaryCallback import CanaryCallback 15 | from sklearn.metrics import classification_report 16 | from scipy.stats import skewnorm 17 | 18 | 19 | class RNNUNlearner(DNNUnlearner): 20 | def __init__(self, x_train, y_train, embedding_dim, idx2char, lambda_=0.01, weight_path=None, canary_start=None, 21 | canary_number=None, canary_repetitions=None, n_layers=1, n_units=256, p_dropout=0.0): 22 | tf.random.set_seed(42) 23 | # training data 24 | self.x_train = x_train # all x data is of shape (n_samples, max_len) and stored as unique indices 25 | self.y_train = y_train 26 | # test data makes no real sense in this setting so we use training data 27 | self.x_test = x_train.copy() 28 | self.y_test = y_train.copy() 29 | self.idx2char = idx2char 30 | self.char2idx = {v:k for k,v in self.idx2char.items()} 31 | self.n = self.x_train.shape[0] 32 | # model params 33 | self.max_len = self.x_train.shape[1] 34 | self.dim = len(idx2char) # here dim refers to the number of words in the vocabulary 35 | self.embedding_dim = embedding_dim 36 | self.lambda_ = lambda_ 37 | self.n_units = n_units 38 | self.n_layers = n_layers 39 | self.model = self.get_network(weight_path=weight_path, no_lstm_units=n_units, n_layers=n_layers, p_dropout=p_dropout) 40 | # canary stuff 41 | self.canary_start = canary_start 42 | self.canary_number = canary_number 43 | self.canary_repetitions = canary_repetitions 44 | self.param_string = 'lambda={}-canary_number={}-canary_reps={}-embedding_dim={}-seqlen={}-dropout={}'.format( 45 | lambda_, canary_number, canary_repetitions, embedding_dim, x_train.shape[1], p_dropout) 46 | 47 | def get_network(self, weight_path=None, optimizer='Adam', no_lstm_units=512, n_layers=2, p_dropout=0.0, 48 | learning_rate=0.0001): 49 | # define the LSTM model 50 | model = Sequential() 51 | model.add(Embedding(input_dim=self.dim, output_dim=self.embedding_dim)) 52 | if n_layers > 1: 53 | model.add(LSTM(no_lstm_units, kernel_regularizer=L2(self.lambda_), recurrent_regularizer=L2(self.lambda_), 54 | bias_regularizer=L2(self.lambda_), return_sequences=True)) 55 | else: 56 | model.add(LSTM(no_lstm_units, kernel_regularizer=L2(self.lambda_), recurrent_regularizer=L2(self.lambda_), 57 | bias_regularizer=L2(self.lambda_))) 58 | for _ in range(n_layers - 1): 59 | model.add(LSTM(no_lstm_units, kernel_regularizer=L2(self.lambda_), recurrent_regularizer=L2(self.lambda_), 60 | bias_regularizer=L2(self.lambda_))) 61 | if p_dropout > 0: 62 | model.add(Dropout(p_dropout)) 63 | model.add(Dense(self.dim, activation='softmax', kernel_regularizer=L2(self.lambda_), 
bias_regularizer=L2(self.lambda_))) 64 | if weight_path is not None: 65 | # load the network weights 66 | if weight_path.endswith('ckpt'): 67 | model.load_weights(weight_path).expect_partial() 68 | elif weight_path.endswith('hdf5'): 69 | model.load_weights(weight_path) 70 | else: 71 | print('Invalid file format') 72 | metrics = [tf.keras.metrics.CategoricalAccuracy()] 73 | if optimizer == 'Adam': 74 | model.compile(loss=categorical_crossentropy, optimizer=Adam(learning_rate=learning_rate), metrics=metrics) 75 | else: 76 | model.compile(loss=categorical_crossentropy, optimizer=SGD(learning_rate=learning_rate), metrics=metrics) 77 | return model 78 | 79 | # training of the model on the full train dataset. Overwrites parent method since no test dataset exists 80 | def train_model(self, model_folder, **kwargs): 81 | batch_size = 64 if 'batch_size' not in kwargs else kwargs['batch_size'] 82 | epochs = 150 if 'epochs' not in kwargs else kwargs['epochs'] 83 | if not os.path.isdir(model_folder): 84 | os.makedirs(model_folder) 85 | print(self.model.summary()) 86 | best_model, test_loss = self.train_retrain(self.model, (self.x_train, self.y_train), (self.x_train, self.y_train), 87 | model_folder, batch_size=batch_size, epochs=epochs) 88 | self.model.set_weights(best_model.get_weights()) 89 | return test_loss 90 | 91 | # train/retrain routine. Add the canary callback for this specific case and no evaluation on test set 92 | def train_retrain(self, model, train, test, model_folder, epochs=150, batch_size=64): 93 | checkpoint_name = 'checkpoint_{}.ckpt'.format(self.param_string) 94 | model_save_path = os.path.join(model_folder, checkpoint_name) 95 | csv_save_path = os.path.join(model_folder, 'train_log.csv') 96 | json_report_path = os.path.join(model_folder, 'test_performance.json') 97 | min_metric = 'loss' 98 | checkpoint = ModelCheckpoint(model_save_path, monitor=min_metric, save_best_only=True, save_weights_only=True, 99 | mode='min') 100 | csv_logger = CSVLogger(csv_save_path) 101 | callbacks_list = [checkpoint, csv_logger] 102 | if self.canary_start is not None and self.canary_repetitions > 0: 103 | canary_callback = CanaryCallback(self.canary_start, self.idx2char, self.canary_number) 104 | callbacks_list.append(canary_callback) 105 | hist = model.fit(train[0], train[1], epochs=epochs, verbose=1, callbacks=callbacks_list, batch_size=batch_size).history 106 | best_loss_epoch = np.argmin(hist[min_metric]) + 1 if min_metric in hist else 0# history list starts with 0 107 | best_train_loss = np.min(hist[min_metric]) if min_metric in hist else np.inf 108 | best_model = self.get_network(no_lstm_units=self.n_units, n_layers=self.n_layers) 109 | best_model.load_weights(model_save_path).expect_partial() 110 | best_test_loss = best_model.evaluate(train[0], train[1], batch_size=1000) 111 | print('Best model has test loss {} after {} epochs'.format(best_train_loss, best_loss_epoch)) 112 | y_test_hat = np.argmax(best_model.predict(test[0]), axis=1) 113 | report = classification_report(np.argmax(self.y_test, axis=1), y_test_hat, digits=4, output_dict=True) 114 | report['train_loss'] = best_train_loss 115 | report['test_loss'] = best_test_loss 116 | report['epochs_for_min'] = int(best_loss_epoch) # json does not like numpy ints 117 | json.dump(report, open(json_report_path, 'w'), indent=4) 118 | return best_model, best_test_loss 119 | 120 | # performs of SGD on (x_train, y_train) 121 | def fine_tune(self, x_train, y_train, learning_rate, batch_size=256, epochs=1): 122 | tmp_model = 
self.get_network(optimizer='SGD', no_lstm_units=self.n_units, n_layers=self.n_layers, 123 | learning_rate=learning_rate) 124 | tmp_model.set_weights(self.model.get_weights()) 125 | tmp_model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs) 126 | weights = tmp_model.get_weights() 127 | return weights 128 | 129 | def get_relevant_indices(self, indices_to_delete): 130 | relevant_indices = [] 131 | # its much easier to find occurrences in text 132 | train_text = [''.join([self.idx2char[i] for i in row.flatten()]) for row in self.x_train] 133 | for idx, sent in enumerate(train_text): 134 | for word_to_delete in indices_to_delete: 135 | if word_to_delete in sent: 136 | relevant_indices.append(idx) 137 | break 138 | return relevant_indices 139 | 140 | def get_data_copy(self, data_name, indices_to_delete, **kwargs): 141 | replacement_char = 'x' 142 | # its much easier to find occurrences in text 143 | train_text = [''.join([self.idx2char[i] for i in row.flatten()]) for row in self.x_train] 144 | total_occurences = 0 145 | for idx, sent in enumerate(train_text): 146 | for word_to_delete in indices_to_delete: 147 | if word_to_delete in sent: 148 | word_len = len(word_to_delete) 149 | total_occurences += 1 150 | train_text[idx] = train_text[idx].replace(word_to_delete, replacement_char*word_len) 151 | print('The given word(s) occurred {} times ({} % of all samples)'.format(total_occurences, 100*total_occurences/self.n)) 152 | # now back to indices 153 | data_cpy = np.array([[[self.char2idx[idx]] for idx in s] for s in train_text]) 154 | assert data_cpy.shape == self.x_train.shape 155 | return data_cpy 156 | 157 | # generates the most likely string given a start_str 158 | def generate_data(self, start_str=None, weights=None): 159 | if start_str is None: 160 | # pick a random seed 161 | start = np.random.randint(0, self.n) 162 | pattern = self.x_train[start].squeeze() 163 | else: 164 | pattern = np.array([self.char2idx[c] for c in start_str]) 165 | if weights is not None: 166 | model = self.get_network(no_lstm_units=self.n_units, n_layers=self.n_layers) 167 | model.set_weights(weights) 168 | else: 169 | model = self.model 170 | print("Seed:") 171 | print("\"", ''.join([self.idx2char[value] for value in pattern]), "\"") 172 | # generate characters 173 | print('Prediction:\n') 174 | for i in range(50): 175 | x = np.reshape(pattern, (1, len(pattern), 1)) 176 | prediction = model.predict(x, verbose=0) 177 | index = np.argmax(prediction) 178 | result = self.idx2char[index] 179 | sys.stdout.write(result) 180 | pattern = np.append(pattern, index) 181 | pattern = pattern[1:len(pattern)] 182 | print("\nDone.") 183 | 184 | # special purpose method. 
given start string samples strings that do not incorporate the most likely outcome 185 | def generate_replacement_string(self, top_k=5, chars_to_create=50): 186 | pattern = np.array([self.char2idx[s] for s in self.canary_start]) 187 | print("Seed:") 188 | print("\"", ''.join([self.idx2char[value] for value in pattern]), "\"") 189 | # generate characters 190 | print('Prediction:\n') 191 | for i in range(chars_to_create): 192 | x = np.reshape(pattern, (1, len(pattern), 1)) 193 | prediction = self.model.predict(x, verbose=0).squeeze() 194 | indices = np.argsort(-prediction) 195 | top_indices = indices[1:1+top_k] if i == 0 else indices[:top_k] 196 | index = np.random.choice(top_indices) 197 | result = self.idx2char[index] 198 | sys.stdout.write(result) 199 | pattern = np.append(pattern, index) 200 | pattern = pattern[1:len(pattern)] 201 | print("\nDone.") 202 | 203 | def test_canary(self, reference_char, weights=None, chars_to_predict=40, train_reduction=None): 204 | if weights is not None: 205 | model = self.get_network(no_lstm_units=self.n_units, n_layers=self.n_layers) 206 | model.set_weights(weights) 207 | else: 208 | model = self.model 209 | train_selection = slice(0, train_reduction) # on CPU it takes very long to classify entire dataset 210 | train_loss = model.evaluate(self.x_train[train_selection], self.y_train[train_selection], batch_size=1000, 211 | verbose=0) 212 | train_predictions = np.argmax(model.predict(self.x_train[train_selection], batch_size=1000, verbose=0), axis=1) 213 | train_labels = np.argmax(self.y_train[train_selection], axis=1) 214 | train_acc = len(np.where(train_labels == train_predictions)[0]) / self.y_train[train_selection].shape[0] 215 | n_digits = len(self.canary_number) 216 | # code copied from CanaryCallback. Seems like there is no way to call it by hand (and get return value) 217 | ref_char = reference_char if len(reference_char) == 1 else reference_char[0] 218 | number_char_indices = [self.char2idx[i] for i in [c for c in self.canary_number]] 219 | ref_char_index = self.char2idx[ref_char] 220 | start_seq = np.array([self.char2idx[s] for s in self.canary_start]) 221 | start_seq = start_seq.reshape((1, len(start_seq), 1)) 222 | digit_distribution = np.zeros((n_digits, 2)) 223 | argmax_chars = '' 224 | # generate characters 225 | for i in range(chars_to_predict): 226 | index_distribution = model.predict(start_seq, verbose=0) 227 | char_index = np.argmax(index_distribution) 228 | if i < digit_distribution.shape[0]: 229 | # monitor probability of canary char and repcement char 230 | digit_distribution[i, 0] = index_distribution[0, number_char_indices[i]] 231 | digit_distribution[i, 1] = index_distribution[0, ref_char_index] 232 | start_seq = np.append(start_seq, char_index.reshape(1, 1, 1), axis=1) 233 | start_seq = start_seq[:, 1:start_seq.shape[1] + 1, :] 234 | argmax_chars += self.idx2char[char_index] 235 | print('Seed: {}'.format(self.canary_start)) 236 | print('Prediction: {}'.format(argmax_chars)) 237 | print('Train loss: {}'.format(train_loss)) 238 | print('Train acc: {}'.format(train_acc)) 239 | print('Digit probas: {}'.format(digit_distribution[:, 0])) 240 | print('Replacement_char proba: {}'.format(digit_distribution[:, 1])) 241 | print('Canary perplexities at all points:') 242 | for j in range(1,n_digits+1): 243 | print('{}: {}'.format(j, -np.sum(np.log2(digit_distribution[:j, 0])))) 244 | canary_perplexity = -np.sum(np.log2(digit_distribution[:, 0])) 245 | return canary_perplexity, train_loss, train_acc, argmax_chars 246 | 247 | 248 | def 
calc_sequence_perplexity(self, sequence, start_sequence=None): 249 | # code copied from CanaryCallback. Seems like there is no way to call it by hand (and get return value) 250 | number_char_indices = [self.char2idx[i] for i in sequence] 251 | start_seq = np.array([self.char2idx[s] for s in (self.canary_start if start_sequence is None else start_sequence)]) 252 | start_seq = start_seq.reshape((1, len(start_seq), 1)) 253 | digit_distribution = np.zeros(len(sequence)) 254 | argmax_chars = '' 255 | # generate characters 256 | for i in range(len(sequence)): 257 | index_distribution = self.model.predict(start_seq, verbose=0) 258 | char_index = np.argmax(index_distribution) 259 | digit_distribution[i] = index_distribution[0, number_char_indices[i]] 260 | start_seq = np.append(start_seq, char_index.reshape(1, 1, 1), axis=1) 261 | start_seq = start_seq[:, 1:start_seq.shape[1] + 1, :] 262 | argmax_chars += self.idx2char[char_index] 263 | print('Seed: {}'.format(self.canary_start)) 264 | print('Prediction: {}'.format(argmax_chars)) 265 | print('Digit probas: {}'.format(digit_distribution)) 266 | print('Canary perplexities at all points:') 267 | for j in range(1,len(sequence)+1): 268 | print('{}: {}'.format(j, -np.sum(np.log2(digit_distribution[:j])))) 269 | sequence_perplexity = -np.sum(np.log2(digit_distribution)) 270 | return sequence_perplexity 271 | 272 | 273 | def calc_perplexity_distribution(self, weights=None, no_samples=1000000, plot=False, only_digits=False): 274 | if weights is not None: 275 | model = self.get_network(no_lstm_units=self.n_units, n_layers=self.n_layers) 276 | model.set_weights(weights) 277 | else: 278 | model = self.model 279 | if only_digits: 280 | numbers = np.unique([d for d in self.canary_number]) 281 | char_indices = [self.char2idx[n] for n in numbers] 282 | else: 283 | char_indices = list(self.idx2char.keys()) 284 | len_canary = len(self.canary_number) 285 | start_seq = np.array([self.char2idx[s] for s in self.canary_start], dtype=np.int8) 286 | start_seq = start_seq.reshape((1, len(start_seq))) 287 | start_seq = np.repeat(start_seq, no_samples, axis=0) 288 | random_sequences = np.random.choice(char_indices, size=(no_samples, len_canary), replace=True) 289 | perplexities = np.zeros(no_samples) 290 | for i in range(random_sequences.shape[1]): 291 | # add a random char to the sequences to predict them 292 | start_seq = np.insert(start_seq, start_seq.shape[1], random_sequences[:, i], axis=1) 293 | # extract proba of inserted chars 294 | start_seq_pred = model.predict(start_seq, batch_size=1000, verbose=1)[range(no_samples), random_sequences[:, i]] 295 | perplexities -= np.log2(start_seq_pred) 296 | perplexities = np.array(perplexities) 297 | if plot: 298 | _, bins, _ = plt.hist(perplexities, bins=1000, density=True) 299 | ae, loc, scale = skewnorm.fit(perplexities) 300 | print('Skewnorm-fit parameters: {0:.3f} - {1:.3f} - {2:.3f}'.format(ae, loc, scale)) 301 | sn = skewnorm(ae, loc, scale) 302 | x = np.linspace(0, max(bins), 500) 303 | plt.plot(x, sn.pdf(x), linewidth=5.0, label="Skewnorm-fit") 304 | plt.xlabel('Log Perplexity') 305 | plt.ylabel('Relative Frequency') 306 | plt.legend() 307 | plt.show() 308 | return perplexities 309 | 310 | def approx_exposure(self, perplexity_values, perplexities, only_digits=False): 311 | if type(perplexity_values) is not list: 312 | perplexity_values = [perplexity_values] 313 | if only_digits: 314 | numbers = np.unique([d for d in self.canary_number]) 315 | char_indices = [self.char2idx[n] for n in numbers] 316 | else: 317 | 
char_indices = list(self.idx2char.keys()) 318 | n_combinations = len(char_indices) ** len(self.canary_number) # total number of combinations possible 319 | R = np.log2(float(n_combinations)) # if n_combinations is large np.log2 needs needs float 320 | ae, loc, scale = skewnorm.fit(perplexities) 321 | sn = skewnorm(ae, loc, scale) 322 | quantiles = [sn.cdf(pv) for pv in perplexity_values] 323 | exposures = [-np.log2(q) for q in quantiles] 324 | for i in range(len(perplexity_values)): 325 | print('Results for {}'.format(perplexity_values[i])) 326 | print('{}% of all sequences are more likely than the given one.'.format(quantiles[i] * 100)) 327 | print('Log(|R|) is {}'.format(R)) 328 | print('The exposure of the sequence is {}'.format(exposures[i])) 329 | return exposures 330 | 331 | # method that prints index arrays as words 332 | # input array should be of shape (batch_size, seq_length, 1) 333 | def indices_to_words(self, idx_array): 334 | if len(idx_array.shape) == 1: 335 | idx_array = [idx_array] 336 | for idx, arr in enumerate(idx_array): 337 | print(f'idx {idx}') 338 | s_arr = ''.join([self.idx2char[i] for i in arr.flatten()]) 339 | print(s_arr) -------------------------------------------------------------------------------- /Unlearner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/Unlearner/__init__.py -------------------------------------------------------------------------------- /Unlearner/ensemble.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import pickle 4 | import json 5 | from time import perf_counter 6 | from contextlib import contextmanager 7 | 8 | 9 | import numpy as np 10 | from sklearn.metrics import classification_report 11 | from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger 12 | 13 | 14 | class Ensemble(object): 15 | def __init__(self, model_folder, models, n_classes=10): 16 | self.model_folder = model_folder 17 | self.models = models 18 | self.n_classes = n_classes 19 | 20 | def predict(self, X): 21 | return aggregate_predictions(X, self, self.n_classes) 22 | 23 | def evaluate(self, X, Y_true, verbose=False): 24 | Y_pred = self.predict(X) 25 | rep = classification_report(np.argmax(Y_true, axis=1), np.argmax(Y_pred, axis=1), output_dict=True) 26 | return rep, rep['accuracy'] 27 | 28 | def get_indices(self): 29 | indices = [] 30 | for shard in sorted(self.models.keys()): 31 | indices.append(self.models[shard]['idx']) 32 | return indices 33 | 34 | def get_affected(self, idx): 35 | idx = set(idx) 36 | indices = self.get_indices() 37 | affected = [] 38 | for shard, index in enumerate(indices): 39 | if len(idx & set(index)) > 0: 40 | affected.append(shard) 41 | return affected 42 | 43 | 44 | def softmax(x, axis=0): 45 | if axis == 0: 46 | y = np.exp(x - np.max(x)) 47 | return y / np.sum(np.exp(x)) 48 | elif axis == 1: 49 | x_max = np.max(x, axis=1, keepdims=True) 50 | e_x = np.exp(x - x_max) 51 | x_sum = np.sum(e_x, axis=1, keepdims=True) 52 | return e_x / x_sum 53 | else: 54 | raise NotImplementedError(f"softmax for axis={axis} not implemented!") 55 | 56 | 57 | def aggregate_predictions(X, ensemble, n_classes=10): 58 | preds = np.zeros((len(X), len(ensemble.models)), dtype=np.int64) 59 | for i, model_dict in ensemble.models.items(): 60 | model = model_dict['model'] 61 | preds[:, i] = np.argmax(model.predict(X), axis=1) 62 | # count how often 
each label is predicted 63 | preds = np.apply_along_axis(np.bincount, axis=1, arr=preds, minlength=n_classes) 64 | return softmax(preds, axis=1) 65 | 66 | 67 | def load_ensemble(model_dir, model_init, suffix='best_model.hdf5'): 68 | models = {} 69 | for root, _, files in os.walk(model_dir): 70 | for filename in files: 71 | filename = os.path.join(root, filename) 72 | if re.match(f'{model_dir}/shard-\d+/{suffix}', filename): 73 | shard = int(root.split('/')[-1].split('-')[-1]) 74 | model = model_init() 75 | model.load_weights(filename) 76 | models[shard] = { 77 | 'model': model, 78 | 'shard': shard 79 | } 80 | # load index information 81 | with open(os.path.join(model_dir, 'splits.pkl'), 'rb') as pkl: 82 | splits = pickle.load(pkl) 83 | for i, idx in enumerate(splits): 84 | models[i]['idx'] = idx 85 | 86 | return Ensemble(model_dir, models) 87 | 88 | 89 | def split_shards(train_data, splits): 90 | """ Split dataset into shards. """ 91 | x_train, y_train = train_data 92 | return [(idx, x_train[idx], y_train[idx]) for idx in splits] 93 | 94 | 95 | def get_splits(n, n_shards=20, strategy='uniform', split_file=None): 96 | """ Generate splits for sharding, returning an iterator over indices. """ 97 | if split_file is not None and os.path.exists(split_file): 98 | with open(split_file, 'rb') as pkl: 99 | splits = pickle.load(pkl) 100 | else: 101 | strategies = { 102 | 'uniform': _uniform_strat 103 | } 104 | if strategy not in strategies: 105 | raise NotImplementedError(f'Strategy {strategy} not implemented! ' 106 | f'Available options: {sorted(strategies)}') 107 | splits = strategies[strategy](n, n_shards) 108 | if split_file is not None: 109 | with open(split_file, 'wb') as pkl: 110 | pickle.dump(list(splits), pkl) 111 | return splits 112 | 113 | 114 | def _uniform_strat(n_data, n_shards, **kwargs): 115 | split_assignment = np.random.choice(list(range(n_shards)), n_data, replace=True) 116 | split_idx = [] 117 | for shard in list(range(n_shards)): 118 | split_idx.append(np.argwhere(split_assignment == shard)[:, 0]) 119 | return split_idx 120 | 121 | 122 | def train_models(model_init, model_folder, data, n_shards, strategy, model_filename='repaired_model.hdf5', **train_kwargs): 123 | """ Train models on given number of shards. """ 124 | (x_train, y_train), _, _ = data 125 | split_file = os.path.join(model_folder, 'splits.pkl') 126 | splits = get_splits(len(data[0][0]), n_shards, strategy, split_file=split_file) 127 | result = TrainingResult(model_folder) 128 | with measure_time() as t: 129 | for i, idx in enumerate(splits): 130 | shard_data = ((x_train[idx], y_train[idx]), data[1], data[2]) 131 | retrain_shard(model_init, model_folder, shard_data, i, model_filename=model_filename, **train_kwargs) 132 | training_time = t() 133 | report = eval_shards(model_init, model_folder, data, model_filename=model_filename) 134 | report['time'] = training_time 135 | result.update(report) 136 | result.save() 137 | 138 | 139 | def retrain_shard(model_init, model_folder, data, shard_id, model_filename='repaired_model.hdf5', **train_kwargs): 140 | """ Retrain specific shard with new data. 
""" 141 | model_folder = f"{model_folder}/shard-{shard_id}" 142 | weights_path = train(model_init, model_folder, data, model_filename=model_filename, **train_kwargs) 143 | return weights_path 144 | 145 | 146 | def eval_shards(model_init, model_folder, data, model_filename='poisoned_model.hdf5'): 147 | ensemble = load_ensemble(model_folder, model_init, suffix=model_filename) 148 | x_val, y_val = data[2] 149 | ensemble.n_classes = y_val.shape[1] 150 | report, _ = ensemble.evaluate(x_val, y_val) 151 | return report 152 | 153 | 154 | def train(model_init, model_folder, data, epochs, batch_size, model_filename='best_model.hdf5', **kwargs): 155 | """ 156 | model_init: function, initializes the model -> Unlearner.get_network() 157 | model_folder: root directory of model 158 | data: tuple: ((x_train, y_train), (x_test, y_test), (x_val, y_val)) 159 | epochs: num of epochs 160 | batch_size: training batch size 161 | model_filename: checkpoint file name of best model (saved in model_folder) 162 | kwargs: additional parameters (ignored) 163 | """ 164 | os.makedirs(model_folder, exist_ok=True) 165 | model_save_path = os.path.join(model_folder, model_filename) 166 | if os.path.exists(model_save_path): 167 | return model_save_path 168 | csv_save_path = os.path.join(model_folder, 'train_log.csv') 169 | result = TrainingResult(model_folder) 170 | 171 | (x_train, y_train), (x_test, y_test), _ = data 172 | model = model_init() 173 | 174 | metric_for_min = 'loss' 175 | loss_ckpt = ModelCheckpoint(model_save_path, monitor=metric_for_min, save_best_only=True, 176 | save_weights_only=True) 177 | csv_logger = CSVLogger(csv_save_path) 178 | callbacks = [loss_ckpt, csv_logger] 179 | 180 | with measure_time() as t: 181 | hist = model.fit(x_train, y_train, batch_size=batch_size, 182 | epochs=epochs, validation_data=None, verbose=1, 183 | callbacks=callbacks).history 184 | training_time = t() 185 | best_loss = np.min(hist[metric_for_min]) if metric_for_min in hist else np.inf 186 | best_loss_epoch = np.argmin(hist[metric_for_min]) + 1 if metric_for_min in hist else 0 187 | print('Best model has test loss {} after {} epochs'.format(best_loss, best_loss_epoch)) 188 | best_model = model_init() 189 | best_model.load_weights(model_save_path) 190 | 191 | # calculate test metrics on final model 192 | y_test_hat = np.argmax(best_model.predict(x_test), axis=1) 193 | test_loss = best_model.evaluate(x_test, y_test, batch_size=1000, verbose=0)[0] 194 | report = classification_report(np.argmax(y_test, axis=1), y_test_hat, digits=4, output_dict=True) 195 | report['train_loss'] = best_loss 196 | report['test_loss'] = test_loss 197 | report['epochs_for_min'] = int(best_loss_epoch) # json does not like numpy ints 198 | report['time'] = training_time 199 | result.update(report) 200 | result.save() 201 | return model_save_path 202 | 203 | 204 | class Result(object): 205 | """ Python dict with save/load functionality. """ 206 | 207 | def __init__(self, base_path, name_tmpl, **suffix_kwargs): 208 | filename = name_tmpl 209 | if len(suffix_kwargs) > 0: 210 | # assemble name to `base_name{-k0_v0-k1_v1}.json` 211 | suffix = '-'.join([f'{k}_{suffix_kwargs[k]}' for k in sorted(suffix_kwargs)]) 212 | if len(suffix) > 0: 213 | suffix = f'-{suffix}' 214 | filename = name_tmpl.format(suffix) 215 | else: 216 | filename = name_tmpl.format('') 217 | self.filepath = os.path.join(base_path, filename) 218 | 219 | def save(self): 220 | """ Save object attributes except those used for opening the file etc. 
""" 221 | with open(self.filepath, 'w') as f: 222 | json.dump(self.as_dict(), f, indent=4) 223 | return self 224 | 225 | def load(self): 226 | """ Load object attributes from given file path. """ 227 | with open(self.filepath, 'r') as f: 228 | self.update(json.load(f)) 229 | return self 230 | 231 | def as_dict(self): 232 | exclude_keys = ['filepath', 'exists'] 233 | return {k: v for k, v in self.__dict__.items() if k not in exclude_keys} 234 | 235 | def update(self, update_dict): 236 | self.__dict__.update(update_dict) 237 | return self 238 | 239 | @property 240 | def exists(self): 241 | return os.path.exists(self.filepath) 242 | 243 | 244 | class TrainingResult(Result): 245 | def __init__(self, model_folder, name_tmpl='train_results{}.json', **suffix_kwargs): 246 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 247 | 248 | 249 | @contextmanager 250 | def measure_time(): 251 | start = perf_counter() 252 | yield lambda: perf_counter() - start 253 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | BASE_DIR = Path(__file__).parent.resolve() 4 | -------------------------------------------------------------------------------- /example_notebooks/Cifar_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.append('../')" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import tensorflow_datasets as tfds\n", 21 | "\n", 22 | "\n", 23 | "data_dir = '../.data'\n", 24 | "cifar = tfds.load('cifar10', data_dir=data_dir)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "x_train, y_train = list(zip(*((sample['image'], sample['label']) for sample in cifar['train'])))\n", 34 | "x_train = np.stack(x_train)\n", 35 | "y_train = np.stack(y_train)\n", 36 | "\n", 37 | "\n", 38 | "x_test, y_test = list(zip(*((sample['image'], sample['label']) for sample in cifar['test'])))\n", 39 | "x_test = np.stack(x_test)\n", 40 | "y_test = np.stack(y_test)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from tensorflow.keras.utils import to_categorical\n", 50 | "from sklearn.model_selection import train_test_split\n", 51 | "\n", 52 | "x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42)\n", 53 | "\n", 54 | "\n", 55 | "n_classes = 10\n", 56 | "y_train = to_categorical(y_train, num_classes=n_classes)\n", 57 | "y_test = to_categorical(y_test, num_classes=n_classes)\n", 58 | "y_val = to_categorical(y_val, num_classes=n_classes)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from conf import BASE_DIR\n", 68 | "\n", 69 | "data_dir = BASE_DIR/'train_test_data'/'Cifar-test'\n", 70 | "data_dir.mkdir(parents=True, exist_ok=True)\n", 71 | "\n", 72 | "for arr, filename in [\n", 73 | " (x_train, 'x_train.npy'),\n", 74 | " (y_train, 'y_train.npy'),\n", 75 | " (x_test, 'x_test.npy'),\n", 76 | " (y_test, 'y_test.npy'),\n", 77 | 
" (x_val, 'x_valid.npy'),\n", 78 | " (y_val, 'y_valid.npy')]:\n", 79 | " np.save(data_dir/filename, arr)" 80 | ] 81 | } 82 | ], 83 | "metadata": { 84 | "language_info": { 85 | "name": "python" 86 | }, 87 | "orig_nbformat": 4 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 2 91 | } 92 | -------------------------------------------------------------------------------- /models/CNN/poisoned_model.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/CNN/poisoned_model.hdf5 -------------------------------------------------------------------------------- /models/LSTM/checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=29-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/LSTM/checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=29-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /models/LSTM/checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=29-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/LSTM/checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=29-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.index -------------------------------------------------------------------------------- /models/LSTM/checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=6-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/LSTM/checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=6-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /models/LSTM/checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=6-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/LSTM/checkpoint_lambda=0.0001-canary_number=0123456789-canary_reps=6-embedding_dim=64-seqlen=24-dropout=0.0.ckpt.index -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/fine-tuning-1/unlearn_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "epochs": 1, 3 | "_filename": "/home/lukpirch/workspace/MachineUnlearning/models/poisoning/budget-10000/seed-42/fine-tuning-1/unlearn_config.json" 4 | } -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/first-order/repaired_model.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/poisoning/budget-10000/seed-42/first-order/repaired_model.hdf5 
-------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/first-order/unlearn_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps": 10, 3 | "tau": 2e-05, 4 | "_filename": "/home/lukpirch/workspace/MachineUnlearning/models/poisoning/budget-10000/seed-42/first-order/unlearn_config.json" 5 | } -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/first-order/unlearning_results.json: -------------------------------------------------------------------------------- 1 | { 2 | "acc_clean": 0.8786, 3 | "acc_before_fix": 0.7382000088691711, 4 | "acc_after_fix": 0.7685999870300293, 5 | "acc_perc_restored": 0.21652407465275797, 6 | "diverged": false, 7 | "n_gradients": 512, 8 | "unlearning_duration_s": 37.50269942358136, 9 | "num_params": 1798282 10 | } -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/injector.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/poisoning/budget-10000/seed-42/injector.pkl -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/poison_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "budget": 10000, 3 | "seed": 42, 4 | "_filename": "/home/lukpirch/workspace/MachineUnlearning/models/poisoning/budget-10000/seed-42/poison_config.json" 5 | } -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/poisoned_model.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/poisoning/budget-10000/seed-42/poisoned_model.hdf5 -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/second-order/repaired_model.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/poisoning/budget-10000/seed-42/second-order/repaired_model.hdf5 -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/second-order/unlearn_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "damping": 0.0001, 3 | "hvp_batch_size": 1024, 4 | "patience": 20, 5 | "repititions": 1, 6 | "scale": 200000.0, 7 | "steps": 10, 8 | "_filename": "/home/lukpirch/workspace/MachineUnlearning/models/poisoning/budget-10000/seed-42/second-order/unlearn_config.json" 9 | } -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/second-order/unlearning_results.json: -------------------------------------------------------------------------------- 1 | { 2 | "acc_clean": 0.8786, 3 | "acc_before_fix": 0.7382000088691711, 4 | "acc_after_fix": 0.7829999923706055, 5 | "acc_perc_restored": 0.3190882217342048, 6 | "diverged": false, 7 | "n_gradients": 3420, 8 | "unlearning_duration_s": 85.22455808892846, 9 | "num_params": 1798282 10 | } 
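The two unlearning_results.json files above appear consistent with acc_perc_restored = (acc_after_fix - acc_before_fix) / (acc_clean - acc_before_fix): (0.7686 - 0.7382) / (0.8786 - 0.7382) is roughly 0.2165 for first-order and (0.7830 - 0.7382) / (0.8786 - 0.7382) is roughly 0.3191 for second-order, matching the stored values. A small sketch, assuming these result paths, that recomputes the ratio; the code that presumably produced these numbers lives under Applications/Poisoning/unlearn and export_results.py and is not shown here.

import json

def restoration_ratio(results_path):
    # fraction of the accuracy gap (clean vs. poisoned) that unlearning closed
    with open(results_path) as f:
        res = json.load(f)
    return (res['acc_after_fix'] - res['acc_before_fix']) / (res['acc_clean'] - res['acc_before_fix'])

for method in ('first-order', 'second-order'):
    path = 'models/poisoning/budget-10000/seed-42/{}/unlearning_results.json'.format(method)
    print(method, round(restoration_ratio(path), 4))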
-------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/sharding-10/unlearn_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_shards": 10, 3 | "_filename": "/home/lukpirch/workspace/MachineUnlearning/models/poisoning/budget-10000/seed-42/sharding-10/unlearn_config.json" 4 | } -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/train_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "epochs": 100, 3 | "batch_size": 64, 4 | "model_size": 512, 5 | "_filename": "/home/lukpirch/workspace/MachineUnlearning/models/poisoning/budget-10000/seed-42/train_config.json" 6 | } -------------------------------------------------------------------------------- /models/poisoning/budget-10000/seed-42/train_results.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": { 3 | "precision": 0.7957894736842105, 4 | "recall": 0.7440944881889764, 5 | "f1-score": 0.7690742624618514, 6 | "support": 508 7 | }, 8 | "1": { 9 | "precision": 0.8187250996015937, 10 | "recall": 0.7903846153846154, 11 | "f1-score": 0.8043052837573386, 12 | "support": 520 13 | }, 14 | "2": { 15 | "precision": 0.718562874251497, 16 | "recall": 0.6936416184971098, 17 | "f1-score": 0.7058823529411764, 18 | "support": 519 19 | }, 20 | "3": { 21 | "precision": 0.5486238532110091, 22 | "recall": 0.6294736842105263, 23 | "f1-score": 0.5862745098039216, 24 | "support": 475 25 | }, 26 | "4": { 27 | "precision": 0.7479508196721312, 28 | "recall": 0.7433808553971487, 29 | "f1-score": 0.745658835546476, 30 | "support": 491 31 | }, 32 | "5": { 33 | "precision": 0.7303370786516854, 34 | "recall": 0.6701030927835051, 35 | "f1-score": 0.6989247311827956, 36 | "support": 485 37 | }, 38 | "6": { 39 | "precision": 0.7339449541284404, 40 | "recall": 0.7905138339920948, 41 | "f1-score": 0.7611798287345386, 42 | "support": 506 43 | }, 44 | "7": { 45 | "precision": 0.7831858407079646, 46 | "recall": 0.7254098360655737, 47 | "f1-score": 0.7531914893617022, 48 | "support": 488 49 | }, 50 | "8": { 51 | "precision": 0.7897727272727273, 52 | "recall": 0.814453125, 53 | "f1-score": 0.801923076923077, 54 | "support": 512 55 | }, 56 | "9": { 57 | "precision": 0.789980732177264, 58 | "recall": 0.8266129032258065, 59 | "f1-score": 0.8078817733990147, 60 | "support": 496 61 | }, 62 | "accuracy": 0.7438, 63 | "macro avg": { 64 | "precision": 0.7456873453358523, 65 | "recall": 0.7428068052745356, 66 | "f1-score": 0.7434296144111892, 67 | "support": 5000 68 | }, 69 | "weighted avg": { 70 | "precision": 0.746950163723857, 71 | "recall": 0.7438, 72 | "f1-score": 0.744573440846692, 73 | "support": 5000 74 | }, 75 | "train_loss": 0.017946992069482803, 76 | "test_loss": 1.3488121032714844, 77 | "epochs_for_min": 99, 78 | "time": 1237.2063738070428 79 | } -------------------------------------------------------------------------------- /models/poisoning/clean/best_model.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/models/poisoning/clean/best_model.hdf5 -------------------------------------------------------------------------------- /models/poisoning/clean/train_config.json: -------------------------------------------------------------------------------- 1 | { 2 
| "epochs": 100, 3 | "batch_size": 64, 4 | "model_size": 512, 5 | "_filename": "/home/lukpirch/workspace/MachineUnlearning/models/poisoning/clean/train_config.json" 6 | } -------------------------------------------------------------------------------- /models/poisoning/clean/train_results.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": { 3 | "precision": 0.8904109589041096, 4 | "recall": 0.8956692913385826, 5 | "f1-score": 0.8930323846908734, 6 | "support": 508 7 | }, 8 | "1": { 9 | "precision": 0.9653767820773931, 10 | "recall": 0.9115384615384615, 11 | "f1-score": 0.9376854599406529, 12 | "support": 520 13 | }, 14 | "2": { 15 | "precision": 0.8627049180327869, 16 | "recall": 0.8111753371868978, 17 | "f1-score": 0.8361469712015889, 18 | "support": 519 19 | }, 20 | "3": { 21 | "precision": 0.7571743929359823, 22 | "recall": 0.7221052631578947, 23 | "f1-score": 0.7392241379310345, 24 | "support": 475 25 | }, 26 | "4": { 27 | "precision": 0.8719512195121951, 28 | "recall": 0.8737270875763747, 29 | "f1-score": 0.8728382502543235, 30 | "support": 491 31 | }, 32 | "5": { 33 | "precision": 0.8155136268343816, 34 | "recall": 0.8020618556701031, 35 | "f1-score": 0.8087318087318087, 36 | "support": 485 37 | }, 38 | "6": { 39 | "precision": 0.8690036900369004, 40 | "recall": 0.9308300395256917, 41 | "f1-score": 0.8988549618320609, 42 | "support": 506 43 | }, 44 | "7": { 45 | "precision": 0.908, 46 | "recall": 0.930327868852459, 47 | "f1-score": 0.9190283400809717, 48 | "support": 488 49 | }, 50 | "8": { 51 | "precision": 0.9309021113243762, 52 | "recall": 0.947265625, 53 | "f1-score": 0.9390125847047436, 54 | "support": 512 55 | }, 56 | "9": { 57 | "precision": 0.8990476190476191, 58 | "recall": 0.9516129032258065, 59 | "f1-score": 0.9245837414299707, 60 | "support": 496 61 | }, 62 | "accuracy": 0.8786, 63 | "macro avg": { 64 | "precision": 0.8770085318705745, 65 | "recall": 0.8776313733072272, 66 | "f1-score": 0.8769138640798028, 67 | "support": 5000 68 | }, 69 | "weighted avg": { 70 | "precision": 0.8781495815813348, 71 | "recall": 0.8786, 72 | "f1-score": 0.8779643124074801, 73 | "support": 5000 74 | }, 75 | "train_loss": 0.004181844647973776, 76 | "test_loss": 0.689379096031189, 77 | "epochs_for_min": 100, 78 | "time": 1229.8301207087934 79 | } -------------------------------------------------------------------------------- /opt_requirements.txt: -------------------------------------------------------------------------------- 1 | ipykernel==6.16.2 2 | tensorflow-datasets==4.8.2 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==2.7.0 2 | scikit-learn==1.0.2 3 | nltk==3.8.1 4 | tqdm==4.65.0 5 | click==8.1.3 6 | matplotlib==3.5.3 7 | pandas==1.3.5 8 | seaborn==0.12.2 -------------------------------------------------------------------------------- /train_test_data/Adult/Pipeline_classes.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.base import BaseEstimator, TransformerMixin 3 | from pandas.api.types import CategoricalDtype 4 | 5 | 6 | class ColumnSelector(BaseEstimator, TransformerMixin): 7 | 8 | def __init__(self, type): 9 | self.type = type 10 | 11 | def fit(self, X, y=None): 12 | return self 13 | 14 | def transform(self, X): 15 | df_transformed = X.select_dtypes(include=[self.type]) 16 | cols = 
df_transformed.columns 17 | with open(f'{self.type}_voc.columns', 'w') as f: 18 | print('\n'.join(cols.tolist()), file=f) 19 | return df_transformed 20 | 21 | 22 | class CategoricalImputer(BaseEstimator, TransformerMixin): 23 | 24 | def __init__(self, columns=None, strategy='most_frequent'): 25 | self.columns = columns 26 | self.strategy = strategy 27 | 28 | def fit(self, X, y=None): 29 | if self.columns is None: 30 | self.columns = X.columns 31 | 32 | if self.strategy == 'most_frequent': 33 | self.fill = {column: X[column].value_counts().index[0] for column in self.columns} 34 | else: 35 | self.fill = {column: '0' for column in self.columns} 36 | return self 37 | 38 | def transform(self, X): 39 | X_copy = X.copy() 40 | for column in self.columns: 41 | X_copy[column] = X_copy[column].fillna(self.fill[column]) 42 | return X_copy 43 | 44 | 45 | class CategoricalEncoder(BaseEstimator, TransformerMixin): 46 | 47 | def __init__(self, data, dropFirst=True): 48 | self.categories = dict() 49 | self.dropFirst = dropFirst 50 | self.data = data.copy() 51 | self.categories = {} 52 | 53 | def fit(self, X, y=None): 54 | train_data_obj = self.data.select_dtypes(include=['object']) 55 | for column in train_data_obj.columns: 56 | self.categories[column] = self.data[column].value_counts().index.tolist() 57 | return self 58 | 59 | def transform(self, X): 60 | X_copy = X.copy() 61 | X_copy = X_copy.select_dtypes(include=['object']) 62 | for column in X_copy.columns: 63 | X_copy[column] = X_copy[column].astype({column:CategoricalDtype(self.categories[column])}) 64 | dummies_df = pd.get_dummies(X_copy, drop_first=self.dropFirst) 65 | # pipelines transform data to numpy arrays, therefore column information is lost. dump it into a file here 66 | cols = dummies_df.columns 67 | with open('cat_voc.columns', 'w') as f: 68 | print('\n'.join(cols.tolist()), file=f) 69 | return dummies_df 70 | -------------------------------------------------------------------------------- /train_test_data/Adult/adult.names: -------------------------------------------------------------------------------- 1 | | This data was extracted from the census bureau database found at 2 | | http://www.census.gov/ftp/pub/DES/www/welcome.html 3 | | Donor: Ronny Kohavi and Barry Becker, 4 | | Data Mining and Visualization 5 | | Silicon Graphics. 6 | | e-mail: ronnyk@sgi.com for questions. 7 | | Split into train-test using MLC++ GenCVFiles (2/3, 1/3 random). 8 | | 48842 instances, mix of continuous and discrete (train=32561, test=16281) 9 | | 45222 if instances with unknown values are removed (train=30162, test=15060) 10 | | Duplicate or conflicting instances : 6 11 | | Class probabilities for adult.all file 12 | | Probability for the label '>50K' : 23.93% / 24.78% (without unknowns) 13 | | Probability for the label '<=50K' : 76.07% / 75.22% (without unknowns) 14 | | 15 | | Extraction was done by Barry Becker from the 1994 Census database. A set of 16 | | reasonably clean records was extracted using the following conditions: 17 | | ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)) 18 | | 19 | | Prediction task is to determine whether a person makes over 50K 20 | | a year.
21 | | 22 | | First cited in: 23 | | @inproceedings{kohavi-nbtree, 24 | | author={Ron Kohavi}, 25 | | title={Scaling Up the Accuracy of Naive-Bayes Classifiers: a 26 | | Decision-Tree Hybrid}, 27 | | booktitle={Proceedings of the Second International Conference on 28 | | Knowledge Discovery and Data Mining}, 29 | | year = 1996, 30 | | pages={to appear}} 31 | | 32 | | Error Accuracy reported as follows, after removal of unknowns from 33 | | train/test sets): 34 | | C4.5 : 84.46+-0.30 35 | | Naive-Bayes: 83.88+-0.30 36 | | NBTree : 85.90+-0.28 37 | | 38 | | 39 | | Following algorithms were later run with the following error rates, 40 | | all after removal of unknowns and using the original train/test split. 41 | | All these numbers are straight runs using MLC++ with default values. 42 | | 43 | | Algorithm Error 44 | | -- ---------------- ----- 45 | | 1 C4.5 15.54 46 | | 2 C4.5-auto 14.46 47 | | 3 C4.5 rules 14.94 48 | | 4 Voted ID3 (0.6) 15.64 49 | | 5 Voted ID3 (0.8) 16.47 50 | | 6 T2 16.84 51 | | 7 1R 19.54 52 | | 8 NBTree 14.10 53 | | 9 CN2 16.00 54 | | 10 HOODG 14.82 55 | | 11 FSS Naive Bayes 14.05 56 | | 12 IDTM (Decision table) 14.46 57 | | 13 Naive-Bayes 16.12 58 | | 14 Nearest-neighbor (1) 21.42 59 | | 15 Nearest-neighbor (3) 20.35 60 | | 16 OC1 15.04 61 | | 17 Pebls Crashed. Unknown why (bounds WERE increased) 62 | | 63 | | Conversion of original data as follows: 64 | | 1. Discretized agrossincome into two ranges with threshold 50,000. 65 | | 2. Convert U.S. to US to avoid periods. 66 | | 3. Convert Unknown to "?" 67 | | 4. Run MLC++ GenCVFiles to generate data,test. 68 | | 69 | | Description of fnlwgt (final weight) 70 | | 71 | | The weights on the CPS files are controlled to independent estimates of the 72 | | civilian noninstitutional population of the US. These are prepared monthly 73 | | for us by Population Division here at the Census Bureau. We use 3 sets of 74 | | controls. 75 | | These are: 76 | | 1. A single cell estimate of the population 16+ for each state. 77 | | 2. Controls for Hispanic Origin by age and sex. 78 | | 3. Controls by Race, age and sex. 79 | | 80 | | We use all three sets of controls in our weighting program and "rake" through 81 | | them 6 times so that by the end we come back to all the controls we used. 82 | | 83 | | The term estimate refers to population totals derived from CPS by creating 84 | | "weighted tallies" of any specified socio-economic characteristics of the 85 | | population. 86 | | 87 | | People with similar demographic characteristics should have 88 | | similar weights. There is one important caveat to remember 89 | | about this statement. That is that since the CPS sample is 90 | | actually a collection of 51 state samples, each with its own 91 | | probability of selection, the statement only applies within 92 | | state. 93 | 94 | 95 | >50K, <=50K. 96 | 97 | age: continuous. 98 | workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked. 99 | fnlwgt: continuous. 100 | education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool. 101 | education-num: continuous. 102 | marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse. 
103 | occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces. 104 | relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried. 105 | race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black. 106 | sex: Female, Male. 107 | capital-gain: continuous. 108 | capital-loss: continuous. 109 | hours-per-week: continuous. 110 | native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands. 111 | -------------------------------------------------------------------------------- /train_test_data/Adult/category_dict_adult.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Adult/category_dict_adult.pkl -------------------------------------------------------------------------------- /train_test_data/Adult/data_to_arr.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import seaborn as sns 4 | import pickle as pkl 5 | import matplotlib.pyplot as plt 6 | from Pipeline_classes import ColumnSelector, CategoricalImputer, CategoricalEncoder 7 | 8 | from sklearn.preprocessing import Normalizer, StandardScaler 9 | from sklearn.pipeline import Pipeline, FeatureUnion 10 | 11 | 12 | def csv_to_arr(train_data_path, test_data_path): 13 | columns = ["age", "workClass", "fnlwgt", "education", "education-num","marital-status", "occupation", 14 | "relationship", "race", "sex", "capital-gain", "capital-loss", "hours-per-week", 15 | "native-country", "income"] 16 | columns_to_drop = ['fnlwgt', 'education'] 17 | df_train = pd.read_csv(train_data_path, names=columns, sep=' *, *', na_values=['?']) 18 | df_test = pd.read_csv(test_data_path, names=columns, sep=' *, *', na_values=['?'], skiprows=1) 19 | #data_info(df_train) 20 | df_train.drop(columns_to_drop, axis=1, inplace=True) 21 | df_test.drop(columns_to_drop, axis=1, inplace=True) 22 | incomplete_columns = ['workClass', 'occupation', 'native-country'] 23 | num_pipeline = Pipeline(steps=[ 24 | ("num_attr_selector", ColumnSelector(type='int')), 25 | ("scaler", StandardScaler()), 26 | ("normalizer", Normalizer(norm='l2')) 27 | ]) 28 | cat_pipeline = Pipeline(steps=[ 29 | ("cat_attr_selector", ColumnSelector(type='object')), 30 | ("cat_imputer", CategoricalImputer(columns=incomplete_columns)), 31 | ("cat_encoder", CategoricalEncoder(data=df_train, dropFirst=True)) 32 | ]) 33 | full_pipeline = FeatureUnion([("num_pipe", num_pipeline), ("cat_pipe", cat_pipeline)]) 34 | df_train["income"] = df_train["income"].apply(lambda x: -1 if x == "<=50K" else 1) 35 | df_test["income"] = df_test["income"].apply(lambda x: -1 if x == "<=50K." 
else 1) 36 | x_train, x_test = df_train.drop("income", axis=1), df_test.drop("income", axis=1) 37 | y_train, y_test = df_train['income'].values, df_test['income'].values 38 | 39 | x_train_processed = full_pipeline.fit_transform(x_train) 40 | x_test_processed = full_pipeline.fit_transform(x_test) 41 | num_cols = open('int_voc.columns').read().splitlines() 42 | cat_cols = open('cat_voc.columns').read().splitlines() 43 | all_cols = num_cols+cat_cols 44 | voc = {k:i for k,i in zip(all_cols, range(len(all_cols)))} 45 | return (x_train_processed, y_train), (x_test_processed, y_test), voc 46 | 47 | 48 | def data_info(df): 49 | print(df.info()) 50 | num_attributes = df.select_dtypes(include=['int']) 51 | n_num_attributes = len(num_attributes.columns.values) 52 | print(f'{n_num_attributes} numerical attributes:') 53 | print(num_attributes.columns.values) 54 | num_attributes.hist(figsize=(10, 10)) 55 | plt.show() 56 | cat_attributes = df.select_dtypes(include=['object']) 57 | n_cat_attributes = len(cat_attributes.columns.values) 58 | print(f'{n_cat_attributes} categorical attributes:') 59 | print(cat_attributes.columns.values) 60 | sns.countplot(y='education', hue='income', data=cat_attributes, order=cat_attributes['education'].value_counts().index) 61 | plt.show() 62 | num_attributes = num_attributes.assign(income=pd.Series(cat_attributes['income'].values)) 63 | sns.countplot(y='education-num', hue='income', data=num_attributes, order=num_attributes['education-num'].value_counts().index) 64 | plt.show() 65 | 66 | 67 | def extract_relevant_features(voc): 68 | sensitive_prefixes = ['race', 'marital-status', 'relationship'] 69 | 70 | def contains_prefix(word): 71 | for p in sensitive_prefixes: 72 | if p in word: 73 | return True 74 | 75 | relevant_features = list(filter(contains_prefix, voc)) 76 | print(f'Found {len(relevant_features)} relevant features:') 77 | with open('relevant_features.txt', 'w') as file: 78 | for f in relevant_features: 79 | print(f, file=file) 80 | 81 | 82 | if __name__ == '__main__': 83 | train_data_path = 'adult.data' 84 | test_data_path = 'adult.test' 85 | train_data, test_data, voc = csv_to_arr(train_data_path, test_data_path) 86 | np.save('x_train.npy', train_data[0]) 87 | np.save('y_train.npy', train_data[1]) 88 | np.save('x_test.npy', test_data[0]) 89 | np.save('y_test.npy', test_data[1]) 90 | pkl.dump(voc, open('voc.pkl', 'wb')) 91 | extract_relevant_features(voc) 92 | print('{} training samples, {} test samples, {} total samples'.format(train_data[0].shape[0], test_data[0].shape[0], 93 | train_data[0].shape[0]+test_data[0].shape[0])) 94 | print('Number of features: {}'.format(train_data[0].shape[1])) -------------------------------------------------------------------------------- /train_test_data/Adult/relevant_features.txt: -------------------------------------------------------------------------------- 1 | marital-status_Never-married 2 | marital-status_Divorced 3 | marital-status_Separated 4 | marital-status_Widowed 5 | marital-status_Married-spouse-absent 6 | marital-status_Married-AF-spouse 7 | relationship_Not-in-family 8 | relationship_Own-child 9 | relationship_Unmarried 10 | relationship_Wife 11 | relationship_Other-relative 12 | race_Black 13 | race_Asian-Pac-Islander 14 | race_Amer-Indian-Eskimo 15 | race_Other 16 | -------------------------------------------------------------------------------- /train_test_data/Adult/voc.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Adult/voc.pkl -------------------------------------------------------------------------------- /train_test_data/Adult/x_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Adult/x_test.npy -------------------------------------------------------------------------------- /train_test_data/Adult/x_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Adult/x_train.npy -------------------------------------------------------------------------------- /train_test_data/Adult/y_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Adult/y_test.npy -------------------------------------------------------------------------------- /train_test_data/Adult/y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Adult/y_train.npy -------------------------------------------------------------------------------- /train_test_data/Diabetis/csv_to_arr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pickle as pkl 4 | from sklearn.preprocessing import minmax_scale 5 | from sklearn.model_selection import train_test_split 6 | 7 | 8 | def csv_to_arr(csv_path): 9 | df = pd.read_csv(csv_path, sep=',') 10 | voc = {c:i for c,i in zip(df.columns, range(len(df.columns)))} 11 | arr = df.values 12 | x = arr[:, :-1] 13 | y = arr[:, -1] 14 | y = 2*y-1 # map labels from {0,1} to {-1,1} 15 | x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0) 16 | x_train_scaled, x_test_scaled = minmax_scale(x_train), minmax_scale(x_test) 17 | max_l2_norm = np.max(np.sqrt(np.sum(x_train_scaled**2, axis=1))) 18 | print(f'Max l2 norm of data: {max_l2_norm}') 19 | return (x_train_scaled, y_train), (x_test_scaled, y_test), voc 20 | 21 | 22 | def quick_test(train_data, test_data): 23 | from sklearn.linear_model import LogisticRegression 24 | LR = LogisticRegression(max_iter=500, C=1, fit_intercept=False) 25 | LR.fit(train_data[0], train_data[1]) 26 | y_pred = LR.predict(test_data[0]) 27 | acc = len(np.where(y_pred == test_data[1])[0])/y_pred.shape[0] 28 | print(f'Acc: {acc}') 29 | 30 | 31 | if __name__ == '__main__': 32 | csv_path = 'diabetes.csv' 33 | train_data, test_data, voc = csv_to_arr(csv_path) 34 | quick_test(train_data, test_data) 35 | np.save('x_train.npy', train_data[0]) 36 | np.save('y_train.npy', train_data[1]) 37 | np.save('x_test.npy', test_data[0]) 38 | np.save('y_test.npy', test_data[1]) 39 | pkl.dump(voc, open('voc.pkl', 'wb')) 40 | print('{} training samples, {} test samples, {} total'.format(train_data[0].shape[0], test_data[0].shape[0], 41 | train_data[0].shape[0] + test_data[0].shape[0])) 42 | print('Number of features: {}'.format(train_data[0].shape[1])) 43 | -------------------------------------------------------------------------------- 
/train_test_data/Diabetis/relevant_features.txt: -------------------------------------------------------------------------------- 1 | Pregnancies 2 | BMI 3 | Age -------------------------------------------------------------------------------- /train_test_data/Drebin/shas_to_arr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import pickle as pkl 5 | import scipy.sparse as sp 6 | from sklearn.feature_extraction.text import CountVectorizer 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import precision_score, recall_score, accuracy_score 9 | from pathlib import Path 10 | 11 | 12 | # returns a list of (x,y) tuples where x is data and y is labels. The data is specified by a doc path (to a file 13 | # containing the feature vectors) and a split path containing files with filenames for training, testing, validation. 14 | # since there are several splits for the dataset, the index specifies which split to choose 15 | def get_train_test_data(min_doc_count, test_set_size): 16 | path_to_benign_shas = os.path.join(sha_folder, benign_sha_name) 17 | path_to_mal_shas = os.path.join(sha_folder, malicious_sha_name) 18 | benign_filepaths = [os.path.join(doc_path, name) for name in open(path_to_benign_shas).read().splitlines()] 19 | malicious_filepaths = [os.path.join(doc_path, name) for name in open(path_to_mal_shas).read().splitlines()] 20 | all_paths = benign_filepaths+malicious_filepaths 21 | vec = CountVectorizer(input='filename', token_pattern='.+', lowercase=False, min_df=min_doc_count) 22 | start_time = time.time() 23 | data = vec.fit_transform(all_paths) 24 | end_time = time.time() 25 | print('Count Vectorizer took {} seconds'.format(end_time-start_time)) 26 | labels = np.array([-1]*len(benign_filepaths) + [1]*len(malicious_filepaths)) 27 | # clean data 28 | if min_doc_count > 1: 29 | # if min_doc_count is greater than 1, duplicate lines can occur in the data 30 | data, labels = clean_data_sp(data, labels) 31 | x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=test_set_size, random_state=42) 32 | return (x_train, y_train), (x_test, y_test), vec.vocabulary_ 33 | 34 | 35 | def clean_data_sp(data, labels): 36 | """ 37 | Takes a sparse csr_matrix as input and removes lines with only zeros and duplicate entries. Removes the 38 | corresponding lines in labels as well 39 | """ 40 | assert data.shape[0] == labels.shape[0] 41 | # remove rows with only zeros 42 | remaining_axes = data.getnnz(axis=1) > 0 43 | if not remaining_axes.all(): 44 | print('Found {} empty lines.
Deleting.'.format(len(remaining_axes)-np.sum(remaining_axes))) 45 | data = data[remaining_axes] 46 | labels = labels[remaining_axes] 47 | # remove duplicate rows 48 | all_column_indices = [data[i].indices.tolist() for i in range(data.shape[0])] 49 | unique_row_indices, unique_columns = [], [] 50 | for row_idx, indices in enumerate(all_column_indices): 51 | if indices not in unique_columns: 52 | unique_columns.append(indices) 53 | unique_row_indices.append(row_idx) 54 | if len(unique_row_indices) != data.shape[0]: 55 | diff = data.shape[0] - len(unique_row_indices) 56 | print('Original number of samples:{}'.format(data.shape[0])) 57 | print('Duplicates removed:{}'.format(diff)) 58 | print('Samples remaining:{}'.format(len(unique_row_indices))) 59 | return data[unique_row_indices], labels[unique_row_indices] 60 | 61 | 62 | def quick_test(train_data, test_data): 63 | from sklearn.svm import SVC 64 | svm = SVC() 65 | print('Training SVM ...') 66 | start_time = time.time() 67 | svm.fit(train_data[0], train_data[1]) 68 | end_time = time.time() 69 | print('Fitting the SVM took {} seconds'.format(end_time-start_time)) 70 | y_pred = svm.predict(test_data[0]) 71 | acc = accuracy_score(test_data[1], y_pred) 72 | prec = precision_score(test_data[1], y_pred) 73 | recall = recall_score(test_data[1], y_pred) 74 | print(f'Acc: {acc}, Precision: {prec}, Recall: {recall}') 75 | 76 | 77 | def extract_urls(voc): 78 | l = [w for w in voc if 'url' in w] 79 | with open('relevant_features.txt', 'w') as f: 80 | for url in l: 81 | print(url, file=f) 82 | 83 | 84 | if __name__ == "__main__": 85 | doc_path = os.path.join(str(Path.home()), 'drebin_dataset/drebin-public/feature_vectors/') 86 | sha_folder = 'sha_lists/' 87 | benign_sha_name = 'drebin_benign_new.shas' 88 | malicious_sha_name = 'drebin_malicious_new.shas' 89 | min_doc_count = 100 # min number of apps a feature has to appear in 90 | test_set_size = 0.2 91 | train_data, test_data, voc = get_train_test_data(min_doc_count, test_set_size) 92 | sp.save_npz('x_train.npz', train_data[0]) 93 | np.save('y_train.npy', train_data[1]) 94 | sp.save_npz('x_test.npz', test_data[0]) 95 | np.save('y_test.npy', test_data[1]) 96 | pkl.dump(voc, open('voc.pkl', 'wb')) 97 | extract_urls(voc) 98 | print('{} training samples, {} test samples, {} total'.format(train_data[0].shape[0], test_data[0].shape[0], 99 | train_data[0].shape[0] + test_data[0].shape[0])) 100 | print('Number of features: {}'.format(train_data[0].shape[1])) 101 | quick_test(train_data, test_data) 102 | -------------------------------------------------------------------------------- /train_test_data/Drebin/voc.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Drebin/voc.pkl -------------------------------------------------------------------------------- /train_test_data/Drebin/x_test.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Drebin/x_test.npz -------------------------------------------------------------------------------- /train_test_data/Drebin/x_train.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Drebin/x_train.npz 
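shas_to_arr.py above stores the Drebin features as sparse CSR matrices (x_*.npz) and the labels as dense vectors in {-1, +1} (y_*.npy), which is what the files in this folder contain. A minimal loader sketch; the function name and the relative folder path are assumptions, not part of the repository:

import numpy as np
import scipy.sparse as sp

def load_drebin(folder='train_test_data/Drebin'):
    # hypothetical loader: features are sparse CSR matrices, labels are in {-1, +1}
    x_train = sp.load_npz(f'{folder}/x_train.npz')
    y_train = np.load(f'{folder}/y_train.npy')
    x_test = sp.load_npz(f'{folder}/x_test.npz')
    y_test = np.load(f'{folder}/y_test.npy')
    return (x_train, y_train), (x_test, y_test)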
-------------------------------------------------------------------------------- /train_test_data/Drebin/x_valid.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Drebin/x_valid.npz -------------------------------------------------------------------------------- /train_test_data/Drebin/y_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Drebin/y_test.npy -------------------------------------------------------------------------------- /train_test_data/Drebin/y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Drebin/y_train.npy -------------------------------------------------------------------------------- /train_test_data/Drebin/y_valid.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Drebin/y_valid.npy -------------------------------------------------------------------------------- /train_test_data/Enron/files_to_arr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import scipy.sparse as sp 4 | from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer 5 | from sklearn.preprocessing import normalize 6 | from sklearn.model_selection import train_test_split 7 | import pickle as pkl 8 | 9 | 10 | def get_train_test_data(ham_path, spam_path, enron_indices, min_doc_count, binary=False): 11 | filenames = [ham_path.format(i) for i in enron_indices] + [spam_path.format(i) for i in enron_indices] 12 | filelist = [os.path.join(p,n) for p in filenames for n in os.listdir(p)] 13 | labels = np.array([1 if 'spam' in f else -1 for f in filelist]) 14 | vectorizer = TfidfVectorizer(input='filename', encoding='latin-1', min_df=min_doc_count, binary=binary) 15 | vectorizer = CountVectorizer(input='filename', encoding='latin-1', min_df=min_doc_count, binary=binary) 16 | x = vectorizer.fit_transform(filelist) 17 | # normalize data 18 | x_normed = normalize(x, norm='l2', axis=1) 19 | x_train, x_test, y_train, y_test = train_test_split(x_normed, labels, test_size=0.2, random_state=0) 20 | return (x_train, y_train), (x_test, y_test), vectorizer.vocabulary_ 21 | 22 | 23 | def quick_test(train_data, test_data): 24 | from sklearn.linear_model import LogisticRegression 25 | LR = LogisticRegression(max_iter=500) 26 | LR.fit(train_data[0], train_data[1]) 27 | y_pred = LR.predict(test_data[0]) 28 | acc = len(np.where(y_pred == test_data[1])[0])/y_pred.shape[0] 29 | print(f'Acc: {acc}') 30 | 31 | 32 | if __name__ == '__main__': 33 | ham_path = 'raw/enron{}/ham' 34 | spam_path = 'raw/enron{}/spam' 35 | enron_indices = [1, 2, 3, 4, 5, 6] 36 | min_doc_count = 100 37 | binary = True 38 | train_data, test_data, voc = get_train_test_data(ham_path, spam_path, enron_indices, min_doc_count, binary) 39 | sp.save_npz('x_train.npz', train_data[0]) 40 | np.save('y_train.npy', train_data[1]) 41 | sp.save_npz('x_test.npz', test_data[0]) 42 | np.save('y_test.npy', test_data[1]) 43 | pkl.dump(voc, open('voc.pkl', 'wb')) 44 | 
print('{} training samples, {} test samples, {} total'.format(train_data[0].shape[0], test_data[0].shape[0], 45 | train_data[0].shape[0] + test_data[0].shape[0])) 46 | print('Number of features: {}'.format(train_data[0].shape[1])) 47 | quick_test(train_data, test_data) 48 | -------------------------------------------------------------------------------- /train_test_data/Enron/relevant_features.txt: -------------------------------------------------------------------------------- 1 | adams 2 | afford 3 | alan 4 | allen 5 | anderson 6 | anthony 7 | arnold 8 | arthur 9 | austin 10 | barry 11 | baxter 12 | beck 13 | bennett 14 | best 15 | beverly 16 | bruce 17 | bryan 18 | burn 19 | christian 20 | christie 21 | clark 22 | clayton 23 | collins 24 | colwell 25 | cox 26 | curry 27 | dan 28 | davies 29 | dyson 30 | ford 31 | forster 32 | george 33 | glover 34 | gold 35 | gordon 36 | hewlett 37 | howard 38 | hughes 39 | jackson 40 | jean 41 | johnson 42 | jones 43 | joseph 44 | kay 45 | keith 46 | kelly 47 | kyle 48 | lewis 49 | martin 50 | miller 51 | mills 52 | mitchell 53 | moore 54 | morris 55 | muller 56 | neal 57 | nelson 58 | oxley 59 | painter 60 | palmer 61 | parker 62 | patrick 63 | paul 64 | piper 65 | ray 66 | raymond 67 | reader 68 | richard 69 | roberts 70 | robinson 71 | russell 72 | sanders 73 | sharp 74 | smith 75 | spain 76 | stanley 77 | stuart 78 | sweeney 79 | thomas 80 | thompson 81 | vickers 82 | vince 83 | vincent 84 | walker 85 | ward 86 | washington 87 | watson 88 | wells 89 | wharton 90 | williams 91 | wilson 92 | aimee 93 | alan 94 | alberta 95 | alex 96 | allen 97 | amy 98 | andrea 99 | andrew 100 | andy 101 | anita 102 | ann 103 | anne 104 | anthony 105 | antonio 106 | april 107 | arnold 108 | arthur 109 | austin 110 | barbara 111 | barry 112 | ben 113 | beth 114 | beverly 115 | bill 116 | bob 117 | brad 118 | brenda 119 | brent 120 | brian 121 | bruce 122 | bryan 123 | carl 124 | carlos 125 | carol 126 | charles 127 | charlie 128 | chase 129 | cheryl 130 | chris 131 | christian 132 | christine 133 | christopher 134 | cindy 135 | clayton 136 | craig 137 | crystal 138 | cynthia 139 | dale 140 | dallas 141 | dan 142 | dana 143 | daniel 144 | darren 145 | dave 146 | david 147 | dean 148 | debbie 149 | dennis 150 | diego 151 | don 152 | donald 153 | donna 154 | douglas 155 | drew 156 | edward 157 | elizabeth 158 | eric 159 | faith 160 | francisco 161 | frank 162 | fred 163 | gary 164 | george 165 | georgia 166 | gordon 167 | grace 168 | grant 169 | greg 170 | guy 171 | harry 172 | heather 173 | henry 174 | hope 175 | howard 176 | hunter 177 | iris 178 | jack 179 | jackie 180 | jackson 181 | james 182 | jan 183 | janet 184 | jason 185 | jay 186 | jean 187 | jeff 188 | jeffrey 189 | jennifer 190 | jeremy 191 | jill 192 | jim 193 | jo 194 | joe 195 | john 196 | jon 197 | jonathan 198 | jordan 199 | jose 200 | joseph 201 | juan 202 | julie 203 | june 204 | karen 205 | kate 206 | katherine 207 | kathy 208 | kay 209 | keith 210 | kelly 211 | kenneth 212 | kevin 213 | kim 214 | kimberly 215 | kristin 216 | kyle 217 | lance 218 | larry 219 | laura 220 | lee 221 | leslie 222 | lewis 223 | linda 224 | lisa 225 | lloyd 226 | lorraine 227 | louis 228 | louise 229 | lynn 230 | margaret 231 | maria 232 | marie 233 | mark 234 | martin 235 | mary 236 | matthew 237 | maureen 238 | max 239 | maya 240 | megan 241 | melissa 242 | michael 243 | michelle 244 | mike 245 | miles 246 | mitchell 247 | molly 248 | monica 249 | morgan 250 | morris 251 | nancy 252 | neal 253 | neil 254 | nelson 255 | 
norma 256 | parker 257 | patricia 258 | patrick 259 | paul 260 | peggy 261 | penny 262 | peter 263 | phillip 264 | ray 265 | raymond 266 | rebecca 267 | richard 268 | rick 269 | rita 270 | robert 271 | roger 272 | rose 273 | russell 274 | sally 275 | sam 276 | sarah 277 | scott 278 | sean 279 | sheila 280 | sheri 281 | shirley 282 | sierra 283 | stacey 284 | stanley 285 | stephanie 286 | stephen 287 | steve 288 | steven 289 | stuart 290 | sue 291 | summer 292 | susan 293 | suzanne 294 | tanya 295 | taylor 296 | ted 297 | terry 298 | thomas 299 | tim 300 | todd 301 | tom 302 | tony 303 | victor 304 | victoria 305 | vincent 306 | virginia 307 | wayne 308 | william -------------------------------------------------------------------------------- /train_test_data/Enron/voc.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Enron/voc.pkl -------------------------------------------------------------------------------- /train_test_data/Enron/x_test.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Enron/x_test.npz -------------------------------------------------------------------------------- /train_test_data/Enron/x_train.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Enron/x_train.npz -------------------------------------------------------------------------------- /train_test_data/Enron/y_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Enron/y_test.npy -------------------------------------------------------------------------------- /train_test_data/Enron/y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/MachineUnlearning/10fadb2dc1ac0128b56b4450907c4affb508a8f2/train_test_data/Enron/y_train.npy -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | """ Utility classes for data persistence. """ 2 | 3 | import os 4 | import sys 5 | import json 6 | import logging 7 | from collections import defaultdict 8 | from itertools import islice 9 | from time import perf_counter 10 | from contextlib import contextmanager 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | from tensorflow import GradientTape 15 | 16 | 17 | class Result(object): 18 | """ Python dict with save/load functionality. """ 19 | 20 | def __init__(self, base_path, name_tmpl, **suffix_kwargs): 21 | filename = name_tmpl 22 | if len(suffix_kwargs) > 0: 23 | # assemble name to `base_name{-k0_v0-k1_v1}.json` 24 | suffix = '-'.join([f'{k}_{suffix_kwargs[k]}' for k in sorted(suffix_kwargs)]) 25 | if len(suffix) > 0: 26 | suffix = f'-{suffix}' 27 | filename = name_tmpl.format(suffix) 28 | else: 29 | filename = name_tmpl.format('') 30 | self.filepath = os.path.join(base_path, filename) 31 | 32 | def save(self): 33 | """ Save object attributes except those used for opening the file etc. 
""" 34 | with open(self.filepath, 'w') as f: 35 | json.dump(self.as_dict(), f, indent=4) 36 | return self 37 | 38 | def load(self): 39 | """ Load object attributes from given file path. """ 40 | with open(self.filepath, 'r') as f: 41 | self.update(json.load(f)) 42 | return self 43 | 44 | def as_dict(self): 45 | exclude_keys = ['filepath', 'exists'] 46 | return {k: v for k, v in self.__dict__.items() if k not in exclude_keys} 47 | 48 | def update(self, update_dict): 49 | self.__dict__.update(update_dict) 50 | return self 51 | 52 | @property 53 | def exists(self): 54 | return os.path.exists(self.filepath) 55 | 56 | 57 | class TrainingResult(Result): 58 | def __init__(self, model_folder, name_tmpl='train_results{}.json', **suffix_kwargs): 59 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 60 | 61 | 62 | class PoisoningResult(Result): 63 | def __init__(self, model_folder, name_tmpl='poisoning_results{}.json', **suffix_kwargs): 64 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 65 | 66 | 67 | class LabelFlipResult(Result): 68 | def __init__(self, model_folder, name_tmpl='labelflip_results{}.json', **suffix_kwargs): 69 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 70 | 71 | 72 | class UnlearningResult(Result): 73 | def __init__(self, model_folder, name_tmpl='unlearning_results{}.json', **suffix_kwargs): 74 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 75 | 76 | 77 | class IntermediateResult(Result): 78 | def __init__(self, model_folder, name_tmpl='intermediate_results{}.json', **suffix_kwargs): 79 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 80 | 81 | 82 | class SGDUnlearningResult(Result): 83 | def __init__(self, model_folder, name_tmpl='sgd_unlearning_results{}.json', **suffix_kwargs): 84 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 85 | 86 | 87 | class ActivationClusteringResult(Result): 88 | def __init__(self, model_folder, name_tmpl='activation_clustering_results{}.json', **suffix_kwargs): 89 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 90 | 91 | 92 | class MixedResult(Result): 93 | """ 94 | Placeholder class for mixing results with `as_dict` + `update`. 95 | Saving is disabled to prevent overriding existing results. 96 | """ 97 | 98 | def __init__(self, model_folder, name_tmpl=None, **suffix_kwargs): 99 | super().__init__(model_folder, name_tmpl, **suffix_kwargs) 100 | 101 | def save(self): 102 | return 103 | 104 | 105 | def save_train_results(model_folder): 106 | """ Non-invasive workaround for current training not utilizing the above classes. Call after `train_retrain`. """ 107 | result = TrainingResult(model_folder) 108 | with open(os.path.join(model_folder, 'test_performance.json'), 'r') as f: 109 | result.update(json.load(f)) 110 | result.save() 111 | 112 | 113 | class DeltaTmpState(object): 114 | """ Simple context manager to cleanly store previous delta sets and restore them later. """ 115 | 116 | def __init__(self, z_x, z_y, z_x_delta, z_y_delta): 117 | self._z_x = z_x.copy() 118 | self._z_y = z_y.copy() 119 | self._z_x_delta = z_x_delta.copy() 120 | self._z_y_delta = z_y_delta.copy() 121 | 122 | def __enter__(self): 123 | return self 124 | 125 | def __exit__(self, exc_type, exc_value, exc_traceback): 126 | return self._z_x, self._z_y, self._z_x_delta, self._z_y_delta 127 | 128 | 129 | class ModelTmpState(object): 130 | """ Simple context manager to cleanly store previous model weights and restore them later. 
""" 131 | 132 | def __init__(self, model): 133 | self.model = model 134 | 135 | def __enter__(self): 136 | self._weights = self.model.get_weights().copy() 137 | return self 138 | 139 | def __exit__(self, exc_type, exc_value, exc_traceback): 140 | self.model.set_weights(self._weights) 141 | 142 | 143 | class LoggedGradientTape(GradientTape): 144 | context = 'default' 145 | logs = defaultdict(list) 146 | 147 | def __init__(self, *args, **kwargs): 148 | super().__init__(*args, **kwargs) 149 | 150 | def gradient(self, target, sources, **kwargs): 151 | LoggedGradientTape.logs[LoggedGradientTape.context].append(len(target)) 152 | return super().gradient(target, sources, **kwargs) 153 | 154 | 155 | class GradientLoggingContext(object): 156 | """ Simple context manager to define a gradient logging context for an experiment. """ 157 | 158 | def __init__(self, name): 159 | self._name = name 160 | 161 | def __enter__(self): 162 | self._prev_context = LoggedGradientTape.context 163 | LoggedGradientTape.context = self._name 164 | return self 165 | 166 | def __exit__(self, exc_type, exc_value, exc_traceback): 167 | LoggedGradientTape.context = self._prev_context 168 | 169 | 170 | @contextmanager 171 | def measure_time(): 172 | start = perf_counter() 173 | yield lambda: perf_counter() - start 174 | 175 | 176 | def reduce_dataset(X, Y, reduction, delta_idx=None): 177 | n = len(X) 178 | n_reduced = int(reduction * n) 179 | if delta_idx is not None: 180 | # ensure that delta samples remain in the training set with the same ratio 181 | n_delta = np.ceil(reduction*delta_idx.shape[0]).astype(np.int) 182 | _delta = np.random.choice(delta_idx, min(n_delta, n_reduced), replace=False) 183 | # fill with regular samples 184 | _remaining_idx = list(set(range(X.shape[0])) - set(delta_idx)) 185 | _clean = np.random.choice(_remaining_idx, n_reduced - _delta.shape[0], replace=False) 186 | idx_reduced = np.hstack((_delta, _clean)) 187 | delta_idx_train = np.array(range(len(_delta))) 188 | X, Y = X[idx_reduced], Y[idx_reduced] 189 | return X, Y, idx_reduced, delta_idx_train 190 | else: 191 | idx_reduced = np.random.choice(range(n), n_reduced, replace=False) 192 | if isinstance(X, np.ndarray): 193 | X, Y = X[idx_reduced], Y[idx_reduced] 194 | else: 195 | X, Y = tf.gather(X, idx_reduced), tf.gather(Y, idx_reduced) 196 | return X, Y, idx_reduced, np.zeros([], dtype=int) 197 | 198 | 199 | class CSVLogger(object): # pragma: no cover 200 | def __init__(self, name, columns, log_file=None, level='info'): 201 | if log_file is not None and os.path.exists(log_file): 202 | os.remove(log_file) 203 | 204 | # create logger on the current module and set its level 205 | self.logger = logging.getLogger(name) 206 | self.logger.setLevel(logging.INFO) 207 | self.logger.setLevel(getattr(logging, level.upper())) 208 | self.columns = columns 209 | self.header = ','.join(columns) 210 | self.needs_header = True 211 | 212 | # create a formatter for the given csv columns 213 | # fmt = ','.join([f'%({c})d' for c in self.columns]) 214 | self.formatter = logging.Formatter('%(msg)s') 215 | 216 | self.log_file = log_file 217 | if self.log_file: 218 | # create a channel for handling the logger (stderr) and set its format 219 | ch = logging.FileHandler(log_file) 220 | else: 221 | # create a channel for handling the logger (stderr) and set its format 222 | ch = logging.StreamHandler() 223 | ch.setFormatter(self.formatter) 224 | 225 | # connect the logger to the channel 226 | self.logger.addHandler(ch) 227 | 228 | def log(self, level='info', **msg): 229 | if 
self.needs_header: 230 | if self.log_file and os.path.isfile(self.log_file): 231 | with open(self.log_file) as file_obj: 232 | if len(list(islice(file_obj, 2))) > 0: 233 | self.needs_header = False 234 | if self.needs_header: 235 | with open(self.log_file, 'a') as file_obj: 236 | file_obj.write(f"{self.header}\n") 237 | else: 238 | if self.needs_header: 239 | sys.stderr.write(f"{self.header}\n") 240 | self.needs_header = False 241 | 242 | row = ','.join([str(msg.get(c, "")) for c in self.columns]) 243 | func = getattr(self.logger, level) 244 | func(row) --------------------------------------------------------------------------------
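Taken together, the helpers in util.py cover result persistence (the Result subclasses), timing (measure_time) and gradient accounting (LoggedGradientTape together with GradientLoggingContext). A usage sketch; run_and_record and repair_model are hypothetical stand-ins for an actual unlearning routine and are not part of the repository:

from util import (GradientLoggingContext, LoggedGradientTape,
                  UnlearningResult, measure_time)

def run_and_record(model_folder, repair_model):
    # hypothetical driver: record accuracy, duration and gradient count of one unlearning run
    result = UnlearningResult(model_folder)  # filepath: <model_folder>/unlearning_results.json
    with GradientLoggingContext('unlearn'), measure_time() as elapsed:
        acc_after_fix = repair_model()       # gradients taken via LoggedGradientTape are logged under 'unlearn'
        duration = elapsed()
    result.update({'acc_after_fix': acc_after_fix,
                   'unlearning_duration_s': duration,
                   'n_gradients': sum(LoggedGradientTape.logs['unlearn'])})
    result.save()
    return result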