├── .gitattributes ├── .gitignore ├── .idea ├── .gitignore ├── .name ├── SCAFFOLD-master.iml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml └── modules.xml ├── LICENSE ├── README.md ├── __pycache__ ├── main.cpython-38.pyc └── simulate.cpython-38.pyc ├── data ├── CIFAR-10 │ ├── README.md │ └── data_generator.py ├── CIFAR │ └── data_generator.py ├── Femnist │ ├── README.md │ ├── __pycache__ │ │ └── data_generator.cpython-38.pyc │ ├── data │ │ ├── my_sample.py │ │ ├── nist_generator.py │ │ ├── saved_test │ │ │ ├── mytest_sim0.1.json │ │ │ ├── mytest_sim0.json │ │ │ └── mytest_sim1.json │ │ ├── saved_train │ │ │ ├── mytrain_sim0.1.json │ │ │ ├── mytrain_sim0.json │ │ │ └── mytrain_sim1.json │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ ├── data_generator.py │ ├── preprocess.sh │ ├── preprocess │ │ ├── data_to_json.py │ │ ├── data_to_json.sh │ │ ├── get_data.sh │ │ ├── get_file_dirs.py │ │ ├── get_hashes.py │ │ ├── group_by_writer.py │ │ └── match_hashes.py │ └── stats.sh ├── Linear_synthetic │ ├── data │ │ └── README.md │ ├── generate_linear_regession.py │ ├── generate_linear_regession_updated.py │ ├── generate_linear_synthetic_backup.py │ └── optimal_solution_finding.py ├── Logistic_synthetic │ ├── README.md │ └── logistic_regression.py └── Mnist │ ├── data │ └── mldata │ │ └── mnist-original.mat │ ├── generate_iid_20users.py │ ├── generate_niid_100users_updated.py │ ├── generate_niid_20users.py │ └── generate_niid_mnist_100users.py ├── flearn ├── optimizers │ ├── __pycache__ │ │ └── fedoptimizer.cpython-38.pyc │ └── fedoptimizer.py ├── servers │ ├── __pycache__ │ │ ├── serveravg.cpython-38.pyc │ │ ├── serverbase.cpython-38.pyc │ │ ├── serverfedl.cpython-38.pyc │ │ └── serverscaffold.cpython-38.pyc │ ├── server_avg.py │ ├── server_base.py │ └── server_scaffold.py ├── trainmodel │ ├── __pycache__ │ │ └── models.cpython-38.pyc │ └── models.py └── users │ ├── __pycache__ │ ├── useravg.cpython-38.pyc │ ├── userbase.cpython-38.pyc │ ├── userfedl.cpython-38.pyc │ └── userscaffold.cpython-38.pyc │ ├── user_avg.py │ ├── user_base.py │ └── user_scaffold.py ├── main.py ├── models └── Femnist │ └── server.pt ├── requirements.txt ├── results ├── Femnist_FedAvg_0.1s_0.h5 ├── Femnist_FedAvg_0.1s_1.h5 ├── Femnist_FedAvg_0.1s_2.h5 ├── Femnist_FedAvg_0.1s_3.h5 ├── Femnist_FedAvg_0.1s_4.h5 ├── Femnist_FedAvg_0.1s_5.h5 ├── Femnist_FedAvg_0.1s_6.h5 ├── Femnist_FedAvg_0.1s_7.h5 ├── Femnist_FedAvg_0.1s_8.h5 ├── Femnist_FedAvg_0.1s_9.h5 ├── Femnist_FedAvg_0.1s_avg.h5 ├── Femnist_FedAvg_0s_0.h5 ├── Femnist_FedAvg_0s_1.h5 ├── Femnist_FedAvg_0s_2.h5 ├── Femnist_FedAvg_0s_3.h5 ├── Femnist_FedAvg_0s_4.h5 ├── Femnist_FedAvg_0s_5.h5 ├── Femnist_FedAvg_0s_6.h5 ├── Femnist_FedAvg_0s_7.h5 ├── Femnist_FedAvg_0s_8.h5 ├── Femnist_FedAvg_0s_9.h5 ├── Femnist_FedAvg_0s_avg.h5 ├── Femnist_FedAvg_1s_0.h5 ├── Femnist_FedAvg_1s_1.h5 ├── Femnist_FedAvg_1s_2.h5 ├── Femnist_FedAvg_1s_3.h5 ├── Femnist_FedAvg_1s_4.h5 ├── Femnist_FedAvg_1s_5.h5 ├── Femnist_FedAvg_1s_6.h5 ├── Femnist_FedAvg_1s_7.h5 ├── Femnist_FedAvg_1s_8.h5 ├── Femnist_FedAvg_1s_9.h5 ├── Femnist_FedAvg_1s_avg.h5 ├── Femnist_SCAFFOLD_0.1s_0.h5 ├── Femnist_SCAFFOLD_0.1s_1.h5 ├── Femnist_SCAFFOLD_0.1s_2.h5 ├── Femnist_SCAFFOLD_0.1s_3.h5 ├── Femnist_SCAFFOLD_0.1s_4.h5 ├── Femnist_SCAFFOLD_0.1s_5.h5 ├── Femnist_SCAFFOLD_0.1s_6.h5 ├── Femnist_SCAFFOLD_0.1s_7.h5 ├── Femnist_SCAFFOLD_0.1s_8.h5 ├── Femnist_SCAFFOLD_0.1s_9.h5 ├── Femnist_SCAFFOLD_0.1s_avg.h5 ├── Femnist_SCAFFOLD_0s_0.h5 ├── 
Femnist_SCAFFOLD_0s_1.h5 ├── Femnist_SCAFFOLD_0s_2.h5 ├── Femnist_SCAFFOLD_0s_3.h5 ├── Femnist_SCAFFOLD_0s_4.h5 ├── Femnist_SCAFFOLD_0s_5.h5 ├── Femnist_SCAFFOLD_0s_6.h5 ├── Femnist_SCAFFOLD_0s_7.h5 ├── Femnist_SCAFFOLD_0s_8.h5 ├── Femnist_SCAFFOLD_0s_9.h5 ├── Femnist_SCAFFOLD_0s_avg.h5 ├── Femnist_SCAFFOLD_1s_0.h5 ├── Femnist_SCAFFOLD_1s_1.h5 ├── Femnist_SCAFFOLD_1s_2.h5 ├── Femnist_SCAFFOLD_1s_3.h5 ├── Femnist_SCAFFOLD_1s_4.h5 ├── Femnist_SCAFFOLD_1s_5.h5 ├── Femnist_SCAFFOLD_1s_6.h5 ├── Femnist_SCAFFOLD_1s_7.h5 ├── Femnist_SCAFFOLD_1s_8.h5 ├── Femnist_SCAFFOLD_1s_9.h5 └── Femnist_SCAFFOLD_1s_avg.h5 ├── simulate.py └── utils ├── __pycache__ ├── model_utils.cpython-38.pyc └── plot_utils.cpython-38.pyc ├── model_utils.py ├── old_plot.py └── plot_utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .idea/vcs.xml 3 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | server_scaffold.py -------------------------------------------------------------------------------- /.idea/SCAFFOLD-master.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # **Accelerated Federated Learning Over MAC in Heterogeneous Networks** 2 | 3 | I have analyzed the convergence rate of a federated learning algorithm named SCAFFOLD (a variation of SVRG) under a noisy fading MAC with heterogeneous data, in order to formulate a new algorithm that accelerates the learning process in such settings. 4 | My work is inspired by, and builds on, the following three articles – 5 | 6 | 1. 
**On Analog Gradient Descent Learning Over Multiple Access Fading Channels** 7 | The authors implement the GBMA algorithm, in which the users transmit an analog function of their local gradients using a common shaping waveform, and the network edge updates the global model from the received superposition of the transmitted analog signals, which represents a noisy, distorted version of the gradients. 8 | https://arxiv.org/abs/1908.07463 9 | 10 | 2. **Over-the-Air Federated Learning from Heterogeneous Data** 11 | The authors introduce COTAF, a time-varying pre-coding and scaling scheme that facilitates the aggregation, gradually mitigates the noise effect, and maintains the convergence properties of local SGD with heterogeneous data across users. 12 | https://arxiv.org/abs/2009.12787 13 | 14 | 3. **SCAFFOLD - Stochastic Controlled Averaging for Federated Learning** 15 | The authors propose a stochastic algorithm that overcomes gradient dissimilarity by using control variates as estimates of the users' variances, thereby making FL more robust to heterogeneity in the users' data. 16 | https://arxiv.org/abs/1910.06378 17 | 18 | # Latest progress 19 | I have built a Python framework that simulates FedAvg, COTAF, SCAFFOLD, and our proposed scheme over the extended EMNIST data in different heterogeneity scenarios. I examine the performance of SCAFFOLD over a noisy fading MAC and try to reproduce the results of the related works. I also examine different pre-coding scenarios for the controls. 20 | 21 | 22 | ![WhatsApp Image 2021-03-09 at 16 37 08](https://user-images.githubusercontent.com/72392859/111066827-b4b53700-84c9-11eb-8b5c-f9d1dd01ff7e.jpeg) 23 | 24 | 25 | We analyzed the model and control update norms during the learning process. The model and control norms maintain a constant proportion. Conclusion: try to apply a different precoding to each parameter type. 26 | 27 | 28 | ![image](https://user-images.githubusercontent.com/72392859/126355502-10650454-27c4-47f1-a73c-45344c46b10c.png) 29 | 30 | 31 | ![WhatsApp Image 2021-03-09 at 16 34 31](https://user-images.githubusercontent.com/72392859/111066830-b7b02780-84c9-11eb-8f69-152bc0f83969.jpeg) 32 | 33 | 34 | The figures confirm that I managed to reproduce the results of the related works. In addition, it seems that SCAFFOLD may suffer a degradation in performance when the channel noise is applied. We suspected that the control and gradient updates evolve differently over time, so we use a different pre-coding scaling for the controls and simulate a scenario where both pre-codings are constrained to the same SNR. 35 | 36 | # Software requirements: 37 | - numpy, scipy, pytorch, Pillow, matplotlib. 
38 | 39 | - To download the dependencies: **pip3 install -r requirements.txt** 40 | 41 | 42 | -------------------------------------------------------------------------------- /__pycache__/main.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/__pycache__/main.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/simulate.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/__pycache__/simulate.cpython-38.pyc -------------------------------------------------------------------------------- /data/CIFAR-10/README.md: -------------------------------------------------------------------------------- 1 | # CIFAR-10 data generator 2 | 3 | To use data_generator.py, download the CIFAR-10 python version archive from https://www.cs.toronto.edu/~kriz/cifar.html to the same directory and extract it 4 | -------------------------------------------------------------------------------- /data/CIFAR-10/data_generator.py: -------------------------------------------------------------------------------- 1 | import emnist 2 | import numpy as np 3 | from tqdm import trange 4 | import random 5 | import json 6 | import os 7 | from functools import reduce 8 | 9 | 10 | def generate_data(similarity: int, num_of_users=10, samples_num=5000): 11 | """ 12 | generate CIFAR-10 data among 10 users with different similarities 13 | :param similarity: portion of similar data between users. 
number between 0 to 1 14 | :param num_of_users: number of users data distributed among 15 | :param samples_num: number of samples distributed to each user 16 | """ 17 | root_path = os.path.dirname(__file__) 18 | train_path = root_path + '/data/train/mytrain.json' 19 | test_path = root_path + '/data/test/mytest.json' 20 | dir_path = os.path.dirname(train_path) 21 | if not os.path.exists(dir_path): 22 | os.makedirs(dir_path) 23 | dir_path = os.path.dirname(test_path) 24 | if not os.path.exists(dir_path): 25 | os.makedirs(dir_path) 26 | 27 | cifar_dicts = [] 28 | for i in range(1, 6): 29 | cifar_dicts.append(unpickle(root_path + '/cifar-10-batches-py/data_batch_' + f"{i}")) 30 | 31 | train_images = np.concatenate([cifar_dict['data'] for cifar_dict in cifar_dicts]) 32 | # train_labels = reduce((lambda x, y: x + y), [cifar_dict['labels'] for cifar_dict in cifar_dicts]) 33 | train_labels = np.concatenate([cifar_dict['labels'] for cifar_dict in cifar_dicts]) 34 | train_images = train_images.astype(np.float32) 35 | train_labels = train_labels.astype(np.int) 36 | num_of_labels = len(set(train_labels)) 37 | 38 | cifar_dict = unpickle(root_path + '/cifar-10-batches-py/test_batch') 39 | test_images = cifar_dict['data'] 40 | test_labels = np.array(cifar_dict['labels']) 41 | test_images = test_images.astype(np.float32) 42 | test_labels = test_labels.astype(np.int) 43 | 44 | cifar_data = [] 45 | for i in range(min(train_labels), num_of_labels + min(train_labels)): 46 | idx = train_labels == i 47 | cifar_data.append(train_images[idx]) 48 | 49 | iid_samples = int(similarity * samples_num) 50 | X_train = [[] for _ in range(num_of_users)] 51 | y_train = [[] for _ in range(num_of_users)] 52 | idx = np.zeros(num_of_labels, dtype=np.int64) 53 | 54 | # fill users data by labels 55 | for user in range(num_of_users): 56 | label = user % num_of_labels 57 | X_train[user] += cifar_data[label][idx[label]:idx[label] + samples_num - iid_samples].tolist() 58 | y_train[user] += (label * np.ones(samples_num - iid_samples)).tolist() 59 | idx[label] += samples_num - iid_samples 60 | 61 | print(idx) 62 | 63 | # create %similarity of iid data 64 | for user in range(num_of_users): 65 | labels = np.random.randint(0, num_of_labels, iid_samples) 66 | for label in labels: 67 | while idx[label] >= len(cifar_data[label]): 68 | label = (label + 1) % num_of_labels 69 | X_train[user].append(cifar_data[label][idx[label]].tolist()) 70 | y_train[user] += (label * np.ones(1)).tolist() 71 | idx[label] += 1 72 | 73 | print(idx) 74 | 75 | # create test data 76 | X_test = test_images.tolist() 77 | y_test = test_labels.tolist() 78 | 79 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 80 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 81 | 82 | for i in range(num_of_users): 83 | uname = 'f_{0:05d}'.format(i) 84 | 85 | combined = list(zip(X_train[i], y_train[i])) 86 | random.shuffle(combined) 87 | X_train[i][:], y_train[i][:] = zip(*combined) 88 | train_len = len(X_train[i]) 89 | test_len = int(len(test_images) / num_of_users) 90 | 91 | train_data['users'].append(uname) 92 | train_data['user_data'][uname] = {'x': X_train[i], 'y': y_train[i]} 93 | train_data['num_samples'].append(train_len) 94 | test_data['users'].append(uname) 95 | test_data['user_data'][uname] = {'x': X_test[test_len * i:test_len * (i + 1)], 96 | 'y': y_test[test_len * i:test_len * (i + 1)]} 97 | test_data['num_samples'].append(test_len) 98 | 99 | print(train_data['num_samples']) 100 | print(sum(train_data['num_samples'])) 101 | 
print(sum(test_data['num_samples'])) 102 | 103 | print("Saving data, please wait") 104 | with open(train_path, 'w') as outfile: 105 | json.dump(train_data, outfile) 106 | with open(test_path, 'w') as outfile: 107 | json.dump(test_data, outfile) 108 | print("Saving completed") 109 | 110 | 111 | def unpickle(file): 112 | import pickle 113 | with open(file, 'rb') as fo: 114 | data_dict = pickle.load(fo, encoding='latin1') 115 | return data_dict 116 | 117 | 118 | if __name__ == '__main__': 119 | generate_data(similarity=1) 120 | -------------------------------------------------------------------------------- /data/CIFAR/data_generator.py: -------------------------------------------------------------------------------- 1 | import emnist 2 | import numpy as np 3 | from tqdm import trange 4 | import random 5 | import json 6 | import os 7 | from functools import reduce 8 | 9 | 10 | def generate_data(similarity: int, num_of_users=10, samples_num=5000): 11 | """ 12 | generate CIFAR-10 data among 10 users with different similarities 13 | :param similarity: portion of similar data between users. number between 0 to 1 14 | :param num_of_users: number of users data distributed among 15 | :param samples_num: number of samples distributed to each user 16 | """ 17 | root_path = os.path.dirname(__file__) 18 | train_path = root_path + '/data/train/mytrain.json' 19 | test_path = root_path + '/data/test/mytest.json' 20 | dir_path = os.path.dirname(train_path) 21 | if not os.path.exists(dir_path): 22 | os.makedirs(dir_path) 23 | dir_path = os.path.dirname(test_path) 24 | if not os.path.exists(dir_path): 25 | os.makedirs(dir_path) 26 | 27 | cifar_dicts = [] 28 | for i in range(1, 6): 29 | cifar_dicts.append(unpickle(root_path + '/cifar-10-batches-py/data_batch_' + f"{i}")) 30 | 31 | train_images = np.concatenate([cifar_dict['data'] for cifar_dict in cifar_dicts]) 32 | # train_labels = reduce((lambda x, y: x + y), [cifar_dict['labels'] for cifar_dict in cifar_dicts]) 33 | train_labels = np.concatenate([cifar_dict['labels'] for cifar_dict in cifar_dicts]) 34 | train_images = train_images.astype(np.float32) 35 | train_labels = train_labels.astype(np.int) 36 | num_of_labels = len(set(train_labels)) 37 | 38 | cifar_dict = unpickle(root_path + '/cifar-10-batches-py/test_batch') 39 | test_images = cifar_dict['data'] 40 | test_labels = np.array(cifar_dict['labels']) 41 | test_images = test_images.astype(np.float32) 42 | test_labels = test_labels.astype(np.int) 43 | 44 | cifar_data = [] 45 | for i in range(min(train_labels), num_of_labels + min(train_labels)): 46 | idx = train_labels == i 47 | cifar_data.append(train_images[idx]) 48 | 49 | iid_samples = int(similarity * samples_num) 50 | X_train = [[] for _ in range(num_of_users)] 51 | y_train = [[] for _ in range(num_of_users)] 52 | idx = np.zeros(num_of_labels, dtype=np.int64) 53 | 54 | # fill users data by labels 55 | for user in range(num_of_users): 56 | label = user % num_of_labels 57 | X_train[user] += cifar_data[label][idx[label]:idx[label] + samples_num - iid_samples].tolist() 58 | y_train[user] += (label * np.ones(samples_num - iid_samples)).tolist() 59 | idx[label] += samples_num - iid_samples 60 | 61 | print(idx) 62 | 63 | # create %similarity of iid data 64 | for user in range(num_of_users): 65 | labels = np.random.randint(0, num_of_labels, iid_samples) 66 | for label in labels: 67 | while idx[label] >= len(cifar_data[label]): 68 | label = (label + 1) % num_of_labels 69 | X_train[user].append(cifar_data[label][idx[label]].tolist()) 70 | y_train[user] += 
(label * np.ones(1)).tolist() 71 | idx[label] += 1 72 | 73 | print(idx) 74 | 75 | # create test data 76 | X_test = test_images.tolist() 77 | y_test = test_labels.tolist() 78 | 79 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 80 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 81 | 82 | for i in range(num_of_users): 83 | uname = 'f_{0:05d}'.format(i) 84 | 85 | combined = list(zip(X_train[i], y_train[i])) 86 | random.shuffle(combined) 87 | X_train[i][:], y_train[i][:] = zip(*combined) 88 | train_len = len(X_train[i]) 89 | test_len = int(len(test_images) / num_of_users) 90 | 91 | train_data['users'].append(uname) 92 | train_data['user_data'][uname] = {'x': X_train[i], 'y': y_train[i]} 93 | train_data['num_samples'].append(train_len) 94 | test_data['users'].append(uname) 95 | test_data['user_data'][uname] = {'x': X_test[test_len * i:test_len * (i + 1)], 96 | 'y': y_test[test_len * i:test_len * (i + 1)]} 97 | test_data['num_samples'].append(test_len) 98 | 99 | print(train_data['num_samples']) 100 | print(sum(train_data['num_samples'])) 101 | print(sum(test_data['num_samples'])) 102 | 103 | print("Saving data, please wait") 104 | with open(train_path, 'w') as outfile: 105 | json.dump(train_data, outfile) 106 | with open(test_path, 'w') as outfile: 107 | json.dump(test_data, outfile) 108 | print("Saving completed") 109 | 110 | 111 | def unpickle(file): 112 | import pickle 113 | with open(file, 'rb') as fo: 114 | data_dict = pickle.load(fo, encoding='latin1') 115 | return data_dict 116 | 117 | 118 | if __name__ == '__main__': 119 | generate_data(similarity=1) 120 | -------------------------------------------------------------------------------- /data/Femnist/README.md: -------------------------------------------------------------------------------- 1 | # EMNIST Dataset 2 | 3 | ## Setup Instructions 4 | - pip3 install numpy 5 | - pip3 install pillow 6 | - Run ```./preprocess.sh``` with a choice of the following tags: 7 | - ```-s``` := 'iid' to sample in an i.i.d. manner, or 'niid' to sample in a non-i.i.d. manner; more information on i.i.d. versus non-i.i.d. is included in the 'Notes' section 8 | - ```--iu``` := number of users, if iid sampling; expressed as a fraction of the total number of users; default is 0.01 9 | - ```--sf``` := fraction of data to sample, written as a decimal; default is 0.1 10 | - ```-k``` := minimum number of samples per user 11 | - ```-t``` := 'user' to partition users into train-test groups, or 'sample' to partition each user's samples into train-test groups 12 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 13 | - ```--nu``` := The total number of users generated. 14 | 15 | Instruction used to generate EMNIST with 50 users: 16 | 17 | ``` 18 | ./preprocess.sh -s niid --sf 1.0 -k 0 -tf 0.8 -t sample --nu 100 19 | ``` 20 | 21 | 22 | 23 | 24 | (Make sure to delete the rem\_user\_data, sampled\_data, test, and train subfolders in the data directory before re-running preprocess.sh.) 25 | 26 | Or you can download the dataset [here](https://drive.google.com/open?id=1sHzD4IsgEI5xLy6cqwUjSGW0PwiduPHr), unzip it and put the `train` and `test` folder under `data`. 
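The train/test JSON files written by the data generators here (e.g. `data_generator.py` above) share the layout `{'users': [...], 'num_samples': [...], 'user_data': {user_id: {'x': [...], 'y': [...]}}}`. The following minimal sketch (illustrative only; the path below is just an example of where the generators write their output) loads one of the generated files and prints a few per-user statistics:

```python
import json

# Example path; point this at whatever preprocess.sh / data_generator.py produced.
train_path = "data/Femnist/data/train/mytrain.json"

with open(train_path) as f:
    train = json.load(f)

# 'users' lists the user ids, 'num_samples' the per-user sample counts,
# and 'user_data' maps each id to its flattened images 'x' and labels 'y'.
print("users:", len(train["users"]))
print("total samples:", sum(train["num_samples"]))

uname = train["users"][0]
x, y = train["user_data"][uname]["x"], train["user_data"][uname]["y"]
print(uname, "->", len(x), "samples,", len(set(y)), "distinct labels")
```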
27 | -------------------------------------------------------------------------------- /data/Femnist/__pycache__/data_generator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/data/Femnist/__pycache__/data_generator.cpython-38.pyc -------------------------------------------------------------------------------- /data/Femnist/data/my_sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import json 3 | import math 4 | import numpy as np 5 | import os 6 | import sys 7 | import random 8 | from tqdm import trange 9 | 10 | from PIL import Image 11 | 12 | NUM_USER = 50 13 | CLASS_PER_USER = 19 14 | 15 | 16 | def relabel_class(c): 17 | ''' 18 | maps hexadecimal class value (string) to a decimal number 19 | returns: 20 | - 0 through 9 for classes representing respective numbers 21 | - 10 through 35 for classes representing respective uppercase letters 22 | - 36 through 61 for classes representing respective lowercase letters 23 | ''' 24 | if c.isdigit() and int(c) < 40: 25 | return (int(c) - 30) 26 | elif int(c, 16) <= 90: # uppercase 27 | return (int(c, 16) - 55) 28 | else: 29 | return (int(c, 16) - 61) 30 | 31 | def load_image(file_name): 32 | '''read in a png 33 | Return: a flatted list representing the image 34 | ''' 35 | size = (28, 28) 36 | img = Image.open(file_name) 37 | gray = img.convert('L') 38 | gray.thumbnail(size, Image.ANTIALIAS) 39 | arr = np.asarray(gray).copy() 40 | vec = arr.flatten() 41 | vec = vec / 255 # scale all pixel values to between 0 and 1 42 | vec = vec.tolist() 43 | 44 | return vec 45 | 46 | 47 | def main(): 48 | file_dir = "raw_data/by_class" 49 | 50 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 51 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 52 | 53 | train_path = "train/mytrain.json" 54 | test_path = "test/mytest.json" 55 | 56 | X = [[] for _ in range(NUM_USER)] 57 | y = [[] for _ in range(NUM_USER)] 58 | 59 | nist_data = {} 60 | 61 | 62 | for class_ in os.listdir(file_dir): 63 | 64 | real_class = relabel_class(class_) 65 | 66 | if real_class >= 36 and real_class <= 61: 67 | 68 | full_img_path = file_dir + "/" + class_ + "/train_" + class_ 69 | all_files_this_class = os.listdir(full_img_path) 70 | random.shuffle(all_files_this_class) 71 | sampled_files_this_class = all_files_this_class[:7000] 72 | imgs = [] 73 | for img in sampled_files_this_class: 74 | imgs.append(load_image(full_img_path + "/" + img)) 75 | class_ = relabel_class(class_) 76 | print(class_) 77 | nist_data[class_-36] = imgs # a list of list, key is (0, 25) 78 | print(len(imgs)) 79 | 80 | # assign samples to users by power law 81 | num_samples = np.random.lognormal(4, 2, (NUM_USER)) + 5 82 | 83 | idx = np.zeros(26, dtype=np.int64) 84 | 85 | for user in range(NUM_USER): 86 | num_sample_per_class = int(num_samples[user]/CLASS_PER_USER) 87 | if num_sample_per_class < 2: 88 | num_sample_per_class = 2 89 | 90 | for j in range(CLASS_PER_USER): 91 | class_id = (user + j) % 26 92 | if idx[class_id] + num_sample_per_class < len(nist_data[class_id]): 93 | idx[class_id] = 0 94 | X[user] += nist_data[class_id][idx[class_id] : (idx[class_id] + num_sample_per_class)] 95 | y[user] += (class_id * np.ones(num_sample_per_class)).tolist() 96 | idx[class_id] += num_sample_per_class 97 | 98 | # Create data structure 
99 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 100 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 101 | 102 | for i in trange(NUM_USER, ncols=120): 103 | uname = 'f_{0:05d}'.format(i) 104 | 105 | combined = list(zip(X[i], y[i])) 106 | random.shuffle(combined) 107 | X[i][:], y[i][:] = zip(*combined) 108 | num_samples = len(X[i]) 109 | train_len = int(0.9 * num_samples) 110 | test_len = num_samples - train_len 111 | 112 | train_data['users'].append(uname) 113 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 114 | train_data['num_samples'].append(train_len) 115 | test_data['users'].append(uname) 116 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 117 | test_data['num_samples'].append(test_len) 118 | 119 | 120 | with open(train_path,'w') as outfile: 121 | json.dump(train_data, outfile) 122 | with open(test_path, 'w') as outfile: 123 | json.dump(test_data, outfile) 124 | 125 | 126 | if __name__ == "__main__": 127 | main() 128 | 129 | -------------------------------------------------------------------------------- /data/Femnist/data/nist_generator.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import json 3 | import math 4 | import numpy as np 5 | import os 6 | import sys 7 | import random 8 | from tqdm import trange 9 | 10 | from PIL import Image 11 | 12 | NUM_USER = 50 13 | CLASS_PER_USER = 50 14 | FEMNIST = True # True: generate data will full 62 label, False: only 26 labels for lowercase 15 | SAMPLE_NUM_MEAN = 400 16 | SAMPLE_NUM_STD = 110 17 | 18 | 19 | def relabel_class(c): 20 | ''' 21 | maps hexadecimal class value (string) to a decimal number 22 | returns: 23 | - 0 through 9 for classes representing respective numbers : total 10 24 | - 10 through 35 for classes representing respective uppercase letters : 26 25 | - 36 through 61 for classes representing respective lowercase letters : 26 26 | - in total we have 10 + 26 + 26 = 62 class for FEMIST tiwand only 36-61 for FEMIST* 27 | ''' 28 | if c.isdigit() and int(c) < 40: 29 | return (int(c) - 30) 30 | elif int(c, 16) <= 90: # uppercase 31 | return (int(c, 16) - 55) 32 | else: 33 | return (int(c, 16) - 61) 34 | 35 | 36 | def load_image(file_name): 37 | '''read in a png 38 | Return: a flatted list representing the image 39 | ''' 40 | size = (28, 28) 41 | img = Image.open(file_name) 42 | gray = img.convert('L') 43 | gray.thumbnail(size, Image.ANTIALIAS) 44 | arr = np.asarray(gray).copy() 45 | vec = arr.flatten() 46 | vec = vec / 255 # scale all pixel values to between 0 and 1 47 | vec = vec.tolist() 48 | 49 | return vec 50 | 51 | 52 | def main(): 53 | file_dir = "raw_data/by_class" 54 | 55 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 56 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 57 | if(FEMNIST): 58 | train_path = "train/nisttrain.json" 59 | test_path = "test/nisttest.json" 60 | else: 61 | train_path = "train/femnisttrain.json" 62 | test_path = "test/femnisttest.json" 63 | 64 | X = [[] for _ in range(NUM_USER)] 65 | y = [[] for _ in range(NUM_USER)] 66 | 67 | nist_data = {} 68 | 69 | for class_ in os.listdir(file_dir): 70 | 71 | real_class = relabel_class(class_) 72 | 73 | if(FEMNIST): 74 | full_img_path = file_dir + "/" + class_ + "/train_" + class_ 75 | all_files_this_class = os.listdir(full_img_path) 76 | random.shuffle(all_files_this_class) 77 | sampled_files_this_class = all_files_this_class[:7000] 78 | imgs = [] 79 | for 
img in sampled_files_this_class: 80 | imgs.append(load_image(full_img_path + "/" + img)) 81 | class_ = relabel_class(class_) 82 | print("Class:", class_) 83 | nist_data[class_] = imgs # a list of list, key is (0, 25) 84 | print("Image len:", len(imgs)) 85 | 86 | else: 87 | if real_class >= 36 and real_class <= 61: 88 | full_img_path = file_dir + "/" + class_ + "/train_" + class_ 89 | all_files_this_class = os.listdir(full_img_path) 90 | random.shuffle(all_files_this_class) 91 | sampled_files_this_class = all_files_this_class[:7000] 92 | imgs = [] 93 | for img in sampled_files_this_class: 94 | imgs.append(load_image(full_img_path + "/" + img)) 95 | class_ = relabel_class(class_) 96 | print(class_) 97 | nist_data[class_-36] = imgs # a list of list, key is (0, 25) 98 | print(len(imgs)) 99 | 100 | # assign samples to users by power law 101 | normal_std = np.sqrt(np.log(1 + (lognormal_std/lognormal_mean)**2)) 102 | normal_mean = np.log(lognormal_mean) - normal_std**2 / 2 103 | 104 | num_samples = np.random.lognormal(normal_mean, normal_std, (NUM_USER)) + 5 105 | #num_samples = np.random.normal(SAMPLE_NUM_MEAN,SAMPLE_NUM_STD,(NUM_USER)) 106 | 107 | if(FEMNIST): 108 | idx = np.zeros(62, dtype=np.int64) 109 | else: 110 | idx = np.zeros(26, dtype=np.int64) 111 | 112 | for user in range(NUM_USER): 113 | num_sample_per_class = int(num_samples[user]/CLASS_PER_USER) 114 | if num_sample_per_class < 2: 115 | num_sample_per_class = 2 116 | 117 | for j in range(CLASS_PER_USER): 118 | if(FEMNIST): 119 | class_id = (user + j) % 62 120 | else: 121 | class_id = (user + j) % 26 122 | 123 | if idx[class_id] + num_sample_per_class < len(nist_data[class_id]): 124 | idx[class_id] = 0 125 | X[user] += nist_data[class_id][idx[class_id] 126 | : (idx[class_id] + num_sample_per_class)] 127 | y[user] += (class_id * np.ones(num_sample_per_class)).tolist() 128 | idx[class_id] += num_sample_per_class 129 | 130 | # Create data structure 131 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 132 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 133 | 134 | for i in trange(NUM_USER, ncols=120): 135 | uname = 'f_{0:05d}'.format(i) 136 | 137 | combined = list(zip(X[i], y[i])) 138 | random.shuffle(combined) 139 | X[i][:], y[i][:] = zip(*combined) 140 | num_samples = len(X[i]) 141 | train_len = int(0.9 * num_samples) 142 | test_len = num_samples - train_len 143 | 144 | train_data['users'].append(uname) 145 | train_data['user_data'][uname] = { 146 | 'x': X[i][:train_len], 'y': y[i][:train_len]} 147 | train_data['num_samples'].append(train_len) 148 | test_data['users'].append(uname) 149 | test_data['user_data'][uname] = { 150 | 'x': X[i][train_len:], 'y': y[i][train_len:]} 151 | test_data['num_samples'].append(test_len) 152 | 153 | with open(train_path, 'w') as outfile: 154 | json.dump(train_data, outfile) 155 | with open(test_path, 'w') as outfile: 156 | json.dump(test_data, outfile) 157 | 158 | 159 | if __name__ == "__main__": 160 | main() 161 | -------------------------------------------------------------------------------- /data/Femnist/data_generator.py: -------------------------------------------------------------------------------- 1 | import emnist 2 | import numpy as np 3 | from tqdm import trange 4 | import random 5 | import json 6 | import os 7 | import argparse 8 | from os.path import dirname 9 | 10 | 11 | def generate_data(similarity, num_of_users=100, samples_num=20): 12 | root_path = os.path.dirname(__file__) 13 | train_path = root_path + '/data/train/mytrain.json' 14 | test_path = 
root_path + '/data/test/mytest.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | dataset = 'balanced' 23 | images, train_labels = emnist.extract_training_samples(dataset) # TODO: add test samples 24 | images = np.reshape(images, (images.shape[0], -1)) 25 | images = images.astype(np.float32) 26 | train_labels = train_labels.astype(np.int) 27 | num_of_labels = len(set(train_labels)) 28 | 29 | emnist_data = [] 30 | for i in range(min(train_labels), num_of_labels + min(train_labels)): 31 | idx = train_labels == i 32 | emnist_data.append(images[idx]) 33 | 34 | iid_samples = int(similarity * samples_num) 35 | X = [[] for _ in range(num_of_users)] 36 | y = [[] for _ in range(num_of_users)] 37 | idx = np.zeros(num_of_labels, dtype=np.int64) 38 | 39 | # create %similarity of iid data 40 | for user in range(num_of_users): 41 | labels = np.random.randint(0, num_of_labels, iid_samples) 42 | for label in labels: 43 | X[user].append(emnist_data[label][idx[label]].tolist()) 44 | y[user] += (label * np.ones(1)).tolist() 45 | idx[label] += 1 46 | 47 | print(idx) 48 | 49 | # fill remaining data 50 | for user in range(num_of_users): 51 | label = user % num_of_labels 52 | X[user] += emnist_data[label][idx[label]:idx[label] + samples_num - iid_samples].tolist() 53 | y[user] += (label * np.ones(samples_num - iid_samples)).tolist() 54 | idx[label] += samples_num - iid_samples 55 | 56 | print(idx) 57 | 58 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 59 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 60 | 61 | for i in trange(num_of_users, ncols=120): 62 | uname = 'f_{0:05d}'.format(i) 63 | 64 | combined = list(zip(X[i], y[i])) 65 | random.shuffle(combined) 66 | X[i][:], y[i][:] = zip(*combined) 67 | num_samples = len(X[i]) 68 | train_len = int(0.9 * num_samples) 69 | test_len = num_samples - train_len 70 | 71 | train_data['users'].append(uname) 72 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 73 | train_data['num_samples'].append(train_len) 74 | test_data['users'].append(uname) 75 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 76 | test_data['num_samples'].append(test_len) 77 | 78 | print(train_data['num_samples']) 79 | print(sum(train_data['num_samples'])) 80 | 81 | with open(train_path, 'w') as outfile: 82 | json.dump(train_data, outfile) 83 | with open(test_path, 'w') as outfile: 84 | json.dump(test_data, outfile) 85 | 86 | 87 | if __name__ == '__main__': 88 | parser = argparse.ArgumentParser() 89 | parser.add_argument("--similarity", type=float, default=0) 90 | parser.add_argument("--num_of_users", type=int, default=100) 91 | parser.add_argument("--samples_num", type=int, default=20) 92 | args = parser.parse_args() 93 | generate_data(similarity=args.similarity, num_of_users=args.num_of_users, samples_num=args.samples_num) 94 | -------------------------------------------------------------------------------- /data/Femnist/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #rm -rf rem_user_data sampled_data test train 4 | 5 | # download data and convert to .json format 6 | 7 | if [ ! -d "data/all_data" ] || [ ! "$(ls -A data/all_data)" ]; then 8 | cd preprocess 9 | ./data_to_json.sh 10 | cd .. 
11 | fi 12 | 13 | NAME="nist" # name of the dataset, equivalent to directory name 14 | 15 | cd ../../utils 16 | 17 | # ./preprocess.sh -s niid --sf 0.05 -k 64 -t sample 18 | # ./preprocess.sh --name nist -s niid --sf 1.0 -k 0 -t sample 19 | # ./preprocess.sh --name sent140 -s niid --sf 1.0 -k 1 -t sample 20 | ./preprocess.sh --name $NAME $@ 21 | 22 | cd ../data/$NAME 23 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/data_to_json.py: -------------------------------------------------------------------------------- 1 | # Converts a list of (writer, [list of (file,class)]) tuples into a json object 2 | # of the form: 3 | # {users: [bob, etc], num_samples: [124, etc.], 4 | # user_data: {bob : {x:[img1,img2,etc], y:[class1,class2,etc]}, etc}} 5 | # where 'img_' is a vectorized representation of the corresponding image 6 | 7 | from __future__ import division 8 | import json 9 | import math 10 | import numpy as np 11 | import os 12 | import sys 13 | 14 | from PIL import Image 15 | 16 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 17 | utils_dir = os.path.join(utils_dir, 'utils') 18 | sys.path.append(utils_dir) 19 | 20 | import utils 21 | 22 | 23 | MAX_WRITERS = 100 # max number of writers per json file. 24 | 25 | 26 | def relabel_class(c): 27 | ''' 28 | maps hexadecimal class value (string) to a decimal number 29 | returns: 30 | - 0 through 9 for classes representing respective numbers 31 | - 10 through 35 for classes representing respective uppercase letters 32 | - 36 through 61 for classes representing respective lowercase letters 33 | ''' 34 | if c.isdigit() and int(c) < 40: 35 | return (int(c) - 30) 36 | elif int(c, 16) <= 90: # uppercase 37 | return (int(c, 16) - 55) 38 | else: 39 | return (int(c, 16) - 61) 40 | 41 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 42 | 43 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer') 44 | writers = utils.load_obj(ibwd) 45 | 46 | num_json = int(math.ceil(len(writers) / MAX_WRITERS)) 47 | 48 | users = [[] for _ in range(num_json)] 49 | num_samples = [[] for _ in range(num_json)] 50 | user_data = [{} for _ in range(num_json)] 51 | 52 | writer_count = 0 53 | json_index = 0 54 | for (w, l) in writers: 55 | 56 | users[json_index].append(w) 57 | num_samples[json_index].append(len(l)) 58 | user_data[json_index][w] = {'x': [], 'y': []} 59 | 60 | size = 28, 28 # original image size is 128, 128 61 | for (f, c) in l: 62 | file_path = os.path.join(parent_path, f) 63 | img = Image.open(file_path) 64 | gray = img.convert('L') 65 | gray.thumbnail(size, Image.ANTIALIAS) 66 | arr = np.asarray(gray).copy() 67 | vec = arr.flatten() 68 | vec = vec / 255 # scale all pixel values to between 0 and 1 69 | vec = vec.tolist() 70 | 71 | nc = relabel_class(c) 72 | 73 | user_data[json_index][w]['x'].append(vec) 74 | user_data[json_index][w]['y'].append(nc) 75 | 76 | writer_count += 1 77 | if writer_count == MAX_WRITERS: 78 | 79 | all_data = {} 80 | all_data['users'] = users[json_index] 81 | all_data['num_samples'] = num_samples[json_index] 82 | all_data['user_data'] = user_data[json_index] 83 | 84 | file_name = 'all_data_%d.json' % json_index 85 | file_path = os.path.join(parent_path, 'data', 'all_data', file_name) 86 | 87 | print('writing %s' % file_name) 88 | 89 | with open(file_path, 'w') as outfile: 90 | json.dump(all_data, outfile) 91 | 92 | writer_count = 0 93 | json_index += 1 94 | 
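To make the `relabel_class` mapping above concrete, here is a small standalone sketch (illustrative only, not a file in the repository) that reproduces the 0-61 label scheme on a few representative NIST class names, which are the hexadecimal ASCII codes of the characters:

```python
# Standalone copy of the mapping used in data_to_json.py (and the other generators).
def relabel_class(c):
    if c.isdigit() and int(c) < 40:
        return int(c) - 30      # '30'..'39' -> digits 0-9
    elif int(c, 16) <= 90:
        return int(c, 16) - 55  # '41'..'5a' -> uppercase letters -> 10-35
    else:
        return int(c, 16) - 61  # '61'..'7a' -> lowercase letters -> 36-61


for hex_class, char in [("30", "0"), ("39", "9"), ("41", "A"),
                        ("5a", "Z"), ("61", "a"), ("7a", "z")]:
    print(f"{char} (class {hex_class}) -> {relabel_class(hex_class)}")
# prints 0, 9, 10, 35, 36, 61 respectively
```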
-------------------------------------------------------------------------------- /data/Femnist/preprocess/data_to_json.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # assumes that the script is run in the preprocess folder 4 | 5 | if [ ! -d "../data" ]; then 6 | mkdir ../data 7 | fi 8 | if [ ! -d "../data/raw_data" ]; then 9 | echo "------------------------------" 10 | echo "downloading data" 11 | mkdir ../data/raw_data 12 | ./get_data.sh 13 | echo "finished downloading data" 14 | fi 15 | 16 | if [ ! -d "../data/intermediate" ]; then # stores .pkl files during preprocessing 17 | mkdir ../data/intermediate 18 | fi 19 | 20 | if [ ! -f ../data/intermediate/class_file_dirs.pkl ]; then 21 | echo "------------------------------" 22 | echo "extracting file directories of images" 23 | python3 get_file_dirs.py 24 | echo "finished extracting file directories of images" 25 | fi 26 | 27 | if [ ! -f ../data/intermediate/class_file_hashes.pkl ]; then 28 | echo "------------------------------" 29 | echo "calculating image hashes" 30 | python3 get_hashes.py 31 | echo "finished calculating image hashes" 32 | fi 33 | 34 | if [ ! -f ../data/intermediate/write_with_class.pkl ]; then 35 | echo "------------------------------" 36 | echo "assigning class labels to write images" 37 | python3 match_hashes.py 38 | echo "finished assigning class labels to write images" 39 | fi 40 | 41 | if [ ! -f ../data/intermediate/images_by_writer.pkl ]; then 42 | echo "------------------------------" 43 | echo "grouping images by writer" 44 | python3 group_by_writer.py 45 | echo "finished grouping images by writer" 46 | fi 47 | 48 | if [ ! -d "../data/all_data" ]; then 49 | mkdir ../data/all_data 50 | fi 51 | if [ ! "$(ls -A ../data/all_data)" ]; then 52 | echo "------------------------------" 53 | echo "converting data to .json format" 54 | python3 data_to_json.py 55 | echo "finished converting data to .json format" 56 | fi 57 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/get_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # assumes that the script is run in the preprocess folder 4 | 5 | cd ../data/raw_data 6 | wget https://s3.amazonaws.com/nist-srd/SD19/by_class.zip 7 | wget https://s3.amazonaws.com/nist-srd/SD19/by_write.zip 8 | unzip by_class.zip 9 | rm by_class.zip 10 | unzip by_write.zip 11 | rm by_write.zip 12 | cd ../../preprocess 13 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/get_file_dirs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Creates .pkl files for: 3 | 1. list of directories of every image in 'by_class' 4 | 2. list of directories of every image in 'by_write' 5 | the hierarchal structure of the data is as follows: 6 | - by_class -> classes -> folders containing images -> images 7 | - by_write -> folders containing writers -> writer -> types of images -> images 8 | the directories written into the files are of the form 'raw_data/...' 
9 | ''' 10 | 11 | import os 12 | import sys 13 | 14 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 15 | utils_dir = os.path.join(utils_dir, 'utils') 16 | sys.path.append(utils_dir) 17 | 18 | import utils 19 | 20 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 21 | 22 | class_files = [] # (class, file directory) 23 | write_files = [] # (writer, file directory) 24 | 25 | class_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_class') 26 | rel_class_dir = os.path.join('data', 'raw_data', 'by_class') 27 | classes = os.listdir(class_dir) 28 | 29 | for cl in classes: 30 | cldir = os.path.join(class_dir, cl) 31 | rel_cldir = os.path.join(rel_class_dir, cl) 32 | subcls = os.listdir(cldir) 33 | 34 | subcls = [s for s in subcls if (('hsf' in s) and ('mit' not in s))] 35 | 36 | for subcl in subcls: 37 | subcldir = os.path.join(cldir, subcl) 38 | rel_subcldir = os.path.join(rel_cldir, subcl) 39 | images = os.listdir(subcldir) 40 | image_dirs = [os.path.join(rel_subcldir, i) for i in images] 41 | 42 | for image_dir in image_dirs: 43 | class_files.append((cl, image_dir)) 44 | 45 | write_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_write') 46 | rel_write_dir = os.path.join('data', 'raw_data', 'by_write') 47 | write_parts = os.listdir(write_dir) 48 | 49 | for write_part in write_parts: 50 | writers_dir = os.path.join(write_dir, write_part) 51 | rel_writers_dir = os.path.join(rel_write_dir, write_part) 52 | writers = os.listdir(writers_dir) 53 | 54 | for writer in writers: 55 | writer_dir = os.path.join(writers_dir, writer) 56 | rel_writer_dir = os.path.join(rel_writers_dir, writer) 57 | wtypes = os.listdir(writer_dir) 58 | 59 | for wtype in wtypes: 60 | type_dir = os.path.join(writer_dir, wtype) 61 | rel_type_dir = os.path.join(rel_writer_dir, wtype) 62 | images = os.listdir(type_dir) 63 | image_dirs = [os.path.join(rel_type_dir, i) for i in images] 64 | 65 | for image_dir in image_dirs: 66 | write_files.append((writer, image_dir)) 67 | 68 | utils.save_obj( 69 | class_files, 70 | os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs')) 71 | utils.save_obj( 72 | write_files, 73 | os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs')) 74 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/get_hashes.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import sys 4 | 5 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | utils_dir = os.path.join(utils_dir, 'utils') 7 | sys.path.append(utils_dir) 8 | 9 | import utils 10 | 11 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 12 | 13 | cfd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs') 14 | wfd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs') 15 | class_file_dirs = utils.load_obj(cfd) 16 | write_file_dirs = utils.load_obj(wfd) 17 | 18 | class_file_hashes = [] 19 | write_file_hashes = [] 20 | 21 | count = 0 22 | for tup in class_file_dirs: 23 | if (count%100000 == 0): 24 | print('hashed %d class images' % count) 25 | 26 | (cclass, cfile) = tup 27 | file_path = os.path.join(parent_path, cfile) 28 | 29 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 30 | 31 | class_file_hashes.append((cclass, cfile, chash)) 32 | 33 | count += 1 34 | 35 | cfhd = 
os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes') 36 | utils.save_obj(class_file_hashes, cfhd) 37 | 38 | count = 0 39 | for tup in write_file_dirs: 40 | if (count%100000 == 0): 41 | print('hashed %d write images' % count) 42 | 43 | (cclass, cfile) = tup 44 | file_path = os.path.join(parent_path, cfile) 45 | 46 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 47 | 48 | write_file_hashes.append((cclass, cfile, chash)) 49 | 50 | count += 1 51 | 52 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes') 53 | utils.save_obj(write_file_hashes, wfhd) 54 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/group_by_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | utils_dir = os.path.join(utils_dir, 'utils') 6 | sys.path.append(utils_dir) 7 | 8 | import utils 9 | 10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class') 13 | write_class = utils.load_obj(wwcd) 14 | 15 | writers = [] # each entry is a (writer, [list of (file, class)]) tuple 16 | cimages = [] 17 | (cw, _, _) = write_class[0] 18 | for (w, f, c) in write_class: 19 | if w != cw: 20 | writers.append((cw, cimages)) 21 | cw = w 22 | cimages = [(f, c)] 23 | cimages.append((f, c)) 24 | writers.append((cw, cimages)) 25 | 26 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer') 27 | utils.save_obj(writers, ibwd) 28 | -------------------------------------------------------------------------------- /data/Femnist/preprocess/match_hashes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | utils_dir = os.path.join(utils_dir, 'utils') 6 | sys.path.append(utils_dir) 7 | 8 | import utils 9 | 10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | cfhd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes') 13 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes') 14 | class_file_hashes = utils.load_obj(cfhd) # each elem is (class, file dir, hash) 15 | write_file_hashes = utils.load_obj(wfhd) # each elem is (writer, file dir, hash) 16 | 17 | class_hash_dict = {} 18 | for i in range(len(class_file_hashes)): 19 | (c, f, h) = class_file_hashes[len(class_file_hashes)-i-1] 20 | class_hash_dict[h] = (c, f) 21 | 22 | write_classes = [] 23 | for tup in write_file_hashes: 24 | (w, f, h) = tup 25 | write_classes.append((w, f, class_hash_dict[h][0])) 26 | 27 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class') 28 | utils.save_obj(write_classes, wwcd) 29 | -------------------------------------------------------------------------------- /data/Femnist/stats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NAME="nist" 4 | 5 | cd ../../utils 6 | 7 | python3 stats.py --name $NAME 8 | 9 | cd ../data/$NAME -------------------------------------------------------------------------------- /data/Linear_synthetic/data/README.md: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/data/Linear_synthetic/data/README.md -------------------------------------------------------------------------------- /data/Linear_synthetic/generate_linear_regession.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import json 4 | import random 5 | import os 6 | np.random.seed(0) 7 | 8 | NUM_USER = 100 9 | Kappa = 1.4 10 | Dim = 40 11 | Noise = 0.05 12 | 13 | def generate_x(n_samples = 100, dim= 40, kappa= 10): 14 | '''Helper function to generate data''' 15 | 16 | powers = - np.log(kappa) / np.log(dim) / 2 17 | 18 | S = np.power(np.arange(dim)+1, powers) 19 | X = np.random.randn(n_samples, dim) # Random standard Gaussian data 20 | X *= S 21 | covarient_matrix = np.cov(X) 22 | print("Covarient matrix:",covarient_matrix) # Conditioning 23 | print("np.diag(S)", np.diag(S)) 24 | return X, 1, 1/kappa, np.diag(S) 25 | 26 | def generate_linear_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05): 27 | 28 | '''Helper function to generate data''' 29 | # generate power S 30 | powers = - np.log(kappa) / np.log(dim) / 2 31 | DIM = np.arange(dim) 32 | 33 | # Covariance matrix for X 34 | S = np.power(DIM+1, powers) 35 | 36 | # Creat list data for all users 37 | X_split = [[] for _ in range(num_users)] # X for each user 38 | y_split = [[] for _ in range(num_users)] # y for each user 39 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500 40 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0) 41 | num_total_samples = indices_per_user[-1] 42 | 43 | # Create mean of data for each user, each user will have different distribution 44 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)]) 45 | 46 | 47 | X_total = np.zeros((num_total_samples, dim)) 48 | y_total = np.zeros(num_total_samples) 49 | 50 | for n in range(num_users): 51 | # Generate data 52 | X_n = np.random.multivariate_normal(mean_X[n], np.diag(S), samples_per_user[n]) 53 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n 54 | 55 | # Normalize all X's using LAMBDA 56 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples) 57 | X_total /= norm 58 | 59 | # Generate weights and labels 60 | W = np.random.rand(dim) 61 | y_total = X_total.dot(W) 62 | noise_variance = 0.01 63 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples) 64 | 65 | for n in range(num_users): 66 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :] 67 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]] 68 | X_split[n] = X_n.tolist() 69 | y_split[n] = y_n.tolist() 70 | 71 | # print("User {} has {} samples.".format(n, samples_per_user[n])) 72 | 73 | print("=" * 80) 74 | print("Generated synthetic data for logistic regression successfully.") 75 | print("Summary of the generated data:".format(kappa)) 76 | print(" Total # users : {}".format(num_users)) 77 | print(" Input dimension : {}".format(dim)) 78 | print(" rho : {}".format(kappa)) 79 | print(" Total # of samples : {}".format(num_total_samples)) 80 | print(" Minimum # of samples: {}".format(np.min(samples_per_user))) 81 | print(" Maximum # of samples: {}".format(np.max(samples_per_user))) 82 | print("=" * 80) 83 | 84 | return X_split, y_split 85 | 86 | 87 | def save_total_data(): 88 | 
train_data = {'users': [], 'user_data': {}, 'num_samples': []} 89 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 90 | 91 | train_path = os.path.join("data", "train", "mytrain.json") 92 | test_path = os.path.join("data", "test", "mytest.json") 93 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]: 94 | if not os.path.exists(path): 95 | os.makedirs(path) 96 | 97 | X, y = generate_linear_data(NUM_USER, Kappa, Dim, Noise) 98 | 99 | # Create data structure 100 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 101 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 102 | 103 | for i in range(NUM_USER): 104 | uname = 'f_{0:05d}'.format(i) 105 | combined = list(zip(X[i], y[i])) 106 | random.shuffle(combined) 107 | X[i][:], y[i][:] = zip(*combined) 108 | num_samples = len(X[i]) 109 | train_len = int(0.75 * num_samples) 110 | test_len = num_samples - train_len 111 | print("User: ",uname, " Num Sample: ", num_samples ) 112 | train_data['users'].append(uname) 113 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 114 | train_data['num_samples'].append(train_len) 115 | test_data['users'].append(uname) 116 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 117 | test_data['num_samples'].append(test_len) 118 | 119 | with open(train_path, 'w') as outfile: 120 | json.dump(train_data, outfile) 121 | with open(test_path, 'w') as outfile: 122 | json.dump(test_data, outfile) 123 | 124 | print("=" * 80) 125 | print("Saved all users' data sucessfully.") 126 | print(" Train path:", os.path.join(os.curdir, train_path)) 127 | print(" Test path :", os.path.join(os.curdir, test_path)) 128 | print("=" * 80) 129 | 130 | 131 | def main(): 132 | #generate_x() 133 | save_total_data() 134 | 135 | 136 | if __name__ == '__main__': 137 | main() 138 | -------------------------------------------------------------------------------- /data/Linear_synthetic/generate_linear_regession_updated.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import json 4 | import random 5 | import os 6 | np.random.seed(0) 7 | 8 | NUM_USER = 100 9 | Kappa = 1.4 10 | Dim = 40 11 | Noise = 0.05 12 | 13 | def generate_x(n_samples = 100, dim= 40, kappa= 10): 14 | '''Helper function to generate data''' 15 | 16 | powers = - np.log(kappa) / np.log(dim) / 2 17 | 18 | S = np.power(np.arange(dim)+1, powers) 19 | X = np.random.randn(n_samples, dim) # Random standard Gaussian data 20 | X *= S 21 | covarient_matrix = np.cov(X) 22 | print("Covarient matrix:",covarient_matrix) # Conditioning 23 | print("np.diag(S)", np.diag(S)) 24 | return X, 1, 1/kappa, np.diag(S) 25 | 26 | def generate_linear_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05): 27 | 28 | '''Helper function to generate data''' 29 | # generate power S 30 | powers = - np.log(kappa) / np.log(dim) / 2 31 | DIM = np.arange(dim) 32 | 33 | # Covariance matrix for X 34 | S = np.power(DIM+1, powers) 35 | 36 | # Creat list data for all users 37 | X_split = [[] for _ in range(num_users)] # X for each user 38 | y_split = [[] for _ in range(num_users)] # y for each user 39 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500 40 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0) 41 | num_total_samples = indices_per_user[-1] 42 | 43 | # Create mean of data for each user, each user will have different distribution 44 | sig = np.random.uniform(0.1, 10) 45 | 
mean = np.random.uniform(low=-0.1, high=0.1) 46 | cov = np.random.uniform(low=0.0, high=0.01) 47 | #print("mean -cov", mean,cov) 48 | mean_X = np.random.normal(mean, cov, dim) 49 | 50 | X_total = np.zeros((num_total_samples, dim)) 51 | y_total = np.zeros(num_total_samples) 52 | 53 | for n in range(num_users): 54 | # Generate data 55 | X_n = np.random.multivariate_normal(mean_X, sig * np.diag(S), samples_per_user[n]) 56 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n 57 | 58 | # Normalize all X's using LAMBDA 59 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples) 60 | X_total /= norm 61 | 62 | # Generate weights and labels 63 | W = np.random.rand(dim) 64 | y_total = X_total.dot(W) 65 | noise_variance = 0.01 66 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples) 67 | 68 | for n in range(num_users): 69 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :] 70 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]] 71 | X_split[n] = X_n.tolist() 72 | y_split[n] = y_n.tolist() 73 | 74 | # print("User {} has {} samples.".format(n, samples_per_user[n])) 75 | 76 | print("=" * 80) 77 | print("Generated synthetic data for logistic regression successfully.") 78 | print("Summary of the generated data:".format(kappa)) 79 | print(" Total # users : {}".format(num_users)) 80 | print(" Input dimension : {}".format(dim)) 81 | print(" rho : {}".format(kappa)) 82 | print(" Total # of samples : {}".format(num_total_samples)) 83 | print(" Minimum # of samples: {}".format(np.min(samples_per_user))) 84 | print(" Maximum # of samples: {}".format(np.max(samples_per_user))) 85 | print("=" * 80) 86 | 87 | return X_split, y_split 88 | 89 | 90 | def save_total_data(): 91 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 92 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 93 | 94 | train_path = os.path.join("data", "train", "mytrain.json") 95 | test_path = os.path.join("data", "test", "mytest.json") 96 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]: 97 | if not os.path.exists(path): 98 | os.makedirs(path) 99 | 100 | X, y = generate_linear_data(NUM_USER, Kappa, Dim, Noise) 101 | 102 | # Create data structure 103 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 104 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 105 | 106 | for i in range(NUM_USER): 107 | uname = 'f_{0:05d}'.format(i) 108 | combined = list(zip(X[i], y[i])) 109 | random.shuffle(combined) 110 | X[i][:], y[i][:] = zip(*combined) 111 | num_samples = len(X[i]) 112 | train_len = int(0.75 * num_samples) 113 | test_len = num_samples - train_len 114 | print("User: ",uname, " Num Sample: ", num_samples ) 115 | train_data['users'].append(uname) 116 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 117 | train_data['num_samples'].append(train_len) 118 | test_data['users'].append(uname) 119 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 120 | test_data['num_samples'].append(test_len) 121 | 122 | with open(train_path, 'w') as outfile: 123 | json.dump(train_data, outfile) 124 | with open(test_path, 'w') as outfile: 125 | json.dump(test_data, outfile) 126 | 127 | print("=" * 80) 128 | print("Saved all users' data sucessfully.") 129 | print(" Train path:", os.path.join(os.curdir, train_path)) 130 | print(" Test path :", os.path.join(os.curdir, test_path)) 131 | print("=" * 80) 132 | 133 | 134 | def main(): 135 | #generate_x() 136 | 
save_total_data() 137 | 138 | 139 | if __name__ == '__main__': 140 | main() 141 | -------------------------------------------------------------------------------- /data/Linear_synthetic/generate_linear_synthetic_backup.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | 10 | 11 | NUM_USER = 100 12 | def normalize_data(X): 13 | 14 | #nomarlize all feature of data between (0 and 1) 15 | normX = X - X.min() 16 | normX = normX / (X.max() - X.min()) 17 | #normX = normX*2-1 between (-1 and 1) 18 | 19 | # nomarlize data with respect to -1 < X.X^T < 1. 20 | temp = normX.dot(normX.T) 21 | return normX/np.sqrt(temp.max()) 22 | 23 | def generate_synthetic(alpha = 0.5, beta = 0.5): 24 | 25 | # Generate parameters for controlling kappa 26 | dimension = 60 27 | NUM_CLASS = 1 28 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 100 29 | print(samples_per_user) 30 | num_samples = np.sum(samples_per_user) 31 | 32 | X_split = [[] for _ in range(NUM_USER)] 33 | y_split = [[] for _ in range(NUM_USER)] 34 | 35 | #### define some eprior #### 36 | mean_W = np.random.normal(0, alpha, NUM_USER) 37 | mean_b = mean_W 38 | B = np.random.normal(0, beta, NUM_USER) 39 | mean_x = np.zeros((NUM_USER, dimension)) 40 | 41 | diagonal = np.zeros(dimension) 42 | for j in range(dimension): 43 | diagonal[j] = np.power((j+1), -1.2) 44 | cov_x = np.diag(diagonal) 45 | 46 | for i in range(NUM_USER): 47 | mean_x[i] = np.random.normal(B[i], 1, dimension) 48 | print(mean_x[i]) 49 | 50 | for i in range(NUM_USER): 51 | 52 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS)) 53 | b = np.random.normal(mean_b[i], 1, NUM_CLASS) 54 | 55 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 56 | nom_xx = normalize_data(xx) 57 | yy = np.zeros(samples_per_user[i]) 58 | 59 | for j in range(samples_per_user[i]): 60 | yy[j] = np.dot(nom_xx[j], W) + b 61 | 62 | X_split[i] = nom_xx.tolist() 63 | y_split[i] = yy.tolist() 64 | 65 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 66 | 67 | return X_split, y_split 68 | 69 | 70 | 71 | def main(): 72 | 73 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 74 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 75 | 76 | train_path = "data/train/mytrain.json" 77 | test_path = "data/test/mytest.json" 78 | 79 | X, y = generate_synthetic(alpha=0.5, beta=0.5) # synthetic (0.5, 0.5) 80 | 81 | 82 | # Create data structure 83 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 84 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 85 | 86 | for i in trange(NUM_USER, ncols=120): 87 | 88 | uname = 'f_{0:05d}'.format(i) 89 | combined = list(zip(X[i], y[i])) 90 | random.shuffle(combined) 91 | X[i][:], y[i][:] = zip(*combined) 92 | num_samples = len(X[i]) 93 | train_len = int(0.75 * num_samples) 94 | test_len = num_samples - train_len 95 | 96 | train_data['users'].append(uname) 97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 98 | train_data['num_samples'].append(train_len) 99 | test_data['users'].append(uname) 100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 101 | test_data['num_samples'].append(test_len) 102 | 103 | 104 | with open(train_path,'w') as outfile: 105 | json.dump(train_data, outfile) 106 | with open(test_path, 'w') as outfile: 107 | 
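Every generator under `data/` finishes the same way: each user's samples are shuffled, split 75/25 into train/test, and written to a LEAF-style JSON with `users`, `user_data`, and `num_samples` keys. A compact sketch of that per-user step (the helper name is illustrative, not from the repo):

```python
import random

def split_user(xs, ys, train_frac=0.75):
    """Shuffle one user's samples and return (train, test) dicts in the
    {'x': [...], 'y': [...]} layout stored in mytrain.json / mytest.json."""
    combined = list(zip(xs, ys))
    random.shuffle(combined)
    xs, ys = zip(*combined)
    cut = int(train_frac * len(xs))
    return ({'x': list(xs[:cut]), 'y': list(ys[:cut])},
            {'x': list(xs[cut:]), 'y': list(ys[cut:])})
```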
json.dump(test_data, outfile) 108 | 109 | 110 | if __name__ == "__main__": 111 | main() 112 | 113 | -------------------------------------------------------------------------------- /data/Linear_synthetic/optimal_solution_finding.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | import numpy as np 10 | from sklearn.linear_model import LinearRegression 11 | import sklearn as sk 12 | np.random.seed(0) 13 | 14 | NUM_USER = 100 15 | 16 | def normalize_data(X): 17 | 18 | #nomarlize all feature of data between (-1 and 1) 19 | normX = X - X.min() 20 | normX = normX / (X.max() - X.min()) 21 | 22 | # nomarlize data with respect to -1 < X.X^T < 1. 23 | temp = normX.dot(normX.T) 24 | return normX/np.sqrt(temp.max()) 25 | 26 | 27 | def finding_optimal_synthetic(num_users=100, kappa=10, dim = 40, noise_ratio=0.05): 28 | 29 | powers = - np.log(kappa) / np.log(dim) / 2 30 | DIM = np.arange(dim) 31 | S = np.power(DIM+1, powers) 32 | 33 | # Creat list data for all users 34 | X_split = [[] for _ in range(num_users)] # X for each user 35 | y_split = [[] for _ in range(num_users)] # y for each user 36 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500 37 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0) 38 | num_total_samples = indices_per_user[-1] 39 | 40 | # Create mean of data for each user, each user will have different distribution 41 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)]) 42 | 43 | # Covariance matrix for X 44 | X_total = np.zeros((num_total_samples, dim)) 45 | y_total = np.zeros(num_total_samples) 46 | 47 | for n in range(num_users): 48 | # Generate data 49 | X_n = np.random.multivariate_normal(mean_X[n], np.diag(S), samples_per_user[n]) 50 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n 51 | 52 | # Normalize all X's using LAMBDA 53 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples) 54 | X_total /= norm 55 | 56 | # Generate weights and labels 57 | W = np.random.rand(dim) 58 | y_total = X_total.dot(W) 59 | noise_variance = 0.01 60 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples) 61 | 62 | for n in range(num_users): 63 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1],:] 64 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]] 65 | X_split[n] = X_n.tolist() 66 | y_split[n] = y_n.tolist() 67 | 68 | # split data to get training data 69 | train_x = [] 70 | train_y = [] 71 | test_x = [] 72 | test_y = [] 73 | for i in range(NUM_USER): 74 | num_samples = len(X_split[i]) 75 | train_len = int(0.75 * num_samples) 76 | test_len = num_samples - train_len 77 | train_x.append(X_split[i][:train_len]) 78 | train_y.append(y_split[i][:train_len]) 79 | test_x.append(X_split[i][train_len:]) 80 | test_y.append(y_split[i][train_len:]) 81 | 82 | train_xc = np.concatenate(train_x) 83 | train_yc = np.concatenate(train_y) 84 | test_xc = np.concatenate(test_x) 85 | test_yc = np.concatenate(test_y) 86 | 87 | # # finding optimal 88 | X_X_T = np.zeros(shape=(dim+1,dim+1)) 89 | X_Y = np.zeros(shape=(dim+1,1)) 90 | 91 | for n in range(num_users): 92 | X = np.array(train_x[i]) 93 | y = np.array(train_y[i]) 94 | one = np.ones((X.shape[0], 1)) 95 | Xbar = np.concatenate((one, X), axis = 1) 96 | X_X_T += Xbar.T.dot(Xbar)*len(y)/len(train_yc) 97 | X_Y += np.array(Xbar).T.dot(y).reshape((dim+1, 
1))*len(y)/len(train_yc) 98 | 99 | # get optimal point. 100 | w = np.linalg.inv(X_X_T).dot(X_Y) 101 | 102 | # caculate loss over all devices 103 | loss = 0 104 | for n in range(num_users): 105 | X = np.array(train_x[i]) 106 | y = np.array(train_y[i]) 107 | one = np.ones((X.shape[0], 1)) 108 | Xbar = np.concatenate((one, X), axis = 1) 109 | y_predict = Xbar.dot(w) 110 | loss += sk.metrics.mean_squared_error(y,y_predict)*len(y)/len(train_yc) 111 | 112 | return loss 113 | 114 | def main(): 115 | loss = 0 116 | loss = finding_optimal_synthetic() 117 | print("loss for train data", loss) 118 | 119 | if __name__ == "__main__": 120 | main() 121 | 122 | -------------------------------------------------------------------------------- /data/Logistic_synthetic/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/data/Logistic_synthetic/README.md -------------------------------------------------------------------------------- /data/Logistic_synthetic/logistic_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import numpy as np 3 | import json 4 | import random 5 | import os 6 | 7 | 8 | def logit(X, W): 9 | return 1 / (1 + np.exp(-np.dot(X, W))) 10 | 11 | 12 | def generate_logistic_regression_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05): 13 | # For consistent results 14 | np.random.seed(0) 15 | 16 | # Sanity check 17 | assert(kappa >= 1 and num_users > 0 and dim > 0) 18 | 19 | X_split = [[] for _ in range(num_users)] # X for each user 20 | y_split = [[] for _ in range(num_users)] # y for each user 21 | 22 | # Find users' sample sizes based on the power law (heterogeneity) 23 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 50 + 10000 24 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0) 25 | num_total_samples = indices_per_user[-1] 26 | 27 | # Each user's mean is drawn from N(0, 1) (non-i.i.d. data) 28 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)]) 29 | 30 | # Covariance matrix for X 31 | Sigma = np.eye(dim) 32 | 33 | # L = 1, hyper_learning_rate = LAMBDA 34 | LAMBDA = 100 if kappa == 1 else 1 / (kappa - 1) 35 | 36 | # Keep all users' inputs and labels in one array, 37 | # indexed according to indices_per_user. 38 | # (e.g. X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n) 39 | # (e.g. 
y_total[indices_per_user[n]:indices_per_user[n+1]] = y_n) 40 | X_total = np.zeros((num_total_samples, dim)) 41 | y_total = np.zeros(num_total_samples) 42 | 43 | for n in range(num_users): 44 | # Generate data 45 | X_n = np.random.multivariate_normal(mean_X[n], Sigma, samples_per_user[n]) 46 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n 47 | 48 | # Normalize all X's using LAMBDA 49 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples) 50 | X_total /= norm + LAMBDA 51 | 52 | # Generate weights and labels 53 | W = np.random.rand(dim) 54 | y_total = logit(X_total, W) 55 | y_total = np.where(y_total > 0.5, 1, 0) 56 | 57 | # Apply noise: randomly flip some of y_n with probability noise_ratio 58 | noise = np.random.binomial(1, noise_ratio, num_total_samples) 59 | y_total = np.multiply(noise - y_total, noise) + np.multiply(y_total, 1 - noise) 60 | 61 | # Save each user's data separately 62 | for n in range(num_users): 63 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :] 64 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]] 65 | X_split[n] = X_n.tolist() 66 | y_split[n] = y_n.tolist() 67 | 68 | # print("User {} has {} samples.".format(n, samples_per_user[n])) 69 | 70 | print("=" * 80) 71 | print("Generated synthetic data for logistic regression successfully.") 72 | print("Summary of the generated data:".format(kappa)) 73 | print(" Total # users : {}".format(num_users)) 74 | print(" Input dimension : {}".format(dim)) 75 | print(" rho : {}".format(kappa)) 76 | print(" Total # of samples : {}".format(num_total_samples)) 77 | print(" Minimum # of samples: {}".format(np.min(samples_per_user))) 78 | print(" Maximum # of samples: {}".format(np.max(samples_per_user))) 79 | print("=" * 80) 80 | 81 | return X_split, y_split 82 | 83 | 84 | def save_total_data(): 85 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 86 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 87 | 88 | train_path = os.path.join("data", "train", "mytrain.json") 89 | test_path = os.path.join("data", "test", "mytest.json") 90 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]: 91 | if not os.path.exists(path): 92 | os.makedirs(path) 93 | 94 | X, y = generate_logistic_regression_data(100, 2, 40, 0.05) 95 | 96 | # Create data structure 97 | train_data = {'users': [], 'user_data': {}, 'num_samples': []} 98 | test_data = {'users': [], 'user_data': {}, 'num_samples': []} 99 | 100 | for i in range(100): 101 | uname = 'f_{0:05d}'.format(i) 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.75 * num_samples) 107 | test_len = num_samples - train_len 108 | print("User: ",uname, " Num Sample: ", num_samples ) 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | with open(train_path, 'w') as outfile: 117 | json.dump(train_data, outfile) 118 | with open(test_path, 'w') as outfile: 119 | json.dump(test_data, outfile) 120 | 121 | print("=" * 80) 122 | print("Saved all users' data sucessfully.") 123 | print(" Train path:", os.path.join(os.curdir, train_path)) 124 | print(" Test path :", os.path.join(os.curdir, test_path)) 125 | 
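The label-noise line in `generate_logistic_regression_data` above, `y = (noise - y)*noise + y*(1 - noise)`, flips exactly those labels whose Bernoulli mask equals 1 and leaves the rest unchanged. A tiny worked example (values chosen arbitrarily):

```python
import numpy as np

y = np.array([0, 1, 1, 0])
flip = np.array([1, 0, 1, 0])  # Bernoulli(noise_ratio) mask
# Where flip == 1 the label becomes 1 - y; where flip == 0 it stays y.
y_noisy = np.multiply(flip - y, flip) + np.multiply(y, 1 - flip)
print(y_noisy)  # [1 1 0 0]
```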
print("=" * 80) 126 | 127 | 128 | def main(): 129 | save_total_data() 130 | #save_data_by_user() 131 | 132 | 133 | if __name__ == '__main__': 134 | main() 135 | -------------------------------------------------------------------------------- /data/Mnist/data/mldata/mnist-original.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/data/Mnist/data/mldata/mnist-original.mat -------------------------------------------------------------------------------- /data/Mnist/generate_iid_20users.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_mldata 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | random.seed(1) 9 | np.random.seed(1) 10 | NUM_USERS = 20 # should be muitiple of 10 11 | NUM_LABELS = 10 12 | # Setup directory for train/test data 13 | train_path = './data/train/mnist_train.json' 14 | test_path = './data/test/mnist_test.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | # Get MNIST data, normalize, and divide by level 23 | mnist = fetch_mldata('MNIST original', data_home='./data') 24 | mu = np.mean(mnist.data.astype(np.float32), 0) 25 | sigma = np.std(mnist.data.astype(np.float32), 0) 26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 27 | mnist_data = [] 28 | for i in trange(10): 29 | idx = mnist.target==i 30 | mnist_data.append(mnist.data[idx]) 31 | 32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data]) 33 | users_lables = [] 34 | 35 | print("idx",idx) 36 | # devide for label for each users: 37 | for user in trange(NUM_USERS): 38 | for j in range(NUM_LABELS): # 4 labels for each users 39 | l = (user + j) % 10 40 | users_lables.append(l) 41 | unique, counts = np.unique(users_lables, return_counts=True) 42 | print("--------------") 43 | print(unique, counts) 44 | 45 | def ram_dom_gen(total, size): 46 | print(total) 47 | nums = [] 48 | temp = [] 49 | for i in range(size - 1): 50 | val = np.random.randint(total//(size + 1), total//(size - 8)) 51 | temp.append(val) 52 | total -= val 53 | temp.append(total) 54 | print(temp) 55 | return temp 56 | number_sample = [] 57 | for total_value, count in zip(mnist_data, counts): 58 | temp = ram_dom_gen(len(total_value), count) 59 | number_sample.append(temp) 60 | print("--------------") 61 | print(number_sample) 62 | 63 | i = 0 64 | number_samples = [] 65 | for i in range(len(number_sample[0])): 66 | for sample in number_sample: 67 | print(sample) 68 | number_samples.append(sample[i]) 69 | 70 | print("--------------") 71 | print(number_samples) 72 | 73 | ###### CREATE USER DATA SPLIT ####### 74 | # Assign 100 samples to each user 75 | X = [[] for _ in range(NUM_USERS)] 76 | y = [[] for _ in range(NUM_USERS)] 77 | count = 0 78 | for user in trange(NUM_USERS): 79 | for j in range(NUM_LABELS): # 4 labels for each users 80 | l = (user + j) % 10 81 | print("value of L",l) 82 | print("value of count",count) 83 | num_samples = number_samples[count] # num sample 84 | count = count + 1 85 | if idx[l] + num_samples < len(mnist_data[l]): 86 | X[user] += mnist_data[l][idx[l]:num_samples].tolist() 87 | y[user] += 
(l*np.ones(num_samples)).tolist() 88 | idx[l] += num_samples 89 | print("check len os user:", user, j,"len data", len(X[user]), num_samples) 90 | 91 | print("IDX2:", idx) # counting samples for each labels 92 | 93 | # Create data structure 94 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 95 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 96 | 97 | # Setup 5 users 98 | # for i in trange(5, ncols=120): 99 | for i in range(NUM_USERS): 100 | uname = 'f_{0:05d}'.format(i) 101 | 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.75*num_samples) 107 | test_len = num_samples - train_len 108 | 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | print("Num_samples:", train_data['num_samples']) 117 | print("Total_samples:",sum(train_data['num_samples'] + test_data['num_samples'])) 118 | 119 | with open(train_path,'w') as outfile: 120 | json.dump(train_data, outfile) 121 | with open(test_path, 'w') as outfile: 122 | json.dump(test_data, outfile) 123 | 124 | print("Finish Generating Samples") 125 | -------------------------------------------------------------------------------- /data/Mnist/generate_niid_100users_updated.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_mldata 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | random.seed(1) 9 | np.random.seed(1) 10 | NUM_USERS = 100 11 | NUM_LABELS = 3 12 | # Setup directory for train/test data 13 | train_path = './data/train/mnist_train.json' 14 | test_path = './data/test/mnist_test.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | # Get MNIST data, normalize, and divide by level 23 | mnist = fetch_mldata('MNIST original', data_home='./data') 24 | mu = np.mean(mnist.data.astype(np.float32), 0) 25 | sigma = np.std(mnist.data.astype(np.float32), 0) 26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 27 | mnist_data = [] 28 | for i in trange(10): 29 | idx = mnist.target==i 30 | mnist_data.append(mnist.data[idx]) 31 | 32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data]) 33 | 34 | ###### CREATE USER DATA SPLIT ####### 35 | # Assign 100 samples to each user 36 | X = [[] for _ in range(NUM_USERS)] 37 | y = [[] for _ in range(NUM_USERS)] 38 | idx = np.zeros(10, dtype=np.int64) 39 | for user in range(NUM_USERS): 40 | for j in range(NUM_LABELS): # 3 labels for each users 41 | #l = (2*user+j)%10 42 | l = (user + j) % 10 43 | print("L:", l) 44 | X[user] += mnist_data[l][idx[l]:idx[l]+10].tolist() 45 | y[user] += (l*np.ones(10)).tolist() 46 | idx[l] += 10 47 | 48 | print("IDX1:", idx) # counting samples for each labels 49 | 50 | # Assign remaining sample by power law 51 | user = 0 52 | props = np.random.lognormal( 53 | 0, 2., (10, NUM_USERS, NUM_LABELS)) # last 5 is 5 labels 54 | props = np.array([[[len(v)-1000]] for v in mnist_data]) * \ 55 | props/np.sum(props, (1, 2), 
keepdims=True) 56 | # print("here:",props/np.sum(props,(1,2), keepdims=True)) 57 | #props = np.array([[[len(v)-100]] for v in mnist_data]) * \ 58 | # props/np.sum(props, (1, 2), keepdims=True) 59 | #idx = 1000*np.ones(10, dtype=np.int64) 60 | # print("here2:",props) 61 | for user in trange(NUM_USERS): 62 | for j in range(NUM_LABELS): # 4 labels for each users 63 | # l = (2*user+j)%10 64 | l = (user + j) % 10 65 | num_samples = int(props[l, user//int(NUM_USERS/10), j]) 66 | numran1 = random.randint(10, 200) 67 | numran2 = random.randint(1, 10) 68 | num_samples = (num_samples) * numran2 + numran1 69 | if(NUM_USERS <= 20): 70 | num_samples = num_samples * 2 71 | if idx[l] + num_samples < len(mnist_data[l]): 72 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist() 73 | y[user] += (l*np.ones(num_samples)).tolist() 74 | idx[l] += num_samples 75 | print("check len os user:", user, j, 76 | "len data", len(X[user]), num_samples) 77 | 78 | print("IDX2:", idx) # counting samples for each labels 79 | 80 | # Create data structure 81 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 82 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 83 | 84 | # Setup 5 users 85 | # for i in trange(5, ncols=120): 86 | for i in range(NUM_USERS): 87 | uname = 'f_{0:05d}'.format(i) 88 | 89 | combined = list(zip(X[i], y[i])) 90 | random.shuffle(combined) 91 | X[i][:], y[i][:] = zip(*combined) 92 | num_samples = len(X[i]) 93 | train_len = int(0.75*num_samples) 94 | test_len = num_samples - train_len 95 | 96 | train_data['users'].append(uname) 97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 98 | train_data['num_samples'].append(train_len) 99 | test_data['users'].append(uname) 100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 101 | test_data['num_samples'].append(test_len) 102 | 103 | print("Num_samples:", train_data['num_samples']) 104 | print("Total_samples:",sum(train_data['num_samples'])) 105 | 106 | with open(train_path,'w') as outfile: 107 | json.dump(train_data, outfile) 108 | with open(test_path, 'w') as outfile: 109 | json.dump(test_data, outfile) 110 | 111 | print("Finish Generating Samples") 112 | -------------------------------------------------------------------------------- /data/Mnist/generate_niid_20users.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_openml 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | random.seed(1) 9 | np.random.seed(1) 10 | NUM_USERS = 20 # should be muitiple of 10 11 | NUM_LABELS = 2 12 | # Setup directory for train/test data 13 | train_path = './data/train/mnist_train.json' 14 | test_path = './data/test/mnist_test.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | # Get MNIST data, normalize, and divide by level 23 | mnist = fetch_openml('mnist_784', data_home='./data') 24 | mu = np.mean(mnist.data.astype(np.float32), 0) 25 | sigma = np.std(mnist.data.astype(np.float32), 0) 26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 27 | mnist_data = [] 28 | for i in trange(10): 29 | idx = mnist.target==str(i) 30 | mnist_data.append(mnist.data[idx]) 31 | 32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data]) 33 | users_lables = [] 34 | 35 | 
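The MNIST splitters draw the remaining per-user, per-label sample counts from a lognormal distribution, which is what gives the heavy-tailed, quantity-skewed split across clients. A small illustration of that effect (sizes and parameters arbitrary):

```python
import numpy as np
np.random.seed(1)

# Lognormal draws are heavy-tailed, so a few users receive far more samples
# than the rest, on top of the label skew.
counts = np.random.lognormal(0, 2.0, 20).astype(int) + 10
print(sorted(counts))
```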
print("idx",idx) 36 | # devide for label for each users: 37 | for user in trange(NUM_USERS): 38 | for j in range(NUM_LABELS): # 4 labels for each users 39 | l = (user + j) % 10 40 | users_lables.append(l) 41 | unique, counts = np.unique(users_lables, return_counts=True) 42 | print("--------------") 43 | print(unique, counts) 44 | 45 | def ram_dom_gen(total, size): 46 | print(total) 47 | nums = [] 48 | temp = [] 49 | for i in range(size - 1): 50 | val = np.random.randint(total//(size + 1), total//2) 51 | temp.append(val) 52 | total -= val 53 | temp.append(total) 54 | print(temp) 55 | return temp 56 | number_sample = [] 57 | for total_value, count in zip(mnist_data, counts): 58 | temp = ram_dom_gen(len(total_value), count) 59 | number_sample.append(temp) 60 | print("--------------") 61 | print(number_sample) 62 | 63 | i = 0 64 | number_samples = [] 65 | for i in range(len(number_sample[0])): 66 | for sample in number_sample: 67 | print(sample) 68 | number_samples.append(sample[i]) 69 | 70 | print("--------------") 71 | print(number_samples) 72 | 73 | ###### CREATE USER DATA SPLIT ####### 74 | # Assign 100 samples to each user 75 | X = [[] for _ in range(NUM_USERS)] 76 | y = [[] for _ in range(NUM_USERS)] 77 | count = 0 78 | for user in trange(NUM_USERS): 79 | for j in range(NUM_LABELS): # 4 labels for each users 80 | l = (user + j) % 10 81 | print("value of L",l) 82 | print("value of count",count) 83 | num_samples = number_samples[count] # num sample 84 | count = count + 1 85 | if idx[l] + num_samples < len(mnist_data[l]): 86 | X[user] += mnist_data[l][idx[l]:num_samples].tolist() 87 | y[user] += (l*np.ones(num_samples)).tolist() 88 | idx[l] += num_samples 89 | print("check len os user:", user, j,"len data", len(X[user]), num_samples) 90 | 91 | print("IDX2:", idx) # counting samples for each labels 92 | 93 | # Create data structure 94 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 95 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 96 | 97 | # Setup 5 users 98 | # for i in trange(5, ncols=120): 99 | for i in range(NUM_USERS): 100 | uname = 'f_{0:05d}'.format(i) 101 | 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.75*num_samples) 107 | test_len = num_samples - train_len 108 | 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | print("Num_samples:", train_data['num_samples']) 117 | print("Total_samples:",sum(train_data['num_samples'] + test_data['num_samples'])) 118 | 119 | with open(train_path,'w') as outfile: 120 | json.dump(train_data, outfile) 121 | with open(test_path, 'w') as outfile: 122 | json.dump(test_data, outfile) 123 | 124 | print("Finish Generating Samples") 125 | -------------------------------------------------------------------------------- /data/Mnist/generate_niid_mnist_100users.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_mldata 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | random.seed(1) 9 | np.random.seed(1) 10 | NUM_USERS = 100 11 | NUM_LABELS = 3 12 | # Setup directory for train/test 
data 13 | train_path = './data/train/mnist_train.json' 14 | test_path = './data/test/mnist_test.json' 15 | dir_path = os.path.dirname(train_path) 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | dir_path = os.path.dirname(test_path) 19 | if not os.path.exists(dir_path): 20 | os.makedirs(dir_path) 21 | 22 | # Get MNIST data, normalize, and divide by level 23 | mnist = fetch_mldata('MNIST original', data_home='./data') 24 | mu = np.mean(mnist.data.astype(np.float32), 0) 25 | sigma = np.std(mnist.data.astype(np.float32), 0) 26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 27 | mnist_data = [] 28 | for i in trange(10): 29 | idx = mnist.target==i 30 | mnist_data.append(mnist.data[idx]) 31 | 32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data]) 33 | 34 | ###### CREATE USER DATA SPLIT ####### 35 | # Assign 100 samples to each user 36 | X = [[] for _ in range(NUM_USERS)] 37 | y = [[] for _ in range(NUM_USERS)] 38 | idx = np.zeros(10, dtype=np.int64) 39 | for user in range(NUM_USERS): 40 | for j in range(NUM_LABELS): # 3 labels for each users 41 | #l = (2*user+j)%10 42 | l = (user + j) % 10 43 | print("L:", l) 44 | X[user] += mnist_data[l][idx[l]:idx[l]+10].tolist() 45 | y[user] += (l*np.ones(10)).tolist() 46 | idx[l] += 10 47 | 48 | print("IDX1:", idx) # counting samples for each labels 49 | 50 | # Assign remaining sample by power law 51 | user = 0 52 | props = np.random.lognormal( 53 | 0, 2., (10, NUM_USERS, NUM_LABELS)) # last 5 is 5 labels 54 | props = np.array([[[len(v)-1000]] for v in mnist_data]) * \ 55 | props/np.sum(props, (1, 2), keepdims=True) 56 | # print("here:",props/np.sum(props,(1,2), keepdims=True)) 57 | #props = np.array([[[len(v)-100]] for v in mnist_data]) * \ 58 | # props/np.sum(props, (1, 2), keepdims=True) 59 | #idx = 1000*np.ones(10, dtype=np.int64) 60 | # print("here2:",props) 61 | for user in trange(NUM_USERS): 62 | for j in range(NUM_LABELS): # 4 labels for each users 63 | # l = (2*user+j)%10 64 | l = (user + j) % 10 65 | num_samples = int(props[l, user//int(NUM_USERS/10), j]) 66 | numran1 = random.randint(10, 200) 67 | numran2 = random.randint(1, 10) 68 | num_samples = (num_samples) * numran2 + numran1 69 | if(NUM_USERS <= 20): 70 | num_samples = num_samples * 2 71 | if idx[l] + num_samples < len(mnist_data[l]): 72 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist() 73 | y[user] += (l*np.ones(num_samples)).tolist() 74 | idx[l] += num_samples 75 | print("check len os user:", user, j, 76 | "len data", len(X[user]), num_samples) 77 | 78 | print("IDX2:", idx) # counting samples for each labels 79 | 80 | # Create data structure 81 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 82 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 83 | 84 | # Setup 5 users 85 | # for i in trange(5, ncols=120): 86 | for i in range(NUM_USERS): 87 | uname = 'f_{0:05d}'.format(i) 88 | 89 | combined = list(zip(X[i], y[i])) 90 | random.shuffle(combined) 91 | X[i][:], y[i][:] = zip(*combined) 92 | num_samples = len(X[i]) 93 | train_len = int(0.75*num_samples) 94 | test_len = num_samples - train_len 95 | 96 | train_data['users'].append(uname) 97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 98 | train_data['num_samples'].append(train_len) 99 | test_data['users'].append(uname) 100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 101 | test_data['num_samples'].append(test_len) 102 | 103 | print("Num_samples:", train_data['num_samples']) 
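The label assignment used throughout these MNIST scripts is a circular shard: user `u` receives classes `(u + j) % 10` for `j = 0..NUM_LABELS-1`, so neighbouring users overlap in labels but no user sees all ten digits. For example:

```python
NUM_USERS, NUM_LABELS = 100, 3

# Each user gets NUM_LABELS consecutive digit classes, wrapping modulo 10,
# e.g. user 9 gets labels {9, 0, 1}.
shards = {u: [(u + j) % 10 for j in range(NUM_LABELS)] for u in range(NUM_USERS)}
print(shards[0], shards[9])  # [0, 1, 2] [9, 0, 1]
```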
104 | print("Total_samples:",sum(train_data['num_samples'])) 105 | 106 | with open(train_path,'w') as outfile: 107 | json.dump(train_data, outfile) 108 | with open(test_path, 'w') as outfile: 109 | json.dump(test_data, outfile) 110 | 111 | print("Finish Generating Samples") 112 | -------------------------------------------------------------------------------- /flearn/optimizers/__pycache__/fedoptimizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/optimizers/__pycache__/fedoptimizer.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/optimizers/fedoptimizer.py: -------------------------------------------------------------------------------- 1 | from torch.optim import Optimizer 2 | import torch 3 | 4 | 5 | class MySGD(Optimizer): 6 | def __init__(self, params, lr): 7 | defaults = dict(lr=lr) 8 | super(MySGD, self).__init__(params, defaults) 9 | 10 | def step(self, closure=None, hyper_learning_rate=0): 11 | loss = None 12 | if closure is not None: 13 | loss = closure 14 | 15 | for group in self.param_groups: 16 | # print(group) 17 | for p in group['params']: 18 | if p.grad is None: 19 | continue 20 | d_p = p.grad.data 21 | if (hyper_learning_rate != 0): 22 | p.data.add_(-hyper_learning_rate, d_p) 23 | else: 24 | p.data.add_(-group['lr'], d_p) 25 | return loss 26 | 27 | 28 | class FEDLOptimizer(Optimizer): 29 | def __init__(self, params, lr=0.01, hyper_lr=0.01, L=0.1): 30 | if lr < 0.0: 31 | raise ValueError("Invalid learning rate: {}".format(lr)) 32 | defaults = dict(lr=lr, hyper_lr=hyper_lr, L=L) 33 | super(FEDLOptimizer, self).__init__(params, defaults) 34 | 35 | def step(self, server_grads, pre_grads, closure=None): 36 | loss = None 37 | if closure is not None: 38 | loss = closure 39 | for group in self.param_groups: 40 | for p, server_grad, pre_grad in zip(group['params'], server_grads, pre_grads): 41 | if (server_grad.grad != None and pre_grad.grad != None): 42 | p.data = p.data - group['lr'] * ( 43 | p.grad.data + group['hyper_lr'] * server_grad.grad.data - pre_grad.grad.data) 44 | else: 45 | p.data = p.data - group['lr'] * p.grad.data 46 | return loss 47 | 48 | 49 | class pFedMeOptimizer(Optimizer): 50 | def __init__(self, params, lr=0.01, L=0.1, mu=0.001): 51 | # self.local_weight_updated = local_weight # w_i,K 52 | if lr < 0.0: 53 | raise ValueError("Invalid learning rate: {}".format(lr)) 54 | defaults = dict(lr=lr, L=L, mu=mu) 55 | super(pFedMeOptimizer, self).__init__(params, defaults) 56 | 57 | def step(self, local_weight_updated, closure=None): 58 | loss = None 59 | if closure is not None: 60 | loss = closure 61 | weight_update = local_weight_updated.copy() 62 | for group in self.param_groups: 63 | for p, localweight in zip(group['params'], weight_update): 64 | p.data = p.data - group['lr'] * ( 65 | p.grad.data + group['L'] * (p.data - localweight.data) + group['mu'] * p.data) 66 | return group['params'], loss 67 | 68 | def update_param(self, local_weight_updated, closure=None): 69 | loss = None 70 | if closure is not None: 71 | loss = closure 72 | weight_update = local_weight_updated.copy() 73 | for group in self.param_groups: 74 | for p, localweight in zip(group['params'], weight_update): 75 | p.data = localweight.data 76 | # return p.data 77 | return group['params'] 78 | 79 | 80 | class SCAFFOLDOptimizer(Optimizer): 81 
| def __init__(self, params, lr, weight_decay): 82 | defaults = dict(lr=lr, weight_decay=weight_decay) 83 | super(SCAFFOLDOptimizer, self).__init__(params, defaults) 84 | pass 85 | 86 | def step(self, server_controls, client_controls, closure=None): 87 | loss = None 88 | if closure is not None: 89 | loss = closure 90 | 91 | for group, c, ci in zip(self.param_groups, server_controls, client_controls): 92 | p = group['params'][0] 93 | if p.grad is None: 94 | continue 95 | d_p = p.grad.data + c.data - ci.data 96 | p.data = p.data - d_p.data * group['lr'] 97 | # for group in self.param_groups: 98 | # for p, c, ci in zip(group['params'], server_controls, client_controls): 99 | # if p.grad is None: 100 | # continue 101 | # d_p = p.grad.data + c.data - ci.data 102 | # p.data = p.data - d_p.data * group['lr'] 103 | return loss 104 | -------------------------------------------------------------------------------- /flearn/servers/__pycache__/serveravg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/servers/__pycache__/serveravg.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/servers/__pycache__/serverbase.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/servers/__pycache__/serverbase.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/servers/__pycache__/serverfedl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/servers/__pycache__/serverfedl.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/servers/__pycache__/serverscaffold.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/servers/__pycache__/serverscaffold.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/servers/server_avg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import h5py 4 | 5 | from flearn.users.user_avg import UserAVG 6 | from flearn.servers.server_base import Server 7 | from utils.model_utils import read_data, read_user_data 8 | import numpy as np 9 | from scipy.stats import rayleigh 10 | 11 | 12 | # Implementation for FedAvg Server 13 | class FedAvg(Server): 14 | def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, local_epochs, 15 | users_per_round, similarity, noise, times): 16 | super().__init__(dataset, algorithm, model[0], batch_size, learning_rate, L, num_glob_iters, local_epochs, 17 | users_per_round, similarity, noise, times) 18 | 19 | # Initialize data for all users 20 | data = read_data(dataset) 21 | total_users = len(data[0]) 22 | for i in range(total_users): 23 | id, train, test = 
read_user_data(i, data, dataset) 24 | user = UserAVG(id, train, test, model, batch_size, learning_rate, L, local_epochs) 25 | self.users.append(user) 26 | self.total_train_samples += user.train_samples 27 | 28 | if self.noise: 29 | self.communication_thresh = rayleigh.ppf(1 - users_per_round / total_users) # h_min 30 | 31 | print("Number of users / total users:", users_per_round, " / ", total_users) 32 | print("Finished creating FedAvg server.") 33 | 34 | def train(self): 35 | loss = [] 36 | for glob_iter in range(self.num_glob_iters): 37 | print("-------------Round number: ", glob_iter, " -------------") 38 | # loss_ = 0 39 | self.send_parameters() 40 | 41 | # Evaluate model each interation 42 | self.evaluate() 43 | 44 | if self.noise: 45 | self.selected_users = self.select_transmitting_users() 46 | print(f"Transmitting {len(self.selected_users)} users") 47 | else: 48 | self.selected_users = self.select_users(glob_iter, self.users_per_round) 49 | 50 | for user in self.selected_users: 51 | user.train() 52 | user.drop_lr() 53 | 54 | self.aggregate_parameters() 55 | self.get_max_norm() 56 | 57 | if self.noise: 58 | self.apply_channel_effect() 59 | 60 | self.save_results() 61 | self.save_norms() 62 | self.save_model() 63 | 64 | def get_max_norm(self): 65 | param_norms = [] 66 | for user in self.selected_users: 67 | param_norm, control_norm = user.get_params_norm() 68 | param_norms.append(param_norm) 69 | self.param_norms.append(max(param_norms)) 70 | 71 | def aggregate_parameters(self): 72 | assert (self.users is not None and len(self.users) > 0) 73 | total_train = 0 74 | for user in self.selected_users: 75 | total_train += user.train_samples 76 | for user in self.selected_users: 77 | self.add_parameters(user, user.train_samples / total_train) 78 | 79 | def add_parameters(self, user, ratio): 80 | for server_param, del_model in zip(self.model.parameters(), user.delta_model): 81 | num_of_selected_users = len(self.selected_users) 82 | # server_param.data = server_param.data + del_model.data * ratio 83 | server_param.data = server_param.data + del_model.data / num_of_selected_users 84 | 85 | def get_max_norm(self): 86 | param_norms = [] 87 | for user in self.selected_users: 88 | param_norms.append(user.get_params_norm()) 89 | self.param_norms.append(max(param_norms)) 90 | 91 | def apply_channel_effect(self, sigma=1, power_control=2500): 92 | num_of_selected_users = len(self.selected_users) 93 | users_norms = [] 94 | for user in self.selected_users: 95 | users_norms.append(user.get_params_norm()) 96 | alpha_t = power_control / max(users_norms) ** 2 97 | for param in self.model.parameters(): 98 | param.data = param.data + sigma / (alpha_t ** 0.5 * num_of_selected_users * self.communication_thresh)\ 99 | * torch.randn(param.data.size()) 100 | -------------------------------------------------------------------------------- /flearn/servers/server_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import h5py 4 | 5 | import numpy as np 6 | from utils.model_utils import Metrics 7 | import copy 8 | from scipy.stats import rayleigh 9 | 10 | 11 | class Server: 12 | def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L, 13 | num_glob_iters, local_epochs, users_per_round, similarity, noise, times): 14 | 15 | # Set up the main attributes 16 | self.dataset = dataset 17 | self.num_glob_iters = num_glob_iters 18 | self.local_epochs = local_epochs 19 | self.batch_size = batch_size 20 | self.learning_rate = learning_rate 
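In the noisy setting the servers set `communication_thresh` to the Rayleigh quantile `rayleigh.ppf(1 - users_per_round / total_users)`, so that on average the desired fraction of users has a channel gain above the threshold and transmits in a given round (compare `select_transmitting_users` in `server_base.py`). A small numerical check (numbers illustrative):

```python
from scipy.stats import rayleigh

total_users, users_per_round = 100, 10
# h_min chosen so that P(h >= h_min) = users_per_round / total_users
# for a Rayleigh-distributed channel gain h.
h_min = rayleigh.ppf(1 - users_per_round / total_users)
gains = rayleigh.rvs(size=total_users)
print(h_min, int((gains >= h_min).sum()))  # on average about 10 users transmit
```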
21 | self.total_train_samples = 0 22 | self.model = copy.deepcopy(model) 23 | self.users = [] 24 | self.selected_users = [] 25 | self.users_per_round = users_per_round 26 | self.L = L 27 | self.algorithm = algorithm 28 | self.rs_train_acc, self.rs_train_loss, self.rs_glob_acc = [], [], [] 29 | 30 | self.times = times 31 | self.similarity = similarity 32 | self.noise = noise 33 | self.communication_thresh = None 34 | self.param_norms = [] 35 | self.control_norms = None 36 | 37 | def aggregate_grads(self): 38 | assert (self.users is not None and len(self.users) > 0) 39 | for param in self.model.parameters(): 40 | param.grad = torch.zeros_like(param.data) 41 | for user in self.users: 42 | self.add_grad(user, user.train_samples / self.total_train_samples) 43 | 44 | def send_parameters(self): 45 | assert (self.users is not None and len(self.users) > 0) 46 | for user in self.users: 47 | user.set_parameters(self.model) 48 | 49 | def save_model(self): 50 | model_path = os.path.join("models", self.dataset) 51 | if not os.path.exists(model_path): 52 | os.makedirs(model_path) 53 | torch.save(self.model, os.path.join(model_path, "server" + ".pt")) 54 | 55 | def load_model(self): 56 | model_path = os.path.join("models", self.dataset, "server" + ".pt") 57 | assert (os.path.exists(model_path)) 58 | self.model = torch.load(model_path) 59 | 60 | def model_exists(self): 61 | return os.path.exists(os.path.join("models", self.dataset, "server" + ".pt")) 62 | 63 | def select_users(self, round, users_per_round): 64 | if users_per_round in [len(self.users), 0]: 65 | return self.users 66 | 67 | users_per_round = min(users_per_round, len(self.users)) 68 | # fix the list of user consistent 69 | np.random.seed(round * (self.times + 1)) 70 | return np.random.choice(self.users, users_per_round, replace=False) # , p=pk) 71 | 72 | def select_transmitting_users(self): 73 | transmitting_users = [] 74 | for user in self.users: 75 | user.csi = rayleigh.rvs() 76 | if user.csi >= self.communication_thresh: 77 | transmitting_users.append(user) 78 | return transmitting_users 79 | 80 | def save_results(self): 81 | """ Save loss, accuracy to h5 file""" 82 | file_name = "./results/" + self.dataset + "_" + self.algorithm 83 | file_name += "_" + str(self.similarity) + "s" 84 | if self.noise: 85 | file_name += '_noisy' 86 | file_name += "_" + str(self.times) + ".h5" 87 | if len(self.rs_glob_acc) != 0 & len(self.rs_train_acc) & len(self.rs_train_loss): 88 | with h5py.File(file_name, 'w') as hf: 89 | hf.create_dataset('rs_glob_acc', data=self.rs_glob_acc) 90 | hf.create_dataset('rs_train_acc', data=self.rs_train_acc) 91 | hf.create_dataset('rs_train_loss', data=self.rs_train_loss) 92 | 93 | def save_norms(self): 94 | """ Save norms, to h5 file""" 95 | file_name = "./results/" + self.dataset + "_" + self.algorithm + '_norms' 96 | file_name += "_" + str(self.similarity) + "s" 97 | if self.noise: 98 | file_name += '_noisy' 99 | file_name += "_" + str(self.times) + ".h5" 100 | 101 | if len(self.param_norms): 102 | with h5py.File(file_name, 'w') as hf: 103 | hf.create_dataset('rs_param_norms', data=self.param_norms) 104 | if self.algorithm == 'SCAFFOLD': 105 | hf.create_dataset('rs_control_norms', data=self.control_norms) 106 | 107 | def test(self): 108 | '''tests self.latest_model on given clients 109 | ''' 110 | num_samples = [] 111 | tot_correct = [] 112 | losses = [] 113 | for c in self.users: 114 | ct, ns = c.test() 115 | tot_correct.append(ct * 1.0) 116 | num_samples.append(ns) 117 | ids = [c.user_id for c in self.users] 118 | 119 | 
return ids, num_samples, tot_correct 120 | 121 | def train_error_and_loss(self): 122 | num_samples = [] 123 | tot_correct = [] 124 | losses = [] 125 | for c in self.users: 126 | ct, cl, ns = c.train_error_and_loss() 127 | tot_correct.append(ct * 1.0) 128 | num_samples.append(ns) 129 | losses.append(cl * 1.0) 130 | 131 | ids = [c.user_id for c in self.users] 132 | # groups = [c.group for c in self.clients] 133 | 134 | return ids, num_samples, tot_correct, losses 135 | 136 | def evaluate(self): 137 | stats = self.test() 138 | stats_train = self.train_error_and_loss() 139 | glob_acc = np.sum(stats[2]) * 1.0 / np.sum(stats[1]) 140 | train_acc = np.sum(stats_train[2]) * 1.0 / np.sum(stats_train[1]) 141 | # train_loss = np.dot(stats_train[3], stats_train[1])*1.0/np.sum(stats_train[1]) 142 | train_loss = sum([x * y for (x, y) in zip(stats_train[1], stats_train[3])]).item() / np.sum(stats_train[1]) 143 | self.rs_glob_acc.append(glob_acc) 144 | self.rs_train_acc.append(train_acc) 145 | self.rs_train_loss.append(train_loss) 146 | # print("stats_train[1]",stats_train[3][0]) 147 | print("Average Global Accurancy: ", glob_acc) 148 | print("Average Global Trainning Accurancy: ", train_acc) 149 | print("Average Global Trainning Loss: ", train_loss) 150 | -------------------------------------------------------------------------------- /flearn/servers/server_scaffold.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | import h5py 5 | from flearn.users.user_scaffold import UserSCAFFOLD 6 | from flearn.servers.server_base import Server 7 | from utils.model_utils import read_data, read_user_data 8 | import numpy as np 9 | from scipy.stats import rayleigh 10 | 11 | 12 | # Implementation for SCAFFOLD Server 13 | class SCAFFOLD(Server): 14 | def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, 15 | local_epochs, users_per_round, similarity, noise, times): 16 | super().__init__(dataset, algorithm, model[0], batch_size, learning_rate, L, 17 | num_glob_iters, local_epochs, users_per_round, similarity, noise, times) 18 | self.control_norms = [] 19 | 20 | # Initialize data for all users 21 | data = read_data(dataset) 22 | total_users = len(data[0]) 23 | for i in range(total_users): 24 | id, train, test = read_user_data(i, data, dataset) 25 | user = UserSCAFFOLD(id, train, test, model, batch_size, learning_rate, L, local_epochs) 26 | self.users.append(user) 27 | self.total_train_samples += user.train_samples 28 | 29 | if self.noise: 30 | self.communication_thresh = rayleigh.ppf(1 - users_per_round / total_users) # h_min 31 | 32 | print("Number of users / total users:", users_per_round, " / ", total_users) 33 | 34 | self.server_controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad] 35 | 36 | print("Finished creating SCAFFOLD server.") 37 | 38 | def train(self): 39 | loss = [] 40 | for glob_iter in range(self.num_glob_iters): 41 | print("-------------Round number: ", glob_iter, " -------------") 42 | # loss_ = 0 43 | 44 | self.send_parameters() 45 | 46 | # Evaluate model at each iteration 47 | self.evaluate() 48 | 49 | if self.noise: 50 | self.selected_users = self.select_transmitting_users() 51 | print(f"Transmitting {len(self.selected_users)} users") 52 | else: 53 | self.selected_users = self.select_users(glob_iter, self.users_per_round) 54 | 55 | for user in self.selected_users: 56 | user.train() 57 | user.drop_lr() 58 | 59 | self.aggregate_parameters() 60 | 
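As `aggregate_parameters` and `add_parameters` below make explicit, the SCAFFOLD server moves the global model by the average model delta over the selected users, while the server control variate moves by the sum of control deltas divided by the total number of users. A stripped-down sketch with plain tensors (not the repo's classes):

```python
import torch

num_selected, num_total = 10, 100
delta_y = [torch.randn(5) for _ in range(num_selected)]  # users' model deltas
delta_c = [torch.randn(5) for _ in range(num_selected)]  # users' control deltas

x = torch.zeros(5)  # global model (one flattened parameter, for illustration)
c = torch.zeros(5)  # server control variate
x = x + sum(delta_y) / num_selected  # average over participating users
c = c + sum(delta_c) / num_total     # divided by the full population size
```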
self.get_max_norm() 61 | 62 | if self.noise: 63 | self.apply_channel_effect() 64 | 65 | self.save_results() 66 | self.save_norms() 67 | self.save_model() 68 | 69 | def send_parameters(self): 70 | assert (self.users is not None and len(self.users) > 0) 71 | for user in self.users: 72 | user.set_parameters(self.model) 73 | for control, new_control in zip(user.server_controls, self.server_controls): 74 | control.data = new_control.data 75 | 76 | def aggregate_parameters(self): 77 | assert (self.users is not None and len(self.users) > 0) 78 | total_samples = 0 79 | for user in self.selected_users: 80 | total_samples += user.train_samples 81 | for user in self.selected_users: 82 | self.add_parameters(user, total_samples) 83 | 84 | def add_parameters(self, user, total_samples): 85 | num_of_selected_users = len(self.selected_users) 86 | num_of_users = len(self.users) 87 | num_of_samples = user.train_samples 88 | for param, control, del_control, del_model in zip(self.model.parameters(), self.server_controls, 89 | user.delta_controls, user.delta_model): 90 | # param.data = param.data + del_model.data * num_of_samples / total_samples / num_of_selected_users 91 | param.data = param.data + del_model.data / num_of_selected_users 92 | control.data = control.data + del_control.data / num_of_users 93 | 94 | def get_max_norm(self): 95 | param_norms = [] 96 | control_norms = [] 97 | for user in self.selected_users: 98 | param_norm, control_norm = user.get_params_norm() 99 | param_norms.append(param_norm) 100 | control_norms.append(control_norm) 101 | self.param_norms.append(max(param_norms)) 102 | self.control_norms.append((max(control_norms))) 103 | 104 | def apply_channel_effect(self, sigma=1, power_control=2500): 105 | num_of_selected_users = len(self.selected_users) 106 | alpha_t_params = power_control / self.param_norms[-1] ** 2 107 | alpha_t_controls = 4e4 * power_control / self.control_norms[-1] ** 2 108 | for param, control in zip(self.model.parameters(), self.server_controls): 109 | param.data = param.data + sigma / ( 110 | alpha_t_params ** 0.5 * num_of_selected_users * self.communication_thresh) * torch.randn( 111 | param.data.size()) 112 | control.data = control.data + sigma / ( 113 | alpha_t_controls ** 0.5 * num_of_selected_users * self.communication_thresh) * torch.randn( 114 | control.data.size()) 115 | -------------------------------------------------------------------------------- /flearn/trainmodel/__pycache__/models.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/trainmodel/__pycache__/models.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/trainmodel/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Net(nn.Module): 7 | def __init__(self): 8 | super(Net, self).__init__() 9 | self.conv1 = nn.Conv2d(1, 16, 2, 1) 10 | self.conv2 = nn.Conv2d(16, 32, 2, 1) 11 | self.dropout1 = nn.Dropout(0.25) 12 | self.dropout2 = nn.Dropout(0.5) 13 | self.fc1 = nn.Linear(18432, 128) 14 | self.fc2 = nn.Linear(128, 10) 15 | 16 | def forward(self, x): 17 | x = self.conv1(x) 18 | x = nn.ReLU()(x) 19 | x = nn.MaxPool2d(2, 1)(x) 20 | x = self.dropout1(x) 21 | x = self.conv2(x) 22 | x = nn.ReLU()(x) 23 | x = 
nn.MaxPool2d(2, 1)(x) 24 | x = self.dropout2(x) 25 | x = torch.flatten(x, 1) 26 | x = self.fc1(x) 27 | x = nn.ReLU()(x) 28 | x = self.fc2(x) 29 | output = F.log_softmax(x, dim=1) 30 | return output 31 | 32 | 33 | class MclrLogistic(nn.Module): 34 | def __init__(self, input_dim=784, output_dim=10): 35 | super(MclrLogistic, self).__init__() 36 | self.fc1 = nn.Linear(input_dim, output_dim) 37 | 38 | def forward(self, x): 39 | x = torch.flatten(x, 1) 40 | x = self.fc1(x) 41 | output = F.log_softmax(x, dim=1) 42 | return output 43 | 44 | 45 | class MclrCrossEntropy(nn.Module): 46 | def __init__(self, input_dim=784, output_dim=10): 47 | super(MclrCrossEntropy, self).__init__() 48 | self.linear = torch.nn.Linear(input_dim, output_dim) 49 | 50 | def forward(self, x): 51 | x = torch.flatten(x, 1) 52 | outputs = self.linear(x) 53 | return outputs 54 | 55 | 56 | class DNN(nn.Module): 57 | def __init__(self, input_dim=784, mid_dim=100, output_dim=10): 58 | super(DNN, self).__init__() 59 | # define network layers 60 | self.fc1 = nn.Linear(input_dim, mid_dim) 61 | self.fc2 = nn.Linear(mid_dim, output_dim) 62 | 63 | def forward(self, x): 64 | # define forward pass 65 | x = torch.flatten(x, 1) 66 | x = F.relu(self.fc1(x)) 67 | x = self.fc2(x) 68 | x = F.log_softmax(x, dim=1) 69 | return x 70 | 71 | 72 | class LinearRegression(nn.Module): 73 | def __init__(self, input_dim=60, output_dim=1): 74 | super(LinearRegression, self).__init__() 75 | self.linear = torch.nn.Linear(input_dim, output_dim) 76 | 77 | def forward(self, x): 78 | x = torch.flatten(x, 1) 79 | outputs = self.linear(x) 80 | return outputs 81 | 82 | 83 | class CifarNet(nn.Module): 84 | def __init__(self, categories=10): 85 | super(CifarNet, self).__init__() 86 | self.conv1 = nn.Conv2d(3, 32, 5, 1, 2) # doubled bias learning rate 87 | self.conv2 = nn.Conv2d(32, 32, 5, 1, 2) # doubled bias learning rate 88 | self.conv3 = nn.Conv2d(32, 64, 5, 1, 2) # doubled bias learning rate 89 | self.fc1 = nn.Linear(576, 64) 90 | self.fc2 = nn.Linear(64, categories) 91 | 92 | def forward(self, x): 93 | x = torch.reshape(x, (-1, 3, 32, 32)) 94 | x = self.conv1(x) 95 | x = nn.MaxPool2d(3, 2)(x) 96 | x = nn.ReLU()(x) 97 | x = self.conv2(x) 98 | x = nn.ReLU()(x) 99 | x = nn.AvgPool2d(3, 2)(x) 100 | x = self.conv3(x) 101 | x = nn.ReLU()(x) 102 | x = nn.AvgPool2d(3, 2)(x) 103 | x = torch.flatten(x, 1) 104 | x = self.fc1(x) 105 | x = nn.ReLU()(x) 106 | x = self.fc2(x) 107 | output = F.softmax(x, dim=1) 108 | return output -------------------------------------------------------------------------------- /flearn/users/__pycache__/useravg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/users/__pycache__/useravg.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/users/__pycache__/userbase.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/users/__pycache__/userbase.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/users/__pycache__/userfedl.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/users/__pycache__/userfedl.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/users/__pycache__/userscaffold.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/users/__pycache__/userscaffold.cpython-38.pyc -------------------------------------------------------------------------------- /flearn/users/user_avg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | import json 6 | from torch.utils.data import DataLoader 7 | from flearn.users.user_base import User 8 | from flearn.optimizers.fedoptimizer import * 9 | from torch.optim.lr_scheduler import StepLR 10 | 11 | 12 | # Implementation for FedAvg clients 13 | 14 | class UserAVG(User): 15 | def __init__(self, numeric_id, train_data, test_data, model, batch_size, learning_rate, L, local_epochs): 16 | super().__init__(numeric_id, train_data, test_data, model[0], batch_size, learning_rate, L, local_epochs) 17 | 18 | if model[1] == "linear": 19 | self.loss = nn.MSELoss() 20 | elif model[1] == "cnn": 21 | self.loss = nn.CrossEntropyLoss() 22 | else: 23 | self.loss = nn.NLLLoss() 24 | 25 | if model[1] == "cnn": 26 | layers = [self.model.conv1, self.model.conv2, self.model.conv3, self.model.fc1, self.model.fc2] 27 | self.optimizer = torch.optim.SGD([{'params': layer.weight} for layer in layers] + 28 | [{'params': layer.bias, 'lr': 2 * self.learning_rate} for layer in layers], 29 | lr=self.learning_rate, weight_decay=L) 30 | self.scheduler = StepLR(self.optimizer, step_size=8, gamma=0.1) 31 | self.lr_drop_rate = 0.95 32 | else: 33 | self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate) 34 | 35 | self.csi = None 36 | 37 | def set_grads(self, new_grads): 38 | if isinstance(new_grads, nn.Parameter): 39 | for model_grad, new_grad in zip(self.model.parameters(), new_grads): 40 | model_grad.data = new_grad.data 41 | elif isinstance(new_grads, list): 42 | for idx, model_grad in enumerate(self.model.parameters()): 43 | model_grad.data = new_grads[idx] 44 | 45 | def train(self): 46 | self.model.train() 47 | for epoch in range(1, self.local_epochs + 1): 48 | self.model.train() 49 | for batch_idx, (X, y) in enumerate(self.trainloader): 50 | self.optimizer.zero_grad() 51 | output = self.model(X) 52 | loss = self.loss(output, y) 53 | loss.backward() 54 | self.optimizer.step() 55 | if self.scheduler: 56 | self.scheduler.step() 57 | 58 | # get model difference 59 | for local, server, delta in zip(self.model.parameters(), self.server_model, self.delta_model): 60 | delta.data = local.data.detach() - server.data.detach() 61 | 62 | return loss 63 | 64 | def get_params_norm(self): 65 | params = [] 66 | for delta in self.delta_model: 67 | params.append(torch.flatten(delta.data)) 68 | # return torch.linalg.norm(torch.cat(params), 2) 69 | return float(torch.norm(torch.cat(params))) 70 | -------------------------------------------------------------------------------- /flearn/users/user_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as 
nn 3 | import torch.nn.functional as F 4 | import os 5 | import json 6 | from torch.utils.data import DataLoader 7 | import numpy as np 8 | import copy 9 | 10 | 11 | class User: 12 | """ 13 | Base class for users in federated learning. 14 | """ 15 | def __init__(self, user_id, train_data, test_data, model, batch_size, learning_rate, L, local_epochs): 16 | self.dataset = None 17 | self.optimizer = None 18 | self.model = copy.deepcopy(model) 19 | self.user_id = user_id # integer 20 | self.train_samples = len(train_data) 21 | self.test_samples = len(test_data) 22 | if(batch_size == 0): 23 | self.batch_size = len(train_data) 24 | else: 25 | self.batch_size = batch_size 26 | self.learning_rate = learning_rate 27 | self.L = L 28 | self.local_epochs = local_epochs 29 | self.scheduler = None 30 | self.lr_drop_rate = 1 31 | self.trainloader = DataLoader(train_data, self.batch_size) 32 | self.testloader = DataLoader(test_data, self.batch_size) 33 | self.testloaderfull = DataLoader(test_data, self.test_samples) 34 | self.trainloaderfull = DataLoader(train_data, self.train_samples) 35 | self.iter_trainloader = iter(self.trainloader) 36 | self.iter_testloader = iter(self.testloader) 37 | 38 | self.delta_model = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad] 39 | self.server_model = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad] 40 | 41 | # those parameters are for FEDL. 42 | self.local_model = copy.deepcopy(list(self.model.parameters())) 43 | self.server_grad = copy.deepcopy(list(self.model.parameters())) 44 | self.pre_local_grad = copy.deepcopy(list(self.model.parameters())) 45 | 46 | def set_parameters(self, server_model): 47 | for old_param, new_param, local_param, server_param in zip(self.model.parameters(), server_model.parameters(), self.local_model, self.server_model): 48 | old_param.data = new_param.data.clone() 49 | local_param.data = new_param.data.clone() 50 | server_param.data = new_param.data.clone() 51 | if(new_param.grad != None): 52 | if(old_param.grad == None): 53 | old_param.grad = torch.zeros_like(new_param.grad) 54 | 55 | if(local_param.grad == None): 56 | local_param.grad = torch.zeros_like(new_param.grad) 57 | 58 | old_param.grad.data = new_param.grad.data.clone() 59 | local_param.grad.data = new_param.grad.data.clone() 60 | #self.local_weight_updated = copy.deepcopy(self.optimizer.param_groups[0]['params']) 61 | 62 | def get_parameters(self): 63 | for param in self.model.parameters(): 64 | param.detach() 65 | return self.model.parameters() 66 | 67 | def clone_model_paramenter(self, param, clone_param): 68 | for param, clone_param in zip(param, clone_param): 69 | clone_param.data = param.data.clone() 70 | if(param.grad != None): 71 | if(clone_param.grad == None): 72 | clone_param.grad = torch.zeros_like(param.grad) 73 | clone_param.grad.data = param.grad.data.clone() 74 | 75 | return clone_param 76 | 77 | def get_updated_parameters(self): 78 | return self.local_weight_updated 79 | 80 | def update_parameters(self, new_params): 81 | for param, new_param in zip(self.model.parameters(), new_params): 82 | param.data = new_param.data.clone() 83 | param.grad.data = new_param.grad.data.clone() 84 | 85 | def get_grads(self, grads): 86 | self.optimizer.zero_grad() 87 | 88 | for x, y in self.trainloaderfull: 89 | output = self.model(x) 90 | loss = self.loss(output, y) 91 | loss.backward() 92 | self.clone_model_paramenter(self.model.parameters(), grads) 93 | #for param, grad in zip(self.model.parameters(), grads): 94 | # 
if(grad.grad == None): 95 | # grad.grad = torch.zeros_like(param.grad) 96 | # grad.grad.data = param.grad.data.clone() 97 | return grads 98 | 99 | def test(self): 100 | self.model.eval() 101 | test_acc = 0 102 | for x, y in self.testloaderfull: 103 | output = self.model(x) 104 | test_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item() 105 | #@loss += self.loss(output, y) 106 | #print(self.user_id + ", Test Accuracy:", test_acc / y.shape[0] ) 107 | #print(self.user_id + ", Test Loss:", loss) 108 | return test_acc, y.shape[0] 109 | 110 | def train_error_and_loss(self): 111 | self.model.eval() 112 | train_acc = 0 113 | loss = 0 114 | for x, y in self.trainloaderfull: 115 | output = self.model(x) 116 | train_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item() 117 | loss += self.loss(output, y) 118 | #print(self.user_id + ", Train Accuracy:", train_acc) 119 | #print(self.user_id + ", Train Loss:", loss) 120 | return train_acc, loss , self.train_samples 121 | 122 | 123 | def get_next_train_batch(self): 124 | try: 125 | # Samples a new batch for persionalizing 126 | (X, y) = next(self.iter_trainloader) 127 | except StopIteration: 128 | # restart the generator if the previous generator is exhausted. 129 | self.iter_trainloader = iter(self.trainloader) 130 | (X, y) = next(self.iter_trainloader) 131 | return (X, y) 132 | 133 | def get_next_test_batch(self): 134 | try: 135 | # Samples a new batch for persionalizing 136 | (X, y) = next(self.iter_testloader) 137 | except StopIteration: 138 | # restart the generator if the previous generator is exhausted. 139 | self.iter_testloader = iter(self.testloader) 140 | (X, y) = next(self.iter_testloader) 141 | return (X, y) 142 | 143 | def save_model(self): 144 | model_path = os.path.join("models", self.dataset) 145 | if not os.path.exists(model_path): 146 | os.makedirs(model_path) 147 | torch.save(self.model, os.path.join(model_path, "user_" + self.user_id + ".pt")) 148 | 149 | def load_model(self): 150 | model_path = os.path.join("models", self.dataset) 151 | self.model = torch.load(os.path.join(model_path, "server" + ".pt")) 152 | 153 | @staticmethod 154 | def model_exists(): 155 | return os.path.exists(os.path.join("models", "server" + ".pt")) 156 | 157 | def drop_lr(self): 158 | for group in self.optimizer.param_groups: 159 | group['lr'] *= self.lr_drop_rate 160 | if self.scheduler: 161 | group['initial_lr'] *= self.lr_drop_rate 162 | -------------------------------------------------------------------------------- /flearn/users/user_scaffold.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | import json 6 | from torch.utils.data import DataLoader 7 | from flearn.users.user_base import User 8 | from flearn.optimizers.fedoptimizer import * 9 | import math 10 | from torch.optim.lr_scheduler import StepLR 11 | 12 | 13 | # Implementation for SCAFFOLD clients 14 | 15 | class UserSCAFFOLD(User): 16 | def __init__(self, numeric_id, train_data, test_data, model, batch_size, learning_rate, L, local_epochs): 17 | super().__init__(numeric_id, train_data, test_data, model[0], batch_size, learning_rate, L, local_epochs) 18 | 19 | if model[1] == "linear": 20 | self.loss = nn.MSELoss() 21 | elif model[1] == "cnn": 22 | self.loss = nn.CrossEntropyLoss() 23 | else: 24 | self.loss = nn.NLLLoss() 25 | 26 | if model[1] == "cnn": 27 | layers = [self.model.conv1, self.model.conv2, self.model.conv3, self.model.fc1, self.model.fc2] 28 | 
weights = [{'params': layer.weight} for layer in layers] 29 | biases = [{'params': layer.bias, 'lr': 2 * self.learning_rate} for layer in layers] 30 | param_groups = [None] * (len(weights) + len(biases)) 31 | param_groups[::2] = weights 32 | param_groups[1::2] = biases 33 | self.optimizer = SCAFFOLDOptimizer(param_groups, lr=self.learning_rate, weight_decay=L) 34 | # self.optimizer = SCAFFOLDOptimizer([{'params': layer.weight} for layer in layers] + 35 | # [{'params': layer.bias, 'lr': 2 * self.learning_rate} for layer in 36 | # layers], 37 | # lr=self.learning_rate, weight_decay=L) 38 | 39 | self.scheduler = StepLR(self.optimizer, step_size=8, gamma=0.1) 40 | self.lr_drop_rate = 0.95 41 | else: 42 | self.optimizer = SCAFFOLDOptimizer(self.model.parameters(), lr=self.learning_rate, weight_decay=L) 43 | 44 | self.controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad] 45 | self.server_controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad] 46 | self.delta_controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad] 47 | self.csi = None 48 | 49 | def set_grads(self, new_grads): 50 | if isinstance(new_grads, nn.Parameter): 51 | for model_grad, new_grad in zip(self.model.parameters(), new_grads): 52 | model_grad.data = new_grad.data 53 | elif isinstance(new_grads, list): 54 | for idx, model_grad in enumerate(self.model.parameters()): 55 | model_grad.data = new_grads[idx] 56 | 57 | def train(self): 58 | self.model.train() 59 | grads = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad] 60 | self.get_grads(grads) 61 | for epoch in range(1, self.local_epochs + 1): 62 | self.model.train() 63 | for batch_idx, (X, y) in enumerate(self.trainloader): 64 | self.optimizer.zero_grad() 65 | output = self.model(X) 66 | loss = self.loss(output, y) 67 | loss.backward() 68 | self.optimizer.step(self.server_controls, self.controls) 69 | if self.scheduler: 70 | self.scheduler.step() 71 | 72 | # get model difference 73 | for local, server, delta in zip(self.model.parameters(), self.server_model, self.delta_model): 74 | delta.data = local.data.detach() - server.data.detach() 75 | 76 | # get client new controls 77 | new_controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad] 78 | opt = 2 79 | if opt == 1: 80 | for new_control, grad in zip(new_controls, grads): 81 | new_control.data = grad.grad 82 | if opt == 2: 83 | for server_control, control, new_control, delta in zip(self.server_controls, self.controls, new_controls, 84 | self.delta_model): 85 | a = 1 / (math.ceil(self.train_samples / self.batch_size) * self.learning_rate) 86 | new_control.data = control.data - server_control.data - delta.data * a 87 | 88 | # get controls differences 89 | for control, new_control, delta in zip(self.controls, new_controls, self.delta_controls): 90 | delta.data = new_control.data - control.data 91 | control.data = new_control.data 92 | 93 | return loss 94 | 95 | def get_params_norm(self): 96 | params = [] 97 | controls = [] 98 | 99 | for delta in self.delta_model: 100 | params.append(torch.flatten(delta.data)) 101 | 102 | for delta in self.delta_controls: 103 | controls.append(torch.flatten(delta.data)) 104 | 105 | # return torch.linalg.norm(torch.cat(params), 2) 106 | return float(torch.norm(torch.cat(params))), float(torch.norm(torch.cat(controls))) 107 | -------------------------------------------------------------------------------- /main.py: 
-------------------------------------------------------------------------------- 1 | from utils.plot_utils import * 2 | from simulate import simulate 3 | from data.Femnist.data_generator import generate_data as generate_femnist_data 4 | from data.CIFAR.data_generator import generate_data as generate_cifar10_data 5 | 6 | 7 | def generate_data(dataset, similarity): 8 | if dataset == 'CIFAR': 9 | generate_cifar10_data(similarity) 10 | elif dataset == 'Femnist': 11 | generate_femnist_data(similarity) 12 | 13 | 14 | cifar_dict = {"model": "cnn", 15 | "batch_size": 60, 16 | "learning_rate": 0.008, 17 | "local_epochs": 1, 18 | "L": 0.04, 19 | "users_per_round": 8} 20 | 21 | femnist_dict = {"model": "mclr", 22 | "batch_size": 4, 23 | "learning_rate": 0.001, 24 | "local_epochs": 1, 25 | "L": 0, 26 | "users_per_round": 20} 27 | 28 | input_dict = {} 29 | 30 | dataset = 'Femnist' 31 | if dataset == 'CIFAR': 32 | input_dict = cifar_dict 33 | elif dataset == 'Femnist': 34 | input_dict = femnist_dict 35 | 36 | num_glob_iters = 300 37 | times = 15 38 | algorithms = ["SCAFFOLD", "FedAvg"] 39 | noises = [True, False] 40 | similarities = [1, 0.1, 0] 41 | 42 | 43 | # for similarity in similarities: 44 | # generate_data(dataset, similarity) 45 | # for noise in noises: 46 | # for algorithm in algorithms: 47 | # simulate(**input_dict, dataset=dataset, algorithm=algorithm, similarity=similarity, noise=noise, 48 | # num_glob_iters=num_glob_iters, times=times) 49 | 50 | plot_accuracy(dataset, algorithms, noises, similarities, num_glob_iters) 51 | plot_norms(dataset, algorithms, noises, similarities, num_glob_iters) 52 | 53 | # plot_dict = get_plot_dict(input_dict, algorithms, epochs) 54 | # plot_norms(**plot_dict) 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /models/Femnist/server.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/models/Femnist/server.pt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | Pillow 4 | torch 5 | torchvision 6 | matplotlib 7 | tqdm -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_0.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_0.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_1.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_1.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_2.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_2.h5 
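The Femnist_*.h5 entries that follow are the per-run and averaged result files consumed by plot_accuracy/plot_norms in main.py. Below is a minimal driver sketch for regenerating them; it simply mirrors the commented-out experiment grid in main.py with its Femnist settings, is not part of the original scripts, and assumes it is run from the repository root.

from simulate import simulate
from data.Femnist.data_generator import generate_data as generate_femnist_data

# Femnist hyperparameters taken verbatim from femnist_dict in main.py
femnist_dict = {"model": "mclr", "batch_size": 4, "learning_rate": 0.001,
                "local_epochs": 1, "L": 0, "users_per_round": 20}

for similarity in [1, 0.1, 0]:            # same grid as `similarities` in main.py
    generate_femnist_data(similarity)     # rebuild the Femnist split at this similarity
    for noise in [True, False]:           # noisy and noiseless MAC runs
        for algorithm in ["SCAFFOLD", "FedAvg"]:
            # Each combination is expected to write results/Femnist_<algorithm>_<similarity>s_<run>.h5
            # per run, plus a *_avg.h5 aggregate (see average_data/average_norms in simulate.py).
            simulate(**femnist_dict, dataset="Femnist", algorithm=algorithm,
                     similarity=similarity, noise=noise,
                     num_glob_iters=300, times=15)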
-------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_3.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_3.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_4.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_4.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_5.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_5.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_6.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_6.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_7.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_7.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_8.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_8.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_9.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_9.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0.1s_avg.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_avg.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_0.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_0.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_1.h5: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_1.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_2.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_2.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_3.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_3.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_4.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_4.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_5.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_5.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_6.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_6.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_7.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_7.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_8.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_8.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_9.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_9.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_0s_avg.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_avg.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_0.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_0.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_1.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_1.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_2.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_2.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_3.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_3.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_4.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_4.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_5.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_5.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_6.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_6.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_7.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_7.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_8.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_8.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_9.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_9.h5 -------------------------------------------------------------------------------- /results/Femnist_FedAvg_1s_avg.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_avg.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_0.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_0.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_1.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_1.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_2.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_2.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_3.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_3.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_4.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_4.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_5.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_5.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_6.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_6.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_7.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_7.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_8.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_8.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_9.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_9.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0.1s_avg.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_avg.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_0.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_0.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_1.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_1.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_2.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_2.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_3.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_3.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_4.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_4.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_5.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_5.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_6.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_6.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_7.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_7.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_8.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_8.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_9.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_9.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_0s_avg.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_avg.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_0.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_0.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_1.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_1.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_2.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_2.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_3.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_3.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_4.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_4.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_5.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_5.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_6.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_6.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_7.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_7.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_8.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_8.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_9.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_9.h5 -------------------------------------------------------------------------------- /results/Femnist_SCAFFOLD_1s_avg.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_avg.h5 -------------------------------------------------------------------------------- /simulate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import h5py 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import argparse 6 | import importlib 
7 | import random 8 | import os 9 | from flearn.servers.server_avg import FedAvg 10 | from flearn.servers.server_scaffold import SCAFFOLD 11 | from flearn.trainmodel.models import * 12 | from utils.plot_utils import * 13 | import torch 14 | 15 | torch.manual_seed(0) 16 | 17 | 18 | def simulate(dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, local_epochs, users_per_round, 19 | similarity, noise, times): 20 | print("=" * 80) 21 | print("Summary of training process:") 22 | print(f"Algorithm: {algorithm}") 23 | print(f"Batch size : {batch_size}") 24 | print(f"Learing rate : {learning_rate}") 25 | print(f"Subset of users : {users_per_round if users_per_round else 'all users'}") 26 | print(f"Number of local rounds : {local_epochs}") 27 | print(f"Number of global rounds : {num_glob_iters}") 28 | print(f"Dataset : {dataset}") 29 | print(f"Data Similarity : {similarity}") 30 | print(f"Local Model : {model}") 31 | print("=" * 80) 32 | 33 | for i in range(times): 34 | print("---------------Running time:------------", i) 35 | 36 | # Generate model 37 | if model == "mclr": # for Mnist and Femnist datasets 38 | model = MclrLogistic(output_dim=47), model 39 | 40 | if model == "linear": # For Linear dataset 41 | model = LinearRegression(40, 1), model 42 | 43 | if model == "dnn": # for Mnist and Femnist datasets 44 | model = DNN(), model 45 | 46 | if model == "cnn": # for Cifar-10 dataset 47 | model = CifarNet(), model 48 | 49 | # select algorithm 50 | if algorithm == "FedAvg": 51 | server = FedAvg(dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, local_epochs, 52 | users_per_round, similarity, noise, i) 53 | 54 | if algorithm == "SCAFFOLD": 55 | server = SCAFFOLD(dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, local_epochs, 56 | users_per_round, similarity, noise, i) 57 | server.train() 58 | server.test() 59 | 60 | # Average data 61 | average_data(num_glob_iters=num_glob_iters, algorithm=algorithm, dataset=dataset, similarity=similarity, 62 | noise=noise, times=times) 63 | average_norms(num_glob_iters=num_glob_iters, algorithm=algorithm, dataset=dataset, similarity=similarity, 64 | noise=noise, times=times) 65 | 66 | if __name__ == "__main__": 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument("--dataset", type=str, default="CIFAR-10", 69 | choices=["CIFAR-10", "Mnist", "Linear_synthetic", "Logistic_synthetic"]) 70 | parser.add_argument("--similarity", type=int, default=1) 71 | parser.add_argument("--model", type=str, default="CIFAR-10", choices=["linear", "mclr", "dnn", "CIFAR-10"]) 72 | parser.add_argument("--batch_size", type=int, default=60) 73 | parser.add_argument("--learning_rate", type=float, default=0.008, help="Local learning rate") 74 | parser.add_argument("--hyper_learning_rate", type=float, default=0.02, help=" Learning rate of FEDL") 75 | parser.add_argument("--L", type=int, default=0.004, help="Regularization term") 76 | parser.add_argument("--num_glob_iters", type=int, default=250) 77 | parser.add_argument("--local_epochs", type=int, default=1) 78 | parser.add_argument("--algorithm", type=str, default="FedAvg", choices=["FEDL", "FedAvg", "SCAFFOLD"]) 79 | parser.add_argument("--clients_per_round", type=int, default=0, help="Number of Users per round") 80 | parser.add_argument("--rho", type=float, default=0, help="Condition Number") 81 | parser.add_argument("--noise", type=float, default=False, help="Applies noisy channel effect") 82 | parser.add_argument("--pre-coding", type=float, default=False, 
help="Applies pre-coding") 83 | parser.add_argument("--times", type=int, default=1, help="Running time") 84 | args = parser.parse_args() 85 | 86 | simulate(dataset=args.dataset, algorithm=args.algorithm, model=args.model, 87 | batch_size=args.batch_size, learning_rate=args.learning_rate, 88 | hyper_learning_rate=args.hyper_learning_rate, L=args.L, num_glob_iters=args.num_glob_iters, 89 | local_epochs=args.local_epochs, users_per_round=args.clients_per_round, 90 | rho=args.rho, similarity=args.similarity, noise=args.noise, times=args.times) 91 | -------------------------------------------------------------------------------- /utils/__pycache__/model_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/utils/__pycache__/model_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/plot_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/utils/__pycache__/plot_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | 7 | IMAGE_SIZE = 28 8 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 9 | NUM_CHANNELS = 1 10 | 11 | def suffer_data(data): 12 | data_x = data['x'] 13 | data_y = data['y'] 14 | # randomly shuffle data 15 | np.random.seed(100) 16 | rng_state = np.random.get_state() 17 | np.random.shuffle(data_x) 18 | np.random.set_state(rng_state) 19 | np.random.shuffle(data_y) 20 | return (data_x, data_y) 21 | 22 | def batch_data(data, batch_size): 23 | ''' 24 | data is a dict := {'x': [numpy array], 'y': [numpy array]} (on one client) 25 | returns x, y, which are both numpy array of length: batch_size 26 | ''' 27 | data_x = data['x'] 28 | data_y = data['y'] 29 | 30 | # randomly shuffle data 31 | np.random.seed(100) 32 | rng_state = np.random.get_state() 33 | np.random.shuffle(data_x) 34 | np.random.set_state(rng_state) 35 | np.random.shuffle(data_y) 36 | 37 | # loop through mini-batches 38 | for i in range(0, len(data_x), batch_size): 39 | batched_x = data_x[i:i+batch_size] 40 | batched_y = data_y[i:i+batch_size] 41 | yield (batched_x, batched_y) 42 | 43 | 44 | def get_random_batch_sample(data_x, data_y, batch_size): 45 | num_parts = len(data_x)//batch_size + 1 46 | if(len(data_x) > batch_size): 47 | batch_idx = np.random.choice(list(range(num_parts +1))) 48 | sample_index = batch_idx*batch_size 49 | if(sample_index + batch_size > len(data_x)): 50 | return (data_x[sample_index:], data_y[sample_index:]) 51 | else: 52 | return (data_x[sample_index: sample_index+batch_size], data_y[sample_index: sample_index+batch_size]) 53 | else: 54 | return (data_x,data_y) 55 | 56 | 57 | def get_batch_sample(data, batch_size): 58 | data_x = data['x'] 59 | data_y = data['y'] 60 | 61 | np.random.seed(100) 62 | rng_state = np.random.get_state() 63 | np.random.shuffle(data_x) 64 | np.random.set_state(rng_state) 65 | np.random.shuffle(data_y) 66 | 67 | batched_x = data_x[0:batch_size] 68 | batched_y = data_y[0:batch_size] 
69 | return (batched_x, batched_y) 70 | 71 | def read_data(dataset): 72 | '''parses data in given train and test data directories 73 | 74 | assumes: 75 | - the data in the input directories are .json files with 76 | keys 'users' and 'user_data' 77 | - the set of train set users is the same as the set of test set users 78 | 79 | Return: 80 | clients: list of client ids 81 | groups: list of group ids; empty list if none found 82 | train_data: dictionary of train data 83 | test_data: dictionary of test data 84 | ''' 85 | train_data_dir = os.path.join('data', dataset, 'data', 'train') 86 | test_data_dir = os.path.join('data', dataset, 'data', 'test') 87 | clients = [] 88 | groups = [] 89 | train_data = {} 90 | test_data = {} 91 | 92 | train_files = os.listdir(train_data_dir) 93 | train_files = [f for f in train_files if f.endswith('.json')] 94 | for f in train_files: 95 | file_path = os.path.join(train_data_dir, f) 96 | with open(file_path, 'r') as inf: 97 | cdata = json.load(inf) 98 | clients.extend(cdata['users']) 99 | if 'hierarchies' in cdata: 100 | groups.extend(cdata['hierarchies']) 101 | train_data.update(cdata['user_data']) 102 | 103 | test_files = os.listdir(test_data_dir) 104 | test_files = [f for f in test_files if f.endswith('.json')] 105 | for f in test_files: 106 | file_path = os.path.join(test_data_dir, f) 107 | with open(file_path, 'r') as inf: 108 | cdata = json.load(inf) 109 | test_data.update(cdata['user_data']) 110 | 111 | clients = list(sorted(train_data.keys())) 112 | 113 | return clients, groups, train_data, test_data 114 | 115 | def read_user_data(index,data,dataset): 116 | id = data[0][index] 117 | train_data = data[2][id] 118 | test_data = data[3][id] 119 | X_train, y_train, X_test, y_test = train_data['x'], train_data['y'], test_data['x'], test_data['y'] 120 | if dataset == "Mnist": 121 | X_train = torch.Tensor(X_train).view(-1, NUM_CHANNELS, IMAGE_SIZE, IMAGE_SIZE).type(torch.float32) 122 | y_train = torch.Tensor(y_train).type(torch.int64) 123 | X_test = torch.Tensor(X_test).view(-1, NUM_CHANNELS, IMAGE_SIZE, IMAGE_SIZE).type(torch.float32) 124 | y_test = torch.Tensor(y_test).type(torch.int64) 125 | elif dataset == "Linear_synthetic": 126 | X_train = torch.Tensor(X_train).type(torch.float32) 127 | y_train = torch.Tensor(y_train).type(torch.float32).unsqueeze(1) 128 | X_test = torch.Tensor(X_test).type(torch.float32) 129 | y_test = torch.Tensor(y_test).type(torch.float32).unsqueeze(1) 130 | #y_train = torch.flatten(y_train, 1) 131 | #y_test = torch.flatten(y_test, 1) 132 | #print(y_test.size(),y_train.size()) 133 | elif dataset == "CIFAR-10": 134 | X_train = torch.Tensor(X_train).view(-1, 3, 32, 32).type(torch.float32) 135 | y_train = torch.Tensor(y_train).type(torch.int64) 136 | X_test = torch.Tensor(X_test).view(-1, 3, 32, 32).type(torch.float32) 137 | y_test = torch.Tensor(y_test).type(torch.int64) 138 | else: 139 | X_train = torch.Tensor(X_train).type(torch.float32) 140 | y_train = torch.Tensor(y_train).type(torch.int64) 141 | X_test = torch.Tensor(X_test).type(torch.float32) 142 | y_test = torch.Tensor(y_test).type(torch.int64) 143 | train_data = [(x, y) for x, y in zip(X_train, y_train)] 144 | test_data = [(x, y) for x, y in zip(X_test, y_test)] 145 | return id, train_data, test_data 146 | 147 | class Metrics(object): 148 | def __init__(self, clients, params): 149 | self.params = params 150 | num_rounds = params['num_rounds'] 151 | self.bytes_written = {c.user_id: [0] * num_rounds for c in clients} 152 | self.client_computations = {c.user_id: [0] * num_rounds 
for c in clients} 153 | self.bytes_read = {c.user_id: [0] * num_rounds for c in clients} 154 | self.accuracies = [] 155 | self.train_accuracies = [] 156 | 157 | def update(self, rnd, cid, stats): 158 | bytes_w, comp, bytes_r = stats 159 | self.bytes_written[cid][rnd] += bytes_w 160 | self.client_computations[cid][rnd] += comp 161 | self.bytes_read[cid][rnd] += bytes_r 162 | 163 | def write(self): 164 | metrics = {} 165 | metrics['dataset'] = self.params['dataset'] 166 | metrics['num_rounds'] = self.params['num_rounds'] 167 | metrics['eval_every'] = self.params['eval_every'] 168 | metrics['learning_rate'] = self.params['learning_rate'] 169 | metrics['mu'] = self.params['mu'] 170 | metrics['num_epochs'] = self.params['num_epochs'] 171 | metrics['batch_size'] = self.params['batch_size'] 172 | metrics['accuracies'] = self.accuracies 173 | metrics['train_accuracies'] = self.train_accuracies 174 | metrics['client_computations'] = self.client_computations 175 | metrics['bytes_written'] = self.bytes_written 176 | metrics['bytes_read'] = self.bytes_read 177 | metrics_dir = os.path.join('out', self.params['dataset'], 'metrics_{}_{}_{}_{}_{}.json'.format( 178 | self.params['seed'], self.params['optimizer'], self.params['learning_rate'], self.params['num_epochs'], self.params['mu'])) 179 | #os.mkdir(os.path.join('out', self.params['dataset'])) 180 | if not os.path.exists('out'): 181 | os.mkdir('out') 182 | if not os.path.exists(os.path.join('out', self.params['dataset'])): 183 | os.mkdir(os.path.join('out', self.params['dataset'])) 184 | with open(metrics_dir, 'w') as ouf: 185 | json.dump(metrics, ouf) 186 | -------------------------------------------------------------------------------- /utils/old_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import h5py 3 | import numpy as np 4 | from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset 5 | 6 | plt.rcParams.update({'font.size': 14}) 7 | 8 | 9 | def simple_read_data(loc_ep, alg): 10 | hf = h5py.File("./results/" + '{}_{}.h5'.format(alg, loc_ep), 'r') 11 | rs_glob_acc = np.array(hf.get('rs_glob_acc')[:]) 12 | rs_train_acc = np.array(hf.get('rs_train_acc')[:]) 13 | rs_train_loss = np.array(hf.get('rs_train_loss')[:]) 14 | return rs_train_acc, rs_train_loss, rs_glob_acc 15 | 16 | 17 | def get_training_data_value(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], 18 | hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho=[], dataset=""): 19 | Numb_Algs = len(algorithms_list) 20 | train_acc = np.zeros((Numb_Algs, Numb_Glob_Iters)) 21 | train_loss = np.zeros((Numb_Algs, Numb_Glob_Iters)) 22 | glob_acc = np.zeros((Numb_Algs, Numb_Glob_Iters)) 23 | algs_lbl = algorithms_list.copy() 24 | for i in range(Numb_Algs): 25 | if (lamb[i] > 0): 26 | algorithms_list[i] = algorithms_list[i] + "_prox_" + str(lamb[i]) 27 | algs_lbl[i] = algs_lbl[i] + "_prox" 28 | 29 | string_learning_rate = str(learning_rate[i]) 30 | 31 | if (algorithms_list[i] == "FEDL"): 32 | string_learning_rate = string_learning_rate + "_" + str(hyper_learning_rate[i]) 33 | algorithms_list[i] = algorithms_list[i] + \ 34 | "_" + string_learning_rate + "_" + str(num_users) + \ 35 | "u" + "_" + str(batch_size[i]) + "b" + "_" + str(loc_ep1[i]) 36 | if (rho[i] > 0): 37 | algorithms_list[i] += "_" + str(rho[i]) + "p" 38 | 39 | train_acc[i, :], train_loss[i, :], glob_acc[i, :] = np.array( 40 | simple_read_data("avg", dataset + "_" + algorithms_list[i]))[:, :Numb_Glob_Iters] 
41 | algs_lbl[i] = algs_lbl[i] 42 | return glob_acc, train_acc, train_loss 43 | 44 | 45 | def get_data_label_style(input_data=[], linestyles=[], algs_lbl=[], lamb=[], loc_ep1=0, batch_size=0): 46 | data, lstyles, labels = [], [], [] 47 | for i in range(len(algs_lbl)): 48 | data.append(input_data[i, ::]) 49 | lstyles.append(linestyles[i]) 50 | labels.append(algs_lbl[i] + str(lamb[i]) + "_" + 51 | str(loc_ep1[i]) + "e" + "_" + str(batch_size[i]) + "b") 52 | 53 | return data, lstyles, labels 54 | 55 | 56 | def average_smooth(data, window_len=10, window='hanning'): 57 | results = [] 58 | if window_len < 3: 59 | return data 60 | for i in range(len(data)): 61 | x = data[i] 62 | s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]] 63 | # print(len(s)) 64 | if window == 'flat': # moving average 65 | w = np.ones(window_len, 'd') 66 | else: 67 | w = eval('numpy.' + window + '(window_len)') 68 | 69 | y = np.convolve(w / w.sum(), s, mode='valid') 70 | results.append(y[window_len - 1:]) 71 | return np.array(results) 72 | 73 | 74 | def plot_summary_one_figure(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], 75 | hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho=[], dataset=""): 76 | Numb_Algs = len(algorithms_list) 77 | # glob_acc, train_acc, train_loss = get_training_data_value( 78 | # users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset) 79 | 80 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, 81 | learning_rate, hyper_learning_rate, algorithms_list, 82 | batch_size, rho, dataset) 83 | glob_acc = average_smooth(glob_acc_, window='flat') 84 | train_loss = average_smooth(train_loss_, window='flat') 85 | train_acc = average_smooth(train_acc_, window='flat') 86 | 87 | plt.figure(1) 88 | MIN = train_loss.min() - 0.001 89 | start = 0 90 | linestyles = ['-', '--', '-.', ':', '-', '--', '-.', ':', ':'] 91 | plt.grid(True) 92 | for i in range(Numb_Algs): 93 | plt.plot(train_acc[i, 1:], linestyle=linestyles[i], 94 | label=algorithms_list[i] + str(lamb[i]) + "_" + str(loc_ep1[i]) + "e" + "_" + str(batch_size[i]) + "b") 95 | plt.legend(loc='lower right') 96 | plt.ylabel('Training Accuracy') 97 | plt.xlabel('Global rounds ' + '$K_g$') 98 | plt.title(dataset.upper()) 99 | # plt.ylim([0.8, glob_acc.max()]) 100 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.png', bbox_inches="tight") 101 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf') 102 | plt.figure(2) 103 | 104 | plt.grid(True) 105 | for i in range(Numb_Algs): 106 | plt.plot(train_loss[i, start:], linestyle=linestyles[i], label=algorithms_list[i] + str(lamb[i]) + 107 | "_" + str(loc_ep1[i]) + "e" + "_" + str( 108 | batch_size[i]) + "b") 109 | # plt.plot(train_loss1[i, 1:], label=algs_lbl1[i]) 110 | plt.legend(loc='upper right') 111 | plt.ylabel('Training Loss') 112 | plt.xlabel('Global rounds') 113 | plt.title(dataset.upper()) 114 | # plt.ylim([train_loss.min(), 0.5]) 115 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight") 116 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf') 117 | plt.figure(3) 118 | plt.grid(True) 119 | for i in range(Numb_Algs): 120 | plt.plot(glob_acc[i, start:], linestyle=linestyles[i], 121 | label=algorithms_list[i] + str(lamb[i]) + "_" + str(loc_ep1[i]) + "e" + "_" + str(batch_size[i]) + "b") 122 | # plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i]) 123 | plt.legend(loc='lower right') 124 | # 
plt.ylim([0.6, glob_acc.max()]) 125 | plt.ylabel('Test Accuracy') 126 | plt.xlabel('Global rounds ') 127 | plt.title(dataset.upper()) 128 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight") 129 | # plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf') 130 | 131 | 132 | def get_max_value_index(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], algorithms_list=[], 133 | batch_size=0, dataset=""): 134 | Numb_Algs = len(algorithms_list) 135 | glob_acc, train_acc, train_loss = get_training_data_value( 136 | num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, algorithms_list, batch_size, dataset) 137 | for i in range(Numb_Algs): 138 | print("Algorithm: ", algorithms_list[i], "Max testing Accurancy: ", glob_acc[i].max( 139 | ), "Index: ", np.argmax(glob_acc[i]), "local update:", loc_ep1[i]) 140 | 141 | 142 | def plot_summary_mnist(num_users=100, loc_ep1=[], Numb_Glob_Iters=10, lamb=[], learning_rate=[], hyper_learning_rate=[], 143 | algorithms_list=[], batch_size=0, rho=[], dataset=""): 144 | Numb_Algs = len(algorithms_list) 145 | 146 | # glob_acc, train_acc, train_loss = get_training_data_value(users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 147 | 148 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, 149 | learning_rate, hyper_learning_rate, algorithms_list, 150 | batch_size, rho, dataset) 151 | glob_acc = average_smooth(glob_acc_, window='flat') 152 | train_loss = average_smooth(train_loss_, window='flat') 153 | train_acc = average_smooth(train_acc_, window='flat') 154 | 155 | for i in range(Numb_Algs): 156 | print(algorithms_list[i], "acc:", glob_acc[i].max()) 157 | print(algorithms_list[i], "loss:", train_loss[i].min()) 158 | 159 | plt.figure(1) 160 | 161 | linestyles = ['-', '--', '-.', ':'] 162 | algs_lbl = ["FEDL", "FedAvg", 163 | "FEDL", "FedAvg", 164 | "FEDL", "FedAvg", 165 | "FEDL", "FEDL"] 166 | 167 | fig = plt.figure(figsize=(12, 4)) 168 | ax = fig.add_subplot(111) # The big subplot 169 | ax1 = fig.add_subplot(131) 170 | ax2 = fig.add_subplot(132) 171 | ax3 = fig.add_subplot(133) 172 | ax1.grid(True) 173 | ax2.grid(True) 174 | ax3.grid(True) 175 | # min = train_loss.min() 176 | min = train_loss.min() - 0.001 177 | max = 0.46 178 | # max = train_loss.max() + 0.01 179 | num_al = 2 180 | # Turn off axis lines and ticks of the big subplot 181 | ax.spines['top'].set_color('none') 182 | ax.spines['bottom'].set_color('none') 183 | ax.spines['left'].set_color('none') 184 | ax.spines['right'].set_color('none') 185 | ax.tick_params(labelcolor='w', top='off', 186 | bottom='off', left='off', right='off') 187 | for i in range(num_al): 188 | stringbatch = str(batch_size[i]) 189 | if (stringbatch == '0'): 190 | stringbatch = '$\infty$' 191 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i], 192 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i])) 193 | ax1.set_ylim([min, max]) 194 | ax1.legend(loc='upper right', prop={'size': 10}) 195 | 196 | for i in range(num_al): 197 | stringbatch = str(batch_size[i + 2]) 198 | if (stringbatch == '0'): 199 | stringbatch = '$\infty$' 200 | ax2.plot(train_loss[i + num_al, 1:], linestyle=linestyles[i], 201 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 202 | hyper_learning_rate[i + num_al])) 203 | ax2.set_ylim([min, max]) 204 | ax2.legend(loc='upper right', prop={'size': 10}) 205 | 
206 | for i in range(4): 207 | stringbatch = str(batch_size[i + 4]) 208 | if (stringbatch == '0'): 209 | stringbatch = '$\infty$' 210 | ax3.plot(train_loss[i + num_al * 2, 1:], linestyle=linestyles[i], 211 | label=algs_lbl[i + num_al * 2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 212 | hyper_learning_rate[i + num_al * 2])) 213 | ax3.set_ylim([min, max]) 214 | ax3.legend(loc='upper right', prop={'size': 10}) 215 | 216 | ax.set_title('MNIST', y=1.02) 217 | ax.set_xlabel('Global rounds ' + '$K_g$') 218 | ax.set_ylabel('Training Loss', labelpad=15) 219 | plt.savefig(dataset + str(loc_ep1[1]) + 220 | 'train_loss.pdf', bbox_inches='tight') 221 | plt.savefig(dataset + str(loc_ep1[1]) + 222 | 'train_loss.png', bbox_inches='tight') 223 | 224 | fig = plt.figure(figsize=(12, 4)) 225 | ax = fig.add_subplot(111) # The big subplot 226 | ax1 = fig.add_subplot(131) 227 | ax2 = fig.add_subplot(132) 228 | ax3 = fig.add_subplot(133) 229 | ax1.grid(True) 230 | ax2.grid(True) 231 | ax3.grid(True) 232 | # min = train_loss.min() 233 | min = 0.82 234 | max = glob_acc.max() + 0.001 # train_loss.max() + 0.01 235 | num_al = 2 236 | # Turn off axis lines and ticks of the big subplot 237 | ax.spines['top'].set_color('none') 238 | ax.spines['bottom'].set_color('none') 239 | ax.spines['left'].set_color('none') 240 | ax.spines['right'].set_color('none') 241 | ax.tick_params(labelcolor='w', top='off', 242 | bottom='off', left='off', right='off') 243 | for i in range(num_al): 244 | stringbatch = str(batch_size[i]) 245 | if (stringbatch == '0'): 246 | stringbatch = '$\infty$' 247 | ax1.plot(glob_acc[i, 1:], linestyle=linestyles[i], 248 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i])) 249 | ax1.set_ylim([min, max]) 250 | ax1.legend(loc='lower right', prop={'size': 10}) 251 | 252 | for i in range(num_al): 253 | stringbatch = str(batch_size[i + 2]) 254 | if (stringbatch == '0'): 255 | stringbatch = '$\infty$' 256 | ax2.plot(glob_acc[i + num_al, 1:], linestyle=linestyles[i], 257 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 258 | hyper_learning_rate[i + num_al * 1])) 259 | ax2.set_ylim([min, max]) 260 | ax2.legend(loc='lower right', prop={'size': 10}) 261 | 262 | for i in range(4): 263 | stringbatch = str(batch_size[i + 4]) 264 | if (stringbatch == '0'): 265 | stringbatch = '$\infty$' 266 | ax3.plot(glob_acc[i + num_al * 2, 1:], linestyle=linestyles[i], 267 | label=algs_lbl[i + num_al * 2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 268 | hyper_learning_rate[i + num_al * 2])) 269 | ax3.set_ylim([min, max]) 270 | ax3.legend(loc='lower right', prop={'size': 10}) 271 | 272 | ax.set_title('MNIST', y=1.02) 273 | ax.set_xlabel('Global rounds ' + '$K_g$') 274 | ax.set_ylabel('Testing Accuracy', labelpad=15) 275 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.pdf', bbox_inches='tight') 276 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.png', bbox_inches='tight') 277 | 278 | 279 | def plot_summary_nist(num_users=100, loc_ep1=[], Numb_Glob_Iters=10, lamb=[], learning_rate=[], hyper_learning_rate=[], 280 | algorithms_list=[], batch_size=0, rho=[], dataset=""): 281 | Numb_Algs = len(algorithms_list) 282 | # glob_acc, train_acc, train_loss = get_training_data_value( users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset) 283 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, 284 | 
learning_rate, hyper_learning_rate, algorithms_list, 285 | batch_size, rho, dataset) 286 | glob_acc = average_smooth(glob_acc_, window='flat') 287 | train_loss = average_smooth(train_loss_, window='flat') 288 | train_acc = average_smooth(train_acc_, window='flat') 289 | for i in range(Numb_Algs): 290 | print(algorithms_list[i], "acc:", glob_acc[i].max()) 291 | print(algorithms_list[i], "loss:", train_loss[i].max()) 292 | plt.figure(1) 293 | 294 | linestyles = ['-', '--', '-.', ':'] 295 | algs_lbl = ["FEDL", "FedAvg", "FEDL", 296 | "FEDL", "FedAvg", "FEDL", 297 | "FEDL", "FedAvg", "FEDL"] 298 | fig = plt.figure(figsize=(12, 4)) 299 | 300 | ax = fig.add_subplot(111) # The big subplot 301 | ax1 = fig.add_subplot(131) 302 | ax2 = fig.add_subplot(132) 303 | ax3 = fig.add_subplot(133) 304 | ax1.grid(True) 305 | ax2.grid(True) 306 | ax3.grid(True) 307 | # min = train_loss.min() 308 | min = train_loss.min() - 0.01 309 | max = 3 # train_loss.max() + 0.01 310 | num_al = 3 311 | # Turn off axis lines and ticks of the big subplot 312 | ax.spines['top'].set_color('none') 313 | ax.spines['bottom'].set_color('none') 314 | ax.spines['left'].set_color('none') 315 | ax.spines['right'].set_color('none') 316 | ax.tick_params(labelcolor='w', top='off', 317 | bottom='off', left='off', right='off') 318 | for i in range(num_al): 319 | stringbatch = str(batch_size[i]) 320 | if (stringbatch == '0'): 321 | stringbatch = '$\infty$' 322 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i], 323 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 324 | hyper_learning_rate[i]) + ', $K_l = $' + str(loc_ep1[i])) 325 | ax1.set_ylim([min, max]) 326 | ax1.legend(loc='upper right', prop={'size': 10}) 327 | 328 | for i in range(num_al): 329 | stringbatch = str(batch_size[i + num_al]) 330 | if (stringbatch == '0'): 331 | stringbatch = '$\infty$' 332 | ax2.plot(train_loss[i + num_al, 1:], linestyle=linestyles[i], 333 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 334 | hyper_learning_rate[i + num_al]) + ', $K_l = $' + str(loc_ep1[i + num_al])) 335 | ax2.set_ylim([min, max]) 336 | ax2.legend(loc='upper right', prop={'size': 10}) 337 | 338 | for i in range(num_al): 339 | stringbatch = str(batch_size[i + num_al * 2]) 340 | if (stringbatch == '0'): 341 | stringbatch = '$\infty$' 342 | ax3.plot(train_loss[i + num_al * 2, 1:], linestyle=linestyles[i], 343 | label=algs_lbl[i + num_al * 2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 344 | hyper_learning_rate[i + num_al * 2]) + ', $K_l = $' + str(loc_ep1[i + num_al * 2])) 345 | ax3.set_ylim([min, max]) 346 | ax3.legend(loc='upper right', prop={'size': 10}) 347 | 348 | ax.set_title('FEMNIST', y=1.02) 349 | ax.set_xlabel('Global rounds ' + '$K_g$') 350 | ax.set_ylabel('Training Loss', labelpad=15) 351 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches='tight') 352 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.png', bbox_inches='tight') 353 | 354 | fig = plt.figure(figsize=(12, 4)) 355 | ax = fig.add_subplot(111) # The big subplot 356 | ax1 = fig.add_subplot(131) 357 | ax2 = fig.add_subplot(132) 358 | ax3 = fig.add_subplot(133) 359 | ax1.grid(True) 360 | ax2.grid(True) 361 | ax3.grid(True) 362 | # min = train_loss.min() 363 | num_al = 3 364 | min = 0.3 365 | max = glob_acc.max() + 0.01 # train_loss.max() + 0.01 366 | # Turn off axis lines and ticks of the big subplot 367 | ax.spines['top'].set_color('none') 368 | ax.spines['bottom'].set_color('none') 369 | 
ax.spines['left'].set_color('none') 370 | ax.spines['right'].set_color('none') 371 | ax.tick_params(labelcolor='w', top='off', 372 | bottom='off', left='off', right='off') 373 | for i in range(num_al): 374 | stringbatch = str(batch_size[i]) 375 | if (stringbatch == '0'): 376 | stringbatch = '$\infty$' 377 | ax1.plot(glob_acc[i, 1:], linestyle=linestyles[i], 378 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 379 | hyper_learning_rate[i]) + ', $K_l = $' + str(loc_ep1[i])) 380 | ax1.set_ylim([min, max]) 381 | ax1.legend(loc='lower right', prop={'size': 10}) 382 | 383 | for i in range(num_al): 384 | stringbatch = str(batch_size[i + num_al]) 385 | if (stringbatch == '0'): 386 | stringbatch = '$\infty$' 387 | ax2.plot(glob_acc[i + num_al, 1:], linestyle=linestyles[i], 388 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 389 | hyper_learning_rate[i + num_al * 1]) + ', $K_l = $' + str(loc_ep1[i + num_al])) 390 | ax2.set_ylim([min, max]) 391 | ax2.legend(loc='lower right', prop={'size': 10}) 392 | 393 | for i in range(num_al): 394 | stringbatch = str(batch_size[i + num_al * 2]) 395 | if (stringbatch == '0'): 396 | stringbatch = '$\infty$' 397 | ax3.plot(glob_acc[i + num_al * 2, 1:], linestyle=linestyles[i], 398 | label=algs_lbl[i + num_al * 2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str( 399 | hyper_learning_rate[i + num_al * 2]) + ', $K_l = $' + str(loc_ep1[i + 2 * num_al])) 400 | ax3.set_ylim([min, max]) 401 | ax3.legend(loc='lower right', prop={'size': 10}) 402 | 403 | ax.set_title('FEMNIST', y=1.02) 404 | ax.set_xlabel('Global rounds ' + '$K_g$') 405 | ax.set_ylabel('Testing Accuracy', labelpad=15) 406 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.pdf', bbox_inches='tight') 407 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.png', bbox_inches='tight') 408 | 409 | 410 | def plot_summary_linear(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], hyper_learning_rate=[], 411 | algorithms_list=[], batch_size=0, rho=[], dataset=""): 412 | Numb_Algs = len(algorithms_list) 413 | glob_acc, train_acc, train_loss = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, 414 | hyper_learning_rate, algorithms_list, batch_size, rho, 415 | dataset) 416 | for i in range(Numb_Algs): 417 | print(algorithms_list[i], "loss:", glob_acc[i].max()) 418 | plt.figure(1) 419 | 420 | linestyles = ['-', '-', '-', '-'] 421 | markers = ["o", "v", "s", "*", "x", "P"] 422 | algs_lbl = ["FEDL", "FEDL", "FEDL", "FEDL", 423 | "FEDL", "FEDL", "FEDL", "FEDL", 424 | "FEDL", "FEDL", "FEDL", "FEDL"] 425 | fig = plt.figure(figsize=(12, 4)) 426 | ax = fig.add_subplot(111) # The big subplot 427 | ax1 = fig.add_subplot(131) 428 | ax2 = fig.add_subplot(132) 429 | ax3 = fig.add_subplot(133) 430 | # min = train_loss.min() 431 | num_al = 4 432 | # Turn off axis lines and ticks of the big subplot 433 | ax.spines['top'].set_color('none') 434 | ax.spines['bottom'].set_color('none') 435 | ax.spines['left'].set_color('none') 436 | ax.spines['right'].set_color('none') 437 | ax.tick_params(labelcolor='w', top='off', 438 | bottom='off', left='off', right='off') 439 | for i in range(num_al): 440 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i], 441 | label=algs_lbl[i] + ": " + '$\eta = $' + str(hyper_learning_rate[i]), marker=markers[i], markevery=0.4, 442 | markersize=5) 443 | 444 | ax1.hlines(y=0.035, xmin=0, xmax=200, linestyle='--', label="optimal solution", color="m") 445 | ax1.legend(loc='upper right', 
prop={'size': 10}) 446 | ax1.set_ylim([0.02, 0.5]) 447 | ax1.set_title('$\\rho = $' + str(rho[0])) 448 | ax1.grid(True) 449 | for i in range(num_al): 450 | str_rho = ', $\eta = $' + str(rho[i]) 451 | ax2.plot(train_loss[i + num_al, 1:], linestyle=linestyles[i], 452 | label=algs_lbl[i + num_al] + ": " + '$\eta = $' + str(hyper_learning_rate[i + num_al]), 453 | marker=markers[i], markevery=0.4, markersize=5) 454 | 455 | ax2.hlines(y=0.035, xmin=0, xmax=200, linestyle='--', label="optimal solution", color="m") 456 | ax2.set_ylim([0.02, 0.5]) 457 | # ax2.legend(loc='upper right') 458 | ax2.set_title('$\\rho = $' + str(rho[0 + num_al])) 459 | ax2.grid(True) 460 | for i in range(num_al): 461 | str_rho = ', $\rho = $' + str(rho[i]) 462 | ax3.plot(train_loss[i + num_al * 2, 1:], linestyle=linestyles[i], 463 | label=algs_lbl[i + num_al * 2] + ": " + '$\eta = $' + str(hyper_learning_rate[i + num_al * 2]), 464 | marker=markers[i], markevery=0.4, markersize=5) 465 | 466 | ax3.hlines(y=0.035, xmin=0, xmax=200, linestyle='--', 467 | label="optimal solution", color="m") 468 | ax3.set_ylim([0.02, 0.5]) 469 | # ax3.legend(loc='upper right') 470 | ax3.set_title('$\\rho = $' + str(rho[0 + 2 * num_al])) 471 | ax3.grid(True) 472 | ax.set_title('Synthetic dataset', y=1.1) 473 | ax.set_xlabel('Global rounds ' + '$K_g$') 474 | ax.set_ylabel('Training Loss') 475 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches='tight') 476 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.png', bbox_inches='tight') 477 | 478 | 479 | def get_all_training_data_value(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=0, learning_rate=0, 480 | hyper_learning_rate=0, algorithms="", batch_size=0, dataset="", rho=0, times=5): 481 | train_acc = np.zeros((times, Numb_Glob_Iters)) 482 | train_loss = np.zeros((times, Numb_Glob_Iters)) 483 | glob_acc = np.zeros((times, Numb_Glob_Iters)) 484 | algorithms_list = [algorithms] * times 485 | 486 | for i in range(times): 487 | if (lamb > 0): 488 | algorithms_list[i] = algorithms_list[i] + "_prox_" + str(lamb) 489 | 490 | string_learning_rate = str(learning_rate) 491 | 492 | if (algorithms_list[i] == "FEDL"): 493 | string_learning_rate = string_learning_rate + "_" + str(hyper_learning_rate) 494 | 495 | algorithms_list[i] = algorithms_list[i] + "_" + string_learning_rate + "_" + str(num_users) + "u" + "_" + str( 496 | batch_size) + "b" + "_" + str(loc_ep1) 497 | 498 | if (rho > 0): 499 | algorithms_list[i] += "_" + str(rho) + "p" 500 | 501 | train_acc[i, :], train_loss[i, :], glob_acc[i, :] = np.array( 502 | simple_read_data(str(i), dataset + "_" + algorithms_list[i]))[:, :Numb_Glob_Iters] 503 | 504 | return glob_acc, train_acc, train_loss 505 | 506 | 507 | def average_data(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms, batch_size, 508 | dataset, rho, times): 509 | glob_acc, train_acc, train_loss = get_all_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, 510 | learning_rate, hyper_learning_rate, algorithms, 511 | batch_size, dataset, rho, times) 512 | # store average value to h5 file 513 | glob_acc_data = np.average(glob_acc, axis=0) 514 | train_acc_data = np.average(train_acc, axis=0) 515 | train_loss_data = np.average(train_loss, axis=0) 516 | 517 | max_accurancy = [] 518 | for i in range(times): 519 | max_accurancy.append(glob_acc[i].max()) 520 | print("std:", np.std(max_accurancy)) 521 | print("Mean:", np.mean(max_accurancy)) 522 | 523 | alg = dataset + "_" + algorithms 524 | alg += "_" + str(learning_rate) 525 | 
526 | if (algorithms == "FEDL"): 527 | alg += "_" + str(hyper_learning_rate) 528 | 529 | alg += "_" + str(num_users) + "u" + "_" + str(batch_size) + "b" + "_" + str(loc_ep1) 530 | 531 | if (lamb > 0): 532 | alg += "_" + str(lamb) + "L" 533 | 534 | if (rho > 0): 535 | alg += "_" + str(rho) + "p" 536 | 537 | # alg = alg + "_" + str(learning_rate) + "_" + str(hyper_learning_rate) + "_" + str(lamb) + "_" + str(users_per_round) + "u" + "_" + str(batch_size) + "b" + "_" + str(loc_ep1) 538 | alg = alg + "_" + "avg" 539 | if (len(glob_acc) != 0 & len(train_acc) & len(train_loss)): 540 | with h5py.File("./results/" + '{}.h5'.format(alg, loc_ep1), 'w') as hf: 541 | hf.create_dataset('rs_glob_acc', data=glob_acc_data) 542 | hf.create_dataset('rs_train_acc', data=train_acc_data) 543 | hf.create_dataset('rs_train_loss', data=train_loss_data) 544 | hf.close() 545 | return 0 546 | 547 | 548 | def plot_summary_one_mnist(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], 549 | hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho=[], dataset=""): 550 | Numb_Algs = len(algorithms_list) 551 | # glob_acc, train_acc, train_loss = get_training_data_value( 552 | # users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset) 553 | 554 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, 555 | learning_rate, hyper_learning_rate, algorithms_list, 556 | batch_size, rho, dataset) 557 | glob_acc = average_smooth(glob_acc_, window='flat') 558 | train_loss = average_smooth(train_loss_, window='flat') 559 | train_acc = average_smooth(train_acc_, window='flat') 560 | 561 | plt.figure(1) 562 | MIN = train_loss.min() - 0.001 563 | start = 0 564 | linestyles = ['-', '--', '-.', ':'] 565 | markers = ["o", "v", "s", "*", "x", "P"] 566 | algs_lbl = ["FEDL", "FedAvg", "FEDL", "FedAvg"] 567 | plt.grid(True) 568 | for i in range(Numb_Algs): 569 | stringbatch = str(batch_size[i]) 570 | if (stringbatch == '0'): 571 | stringbatch = '$\infty$' 572 | plt.plot(train_acc[i, 1:], linestyle=linestyles[i], marker=markers[i], 573 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 574 | 575 | plt.legend(loc='lower right') 576 | plt.ylabel('Training Accuracy') 577 | plt.xlabel('Global rounds ' + '$K_g$') 578 | plt.title(dataset.upper()) 579 | plt.ylim([0.85, train_acc.max()]) 580 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.png', bbox_inches="tight") 581 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.pdf', bbox_inches="tight") 582 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf') 583 | plt.figure(2) 584 | 585 | plt.grid(True) 586 | for i in range(Numb_Algs): 587 | stringbatch = str(batch_size[i]) 588 | if (stringbatch == '0'): 589 | stringbatch = '$\infty$' 590 | plt.plot(train_loss[i, 1:], linestyle=linestyles[i], marker=markers[i], 591 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 592 | 593 | # plt.plot(train_loss1[i, 1:], label=algs_lbl1[i]) 594 | plt.legend(loc='upper right') 595 | plt.ylabel('Training Loss') 596 | plt.xlabel('Global rounds') 597 | plt.title(dataset.upper()) 598 | plt.ylim([train_loss.min() - 0.01, 0.7]) 599 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight") 600 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches="tight") 601 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf') 602 | plt.figure(3) 
603 | plt.grid(True) 604 | for i in range(Numb_Algs): 605 | stringbatch = str(batch_size[i]) 606 | if (stringbatch == '0'): 607 | stringbatch = '$\infty$' 608 | plt.plot(glob_acc[i, 1:], linestyle=linestyles[i], marker=markers[i], 609 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 610 | # plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i]) 611 | plt.legend(loc='lower right') 612 | plt.ylim([0.8, glob_acc.max() + 0.005]) 613 | plt.ylabel('Test Accuracy') 614 | plt.xlabel('Global rounds ') 615 | plt.title(dataset.upper()) 616 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight") 617 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.pdf', bbox_inches="tight") 618 | # plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf') 619 | 620 | 621 | def plot_summary_one_nist(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], 622 | hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho=[], dataset=""): 623 | Numb_Algs = len(algorithms_list) 624 | # glob_acc, train_acc, train_loss = get_training_data_value( 625 | # users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset) 626 | 627 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, 628 | learning_rate, hyper_learning_rate, algorithms_list, 629 | batch_size, rho, dataset) 630 | glob_acc = average_smooth(glob_acc_, window='flat') 631 | train_loss = average_smooth(train_loss_, window='flat') 632 | train_acc = average_smooth(train_acc_, window='flat') 633 | 634 | plt.figure(1) 635 | MIN = train_loss.min() - 0.001 636 | start = 0 637 | linestyles = ['-', '--', '-.', ':'] 638 | markers = ["o", "v", "s", "*", "x", "P"] 639 | algs_lbl = ["FEDL", "FedAvg", "FedAvg"] 640 | plt.grid(True) 641 | for i in range(Numb_Algs): 642 | stringbatch = str(batch_size[i]) 643 | if (stringbatch == '0'): 644 | stringbatch = '$\infty$' 645 | plt.plot(train_acc[i, 1:], linestyle=linestyles[i], marker=markers[i], 646 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 647 | 648 | plt.legend(loc='lower right') 649 | plt.ylabel('Training Accuracy') 650 | plt.xlabel('Global rounds ' + '$K_g$') 651 | plt.title('FEMNIST') 652 | # plt.ylim([0.85, train_acc.max()]) 653 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.png', bbox_inches="tight") 654 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.pdf', bbox_inches="tight") 655 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf') 656 | plt.figure(2) 657 | 658 | plt.grid(True) 659 | for i in range(Numb_Algs): 660 | stringbatch = str(batch_size[i]) 661 | if (stringbatch == '0'): 662 | stringbatch = '$\infty$' 663 | plt.plot(train_loss[i, 1:], linestyle=linestyles[i], marker=markers[i], 664 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5) 665 | 666 | # plt.plot(train_loss1[i, 1:], label=algs_lbl1[i]) 667 | plt.legend(loc='upper right') 668 | plt.ylabel('Training Loss') 669 | plt.xlabel('Global rounds') 670 | plt.title('FEMNIST') 671 | # plt.ylim([train_loss.min(), 0.7]) 672 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight") 673 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches="tight") 674 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf') 675 | plt.figure(3) 676 | plt.grid(True) 677 | for i in range(Numb_Algs): 678 | stringbatch = str(batch_size[i]) 679 
|         if (stringbatch == '0'):
680 |             stringbatch = '$\infty$'
681 |         plt.plot(glob_acc[i, 1:], linestyle=linestyles[i], marker=markers[i],
682 |                  label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5)
683 |     # plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i])
684 |     plt.legend(loc='lower right')
685 |     # plt.ylim([0.8, glob_acc.max() + 0.005])
686 |     plt.ylabel('Test Accuracy')
687 |     plt.xlabel('Global rounds ')
688 |     plt.title('FEMNIST')
689 |     # ax.set_title('FEMNIST', y=1.02)
690 |     plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight")
691 |     plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.pdf', bbox_inches="tight")
692 |     # plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf')
693 |
694 |
695 |
--------------------------------------------------------------------------------
/utils/plot_utils.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import matplotlib
3 | import h5py
4 | import numpy as np
5 | from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
6 | import os
7 | from pathlib import Path
8 |
9 | plt.rcParams.update({'font.size': 14})
10 |
11 |
12 | def read_from_results(file_name):
13 |     hf = h5py.File(file_name, 'r')
14 |     string = file_name.split('_')
15 |     if "norms" in string:
16 |         rs_param_norms = np.array(hf.get('rs_param_norms')[:])
17 |         if "SCAFFOLD" in string:
18 |             rs_control_norms = np.array(hf.get('rs_control_norms')[:])
19 |             return rs_param_norms, rs_control_norms
20 |         else:
21 |             return rs_param_norms
22 |
23 |     rs_glob_acc = np.array(hf.get('rs_glob_acc')[:])
24 |     rs_train_acc = np.array(hf.get('rs_train_acc')[:])
25 |     rs_train_loss = np.array(hf.get('rs_train_loss')[:])
26 |     return rs_train_acc, rs_train_loss, rs_glob_acc
27 |
28 |
29 | # TODO: replace all args with input_dict
30 | def get_all_training_data_value(num_glob_iters, algorithm, dataset, times, similarity, noise):
31 |     train_acc = np.zeros((times, num_glob_iters))
32 |     train_loss = np.zeros((times, num_glob_iters))
33 |     glob_acc = np.zeros((times, num_glob_iters))
34 |
35 |     file_name = "./results/" + dataset + "_" + algorithm
36 |     file_name += "_" + str(similarity) + "s"
37 |     if noise:
38 |         file_name += '_noisy'
39 |
40 |     for i in range(times):
41 |         f = file_name + "_" + str(i) + ".h5"
42 |         train_acc[i, :], train_loss[i, :], glob_acc[i, :] = np.array(read_from_results(f))[:, :num_glob_iters]
43 |     return glob_acc, train_acc, train_loss
44 |
45 |
46 | def average_smooth(data, window_len=10, window='hanning'):
47 |     results = []
48 |     if window_len < 3:
49 |         return data
50 |     for i in range(len(data)):
51 |         x = data[i]
52 |         s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
53 |         # print(len(s))
54 |         if window == 'flat':  # moving average
55 |             w = np.ones(window_len, 'd')
56 |         else:
57 |             w = getattr(np, window)(window_len)  # look up the window function (e.g. np.hanning) on the np module
58 |
59 |         y = np.convolve(w / w.sum(), s, mode='valid')
60 |         results.append(y[window_len - 1:])
61 |     return np.array(results)
62 |
63 |
64 | def average_data(num_glob_iters, algorithm, dataset, times, similarity, noise):
65 |     glob_acc, train_acc, train_loss = get_all_training_data_value(num_glob_iters, algorithm, dataset, times, similarity,
66 |                                                                   noise)
67 |
68 |     glob_acc_data = np.average(glob_acc, axis=0)
69 |     train_acc_data = np.average(train_acc, axis=0)
70 |     train_loss_data = np.average(train_loss, axis=0)
71 |
72 |     max_accuracy = []
73 |     for i in range(times):
74 |         max_accuracy.append(glob_acc[i].max())
75 |     print("std:", np.std(max_accuracy))
76 |     print("Mean:", np.mean(max_accuracy))
77 |
78 |     # store average value to h5 file
79 |     file_name = "./results/" + dataset + "_" + algorithm
80 |     file_name += "_" + str(similarity) + "s"
81 |     if noise:
82 |         file_name += '_noisy'
83 |     file_name += "_avg.h5"
84 |
85 |     if len(glob_acc) and len(train_acc) and len(train_loss):  # only save when every metric was read
86 |         with h5py.File(file_name, 'w') as hf:
87 |             hf.create_dataset('rs_glob_acc', data=glob_acc_data)
88 |             hf.create_dataset('rs_train_acc', data=train_acc_data)
89 |             hf.create_dataset('rs_train_loss', data=train_loss_data)
90 |             hf.close()
91 |     return 0
92 |
93 |
94 | def get_all_norms(num_glob_iters, algorithm, dataset, times, similarity, noise):
95 |     file_name = "./results/" + dataset + "_" + algorithm + "_norms"
96 |     file_name += "_" + str(similarity) + "s"
97 |     if noise:
98 |         file_name += '_noisy'
99 |
100 |     param_norms = np.zeros((times, num_glob_iters))
101 |
102 |     if algorithm == "SCAFFOLD":
103 |         control_norms = np.zeros((times, num_glob_iters))
104 |         for i in range(times):
105 |             f = file_name + "_" + str(i) + ".h5"
106 |             param_norms[i, :], control_norms[i, :] = np.array(read_from_results(f))[:, :num_glob_iters]
107 |         return param_norms, control_norms
108 |     else:
109 |         for i in range(times):
110 |             f = file_name + "_" + str(i) + ".h5"
111 |             param_norms[i, :] = np.array(read_from_results(f))[:num_glob_iters]
112 |         return param_norms
113 |
114 |
115 | def average_norms(num_glob_iters, algorithm, dataset, times, similarity, noise):
116 |     # store average value to h5 file
117 |     file_name = "./results/" + dataset + "_" + algorithm + "_norms"
118 |     file_name += "_" + str(similarity) + "s"
119 |     if noise:
120 |         file_name += '_noisy'
121 |     file_name += "_avg.h5"
122 |
123 |     if algorithm == "SCAFFOLD":
124 |         param_norms, control_norms = get_all_norms(num_glob_iters, algorithm, dataset, times, similarity,
125 |                                                    noise)
126 |         glob_param_norms = np.average(param_norms, axis=0)
127 |         glob_control_norms = np.average(control_norms, axis=0)
128 |         if len(glob_param_norms) and len(glob_control_norms):
129 |             with h5py.File(file_name, 'w') as hf:
130 |                 hf.create_dataset('rs_param_norms', data=glob_param_norms)
131 |                 hf.create_dataset('rs_control_norms', data=glob_control_norms)
132 |     else:
133 |         param_norms = get_all_norms(num_glob_iters, algorithm, dataset, times, similarity, noise)
134 |         glob_param_norms = np.average(param_norms, axis=0)
135 |         if len(glob_param_norms) != 0:
136 |             with h5py.File(file_name, 'w') as hf:
137 |                 hf.create_dataset('rs_param_norms', data=glob_param_norms)
138 |                 hf.close()
139 |
140 |
141 | def get_plot_dict(input_dict, algorithms, local_epochs):
142 |     keys = ["dataset", "learning_rate", "num_glob_iters", "users_per_round", "batch_size", "local_epochs",
143 |             "similarity", "noise"]
144 |     plot_dict = {x: input_dict[x] for x in keys}
145 |     plot_dict["local_epochs"] = local_epochs
plot_dict["algorithms"] = algorithms 147 | return plot_dict 148 | 149 | 150 | def plot_by_epochs(dataset, algorithms, num_glob_iters, learning_rate, users_per_round, batch_size, local_epochs, 151 | similarity, noise): 152 | """take the Monta Carlo simulation and present it SCAFFOLD vs FedAvg""" 153 | colours = ['r', 'g', 'b'] 154 | fig, axs = plt.subplots(1, len(local_epochs), constrained_layout=True) 155 | if len(algorithms) == 2: 156 | fig.suptitle(f"{algorithms[0]} vs {algorithms[1]} - {dataset}") 157 | elif len(algorithms) == 1: 158 | fig.suptitle(f"{algorithms[0]} - {dataset}") 159 | 160 | if len(local_epochs) == 1: 161 | axs = [axs] 162 | 163 | for k, epochs in enumerate(local_epochs): 164 | axs[k].set_xlabel("Global Iterations") 165 | axs[k].set_ylabel("Accuracy") 166 | axs[k].set_title("number of local epochs =" + str(epochs)) 167 | 168 | for j, algorithm in enumerate(algorithms): 169 | file_name = "./results/" + dataset 170 | file_name += "_" + algorithm 171 | file_name += "_" + str(learning_rate) + "lr" 172 | file_name += "_" + str(users_per_round) + "u" 173 | file_name += "_" + str(batch_size) + "b" 174 | file_name += "_" + str(epochs) + "e" 175 | file_name += "_" + str(similarity) + "s" 176 | if noise: 177 | file_name += '_noisy' 178 | file_name += "_avg.h5" 179 | train_acc, train_loss, glob_acc = np.array(read_from_results(file_name))[:, :num_glob_iters] 180 | axs[k].plot(glob_acc, color=colours[j], label=algorithm) 181 | axs[k].legend(loc="lower right") 182 | plt.show() 183 | 184 | 185 | def plot_norms(dataset, algorithms, noises, similarities, num_glob_iters): 186 | colours = ['r', 'g', 'b'] 187 | fig, axs = plt.subplots(1, len(similarities), constrained_layout=True) 188 | fig.suptitle(f"{dataset}") 189 | 190 | if len(similarities) == 1: 191 | axs = [axs] 192 | 193 | for k, similarity in enumerate(similarities): 194 | axs[k].set_xlabel("Global Iterations") 195 | axs[k].set_ylabel("Average Norm") 196 | axs[k].set_yscale('log') 197 | axs[k].set_title(str(100 * similarity) + "% Similarity") 198 | 199 | for noise in noises: 200 | for j, algorithm in enumerate(algorithms): 201 | file_name = "./results/" + dataset 202 | file_name += "_" + algorithm + "_norms" 203 | file_name += "_" + str(similarity) + "s" 204 | label = algorithm 205 | color = colours[j] 206 | if noise: 207 | file_name += '_noisy' 208 | label += ' with noise' 209 | color += ':' 210 | file_name += "_avg.h5" 211 | if algorithm == "SCAFFOLD": 212 | param_norms, control_norms = np.array(read_from_results(file_name))[:, :num_glob_iters] 213 | axs[k].plot(param_norms, color, label='params ' + label) 214 | color = colours[-1] + color[1:] 215 | axs[k].plot(control_norms, color, label='controls ' + label) 216 | else: 217 | param_norms = np.array(read_from_results(file_name))[:num_glob_iters] 218 | axs[k].plot(param_norms, color, label='params ' + label) 219 | axs[k].legend(loc="center right") 220 | plt.show() 221 | 222 | 223 | def plot_accuracy(dataset, algorithms, noises, similarities, num_glob_iters): 224 | colours = ['r', 'g'] 225 | fig, axs = plt.subplots(1, len(similarities), constrained_layout=True) 226 | fig.suptitle(f"{dataset}") 227 | 228 | if len(similarities) == 1: 229 | axs = [axs] 230 | 231 | for k, similarity in enumerate(similarities): 232 | axs[k].set_xlabel("Global Iterations") 233 | axs[k].set_ylabel("Accuracy") 234 | axs[k].set_title(str(100 * similarity) + "% Similarity") 235 | 236 | for noise in noises: 237 | for j, algorithm in enumerate(algorithms): 238 | file_name = "./results/" + dataset 239 | 
file_name += "_" + algorithm 240 | file_name += "_" + str(similarity) + "s" 241 | label = algorithm 242 | color = colours[j] 243 | if noise: 244 | file_name += '_noisy' 245 | label += ' with noise' 246 | color += ':' 247 | file_name += "_avg.h5" 248 | train_acc, train_loss, glob_acc = np.array(read_from_results(file_name))[:, :num_glob_iters] 249 | axs[k].plot(glob_acc, color, label=label) 250 | axs[k].legend(loc="lower right") 251 | plt.show() 252 | --------------------------------------------------------------------------------