├── .gitattributes
├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── .name
│   ├── SCAFFOLD-master.iml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   └── modules.xml
├── LICENSE
├── README.md
├── __pycache__
│   ├── main.cpython-38.pyc
│   └── simulate.cpython-38.pyc
├── data
│   ├── CIFAR-10
│   │   ├── README.md
│   │   └── data_generator.py
│   ├── CIFAR
│   │   └── data_generator.py
│   ├── Femnist
│   │   ├── README.md
│   │   ├── __pycache__
│   │   │   └── data_generator.cpython-38.pyc
│   │   ├── data
│   │   │   ├── my_sample.py
│   │   │   ├── nist_generator.py
│   │   │   ├── saved_test
│   │   │   │   ├── mytest_sim0.1.json
│   │   │   │   ├── mytest_sim0.json
│   │   │   │   └── mytest_sim1.json
│   │   │   ├── saved_train
│   │   │   │   ├── mytrain_sim0.1.json
│   │   │   │   ├── mytrain_sim0.json
│   │   │   │   └── mytrain_sim1.json
│   │   │   ├── test
│   │   │   │   └── mytest.json
│   │   │   └── train
│   │   │       └── mytrain.json
│   │   ├── data_generator.py
│   │   ├── preprocess.sh
│   │   ├── preprocess
│   │   │   ├── data_to_json.py
│   │   │   ├── data_to_json.sh
│   │   │   ├── get_data.sh
│   │   │   ├── get_file_dirs.py
│   │   │   ├── get_hashes.py
│   │   │   ├── group_by_writer.py
│   │   │   └── match_hashes.py
│   │   └── stats.sh
│   ├── Linear_synthetic
│   │   ├── data
│   │   │   └── README.md
│   │   ├── generate_linear_regession.py
│   │   ├── generate_linear_regession_updated.py
│   │   ├── generate_linear_synthetic_backup.py
│   │   └── optimal_solution_finding.py
│   ├── Logistic_synthetic
│   │   ├── README.md
│   │   └── logistic_regression.py
│   └── Mnist
│       ├── data
│       │   └── mldata
│       │       └── mnist-original.mat
│       ├── generate_iid_20users.py
│       ├── generate_niid_100users_updated.py
│       ├── generate_niid_20users.py
│       └── generate_niid_mnist_100users.py
├── flearn
│   ├── optimizers
│   │   ├── __pycache__
│   │   │   └── fedoptimizer.cpython-38.pyc
│   │   └── fedoptimizer.py
│   ├── servers
│   │   ├── __pycache__
│   │   │   ├── serveravg.cpython-38.pyc
│   │   │   ├── serverbase.cpython-38.pyc
│   │   │   ├── serverfedl.cpython-38.pyc
│   │   │   └── serverscaffold.cpython-38.pyc
│   │   ├── server_avg.py
│   │   ├── server_base.py
│   │   └── server_scaffold.py
│   ├── trainmodel
│   │   ├── __pycache__
│   │   │   └── models.cpython-38.pyc
│   │   └── models.py
│   └── users
│       ├── __pycache__
│       │   ├── useravg.cpython-38.pyc
│       │   ├── userbase.cpython-38.pyc
│       │   ├── userfedl.cpython-38.pyc
│       │   └── userscaffold.cpython-38.pyc
│       ├── user_avg.py
│       ├── user_base.py
│       └── user_scaffold.py
├── main.py
├── models
│   └── Femnist
│       └── server.pt
├── requirements.txt
├── results
│   ├── Femnist_FedAvg_0.1s_0.h5
│   ├── Femnist_FedAvg_0.1s_1.h5
│   ├── Femnist_FedAvg_0.1s_2.h5
│   ├── Femnist_FedAvg_0.1s_3.h5
│   ├── Femnist_FedAvg_0.1s_4.h5
│   ├── Femnist_FedAvg_0.1s_5.h5
│   ├── Femnist_FedAvg_0.1s_6.h5
│   ├── Femnist_FedAvg_0.1s_7.h5
│   ├── Femnist_FedAvg_0.1s_8.h5
│   ├── Femnist_FedAvg_0.1s_9.h5
│   ├── Femnist_FedAvg_0.1s_avg.h5
│   ├── Femnist_FedAvg_0s_0.h5
│   ├── Femnist_FedAvg_0s_1.h5
│   ├── Femnist_FedAvg_0s_2.h5
│   ├── Femnist_FedAvg_0s_3.h5
│   ├── Femnist_FedAvg_0s_4.h5
│   ├── Femnist_FedAvg_0s_5.h5
│   ├── Femnist_FedAvg_0s_6.h5
│   ├── Femnist_FedAvg_0s_7.h5
│   ├── Femnist_FedAvg_0s_8.h5
│   ├── Femnist_FedAvg_0s_9.h5
│   ├── Femnist_FedAvg_0s_avg.h5
│   ├── Femnist_FedAvg_1s_0.h5
│   ├── Femnist_FedAvg_1s_1.h5
│   ├── Femnist_FedAvg_1s_2.h5
│   ├── Femnist_FedAvg_1s_3.h5
│   ├── Femnist_FedAvg_1s_4.h5
│   ├── Femnist_FedAvg_1s_5.h5
│   ├── Femnist_FedAvg_1s_6.h5
│   ├── Femnist_FedAvg_1s_7.h5
│   ├── Femnist_FedAvg_1s_8.h5
│   ├── Femnist_FedAvg_1s_9.h5
│   ├── Femnist_FedAvg_1s_avg.h5
│   ├── Femnist_SCAFFOLD_0.1s_0.h5
│   ├── Femnist_SCAFFOLD_0.1s_1.h5
│   ├── Femnist_SCAFFOLD_0.1s_2.h5
│   ├── Femnist_SCAFFOLD_0.1s_3.h5
│   ├── Femnist_SCAFFOLD_0.1s_4.h5
│   ├── Femnist_SCAFFOLD_0.1s_5.h5
│   ├── Femnist_SCAFFOLD_0.1s_6.h5
│   ├── Femnist_SCAFFOLD_0.1s_7.h5
│   ├── Femnist_SCAFFOLD_0.1s_8.h5
│   ├── Femnist_SCAFFOLD_0.1s_9.h5
│   ├── Femnist_SCAFFOLD_0.1s_avg.h5
│   ├── Femnist_SCAFFOLD_0s_0.h5
│   ├── Femnist_SCAFFOLD_0s_1.h5
│   ├── Femnist_SCAFFOLD_0s_2.h5
│   ├── Femnist_SCAFFOLD_0s_3.h5
│   ├── Femnist_SCAFFOLD_0s_4.h5
│   ├── Femnist_SCAFFOLD_0s_5.h5
│   ├── Femnist_SCAFFOLD_0s_6.h5
│   ├── Femnist_SCAFFOLD_0s_7.h5
│   ├── Femnist_SCAFFOLD_0s_8.h5
│   ├── Femnist_SCAFFOLD_0s_9.h5
│   ├── Femnist_SCAFFOLD_0s_avg.h5
│   ├── Femnist_SCAFFOLD_1s_0.h5
│   ├── Femnist_SCAFFOLD_1s_1.h5
│   ├── Femnist_SCAFFOLD_1s_2.h5
│   ├── Femnist_SCAFFOLD_1s_3.h5
│   ├── Femnist_SCAFFOLD_1s_4.h5
│   ├── Femnist_SCAFFOLD_1s_5.h5
│   ├── Femnist_SCAFFOLD_1s_6.h5
│   ├── Femnist_SCAFFOLD_1s_7.h5
│   ├── Femnist_SCAFFOLD_1s_8.h5
│   ├── Femnist_SCAFFOLD_1s_9.h5
│   └── Femnist_SCAFFOLD_1s_avg.h5
├── simulate.py
└── utils
    ├── __pycache__
    │   ├── model_utils.cpython-38.pyc
    │   └── plot_utils.cpython-38.pyc
    ├── model_utils.py
    ├── old_plot.py
    └── plot_utils.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | .idea/vcs.xml
3 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | server_scaffold.py
--------------------------------------------------------------------------------
/.idea/SCAFFOLD-master.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # **Accelerated Federated Learning Over MAC in Heterogeneous Networks**
2 |
3 | I have analyzed the convergence rate of a federated learning algorithm named SCAFFOLD (a variation of SVRG) over a noisy fading MAC with heterogeneous data, in order to formulate a new algorithm that accelerates the learning process in such settings.
4 | The work builds on three related articles (a toy sketch combining their ideas follows this list):
5 |
6 | 1. **On Analog Gradient Descent Learning Over Multiple Access Fading Channels**
7 | The authors implement the GBMA algorithm, in which users transmit an analog function of their local gradients using a common shaping waveform, and the network edge updates the global model from the received superposition of the transmitted analog signals, which represents a noisy, distorted version of the aggregated gradient.
8 | https://arxiv.org/abs/1908.07463
9 |
10 | 2. **Over-the-Air Federated Learning from Heterogeneous Data**
11 | The authors introduce COTAF, a time-varying pre-coding and scaling scheme that facilitates the aggregation, gradually mitigates the noise effect, and maintains the convergence properties of local SGD with heterogeneous data across users.
12 | https://arxiv.org/abs/2009.12787
13 |
14 | 3. **SCAFFOLD - Stochastic Controlled Averaging for Federated Learning**
15 | The authors propose a stochastic algorithm that overcomes gradient dissimilarity by using control variates as estimates of the users' variance (client drift), thereby making FL more robust to heterogeneity in the users' data.
16 | https://arxiv.org/abs/1910.06378
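
The three ideas interact as follows: users run SCAFFOLD-style local steps, pre-code their updates COTAF-style to satisfy a power constraint, and the server learns only from the noisy superposition it receives over the MAC. Below is a minimal, self-contained numerical sketch of that combination on a toy least-squares problem. It is **not** code from this repository; all names, constants and the toy data are illustrative assumptions.

```python
import numpy as np

rng = np.random.default_rng(0)
N, dim, rounds, local_steps, lr = 10, 20, 300, 5, 0.05
P, noise_std = 1.0, 0.1                     # transmit power budget and MAC noise level

# heterogeneous users: same features, but each user has its own ground-truth weights
A = [rng.normal(size=(50, dim)) for _ in range(N)]
w_true = [rng.normal(size=dim) for _ in range(N)]
b = [A[u] @ w_true[u] + 0.1 * rng.normal(size=50) for u in range(N)]

def grad(u, w):                             # local least-squares gradient
    return A[u].T @ (A[u] @ w - b[u]) / len(b[u])

w = np.zeros(dim)                           # global model
c = np.zeros(dim)                           # server control variate
c_i = [np.zeros(dim) for _ in range(N)]     # per-user control variates

for t in range(rounds):
    deltas, delta_cs = [], []
    for u in range(N):
        y = w.copy()
        for _ in range(local_steps):        # SCAFFOLD local step, drift-corrected by (c - c_i)
            y -= lr * (grad(u, y) - c_i[u] + c)
        c_new = c_i[u] - c + (w - y) / (local_steps * lr)   # SCAFFOLD "option II" control update
        deltas.append(y - w)
        delta_cs.append(c_new - c_i[u])
        c_i[u] = c_new
    # COTAF-style time-varying pre-coding: scale so the strongest user meets the power budget
    alpha = P / max(np.sum(d ** 2) for d in deltas)
    # over-the-air aggregation: the server only sees a noisy superposition of the analog signals
    rx = np.sqrt(alpha) * np.sum(deltas, axis=0) + noise_std * rng.normal(size=dim)
    w += rx / (np.sqrt(alpha) * N)          # rescale and average
    c += np.mean(delta_cs, axis=0)          # controls aggregated noiselessly here, for simplicity

print("final average training loss:",
      np.mean([np.mean((A[u] @ w - b[u]) ** 2) for u in range(N)]))
```

Note that the effective noise added to the model is `noise_std / (sqrt(alpha) * N)` per coordinate: as the local updates shrink, `alpha` grows and the channel noise is progressively suppressed, which is exactly the effect COTAF exploits.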
17 |
18 | # Latest progress
19 | I have built a Python framework that simulates FedAvg, COTAF, SCAFFOLD and our proposed scheme over the extended EMNIST data in different heterogeneity scenarios. I examine the performance of SCAFFOLD over a noisy fading MAC and try to reproduce the results of the related works. I also examine different pre-coding scenarios for the controls.
20 |
21 |
22 | 
23 |
24 |
25 | We analyzed the norms of the model and control updates during the learning process; the two norms maintain a roughly constant ratio. Conclusion: try applying a different pre-coding to each parameter type (a minimal tracking sketch is given below).
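
A minimal sketch of how such a ratio can be tracked each round, assuming `model_deltas` and `control_deltas` are the per-user update vectors collected during that round (these names are illustrative, not the repository's API):

```python
import numpy as np

def update_norm_ratio(model_deltas, control_deltas):
    """Return the norms of the averaged model / control updates and their ratio."""
    m = np.linalg.norm(np.mean(model_deltas, axis=0))
    c = np.linalg.norm(np.mean(control_deltas, axis=0))
    return m, c, (m / c if c > 0 else float("inf"))
```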
26 |
27 |
28 | 
29 |
30 |
31 | 
32 |
33 |
34 | The figures confirm that I managed to reproduce the results of the related works. In addition, it seems that SCAFFOLD's performance may degrade when noise is applied. We suspect that the control and gradient updates trend differently over time, so we use a different pre-coding scaling for the controls and simulate a scenario where both pre-codings are constrained to the same SNR (see the sketch below).
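
A hedged sketch of what "different pre-coding per parameter type under a shared SNR budget" could look like. This is my own illustration, not the repository's scheme; `rho` is an assumed knob that splits the power budget between the two streams:

```python
import numpy as np

def split_precoding(model_deltas, control_deltas, P=1.0, rho=0.5):
    """COTAF-style scaling factors for the model stream and the control stream.

    rho is the fraction of the total power budget P given to the model updates,
    so both streams together still respect the same transmit power (hence SNR).
    """
    a_model = rho * P / max(np.sum(d ** 2) for d in model_deltas)
    a_ctrl = (1.0 - rho) * P / max(np.sum(d ** 2) for d in control_deltas)
    # each user scales its model delta by sqrt(a_model) and its control delta by sqrt(a_ctrl)
    return a_model, a_ctrl
```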
35 |
36 | # Software requirements:
37 | - numpy, scipy, pytorch, Pillow, matplotlib.
38 |
39 | - To install the dependencies: **pip3 install -r requirements.txt**
40 |
41 |
42 |
--------------------------------------------------------------------------------
/__pycache__/main.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/__pycache__/main.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/simulate.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/__pycache__/simulate.cpython-38.pyc
--------------------------------------------------------------------------------
/data/CIFAR-10/README.md:
--------------------------------------------------------------------------------
1 | # CIFAR-10 data generator
2 |
3 | To use data_generator.py, download the CIFAR-10 (python version) archive from https://www.cs.toronto.edu/~kriz/cifar.html into this directory and extract it, so that the `cifar-10-batches-py` folder sits next to the script (a usage example follows).
4 |
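
A minimal way to invoke the generator once the archive is extracted; the `similarity` value here is just an example:

```python
# run from data/CIFAR-10/ after extracting cifar-10-batches-py/ into this folder
from data_generator import generate_data

# 10 users with 5000 samples each by default; similarity=0.1 means 10% of each user's data is i.i.d.
generate_data(similarity=0.1)
```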
--------------------------------------------------------------------------------
/data/CIFAR-10/data_generator.py:
--------------------------------------------------------------------------------
1 | import emnist
2 | import numpy as np
3 | from tqdm import trange
4 | import random
5 | import json
6 | import os
7 | from functools import reduce
8 |
9 |
10 | def generate_data(similarity: int, num_of_users=10, samples_num=5000):
11 | """
12 | generate CIFAR-10 data among 10 users with different similarities
13 | :param similarity: portion of similar data between users. number between 0 to 1
14 | :param num_of_users: number of users data distributed among
15 | :param samples_num: number of samples distributed to each user
16 | """
17 | root_path = os.path.dirname(__file__)
18 | train_path = root_path + '/data/train/mytrain.json'
19 | test_path = root_path + '/data/test/mytest.json'
20 | dir_path = os.path.dirname(train_path)
21 | if not os.path.exists(dir_path):
22 | os.makedirs(dir_path)
23 | dir_path = os.path.dirname(test_path)
24 | if not os.path.exists(dir_path):
25 | os.makedirs(dir_path)
26 |
27 | cifar_dicts = []
28 | for i in range(1, 6):
29 | cifar_dicts.append(unpickle(root_path + '/cifar-10-batches-py/data_batch_' + f"{i}"))
30 |
31 | train_images = np.concatenate([cifar_dict['data'] for cifar_dict in cifar_dicts])
32 | # train_labels = reduce((lambda x, y: x + y), [cifar_dict['labels'] for cifar_dict in cifar_dicts])
33 | train_labels = np.concatenate([cifar_dict['labels'] for cifar_dict in cifar_dicts])
34 | train_images = train_images.astype(np.float32)
35 | train_labels = train_labels.astype(np.int)
36 | num_of_labels = len(set(train_labels))
37 |
38 | cifar_dict = unpickle(root_path + '/cifar-10-batches-py/test_batch')
39 | test_images = cifar_dict['data']
40 | test_labels = np.array(cifar_dict['labels'])
41 | test_images = test_images.astype(np.float32)
42 | test_labels = test_labels.astype(np.int)
43 |
44 | cifar_data = []
45 | for i in range(min(train_labels), num_of_labels + min(train_labels)):
46 | idx = train_labels == i
47 | cifar_data.append(train_images[idx])
48 |
49 | iid_samples = int(similarity * samples_num)
50 | X_train = [[] for _ in range(num_of_users)]
51 | y_train = [[] for _ in range(num_of_users)]
52 | idx = np.zeros(num_of_labels, dtype=np.int64)
53 |
54 | # fill users data by labels
55 | for user in range(num_of_users):
56 | label = user % num_of_labels
57 | X_train[user] += cifar_data[label][idx[label]:idx[label] + samples_num - iid_samples].tolist()
58 | y_train[user] += (label * np.ones(samples_num - iid_samples)).tolist()
59 | idx[label] += samples_num - iid_samples
60 |
61 | print(idx)
62 |
63 | # create %similarity of iid data
64 | for user in range(num_of_users):
65 | labels = np.random.randint(0, num_of_labels, iid_samples)
66 | for label in labels:
67 | while idx[label] >= len(cifar_data[label]):
68 | label = (label + 1) % num_of_labels
69 | X_train[user].append(cifar_data[label][idx[label]].tolist())
70 | y_train[user] += (label * np.ones(1)).tolist()
71 | idx[label] += 1
72 |
73 | print(idx)
74 |
75 | # create test data
76 | X_test = test_images.tolist()
77 | y_test = test_labels.tolist()
78 |
79 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
80 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
81 |
82 | for i in range(num_of_users):
83 | uname = 'f_{0:05d}'.format(i)
84 |
85 | combined = list(zip(X_train[i], y_train[i]))
86 | random.shuffle(combined)
87 | X_train[i][:], y_train[i][:] = zip(*combined)
88 | train_len = len(X_train[i])
89 | test_len = int(len(test_images) / num_of_users)
90 |
91 | train_data['users'].append(uname)
92 | train_data['user_data'][uname] = {'x': X_train[i], 'y': y_train[i]}
93 | train_data['num_samples'].append(train_len)
94 | test_data['users'].append(uname)
95 | test_data['user_data'][uname] = {'x': X_test[test_len * i:test_len * (i + 1)],
96 | 'y': y_test[test_len * i:test_len * (i + 1)]}
97 | test_data['num_samples'].append(test_len)
98 |
99 | print(train_data['num_samples'])
100 | print(sum(train_data['num_samples']))
101 | print(sum(test_data['num_samples']))
102 |
103 | print("Saving data, please wait")
104 | with open(train_path, 'w') as outfile:
105 | json.dump(train_data, outfile)
106 | with open(test_path, 'w') as outfile:
107 | json.dump(test_data, outfile)
108 | print("Saving completed")
109 |
110 |
111 | def unpickle(file):
112 | import pickle
113 | with open(file, 'rb') as fo:
114 | data_dict = pickle.load(fo, encoding='latin1')
115 | return data_dict
116 |
117 |
118 | if __name__ == '__main__':
119 | generate_data(similarity=1)
120 |
--------------------------------------------------------------------------------
/data/CIFAR/data_generator.py:
--------------------------------------------------------------------------------
1 | import emnist
2 | import numpy as np
3 | from tqdm import trange
4 | import random
5 | import json
6 | import os
7 | from functools import reduce
8 |
9 |
10 | def generate_data(similarity: int, num_of_users=10, samples_num=5000):
11 | """
12 | generate CIFAR-10 data among 10 users with different similarities
13 | :param similarity: portion of similar data between users. number between 0 to 1
14 | :param num_of_users: number of users data distributed among
15 | :param samples_num: number of samples distributed to each user
16 | """
17 | root_path = os.path.dirname(__file__)
18 | train_path = root_path + '/data/train/mytrain.json'
19 | test_path = root_path + '/data/test/mytest.json'
20 | dir_path = os.path.dirname(train_path)
21 | if not os.path.exists(dir_path):
22 | os.makedirs(dir_path)
23 | dir_path = os.path.dirname(test_path)
24 | if not os.path.exists(dir_path):
25 | os.makedirs(dir_path)
26 |
27 | cifar_dicts = []
28 | for i in range(1, 6):
29 | cifar_dicts.append(unpickle(root_path + '/cifar-10-batches-py/data_batch_' + f"{i}"))
30 |
31 | train_images = np.concatenate([cifar_dict['data'] for cifar_dict in cifar_dicts])
32 | # train_labels = reduce((lambda x, y: x + y), [cifar_dict['labels'] for cifar_dict in cifar_dicts])
33 | train_labels = np.concatenate([cifar_dict['labels'] for cifar_dict in cifar_dicts])
34 | train_images = train_images.astype(np.float32)
35 | train_labels = train_labels.astype(np.int)
36 | num_of_labels = len(set(train_labels))
37 |
38 | cifar_dict = unpickle(root_path + '/cifar-10-batches-py/test_batch')
39 | test_images = cifar_dict['data']
40 | test_labels = np.array(cifar_dict['labels'])
41 | test_images = test_images.astype(np.float32)
42 | test_labels = test_labels.astype(np.int)
43 |
44 | cifar_data = []
45 | for i in range(min(train_labels), num_of_labels + min(train_labels)):
46 | idx = train_labels == i
47 | cifar_data.append(train_images[idx])
48 |
49 | iid_samples = int(similarity * samples_num)
50 | X_train = [[] for _ in range(num_of_users)]
51 | y_train = [[] for _ in range(num_of_users)]
52 | idx = np.zeros(num_of_labels, dtype=np.int64)
53 |
54 | # fill users data by labels
55 | for user in range(num_of_users):
56 | label = user % num_of_labels
57 | X_train[user] += cifar_data[label][idx[label]:idx[label] + samples_num - iid_samples].tolist()
58 | y_train[user] += (label * np.ones(samples_num - iid_samples)).tolist()
59 | idx[label] += samples_num - iid_samples
60 |
61 | print(idx)
62 |
63 | # create %similarity of iid data
64 | for user in range(num_of_users):
65 | labels = np.random.randint(0, num_of_labels, iid_samples)
66 | for label in labels:
67 | while idx[label] >= len(cifar_data[label]):
68 | label = (label + 1) % num_of_labels
69 | X_train[user].append(cifar_data[label][idx[label]].tolist())
70 | y_train[user] += (label * np.ones(1)).tolist()
71 | idx[label] += 1
72 |
73 | print(idx)
74 |
75 | # create test data
76 | X_test = test_images.tolist()
77 | y_test = test_labels.tolist()
78 |
79 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
80 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
81 |
82 | for i in range(num_of_users):
83 | uname = 'f_{0:05d}'.format(i)
84 |
85 | combined = list(zip(X_train[i], y_train[i]))
86 | random.shuffle(combined)
87 | X_train[i][:], y_train[i][:] = zip(*combined)
88 | train_len = len(X_train[i])
89 | test_len = int(len(test_images) / num_of_users)
90 |
91 | train_data['users'].append(uname)
92 | train_data['user_data'][uname] = {'x': X_train[i], 'y': y_train[i]}
93 | train_data['num_samples'].append(train_len)
94 | test_data['users'].append(uname)
95 | test_data['user_data'][uname] = {'x': X_test[test_len * i:test_len * (i + 1)],
96 | 'y': y_test[test_len * i:test_len * (i + 1)]}
97 | test_data['num_samples'].append(test_len)
98 |
99 | print(train_data['num_samples'])
100 | print(sum(train_data['num_samples']))
101 | print(sum(test_data['num_samples']))
102 |
103 | print("Saving data, please wait")
104 | with open(train_path, 'w') as outfile:
105 | json.dump(train_data, outfile)
106 | with open(test_path, 'w') as outfile:
107 | json.dump(test_data, outfile)
108 | print("Saving completed")
109 |
110 |
111 | def unpickle(file):
112 | import pickle
113 | with open(file, 'rb') as fo:
114 | data_dict = pickle.load(fo, encoding='latin1')
115 | return data_dict
116 |
117 |
118 | if __name__ == '__main__':
119 | generate_data(similarity=1)
120 |
--------------------------------------------------------------------------------
/data/Femnist/README.md:
--------------------------------------------------------------------------------
1 | # EMNIST Dataset
2 |
3 | ## Setup Instructions
4 | - pip3 install numpy
5 | - pip3 install pillow
6 | - Run ```./preprocess.sh``` with a choice of the following tags:
7 | - ```-s``` := 'iid' to sample in an i.i.d. manner, or 'niid' to sample in a non-i.i.d. manner; more information on i.i.d. versus non-i.i.d. is included in the 'Notes' section
8 | - ```--iu``` := number of users, if iid sampling; expressed as a fraction of the total number of users; default is 0.01
9 | - ```--sf``` := fraction of data to sample, written as a decimal; default is 0.1
10 | - ```-k``` := minimum number of samples per user
11 | - ```-t``` := 'user' to partition users into train-test groups, or 'sample' to partition each user's samples into train-test groups
12 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9
13 | - ```--nu``` := The total number of users generated.
14 |
15 | Example command used to generate the EMNIST data (here with 100 users):
16 |
17 | ```
18 | ./preprocess.sh -s niid --sf 1.0 -k 0 -tf 0.8 -t sample --nu 100
19 | ```
20 |
21 |
22 |
23 |
24 | (Make sure to delete the rem\_user\_data, sampled\_data, test, and train subfolders in the data directory before re-running preprocess.sh.)
25 |
26 | Or you can download the dataset [here](https://drive.google.com/open?id=1sHzD4IsgEI5xLy6cqwUjSGW0PwiduPHr), unzip it and put the `train` and `test` folder under `data`.
27 |
--------------------------------------------------------------------------------
/data/Femnist/__pycache__/data_generator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/data/Femnist/__pycache__/data_generator.cpython-38.pyc
--------------------------------------------------------------------------------
/data/Femnist/data/my_sample.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import json
3 | import math
4 | import numpy as np
5 | import os
6 | import sys
7 | import random
8 | from tqdm import trange
9 |
10 | from PIL import Image
11 |
12 | NUM_USER = 50
13 | CLASS_PER_USER = 19
14 |
15 |
16 | def relabel_class(c):
17 | '''
18 | maps hexadecimal class value (string) to a decimal number
19 | returns:
20 | - 0 through 9 for classes representing respective numbers
21 | - 10 through 35 for classes representing respective uppercase letters
22 | - 36 through 61 for classes representing respective lowercase letters
23 | '''
24 | if c.isdigit() and int(c) < 40:
25 | return (int(c) - 30)
26 | elif int(c, 16) <= 90: # uppercase
27 | return (int(c, 16) - 55)
28 | else:
29 | return (int(c, 16) - 61)
30 |
31 | def load_image(file_name):
32 | '''read in a png
33 | Return: a flatted list representing the image
34 | '''
35 | size = (28, 28)
36 | img = Image.open(file_name)
37 | gray = img.convert('L')
38 | gray.thumbnail(size, Image.ANTIALIAS)
39 | arr = np.asarray(gray).copy()
40 | vec = arr.flatten()
41 | vec = vec / 255 # scale all pixel values to between 0 and 1
42 | vec = vec.tolist()
43 |
44 | return vec
45 |
46 |
47 | def main():
48 | file_dir = "raw_data/by_class"
49 |
50 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]}
51 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]}
52 |
53 | train_path = "train/mytrain.json"
54 | test_path = "test/mytest.json"
55 |
56 | X = [[] for _ in range(NUM_USER)]
57 | y = [[] for _ in range(NUM_USER)]
58 |
59 | nist_data = {}
60 |
61 |
62 | for class_ in os.listdir(file_dir):
63 |
64 | real_class = relabel_class(class_)
65 |
66 | if real_class >= 36 and real_class <= 61:
67 |
68 | full_img_path = file_dir + "/" + class_ + "/train_" + class_
69 | all_files_this_class = os.listdir(full_img_path)
70 | random.shuffle(all_files_this_class)
71 | sampled_files_this_class = all_files_this_class[:7000]
72 | imgs = []
73 | for img in sampled_files_this_class:
74 | imgs.append(load_image(full_img_path + "/" + img))
75 | class_ = relabel_class(class_)
76 | print(class_)
77 | nist_data[class_-36] = imgs # a list of list, key is (0, 25)
78 | print(len(imgs))
79 |
80 | # assign samples to users by power law
81 | num_samples = np.random.lognormal(4, 2, (NUM_USER)) + 5
82 |
83 | idx = np.zeros(26, dtype=np.int64)
84 |
85 | for user in range(NUM_USER):
86 | num_sample_per_class = int(num_samples[user]/CLASS_PER_USER)
87 | if num_sample_per_class < 2:
88 | num_sample_per_class = 2
89 |
90 | for j in range(CLASS_PER_USER):
91 | class_id = (user + j) % 26
92 |             if idx[class_id] + num_sample_per_class >= len(nist_data[class_id]):  # wrap around when this class runs out
93 |                 idx[class_id] = 0
94 | X[user] += nist_data[class_id][idx[class_id] : (idx[class_id] + num_sample_per_class)]
95 | y[user] += (class_id * np.ones(num_sample_per_class)).tolist()
96 | idx[class_id] += num_sample_per_class
97 |
98 | # Create data structure
99 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]}
100 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]}
101 |
102 | for i in trange(NUM_USER, ncols=120):
103 | uname = 'f_{0:05d}'.format(i)
104 |
105 | combined = list(zip(X[i], y[i]))
106 | random.shuffle(combined)
107 | X[i][:], y[i][:] = zip(*combined)
108 | num_samples = len(X[i])
109 | train_len = int(0.9 * num_samples)
110 | test_len = num_samples - train_len
111 |
112 | train_data['users'].append(uname)
113 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
114 | train_data['num_samples'].append(train_len)
115 | test_data['users'].append(uname)
116 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
117 | test_data['num_samples'].append(test_len)
118 |
119 |
120 | with open(train_path,'w') as outfile:
121 | json.dump(train_data, outfile)
122 | with open(test_path, 'w') as outfile:
123 | json.dump(test_data, outfile)
124 |
125 |
126 | if __name__ == "__main__":
127 | main()
128 |
129 |
--------------------------------------------------------------------------------
/data/Femnist/data/nist_generator.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import json
3 | import math
4 | import numpy as np
5 | import os
6 | import sys
7 | import random
8 | from tqdm import trange
9 |
10 | from PIL import Image
11 |
12 | NUM_USER = 50
13 | CLASS_PER_USER = 50
14 | FEMNIST = True  # True: generate data with the full 62 labels; False: only the 26 lowercase labels
15 | SAMPLE_NUM_MEAN = 400
16 | SAMPLE_NUM_STD = 110
17 |
18 |
19 | def relabel_class(c):
20 | '''
21 | maps hexadecimal class value (string) to a decimal number
22 | returns:
23 | - 0 through 9 for classes representing respective numbers : total 10
24 | - 10 through 35 for classes representing respective uppercase letters : 26
25 | - 36 through 61 for classes representing respective lowercase letters : 26
26 |     - in total there are 10 + 26 + 26 = 62 classes when FEMNIST is True, and only classes 36-61 (lowercase) when it is False
27 | '''
28 | if c.isdigit() and int(c) < 40:
29 | return (int(c) - 30)
30 | elif int(c, 16) <= 90: # uppercase
31 | return (int(c, 16) - 55)
32 | else:
33 | return (int(c, 16) - 61)
34 |
35 |
36 | def load_image(file_name):
37 | '''read in a png
38 | Return: a flatted list representing the image
39 | '''
40 | size = (28, 28)
41 | img = Image.open(file_name)
42 | gray = img.convert('L')
43 | gray.thumbnail(size, Image.ANTIALIAS)
44 | arr = np.asarray(gray).copy()
45 | vec = arr.flatten()
46 | vec = vec / 255 # scale all pixel values to between 0 and 1
47 | vec = vec.tolist()
48 |
49 | return vec
50 |
51 |
52 | def main():
53 | file_dir = "raw_data/by_class"
54 |
55 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
56 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
57 | if(FEMNIST):
58 | train_path = "train/nisttrain.json"
59 | test_path = "test/nisttest.json"
60 | else:
61 | train_path = "train/femnisttrain.json"
62 | test_path = "test/femnisttest.json"
63 |
64 | X = [[] for _ in range(NUM_USER)]
65 | y = [[] for _ in range(NUM_USER)]
66 |
67 | nist_data = {}
68 |
69 | for class_ in os.listdir(file_dir):
70 |
71 | real_class = relabel_class(class_)
72 |
73 | if(FEMNIST):
74 | full_img_path = file_dir + "/" + class_ + "/train_" + class_
75 | all_files_this_class = os.listdir(full_img_path)
76 | random.shuffle(all_files_this_class)
77 | sampled_files_this_class = all_files_this_class[:7000]
78 | imgs = []
79 | for img in sampled_files_this_class:
80 | imgs.append(load_image(full_img_path + "/" + img))
81 | class_ = relabel_class(class_)
82 | print("Class:", class_)
83 | nist_data[class_] = imgs # a list of list, key is (0, 25)
84 | print("Image len:", len(imgs))
85 |
86 | else:
87 | if real_class >= 36 and real_class <= 61:
88 | full_img_path = file_dir + "/" + class_ + "/train_" + class_
89 | all_files_this_class = os.listdir(full_img_path)
90 | random.shuffle(all_files_this_class)
91 | sampled_files_this_class = all_files_this_class[:7000]
92 | imgs = []
93 | for img in sampled_files_this_class:
94 | imgs.append(load_image(full_img_path + "/" + img))
95 | class_ = relabel_class(class_)
96 | print(class_)
97 | nist_data[class_-36] = imgs # a list of list, key is (0, 25)
98 | print(len(imgs))
99 |
100 | # assign samples to users by power law
101 |     normal_std = np.sqrt(np.log(1 + (SAMPLE_NUM_STD / SAMPLE_NUM_MEAN) ** 2))
102 |     normal_mean = np.log(SAMPLE_NUM_MEAN) - normal_std ** 2 / 2
103 |
104 | num_samples = np.random.lognormal(normal_mean, normal_std, (NUM_USER)) + 5
105 | #num_samples = np.random.normal(SAMPLE_NUM_MEAN,SAMPLE_NUM_STD,(NUM_USER))
106 |
107 | if(FEMNIST):
108 | idx = np.zeros(62, dtype=np.int64)
109 | else:
110 | idx = np.zeros(26, dtype=np.int64)
111 |
112 | for user in range(NUM_USER):
113 | num_sample_per_class = int(num_samples[user]/CLASS_PER_USER)
114 | if num_sample_per_class < 2:
115 | num_sample_per_class = 2
116 |
117 | for j in range(CLASS_PER_USER):
118 | if(FEMNIST):
119 | class_id = (user + j) % 62
120 | else:
121 | class_id = (user + j) % 26
122 |
123 |             if idx[class_id] + num_sample_per_class >= len(nist_data[class_id]):  # wrap around when this class runs out
124 |                 idx[class_id] = 0
125 | X[user] += nist_data[class_id][idx[class_id]
126 | : (idx[class_id] + num_sample_per_class)]
127 | y[user] += (class_id * np.ones(num_sample_per_class)).tolist()
128 | idx[class_id] += num_sample_per_class
129 |
130 | # Create data structure
131 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
132 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
133 |
134 | for i in trange(NUM_USER, ncols=120):
135 | uname = 'f_{0:05d}'.format(i)
136 |
137 | combined = list(zip(X[i], y[i]))
138 | random.shuffle(combined)
139 | X[i][:], y[i][:] = zip(*combined)
140 | num_samples = len(X[i])
141 | train_len = int(0.9 * num_samples)
142 | test_len = num_samples - train_len
143 |
144 | train_data['users'].append(uname)
145 | train_data['user_data'][uname] = {
146 | 'x': X[i][:train_len], 'y': y[i][:train_len]}
147 | train_data['num_samples'].append(train_len)
148 | test_data['users'].append(uname)
149 | test_data['user_data'][uname] = {
150 | 'x': X[i][train_len:], 'y': y[i][train_len:]}
151 | test_data['num_samples'].append(test_len)
152 |
153 | with open(train_path, 'w') as outfile:
154 | json.dump(train_data, outfile)
155 | with open(test_path, 'w') as outfile:
156 | json.dump(test_data, outfile)
157 |
158 |
159 | if __name__ == "__main__":
160 | main()
161 |
--------------------------------------------------------------------------------
/data/Femnist/data_generator.py:
--------------------------------------------------------------------------------
1 | import emnist
2 | import numpy as np
3 | from tqdm import trange
4 | import random
5 | import json
6 | import os
7 | import argparse
8 | from os.path import dirname
9 |
10 |
11 | def generate_data(similarity, num_of_users=100, samples_num=20):
12 | root_path = os.path.dirname(__file__)
13 | train_path = root_path + '/data/train/mytrain.json'
14 | test_path = root_path + '/data/test/mytest.json'
15 | dir_path = os.path.dirname(train_path)
16 | if not os.path.exists(dir_path):
17 | os.makedirs(dir_path)
18 | dir_path = os.path.dirname(test_path)
19 | if not os.path.exists(dir_path):
20 | os.makedirs(dir_path)
21 |
22 | dataset = 'balanced'
23 | images, train_labels = emnist.extract_training_samples(dataset) # TODO: add test samples
24 | images = np.reshape(images, (images.shape[0], -1))
25 | images = images.astype(np.float32)
26 | train_labels = train_labels.astype(np.int)
27 | num_of_labels = len(set(train_labels))
28 |
29 | emnist_data = []
30 | for i in range(min(train_labels), num_of_labels + min(train_labels)):
31 | idx = train_labels == i
32 | emnist_data.append(images[idx])
33 |
34 | iid_samples = int(similarity * samples_num)
35 | X = [[] for _ in range(num_of_users)]
36 | y = [[] for _ in range(num_of_users)]
37 | idx = np.zeros(num_of_labels, dtype=np.int64)
38 |
39 | # create %similarity of iid data
40 | for user in range(num_of_users):
41 | labels = np.random.randint(0, num_of_labels, iid_samples)
42 | for label in labels:
43 | X[user].append(emnist_data[label][idx[label]].tolist())
44 | y[user] += (label * np.ones(1)).tolist()
45 | idx[label] += 1
46 |
47 | print(idx)
48 |
49 | # fill remaining data
50 | for user in range(num_of_users):
51 | label = user % num_of_labels
52 | X[user] += emnist_data[label][idx[label]:idx[label] + samples_num - iid_samples].tolist()
53 | y[user] += (label * np.ones(samples_num - iid_samples)).tolist()
54 | idx[label] += samples_num - iid_samples
55 |
56 | print(idx)
57 |
58 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
59 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
60 |
61 | for i in trange(num_of_users, ncols=120):
62 | uname = 'f_{0:05d}'.format(i)
63 |
64 | combined = list(zip(X[i], y[i]))
65 | random.shuffle(combined)
66 | X[i][:], y[i][:] = zip(*combined)
67 | num_samples = len(X[i])
68 | train_len = int(0.9 * num_samples)
69 | test_len = num_samples - train_len
70 |
71 | train_data['users'].append(uname)
72 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
73 | train_data['num_samples'].append(train_len)
74 | test_data['users'].append(uname)
75 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
76 | test_data['num_samples'].append(test_len)
77 |
78 | print(train_data['num_samples'])
79 | print(sum(train_data['num_samples']))
80 |
81 | with open(train_path, 'w') as outfile:
82 | json.dump(train_data, outfile)
83 | with open(test_path, 'w') as outfile:
84 | json.dump(test_data, outfile)
85 |
86 |
87 | if __name__ == '__main__':
88 | parser = argparse.ArgumentParser()
89 | parser.add_argument("--similarity", type=float, default=0)
90 | parser.add_argument("--num_of_users", type=int, default=100)
91 | parser.add_argument("--samples_num", type=int, default=20)
92 | args = parser.parse_args()
93 | generate_data(similarity=args.similarity, num_of_users=args.num_of_users, samples_num=args.samples_num)
94 |
--------------------------------------------------------------------------------
/data/Femnist/preprocess.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | #rm -rf rem_user_data sampled_data test train
4 |
5 | # download data and convert to .json format
6 |
7 | if [ ! -d "data/all_data" ] || [ ! "$(ls -A data/all_data)" ]; then
8 | cd preprocess
9 | ./data_to_json.sh
10 | cd ..
11 | fi
12 |
13 | NAME="nist" # name of the dataset, equivalent to directory name
14 |
15 | cd ../../utils
16 |
17 | # ./preprocess.sh -s niid --sf 0.05 -k 64 -t sample
18 | # ./preprocess.sh --name nist -s niid --sf 1.0 -k 0 -t sample
19 | # ./preprocess.sh --name sent140 -s niid --sf 1.0 -k 1 -t sample
20 | ./preprocess.sh --name $NAME $@
21 |
22 | cd ../data/$NAME
23 |
--------------------------------------------------------------------------------
/data/Femnist/preprocess/data_to_json.py:
--------------------------------------------------------------------------------
1 | # Converts a list of (writer, [list of (file,class)]) tuples into a json object
2 | # of the form:
3 | # {users: [bob, etc], num_samples: [124, etc.],
4 | # user_data: {bob : {x:[img1,img2,etc], y:[class1,class2,etc]}, etc}}
5 | # where 'img_' is a vectorized representation of the corresponding image
6 |
7 | from __future__ import division
8 | import json
9 | import math
10 | import numpy as np
11 | import os
12 | import sys
13 |
14 | from PIL import Image
15 |
16 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
17 | utils_dir = os.path.join(utils_dir, 'utils')
18 | sys.path.append(utils_dir)
19 |
20 | import utils
21 |
22 |
23 | MAX_WRITERS = 100 # max number of writers per json file.
24 |
25 |
26 | def relabel_class(c):
27 | '''
28 | maps hexadecimal class value (string) to a decimal number
29 | returns:
30 | - 0 through 9 for classes representing respective numbers
31 | - 10 through 35 for classes representing respective uppercase letters
32 | - 36 through 61 for classes representing respective lowercase letters
33 | '''
34 | if c.isdigit() and int(c) < 40:
35 | return (int(c) - 30)
36 | elif int(c, 16) <= 90: # uppercase
37 | return (int(c, 16) - 55)
38 | else:
39 | return (int(c, 16) - 61)
40 |
41 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
42 |
43 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer')
44 | writers = utils.load_obj(ibwd)
45 |
46 | num_json = int(math.ceil(len(writers) / MAX_WRITERS))
47 |
48 | users = [[] for _ in range(num_json)]
49 | num_samples = [[] for _ in range(num_json)]
50 | user_data = [{} for _ in range(num_json)]
51 |
52 | writer_count = 0
53 | json_index = 0
54 | for (w, l) in writers:
55 |
56 | users[json_index].append(w)
57 | num_samples[json_index].append(len(l))
58 | user_data[json_index][w] = {'x': [], 'y': []}
59 |
60 | size = 28, 28 # original image size is 128, 128
61 | for (f, c) in l:
62 | file_path = os.path.join(parent_path, f)
63 | img = Image.open(file_path)
64 | gray = img.convert('L')
65 | gray.thumbnail(size, Image.ANTIALIAS)
66 | arr = np.asarray(gray).copy()
67 | vec = arr.flatten()
68 | vec = vec / 255 # scale all pixel values to between 0 and 1
69 | vec = vec.tolist()
70 |
71 | nc = relabel_class(c)
72 |
73 | user_data[json_index][w]['x'].append(vec)
74 | user_data[json_index][w]['y'].append(nc)
75 |
76 | writer_count += 1
77 | if writer_count == MAX_WRITERS:
78 |
79 | all_data = {}
80 | all_data['users'] = users[json_index]
81 | all_data['num_samples'] = num_samples[json_index]
82 | all_data['user_data'] = user_data[json_index]
83 |
84 | file_name = 'all_data_%d.json' % json_index
85 | file_path = os.path.join(parent_path, 'data', 'all_data', file_name)
86 |
87 | print('writing %s' % file_name)
88 |
89 | with open(file_path, 'w') as outfile:
90 | json.dump(all_data, outfile)
91 |
92 | writer_count = 0
93 | json_index += 1
94 |
--------------------------------------------------------------------------------
/data/Femnist/preprocess/data_to_json.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # assumes that the script is run in the preprocess folder
4 |
5 | if [ ! -d "../data" ]; then
6 | mkdir ../data
7 | fi
8 | if [ ! -d "../data/raw_data" ]; then
9 | echo "------------------------------"
10 | echo "downloading data"
11 | mkdir ../data/raw_data
12 | ./get_data.sh
13 | echo "finished downloading data"
14 | fi
15 |
16 | if [ ! -d "../data/intermediate" ]; then # stores .pkl files during preprocessing
17 | mkdir ../data/intermediate
18 | fi
19 |
20 | if [ ! -f ../data/intermediate/class_file_dirs.pkl ]; then
21 | echo "------------------------------"
22 | echo "extracting file directories of images"
23 | python3 get_file_dirs.py
24 | echo "finished extracting file directories of images"
25 | fi
26 |
27 | if [ ! -f ../data/intermediate/class_file_hashes.pkl ]; then
28 | echo "------------------------------"
29 | echo "calculating image hashes"
30 | python3 get_hashes.py
31 | echo "finished calculating image hashes"
32 | fi
33 |
34 | if [ ! -f ../data/intermediate/write_with_class.pkl ]; then
35 | echo "------------------------------"
36 | echo "assigning class labels to write images"
37 | python3 match_hashes.py
38 | echo "finished assigning class labels to write images"
39 | fi
40 |
41 | if [ ! -f ../data/intermediate/images_by_writer.pkl ]; then
42 | echo "------------------------------"
43 | echo "grouping images by writer"
44 | python3 group_by_writer.py
45 | echo "finished grouping images by writer"
46 | fi
47 |
48 | if [ ! -d "../data/all_data" ]; then
49 | mkdir ../data/all_data
50 | fi
51 | if [ ! "$(ls -A ../data/all_data)" ]; then
52 | echo "------------------------------"
53 | echo "converting data to .json format"
54 | python3 data_to_json.py
55 | echo "finished converting data to .json format"
56 | fi
57 |
--------------------------------------------------------------------------------
/data/Femnist/preprocess/get_data.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # assumes that the script is run in the preprocess folder
4 |
5 | cd ../data/raw_data
6 | wget https://s3.amazonaws.com/nist-srd/SD19/by_class.zip
7 | wget https://s3.amazonaws.com/nist-srd/SD19/by_write.zip
8 | unzip by_class.zip
9 | rm by_class.zip
10 | unzip by_write.zip
11 | rm by_write.zip
12 | cd ../../preprocess
13 |
--------------------------------------------------------------------------------
/data/Femnist/preprocess/get_file_dirs.py:
--------------------------------------------------------------------------------
1 | '''
2 | Creates .pkl files for:
3 | 1. list of directories of every image in 'by_class'
4 | 2. list of directories of every image in 'by_write'
5 | the hierarchical structure of the data is as follows:
6 | - by_class -> classes -> folders containing images -> images
7 | - by_write -> folders containing writers -> writer -> types of images -> images
8 | the directories written into the files are of the form 'raw_data/...'
9 | '''
10 |
11 | import os
12 | import sys
13 |
14 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
15 | utils_dir = os.path.join(utils_dir, 'utils')
16 | sys.path.append(utils_dir)
17 |
18 | import utils
19 |
20 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
21 |
22 | class_files = [] # (class, file directory)
23 | write_files = [] # (writer, file directory)
24 |
25 | class_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_class')
26 | rel_class_dir = os.path.join('data', 'raw_data', 'by_class')
27 | classes = os.listdir(class_dir)
28 |
29 | for cl in classes:
30 | cldir = os.path.join(class_dir, cl)
31 | rel_cldir = os.path.join(rel_class_dir, cl)
32 | subcls = os.listdir(cldir)
33 |
34 | subcls = [s for s in subcls if (('hsf' in s) and ('mit' not in s))]
35 |
36 | for subcl in subcls:
37 | subcldir = os.path.join(cldir, subcl)
38 | rel_subcldir = os.path.join(rel_cldir, subcl)
39 | images = os.listdir(subcldir)
40 | image_dirs = [os.path.join(rel_subcldir, i) for i in images]
41 |
42 | for image_dir in image_dirs:
43 | class_files.append((cl, image_dir))
44 |
45 | write_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_write')
46 | rel_write_dir = os.path.join('data', 'raw_data', 'by_write')
47 | write_parts = os.listdir(write_dir)
48 |
49 | for write_part in write_parts:
50 | writers_dir = os.path.join(write_dir, write_part)
51 | rel_writers_dir = os.path.join(rel_write_dir, write_part)
52 | writers = os.listdir(writers_dir)
53 |
54 | for writer in writers:
55 | writer_dir = os.path.join(writers_dir, writer)
56 | rel_writer_dir = os.path.join(rel_writers_dir, writer)
57 | wtypes = os.listdir(writer_dir)
58 |
59 | for wtype in wtypes:
60 | type_dir = os.path.join(writer_dir, wtype)
61 | rel_type_dir = os.path.join(rel_writer_dir, wtype)
62 | images = os.listdir(type_dir)
63 | image_dirs = [os.path.join(rel_type_dir, i) for i in images]
64 |
65 | for image_dir in image_dirs:
66 | write_files.append((writer, image_dir))
67 |
68 | utils.save_obj(
69 | class_files,
70 | os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs'))
71 | utils.save_obj(
72 | write_files,
73 | os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs'))
74 |
--------------------------------------------------------------------------------
/data/Femnist/preprocess/get_hashes.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import os
3 | import sys
4 |
5 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
6 | utils_dir = os.path.join(utils_dir, 'utils')
7 | sys.path.append(utils_dir)
8 |
9 | import utils
10 |
11 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
12 |
13 | cfd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs')
14 | wfd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs')
15 | class_file_dirs = utils.load_obj(cfd)
16 | write_file_dirs = utils.load_obj(wfd)
17 |
18 | class_file_hashes = []
19 | write_file_hashes = []
20 |
21 | count = 0
22 | for tup in class_file_dirs:
23 | if (count%100000 == 0):
24 | print('hashed %d class images' % count)
25 |
26 | (cclass, cfile) = tup
27 | file_path = os.path.join(parent_path, cfile)
28 |
29 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest()
30 |
31 | class_file_hashes.append((cclass, cfile, chash))
32 |
33 | count += 1
34 |
35 | cfhd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes')
36 | utils.save_obj(class_file_hashes, cfhd)
37 |
38 | count = 0
39 | for tup in write_file_dirs:
40 | if (count%100000 == 0):
41 | print('hashed %d write images' % count)
42 |
43 | (cclass, cfile) = tup
44 | file_path = os.path.join(parent_path, cfile)
45 |
46 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest()
47 |
48 | write_file_hashes.append((cclass, cfile, chash))
49 |
50 | count += 1
51 |
52 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes')
53 | utils.save_obj(write_file_hashes, wfhd)
54 |
--------------------------------------------------------------------------------
/data/Femnist/preprocess/group_by_writer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
5 | utils_dir = os.path.join(utils_dir, 'utils')
6 | sys.path.append(utils_dir)
7 |
8 | import utils
9 |
10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
11 |
12 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class')
13 | write_class = utils.load_obj(wwcd)
14 |
15 | writers = [] # each entry is a (writer, [list of (file, class)]) tuple
16 | cimages = []
17 | (cw, _, _) = write_class[0]
18 | for (w, f, c) in write_class:
19 | if w != cw:
20 | writers.append((cw, cimages))
21 | cw = w
22 | cimages = [(f, c)]
23 | cimages.append((f, c))
24 | writers.append((cw, cimages))
25 |
26 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer')
27 | utils.save_obj(writers, ibwd)
28 |
--------------------------------------------------------------------------------
/data/Femnist/preprocess/match_hashes.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
5 | utils_dir = os.path.join(utils_dir, 'utils')
6 | sys.path.append(utils_dir)
7 |
8 | import utils
9 |
10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
11 |
12 | cfhd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes')
13 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes')
14 | class_file_hashes = utils.load_obj(cfhd) # each elem is (class, file dir, hash)
15 | write_file_hashes = utils.load_obj(wfhd) # each elem is (writer, file dir, hash)
16 |
17 | class_hash_dict = {}
18 | for i in range(len(class_file_hashes)):
19 | (c, f, h) = class_file_hashes[len(class_file_hashes)-i-1]
20 | class_hash_dict[h] = (c, f)
21 |
22 | write_classes = []
23 | for tup in write_file_hashes:
24 | (w, f, h) = tup
25 | write_classes.append((w, f, class_hash_dict[h][0]))
26 |
27 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class')
28 | utils.save_obj(write_classes, wwcd)
29 |
--------------------------------------------------------------------------------
/data/Femnist/stats.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | NAME="nist"
4 |
5 | cd ../../utils
6 |
7 | python3 stats.py --name $NAME
8 |
9 | cd ../data/$NAME
--------------------------------------------------------------------------------
/data/Linear_synthetic/data/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/data/Linear_synthetic/data/README.md
--------------------------------------------------------------------------------
/data/Linear_synthetic/generate_linear_regession.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import numpy as np
3 | import json
4 | import random
5 | import os
6 | np.random.seed(0)
7 |
8 | NUM_USER = 100
9 | Kappa = 1.4
10 | Dim = 40
11 | Noise = 0.05
12 |
13 | def generate_x(n_samples = 100, dim= 40, kappa= 10):
14 | '''Helper function to generate data'''
15 |
16 | powers = - np.log(kappa) / np.log(dim) / 2
17 |
18 | S = np.power(np.arange(dim)+1, powers)
19 | X = np.random.randn(n_samples, dim) # Random standard Gaussian data
20 | X *= S
21 | covarient_matrix = np.cov(X)
22 | print("Covarient matrix:",covarient_matrix) # Conditioning
23 | print("np.diag(S)", np.diag(S))
24 | return X, 1, 1/kappa, np.diag(S)
25 |
26 | def generate_linear_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05):
27 |
28 | '''Helper function to generate data'''
29 | # generate power S
30 | powers = - np.log(kappa) / np.log(dim) / 2
31 | DIM = np.arange(dim)
32 |
33 | # Covariance matrix for X
34 | S = np.power(DIM+1, powers)
35 |
36 | # Creat list data for all users
37 | X_split = [[] for _ in range(num_users)] # X for each user
38 | y_split = [[] for _ in range(num_users)] # y for each user
39 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500
40 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0)
41 | num_total_samples = indices_per_user[-1]
42 |
43 | # Create mean of data for each user, each user will have different distribution
44 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)])
45 |
46 |
47 | X_total = np.zeros((num_total_samples, dim))
48 | y_total = np.zeros(num_total_samples)
49 |
50 | for n in range(num_users):
51 | # Generate data
52 | X_n = np.random.multivariate_normal(mean_X[n], np.diag(S), samples_per_user[n])
53 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n
54 |
55 | # Normalize all X's using LAMBDA
56 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples)
57 | X_total /= norm
58 |
59 | # Generate weights and labels
60 | W = np.random.rand(dim)
61 | y_total = X_total.dot(W)
62 | noise_variance = 0.01
63 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples)
64 |
65 | for n in range(num_users):
66 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :]
67 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]]
68 | X_split[n] = X_n.tolist()
69 | y_split[n] = y_n.tolist()
70 |
71 | # print("User {} has {} samples.".format(n, samples_per_user[n]))
72 |
73 | print("=" * 80)
74 | print("Generated synthetic data for logistic regression successfully.")
75 | print("Summary of the generated data:".format(kappa))
76 | print(" Total # users : {}".format(num_users))
77 | print(" Input dimension : {}".format(dim))
78 | print(" rho : {}".format(kappa))
79 | print(" Total # of samples : {}".format(num_total_samples))
80 | print(" Minimum # of samples: {}".format(np.min(samples_per_user)))
81 | print(" Maximum # of samples: {}".format(np.max(samples_per_user)))
82 | print("=" * 80)
83 |
84 | return X_split, y_split
85 |
86 |
87 | def save_total_data():
88 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
89 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
90 |
91 | train_path = os.path.join("data", "train", "mytrain.json")
92 | test_path = os.path.join("data", "test", "mytest.json")
93 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]:
94 | if not os.path.exists(path):
95 | os.makedirs(path)
96 |
97 | X, y = generate_linear_data(NUM_USER, Kappa, Dim, Noise)
98 |
99 | # Create data structure
100 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
101 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
102 |
103 | for i in range(NUM_USER):
104 | uname = 'f_{0:05d}'.format(i)
105 | combined = list(zip(X[i], y[i]))
106 | random.shuffle(combined)
107 | X[i][:], y[i][:] = zip(*combined)
108 | num_samples = len(X[i])
109 | train_len = int(0.75 * num_samples)
110 | test_len = num_samples - train_len
111 | print("User: ",uname, " Num Sample: ", num_samples )
112 | train_data['users'].append(uname)
113 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
114 | train_data['num_samples'].append(train_len)
115 | test_data['users'].append(uname)
116 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
117 | test_data['num_samples'].append(test_len)
118 |
119 | with open(train_path, 'w') as outfile:
120 | json.dump(train_data, outfile)
121 | with open(test_path, 'w') as outfile:
122 | json.dump(test_data, outfile)
123 |
124 | print("=" * 80)
125 | print("Saved all users' data sucessfully.")
126 | print(" Train path:", os.path.join(os.curdir, train_path))
127 | print(" Test path :", os.path.join(os.curdir, test_path))
128 | print("=" * 80)
129 |
130 |
131 | def main():
132 | #generate_x()
133 | save_total_data()
134 |
135 |
136 | if __name__ == '__main__':
137 | main()
138 |
--------------------------------------------------------------------------------
/data/Linear_synthetic/generate_linear_regession_updated.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import numpy as np
3 | import json
4 | import random
5 | import os
6 | np.random.seed(0)
7 |
8 | NUM_USER = 100
9 | Kappa = 1.4
10 | Dim = 40
11 | Noise = 0.05
12 |
13 | def generate_x(n_samples = 100, dim= 40, kappa= 10):
14 | '''Helper function to generate data'''
15 |
16 | powers = - np.log(kappa) / np.log(dim) / 2
17 |
18 | S = np.power(np.arange(dim)+1, powers)
19 | X = np.random.randn(n_samples, dim) # Random standard Gaussian data
20 | X *= S
21 | covarient_matrix = np.cov(X)
22 | print("Covarient matrix:",covarient_matrix) # Conditioning
23 | print("np.diag(S)", np.diag(S))
24 | return X, 1, 1/kappa, np.diag(S)
25 |
26 | def generate_linear_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05):
27 |
28 | '''Helper function to generate data'''
29 | # generate power S
30 | powers = - np.log(kappa) / np.log(dim) / 2
31 | DIM = np.arange(dim)
32 |
33 | # Covariance matrix for X
34 | S = np.power(DIM+1, powers)
35 |
36 | # Creat list data for all users
37 | X_split = [[] for _ in range(num_users)] # X for each user
38 | y_split = [[] for _ in range(num_users)] # y for each user
39 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500
40 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0)
41 | num_total_samples = indices_per_user[-1]
42 |
43 | # Create mean of data for each user, each user will have different distribution
44 | sig = np.random.uniform(0.1, 10)
45 | mean = np.random.uniform(low=-0.1, high=0.1)
46 | cov = np.random.uniform(low=0.0, high=0.01)
47 | #print("mean -cov", mean,cov)
48 | mean_X = np.random.normal(mean, cov, dim)
49 |
50 | X_total = np.zeros((num_total_samples, dim))
51 | y_total = np.zeros(num_total_samples)
52 |
53 | for n in range(num_users):
54 | # Generate data
55 | X_n = np.random.multivariate_normal(mean_X, sig * np.diag(S), samples_per_user[n])
56 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n
57 |
58 | # Normalize all X's using LAMBDA
59 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples)
60 | X_total /= norm
61 |
62 | # Generate weights and labels
63 | W = np.random.rand(dim)
64 | y_total = X_total.dot(W)
65 | noise_variance = 0.01
66 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples)
67 |
68 | for n in range(num_users):
69 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :]
70 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]]
71 | X_split[n] = X_n.tolist()
72 | y_split[n] = y_n.tolist()
73 |
74 | # print("User {} has {} samples.".format(n, samples_per_user[n]))
75 |
76 | print("=" * 80)
77 | print("Generated synthetic data for logistic regression successfully.")
78 | print("Summary of the generated data:".format(kappa))
79 | print(" Total # users : {}".format(num_users))
80 | print(" Input dimension : {}".format(dim))
81 | print(" rho : {}".format(kappa))
82 | print(" Total # of samples : {}".format(num_total_samples))
83 | print(" Minimum # of samples: {}".format(np.min(samples_per_user)))
84 | print(" Maximum # of samples: {}".format(np.max(samples_per_user)))
85 | print("=" * 80)
86 |
87 | return X_split, y_split
88 |
89 |
90 | def save_total_data():
91 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
92 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
93 |
94 | train_path = os.path.join("data", "train", "mytrain.json")
95 | test_path = os.path.join("data", "test", "mytest.json")
96 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]:
97 | if not os.path.exists(path):
98 | os.makedirs(path)
99 |
100 | X, y = generate_linear_data(NUM_USER, Kappa, Dim, Noise)
101 |
102 | # Create data structure
103 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
104 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
105 |
106 | for i in range(NUM_USER):
107 | uname = 'f_{0:05d}'.format(i)
108 | combined = list(zip(X[i], y[i]))
109 | random.shuffle(combined)
110 | X[i][:], y[i][:] = zip(*combined)
111 | num_samples = len(X[i])
112 | train_len = int(0.75 * num_samples)
113 | test_len = num_samples - train_len
114 | print("User: ",uname, " Num Sample: ", num_samples )
115 | train_data['users'].append(uname)
116 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
117 | train_data['num_samples'].append(train_len)
118 | test_data['users'].append(uname)
119 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
120 | test_data['num_samples'].append(test_len)
121 |
122 | with open(train_path, 'w') as outfile:
123 | json.dump(train_data, outfile)
124 | with open(test_path, 'w') as outfile:
125 | json.dump(test_data, outfile)
126 |
127 | print("=" * 80)
128 | print("Saved all users' data sucessfully.")
129 | print(" Train path:", os.path.join(os.curdir, train_path))
130 | print(" Test path :", os.path.join(os.curdir, test_path))
131 | print("=" * 80)
132 |
133 |
134 | def main():
135 | #generate_x()
136 | save_total_data()
137 |
138 |
139 | if __name__ == '__main__':
140 | main()
141 |
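A minimal sanity check of the kappa parametrisation above (not part of the repository): with powers = -log(kappa) / (2 * log(dim)), the diagonal scaling S runs from 1 down to kappa**(-1/2), so the per-feature covariance diag(S**2) has condition number exactly kappa. The constants mirror Dim and Kappa above.

    import numpy as np

    # Hedged check: diag(S**2) built from `powers` has condition number kappa.
    dim, kappa = 40, 1.4
    powers = -np.log(kappa) / np.log(dim) / 2
    S = np.power(np.arange(dim) + 1, powers)
    print((S.max() / S.min()) ** 2)   # -> 1.4 (== kappa)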
--------------------------------------------------------------------------------
/data/Linear_synthetic/generate_linear_synthetic_backup.py:
--------------------------------------------------------------------------------
1 | import json
2 | import math
3 | import numpy as np
4 | import os
5 | import sys
6 | import random
7 | from tqdm import trange
8 | import math
9 |
10 |
11 | NUM_USER = 100
12 | def normalize_data(X):
13 |
14 | # normalize all features of the data to (0, 1)
15 | normX = X - X.min()
16 | normX = normX / (X.max() - X.min())
17 | #normX = normX*2-1 # would rescale to (-1, 1)
18 |
19 | # normalize the data so that -1 < X.X^T < 1.
20 | temp = normX.dot(normX.T)
21 | return normX/np.sqrt(temp.max())
22 |
23 | def generate_synthetic(alpha = 0.5, beta = 0.5):
24 |
25 | # Generate parameters for controlling kappa
26 | dimension = 60
27 | NUM_CLASS = 1
28 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 100
29 | print(samples_per_user)
30 | num_samples = np.sum(samples_per_user)
31 |
32 | X_split = [[] for _ in range(NUM_USER)]
33 | y_split = [[] for _ in range(NUM_USER)]
34 |
35 | #### define some priors ####
36 | mean_W = np.random.normal(0, alpha, NUM_USER)
37 | mean_b = mean_W
38 | B = np.random.normal(0, beta, NUM_USER)
39 | mean_x = np.zeros((NUM_USER, dimension))
40 |
41 | diagonal = np.zeros(dimension)
42 | for j in range(dimension):
43 | diagonal[j] = np.power((j+1), -1.2)
44 | cov_x = np.diag(diagonal)
45 |
46 | for i in range(NUM_USER):
47 | mean_x[i] = np.random.normal(B[i], 1, dimension)
48 | print(mean_x[i])
49 |
50 | for i in range(NUM_USER):
51 |
52 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS))
53 | b = np.random.normal(mean_b[i], 1, NUM_CLASS)
54 |
55 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i])
56 | nom_xx = normalize_data(xx)
57 | yy = np.zeros(samples_per_user[i])
58 |
59 | for j in range(samples_per_user[i]):
60 | yy[j] = np.dot(nom_xx[j], W) + b
61 |
62 | X_split[i] = nom_xx.tolist()
63 | y_split[i] = yy.tolist()
64 |
65 | print("{}-th users has {} exampls".format(i, len(y_split[i])))
66 |
67 | return X_split, y_split
68 |
69 |
70 |
71 | def main():
72 |
73 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]}
74 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]}
75 |
76 | train_path = "data/train/mytrain.json"
77 | test_path = "data/test/mytest.json"
78 |
79 | X, y = generate_synthetic(alpha=0.5, beta=0.5) # synthetic (0.5, 0.5)
80 |
81 |
82 | # Create data structure
83 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]}
84 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]}
85 |
86 | for i in trange(NUM_USER, ncols=120):
87 |
88 | uname = 'f_{0:05d}'.format(i)
89 | combined = list(zip(X[i], y[i]))
90 | random.shuffle(combined)
91 | X[i][:], y[i][:] = zip(*combined)
92 | num_samples = len(X[i])
93 | train_len = int(0.75 * num_samples)
94 | test_len = num_samples - train_len
95 |
96 | train_data['users'].append(uname)
97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
98 | train_data['num_samples'].append(train_len)
99 | test_data['users'].append(uname)
100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
101 | test_data['num_samples'].append(test_len)
102 |
103 |
104 | with open(train_path,'w') as outfile:
105 | json.dump(train_data, outfile)
106 | with open(test_path, 'w') as outfile:
107 | json.dump(test_data, outfile)
108 |
109 |
110 | if __name__ == "__main__":
111 | main()
112 |
113 |
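A hedged sanity check of normalize_data above (not part of the repository): after the min-max step and the division by sqrt(max(X @ X.T)), the largest entry of the Gram matrix equals 1, which is what the "-1 < X.X^T < 1" comment aims for.

    import numpy as np

    # Hedged check: reproduces the helper above and verifies the Gram-matrix bound.
    def normalize_data(X):
        normX = (X - X.min()) / (X.max() - X.min())   # min-max to [0, 1]
        return normX / np.sqrt((normX @ normX.T).max())

    X = np.random.randn(50, 60)
    Xn = normalize_data(X)
    print((Xn @ Xn.T).max())   # -> 1.0 up to floating point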
--------------------------------------------------------------------------------
/data/Linear_synthetic/optimal_solution_finding.py:
--------------------------------------------------------------------------------
1 | import json
2 | import math
3 | import numpy as np
4 | import os
5 | import sys
6 | import random
7 | from tqdm import trange
8 | import math
9 | import numpy as np
10 | from sklearn.linear_model import LinearRegression
11 | from sklearn import metrics
12 | np.random.seed(0)
13 |
14 | NUM_USER = 100
15 |
16 | def normalize_data(X):
17 |
18 | # normalize all features of the data to (0, 1)
19 | normX = X - X.min()
20 | normX = normX / (X.max() - X.min())
21 |
22 | # normalize the data so that -1 < X.X^T < 1.
23 | temp = normX.dot(normX.T)
24 | return normX/np.sqrt(temp.max())
25 |
26 |
27 | def finding_optimal_synthetic(num_users=100, kappa=10, dim = 40, noise_ratio=0.05):
28 |
29 | powers = - np.log(kappa) / np.log(dim) / 2
30 | DIM = np.arange(dim)
31 | S = np.power(DIM+1, powers)
32 |
33 | # Create data lists for all users
34 | X_split = [[] for _ in range(num_users)] # X for each user
35 | y_split = [[] for _ in range(num_users)] # y for each user
36 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 500
37 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0)
38 | num_total_samples = indices_per_user[-1]
39 |
40 | # Create mean of data for each user, each user will have different distribution
41 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)])
42 |
43 | # Covariance matrix for X
44 | X_total = np.zeros((num_total_samples, dim))
45 | y_total = np.zeros(num_total_samples)
46 |
47 | for n in range(num_users):
48 | # Generate data
49 | X_n = np.random.multivariate_normal(mean_X[n], np.diag(S), samples_per_user[n])
50 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n
51 |
52 | # Normalize X so that the spectral norm of X^T X / num_total_samples is 1
53 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples)
54 | X_total /= norm
55 |
56 | # Generate weights and labels
57 | W = np.random.rand(dim)
58 | y_total = X_total.dot(W)
59 | noise_variance = 0.01
60 | y_total = y_total + np.sqrt(noise_ratio) * np.random.randn(num_total_samples)
61 |
62 | for n in range(num_users):
63 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1],:]
64 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]]
65 | X_split[n] = X_n.tolist()
66 | y_split[n] = y_n.tolist()
67 |
68 | # split data to get training data
69 | train_x = []
70 | train_y = []
71 | test_x = []
72 | test_y = []
73 | for i in range(NUM_USER):
74 | num_samples = len(X_split[i])
75 | train_len = int(0.75 * num_samples)
76 | test_len = num_samples - train_len
77 | train_x.append(X_split[i][:train_len])
78 | train_y.append(y_split[i][:train_len])
79 | test_x.append(X_split[i][train_len:])
80 | test_y.append(y_split[i][train_len:])
81 |
82 | train_xc = np.concatenate(train_x)
83 | train_yc = np.concatenate(train_y)
84 | test_xc = np.concatenate(test_x)
85 | test_yc = np.concatenate(test_y)
86 |
87 | # find the optimal solution via the weighted normal equations
88 | X_X_T = np.zeros(shape=(dim+1,dim+1))
89 | X_Y = np.zeros(shape=(dim+1,1))
90 |
91 | for n in range(num_users):
92 | X = np.array(train_x[n])
93 | y = np.array(train_y[n])
94 | one = np.ones((X.shape[0], 1))
95 | Xbar = np.concatenate((one, X), axis = 1)
96 | X_X_T += Xbar.T.dot(Xbar)*len(y)/len(train_yc)
97 | X_Y += np.array(Xbar).T.dot(y).reshape((dim+1, 1))*len(y)/len(train_yc)
98 |
99 | # get optimal point.
100 | w = np.linalg.inv(X_X_T).dot(X_Y)
101 |
102 | # calculate loss over all devices
103 | loss = 0
104 | for n in range(num_users):
105 | X = np.array(train_x[n])
106 | y = np.array(train_y[n])
107 | one = np.ones((X.shape[0], 1))
108 | Xbar = np.concatenate((one, X), axis = 1)
109 | y_predict = Xbar.dot(w)
110 | loss += metrics.mean_squared_error(y, y_predict) * len(y) / len(train_yc)
111 |
112 | return loss
113 |
114 | def main():
115 | loss = 0
116 | loss = finding_optimal_synthetic()
117 | print("loss for train data", loss)
118 |
119 | if __name__ == "__main__":
120 | main()
121 |
122 |
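A hedged cross-check of the optimum above (not part of the repository; the helper name is illustrative): weighting each user's MSE by its sample fraction is the same as an unweighted MSE over the pooled training rows, so the optimal loss can also be recovered from ordinary least squares on the concatenated training data.

    import numpy as np

    # Hedged cross-check: pooled OLS on [1, X] gives the optimum of the sample-weighted MSE.
    def pooled_optimal_loss(train_xc, train_yc):
        Xbar = np.hstack([np.ones((len(train_yc), 1)), train_xc])  # prepend a bias column
        w, *_ = np.linalg.lstsq(Xbar, train_yc, rcond=None)
        return float(np.mean((Xbar @ w - train_yc) ** 2))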
--------------------------------------------------------------------------------
/data/Logistic_synthetic/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/data/Logistic_synthetic/README.md
--------------------------------------------------------------------------------
/data/Logistic_synthetic/logistic_regression.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import numpy as np
3 | import json
4 | import random
5 | import os
6 |
7 |
8 | def logit(X, W):
9 | return 1 / (1 + np.exp(-np.dot(X, W)))
10 |
11 |
12 | def generate_logistic_regression_data(num_users=100, kappa=10, dim=40, noise_ratio=0.05):
13 | # For consistent results
14 | np.random.seed(0)
15 |
16 | # Sanity check
17 | assert(kappa >= 1 and num_users > 0 and dim > 0)
18 |
19 | X_split = [[] for _ in range(num_users)] # X for each user
20 | y_split = [[] for _ in range(num_users)] # y for each user
21 |
22 | # Find users' sample sizes based on the power law (heterogeneity)
23 | samples_per_user = np.random.lognormal(4, 2, num_users).astype(int) + 50 + 10000
24 | indices_per_user = np.insert(samples_per_user.cumsum(), 0, 0, 0)
25 | num_total_samples = indices_per_user[-1]
26 |
27 | # Each user's mean is drawn from N(0, 1) (non-i.i.d. data)
28 | mean_X = np.array([np.random.randn(dim) for _ in range(num_users)])
29 |
30 | # Covariance matrix for X
31 | Sigma = np.eye(dim)
32 |
33 | # L = 1, hyper_learning_rate = LAMBDA
34 | LAMBDA = 100 if kappa == 1 else 1 / (kappa - 1)
35 |
36 | # Keep all users' inputs and labels in one array,
37 | # indexed according to indices_per_user.
38 | # (e.g. X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n)
39 | # (e.g. y_total[indices_per_user[n]:indices_per_user[n+1]] = y_n)
40 | X_total = np.zeros((num_total_samples, dim))
41 | y_total = np.zeros(num_total_samples)
42 |
43 | for n in range(num_users):
44 | # Generate data
45 | X_n = np.random.multivariate_normal(mean_X[n], Sigma, samples_per_user[n])
46 | X_total[indices_per_user[n]:indices_per_user[n+1], :] = X_n
47 |
48 | # Normalize all X's using LAMBDA
49 | norm = np.sqrt(np.linalg.norm(X_total.T.dot(X_total), 2) / num_total_samples)
50 | X_total /= norm + LAMBDA
51 |
52 | # Generate weights and labels
53 | W = np.random.rand(dim)
54 | y_total = logit(X_total, W)
55 | y_total = np.where(y_total > 0.5, 1, 0)
56 |
57 | # Apply noise: randomly flip some of y_n with probability noise_ratio
58 | noise = np.random.binomial(1, noise_ratio, num_total_samples)
59 | y_total = np.multiply(noise - y_total, noise) + np.multiply(y_total, 1 - noise)
60 |
61 | # Save each user's data separately
62 | for n in range(num_users):
63 | X_n = X_total[indices_per_user[n]:indices_per_user[n+1], :]
64 | y_n = y_total[indices_per_user[n]:indices_per_user[n+1]]
65 | X_split[n] = X_n.tolist()
66 | y_split[n] = y_n.tolist()
67 |
68 | # print("User {} has {} samples.".format(n, samples_per_user[n]))
69 |
70 | print("=" * 80)
71 | print("Generated synthetic data for logistic regression successfully.")
72 | print("Summary of the generated data:".format(kappa))
73 | print(" Total # users : {}".format(num_users))
74 | print(" Input dimension : {}".format(dim))
75 | print(" rho : {}".format(kappa))
76 | print(" Total # of samples : {}".format(num_total_samples))
77 | print(" Minimum # of samples: {}".format(np.min(samples_per_user)))
78 | print(" Maximum # of samples: {}".format(np.max(samples_per_user)))
79 | print("=" * 80)
80 |
81 | return X_split, y_split
82 |
83 |
84 | def save_total_data():
85 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
86 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
87 |
88 | train_path = os.path.join("data", "train", "mytrain.json")
89 | test_path = os.path.join("data", "test", "mytest.json")
90 | for path in [os.path.join("data", "train"), os.path.join("data", "test")]:
91 | if not os.path.exists(path):
92 | os.makedirs(path)
93 |
94 | X, y = generate_logistic_regression_data(100, 2, 40, 0.05)
95 |
96 | # Create data structure
97 | train_data = {'users': [], 'user_data': {}, 'num_samples': []}
98 | test_data = {'users': [], 'user_data': {}, 'num_samples': []}
99 |
100 | for i in range(100):
101 | uname = 'f_{0:05d}'.format(i)
102 | combined = list(zip(X[i], y[i]))
103 | random.shuffle(combined)
104 | X[i][:], y[i][:] = zip(*combined)
105 | num_samples = len(X[i])
106 | train_len = int(0.75 * num_samples)
107 | test_len = num_samples - train_len
108 | print("User: ",uname, " Num Sample: ", num_samples )
109 | train_data['users'].append(uname)
110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
111 | train_data['num_samples'].append(train_len)
112 | test_data['users'].append(uname)
113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
114 | test_data['num_samples'].append(test_len)
115 |
116 | with open(train_path, 'w') as outfile:
117 | json.dump(train_data, outfile)
118 | with open(test_path, 'w') as outfile:
119 | json.dump(test_data, outfile)
120 |
121 | print("=" * 80)
122 | print("Saved all users' data sucessfully.")
123 | print(" Train path:", os.path.join(os.curdir, train_path))
124 | print(" Test path :", os.path.join(os.curdir, test_path))
125 | print("=" * 80)
126 |
127 |
128 | def main():
129 | save_total_data()
130 | #save_data_by_user()
131 |
132 |
133 | if __name__ == '__main__':
134 | main()
135 |
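A hedged illustration of the label-noise line above (not part of the repository): the expression flips a label exactly where the Bernoulli draw is 1 and keeps it otherwise, i.e. it is equivalent to np.where(noise == 1, 1 - y, y).

    import numpy as np

    # Hedged equivalence check for the label-flip noise.
    rng = np.random.default_rng(0)
    y = rng.integers(0, 2, size=10)
    noise = rng.binomial(1, 0.3, size=10)
    flipped = np.multiply(noise - y, noise) + np.multiply(y, 1 - noise)
    assert np.array_equal(flipped, np.where(noise == 1, 1 - y, y))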
--------------------------------------------------------------------------------
/data/Mnist/data/mldata/mnist-original.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/data/Mnist/data/mldata/mnist-original.mat
--------------------------------------------------------------------------------
/data/Mnist/generate_iid_20users.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import fetch_mldata
2 | from tqdm import trange
3 | import numpy as np
4 | import random
5 | import json
6 | import os
7 |
8 | random.seed(1)
9 | np.random.seed(1)
10 | NUM_USERS = 20 # should be a multiple of 10
11 | NUM_LABELS = 10
12 | # Setup directory for train/test data
13 | train_path = './data/train/mnist_train.json'
14 | test_path = './data/test/mnist_test.json'
15 | dir_path = os.path.dirname(train_path)
16 | if not os.path.exists(dir_path):
17 | os.makedirs(dir_path)
18 | dir_path = os.path.dirname(test_path)
19 | if not os.path.exists(dir_path):
20 | os.makedirs(dir_path)
21 |
22 | # Get MNIST data, normalize, and divide by level
23 | mnist = fetch_mldata('MNIST original', data_home='./data')
24 | mu = np.mean(mnist.data.astype(np.float32), 0)
25 | sigma = np.std(mnist.data.astype(np.float32), 0)
26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001)
27 | mnist_data = []
28 | for i in trange(10):
29 | idx = mnist.target==i
30 | mnist_data.append(mnist.data[idx])
31 |
32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data])
33 | users_lables = []
34 |
35 | print("idx",idx)
36 | # divide labels among users:
37 | for user in trange(NUM_USERS):
38 | for j in range(NUM_LABELS): # NUM_LABELS labels for each user
39 | l = (user + j) % 10
40 | users_lables.append(l)
41 | unique, counts = np.unique(users_lables, return_counts=True)
42 | print("--------------")
43 | print(unique, counts)
44 |
45 | def ram_dom_gen(total, size):
46 | print(total)
47 | nums = []
48 | temp = []
49 | for i in range(size - 1):
50 | val = np.random.randint(total//(size + 1), total//(size - 8))
51 | temp.append(val)
52 | total -= val
53 | temp.append(total)
54 | print(temp)
55 | return temp
56 | number_sample = []
57 | for total_value, count in zip(mnist_data, counts):
58 | temp = ram_dom_gen(len(total_value), count)
59 | number_sample.append(temp)
60 | print("--------------")
61 | print(number_sample)
62 |
63 | i = 0
64 | number_samples = []
65 | for i in range(len(number_sample[0])):
66 | for sample in number_sample:
67 | print(sample)
68 | number_samples.append(sample[i])
69 |
70 | print("--------------")
71 | print(number_samples)
72 |
73 | ###### CREATE USER DATA SPLIT #######
74 | # Assign samples to each user
75 | X = [[] for _ in range(NUM_USERS)]
76 | y = [[] for _ in range(NUM_USERS)]
77 | count = 0; idx = np.zeros(10, dtype=np.int64) # reset per-label counters (the earlier idx was a boolean mask)
78 | for user in trange(NUM_USERS):
79 | for j in range(NUM_LABELS): # NUM_LABELS labels for each user
80 | l = (user + j) % 10
81 | print("value of L",l)
82 | print("value of count",count)
83 | num_samples = number_samples[count] # num sample
84 | count = count + 1
85 | if idx[l] + num_samples < len(mnist_data[l]):
86 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist()
87 | y[user] += (l*np.ones(num_samples)).tolist()
88 | idx[l] += num_samples
89 | print("check len of user:", user, j, "len data", len(X[user]), num_samples)
90 |
91 | print("IDX2:", idx) # counting samples for each labels
92 |
93 | # Create data structure
94 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]}
95 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]}
96 |
97 | # Set up all users
98 | # for i in trange(5, ncols=120):
99 | for i in range(NUM_USERS):
100 | uname = 'f_{0:05d}'.format(i)
101 |
102 | combined = list(zip(X[i], y[i]))
103 | random.shuffle(combined)
104 | X[i][:], y[i][:] = zip(*combined)
105 | num_samples = len(X[i])
106 | train_len = int(0.75*num_samples)
107 | test_len = num_samples - train_len
108 |
109 | train_data['users'].append(uname)
110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
111 | train_data['num_samples'].append(train_len)
112 | test_data['users'].append(uname)
113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
114 | test_data['num_samples'].append(test_len)
115 |
116 | print("Num_samples:", train_data['num_samples'])
117 | print("Total_samples:",sum(train_data['num_samples'] + test_data['num_samples']))
118 |
119 | with open(train_path,'w') as outfile:
120 | json.dump(train_data, outfile)
121 | with open(test_path, 'w') as outfile:
122 | json.dump(test_data, outfile)
123 |
124 | print("Finish Generating Samples")
125 |
--------------------------------------------------------------------------------
/data/Mnist/generate_niid_100users_updated.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import fetch_mldata
2 | from tqdm import trange
3 | import numpy as np
4 | import random
5 | import json
6 | import os
7 |
8 | random.seed(1)
9 | np.random.seed(1)
10 | NUM_USERS = 100
11 | NUM_LABELS = 3
12 | # Setup directory for train/test data
13 | train_path = './data/train/mnist_train.json'
14 | test_path = './data/test/mnist_test.json'
15 | dir_path = os.path.dirname(train_path)
16 | if not os.path.exists(dir_path):
17 | os.makedirs(dir_path)
18 | dir_path = os.path.dirname(test_path)
19 | if not os.path.exists(dir_path):
20 | os.makedirs(dir_path)
21 |
22 | # Get MNIST data, normalize, and divide by level
23 | mnist = fetch_mldata('MNIST original', data_home='./data')
24 | mu = np.mean(mnist.data.astype(np.float32), 0)
25 | sigma = np.std(mnist.data.astype(np.float32), 0)
26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001)
27 | mnist_data = []
28 | for i in trange(10):
29 | idx = mnist.target==i
30 | mnist_data.append(mnist.data[idx])
31 |
32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data])
33 |
34 | ###### CREATE USER DATA SPLIT #######
35 | # Seed each user with 10 samples per label
36 | X = [[] for _ in range(NUM_USERS)]
37 | y = [[] for _ in range(NUM_USERS)]
38 | idx = np.zeros(10, dtype=np.int64)
39 | for user in range(NUM_USERS):
40 | for j in range(NUM_LABELS): # 3 labels for each user
41 | #l = (2*user+j)%10
42 | l = (user + j) % 10
43 | print("L:", l)
44 | X[user] += mnist_data[l][idx[l]:idx[l]+10].tolist()
45 | y[user] += (l*np.ones(10)).tolist()
46 | idx[l] += 10
47 |
48 | print("IDX1:", idx) # counting samples for each labels
49 |
50 | # Assign the remaining samples by a power law (lognormal proportions)
51 | user = 0
52 | props = np.random.lognormal(
53 | 0, 2., (10, NUM_USERS, NUM_LABELS)) # shape: (10 digits, NUM_USERS, NUM_LABELS)
54 | props = np.array([[[len(v)-1000]] for v in mnist_data]) * \
55 | props/np.sum(props, (1, 2), keepdims=True)
56 | # print("here:",props/np.sum(props,(1,2), keepdims=True))
57 | #props = np.array([[[len(v)-100]] for v in mnist_data]) * \
58 | # props/np.sum(props, (1, 2), keepdims=True)
59 | #idx = 1000*np.ones(10, dtype=np.int64)
60 | # print("here2:",props)
61 | for user in trange(NUM_USERS):
62 | for j in range(NUM_LABELS): # NUM_LABELS labels for each user
63 | # l = (2*user+j)%10
64 | l = (user + j) % 10
65 | num_samples = int(props[l, user//int(NUM_USERS/10), j])
66 | numran1 = random.randint(10, 200)
67 | numran2 = random.randint(1, 10)
68 | num_samples = (num_samples) * numran2 + numran1
69 | if(NUM_USERS <= 20):
70 | num_samples = num_samples * 2
71 | if idx[l] + num_samples < len(mnist_data[l]):
72 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist()
73 | y[user] += (l*np.ones(num_samples)).tolist()
74 | idx[l] += num_samples
75 | print("check len os user:", user, j,
76 | "len data", len(X[user]), num_samples)
77 |
78 | print("IDX2:", idx) # counting samples for each labels
79 |
80 | # Create data structure
81 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]}
82 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]}
83 |
84 | # Set up all users
85 | # for i in trange(5, ncols=120):
86 | for i in range(NUM_USERS):
87 | uname = 'f_{0:05d}'.format(i)
88 |
89 | combined = list(zip(X[i], y[i]))
90 | random.shuffle(combined)
91 | X[i][:], y[i][:] = zip(*combined)
92 | num_samples = len(X[i])
93 | train_len = int(0.75*num_samples)
94 | test_len = num_samples - train_len
95 |
96 | train_data['users'].append(uname)
97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
98 | train_data['num_samples'].append(train_len)
99 | test_data['users'].append(uname)
100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
101 | test_data['num_samples'].append(test_len)
102 |
103 | print("Num_samples:", train_data['num_samples'])
104 | print("Total_samples:",sum(train_data['num_samples']))
105 |
106 | with open(train_path,'w') as outfile:
107 | json.dump(train_data, outfile)
108 | with open(test_path, 'w') as outfile:
109 | json.dump(test_data, outfile)
110 |
111 | print("Finish Generating Samples")
112 |
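A hedged inspection helper (not part of the repository): after running the generator above, print each user's label histogram from the saved JSON; each user should be dominated by roughly NUM_LABELS consecutive digits.

    import json
    from collections import Counter

    # Hedged check of the non-iid split written by the generator above.
    with open('./data/train/mnist_train.json') as f:
        train = json.load(f)
    for uname in train['users'][:5]:
        labels = Counter(int(v) for v in train['user_data'][uname]['y'])
        print(uname, dict(sorted(labels.items())))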
--------------------------------------------------------------------------------
/data/Mnist/generate_niid_20users.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import fetch_openml
2 | from tqdm import trange
3 | import numpy as np
4 | import random
5 | import json
6 | import os
7 |
8 | random.seed(1)
9 | np.random.seed(1)
10 | NUM_USERS = 20 # should be a multiple of 10
11 | NUM_LABELS = 2
12 | # Setup directory for train/test data
13 | train_path = './data/train/mnist_train.json'
14 | test_path = './data/test/mnist_test.json'
15 | dir_path = os.path.dirname(train_path)
16 | if not os.path.exists(dir_path):
17 | os.makedirs(dir_path)
18 | dir_path = os.path.dirname(test_path)
19 | if not os.path.exists(dir_path):
20 | os.makedirs(dir_path)
21 |
22 | # Get MNIST data, normalize, and divide by level
23 | mnist = fetch_openml('mnist_784', data_home='./data')
24 | mu = np.mean(mnist.data.astype(np.float32), 0)
25 | sigma = np.std(mnist.data.astype(np.float32), 0)
26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001)
27 | mnist_data = []
28 | for i in trange(10):
29 | idx = mnist.target==str(i)
30 | mnist_data.append(mnist.data[idx])
31 |
32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data])
33 | users_lables = []
34 |
35 | print("idx",idx)
36 | # divide labels among users:
37 | for user in trange(NUM_USERS):
38 | for j in range(NUM_LABELS): # NUM_LABELS labels for each user
39 | l = (user + j) % 10
40 | users_lables.append(l)
41 | unique, counts = np.unique(users_lables, return_counts=True)
42 | print("--------------")
43 | print(unique, counts)
44 |
45 | def ram_dom_gen(total, size):
46 | print(total)
47 | nums = []
48 | temp = []
49 | for i in range(size - 1):
50 | val = np.random.randint(total//(size + 1), total//2)
51 | temp.append(val)
52 | total -= val
53 | temp.append(total)
54 | print(temp)
55 | return temp
56 | number_sample = []
57 | for total_value, count in zip(mnist_data, counts):
58 | temp = ram_dom_gen(len(total_value), count)
59 | number_sample.append(temp)
60 | print("--------------")
61 | print(number_sample)
62 |
63 | i = 0
64 | number_samples = []
65 | for i in range(len(number_sample[0])):
66 | for sample in number_sample:
67 | print(sample)
68 | number_samples.append(sample[i])
69 |
70 | print("--------------")
71 | print(number_samples)
72 |
73 | ###### CREATE USER DATA SPLIT #######
74 | # Assign samples to each user
75 | X = [[] for _ in range(NUM_USERS)]
76 | y = [[] for _ in range(NUM_USERS)]
77 | count = 0; idx = np.zeros(10, dtype=np.int64) # reset per-label counters (the earlier idx was a boolean mask)
78 | for user in trange(NUM_USERS):
79 | for j in range(NUM_LABELS): # NUM_LABELS labels for each user
80 | l = (user + j) % 10
81 | print("value of L",l)
82 | print("value of count",count)
83 | num_samples = number_samples[count] # num sample
84 | count = count + 1
85 | if idx[l] + num_samples < len(mnist_data[l]):
86 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist()
87 | y[user] += (l*np.ones(num_samples)).tolist()
88 | idx[l] += num_samples
89 | print("check len of user:", user, j, "len data", len(X[user]), num_samples)
90 |
91 | print("IDX2:", idx) # counting samples for each labels
92 |
93 | # Create data structure
94 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]}
95 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]}
96 |
97 | # Set up all users
98 | # for i in trange(5, ncols=120):
99 | for i in range(NUM_USERS):
100 | uname = 'f_{0:05d}'.format(i)
101 |
102 | combined = list(zip(X[i], y[i]))
103 | random.shuffle(combined)
104 | X[i][:], y[i][:] = zip(*combined)
105 | num_samples = len(X[i])
106 | train_len = int(0.75*num_samples)
107 | test_len = num_samples - train_len
108 |
109 | train_data['users'].append(uname)
110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
111 | train_data['num_samples'].append(train_len)
112 | test_data['users'].append(uname)
113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
114 | test_data['num_samples'].append(test_len)
115 |
116 | print("Num_samples:", train_data['num_samples'])
117 | print("Total_samples:",sum(train_data['num_samples'] + test_data['num_samples']))
118 |
119 | with open(train_path,'w') as outfile:
120 | json.dump(train_data, outfile)
121 | with open(test_path, 'w') as outfile:
122 | json.dump(test_data, outfile)
123 |
124 | print("Finish Generating Samples")
125 |
--------------------------------------------------------------------------------
/data/Mnist/generate_niid_mnist_100users.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import fetch_mldata
2 | from tqdm import trange
3 | import numpy as np
4 | import random
5 | import json
6 | import os
7 |
8 | random.seed(1)
9 | np.random.seed(1)
10 | NUM_USERS = 100
11 | NUM_LABELS = 3
12 | # Setup directory for train/test data
13 | train_path = './data/train/mnist_train.json'
14 | test_path = './data/test/mnist_test.json'
15 | dir_path = os.path.dirname(train_path)
16 | if not os.path.exists(dir_path):
17 | os.makedirs(dir_path)
18 | dir_path = os.path.dirname(test_path)
19 | if not os.path.exists(dir_path):
20 | os.makedirs(dir_path)
21 |
22 | # Get MNIST data, normalize, and divide by level
23 | mnist = fetch_mldata('MNIST original', data_home='./data')
24 | mu = np.mean(mnist.data.astype(np.float32), 0)
25 | sigma = np.std(mnist.data.astype(np.float32), 0)
26 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001)
27 | mnist_data = []
28 | for i in trange(10):
29 | idx = mnist.target==i
30 | mnist_data.append(mnist.data[idx])
31 |
32 | print("\nNumb samples of each label:\n", [len(v) for v in mnist_data])
33 |
34 | ###### CREATE USER DATA SPLIT #######
35 | # Seed each user with 10 samples per label
36 | X = [[] for _ in range(NUM_USERS)]
37 | y = [[] for _ in range(NUM_USERS)]
38 | idx = np.zeros(10, dtype=np.int64)
39 | for user in range(NUM_USERS):
40 | for j in range(NUM_LABELS): # 3 labels for each user
41 | #l = (2*user+j)%10
42 | l = (user + j) % 10
43 | print("L:", l)
44 | X[user] += mnist_data[l][idx[l]:idx[l]+10].tolist()
45 | y[user] += (l*np.ones(10)).tolist()
46 | idx[l] += 10
47 |
48 | print("IDX1:", idx) # counting samples for each labels
49 |
50 | # Assign the remaining samples by a power law (lognormal proportions)
51 | user = 0
52 | props = np.random.lognormal(
53 | 0, 2., (10, NUM_USERS, NUM_LABELS)) # shape: (10 digits, NUM_USERS, NUM_LABELS)
54 | props = np.array([[[len(v)-1000]] for v in mnist_data]) * \
55 | props/np.sum(props, (1, 2), keepdims=True)
56 | # print("here:",props/np.sum(props,(1,2), keepdims=True))
57 | #props = np.array([[[len(v)-100]] for v in mnist_data]) * \
58 | # props/np.sum(props, (1, 2), keepdims=True)
59 | #idx = 1000*np.ones(10, dtype=np.int64)
60 | # print("here2:",props)
61 | for user in trange(NUM_USERS):
62 | for j in range(NUM_LABELS): # NUM_LABELS labels for each user
63 | # l = (2*user+j)%10
64 | l = (user + j) % 10
65 | num_samples = int(props[l, user//int(NUM_USERS/10), j])
66 | numran1 = random.randint(10, 200)
67 | numran2 = random.randint(1, 10)
68 | num_samples = (num_samples) * numran2 + numran1
69 | if(NUM_USERS <= 20):
70 | num_samples = num_samples * 2
71 | if idx[l] + num_samples < len(mnist_data[l]):
72 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist()
73 | y[user] += (l*np.ones(num_samples)).tolist()
74 | idx[l] += num_samples
75 | print("check len os user:", user, j,
76 | "len data", len(X[user]), num_samples)
77 |
78 | print("IDX2:", idx) # counting samples for each labels
79 |
80 | # Create data structure
81 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]}
82 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]}
83 |
84 | # Set up all users
85 | # for i in trange(5, ncols=120):
86 | for i in range(NUM_USERS):
87 | uname = 'f_{0:05d}'.format(i)
88 |
89 | combined = list(zip(X[i], y[i]))
90 | random.shuffle(combined)
91 | X[i][:], y[i][:] = zip(*combined)
92 | num_samples = len(X[i])
93 | train_len = int(0.75*num_samples)
94 | test_len = num_samples - train_len
95 |
96 | train_data['users'].append(uname)
97 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]}
98 | train_data['num_samples'].append(train_len)
99 | test_data['users'].append(uname)
100 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]}
101 | test_data['num_samples'].append(test_len)
102 |
103 | print("Num_samples:", train_data['num_samples'])
104 | print("Total_samples:",sum(train_data['num_samples']))
105 |
106 | with open(train_path,'w') as outfile:
107 | json.dump(train_data, outfile)
108 | with open(test_path, 'w') as outfile:
109 | json.dump(test_data, outfile)
110 |
111 | print("Finish Generating Samples")
112 |
--------------------------------------------------------------------------------
/flearn/optimizers/__pycache__/fedoptimizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/optimizers/__pycache__/fedoptimizer.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/optimizers/fedoptimizer.py:
--------------------------------------------------------------------------------
1 | from torch.optim import Optimizer
2 | import torch
3 |
4 |
5 | class MySGD(Optimizer):
6 | def __init__(self, params, lr):
7 | defaults = dict(lr=lr)
8 | super(MySGD, self).__init__(params, defaults)
9 |
10 | def step(self, closure=None, hyper_learning_rate=0):
11 | loss = None
12 | if closure is not None:
13 | loss = closure
14 |
15 | for group in self.param_groups:
16 | # print(group)
17 | for p in group['params']:
18 | if p.grad is None:
19 | continue
20 | d_p = p.grad.data
21 | if (hyper_learning_rate != 0):
22 | p.data.add_(d_p, alpha=-hyper_learning_rate)
23 | else:
24 | p.data.add_(d_p, alpha=-group['lr'])
25 | return loss
26 |
27 |
28 | class FEDLOptimizer(Optimizer):
29 | def __init__(self, params, lr=0.01, hyper_lr=0.01, L=0.1):
30 | if lr < 0.0:
31 | raise ValueError("Invalid learning rate: {}".format(lr))
32 | defaults = dict(lr=lr, hyper_lr=hyper_lr, L=L)
33 | super(FEDLOptimizer, self).__init__(params, defaults)
34 |
35 | def step(self, server_grads, pre_grads, closure=None):
36 | loss = None
37 | if closure is not None:
38 | loss = closure
39 | for group in self.param_groups:
40 | for p, server_grad, pre_grad in zip(group['params'], server_grads, pre_grads):
41 | if (server_grad.grad is not None and pre_grad.grad is not None):
42 | p.data = p.data - group['lr'] * (
43 | p.grad.data + group['hyper_lr'] * server_grad.grad.data - pre_grad.grad.data)
44 | else:
45 | p.data = p.data - group['lr'] * p.grad.data
46 | return loss
47 |
48 |
49 | class pFedMeOptimizer(Optimizer):
50 | def __init__(self, params, lr=0.01, L=0.1, mu=0.001):
51 | # self.local_weight_updated = local_weight # w_i,K
52 | if lr < 0.0:
53 | raise ValueError("Invalid learning rate: {}".format(lr))
54 | defaults = dict(lr=lr, L=L, mu=mu)
55 | super(pFedMeOptimizer, self).__init__(params, defaults)
56 |
57 | def step(self, local_weight_updated, closure=None):
58 | loss = None
59 | if closure is not None:
60 | loss = closure
61 | weight_update = local_weight_updated.copy()
62 | for group in self.param_groups:
63 | for p, localweight in zip(group['params'], weight_update):
64 | p.data = p.data - group['lr'] * (
65 | p.grad.data + group['L'] * (p.data - localweight.data) + group['mu'] * p.data)
66 | return group['params'], loss
67 |
68 | def update_param(self, local_weight_updated, closure=None):
69 | loss = None
70 | if closure is not None:
71 | loss = closure
72 | weight_update = local_weight_updated.copy()
73 | for group in self.param_groups:
74 | for p, localweight in zip(group['params'], weight_update):
75 | p.data = localweight.data
76 | # return p.data
77 | return group['params']
78 |
79 |
80 | class SCAFFOLDOptimizer(Optimizer):
81 | def __init__(self, params, lr, weight_decay):
82 | defaults = dict(lr=lr, weight_decay=weight_decay)
83 | super(SCAFFOLDOptimizer, self).__init__(params, defaults)
84 | pass
85 |
86 | def step(self, server_controls, client_controls, closure=None):
87 | loss = None
88 | if closure is not None:
89 | loss = closure
90 |
91 | for group, c, ci in zip(self.param_groups, server_controls, client_controls):
92 | p = group['params'][0]
93 | if p.grad is None:
94 | continue
95 | d_p = p.grad.data + c.data - ci.data
96 | p.data = p.data - d_p.data * group['lr']
97 | # for group in self.param_groups:
98 | # for p, c, ci in zip(group['params'], server_controls, client_controls):
99 | # if p.grad is None:
100 | # continue
101 | # d_p = p.grad.data + c.data - ci.data
102 | # p.data = p.data - d_p.data * group['lr']
103 | return loss
104 |
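A hedged usage sketch of SCAFFOLDOptimizer (assumptions: a toy linear model and zero-initialised controls; none of this appears in the repository). Because the active step() above pairs one param_group with one control tensor, the optimizer here is built from per-parameter groups.

    import torch
    from flearn.optimizers.fedoptimizer import SCAFFOLDOptimizer

    model = torch.nn.Linear(10, 2)
    groups = [{'params': [p]} for p in model.parameters()]   # one group per parameter
    opt = SCAFFOLDOptimizer(groups, lr=0.01, weight_decay=0.0)
    server_controls = [torch.zeros_like(p) for p in model.parameters()]
    client_controls = [torch.zeros_like(p) for p in model.parameters()]

    loss = model(torch.randn(4, 10)).sum()
    loss.backward()
    opt.step(server_controls, client_controls)   # update: p -= lr * (grad + c - c_i)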
--------------------------------------------------------------------------------
/flearn/servers/__pycache__/serveravg.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/servers/__pycache__/serveravg.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/servers/__pycache__/serverbase.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/servers/__pycache__/serverbase.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/servers/__pycache__/serverfedl.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/servers/__pycache__/serverfedl.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/servers/__pycache__/serverscaffold.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/servers/__pycache__/serverscaffold.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/servers/server_avg.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 | import h5py
4 |
5 | from flearn.users.user_avg import UserAVG
6 | from flearn.servers.server_base import Server
7 | from utils.model_utils import read_data, read_user_data
8 | import numpy as np
9 | from scipy.stats import rayleigh
10 |
11 |
12 | # Implementation for FedAvg Server
13 | class FedAvg(Server):
14 | def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, local_epochs,
15 | users_per_round, similarity, noise, times):
16 | super().__init__(dataset, algorithm, model[0], batch_size, learning_rate, L, num_glob_iters, local_epochs,
17 | users_per_round, similarity, noise, times)
18 |
19 | # Initialize data for all users
20 | data = read_data(dataset)
21 | total_users = len(data[0])
22 | for i in range(total_users):
23 | id, train, test = read_user_data(i, data, dataset)
24 | user = UserAVG(id, train, test, model, batch_size, learning_rate, L, local_epochs)
25 | self.users.append(user)
26 | self.total_train_samples += user.train_samples
27 |
28 | if self.noise:
29 | self.communication_thresh = rayleigh.ppf(1 - users_per_round / total_users) # h_min
30 |
31 | print("Number of users / total users:", users_per_round, " / ", total_users)
32 | print("Finished creating FedAvg server.")
33 |
34 | def train(self):
35 | loss = []
36 | for glob_iter in range(self.num_glob_iters):
37 | print("-------------Round number: ", glob_iter, " -------------")
38 | # loss_ = 0
39 | self.send_parameters()
40 |
41 | # Evaluate the model at each iteration
42 | self.evaluate()
43 |
44 | if self.noise:
45 | self.selected_users = self.select_transmitting_users()
46 | print(f"Transmitting {len(self.selected_users)} users")
47 | else:
48 | self.selected_users = self.select_users(glob_iter, self.users_per_round)
49 |
50 | for user in self.selected_users:
51 | user.train()
52 | user.drop_lr()
53 |
54 | self.aggregate_parameters()
55 | self.get_max_norm()
56 |
57 | if self.noise:
58 | self.apply_channel_effect()
59 |
60 | self.save_results()
61 | self.save_norms()
62 | self.save_model()
63 |
64 | def get_max_norm(self): # note: shadowed by the second get_max_norm definition below
65 | param_norms = []
66 | for user in self.selected_users:
67 | param_norm, control_norm = user.get_params_norm()
68 | param_norms.append(param_norm)
69 | self.param_norms.append(max(param_norms))
70 |
71 | def aggregate_parameters(self):
72 | assert (self.users is not None and len(self.users) > 0)
73 | total_train = 0
74 | for user in self.selected_users:
75 | total_train += user.train_samples
76 | for user in self.selected_users:
77 | self.add_parameters(user, user.train_samples / total_train)
78 |
79 | def add_parameters(self, user, ratio):
80 | for server_param, del_model in zip(self.model.parameters(), user.delta_model):
81 | num_of_selected_users = len(self.selected_users)
82 | # server_param.data = server_param.data + del_model.data * ratio
83 | server_param.data = server_param.data + del_model.data / num_of_selected_users
84 |
85 | def get_max_norm(self):
86 | param_norms = []
87 | for user in self.selected_users:
88 | param_norms.append(user.get_params_norm())
89 | self.param_norms.append(max(param_norms))
90 |
91 | def apply_channel_effect(self, sigma=1, power_control=2500):
92 | num_of_selected_users = len(self.selected_users)
93 | users_norms = []
94 | for user in self.selected_users:
95 | users_norms.append(user.get_params_norm())
96 | alpha_t = power_control / max(users_norms) ** 2
97 | for param in self.model.parameters():
98 | param.data = param.data + sigma / (alpha_t ** 0.5 * num_of_selected_users * self.communication_thresh)\
99 | * torch.randn(param.data.size())
100 |
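A hedged check of the Rayleigh threshold used above (not part of the repository): with h_min = rayleigh.ppf(1 - K / N), a user's Rayleigh-distributed CSI exceeds h_min with probability K / N, so on average K of the N users transmit in a noisy round.

    from scipy.stats import rayleigh

    # Hedged check: survival probability at the threshold equals K / N.
    K, N = 10, 100
    h_min = rayleigh.ppf(1 - K / N)
    print(rayleigh.sf(h_min))   # -> 0.1 == K / N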
--------------------------------------------------------------------------------
/flearn/servers/server_base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 | import h5py
4 |
5 | import numpy as np
6 | from utils.model_utils import Metrics
7 | import copy
8 | from scipy.stats import rayleigh
9 |
10 |
11 | class Server:
12 | def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L,
13 | num_glob_iters, local_epochs, users_per_round, similarity, noise, times):
14 |
15 | # Set up the main attributes
16 | self.dataset = dataset
17 | self.num_glob_iters = num_glob_iters
18 | self.local_epochs = local_epochs
19 | self.batch_size = batch_size
20 | self.learning_rate = learning_rate
21 | self.total_train_samples = 0
22 | self.model = copy.deepcopy(model)
23 | self.users = []
24 | self.selected_users = []
25 | self.users_per_round = users_per_round
26 | self.L = L
27 | self.algorithm = algorithm
28 | self.rs_train_acc, self.rs_train_loss, self.rs_glob_acc = [], [], []
29 |
30 | self.times = times
31 | self.similarity = similarity
32 | self.noise = noise
33 | self.communication_thresh = None
34 | self.param_norms = []
35 | self.control_norms = None
36 |
37 | def aggregate_grads(self):
38 | assert (self.users is not None and len(self.users) > 0)
39 | for param in self.model.parameters():
40 | param.grad = torch.zeros_like(param.data)
41 | for user in self.users:
42 | self.add_grad(user, user.train_samples / self.total_train_samples)
43 |
44 | def send_parameters(self):
45 | assert (self.users is not None and len(self.users) > 0)
46 | for user in self.users:
47 | user.set_parameters(self.model)
48 |
49 | def save_model(self):
50 | model_path = os.path.join("models", self.dataset)
51 | if not os.path.exists(model_path):
52 | os.makedirs(model_path)
53 | torch.save(self.model, os.path.join(model_path, "server" + ".pt"))
54 |
55 | def load_model(self):
56 | model_path = os.path.join("models", self.dataset, "server" + ".pt")
57 | assert (os.path.exists(model_path))
58 | self.model = torch.load(model_path)
59 |
60 | def model_exists(self):
61 | return os.path.exists(os.path.join("models", self.dataset, "server" + ".pt"))
62 |
63 | def select_users(self, round, users_per_round):
64 | if users_per_round in [len(self.users), 0]:
65 | return self.users
66 |
67 | users_per_round = min(users_per_round, len(self.users))
68 | # keep the list of selected users consistent across runs
69 | np.random.seed(round * (self.times + 1))
70 | return np.random.choice(self.users, users_per_round, replace=False) # , p=pk)
71 |
72 | def select_transmitting_users(self):
73 | transmitting_users = []
74 | for user in self.users:
75 | user.csi = rayleigh.rvs()
76 | if user.csi >= self.communication_thresh:
77 | transmitting_users.append(user)
78 | return transmitting_users
79 |
80 | def save_results(self):
81 | """ Save loss, accuracy to h5 file"""
82 | file_name = "./results/" + self.dataset + "_" + self.algorithm
83 | file_name += "_" + str(self.similarity) + "s"
84 | if self.noise:
85 | file_name += '_noisy'
86 | file_name += "_" + str(self.times) + ".h5"
87 | if len(self.rs_glob_acc) and len(self.rs_train_acc) and len(self.rs_train_loss):
88 | with h5py.File(file_name, 'w') as hf:
89 | hf.create_dataset('rs_glob_acc', data=self.rs_glob_acc)
90 | hf.create_dataset('rs_train_acc', data=self.rs_train_acc)
91 | hf.create_dataset('rs_train_loss', data=self.rs_train_loss)
92 |
93 | def save_norms(self):
94 | """ Save norms, to h5 file"""
95 | file_name = "./results/" + self.dataset + "_" + self.algorithm + '_norms'
96 | file_name += "_" + str(self.similarity) + "s"
97 | if self.noise:
98 | file_name += '_noisy'
99 | file_name += "_" + str(self.times) + ".h5"
100 |
101 | if len(self.param_norms):
102 | with h5py.File(file_name, 'w') as hf:
103 | hf.create_dataset('rs_param_norms', data=self.param_norms)
104 | if self.algorithm == 'SCAFFOLD':
105 | hf.create_dataset('rs_control_norms', data=self.control_norms)
106 |
107 | def test(self):
108 | '''tests self.latest_model on given clients
109 | '''
110 | num_samples = []
111 | tot_correct = []
112 | losses = []
113 | for c in self.users:
114 | ct, ns = c.test()
115 | tot_correct.append(ct * 1.0)
116 | num_samples.append(ns)
117 | ids = [c.user_id for c in self.users]
118 |
119 | return ids, num_samples, tot_correct
120 |
121 | def train_error_and_loss(self):
122 | num_samples = []
123 | tot_correct = []
124 | losses = []
125 | for c in self.users:
126 | ct, cl, ns = c.train_error_and_loss()
127 | tot_correct.append(ct * 1.0)
128 | num_samples.append(ns)
129 | losses.append(cl * 1.0)
130 |
131 | ids = [c.user_id for c in self.users]
132 | # groups = [c.group for c in self.clients]
133 |
134 | return ids, num_samples, tot_correct, losses
135 |
136 | def evaluate(self):
137 | stats = self.test()
138 | stats_train = self.train_error_and_loss()
139 | glob_acc = np.sum(stats[2]) * 1.0 / np.sum(stats[1])
140 | train_acc = np.sum(stats_train[2]) * 1.0 / np.sum(stats_train[1])
141 | # train_loss = np.dot(stats_train[3], stats_train[1])*1.0/np.sum(stats_train[1])
142 | train_loss = sum([x * y for (x, y) in zip(stats_train[1], stats_train[3])]).item() / np.sum(stats_train[1])
143 | self.rs_glob_acc.append(glob_acc)
144 | self.rs_train_acc.append(train_acc)
145 | self.rs_train_loss.append(train_loss)
146 | # print("stats_train[1]",stats_train[3][0])
147 | print("Average Global Accurancy: ", glob_acc)
148 | print("Average Global Trainning Accurancy: ", train_acc)
149 | print("Average Global Trainning Loss: ", train_loss)
150 |
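A hedged reader sketch for the files written by save_results above (the file name follows that method, e.g. the Femnist_FedAvg_0.1s_0.h5 files under results/; the exact path is an assumption).

    import h5py

    # Hedged reader: load the curves saved by save_results.
    with h5py.File('./results/Femnist_FedAvg_0.1s_0.h5', 'r') as hf:
        glob_acc = hf['rs_glob_acc'][:]
        train_loss = hf['rs_train_loss'][:]
    print(len(glob_acc), float(glob_acc.max()), float(train_loss.min()))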
--------------------------------------------------------------------------------
/flearn/servers/server_scaffold.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 |
4 | import h5py
5 | from flearn.users.user_scaffold import UserSCAFFOLD
6 | from flearn.servers.server_base import Server
7 | from utils.model_utils import read_data, read_user_data
8 | import numpy as np
9 | from scipy.stats import rayleigh
10 |
11 |
12 | # Implementation for SCAFFOLD Server
13 | class SCAFFOLD(Server):
14 | def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters,
15 | local_epochs, users_per_round, similarity, noise, times):
16 | super().__init__(dataset, algorithm, model[0], batch_size, learning_rate, L,
17 | num_glob_iters, local_epochs, users_per_round, similarity, noise, times)
18 | self.control_norms = []
19 |
20 | # Initialize data for all users
21 | data = read_data(dataset)
22 | total_users = len(data[0])
23 | for i in range(total_users):
24 | id, train, test = read_user_data(i, data, dataset)
25 | user = UserSCAFFOLD(id, train, test, model, batch_size, learning_rate, L, local_epochs)
26 | self.users.append(user)
27 | self.total_train_samples += user.train_samples
28 |
29 | if self.noise:
30 | self.communication_thresh = rayleigh.ppf(1 - users_per_round / total_users) # h_min
31 |
32 | print("Number of users / total users:", users_per_round, " / ", total_users)
33 |
34 | self.server_controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad]
35 |
36 | print("Finished creating SCAFFOLD server.")
37 |
38 | def train(self):
39 | loss = []
40 | for glob_iter in range(self.num_glob_iters):
41 | print("-------------Round number: ", glob_iter, " -------------")
42 | # loss_ = 0
43 |
44 | self.send_parameters()
45 |
46 | # Evaluate model at each iteration
47 | self.evaluate()
48 |
49 | if self.noise:
50 | self.selected_users = self.select_transmitting_users()
51 | print(f"Transmitting {len(self.selected_users)} users")
52 | else:
53 | self.selected_users = self.select_users(glob_iter, self.users_per_round)
54 |
55 | for user in self.selected_users:
56 | user.train()
57 | user.drop_lr()
58 |
59 | self.aggregate_parameters()
60 | self.get_max_norm()
61 |
62 | if self.noise:
63 | self.apply_channel_effect()
64 |
65 | self.save_results()
66 | self.save_norms()
67 | self.save_model()
68 |
69 | def send_parameters(self):
70 | assert (self.users is not None and len(self.users) > 0)
71 | for user in self.users:
72 | user.set_parameters(self.model)
73 | for control, new_control in zip(user.server_controls, self.server_controls):
74 | control.data = new_control.data
75 |
76 | def aggregate_parameters(self):
77 | assert (self.users is not None and len(self.users) > 0)
78 | total_samples = 0
79 | for user in self.selected_users:
80 | total_samples += user.train_samples
81 | for user in self.selected_users:
82 | self.add_parameters(user, total_samples)
83 |
84 | def add_parameters(self, user, total_samples):
85 | num_of_selected_users = len(self.selected_users)
86 | num_of_users = len(self.users)
87 | num_of_samples = user.train_samples
88 | for param, control, del_control, del_model in zip(self.model.parameters(), self.server_controls,
89 | user.delta_controls, user.delta_model):
90 | # param.data = param.data + del_model.data * num_of_samples / total_samples / num_of_selected_users
91 | param.data = param.data + del_model.data / num_of_selected_users
92 | control.data = control.data + del_control.data / num_of_users
93 |
94 | def get_max_norm(self):
95 | param_norms = []
96 | control_norms = []
97 | for user in self.selected_users:
98 | param_norm, control_norm = user.get_params_norm()
99 | param_norms.append(param_norm)
100 | control_norms.append(control_norm)
101 | self.param_norms.append(max(param_norms))
102 | self.control_norms.append((max(control_norms)))
103 |
104 | def apply_channel_effect(self, sigma=1, power_control=2500):
105 | num_of_selected_users = len(self.selected_users)
106 | alpha_t_params = power_control / self.param_norms[-1] ** 2
107 | alpha_t_controls = 4e4 * power_control / self.control_norms[-1] ** 2
108 | for param, control in zip(self.model.parameters(), self.server_controls):
109 | param.data = param.data + sigma / (
110 | alpha_t_params ** 0.5 * num_of_selected_users * self.communication_thresh) * torch.randn(
111 | param.data.size())
112 | control.data = control.data + sigma / (
113 | alpha_t_controls ** 0.5 * num_of_selected_users * self.communication_thresh) * torch.randn(
114 | control.data.size())
115 |
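A hedged client-side sketch (the repo's user_scaffold.py is not shown in this section, so all names here are illustrative). Following SCAFFOLD "option II", after K local steps with local learning rate eta_l a client would send the deltas that add_parameters above averages into the server model (by 1/|S|) and the server controls (by 1/N).

    # Hedged sketch: inputs are lists of tensors (global/local model params and controls).
    def client_deltas(x_global, y_local, c_global, c_local, K, eta_l):
        delta_model = [yl - xg for xg, yl in zip(x_global, y_local)]
        c_new = [cl - cg + (xg - yl) / (K * eta_l)
                 for xg, yl, cg, cl in zip(x_global, y_local, c_global, c_local)]
        delta_controls = [cn - cl for cn, cl in zip(c_new, c_local)]
        return delta_model, delta_controls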
--------------------------------------------------------------------------------
/flearn/trainmodel/__pycache__/models.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/trainmodel/__pycache__/models.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/trainmodel/models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class Net(nn.Module):
7 | def __init__(self):
8 | super(Net, self).__init__()
9 | self.conv1 = nn.Conv2d(1, 16, 2, 1)
10 | self.conv2 = nn.Conv2d(16, 32, 2, 1)
11 | self.dropout1 = nn.Dropout(0.25)
12 | self.dropout2 = nn.Dropout(0.5)
13 | self.fc1 = nn.Linear(18432, 128)
14 | self.fc2 = nn.Linear(128, 10)
15 |
16 | def forward(self, x):
17 | x = self.conv1(x)
18 | x = nn.ReLU()(x)
19 | x = nn.MaxPool2d(2, 1)(x)
20 | x = self.dropout1(x)
21 | x = self.conv2(x)
22 | x = nn.ReLU()(x)
23 | x = nn.MaxPool2d(2, 1)(x)
24 | x = self.dropout2(x)
25 | x = torch.flatten(x, 1)
26 | x = self.fc1(x)
27 | x = nn.ReLU()(x)
28 | x = self.fc2(x)
29 | output = F.log_softmax(x, dim=1)
30 | return output
31 |
32 |
33 | class MclrLogistic(nn.Module):
34 | def __init__(self, input_dim=784, output_dim=10):
35 | super(MclrLogistic, self).__init__()
36 | self.fc1 = nn.Linear(input_dim, output_dim)
37 |
38 | def forward(self, x):
39 | x = torch.flatten(x, 1)
40 | x = self.fc1(x)
41 | output = F.log_softmax(x, dim=1)
42 | return output
43 |
44 |
45 | class MclrCrossEntropy(nn.Module):
46 | def __init__(self, input_dim=784, output_dim=10):
47 | super(MclrCrossEntropy, self).__init__()
48 | self.linear = torch.nn.Linear(input_dim, output_dim)
49 |
50 | def forward(self, x):
51 | x = torch.flatten(x, 1)
52 | outputs = self.linear(x)
53 | return outputs
54 |
55 |
56 | class DNN(nn.Module):
57 | def __init__(self, input_dim=784, mid_dim=100, output_dim=10):
58 | super(DNN, self).__init__()
59 | # define network layers
60 | self.fc1 = nn.Linear(input_dim, mid_dim)
61 | self.fc2 = nn.Linear(mid_dim, output_dim)
62 |
63 | def forward(self, x):
64 | # define forward pass
65 | x = torch.flatten(x, 1)
66 | x = F.relu(self.fc1(x))
67 | x = self.fc2(x)
68 | x = F.log_softmax(x, dim=1)
69 | return x
70 |
71 |
72 | class LinearRegression(nn.Module):
73 | def __init__(self, input_dim=60, output_dim=1):
74 | super(LinearRegression, self).__init__()
75 | self.linear = torch.nn.Linear(input_dim, output_dim)
76 |
77 | def forward(self, x):
78 | x = torch.flatten(x, 1)
79 | outputs = self.linear(x)
80 | return outputs
81 |
82 |
83 | class CifarNet(nn.Module):
84 | def __init__(self, categories=10):
85 | super(CifarNet, self).__init__()
86 | self.conv1 = nn.Conv2d(3, 32, 5, 1, 2) # doubled bias learning rate
87 | self.conv2 = nn.Conv2d(32, 32, 5, 1, 2) # doubled bias learning rate
88 | self.conv3 = nn.Conv2d(32, 64, 5, 1, 2) # doubled bias learning rate
89 | self.fc1 = nn.Linear(576, 64)
90 | self.fc2 = nn.Linear(64, categories)
91 |
92 | def forward(self, x):
93 | x = torch.reshape(x, (-1, 3, 32, 32))
94 | x = self.conv1(x)
95 | x = nn.MaxPool2d(3, 2)(x)
96 | x = nn.ReLU()(x)
97 | x = self.conv2(x)
98 | x = nn.ReLU()(x)
99 | x = nn.AvgPool2d(3, 2)(x)
100 | x = self.conv3(x)
101 | x = nn.ReLU()(x)
102 | x = nn.AvgPool2d(3, 2)(x)
103 | x = torch.flatten(x, 1)
104 | x = self.fc1(x)
105 | x = nn.ReLU()(x)
106 | x = self.fc2(x)
107 | output = F.softmax(x, dim=1)
108 | return output
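
A minimal shape-check sketch (not part of the repository; assumes it is run from the repository root with torch installed) confirming the hard-coded Linear input sizes above: Net expects 1x28x28 inputs (32 channels of 24x24 = 18432 features) and CifarNet expects 3x32x32 inputs (64 channels of 3x3 = 576 features):

    import torch
    from flearn.trainmodel.models import Net, CifarNet

    # Femnist-style 28x28 grayscale batch -> flattened to 18432 features before fc1
    print(Net()(torch.randn(2, 1, 28, 28)).shape)        # torch.Size([2, 10]), log-probabilities

    # CIFAR-10 RGB batch -> flattened to 576 features before fc1
    print(CifarNet()(torch.randn(2, 3, 32, 32)).shape)   # torch.Size([2, 10]), softmax probabilities
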
--------------------------------------------------------------------------------
/flearn/users/__pycache__/useravg.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/users/__pycache__/useravg.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/users/__pycache__/userbase.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/users/__pycache__/userbase.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/users/__pycache__/userfedl.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/users/__pycache__/userfedl.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/users/__pycache__/userscaffold.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/flearn/users/__pycache__/userscaffold.cpython-38.pyc
--------------------------------------------------------------------------------
/flearn/users/user_avg.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import os
5 | import json
6 | from torch.utils.data import DataLoader
7 | from flearn.users.user_base import User
8 | from flearn.optimizers.fedoptimizer import *
9 | from torch.optim.lr_scheduler import StepLR
10 |
11 |
12 | # Implementation for FedAvg clients
13 |
14 | class UserAVG(User):
15 | def __init__(self, numeric_id, train_data, test_data, model, batch_size, learning_rate, L, local_epochs):
16 | super().__init__(numeric_id, train_data, test_data, model[0], batch_size, learning_rate, L, local_epochs)
17 |
18 | if model[1] == "linear":
19 | self.loss = nn.MSELoss()
20 | elif model[1] == "cnn":
21 | self.loss = nn.CrossEntropyLoss()
22 | else:
23 | self.loss = nn.NLLLoss()
24 |
25 | if model[1] == "cnn":
26 | layers = [self.model.conv1, self.model.conv2, self.model.conv3, self.model.fc1, self.model.fc2]
27 | self.optimizer = torch.optim.SGD([{'params': layer.weight} for layer in layers] +
28 | [{'params': layer.bias, 'lr': 2 * self.learning_rate} for layer in layers],
29 | lr=self.learning_rate, weight_decay=L)
30 | self.scheduler = StepLR(self.optimizer, step_size=8, gamma=0.1)
31 | self.lr_drop_rate = 0.95
32 | else:
33 | self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)
34 |
35 | self.csi = None
36 |
37 | def set_grads(self, new_grads):
38 | if isinstance(new_grads, nn.Parameter):
39 | for model_grad, new_grad in zip(self.model.parameters(), new_grads):
40 | model_grad.data = new_grad.data
41 | elif isinstance(new_grads, list):
42 | for idx, model_grad in enumerate(self.model.parameters()):
43 | model_grad.data = new_grads[idx]
44 |
45 | def train(self):
46 | self.model.train()
47 | for epoch in range(1, self.local_epochs + 1):
48 | self.model.train()
49 | for batch_idx, (X, y) in enumerate(self.trainloader):
50 | self.optimizer.zero_grad()
51 | output = self.model(X)
52 | loss = self.loss(output, y)
53 | loss.backward()
54 | self.optimizer.step()
55 | if self.scheduler:
56 | self.scheduler.step()
57 |
58 | # get model difference
59 | for local, server, delta in zip(self.model.parameters(), self.server_model, self.delta_model):
60 | delta.data = local.data.detach() - server.data.detach()
61 |
62 | return loss
63 |
64 | def get_params_norm(self):
65 | params = []
66 | for delta in self.delta_model:
67 | params.append(torch.flatten(delta.data))
68 | # return torch.linalg.norm(torch.cat(params), 2)
69 | return float(torch.norm(torch.cat(params)))
70 |
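
In symbols, what UserAVG.train() leaves in self.delta_model and what get_params_norm() reports (a summary of the code above, with w_i the client model after local training and w the last model received from the server):

    \Delta w_i = w_i - w, \qquad
    \texttt{get\_params\_norm}() = \bigl\lVert \Delta w_i \bigr\rVert_2
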
--------------------------------------------------------------------------------
/flearn/users/user_base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import os
5 | import json
6 | from torch.utils.data import DataLoader
7 | import numpy as np
8 | import copy
9 |
10 |
11 | class User:
12 | """
13 | Base class for users in federated learning.
14 | """
15 | def __init__(self, user_id, train_data, test_data, model, batch_size, learning_rate, L, local_epochs):
16 | self.dataset = None
17 | self.optimizer = None
18 | self.model = copy.deepcopy(model)
19 | self.user_id = user_id # integer
20 | self.train_samples = len(train_data)
21 | self.test_samples = len(test_data)
22 | if(batch_size == 0):
23 | self.batch_size = len(train_data)
24 | else:
25 | self.batch_size = batch_size
26 | self.learning_rate = learning_rate
27 | self.L = L
28 | self.local_epochs = local_epochs
29 | self.scheduler = None
30 | self.lr_drop_rate = 1
31 | self.trainloader = DataLoader(train_data, self.batch_size)
32 | self.testloader = DataLoader(test_data, self.batch_size)
33 | self.testloaderfull = DataLoader(test_data, self.test_samples)
34 | self.trainloaderfull = DataLoader(train_data, self.train_samples)
35 | self.iter_trainloader = iter(self.trainloader)
36 | self.iter_testloader = iter(self.testloader)
37 |
38 | self.delta_model = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad]
39 | self.server_model = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad]
40 |
41 | # those parameters are for FEDL.
42 | self.local_model = copy.deepcopy(list(self.model.parameters()))
43 | self.server_grad = copy.deepcopy(list(self.model.parameters()))
44 | self.pre_local_grad = copy.deepcopy(list(self.model.parameters()))
45 |
46 | def set_parameters(self, server_model):
47 | for old_param, new_param, local_param, server_param in zip(self.model.parameters(), server_model.parameters(), self.local_model, self.server_model):
48 | old_param.data = new_param.data.clone()
49 | local_param.data = new_param.data.clone()
50 | server_param.data = new_param.data.clone()
51 | if(new_param.grad != None):
52 | if(old_param.grad == None):
53 | old_param.grad = torch.zeros_like(new_param.grad)
54 |
55 | if(local_param.grad == None):
56 | local_param.grad = torch.zeros_like(new_param.grad)
57 |
58 | old_param.grad.data = new_param.grad.data.clone()
59 | local_param.grad.data = new_param.grad.data.clone()
60 | #self.local_weight_updated = copy.deepcopy(self.optimizer.param_groups[0]['params'])
61 |
62 | def get_parameters(self):
63 | for param in self.model.parameters():
64 | param.detach()
65 | return self.model.parameters()
66 |
67 | def clone_model_paramenter(self, param, clone_param):
68 | for param, clone_param in zip(param, clone_param):
69 | clone_param.data = param.data.clone()
70 | if(param.grad != None):
71 | if(clone_param.grad == None):
72 | clone_param.grad = torch.zeros_like(param.grad)
73 | clone_param.grad.data = param.grad.data.clone()
74 |
75 | return clone_param
76 |
77 | def get_updated_parameters(self):
78 | return self.local_weight_updated
79 |
80 | def update_parameters(self, new_params):
81 | for param, new_param in zip(self.model.parameters(), new_params):
82 | param.data = new_param.data.clone()
83 | param.grad.data = new_param.grad.data.clone()
84 |
85 | def get_grads(self, grads):
86 | self.optimizer.zero_grad()
87 |
88 | for x, y in self.trainloaderfull:
89 | output = self.model(x)
90 | loss = self.loss(output, y)
91 | loss.backward()
92 | self.clone_model_paramenter(self.model.parameters(), grads)
93 | #for param, grad in zip(self.model.parameters(), grads):
94 | # if(grad.grad == None):
95 | # grad.grad = torch.zeros_like(param.grad)
96 | # grad.grad.data = param.grad.data.clone()
97 | return grads
98 |
99 | def test(self):
100 | self.model.eval()
101 | test_acc = 0
102 | for x, y in self.testloaderfull:
103 | output = self.model(x)
104 | test_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item()
105 | #@loss += self.loss(output, y)
106 | #print(self.user_id + ", Test Accuracy:", test_acc / y.shape[0] )
107 | #print(self.user_id + ", Test Loss:", loss)
108 | return test_acc, y.shape[0]
109 |
110 | def train_error_and_loss(self):
111 | self.model.eval()
112 | train_acc = 0
113 | loss = 0
114 | for x, y in self.trainloaderfull:
115 | output = self.model(x)
116 | train_acc += (torch.sum(torch.argmax(output, dim=1) == y)).item()
117 | loss += self.loss(output, y)
118 | #print(self.user_id + ", Train Accuracy:", train_acc)
119 | #print(self.user_id + ", Train Loss:", loss)
120 | return train_acc, loss , self.train_samples
121 |
122 |
123 | def get_next_train_batch(self):
124 | try:
125 |             # Sample a new batch for personalization
126 | (X, y) = next(self.iter_trainloader)
127 | except StopIteration:
128 | # restart the generator if the previous generator is exhausted.
129 | self.iter_trainloader = iter(self.trainloader)
130 | (X, y) = next(self.iter_trainloader)
131 | return (X, y)
132 |
133 | def get_next_test_batch(self):
134 | try:
135 | # Samples a new batch for persionalizing
136 | (X, y) = next(self.iter_testloader)
137 | except StopIteration:
138 | # restart the generator if the previous generator is exhausted.
139 | self.iter_testloader = iter(self.testloader)
140 | (X, y) = next(self.iter_testloader)
141 | return (X, y)
142 |
143 | def save_model(self):
144 | model_path = os.path.join("models", self.dataset)
145 | if not os.path.exists(model_path):
146 | os.makedirs(model_path)
147 | torch.save(self.model, os.path.join(model_path, "user_" + self.user_id + ".pt"))
148 |
149 | def load_model(self):
150 | model_path = os.path.join("models", self.dataset)
151 | self.model = torch.load(os.path.join(model_path, "server" + ".pt"))
152 |
153 | @staticmethod
154 | def model_exists():
155 | return os.path.exists(os.path.join("models", "server" + ".pt"))
156 |
157 | def drop_lr(self):
158 | for group in self.optimizer.param_groups:
159 | group['lr'] *= self.lr_drop_rate
160 | if self.scheduler:
161 | group['initial_lr'] *= self.lr_drop_rate
162 |
--------------------------------------------------------------------------------
/flearn/users/user_scaffold.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import os
5 | import json
6 | from torch.utils.data import DataLoader
7 | from flearn.users.user_base import User
8 | from flearn.optimizers.fedoptimizer import *
9 | import math
10 | from torch.optim.lr_scheduler import StepLR
11 |
12 |
13 | # Implementation for SCAFFOLD clients
14 |
15 | class UserSCAFFOLD(User):
16 | def __init__(self, numeric_id, train_data, test_data, model, batch_size, learning_rate, L, local_epochs):
17 | super().__init__(numeric_id, train_data, test_data, model[0], batch_size, learning_rate, L, local_epochs)
18 |
19 | if model[1] == "linear":
20 | self.loss = nn.MSELoss()
21 | elif model[1] == "cnn":
22 | self.loss = nn.CrossEntropyLoss()
23 | else:
24 | self.loss = nn.NLLLoss()
25 |
26 | if model[1] == "cnn":
27 | layers = [self.model.conv1, self.model.conv2, self.model.conv3, self.model.fc1, self.model.fc2]
28 | weights = [{'params': layer.weight} for layer in layers]
29 | biases = [{'params': layer.bias, 'lr': 2 * self.learning_rate} for layer in layers]
30 | param_groups = [None] * (len(weights) + len(biases))
31 | param_groups[::2] = weights
32 | param_groups[1::2] = biases
33 | self.optimizer = SCAFFOLDOptimizer(param_groups, lr=self.learning_rate, weight_decay=L)
34 | # self.optimizer = SCAFFOLDOptimizer([{'params': layer.weight} for layer in layers] +
35 | # [{'params': layer.bias, 'lr': 2 * self.learning_rate} for layer in
36 | # layers],
37 | # lr=self.learning_rate, weight_decay=L)
38 |
39 | self.scheduler = StepLR(self.optimizer, step_size=8, gamma=0.1)
40 | self.lr_drop_rate = 0.95
41 | else:
42 | self.optimizer = SCAFFOLDOptimizer(self.model.parameters(), lr=self.learning_rate, weight_decay=L)
43 |
44 | self.controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad]
45 | self.server_controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad]
46 | self.delta_controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad]
47 | self.csi = None
48 |
49 | def set_grads(self, new_grads):
50 | if isinstance(new_grads, nn.Parameter):
51 | for model_grad, new_grad in zip(self.model.parameters(), new_grads):
52 | model_grad.data = new_grad.data
53 | elif isinstance(new_grads, list):
54 | for idx, model_grad in enumerate(self.model.parameters()):
55 | model_grad.data = new_grads[idx]
56 |
57 | def train(self):
58 | self.model.train()
59 | grads = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad]
60 | self.get_grads(grads)
61 | for epoch in range(1, self.local_epochs + 1):
62 | self.model.train()
63 | for batch_idx, (X, y) in enumerate(self.trainloader):
64 | self.optimizer.zero_grad()
65 | output = self.model(X)
66 | loss = self.loss(output, y)
67 | loss.backward()
68 | self.optimizer.step(self.server_controls, self.controls)
69 | if self.scheduler:
70 | self.scheduler.step()
71 |
72 | # get model difference
73 | for local, server, delta in zip(self.model.parameters(), self.server_model, self.delta_model):
74 | delta.data = local.data.detach() - server.data.detach()
75 |
76 | # get client new controls
77 | new_controls = [torch.zeros_like(p.data) for p in self.model.parameters() if p.requires_grad]
78 | opt = 2
79 | if opt == 1:
80 | for new_control, grad in zip(new_controls, grads):
81 | new_control.data = grad.grad
82 | if opt == 2:
83 | for server_control, control, new_control, delta in zip(self.server_controls, self.controls, new_controls,
84 | self.delta_model):
85 | a = 1 / (math.ceil(self.train_samples / self.batch_size) * self.learning_rate)
86 | new_control.data = control.data - server_control.data - delta.data * a
87 |
88 | # get controls differences
89 | for control, new_control, delta in zip(self.controls, new_controls, self.delta_controls):
90 | delta.data = new_control.data - control.data
91 | control.data = new_control.data
92 |
93 | return loss
94 |
95 | def get_params_norm(self):
96 | params = []
97 | controls = []
98 |
99 | for delta in self.delta_model:
100 | params.append(torch.flatten(delta.data))
101 |
102 | for delta in self.delta_controls:
103 | controls.append(torch.flatten(delta.data))
104 |
105 | # return torch.linalg.norm(torch.cat(params), 2)
106 | return float(torch.norm(torch.cat(params))), float(torch.norm(torch.cat(controls)))
107 |
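
The opt == 2 branch above is SCAFFOLD's "option II" control update (opt == 1 would be the gradient-based option I). In the notation of the code, with client control c_i, server control c, local learning rate eta, batch size B and n_i training samples (note that local_epochs is not folded into the step count here):

    K = \Bigl\lceil \tfrac{n_i}{B} \Bigr\rceil, \qquad
    c_i^{+} = c_i - c - \frac{1}{K\,\eta}\,(w_i - w), \qquad
    \Delta c_i = c_i^{+} - c_i, \qquad \Delta w_i = w_i - w

The client then communicates \Delta w_i and \Delta c_i; their norms are what get_params_norm() returns.
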
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from utils.plot_utils import *
2 | from simulate import simulate
3 | from data.Femnist.data_generator import generate_data as generate_femnist_data
4 | from data.CIFAR.data_generator import generate_data as generate_cifar10_data
5 |
6 |
7 | def generate_data(dataset, similarity):
8 | if dataset == 'CIFAR':
9 | generate_cifar10_data(similarity)
10 | elif dataset == 'Femnist':
11 | generate_femnist_data(similarity)
12 |
13 |
14 | cifar_dict = {"model": "cnn",
15 | "batch_size": 60,
16 | "learning_rate": 0.008,
17 | "local_epochs": 1,
18 | "L": 0.04,
19 | "users_per_round": 8}
20 |
21 | femnist_dict = {"model": "mclr",
22 | "batch_size": 4,
23 | "learning_rate": 0.001,
24 | "local_epochs": 1,
25 | "L": 0,
26 | "users_per_round": 20}
27 |
28 | input_dict = {}
29 |
30 | dataset = 'Femnist'
31 | if dataset == 'CIFAR':
32 | input_dict = cifar_dict
33 | elif dataset == 'Femnist':
34 | input_dict = femnist_dict
35 |
36 | num_glob_iters = 300
37 | times = 15
38 | algorithms = ["SCAFFOLD", "FedAvg"]
39 | noises = [True, False]
40 | similarities = [1, 0.1, 0]
41 |
42 |
43 | # for similarity in similarities:
44 | # generate_data(dataset, similarity)
45 | # for noise in noises:
46 | # for algorithm in algorithms:
47 | # simulate(**input_dict, dataset=dataset, algorithm=algorithm, similarity=similarity, noise=noise,
48 | # num_glob_iters=num_glob_iters, times=times)
49 |
50 | plot_accuracy(dataset, algorithms, noises, similarities, num_glob_iters)
51 | plot_norms(dataset, algorithms, noises, similarities, num_glob_iters)
52 |
53 | # plot_dict = get_plot_dict(input_dict, algorithms, epochs)
54 | # plot_norms(**plot_dict)
55 |
56 |
57 |
58 |
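
For a single run instead of the full sweep, a hedged example that uses only the names defined above:

    # Train SCAFFOLD once on Femnist with similarity 0.1 over a noiseless channel,
    # mirroring one cell of the commented-out sweep above.
    generate_data(dataset, similarity=0.1)
    simulate(**input_dict, dataset=dataset, algorithm="SCAFFOLD", similarity=0.1,
             noise=False, num_glob_iters=num_glob_iters, times=1)
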
--------------------------------------------------------------------------------
/models/Femnist/server.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/models/Femnist/server.pt
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | Pillow
4 | torch
5 | torchvision
6 | matplotlib
7 | tqdm
8 | h5py
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_0.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_1.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_1.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_2.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_3.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_3.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_4.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_4.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_5.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_5.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_6.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_6.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_7.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_7.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_8.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_8.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_9.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_9.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0.1s_avg.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0.1s_avg.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_0.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_1.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_1.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_2.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_3.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_3.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_4.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_4.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_5.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_5.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_6.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_6.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_7.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_7.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_8.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_8.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_9.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_9.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_0s_avg.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_0s_avg.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_0.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_1.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_1.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_2.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_3.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_3.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_4.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_4.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_5.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_5.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_6.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_6.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_7.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_7.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_8.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_8.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_9.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_9.h5
--------------------------------------------------------------------------------
/results/Femnist_FedAvg_1s_avg.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_FedAvg_1s_avg.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_0.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_1.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_1.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_2.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_3.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_3.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_4.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_4.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_5.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_5.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_6.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_6.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_7.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_7.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_8.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_8.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_9.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_9.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0.1s_avg.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0.1s_avg.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_0.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_1.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_1.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_2.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_3.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_3.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_4.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_4.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_5.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_5.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_6.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_6.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_7.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_7.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_8.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_8.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_9.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_9.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_0s_avg.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_0s_avg.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_0.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_1.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_1.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_2.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_3.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_3.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_4.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_4.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_5.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_5.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_6.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_6.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_7.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_7.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_8.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_8.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_9.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_9.h5
--------------------------------------------------------------------------------
/results/Femnist_SCAFFOLD_1s_avg.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/results/Femnist_SCAFFOLD_1s_avg.h5
--------------------------------------------------------------------------------
/simulate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import h5py
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import argparse
6 | import importlib
7 | import random
8 | import os
9 | from flearn.servers.server_avg import FedAvg
10 | from flearn.servers.server_scaffold import SCAFFOLD
11 | from flearn.trainmodel.models import *
12 | from utils.plot_utils import *
13 | import torch
14 |
15 | torch.manual_seed(0)
16 |
17 |
18 | def simulate(dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, local_epochs, users_per_round,
19 | similarity, noise, times):
20 | print("=" * 80)
21 | print("Summary of training process:")
22 | print(f"Algorithm: {algorithm}")
23 | print(f"Batch size : {batch_size}")
24 |     print(f"Learning rate : {learning_rate}")
25 | print(f"Subset of users : {users_per_round if users_per_round else 'all users'}")
26 | print(f"Number of local rounds : {local_epochs}")
27 | print(f"Number of global rounds : {num_glob_iters}")
28 | print(f"Dataset : {dataset}")
29 | print(f"Data Similarity : {similarity}")
30 | print(f"Local Model : {model}")
31 | print("=" * 80)
32 |
33 |     for i in range(times):
34 |         print(f"--------------- Run {i} ---------------")
35 |
36 |         # Generate a fresh model for every run; keep `model` as the name string
37 |         if model == "mclr":  # for Mnist and Femnist datasets
38 |             run_model = MclrLogistic(output_dim=47), model
39 |
40 |         elif model == "linear":  # For Linear dataset
41 |             run_model = LinearRegression(40, 1), model
42 |
43 |         elif model == "dnn":  # for Mnist and Femnist datasets
44 |             run_model = DNN(), model
45 |
46 |         elif model == "cnn":  # for Cifar-10 dataset
47 |             run_model = CifarNet(), model
48 |
49 | # select algorithm
50 | if algorithm == "FedAvg":
51 |             server = FedAvg(dataset, algorithm, run_model, batch_size, learning_rate, L, num_glob_iters, local_epochs,
52 | users_per_round, similarity, noise, i)
53 |
54 | if algorithm == "SCAFFOLD":
55 |             server = SCAFFOLD(dataset, algorithm, run_model, batch_size, learning_rate, L, num_glob_iters, local_epochs,
56 | users_per_round, similarity, noise, i)
57 | server.train()
58 | server.test()
59 |
60 | # Average data
61 | average_data(num_glob_iters=num_glob_iters, algorithm=algorithm, dataset=dataset, similarity=similarity,
62 | noise=noise, times=times)
63 | average_norms(num_glob_iters=num_glob_iters, algorithm=algorithm, dataset=dataset, similarity=similarity,
64 | noise=noise, times=times)
65 |
66 | if __name__ == "__main__":
67 | parser = argparse.ArgumentParser()
68 |     parser.add_argument("--dataset", type=str, default="CIFAR-10",
69 |                         choices=["CIFAR-10", "CIFAR", "Femnist", "Mnist", "Linear_synthetic", "Logistic_synthetic"])
70 |     parser.add_argument("--similarity", type=float, default=1)
71 |     parser.add_argument("--model", type=str, default="cnn", choices=["linear", "mclr", "dnn", "cnn"])
72 | parser.add_argument("--batch_size", type=int, default=60)
73 | parser.add_argument("--learning_rate", type=float, default=0.008, help="Local learning rate")
74 | parser.add_argument("--hyper_learning_rate", type=float, default=0.02, help=" Learning rate of FEDL")
75 |     parser.add_argument("--L", type=float, default=0.004, help="Regularization term")
76 | parser.add_argument("--num_glob_iters", type=int, default=250)
77 | parser.add_argument("--local_epochs", type=int, default=1)
78 | parser.add_argument("--algorithm", type=str, default="FedAvg", choices=["FEDL", "FedAvg", "SCAFFOLD"])
79 | parser.add_argument("--clients_per_round", type=int, default=0, help="Number of Users per round")
80 | parser.add_argument("--rho", type=float, default=0, help="Condition Number")
81 | parser.add_argument("--noise", type=float, default=False, help="Applies noisy channel effect")
82 | parser.add_argument("--pre-coding", type=float, default=False, help="Applies pre-coding")
83 |     parser.add_argument("--times", type=int, default=1, help="Number of independent runs")
84 | args = parser.parse_args()
85 |
86 | simulate(dataset=args.dataset, algorithm=args.algorithm, model=args.model,
87 | batch_size=args.batch_size, learning_rate=args.learning_rate,
88 |              L=args.L, num_glob_iters=args.num_glob_iters,
89 | local_epochs=args.local_epochs, users_per_round=args.clients_per_round,
90 |              similarity=args.similarity, noise=args.noise, times=args.times)
91 |
--------------------------------------------------------------------------------
/utils/__pycache__/model_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/utils/__pycache__/model_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/plot_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ramshi236/Accelerated-Federated-Learning-Over-MAC-in-Heterogeneous-Networks/4a79768ea3279b6711df44e09d37c9e55bab9fd4/utils/__pycache__/plot_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/model_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numpy as np
3 | import os
4 | import torch
5 | import torch.nn as nn
6 |
7 | IMAGE_SIZE = 28
8 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
9 | NUM_CHANNELS = 1
10 |
11 | def suffer_data(data):
12 | data_x = data['x']
13 | data_y = data['y']
14 | # randomly shuffle data
15 | np.random.seed(100)
16 | rng_state = np.random.get_state()
17 | np.random.shuffle(data_x)
18 | np.random.set_state(rng_state)
19 | np.random.shuffle(data_y)
20 | return (data_x, data_y)
21 |
22 | def batch_data(data, batch_size):
23 | '''
24 | data is a dict := {'x': [numpy array], 'y': [numpy array]} (on one client)
25 |     yields mini-batches (batched_x, batched_y), each of length at most batch_size
26 | '''
27 | data_x = data['x']
28 | data_y = data['y']
29 |
30 | # randomly shuffle data
31 | np.random.seed(100)
32 | rng_state = np.random.get_state()
33 | np.random.shuffle(data_x)
34 | np.random.set_state(rng_state)
35 | np.random.shuffle(data_y)
36 |
37 | # loop through mini-batches
38 | for i in range(0, len(data_x), batch_size):
39 | batched_x = data_x[i:i+batch_size]
40 | batched_y = data_y[i:i+batch_size]
41 | yield (batched_x, batched_y)
42 |
43 |
44 | def get_random_batch_sample(data_x, data_y, batch_size):
45 | num_parts = len(data_x)//batch_size + 1
46 | if(len(data_x) > batch_size):
47 | batch_idx = np.random.choice(list(range(num_parts +1)))
48 | sample_index = batch_idx*batch_size
49 | if(sample_index + batch_size > len(data_x)):
50 | return (data_x[sample_index:], data_y[sample_index:])
51 | else:
52 | return (data_x[sample_index: sample_index+batch_size], data_y[sample_index: sample_index+batch_size])
53 | else:
54 | return (data_x,data_y)
55 |
56 |
57 | def get_batch_sample(data, batch_size):
58 | data_x = data['x']
59 | data_y = data['y']
60 |
61 | np.random.seed(100)
62 | rng_state = np.random.get_state()
63 | np.random.shuffle(data_x)
64 | np.random.set_state(rng_state)
65 | np.random.shuffle(data_y)
66 |
67 | batched_x = data_x[0:batch_size]
68 | batched_y = data_y[0:batch_size]
69 | return (batched_x, batched_y)
70 |
71 | def read_data(dataset):
72 | '''parses data in given train and test data directories
73 |
74 | assumes:
75 | - the data in the input directories are .json files with
76 | keys 'users' and 'user_data'
77 | - the set of train set users is the same as the set of test set users
78 |
79 | Return:
80 | clients: list of client ids
81 | groups: list of group ids; empty list if none found
82 | train_data: dictionary of train data
83 | test_data: dictionary of test data
84 | '''
85 | train_data_dir = os.path.join('data', dataset, 'data', 'train')
86 | test_data_dir = os.path.join('data', dataset, 'data', 'test')
87 | clients = []
88 | groups = []
89 | train_data = {}
90 | test_data = {}
91 |
92 | train_files = os.listdir(train_data_dir)
93 | train_files = [f for f in train_files if f.endswith('.json')]
94 | for f in train_files:
95 | file_path = os.path.join(train_data_dir, f)
96 | with open(file_path, 'r') as inf:
97 | cdata = json.load(inf)
98 | clients.extend(cdata['users'])
99 | if 'hierarchies' in cdata:
100 | groups.extend(cdata['hierarchies'])
101 | train_data.update(cdata['user_data'])
102 |
103 | test_files = os.listdir(test_data_dir)
104 | test_files = [f for f in test_files if f.endswith('.json')]
105 | for f in test_files:
106 | file_path = os.path.join(test_data_dir, f)
107 | with open(file_path, 'r') as inf:
108 | cdata = json.load(inf)
109 | test_data.update(cdata['user_data'])
110 |
111 | clients = list(sorted(train_data.keys()))
112 |
113 | return clients, groups, train_data, test_data
114 |
115 | def read_user_data(index,data,dataset):
116 | id = data[0][index]
117 | train_data = data[2][id]
118 | test_data = data[3][id]
119 | X_train, y_train, X_test, y_test = train_data['x'], train_data['y'], test_data['x'], test_data['y']
120 | if dataset == "Mnist":
121 | X_train = torch.Tensor(X_train).view(-1, NUM_CHANNELS, IMAGE_SIZE, IMAGE_SIZE).type(torch.float32)
122 | y_train = torch.Tensor(y_train).type(torch.int64)
123 | X_test = torch.Tensor(X_test).view(-1, NUM_CHANNELS, IMAGE_SIZE, IMAGE_SIZE).type(torch.float32)
124 | y_test = torch.Tensor(y_test).type(torch.int64)
125 | elif dataset == "Linear_synthetic":
126 | X_train = torch.Tensor(X_train).type(torch.float32)
127 | y_train = torch.Tensor(y_train).type(torch.float32).unsqueeze(1)
128 | X_test = torch.Tensor(X_test).type(torch.float32)
129 | y_test = torch.Tensor(y_test).type(torch.float32).unsqueeze(1)
130 | #y_train = torch.flatten(y_train, 1)
131 | #y_test = torch.flatten(y_test, 1)
132 | #print(y_test.size(),y_train.size())
133 | elif dataset == "CIFAR-10":
134 | X_train = torch.Tensor(X_train).view(-1, 3, 32, 32).type(torch.float32)
135 | y_train = torch.Tensor(y_train).type(torch.int64)
136 | X_test = torch.Tensor(X_test).view(-1, 3, 32, 32).type(torch.float32)
137 | y_test = torch.Tensor(y_test).type(torch.int64)
138 | else:
139 | X_train = torch.Tensor(X_train).type(torch.float32)
140 | y_train = torch.Tensor(y_train).type(torch.int64)
141 | X_test = torch.Tensor(X_test).type(torch.float32)
142 | y_test = torch.Tensor(y_test).type(torch.int64)
143 | train_data = [(x, y) for x, y in zip(X_train, y_train)]
144 | test_data = [(x, y) for x, y in zip(X_test, y_test)]
145 | return id, train_data, test_data
146 |
147 | class Metrics(object):
148 | def __init__(self, clients, params):
149 | self.params = params
150 | num_rounds = params['num_rounds']
151 | self.bytes_written = {c.user_id: [0] * num_rounds for c in clients}
152 | self.client_computations = {c.user_id: [0] * num_rounds for c in clients}
153 | self.bytes_read = {c.user_id: [0] * num_rounds for c in clients}
154 | self.accuracies = []
155 | self.train_accuracies = []
156 |
157 | def update(self, rnd, cid, stats):
158 | bytes_w, comp, bytes_r = stats
159 | self.bytes_written[cid][rnd] += bytes_w
160 | self.client_computations[cid][rnd] += comp
161 | self.bytes_read[cid][rnd] += bytes_r
162 |
163 | def write(self):
164 | metrics = {}
165 | metrics['dataset'] = self.params['dataset']
166 | metrics['num_rounds'] = self.params['num_rounds']
167 | metrics['eval_every'] = self.params['eval_every']
168 | metrics['learning_rate'] = self.params['learning_rate']
169 | metrics['mu'] = self.params['mu']
170 | metrics['num_epochs'] = self.params['num_epochs']
171 | metrics['batch_size'] = self.params['batch_size']
172 | metrics['accuracies'] = self.accuracies
173 | metrics['train_accuracies'] = self.train_accuracies
174 | metrics['client_computations'] = self.client_computations
175 | metrics['bytes_written'] = self.bytes_written
176 | metrics['bytes_read'] = self.bytes_read
177 | metrics_dir = os.path.join('out', self.params['dataset'], 'metrics_{}_{}_{}_{}_{}.json'.format(
178 | self.params['seed'], self.params['optimizer'], self.params['learning_rate'], self.params['num_epochs'], self.params['mu']))
179 | #os.mkdir(os.path.join('out', self.params['dataset']))
180 | if not os.path.exists('out'):
181 | os.mkdir('out')
182 | if not os.path.exists(os.path.join('out', self.params['dataset'])):
183 | os.mkdir(os.path.join('out', self.params['dataset']))
184 | with open(metrics_dir, 'w') as ouf:
185 | json.dump(metrics, ouf)
186 |
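
A hedged usage sketch for the loaders above (assumes the Femnist json splits have already been generated under data/Femnist/data/train and data/Femnist/data/test):

    from torch.utils.data import DataLoader
    from utils.model_utils import read_data, read_user_data

    data = read_data("Femnist")                       # (clients, groups, train_data, test_data)
    cid, train_pairs, test_pairs = read_user_data(0, data, "Femnist")
    loader = DataLoader(train_pairs, batch_size=4, shuffle=True)
    X, y = next(iter(loader))                         # X: float32 features, y: int64 labels
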
--------------------------------------------------------------------------------
/utils/old_plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import h5py
3 | import numpy as np
4 | from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
5 |
6 | plt.rcParams.update({'font.size': 14})
7 |
8 |
9 | def simple_read_data(loc_ep, alg):
10 | hf = h5py.File("./results/" + '{}_{}.h5'.format(alg, loc_ep), 'r')
11 | rs_glob_acc = np.array(hf.get('rs_glob_acc')[:])
12 | rs_train_acc = np.array(hf.get('rs_train_acc')[:])
13 | rs_train_loss = np.array(hf.get('rs_train_loss')[:])
14 | return rs_train_acc, rs_train_loss, rs_glob_acc
15 |
16 |
17 | def get_training_data_value(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],
18 | hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho=[], dataset=""):
19 | Numb_Algs = len(algorithms_list)
20 | train_acc = np.zeros((Numb_Algs, Numb_Glob_Iters))
21 | train_loss = np.zeros((Numb_Algs, Numb_Glob_Iters))
22 | glob_acc = np.zeros((Numb_Algs, Numb_Glob_Iters))
23 | algs_lbl = algorithms_list.copy()
24 | for i in range(Numb_Algs):
25 | if (lamb[i] > 0):
26 | algorithms_list[i] = algorithms_list[i] + "_prox_" + str(lamb[i])
27 | algs_lbl[i] = algs_lbl[i] + "_prox"
28 |
29 | string_learning_rate = str(learning_rate[i])
30 |
31 | if (algorithms_list[i] == "FEDL"):
32 | string_learning_rate = string_learning_rate + "_" + str(hyper_learning_rate[i])
33 | algorithms_list[i] = algorithms_list[i] + \
34 | "_" + string_learning_rate + "_" + str(num_users) + \
35 | "u" + "_" + str(batch_size[i]) + "b" + "_" + str(loc_ep1[i])
36 | if (rho[i] > 0):
37 | algorithms_list[i] += "_" + str(rho[i]) + "p"
38 |
39 | train_acc[i, :], train_loss[i, :], glob_acc[i, :] = np.array(
40 | simple_read_data("avg", dataset + "_" + algorithms_list[i]))[:, :Numb_Glob_Iters]
41 | algs_lbl[i] = algs_lbl[i]
42 | return glob_acc, train_acc, train_loss
43 |
44 |
45 | def get_data_label_style(input_data=[], linestyles=[], algs_lbl=[], lamb=[], loc_ep1=0, batch_size=0):
46 | data, lstyles, labels = [], [], []
47 | for i in range(len(algs_lbl)):
48 | data.append(input_data[i, ::])
49 | lstyles.append(linestyles[i])
50 | labels.append(algs_lbl[i] + str(lamb[i]) + "_" +
51 | str(loc_ep1[i]) + "e" + "_" + str(batch_size[i]) + "b")
52 |
53 | return data, lstyles, labels
54 |
55 |
56 | def average_smooth(data, window_len=10, window='hanning'):
57 | results = []
58 | if window_len < 3:
59 | return data
60 | for i in range(len(data)):
61 | x = data[i]
62 | s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
63 | # print(len(s))
64 | if window == 'flat': # moving average
65 | w = np.ones(window_len, 'd')
66 | else:
67 |             w = getattr(np, window)(window_len)  # e.g. np.hanning(window_len); avoids eval and the undefined name 'numpy'
68 |
69 | y = np.convolve(w / w.sum(), s, mode='valid')
70 | results.append(y[window_len - 1:])
71 | return np.array(results)
72 |
73 |
74 | def plot_summary_one_figure(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],
75 | hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho=[], dataset=""):
76 | Numb_Algs = len(algorithms_list)
77 | # glob_acc, train_acc, train_loss = get_training_data_value(
78 | # users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset)
79 |
80 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb,
81 | learning_rate, hyper_learning_rate, algorithms_list,
82 | batch_size, rho, dataset)
83 | glob_acc = average_smooth(glob_acc_, window='flat')
84 | train_loss = average_smooth(train_loss_, window='flat')
85 | train_acc = average_smooth(train_acc_, window='flat')
86 |
87 | plt.figure(1)
88 | MIN = train_loss.min() - 0.001
89 | start = 0
90 | linestyles = ['-', '--', '-.', ':', '-', '--', '-.', ':', ':']
91 | plt.grid(True)
92 | for i in range(Numb_Algs):
93 | plt.plot(train_acc[i, 1:], linestyle=linestyles[i],
94 | label=algorithms_list[i] + str(lamb[i]) + "_" + str(loc_ep1[i]) + "e" + "_" + str(batch_size[i]) + "b")
95 | plt.legend(loc='lower right')
96 | plt.ylabel('Training Accuracy')
97 | plt.xlabel('Global rounds ' + '$K_g$')
98 | plt.title(dataset.upper())
99 | # plt.ylim([0.8, glob_acc.max()])
100 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.png', bbox_inches="tight")
101 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf')
102 | plt.figure(2)
103 |
104 | plt.grid(True)
105 | for i in range(Numb_Algs):
106 | plt.plot(train_loss[i, start:], linestyle=linestyles[i], label=algorithms_list[i] + str(lamb[i]) +
107 | "_" + str(loc_ep1[i]) + "e" + "_" + str(
108 | batch_size[i]) + "b")
109 | # plt.plot(train_loss1[i, 1:], label=algs_lbl1[i])
110 | plt.legend(loc='upper right')
111 | plt.ylabel('Training Loss')
112 | plt.xlabel('Global rounds')
113 | plt.title(dataset.upper())
114 | # plt.ylim([train_loss.min(), 0.5])
115 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight")
116 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf')
117 | plt.figure(3)
118 | plt.grid(True)
119 | for i in range(Numb_Algs):
120 | plt.plot(glob_acc[i, start:], linestyle=linestyles[i],
121 | label=algorithms_list[i] + str(lamb[i]) + "_" + str(loc_ep1[i]) + "e" + "_" + str(batch_size[i]) + "b")
122 | # plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i])
123 | plt.legend(loc='lower right')
124 | # plt.ylim([0.6, glob_acc.max()])
125 | plt.ylabel('Test Accuracy')
126 | plt.xlabel('Global rounds ')
127 | plt.title(dataset.upper())
128 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight")
129 | # plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf')
130 |
131 |
132 | def get_max_value_index(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], hyper_learning_rate=[],
133 |                         algorithms_list=[], batch_size=0, rho=[], dataset=""):
134 |     Numb_Algs = len(algorithms_list)
135 |     glob_acc, train_acc, train_loss = get_training_data_value(
136 |         num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset)  # pass hyper_learning_rate/rho through so the positional args line up
137 |     for i in range(Numb_Algs):
138 |         print("Algorithm: ", algorithms_list[i], "Max testing Accuracy: ", glob_acc[i].max(
139 |         ), "Index: ", np.argmax(glob_acc[i]), "local update:", loc_ep1[i])
140 |
141 |
142 | def plot_summary_mnist(num_users=100, loc_ep1=[], Numb_Glob_Iters=10, lamb=[], learning_rate=[], hyper_learning_rate=[],
143 | algorithms_list=[], batch_size=0, rho=[], dataset=""):
144 | Numb_Algs = len(algorithms_list)
145 |
146 | # glob_acc, train_acc, train_loss = get_training_data_value(users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset)
147 |
148 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb,
149 | learning_rate, hyper_learning_rate, algorithms_list,
150 | batch_size, rho, dataset)
151 | glob_acc = average_smooth(glob_acc_, window='flat')
152 | train_loss = average_smooth(train_loss_, window='flat')
153 | train_acc = average_smooth(train_acc_, window='flat')
154 |
155 | for i in range(Numb_Algs):
156 | print(algorithms_list[i], "acc:", glob_acc[i].max())
157 | print(algorithms_list[i], "loss:", train_loss[i].min())
158 |
159 | plt.figure(1)
160 |
161 | linestyles = ['-', '--', '-.', ':']
162 | algs_lbl = ["FEDL", "FedAvg",
163 | "FEDL", "FedAvg",
164 | "FEDL", "FedAvg",
165 | "FEDL", "FEDL"]
166 |
167 | fig = plt.figure(figsize=(12, 4))
168 | ax = fig.add_subplot(111) # The big subplot
169 | ax1 = fig.add_subplot(131)
170 | ax2 = fig.add_subplot(132)
171 | ax3 = fig.add_subplot(133)
172 | ax1.grid(True)
173 | ax2.grid(True)
174 | ax3.grid(True)
175 | # min = train_loss.min()
176 | min = train_loss.min() - 0.001
177 | max = 0.46
178 | # max = train_loss.max() + 0.01
179 | num_al = 2
180 | # Turn off axis lines and ticks of the big subplot
181 | ax.spines['top'].set_color('none')
182 | ax.spines['bottom'].set_color('none')
183 | ax.spines['left'].set_color('none')
184 | ax.spines['right'].set_color('none')
185 | ax.tick_params(labelcolor='w', top='off',
186 | bottom='off', left='off', right='off')
187 | for i in range(num_al):
188 | stringbatch = str(batch_size[i])
189 | if (stringbatch == '0'):
190 | stringbatch = '$\infty$'
191 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i],
192 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i]))
193 | ax1.set_ylim([min, max])
194 | ax1.legend(loc='upper right', prop={'size': 10})
195 |
196 | for i in range(num_al):
197 | stringbatch = str(batch_size[i + 2])
198 | if (stringbatch == '0'):
199 | stringbatch = '$\infty$'
200 | ax2.plot(train_loss[i + num_al, 1:], linestyle=linestyles[i],
201 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
202 | hyper_learning_rate[i + num_al]))
203 | ax2.set_ylim([min, max])
204 | ax2.legend(loc='upper right', prop={'size': 10})
205 |
206 | for i in range(4):
207 | stringbatch = str(batch_size[i + 4])
208 | if (stringbatch == '0'):
209 | stringbatch = '$\infty$'
210 | ax3.plot(train_loss[i + num_al * 2, 1:], linestyle=linestyles[i],
211 | label=algs_lbl[i + num_al * 2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
212 | hyper_learning_rate[i + num_al * 2]))
213 | ax3.set_ylim([min, max])
214 | ax3.legend(loc='upper right', prop={'size': 10})
215 |
216 | ax.set_title('MNIST', y=1.02)
217 | ax.set_xlabel('Global rounds ' + '$K_g$')
218 | ax.set_ylabel('Training Loss', labelpad=15)
219 | plt.savefig(dataset + str(loc_ep1[1]) +
220 | 'train_loss.pdf', bbox_inches='tight')
221 | plt.savefig(dataset + str(loc_ep1[1]) +
222 | 'train_loss.png', bbox_inches='tight')
223 |
224 | fig = plt.figure(figsize=(12, 4))
225 | ax = fig.add_subplot(111) # The big subplot
226 | ax1 = fig.add_subplot(131)
227 | ax2 = fig.add_subplot(132)
228 | ax3 = fig.add_subplot(133)
229 | ax1.grid(True)
230 | ax2.grid(True)
231 | ax3.grid(True)
232 | # min = train_loss.min()
233 | min = 0.82
234 | max = glob_acc.max() + 0.001 # train_loss.max() + 0.01
235 | num_al = 2
236 | # Turn off axis lines and ticks of the big subplot
237 | ax.spines['top'].set_color('none')
238 | ax.spines['bottom'].set_color('none')
239 | ax.spines['left'].set_color('none')
240 | ax.spines['right'].set_color('none')
241 | ax.tick_params(labelcolor='w', top='off',
242 | bottom='off', left='off', right='off')
243 | for i in range(num_al):
244 | stringbatch = str(batch_size[i])
245 | if (stringbatch == '0'):
246 | stringbatch = '$\infty$'
247 | ax1.plot(glob_acc[i, 1:], linestyle=linestyles[i],
248 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(hyper_learning_rate[i]))
249 | ax1.set_ylim([min, max])
250 | ax1.legend(loc='lower right', prop={'size': 10})
251 |
252 | for i in range(num_al):
253 | stringbatch = str(batch_size[i + 2])
254 | if (stringbatch == '0'):
255 | stringbatch = '$\infty$'
256 | ax2.plot(glob_acc[i + num_al, 1:], linestyle=linestyles[i],
257 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
258 | hyper_learning_rate[i + num_al * 1]))
259 | ax2.set_ylim([min, max])
260 | ax2.legend(loc='lower right', prop={'size': 10})
261 |
262 | for i in range(4):
263 | stringbatch = str(batch_size[i + 4])
264 | if (stringbatch == '0'):
265 | stringbatch = '$\infty$'
266 | ax3.plot(glob_acc[i + num_al * 2, 1:], linestyle=linestyles[i],
267 | label=algs_lbl[i + num_al * 2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
268 | hyper_learning_rate[i + num_al * 2]))
269 | ax3.set_ylim([min, max])
270 | ax3.legend(loc='lower right', prop={'size': 10})
271 |
272 | ax.set_title('MNIST', y=1.02)
273 | ax.set_xlabel('Global rounds ' + '$K_g$')
274 | ax.set_ylabel('Testing Accuracy', labelpad=15)
275 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.pdf', bbox_inches='tight')
276 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.png', bbox_inches='tight')
277 |
278 |
279 | def plot_summary_nist(num_users=100, loc_ep1=[], Numb_Glob_Iters=10, lamb=[], learning_rate=[], hyper_learning_rate=[],
280 | algorithms_list=[], batch_size=0, rho=[], dataset=""):
281 | Numb_Algs = len(algorithms_list)
282 | # glob_acc, train_acc, train_loss = get_training_data_value( users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms_list, batch_size, rho, dataset)
283 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb,
284 | learning_rate, hyper_learning_rate, algorithms_list,
285 | batch_size, rho, dataset)
286 | glob_acc = average_smooth(glob_acc_, window='flat')
287 | train_loss = average_smooth(train_loss_, window='flat')
288 | train_acc = average_smooth(train_acc_, window='flat')
289 | for i in range(Numb_Algs):
290 | print(algorithms_list[i], "acc:", glob_acc[i].max())
291 | print(algorithms_list[i], "loss:", train_loss[i].max())
292 | plt.figure(1)
293 |
294 | linestyles = ['-', '--', '-.', ':']
295 | algs_lbl = ["FEDL", "FedAvg", "FEDL",
296 | "FEDL", "FedAvg", "FEDL",
297 | "FEDL", "FedAvg", "FEDL"]
298 | fig = plt.figure(figsize=(12, 4))
299 |
300 | ax = fig.add_subplot(111) # The big subplot
301 | ax1 = fig.add_subplot(131)
302 | ax2 = fig.add_subplot(132)
303 | ax3 = fig.add_subplot(133)
304 | ax1.grid(True)
305 | ax2.grid(True)
306 | ax3.grid(True)
307 | # min = train_loss.min()
308 | min = train_loss.min() - 0.01
309 | max = 3 # train_loss.max() + 0.01
310 | num_al = 3
311 | # Turn off axis lines and ticks of the big subplot
312 | ax.spines['top'].set_color('none')
313 | ax.spines['bottom'].set_color('none')
314 | ax.spines['left'].set_color('none')
315 | ax.spines['right'].set_color('none')
316 | ax.tick_params(labelcolor='w', top='off',
317 | bottom='off', left='off', right='off')
318 | for i in range(num_al):
319 | stringbatch = str(batch_size[i])
320 | if (stringbatch == '0'):
321 | stringbatch = '$\infty$'
322 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i],
323 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
324 | hyper_learning_rate[i]) + ', $K_l = $' + str(loc_ep1[i]))
325 | ax1.set_ylim([min, max])
326 | ax1.legend(loc='upper right', prop={'size': 10})
327 |
328 | for i in range(num_al):
329 | stringbatch = str(batch_size[i + num_al])
330 | if (stringbatch == '0'):
331 | stringbatch = '$\infty$'
332 | ax2.plot(train_loss[i + num_al, 1:], linestyle=linestyles[i],
333 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
334 | hyper_learning_rate[i + num_al]) + ', $K_l = $' + str(loc_ep1[i + num_al]))
335 | ax2.set_ylim([min, max])
336 | ax2.legend(loc='upper right', prop={'size': 10})
337 |
338 | for i in range(num_al):
339 | stringbatch = str(batch_size[i + num_al * 2])
340 | if (stringbatch == '0'):
341 | stringbatch = '$\infty$'
342 | ax3.plot(train_loss[i + num_al * 2, 1:], linestyle=linestyles[i],
343 | label=algs_lbl[i + num_al * 2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
344 | hyper_learning_rate[i + num_al * 2]) + ', $K_l = $' + str(loc_ep1[i + num_al * 2]))
345 | ax3.set_ylim([min, max])
346 | ax3.legend(loc='upper right', prop={'size': 10})
347 |
348 | ax.set_title('FEMNIST', y=1.02)
349 | ax.set_xlabel('Global rounds ' + '$K_g$')
350 | ax.set_ylabel('Training Loss', labelpad=15)
351 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches='tight')
352 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.png', bbox_inches='tight')
353 |
354 | fig = plt.figure(figsize=(12, 4))
355 | ax = fig.add_subplot(111) # The big subplot
356 | ax1 = fig.add_subplot(131)
357 | ax2 = fig.add_subplot(132)
358 | ax3 = fig.add_subplot(133)
359 | ax1.grid(True)
360 | ax2.grid(True)
361 | ax3.grid(True)
362 | # min = train_loss.min()
363 | num_al = 3
364 | min = 0.3
365 | max = glob_acc.max() + 0.01 # train_loss.max() + 0.01
366 | # Turn off axis lines and ticks of the big subplot
367 | ax.spines['top'].set_color('none')
368 | ax.spines['bottom'].set_color('none')
369 | ax.spines['left'].set_color('none')
370 | ax.spines['right'].set_color('none')
371 | ax.tick_params(labelcolor='w', top='off',
372 | bottom='off', left='off', right='off')
373 | for i in range(num_al):
374 | stringbatch = str(batch_size[i])
375 | if (stringbatch == '0'):
376 | stringbatch = '$\infty$'
377 | ax1.plot(glob_acc[i, 1:], linestyle=linestyles[i],
378 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
379 | hyper_learning_rate[i]) + ', $K_l = $' + str(loc_ep1[i]))
380 | ax1.set_ylim([min, max])
381 | ax1.legend(loc='lower right', prop={'size': 10})
382 |
383 | for i in range(num_al):
384 | stringbatch = str(batch_size[i + num_al])
385 | if (stringbatch == '0'):
386 | stringbatch = '$\infty$'
387 | ax2.plot(glob_acc[i + num_al, 1:], linestyle=linestyles[i],
388 | label=algs_lbl[i + num_al] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
389 | hyper_learning_rate[i + num_al * 1]) + ', $K_l = $' + str(loc_ep1[i + num_al]))
390 | ax2.set_ylim([min, max])
391 | ax2.legend(loc='lower right', prop={'size': 10})
392 |
393 | for i in range(num_al):
394 | stringbatch = str(batch_size[i + num_al * 2])
395 | if (stringbatch == '0'):
396 | stringbatch = '$\infty$'
397 | ax3.plot(glob_acc[i + num_al * 2, 1:], linestyle=linestyles[i],
398 | label=algs_lbl[i + num_al * 2] + " : " + '$B = $' + stringbatch + ', $\eta = $' + str(
399 | hyper_learning_rate[i + num_al * 2]) + ', $K_l = $' + str(loc_ep1[i + 2 * num_al]))
400 | ax3.set_ylim([min, max])
401 | ax3.legend(loc='lower right', prop={'size': 10})
402 |
403 | ax.set_title('FEMNIST', y=1.02)
404 | ax.set_xlabel('Global rounds ' + '$K_g$')
405 | ax.set_ylabel('Testing Accuracy', labelpad=15)
406 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.pdf', bbox_inches='tight')
407 | plt.savefig(dataset + str(loc_ep1[1]) + 'test_accu.png', bbox_inches='tight')
408 |
409 |
410 | def plot_summary_linear(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], hyper_learning_rate=[],
411 | algorithms_list=[], batch_size=0, rho=[], dataset=""):
412 | Numb_Algs = len(algorithms_list)
413 | glob_acc, train_acc, train_loss = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,
414 | hyper_learning_rate, algorithms_list, batch_size, rho,
415 | dataset)
416 | for i in range(Numb_Algs):
417 | print(algorithms_list[i], "loss:", glob_acc[i].max())
418 | plt.figure(1)
419 |
420 | linestyles = ['-', '-', '-', '-']
421 | markers = ["o", "v", "s", "*", "x", "P"]
422 | algs_lbl = ["FEDL", "FEDL", "FEDL", "FEDL",
423 | "FEDL", "FEDL", "FEDL", "FEDL",
424 | "FEDL", "FEDL", "FEDL", "FEDL"]
425 | fig = plt.figure(figsize=(12, 4))
426 | ax = fig.add_subplot(111) # The big subplot
427 | ax1 = fig.add_subplot(131)
428 | ax2 = fig.add_subplot(132)
429 | ax3 = fig.add_subplot(133)
430 | # min = train_loss.min()
431 | num_al = 4
432 | # Turn off axis lines and ticks of the big subplot
433 | ax.spines['top'].set_color('none')
434 | ax.spines['bottom'].set_color('none')
435 | ax.spines['left'].set_color('none')
436 | ax.spines['right'].set_color('none')
437 | ax.tick_params(labelcolor='w', top='off',
438 | bottom='off', left='off', right='off')
439 | for i in range(num_al):
440 | ax1.plot(train_loss[i, 1:], linestyle=linestyles[i],
441 | label=algs_lbl[i] + ": " + '$\eta = $' + str(hyper_learning_rate[i]), marker=markers[i], markevery=0.4,
442 | markersize=5)
443 |
444 | ax1.hlines(y=0.035, xmin=0, xmax=200, linestyle='--', label="optimal solution", color="m")
445 | ax1.legend(loc='upper right', prop={'size': 10})
446 | ax1.set_ylim([0.02, 0.5])
447 | ax1.set_title('$\\rho = $' + str(rho[0]))
448 | ax1.grid(True)
449 | for i in range(num_al):
450 |         str_rho = r', $\rho = $' + str(rho[i])  # raw string; label matches the rho value (currently unused)
451 | ax2.plot(train_loss[i + num_al, 1:], linestyle=linestyles[i],
452 | label=algs_lbl[i + num_al] + ": " + '$\eta = $' + str(hyper_learning_rate[i + num_al]),
453 | marker=markers[i], markevery=0.4, markersize=5)
454 |
455 | ax2.hlines(y=0.035, xmin=0, xmax=200, linestyle='--', label="optimal solution", color="m")
456 | ax2.set_ylim([0.02, 0.5])
457 | # ax2.legend(loc='upper right')
458 | ax2.set_title('$\\rho = $' + str(rho[0 + num_al]))
459 | ax2.grid(True)
460 | for i in range(num_al):
461 |         str_rho = r', $\rho = $' + str(rho[i])  # raw string: otherwise '\r' is read as a carriage return (currently unused)
462 | ax3.plot(train_loss[i + num_al * 2, 1:], linestyle=linestyles[i],
463 | label=algs_lbl[i + num_al * 2] + ": " + '$\eta = $' + str(hyper_learning_rate[i + num_al * 2]),
464 | marker=markers[i], markevery=0.4, markersize=5)
465 |
466 | ax3.hlines(y=0.035, xmin=0, xmax=200, linestyle='--',
467 | label="optimal solution", color="m")
468 | ax3.set_ylim([0.02, 0.5])
469 | # ax3.legend(loc='upper right')
470 | ax3.set_title('$\\rho = $' + str(rho[0 + 2 * num_al]))
471 | ax3.grid(True)
472 | ax.set_title('Synthetic dataset', y=1.1)
473 | ax.set_xlabel('Global rounds ' + '$K_g$')
474 | ax.set_ylabel('Training Loss')
475 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches='tight')
476 | plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.png', bbox_inches='tight')
477 |
478 |
479 | def get_all_training_data_value(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=0, learning_rate=0,
480 | hyper_learning_rate=0, algorithms="", batch_size=0, dataset="", rho=0, times=5):
481 | train_acc = np.zeros((times, Numb_Glob_Iters))
482 | train_loss = np.zeros((times, Numb_Glob_Iters))
483 | glob_acc = np.zeros((times, Numb_Glob_Iters))
484 | algorithms_list = [algorithms] * times
485 |
486 | for i in range(times):
487 | if (lamb > 0):
488 | algorithms_list[i] = algorithms_list[i] + "_prox_" + str(lamb)
489 |
490 | string_learning_rate = str(learning_rate)
491 |
492 | if (algorithms_list[i] == "FEDL"):
493 | string_learning_rate = string_learning_rate + "_" + str(hyper_learning_rate)
494 |
495 | algorithms_list[i] = algorithms_list[i] + "_" + string_learning_rate + "_" + str(num_users) + "u" + "_" + str(
496 | batch_size) + "b" + "_" + str(loc_ep1)
497 |
498 | if (rho > 0):
499 | algorithms_list[i] += "_" + str(rho) + "p"
500 |
501 | train_acc[i, :], train_loss[i, :], glob_acc[i, :] = np.array(
502 | simple_read_data(str(i), dataset + "_" + algorithms_list[i]))[:, :Numb_Glob_Iters]
503 |
504 | return glob_acc, train_acc, train_loss
505 |
506 |
507 | def average_data(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, hyper_learning_rate, algorithms, batch_size,
508 | dataset, rho, times):
509 | glob_acc, train_acc, train_loss = get_all_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb,
510 | learning_rate, hyper_learning_rate, algorithms,
511 | batch_size, dataset, rho, times)
512 | # store average value to h5 file
513 | glob_acc_data = np.average(glob_acc, axis=0)
514 | train_acc_data = np.average(train_acc, axis=0)
515 | train_loss_data = np.average(train_loss, axis=0)
516 |
517 |     max_accuracy = []
518 |     for i in range(times):
519 |         max_accuracy.append(glob_acc[i].max())
520 |     print("std:", np.std(max_accuracy))
521 |     print("Mean:", np.mean(max_accuracy))
522 |
523 | alg = dataset + "_" + algorithms
524 | alg += "_" + str(learning_rate)
525 |
526 | if (algorithms == "FEDL"):
527 | alg += "_" + str(hyper_learning_rate)
528 |
529 | alg += "_" + str(num_users) + "u" + "_" + str(batch_size) + "b" + "_" + str(loc_ep1)
530 |
531 | if (lamb > 0):
532 | alg += "_" + str(lamb) + "L"
533 |
534 | if (rho > 0):
535 | alg += "_" + str(rho) + "p"
536 |
537 | # alg = alg + "_" + str(learning_rate) + "_" + str(hyper_learning_rate) + "_" + str(lamb) + "_" + str(users_per_round) + "u" + "_" + str(batch_size) + "b" + "_" + str(loc_ep1)
538 | alg = alg + "_" + "avg"
539 |     if len(glob_acc) and len(train_acc) and len(train_loss):  # use 'and': bitwise '&' binds tighter than '!=', so the original check was misleading
540 |         with h5py.File("./results/" + '{}.h5'.format(alg), 'w') as hf:
541 | hf.create_dataset('rs_glob_acc', data=glob_acc_data)
542 | hf.create_dataset('rs_train_acc', data=train_acc_data)
543 | hf.create_dataset('rs_train_loss', data=train_loss_data)
544 | hf.close()
545 | return 0
546 |
547 |
548 | def plot_summary_one_mnist(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],
549 | hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho=[], dataset=""):
550 | Numb_Algs = len(algorithms_list)
551 | # glob_acc, train_acc, train_loss = get_training_data_value(
552 | # users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset)
553 |
554 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb,
555 | learning_rate, hyper_learning_rate, algorithms_list,
556 | batch_size, rho, dataset)
557 | glob_acc = average_smooth(glob_acc_, window='flat')
558 | train_loss = average_smooth(train_loss_, window='flat')
559 | train_acc = average_smooth(train_acc_, window='flat')
560 |
561 | plt.figure(1)
562 | MIN = train_loss.min() - 0.001
563 | start = 0
564 | linestyles = ['-', '--', '-.', ':']
565 | markers = ["o", "v", "s", "*", "x", "P"]
566 | algs_lbl = ["FEDL", "FedAvg", "FEDL", "FedAvg"]
567 | plt.grid(True)
568 | for i in range(Numb_Algs):
569 | stringbatch = str(batch_size[i])
570 | if (stringbatch == '0'):
571 | stringbatch = '$\infty$'
572 | plt.plot(train_acc[i, 1:], linestyle=linestyles[i], marker=markers[i],
573 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5)
574 |
575 | plt.legend(loc='lower right')
576 | plt.ylabel('Training Accuracy')
577 | plt.xlabel('Global rounds ' + '$K_g$')
578 | plt.title(dataset.upper())
579 | plt.ylim([0.85, train_acc.max()])
580 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.png', bbox_inches="tight")
581 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.pdf', bbox_inches="tight")
582 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf')
583 | plt.figure(2)
584 |
585 | plt.grid(True)
586 | for i in range(Numb_Algs):
587 | stringbatch = str(batch_size[i])
588 | if (stringbatch == '0'):
589 | stringbatch = '$\infty$'
590 | plt.plot(train_loss[i, 1:], linestyle=linestyles[i], marker=markers[i],
591 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5)
592 |
593 | # plt.plot(train_loss1[i, 1:], label=algs_lbl1[i])
594 | plt.legend(loc='upper right')
595 | plt.ylabel('Training Loss')
596 | plt.xlabel('Global rounds')
597 | plt.title(dataset.upper())
598 | plt.ylim([train_loss.min() - 0.01, 0.7])
599 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight")
600 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches="tight")
601 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf')
602 | plt.figure(3)
603 | plt.grid(True)
604 | for i in range(Numb_Algs):
605 | stringbatch = str(batch_size[i])
606 | if (stringbatch == '0'):
607 | stringbatch = '$\infty$'
608 | plt.plot(glob_acc[i, 1:], linestyle=linestyles[i], marker=markers[i],
609 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5)
610 | # plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i])
611 | plt.legend(loc='lower right')
612 | plt.ylim([0.8, glob_acc.max() + 0.005])
613 | plt.ylabel('Test Accuracy')
614 | plt.xlabel('Global rounds ')
615 | plt.title(dataset.upper())
616 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight")
617 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.pdf', bbox_inches="tight")
618 | # plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf')
619 |
620 |
621 | def plot_summary_one_nist(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[],
622 | hyper_learning_rate=[], algorithms_list=[], batch_size=0, rho=[], dataset=""):
623 | Numb_Algs = len(algorithms_list)
624 | # glob_acc, train_acc, train_loss = get_training_data_value(
625 | # users_per_round, loc_ep1, Numb_Glob_Iters, lamb, learning_rate,hyper_learning_rate, algorithms_list, batch_size, dataset)
626 |
627 | glob_acc_, train_acc_, train_loss_ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb,
628 | learning_rate, hyper_learning_rate, algorithms_list,
629 | batch_size, rho, dataset)
630 | glob_acc = average_smooth(glob_acc_, window='flat')
631 | train_loss = average_smooth(train_loss_, window='flat')
632 | train_acc = average_smooth(train_acc_, window='flat')
633 |
634 | plt.figure(1)
635 | MIN = train_loss.min() - 0.001
636 | start = 0
637 | linestyles = ['-', '--', '-.', ':']
638 | markers = ["o", "v", "s", "*", "x", "P"]
639 | algs_lbl = ["FEDL", "FedAvg", "FedAvg"]
640 | plt.grid(True)
641 | for i in range(Numb_Algs):
642 | stringbatch = str(batch_size[i])
643 | if (stringbatch == '0'):
644 | stringbatch = '$\infty$'
645 | plt.plot(train_acc[i, 1:], linestyle=linestyles[i], marker=markers[i],
646 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5)
647 |
648 | plt.legend(loc='lower right')
649 | plt.ylabel('Training Accuracy')
650 | plt.xlabel('Global rounds ' + '$K_g$')
651 | plt.title('FEMNIST')
652 | # plt.ylim([0.85, train_acc.max()])
653 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.png', bbox_inches="tight")
654 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_acc.pdf', bbox_inches="tight")
655 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_acc.pdf')
656 | plt.figure(2)
657 |
658 | plt.grid(True)
659 | for i in range(Numb_Algs):
660 | stringbatch = str(batch_size[i])
661 | if (stringbatch == '0'):
662 | stringbatch = '$\infty$'
663 | plt.plot(train_loss[i, 1:], linestyle=linestyles[i], marker=markers[i],
664 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5)
665 |
666 | # plt.plot(train_loss1[i, 1:], label=algs_lbl1[i])
667 | plt.legend(loc='upper right')
668 | plt.ylabel('Training Loss')
669 | plt.xlabel('Global rounds')
670 | plt.title('FEMNIST')
671 | # plt.ylim([train_loss.min(), 0.7])
672 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.png', bbox_inches="tight")
673 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'train_loss.pdf', bbox_inches="tight")
674 | # plt.savefig(dataset + str(loc_ep1[1]) + 'train_loss.pdf')
675 | plt.figure(3)
676 | plt.grid(True)
677 | for i in range(Numb_Algs):
678 | stringbatch = str(batch_size[i])
679 | if (stringbatch == '0'):
680 | stringbatch = '$\infty$'
681 | plt.plot(glob_acc[i, 1:], linestyle=linestyles[i], marker=markers[i],
682 | label=algs_lbl[i] + " : " + '$B = $' + stringbatch, markevery=0.4, markersize=5)
683 | # plt.plot(glob_acc1[i, 1:], label=algs_lbl1[i])
684 | plt.legend(loc='lower right')
685 | # plt.ylim([0.8, glob_acc.max() + 0.005])
686 | plt.ylabel('Test Accuracy')
687 | plt.xlabel('Global rounds ')
688 | plt.title('FEMNIST')
689 | # ax.set_title('FEMNIST', y=1.02)
690 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.png', bbox_inches="tight")
691 | plt.savefig(dataset.upper() + str(loc_ep1[1]) + 'glob_acc.pdf', bbox_inches="tight")
692 | # plt.savefig(dataset + str(loc_ep1[1]) + 'glob_acc.pdf')
693 |
694 |
695 |
--------------------------------------------------------------------------------
/utils/plot_utils.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import matplotlib
3 | import h5py
4 | import numpy as np
5 | from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
6 | import os
7 | from pathlib import Path
8 |
9 | plt.rcParams.update({'font.size': 14})
10 |
11 |
12 | def read_from_results(file_name):
13 | hf = h5py.File(file_name, 'r')
14 | string = file_name.split('_')
15 | if "norms" in string:
16 | rs_param_norms = np.array(hf.get('rs_param_norms')[:])
17 | if "SCAFFOLD" in string:
18 | rs_control_norms = np.array(hf.get('rs_control_norms')[:])
19 | return rs_param_norms, rs_control_norms
20 | else:
21 | return rs_param_norms
22 |
23 | rs_glob_acc = np.array(hf.get('rs_glob_acc')[:])
24 | rs_train_acc = np.array(hf.get('rs_train_acc')[:])
25 | rs_train_loss = np.array(hf.get('rs_train_loss')[:])
26 | return rs_train_acc, rs_train_loss, rs_glob_acc
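# Illustrative calls (file names follow the naming convention built by average_data and
# average_norms below; the concrete dataset/similarity values are assumptions):
#
#     train_acc, train_loss, glob_acc = read_from_results("./results/Femnist_FedAvg_0.1s_avg.h5")
#     param_norms, control_norms = read_from_results("./results/Femnist_SCAFFOLD_norms_0.1s_avg.h5")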
27 |
28 |
29 | # TODO: replace all args with input_dict
30 | def get_all_training_data_value(num_glob_iters, algorithm, dataset, times, similarity, noise):
31 | train_acc = np.zeros((times, num_glob_iters))
32 | train_loss = np.zeros((times, num_glob_iters))
33 | glob_acc = np.zeros((times, num_glob_iters))
34 |
35 | file_name = "./results/" + dataset + "_" + algorithm
36 | file_name += "_" + str(similarity) + "s"
37 | if noise:
38 | file_name += '_noisy'
39 |
40 | for i in range(times):
41 | f = file_name + "_" + str(i) + ".h5"
42 | train_acc[i, :], train_loss[i, :], glob_acc[i, :] = np.array(read_from_results(f))[:, :num_glob_iters]
43 | return glob_acc, train_acc, train_loss
44 |
45 |
46 | def average_smooth(data, window_len=10, window='hanning'):
47 | results = []
48 | if window_len < 3:
49 | return data
50 | for i in range(len(data)):
51 | x = data[i]
52 | s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
53 | # print(len(s))
54 | if window == 'flat': # moving average
55 | w = np.ones(window_len, 'd')
56 | else:
57 |             w = getattr(np, window)(window_len)  # e.g. np.hanning(window_len); avoids eval and the undefined name 'numpy'
58 |
59 | y = np.convolve(w / w.sum(), s, mode='valid')
60 | results.append(y[window_len - 1:])
61 | return np.array(results)
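# Minimal sketch of the smoothing above (values are made up): each row of `data` is
# reflection-padded, convolved with the chosen window, and trimmed back to its length.
#
#     rng = np.random.default_rng(0)
#     noisy = np.sin(np.linspace(0, 3, 200)) + 0.1 * rng.standard_normal(200)
#     smooth = average_smooth(np.array([noisy]), window_len=10, window='flat')[0]
#     assert smooth.shape == noisy.shape  # reflection padding preserves the curve length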
62 |
63 |
64 | def average_data(num_glob_iters, algorithm, dataset, times, similarity, noise):
65 | glob_acc, train_acc, train_loss = get_all_training_data_value(num_glob_iters, algorithm, dataset, times, similarity,
66 | noise)
67 |
68 | glob_acc_data = np.average(glob_acc, axis=0)
69 | train_acc_data = np.average(train_acc, axis=0)
70 | train_loss_data = np.average(train_loss, axis=0)
71 |
72 |     max_accuracy = []
73 |     for i in range(times):
74 |         max_accuracy.append(glob_acc[i].max())
75 |     print("std:", np.std(max_accuracy))
76 |     print("Mean:", np.mean(max_accuracy))
77 |
78 | # store average value to h5 file
79 | file_name = "./results/" + dataset + "_" + algorithm
80 | file_name += "_" + str(similarity) + "s"
81 | if noise:
82 | file_name += '_noisy'
83 | file_name += "_avg.h5"
84 |
85 |     if len(glob_acc) and len(train_acc) and len(train_loss):  # use 'and': bitwise '&' binds tighter than '!=', so the original check was misleading
86 | with h5py.File(file_name, 'w') as hf:
87 | hf.create_dataset('rs_glob_acc', data=glob_acc_data)
88 | hf.create_dataset('rs_train_acc', data=train_acc_data)
89 | hf.create_dataset('rs_train_loss', data=train_loss_data)
90 | hf.close()
91 | return 0
92 |
93 |
94 | def get_all_norms(num_glob_iters, algorithm, dataset, times, similarity, noise):
95 | file_name = "./results/" + dataset + "_" + algorithm + "_norms"
96 | file_name += "_" + str(similarity) + "s"
97 | if noise:
98 | file_name += '_noisy'
99 |
100 | param_norms = np.zeros((times, num_glob_iters))
101 |
102 | if algorithm == "SCAFFOLD":
103 | control_norms = np.zeros((times, num_glob_iters))
104 | for i in range(times):
105 | f = file_name + "_" + str(i) + ".h5"
106 | param_norms[i, :], control_norms[i, :] = np.array(read_from_results(f))[:, :num_glob_iters]
107 | return param_norms, control_norms
108 | else:
109 | for i in range(times):
110 | f = file_name + "_" + str(i) + ".h5"
111 | param_norms[i, :] = np.array(read_from_results(f))[:num_glob_iters]
112 | return param_norms
113 |
114 |
115 | def average_norms(num_glob_iters, algorithm, dataset, times, similarity, noise):
116 | # store average value to h5 file
117 | file_name = "./results/" + dataset + "_" + algorithm + "_norms"
118 | file_name += "_" + str(similarity) + "s"
119 | if noise:
120 | file_name += '_noisy'
121 | file_name += "_avg.h5"
122 |
123 | if algorithm == "SCAFFOLD":
124 | param_norms, control_norms = get_all_norms(num_glob_iters, algorithm, dataset, times, similarity,
125 | noise)
126 | glob_param_norms = np.average(param_norms, axis=0)
127 | glob_control_norms = np.average(control_norms, axis=0)
128 |         if len(glob_param_norms) and len(glob_control_norms):  # 'and', not '&': bitwise AND of two non-zero lengths can still be 0
129 | with h5py.File(file_name, 'w') as hf:
130 | hf.create_dataset('rs_param_norms', data=glob_param_norms)
131 | hf.create_dataset('rs_control_norms', data=glob_control_norms)
132 | else:
133 | param_norms = get_all_norms(num_glob_iters, algorithm, dataset, times, similarity, noise)
134 | glob_param_norms = np.average(param_norms, axis=0)
135 | if len(glob_param_norms) != 0:
136 | with h5py.File(file_name, 'w') as hf:
137 | hf.create_dataset('rs_param_norms', data=glob_param_norms)
138 | hf.close()
139 |
140 |
141 | def get_plot_dict(input_dict, algorithms, local_epochs):
142 | keys = ["dataset", "learning_rate", "num_glob_iters", "users_per_round", "batch_size", "local_epochs",
143 | "similarity", "noise"]
144 | plot_dict = {x: input_dict[x] for x in keys}
145 | plot_dict["local_epochs"] = local_epochs
146 | plot_dict["algorithms"] = algorithms
147 | return plot_dict
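# Illustrative usage (the concrete values are assumptions): the returned dict matches the
# keyword signature of plot_by_epochs below, so it can be splatted directly.
#
#     input_dict = {"dataset": "Femnist", "learning_rate": 0.01, "num_glob_iters": 100,
#                   "users_per_round": 10, "batch_size": 32, "local_epochs": 5,
#                   "similarity": 0.1, "noise": False}
#     plot_dict = get_plot_dict(input_dict, algorithms=["SCAFFOLD", "FedAvg"], local_epochs=[1, 5])
#     plot_by_epochs(**plot_dict)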
148 |
149 |
150 | def plot_by_epochs(dataset, algorithms, num_glob_iters, learning_rate, users_per_round, batch_size, local_epochs,
151 | similarity, noise):
152 |     """Plot the averaged Monte Carlo simulation results, comparing SCAFFOLD vs FedAvg for each local-epoch setting."""
153 | colours = ['r', 'g', 'b']
154 | fig, axs = plt.subplots(1, len(local_epochs), constrained_layout=True)
155 | if len(algorithms) == 2:
156 | fig.suptitle(f"{algorithms[0]} vs {algorithms[1]} - {dataset}")
157 | elif len(algorithms) == 1:
158 | fig.suptitle(f"{algorithms[0]} - {dataset}")
159 |
160 | if len(local_epochs) == 1:
161 | axs = [axs]
162 |
163 | for k, epochs in enumerate(local_epochs):
164 | axs[k].set_xlabel("Global Iterations")
165 | axs[k].set_ylabel("Accuracy")
166 |         axs[k].set_title("number of local epochs = " + str(epochs))
167 |
168 | for j, algorithm in enumerate(algorithms):
169 | file_name = "./results/" + dataset
170 | file_name += "_" + algorithm
171 | file_name += "_" + str(learning_rate) + "lr"
172 | file_name += "_" + str(users_per_round) + "u"
173 | file_name += "_" + str(batch_size) + "b"
174 | file_name += "_" + str(epochs) + "e"
175 | file_name += "_" + str(similarity) + "s"
176 | if noise:
177 | file_name += '_noisy'
178 | file_name += "_avg.h5"
179 | train_acc, train_loss, glob_acc = np.array(read_from_results(file_name))[:, :num_glob_iters]
180 | axs[k].plot(glob_acc, color=colours[j], label=algorithm)
181 | axs[k].legend(loc="lower right")
182 | plt.show()
183 |
184 |
185 | def plot_norms(dataset, algorithms, noises, similarities, num_glob_iters):
186 | colours = ['r', 'g', 'b']
187 | fig, axs = plt.subplots(1, len(similarities), constrained_layout=True)
188 | fig.suptitle(f"{dataset}")
189 |
190 | if len(similarities) == 1:
191 | axs = [axs]
192 |
193 | for k, similarity in enumerate(similarities):
194 | axs[k].set_xlabel("Global Iterations")
195 | axs[k].set_ylabel("Average Norm")
196 | axs[k].set_yscale('log')
197 | axs[k].set_title(str(100 * similarity) + "% Similarity")
198 |
199 | for noise in noises:
200 | for j, algorithm in enumerate(algorithms):
201 | file_name = "./results/" + dataset
202 | file_name += "_" + algorithm + "_norms"
203 | file_name += "_" + str(similarity) + "s"
204 | label = algorithm
205 | color = colours[j]
206 | if noise:
207 | file_name += '_noisy'
208 | label += ' with noise'
209 | color += ':'
210 | file_name += "_avg.h5"
211 | if algorithm == "SCAFFOLD":
212 | param_norms, control_norms = np.array(read_from_results(file_name))[:, :num_glob_iters]
213 | axs[k].plot(param_norms, color, label='params ' + label)
214 | color = colours[-1] + color[1:]
215 | axs[k].plot(control_norms, color, label='controls ' + label)
216 | else:
217 | param_norms = np.array(read_from_results(file_name))[:num_glob_iters]
218 | axs[k].plot(param_norms, color, label='params ' + label)
219 | axs[k].legend(loc="center right")
220 | plt.show()
221 |
222 |
223 | def plot_accuracy(dataset, algorithms, noises, similarities, num_glob_iters):
224 | colours = ['r', 'g']
225 | fig, axs = plt.subplots(1, len(similarities), constrained_layout=True)
226 | fig.suptitle(f"{dataset}")
227 |
228 | if len(similarities) == 1:
229 | axs = [axs]
230 |
231 | for k, similarity in enumerate(similarities):
232 | axs[k].set_xlabel("Global Iterations")
233 | axs[k].set_ylabel("Accuracy")
234 | axs[k].set_title(str(100 * similarity) + "% Similarity")
235 |
236 | for noise in noises:
237 | for j, algorithm in enumerate(algorithms):
238 | file_name = "./results/" + dataset
239 | file_name += "_" + algorithm
240 | file_name += "_" + str(similarity) + "s"
241 | label = algorithm
242 | color = colours[j]
243 | if noise:
244 | file_name += '_noisy'
245 | label += ' with noise'
246 | color += ':'
247 | file_name += "_avg.h5"
248 | train_acc, train_loss, glob_acc = np.array(read_from_results(file_name))[:, :num_glob_iters]
249 | axs[k].plot(glob_acc, color, label=label)
250 | axs[k].legend(loc="lower right")
251 | plt.show()
252 |
--------------------------------------------------------------------------------