├── .gitignore ├── README.md ├── compare_results_real.pdf ├── data ├── goodreads │ ├── data │ │ ├── test │ │ │ └── test.json │ │ └── train │ │ │ └── train.json │ ├── data_text │ │ ├── test │ │ │ └── test.json │ │ └── train │ │ │ └── train.json │ └── generate.ipynb ├── mex │ ├── data │ │ ├── test │ │ │ └── test.json │ │ └── train │ │ │ └── train.json │ └── generate.py ├── mnist │ ├── README.md │ └── generate_niid.py ├── nist │ ├── README.md │ ├── data │ │ └── my_sample.py │ ├── preprocess.sh │ ├── preprocess │ │ ├── data_to_json.py │ │ ├── data_to_json.sh │ │ ├── get_data.sh │ │ ├── get_file_dirs.py │ │ ├── get_hashes.py │ │ ├── group_by_writer.py │ │ └── match_hashes.py │ └── stats.sh ├── synthetic_0.25_0.25 │ ├── data │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ └── synthetic_0.25_0.25.zip ├── synthetic_0.5_0.5 │ ├── README.md │ ├── data │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ └── generate_synthetic.py ├── synthetic_0.75_0.75 │ └── data │ │ ├── test │ │ └── mytest.json │ │ └── train │ │ └── mytrain.json ├── synthetic_0_0 │ ├── README.md │ ├── data │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ ├── generate_synthetic.py │ └── synthetic_0_0.zip ├── synthetic_1_1 │ ├── README.md │ ├── data │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ └── generate_synthetic.py └── synthetic_iid │ ├── README.md │ ├── data │ ├── test │ │ └── mytest.json │ └── train │ │ └── mytrain.json │ └── generate_iid.py ├── fedavg_original ├── goodreads_20.csv ├── goodreads_20_prox.csv.csv ├── mex_10.csv ├── mex_10_prox.csv ├── mnist_20.csv ├── mnist_20_prox.csv ├── nist_20.csv └── nist_20_prox.csv ├── flearn ├── models │ ├── __init__.py │ ├── client.py │ ├── goodreads │ │ ├── get_embs.py │ │ ├── get_embs.sh │ │ ├── mclr.py │ │ └── rnn.py │ ├── mex │ │ ├── __init__.py │ │ ├── dnn.py │ │ └── mclr.py │ ├── mnist │ │ ├── __init__.py │ │ ├── cnn.py │ │ └── mclr.py │ ├── nist │ │ ├── __init__.py │ │ ├── cnn.py │ │ └── mclr.py │ └── synthetic │ │ ├── __init__.py │ │ └── mclr.py ├── optimizer │ ├── pgd.py │ └── pggd.py ├── trainers │ ├── __init__.py │ ├── fedavg.py │ ├── fedbase.py │ ├── feddane.py │ ├── fedprox.py │ └── fedsim.py └── utils │ ├── __init__.py │ ├── language_utils.py │ ├── model_utils.py │ ├── tf_utils.py │ └── utils.py ├── full_results_real.pdf ├── full_results_real_other.pdf ├── full_results_synthetic.pdf ├── images ├── compare_results_real.png └── full_results_real.png ├── logs └── sample │ ├── clusters.csv │ ├── fed_sim_g_0.csv │ ├── fed_sim_g_1.csv │ ├── fed_sim_g_2.csv │ ├── fed_sim_g_3.csv │ ├── fed_sim_g_4.csv │ ├── fed_sim_g_5.csv │ ├── fed_sim_g_6.csv │ ├── fed_sim_g_7.csv │ ├── fed_sim_g_8.csv │ ├── nist_0_fedsim.csv │ ├── nist_0_fedsim.pdf │ ├── params.json │ └── timetaken.csv ├── main.py ├── plot_fedsim_improvements.py ├── plot_fedsim_main.py ├── plot_fedsim_other.py ├── requirements.txt ├── results ├── main │ ├── 00.csv │ ├── 0505.csv │ ├── 11.csv │ ├── 2525.csv │ ├── 7575.csv │ ├── IID.csv │ ├── femnist.csv │ ├── goodreads.csv │ ├── mex.csv │ └── mnist.csv └── other │ ├── femnist.csv │ ├── goodreads.csv │ ├── mex.csv │ ├── mex_cnn.csv │ └── mnist.csv ├── run_fedavg.sh ├── run_fedprox.sh ├── run_fedsim.sh └── utils ├── __init__.py ├── csv_log.py ├── language_utils.py ├── model_utils.py ├── preprocess.sh ├── remove_users.py ├── sample.py ├── split_data.py ├── stats.py ├── tf_utils.py └── utils.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | data/ 132 | 133 | logs/ 134 | 135 | PNI/logs/ 136 | docs/analysis/logs/ 137 | docs/analysis/logs_cnn/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FedSim 2 | _Similarity Guided Model Aggregation for Federated Learning_ 3 | 4 | This work is published in the **Neurocomputing** journal: **[FedSim: Similarity guided model aggregation for Federated Learning](https://doi.org/10.1016/j.neucom.2021.08.141)** 5 | 6 | BibTeX 7 | ``` 8 | @article{palihawadana2021fedsim, 9 | title={FedSim: Similarity guided model aggregation for Federated Learning}, 10 | author={Palihawadana, Chamath and Wiratunga, Nirmalie and Wijekoon, Anjana and Kalutarage, Harsha}, 11 | journal={Neurocomputing}, 12 | year={2021}, 13 | publisher={Elsevier} 14 | } 15 | ``` 16 | 17 | 18 | ## Usage 19 | ```shell 20 | bash run_fedsim.sh DATASET_NAME DROP_PERC NUM_CLUSTERS NUM_CLIENTS Run_Name 21 | bash run_fedsim.sh mnist 0 9 20 mnist_run 22 | ``` 23 | 24 | The FedSim algorithm implementation is available in [`flearn/trainers/fedsim.py`](https://github.com/chamathpali/FedSim/blob/main/flearn/trainers/fedsim.py). 25 | 26 | ## Reproduce results 27 | 28 | 29 | The experiments on all datasets were carried out with 35 random seeds (0 to 34, incremented by 1) to empirically demonstrate significance. Repeating the same experiment with different random seeds reduces the sampling error of our results. 30 | 31 | For a single dataset, run FedSim, FedAvg and FedProx; each run generates its own folder under `logs`. As a reference, a sample log folder with results is included in `logs/sample/`. 32 | 33 | The hyperparameters used for the experiments are presented in Table 2 of the paper. 34 | 35 | Once the experiments are complete, create the summary log files for the three methods, following the layout of the `results/` folder. Our results are included there for reference. 36 | 37 | 1. Figure 3 - Results on real datasets - `plot_fedsim_main.py` 38 | 39 | 2. Figure 5 - Accuracy improvements of FedSim - `plot_fedsim_improvements.py` 40 | 41 | 3. Figure 6 - Results on synthetic datasets - `plot_fedsim_main.py` (change line #123 to `if(True)`) 42 | 43 | 4. Figure 7 - Results on other learning models - `plot_fedsim_other.py` 44 | 45 | ## Experiment setup 46 | We have adapted the experiment setup from the [FedProx](https://github.com/litian96/FedProx) and [Leaf Benchmark](https://github.com/TalwalkarLab/leaf) work. Thanks to [Tian Li](https://github.com/litian96) for the support. 47 | 48 | ### Dataset generation 49 | 50 | For all datasets, see the `README` files in the separate `data/$dataset` folders for instructions on preprocessing and/or sampling data.
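All generation scripts in `data/` write their train/test splits in the same LEAF-style JSON layout (a `users` list, per-user sample counts, and a `user_data` map keyed by user name). The sketch below is illustrative only; user names, feature dimensions and values are hypothetical, and the real feature vectors are dataset-specific (e.g. flattened 28x28 images for MNIST/FEMNIST).

```python
# Illustrative only: shape of the JSON written by the data generation scripts.
train_data = {
    'users': ['f_00000', 'f_00001'],                # one entry per simulated client
    'num_samples': [2, 1],                          # samples per client, same order as 'users'
    'user_data': {
        'f_00000': {'x': [[0.1, 0.4], [0.0, 0.9]],  # one feature vector per sample
                    'y': [3.0, 7.0]},               # class labels
        'f_00001': {'x': [[0.2, 0.5]], 'y': [1.0]},
    },
}
```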
51 | 52 | For further clarification, follow the guides in [FedProx](https://github.com/litian96/FedProx) and [Leaf](https://github.com/TalwalkarLab/leaf). 53 | 54 | The two datasets produced as part of this work are published together with their generation source code. 55 | - [Fed-Mex](https://github.com/chamathpali/Fed-MEx/) 56 | - [Fed-Goodreads](https://github.com/chamathpali/Fed-Goodreads/) 57 | 58 | ### Downloading dependencies 59 | 60 | ``` 61 | pip3 install -r requirements.txt 62 | ``` 63 | ### Run FedSim Experiments 64 | 65 | ```shell 66 | bash run_fedsim.sh DATASET_NAME 0 NUM_CLUSTERS NUM_CLIENTS Run_Name 67 | bash run_fedavg.sh mnist 0 9 20 mnist_run 68 | ``` 69 | or directly use the Python command: 70 | ```shell 71 | python3 -u main.py --dataset='goodreads' --optimizer='fedsim' --learning_rate=0.0001 72 | --num_rounds=250 --clients_per_round=20 --eval_every=1 --batch_size=10 --num_epochs=10 73 | --model='rnn' --drop_percent=0 --num_groups=11 --ex_name=goodreads_rnn_0 --seed=0 74 | ``` 75 | 76 | When running on a GPU, specify the device id before running the experiments: 77 | ``` 78 | export CUDA_VISIBLE_DEVICES=GPU_ID 79 | ``` 80 | 81 | ### Results 82 | 83 | 84 | ![](https://raw.githubusercontent.com/chamathpali/FedSim/main/images/full_results_real.png) 85 | 86 | _Figure 3: Comparison of performance over communication rounds on real-world datasets_ 87 | 88 | 89 | ![](https://raw.githubusercontent.com/chamathpali/FedSim/main/images/compare_results_real.png) 90 | 91 | _Figure 5: Accuracy improvements of FedSim compared to FedAvg and FedProx for the experiments in Figure 3. Values below zero indicate worse performance than the baseline, and grey vertical lines denote areas with no statistical significance_ 92 | -------------------------------------------------------------------------------- /compare_results_real.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/compare_results_real.pdf -------------------------------------------------------------------------------- /data/mnist/README.md: -------------------------------------------------------------------------------- 1 | # MNIST Dataset 2 | 3 | First download the raw data [here](https://drive.google.com/file/d/1Vp_gJHw4pPqwMUSgodhFOqUglAQyaOGD/view?usp=sharing) and put `mnist-original.mat` under the folder `data/mldata/`. 4 | 5 | To generate non-iid data: 6 | 7 | ``` 8 | mkdir test 9 | mkdir train 10 | python generate_niid.py 11 | ``` 12 | 13 | Or you can download the dataset [here](https://drive.google.com/file/d/1cU_LcBAUZvfZWveOMhG4G5Fg9uFXhVdf/view?usp=sharing), unzip it and put the `train` and `test` folders under `data`.
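Either way, the generated split can be sanity-checked from the `data/mnist` folder before training; a minimal sketch (the file name below is the one written by `generate_niid.py`, the downloaded archive may use a different name):

```python
import json

# Load the generated training split and print basic statistics.
with open('data/train/all_data_0_niid_0_keep_10_train_9.json') as f:
    train = json.load(f)

print('users:', len(train['users']))          # 1000 simulated clients for this script
print('samples:', sum(train['num_samples']))  # total number of training samples
```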
14 | 15 | The layout of the folders under `./mnist` should be: 16 | 17 | ``` 18 | | data 19 | 20 | ----| mldata 21 | 22 | ---- ----| mnist-original.mat 23 | 24 | ----| train 25 | 26 | ---- ----| train_file_name.json 27 | 28 | ----| test 29 | 30 | ---- ----| test_file_name.json 31 | 32 | | generate_niid.py 33 | | README.md 34 | ``` 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /data/mnist/generate_niid.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_mldata 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | # Setup directory for train/test data 9 | train_path = './data/train/all_data_0_niid_0_keep_10_train_9.json' 10 | test_path = './data/test/all_data_0_niid_0_keep_10_test_9.json' 11 | dir_path = os.path.dirname(train_path) 12 | if not os.path.exists(dir_path): 13 | os.makedirs(dir_path) 14 | dir_path = os.path.dirname(test_path) 15 | if not os.path.exists(dir_path): 16 | os.makedirs(dir_path) 17 | 18 | # Get MNIST data, normalize, and divide by level 19 | mnist = fetch_mldata('MNIST original', data_home='./data') 20 | mu = np.mean(mnist.data.astype(np.float32), 0) 21 | sigma = np.std(mnist.data.astype(np.float32), 0) 22 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 23 | mnist_data = [] 24 | for i in trange(10): 25 | idx = mnist.target==i 26 | mnist_data.append(mnist.data[idx]) 27 | 28 | print([len(v) for v in mnist_data]) 29 | 30 | ###### CREATE USER DATA SPLIT ####### 31 | # Assign 10 samples to each user 32 | X = [[] for _ in range(1000)] 33 | y = [[] for _ in range(1000)] 34 | idx = np.zeros(10, dtype=np.int64) 35 | for user in range(1000): 36 | for j in range(2): 37 | l = (user+j)%10 38 | X[user] += mnist_data[l][idx[l]:idx[l]+5].tolist() 39 | y[user] += (l*np.ones(5)).tolist() 40 | idx[l] += 5 41 | print(idx) 42 | 43 | # Assign remaining sample by power law 44 | user = 0 45 | props = np.random.lognormal(0, 2.0, (10,100,2)) 46 | props = np.array([[[len(v)-1000]] for v in mnist_data])*props/np.sum(props,(1,2), keepdims=True) 47 | #idx = 1000*np.ones(10, dtype=np.int64) 48 | for user in trange(1000): 49 | for j in range(2): 50 | l = (user+j)%10 51 | num_samples = int(props[l,user//10,j]) 52 | #print(num_samples) 53 | if idx[l] + num_samples < len(mnist_data[l]): 54 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist() 55 | y[user] += (l*np.ones(num_samples)).tolist() 56 | idx[l] += num_samples 57 | 58 | print(idx) 59 | 60 | # Create data structure 61 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 62 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 63 | 64 | # Setup 1000 users 65 | for i in trange(1000, ncols=120): 66 | uname = 'f_{0:05d}'.format(i) 67 | 68 | combined = list(zip(X[i], y[i])) 69 | random.shuffle(combined) 70 | X[i][:], y[i][:] = zip(*combined) 71 | num_samples = len(X[i]) 72 | train_len = int(0.9*num_samples) 73 | test_len = num_samples - train_len 74 | 75 | train_data['users'].append(uname) 76 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 77 | train_data['num_samples'].append(train_len) 78 | test_data['users'].append(uname) 79 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 80 | test_data['num_samples'].append(test_len) 81 | 82 | print(train_data['num_samples']) 83 | print(sum(train_data['num_samples'])) 84 | 85 | with open(train_path,'w') as outfile: 86 | 
json.dump(train_data, outfile) 87 | with open(test_path, 'w') as outfile: 88 | json.dump(test_data, outfile) 89 | -------------------------------------------------------------------------------- /data/nist/README.md: -------------------------------------------------------------------------------- 1 | # FEMNIST Dataset 2 | 3 | ## Setup Instructions 4 | 5 | 6 | You can download the dataset [here](https://drive.google.com/file/d/1tCEcJgRJ8NdRo11UJZR6WSKMNdmox4GC/view?usp=sharing), unzip it and put the `train` and `test` folders under `data`. 7 | 8 | 9 | The FEMNIST data we used in the paper is a subsampled (and repartitioned) version of the original full dataset, in order to impose additional statistical heterogeneity. The above dataset was generated with the following instructions: 10 | 11 | (1) First, 12 | 13 | run preprocess.sh with a choice of the following tags: 14 | 15 | - ```-s``` := 'iid' to sample in an i.i.d. manner, or 'niid' to sample in a non-i.i.d. manner; more information on i.i.d. versus non-i.i.d. is included in the 'Notes' section 16 | - ```--iu``` := number of users, if iid sampling; expressed as a fraction of the total number of users; default is 0.01 17 | - ```--sf``` := fraction of data to sample, written as a decimal; default is 0.1 18 | - ```-k``` := minimum number of samples per user 19 | - ```-t``` := 'user' to partition users into train-test groups, or 'sample' to partition each user's samples into train-test groups 20 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 21 | 22 | 23 | And then run: 24 | 25 | ``` 26 | ./preprocess.sh -s niid --sf 0.5 -k 0 -tf 0.8 -t sample 27 | ``` 28 | 29 | 30 | (Make sure to delete the rem\_user\_data, sampled\_data, test, and train subfolders in the data directory before re-running preprocess.sh.)
31 | 32 | (2) And then re-partition the data: 33 | 34 | ``` 35 | cd data 36 | python my_sample.py 37 | ``` 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /data/nist/data/my_sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import json 3 | import math 4 | import numpy as np 5 | import os 6 | import sys 7 | import random 8 | from tqdm import trange 9 | 10 | from PIL import Image 11 | 12 | NUM_USER = 200 13 | CLASS_PER_USER = 3 # from 10 lowercase characters 14 | 15 | 16 | def relabel_class(c): 17 | ''' 18 | maps hexadecimal class value (string) to a decimal number 19 | returns: 20 | - 0 through 9 for classes representing respective numbers 21 | - 10 through 35 for classes representing respective uppercase letters 22 | - 36 through 61 for classes representing respective lowercase letters 23 | ''' 24 | if c.isdigit() and int(c) < 40: 25 | return (int(c) - 30) 26 | elif int(c, 16) <= 90: # uppercase 27 | return (int(c, 16) - 55) 28 | else: 29 | return (int(c, 16) - 61) # lowercase 30 | 31 | def load_image(file_name): 32 | '''read in a png 33 | Return: a flatted list representing the image 34 | ''' 35 | size = (28, 28) 36 | img = Image.open(file_name) 37 | gray = img.convert('L') 38 | gray.thumbnail(size, Image.ANTIALIAS) 39 | arr = np.asarray(gray).copy() 40 | vec = arr.flatten() 41 | vec = vec / 255 # scale all pixel values to between 0 and 1 42 | vec = vec.tolist() 43 | 44 | return vec 45 | 46 | 47 | def main(): 48 | file_dir = "raw_data/by_class" 49 | 50 | train_path = "train/mytrain.json" 51 | test_path = "test/mytest.json" 52 | 53 | X = [[] for _ in range(NUM_USER)] 54 | y = [[] for _ in range(NUM_USER)] 55 | 56 | nist_data = {} 57 | 58 | 59 | for class_ in os.listdir(file_dir): 60 | 61 | real_class = relabel_class(class_) 62 | if real_class >= 36 and real_class <= 45: 63 | full_img_path = file_dir + "/" + class_ + "/train_" + class_ 64 | all_files_this_class = os.listdir(full_img_path) 65 | random.shuffle(all_files_this_class) 66 | sampled_files_this_class = all_files_this_class[:4000] 67 | imgs = [] 68 | for img in sampled_files_this_class: 69 | imgs.append(load_image(full_img_path + "/" + img)) 70 | class_ = relabel_class(class_) 71 | print(class_) 72 | nist_data[class_-36] = imgs # a list of list, key is (0, 25) 73 | print(len(imgs)) 74 | 75 | num_samples = np.random.lognormal(4, 1, (NUM_USER)) + 5 76 | 77 | idx = np.zeros(10, dtype=np.int64) 78 | 79 | for user in range(NUM_USER): 80 | num_sample_per_class = int(num_samples[user] / CLASS_PER_USER) 81 | if num_sample_per_class < 2: 82 | num_sample_per_class = 2 83 | 84 | for j in range(CLASS_PER_USER): 85 | class_id = (user + j) % 10 86 | if idx[class_id] + num_sample_per_class < len(nist_data[class_id]): 87 | idx[class_id] = 0 88 | X[user] += nist_data[class_id][idx[class_id]: (idx[class_id] + num_sample_per_class)] 89 | y[user] += (class_id * np.ones(num_sample_per_class)).tolist() 90 | idx[class_id] += num_sample_per_class 91 | 92 | # Create data structure 93 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 94 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 95 | 96 | for i in trange(NUM_USER, ncols=120): 97 | uname = 'f_{0:05d}'.format(i) 98 | 99 | combined = list(zip(X[i], y[i])) 100 | random.shuffle(combined) 101 | X[i][:], y[i][:] = zip(*combined) 102 | num_samples = len(X[i]) 103 | train_len = int(0.9 * num_samples) 104 | test_len = num_samples - train_len 105 | 
106 | train_data['users'].append(uname) 107 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 108 | train_data['num_samples'].append(train_len) 109 | test_data['users'].append(uname) 110 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 111 | test_data['num_samples'].append(test_len) 112 | 113 | with open(train_path, 'w') as outfile: 114 | json.dump(train_data, outfile) 115 | with open(test_path, 'w') as outfile: 116 | json.dump(test_data, outfile) 117 | 118 | 119 | if __name__ == "__main__": 120 | main() 121 | 122 | -------------------------------------------------------------------------------- /data/nist/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #rm -rf rem_user_data sampled_data test train 4 | 5 | # download data and convert to .json format 6 | 7 | if [ ! -d "data/all_data" ] || [ ! "$(ls -A data/all_data)" ]; then 8 | cd preprocess 9 | ./data_to_json.sh 10 | cd .. 11 | fi 12 | 13 | NAME="nist" # name of the dataset, equivalent to directory name 14 | 15 | cd ../../utils 16 | 17 | # ./preprocess.sh -s niid --sf 0.05 -k 64 -t sample 18 | # ./preprocess.sh --name nist -s niid --sf 1.0 -k 0 -t sample 19 | # ./preprocess.sh --name sent140 -s niid --sf 1.0 -k 1 -t sample 20 | ./preprocess.sh --name $NAME $@ 21 | 22 | cd ../data/$NAME 23 | -------------------------------------------------------------------------------- /data/nist/preprocess/data_to_json.py: -------------------------------------------------------------------------------- 1 | # Converts a list of (writer, [list of (file,class)]) tuples into a json object 2 | # of the form: 3 | # {users: [bob, etc], num_samples: [124, etc.], 4 | # user_data: {bob : {x:[img1,img2,etc], y:[class1,class2,etc]}, etc}} 5 | # where 'img_' is a vectorized representation of the corresponding image 6 | 7 | from __future__ import division 8 | import json 9 | import math 10 | import numpy as np 11 | import os 12 | import sys 13 | 14 | from PIL import Image 15 | 16 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 17 | utils_dir = os.path.join(utils_dir, 'utils') 18 | sys.path.append(utils_dir) 19 | 20 | import utils 21 | 22 | 23 | MAX_WRITERS = 100 # max number of writers per json file. 
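# Added for clarity -- worked example of the hexadecimal-to-label mapping implemented by
# relabel_class() below (the NIST by_class folder names are hexadecimal ASCII codes):
#   '30' -> 0    digit '0'       (int('30') - 30 = 0)
#   '41' -> 10   uppercase 'A'   (int('41', 16) = 65, 65 - 55 = 10)
#   '61' -> 36   lowercase 'a'   (int('61', 16) = 97, 97 - 61 = 36)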
24 | 25 | 26 | def relabel_class(c): 27 | ''' 28 | maps hexadecimal class value (string) to a decimal number 29 | returns: 30 | - 0 through 9 for classes representing respective numbers 31 | - 10 through 35 for classes representing respective uppercase letters 32 | - 36 through 61 for classes representing respective lowercase letters 33 | ''' 34 | if c.isdigit() and int(c) < 40: 35 | return (int(c) - 30) 36 | elif int(c, 16) <= 90: # uppercase 37 | return (int(c, 16) - 55) 38 | else: 39 | return (int(c, 16) - 61) 40 | 41 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 42 | 43 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer') 44 | writers = utils.load_obj(ibwd) 45 | 46 | num_json = int(math.ceil(len(writers) / MAX_WRITERS)) 47 | 48 | users = [[] for _ in range(num_json)] 49 | num_samples = [[] for _ in range(num_json)] 50 | user_data = [{} for _ in range(num_json)] 51 | 52 | writer_count = 0 53 | json_index = 0 54 | for (w, l) in writers: 55 | 56 | users[json_index].append(w) 57 | num_samples[json_index].append(len(l)) 58 | user_data[json_index][w] = {'x': [], 'y': []} 59 | 60 | size = 28, 28 # original image size is 128, 128 61 | for (f, c) in l: 62 | file_path = os.path.join(parent_path, f) 63 | img = Image.open(file_path) 64 | gray = img.convert('L') 65 | gray.thumbnail(size, Image.ANTIALIAS) 66 | arr = np.asarray(gray).copy() 67 | vec = arr.flatten() 68 | vec = vec / 255 # scale all pixel values to between 0 and 1 69 | vec = vec.tolist() 70 | 71 | nc = relabel_class(c) 72 | 73 | user_data[json_index][w]['x'].append(vec) 74 | user_data[json_index][w]['y'].append(nc) 75 | 76 | writer_count += 1 77 | if writer_count == MAX_WRITERS: 78 | 79 | all_data = {} 80 | all_data['users'] = users[json_index] 81 | all_data['num_samples'] = num_samples[json_index] 82 | all_data['user_data'] = user_data[json_index] 83 | 84 | file_name = 'all_data_%d.json' % json_index 85 | file_path = os.path.join(parent_path, 'data', 'all_data', file_name) 86 | 87 | print('writing %s' % file_name) 88 | 89 | with open(file_path, 'w') as outfile: 90 | json.dump(all_data, outfile) 91 | 92 | writer_count = 0 93 | json_index += 1 94 | -------------------------------------------------------------------------------- /data/nist/preprocess/data_to_json.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # assumes that the script is run in the preprocess folder 4 | 5 | if [ ! -d "../data" ]; then 6 | mkdir ../data 7 | fi 8 | if [ ! -d "../data/raw_data" ]; then 9 | echo "------------------------------" 10 | echo "downloading data" 11 | mkdir ../data/raw_data 12 | ./get_data.sh 13 | echo "finished downloading data" 14 | fi 15 | 16 | if [ ! -d "../data/intermediate" ]; then # stores .pkl files during preprocessing 17 | mkdir ../data/intermediate 18 | fi 19 | 20 | if [ ! -f ../data/intermediate/class_file_dirs.pkl ]; then 21 | echo "------------------------------" 22 | echo "extracting file directories of images" 23 | python3 get_file_dirs.py 24 | echo "finished extracting file directories of images" 25 | fi 26 | 27 | if [ ! -f ../data/intermediate/class_file_hashes.pkl ]; then 28 | echo "------------------------------" 29 | echo "calculating image hashes" 30 | python3 get_hashes.py 31 | echo "finished calculating image hashes" 32 | fi 33 | 34 | if [ ! 
-f ../data/intermediate/write_with_class.pkl ]; then 35 | echo "------------------------------" 36 | echo "assigning class labels to write images" 37 | python3 match_hashes.py 38 | echo "finished assigning class labels to write images" 39 | fi 40 | 41 | if [ ! -f ../data/intermediate/images_by_writer.pkl ]; then 42 | echo "------------------------------" 43 | echo "grouping images by writer" 44 | python3 group_by_writer.py 45 | echo "finished grouping images by writer" 46 | fi 47 | 48 | if [ ! -d "../data/all_data" ]; then 49 | mkdir ../data/all_data 50 | fi 51 | if [ ! "$(ls -A ../data/all_data)" ]; then 52 | echo "------------------------------" 53 | echo "converting data to .json format" 54 | python3 data_to_json.py 55 | echo "finished converting data to .json format" 56 | fi 57 | -------------------------------------------------------------------------------- /data/nist/preprocess/get_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # assumes that the script is run in the preprocess folder 4 | 5 | cd ../data/raw_data 6 | wget https://s3.amazonaws.com/nist-srd/SD19/by_class.zip 7 | wget https://s3.amazonaws.com/nist-srd/SD19/by_write.zip 8 | unzip by_class.zip 9 | rm by_class.zip 10 | unzip by_write.zip 11 | rm by_write.zip 12 | cd ../../preprocess 13 | -------------------------------------------------------------------------------- /data/nist/preprocess/get_file_dirs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Creates .pkl files for: 3 | 1. list of directories of every image in 'by_class' 4 | 2. list of directories of every image in 'by_write' 5 | the hierarchal structure of the data is as follows: 6 | - by_class -> classes -> folders containing images -> images 7 | - by_write -> folders containing writers -> writer -> types of images -> images 8 | the directories written into the files are of the form 'raw_data/...' 
9 | ''' 10 | 11 | import os 12 | import sys 13 | 14 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 15 | utils_dir = os.path.join(utils_dir, 'utils') 16 | sys.path.append(utils_dir) 17 | 18 | import utils 19 | 20 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 21 | 22 | class_files = [] # (class, file directory) 23 | write_files = [] # (writer, file directory) 24 | 25 | class_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_class') 26 | rel_class_dir = os.path.join('data', 'raw_data', 'by_class') 27 | classes = os.listdir(class_dir) 28 | 29 | for cl in classes: 30 | cldir = os.path.join(class_dir, cl) 31 | rel_cldir = os.path.join(rel_class_dir, cl) 32 | subcls = os.listdir(cldir) 33 | 34 | subcls = [s for s in subcls if (('hsf' in s) and ('mit' not in s))] 35 | 36 | for subcl in subcls: 37 | subcldir = os.path.join(cldir, subcl) 38 | rel_subcldir = os.path.join(rel_cldir, subcl) 39 | images = os.listdir(subcldir) 40 | image_dirs = [os.path.join(rel_subcldir, i) for i in images] 41 | 42 | for image_dir in image_dirs: 43 | class_files.append((cl, image_dir)) 44 | 45 | write_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_write') 46 | rel_write_dir = os.path.join('data', 'raw_data', 'by_write') 47 | write_parts = os.listdir(write_dir) 48 | 49 | for write_part in write_parts: 50 | writers_dir = os.path.join(write_dir, write_part) 51 | rel_writers_dir = os.path.join(rel_write_dir, write_part) 52 | writers = os.listdir(writers_dir) 53 | 54 | for writer in writers: 55 | writer_dir = os.path.join(writers_dir, writer) 56 | rel_writer_dir = os.path.join(rel_writers_dir, writer) 57 | wtypes = os.listdir(writer_dir) 58 | 59 | for wtype in wtypes: 60 | type_dir = os.path.join(writer_dir, wtype) 61 | rel_type_dir = os.path.join(rel_writer_dir, wtype) 62 | images = os.listdir(type_dir) 63 | image_dirs = [os.path.join(rel_type_dir, i) for i in images] 64 | 65 | for image_dir in image_dirs: 66 | write_files.append((writer, image_dir)) 67 | 68 | utils.save_obj( 69 | class_files, 70 | os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs')) 71 | utils.save_obj( 72 | write_files, 73 | os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs')) 74 | -------------------------------------------------------------------------------- /data/nist/preprocess/get_hashes.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import sys 4 | 5 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | utils_dir = os.path.join(utils_dir, 'utils') 7 | sys.path.append(utils_dir) 8 | 9 | import utils 10 | 11 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 12 | 13 | cfd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs') 14 | wfd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs') 15 | class_file_dirs = utils.load_obj(cfd) 16 | write_file_dirs = utils.load_obj(wfd) 17 | 18 | class_file_hashes = [] 19 | write_file_hashes = [] 20 | 21 | count = 0 22 | for tup in class_file_dirs: 23 | if (count%100000 == 0): 24 | print('hashed %d class images' % count) 25 | 26 | (cclass, cfile) = tup 27 | file_path = os.path.join(parent_path, cfile) 28 | 29 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 30 | 31 | class_file_hashes.append((cclass, cfile, chash)) 32 | 33 | count += 1 34 | 35 | cfhd = 
os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes') 36 | utils.save_obj(class_file_hashes, cfhd) 37 | 38 | count = 0 39 | for tup in write_file_dirs: 40 | if (count%100000 == 0): 41 | print('hashed %d write images' % count) 42 | 43 | (cclass, cfile) = tup 44 | file_path = os.path.join(parent_path, cfile) 45 | 46 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 47 | 48 | write_file_hashes.append((cclass, cfile, chash)) 49 | 50 | count += 1 51 | 52 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes') 53 | utils.save_obj(write_file_hashes, wfhd) 54 | -------------------------------------------------------------------------------- /data/nist/preprocess/group_by_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | utils_dir = os.path.join(utils_dir, 'utils') 6 | sys.path.append(utils_dir) 7 | 8 | import utils 9 | 10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class') 13 | write_class = utils.load_obj(wwcd) 14 | 15 | writers = [] # each entry is a (writer, [list of (file, class)]) tuple 16 | cimages = [] 17 | (cw, _, _) = write_class[0] 18 | for (w, f, c) in write_class: 19 | if w != cw: 20 | writers.append((cw, cimages)) 21 | cw = w 22 | cimages = [(f, c)] 23 | cimages.append((f, c)) 24 | writers.append((cw, cimages)) 25 | 26 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer') 27 | utils.save_obj(writers, ibwd) 28 | -------------------------------------------------------------------------------- /data/nist/preprocess/match_hashes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | utils_dir = os.path.join(utils_dir, 'utils') 6 | sys.path.append(utils_dir) 7 | 8 | import utils 9 | 10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | cfhd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes') 13 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes') 14 | class_file_hashes = utils.load_obj(cfhd) # each elem is (class, file dir, hash) 15 | write_file_hashes = utils.load_obj(wfhd) # each elem is (writer, file dir, hash) 16 | 17 | class_hash_dict = {} 18 | for i in range(len(class_file_hashes)): 19 | (c, f, h) = class_file_hashes[len(class_file_hashes)-i-1] 20 | class_hash_dict[h] = (c, f) 21 | 22 | write_classes = [] 23 | for tup in write_file_hashes: 24 | (w, f, h) = tup 25 | write_classes.append((w, f, class_hash_dict[h][0])) 26 | 27 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class') 28 | utils.save_obj(write_classes, wwcd) 29 | -------------------------------------------------------------------------------- /data/nist/stats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NAME="nist" 4 | 5 | cd ../../utils 6 | 7 | python3 stats.py --name $NAME 8 | 9 | cd ../data/$NAME -------------------------------------------------------------------------------- /data/synthetic_0.25_0.25/synthetic_0.25_0.25.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/data/synthetic_0.25_0.25/synthetic_0.25_0.25.zip -------------------------------------------------------------------------------- /data/synthetic_0.5_0.5/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | python generate_synthetic.py 3 | ``` -------------------------------------------------------------------------------- /data/synthetic_0.5_0.5/generate_synthetic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | 10 | 11 | NUM_USER = 30 12 | 13 | def softmax(x): 14 | ex = np.exp(x) 15 | sum_ex = np.sum( np.exp(x)) 16 | return ex/sum_ex 17 | 18 | 19 | def generate_synthetic(alpha, beta, iid): 20 | 21 | dimension = 60 22 | NUM_CLASS = 10 23 | 24 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50 25 | print(samples_per_user) 26 | num_samples = np.sum(samples_per_user) 27 | 28 | X_split = [[] for _ in range(NUM_USER)] 29 | y_split = [[] for _ in range(NUM_USER)] 30 | 31 | 32 | #### define some eprior #### 33 | mean_W = np.random.normal(0, alpha, NUM_USER) 34 | mean_b = mean_W 35 | B = np.random.normal(0, beta, NUM_USER) 36 | mean_x = np.zeros((NUM_USER, dimension)) 37 | 38 | diagonal = np.zeros(dimension) 39 | for j in range(dimension): 40 | diagonal[j] = np.power((j+1), -1.2) 41 | cov_x = np.diag(diagonal) 42 | 43 | for i in range(NUM_USER): 44 | if iid == 1: 45 | mean_x[i] = np.ones(dimension) * B[i] # all zeros 46 | else: 47 | mean_x[i] = np.random.normal(B[i], 1, dimension) 48 | print(mean_x[i]) 49 | 50 | if iid == 1: 51 | W_global = np.random.normal(0, 1, (dimension, NUM_CLASS)) 52 | b_global = np.random.normal(0, 1, NUM_CLASS) 53 | 54 | for i in range(NUM_USER): 55 | 56 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS)) 57 | b = np.random.normal(mean_b[i], 1, NUM_CLASS) 58 | 59 | if iid == 1: 60 | W = W_global 61 | b = b_global 62 | 63 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 64 | yy = np.zeros(samples_per_user[i]) 65 | 66 | for j in range(samples_per_user[i]): 67 | tmp = np.dot(xx[j], W) + b 68 | yy[j] = np.argmax(softmax(tmp)) 69 | 70 | X_split[i] = xx.tolist() 71 | y_split[i] = yy.tolist() 72 | 73 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 74 | 75 | 76 | return X_split, y_split 77 | 78 | 79 | 80 | def main(): 81 | 82 | 83 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 84 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 85 | 86 | train_path = "data/train/mytrain.json" 87 | test_path = "data/test/mytest.json" 88 | 89 | #X, y = generate_synthetic(alpha=0, beta=0, iid=0) # synthetiv (0,0) 90 | X, y = generate_synthetic(alpha=0.5, beta=0.5, iid=0) # synthetic (0.5, 0.5) 91 | #X, y = generate_synthetic(alpha=1, beta=1, iid=0) # synthetic (1,1) 92 | #X, y = generate_synthetic(alpha=0, beta=0, iid=1) # synthetic_IID 93 | 94 | 95 | # Create data structure 96 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 97 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 98 | 99 | for i in trange(NUM_USER, ncols=120): 100 | 101 | uname = 'f_{0:05d}'.format(i) 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = 
zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.9 * num_samples) 107 | test_len = num_samples - train_len 108 | 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | 117 | with open(train_path,'w') as outfile: 118 | json.dump(train_data, outfile) 119 | with open(test_path, 'w') as outfile: 120 | json.dump(test_data, outfile) 121 | 122 | 123 | if __name__ == "__main__": 124 | main() 125 | 126 | -------------------------------------------------------------------------------- /data/synthetic_0_0/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | python generate_synthetic.py 3 | ``` -------------------------------------------------------------------------------- /data/synthetic_0_0/generate_synthetic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | 10 | 11 | NUM_USER = 30 12 | 13 | def softmax(x): 14 | ex = np.exp(x) 15 | sum_ex = np.sum( np.exp(x)) 16 | return ex/sum_ex 17 | 18 | 19 | def generate_synthetic(alpha, beta, iid): 20 | 21 | dimension = 60 22 | NUM_CLASS = 10 23 | 24 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50 25 | print(samples_per_user) 26 | num_samples = np.sum(samples_per_user) 27 | 28 | X_split = [[] for _ in range(NUM_USER)] 29 | y_split = [[] for _ in range(NUM_USER)] 30 | 31 | 32 | #### define some eprior #### 33 | mean_W = np.random.normal(0, alpha, NUM_USER) 34 | mean_b = mean_W 35 | B = np.random.normal(0, beta, NUM_USER) 36 | mean_x = np.zeros((NUM_USER, dimension)) 37 | 38 | diagonal = np.zeros(dimension) 39 | for j in range(dimension): 40 | diagonal[j] = np.power((j+1), -1.2) 41 | cov_x = np.diag(diagonal) 42 | 43 | for i in range(NUM_USER): 44 | if iid == 1: 45 | mean_x[i] = np.ones(dimension) * B[i] # all zeros 46 | else: 47 | mean_x[i] = np.random.normal(B[i], 1, dimension) 48 | print(mean_x[i]) 49 | 50 | if iid == 1: 51 | W_global = np.random.normal(0, 1, (dimension, NUM_CLASS)) 52 | b_global = np.random.normal(0, 1, NUM_CLASS) 53 | 54 | for i in range(NUM_USER): 55 | 56 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS)) 57 | b = np.random.normal(mean_b[i], 1, NUM_CLASS) 58 | 59 | if iid == 1: 60 | W = W_global 61 | b = b_global 62 | 63 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 64 | yy = np.zeros(samples_per_user[i]) 65 | 66 | for j in range(samples_per_user[i]): 67 | tmp = np.dot(xx[j], W) + b 68 | yy[j] = np.argmax(softmax(tmp)) 69 | 70 | X_split[i] = xx.tolist() 71 | y_split[i] = yy.tolist() 72 | 73 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 74 | 75 | 76 | return X_split, y_split 77 | 78 | 79 | 80 | def main(): 81 | 82 | 83 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 84 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 85 | 86 | train_path = "data/train/mytrain.json" 87 | test_path = "data/test/mytest.json" 88 | 89 | X, y = generate_synthetic(alpha=0, beta=0, iid=0) # synthetiv (0,0) 90 | #X, y = generate_synthetic(alpha=0.5, beta=0.5, iid=0) # synthetic (0.5, 0.5) 91 | #X, y = 
generate_synthetic(alpha=1, beta=1, iid=0) # synthetic (1,1) 92 | #X, y = generate_synthetic(alpha=0, beta=0, iid=1) # synthetic_IID 93 | 94 | 95 | # Create data structure 96 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 97 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 98 | 99 | for i in trange(NUM_USER, ncols=120): 100 | 101 | uname = 'f_{0:05d}'.format(i) 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.9 * num_samples) 107 | test_len = num_samples - train_len 108 | 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | 117 | with open(train_path,'w') as outfile: 118 | json.dump(train_data, outfile) 119 | with open(test_path, 'w') as outfile: 120 | json.dump(test_data, outfile) 121 | 122 | 123 | if __name__ == "__main__": 124 | main() 125 | 126 | -------------------------------------------------------------------------------- /data/synthetic_0_0/synthetic_0_0.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/data/synthetic_0_0/synthetic_0_0.zip -------------------------------------------------------------------------------- /data/synthetic_1_1/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | python generate_synthetic.py 3 | ``` -------------------------------------------------------------------------------- /data/synthetic_1_1/generate_synthetic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | 10 | 11 | NUM_USER = 30 12 | 13 | 14 | def softmax(x): 15 | ex = np.exp(x) 16 | sum_ex = np.sum( np.exp(x)) 17 | return ex/sum_ex 18 | 19 | 20 | def generate_synthetic(alpha, beta, iid): 21 | 22 | dimension = 60 23 | NUM_CLASS = 10 24 | 25 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50 26 | print(samples_per_user) 27 | num_samples = np.sum(samples_per_user) 28 | 29 | X_split = [[] for _ in range(NUM_USER)] 30 | y_split = [[] for _ in range(NUM_USER)] 31 | 32 | 33 | #### define some eprior #### 34 | mean_W = np.random.normal(0, alpha, NUM_USER) 35 | mean_b = mean_W 36 | B = np.random.normal(0, beta, NUM_USER) 37 | mean_x = np.zeros((NUM_USER, dimension)) 38 | 39 | diagonal = np.zeros(dimension) 40 | for j in range(dimension): 41 | diagonal[j] = np.power((j+1), -1.2) 42 | cov_x = np.diag(diagonal) 43 | 44 | for i in range(NUM_USER): 45 | mean_x[i] = np.random.normal(B[i], 1, dimension) 46 | print(mean_x[i]) 47 | 48 | 49 | for i in range(NUM_USER): 50 | 51 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS)) 52 | b = np.random.normal(mean_b[i], 1, NUM_CLASS) 53 | 54 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 55 | yy = np.zeros(samples_per_user[i]) 56 | 57 | for j in range(samples_per_user[i]): 58 | tmp = np.dot(xx[j], W) + b 59 | yy[j] = np.argmax(softmax(tmp)) 60 | 61 | X_split[i] = xx.tolist() 62 | y_split[i] = yy.tolist() 
63 | 64 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 65 | 66 | 67 | return X_split, y_split 68 | 69 | 70 | 71 | def main(): 72 | 73 | 74 | train_path = "data/train/mytrain.json" 75 | test_path = "data/test/mytest.json" 76 | 77 | X, y = generate_synthetic(alpha=1, beta=1, iid=0) # synthetic (1,1) 78 | 79 | 80 | # Create data structure 81 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 82 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 83 | 84 | for i in trange(NUM_USER, ncols=120): 85 | 86 | uname = 'f_{0:05d}'.format(i) 87 | combined = list(zip(X[i], y[i])) 88 | random.shuffle(combined) 89 | X[i][:], y[i][:] = zip(*combined) 90 | num_samples = len(X[i]) 91 | train_len = int(0.9 * num_samples) 92 | test_len = num_samples - train_len 93 | 94 | train_data['users'].append(uname) 95 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 96 | train_data['num_samples'].append(train_len) 97 | test_data['users'].append(uname) 98 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 99 | test_data['num_samples'].append(test_len) 100 | 101 | 102 | with open(train_path,'w') as outfile: 103 | json.dump(train_data, outfile) 104 | with open(test_path, 'w') as outfile: 105 | json.dump(test_data, outfile) 106 | 107 | 108 | if __name__ == "__main__": 109 | main() 110 | 111 | -------------------------------------------------------------------------------- /data/synthetic_iid/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | python generate_synthetic.py 3 | ``` -------------------------------------------------------------------------------- /data/synthetic_iid/generate_iid.py: -------------------------------------------------------------------------------- 1 | import json, math, os, sys 2 | import numpy as np 3 | import random 4 | from tqdm import trange 5 | 6 | 7 | NUM_USER = 30 8 | 9 | def softmax(x): 10 | ex = np.exp(x) 11 | sum_ex = np.sum(np.exp(x)) 12 | return ex/sum_ex 13 | 14 | def generate_synthetic(alpha, beta, iid): 15 | dimension = 60 16 | NUM_CLASS = 10 17 | 18 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50 19 | print(samples_per_user) 20 | num_samples = np.sum(samples_per_user) 21 | 22 | X_split = [[] for _ in range(NUM_USER)] 23 | y_split = [[] for _ in range(NUM_USER)] 24 | 25 | #### define some eprior #### 26 | mean_x = np.zeros((NUM_USER, dimension)) 27 | 28 | diagonal = np.zeros(dimension) 29 | for j in range(dimension): 30 | diagonal[j] = np.power((j+1), -1.2) 31 | cov_x = np.diag(diagonal) 32 | 33 | for i in range(NUM_USER): 34 | mean_x[i] = np.zeros(dimension) 35 | 36 | W = np.random.normal(0, 1, (dimension, NUM_CLASS)) 37 | b = np.random.normal(0, 1, NUM_CLASS) 38 | 39 | for i in range(NUM_USER): 40 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 41 | yy = np.zeros(samples_per_user[i]) 42 | 43 | for j in range(samples_per_user[i]): 44 | tmp = np.dot(xx[j], W) + b 45 | yy[j] = np.argmax(softmax(tmp)) 46 | 47 | X_split[i] = xx.tolist() 48 | y_split[i] = yy.tolist() 49 | 50 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 51 | 52 | return X_split, y_split 53 | 54 | 55 | 56 | def main(): 57 | train_path = "data/train/mytrain.json" 58 | test_path = "data/test/mytest.json" 59 | 60 | X, y = generate_synthetic(alpha=0, beta=0, iid=1) 61 | 62 | # Create data structure 63 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 64 | test_data = {'users': [], 
'user_data':{}, 'num_samples':[]} 65 | 66 | for i in trange(NUM_USER, ncols=120): 67 | 68 | uname = 'f_{0:05d}'.format(i) 69 | combined = list(zip(X[i], y[i])) 70 | random.shuffle(combined) 71 | X[i][:], y[i][:] = zip(*combined) 72 | num_samples = len(X[i]) 73 | train_len = int(0.9 * num_samples) 74 | test_len = num_samples - train_len 75 | 76 | train_data['users'].append(uname) 77 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 78 | train_data['num_samples'].append(train_len) 79 | test_data['users'].append(uname) 80 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 81 | test_data['num_samples'].append(test_len) 82 | 83 | with open(train_path, 'w') as outfile: 84 | json.dump(train_data, outfile) 85 | with open(test_path, 'w') as outfile: 86 | json.dump(test_data, outfile) 87 | 88 | 89 | if __name__ == "__main__": 90 | main() 91 | 92 | -------------------------------------------------------------------------------- /flearn/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/__init__.py -------------------------------------------------------------------------------- /flearn/models/client.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Client(object): 4 | 5 | def __init__(self, id, group=None, train_data={'x':[],'y':[]}, eval_data={'x':[],'y':[]}, model=None): 6 | self.model = model 7 | self.id = id # integer 8 | self.group = group 9 | self.train_data = {k: np.array(v) for k, v in train_data.items()} 10 | self.eval_data = {k: np.array(v) for k, v in eval_data.items()} 11 | self.num_samples = len(self.train_data['y']) 12 | self.test_samples = len(self.eval_data['y']) 13 | 14 | def set_params(self, model_params): 15 | '''set model parameters''' 16 | self.model.set_params(model_params) 17 | 18 | def get_params(self): 19 | '''get model parameters''' 20 | return self.model.get_params() 21 | 22 | def get_grads(self, model_len): 23 | '''get model gradient''' 24 | return self.model.get_gradients(self.train_data, model_len) 25 | 26 | def solve_grad(self): 27 | '''get model gradient with cost''' 28 | bytes_w = self.model.size 29 | grads = self.model.get_gradients(self.train_data) 30 | comp = self.model.flops * self.num_samples 31 | bytes_r = self.model.size 32 | return ((self.num_samples, grads), (bytes_w, comp, bytes_r)) 33 | 34 | def solve_inner(self, num_epochs=1, batch_size=10): 35 | '''Solves local optimization problem 36 | 37 | Return: 38 | 1: num_samples: number of samples used in training 39 | 1: soln: local optimization solution 40 | 2: bytes read: number of bytes received 41 | 2: comp: number of FLOPs executed in training process 42 | 2: bytes_write: number of bytes transmitted 43 | ''' 44 | 45 | bytes_w = self.model.size 46 | soln, comp = self.model.solve_inner(self.train_data, num_epochs, batch_size) 47 | bytes_r = self.model.size 48 | return (self.num_samples, soln), (bytes_w, comp, bytes_r) 49 | 50 | def solve_iters(self, num_iters=1, batch_size=10): 51 | '''Solves local optimization problem 52 | 53 | Return: 54 | 1: num_samples: number of samples used in training 55 | 1: soln: local optimization solution 56 | 2: bytes read: number of bytes received 57 | 2: comp: number of FLOPs executed in training process 58 | 2: bytes_write: number of bytes transmitted 59 | ''' 60 | 61 | bytes_w = 
self.model.size 62 | soln, comp = self.model.solve_iters(self.train_data, num_iters, batch_size) 63 | bytes_r = self.model.size 64 | return (self.num_samples, soln), (bytes_w, comp, bytes_r) 65 | 66 | def train_error_and_loss(self): 67 | tot_correct, loss = self.model.test(self.train_data) 68 | return tot_correct, loss, self.num_samples 69 | 70 | 71 | def test(self): 72 | '''tests current model on local eval_data 73 | 74 | Return: 75 | tot_correct: total #correct predictions 76 | test_samples: int 77 | ''' 78 | tot_correct, loss = self.model.test(self.eval_data) 79 | return tot_correct, self.test_samples 80 | -------------------------------------------------------------------------------- /flearn/models/goodreads/get_embs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | parser = argparse.ArgumentParser() 5 | 6 | parser.add_argument('-f', 7 | help='path to .txt file containing word embedding information;', 8 | type=str, 9 | default='glove.6B.300d.txt') 10 | 11 | args = parser.parse_args() 12 | 13 | lines = [] 14 | with open(args.f, 'r') as inf: 15 | lines = inf.readlines() 16 | lines = [l.split() for l in lines] 17 | vocab = [l[0] for l in lines] 18 | emb_floats = [[float(n) for n in l[1:]] for l in lines] 19 | emb_floats.append([0.0 for _ in range(300)]) # for unknown word 20 | js = {'vocab': vocab, 'emba': emb_floats} 21 | with open('embs.json', 'w') as ouf: 22 | json.dump(js, ouf) 23 | -------------------------------------------------------------------------------- /flearn/models/goodreads/get_embs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd sent140 4 | 5 | if [ ! -f 'glove.6B.300d.txt' ]; then 6 | wget http://nlp.stanford.edu/data/glove.6B.zip 7 | unzip glove.6B.zip 8 | rm glove.6B.50d.txt glove.6B.100d.txt glove.6B.200d.txt glove.6B.zip 9 | fi 10 | 11 | if [ ! 
-f embs.json ]; then 12 | python3 get_embs.py 13 | fi -------------------------------------------------------------------------------- /flearn/models/goodreads/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | def __init__(self, num_classes, optimizer, seed=1): 12 | 13 | # params 14 | self.num_classes = num_classes 15 | 16 | # create computation graph 17 | self.graph = tf.Graph() 18 | with self.graph.as_default(): 19 | tf.set_random_seed(123 + seed) 20 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model( 21 | optimizer) 22 | self.saver = tf.train.Saver() 23 | self.sess = tf.Session(graph=self.graph) 24 | 25 | # find memory footprint and compute cost of the model 26 | self.size = graph_size(self.graph) 27 | with self.graph.as_default(): 28 | self.sess.run(tf.global_variables_initializer()) 29 | metadata = tf.RunMetadata() 30 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 31 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 32 | 33 | def create_model(self, optimizer): 34 | """Model function for Logistic Regression.""" 35 | features = tf.placeholder(tf.float32, shape=[None, 2517], name='features') 36 | labels = tf.placeholder(tf.int64, shape=[None, ], name='labels') 37 | logits = tf.layers.dense(inputs=features, units=self.num_classes, 38 | kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 39 | predictions = { 40 | "classes": tf.argmax(input=logits, axis=1), 41 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 42 | } 43 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 44 | 45 | grads_and_vars = optimizer.compute_gradients(loss) 46 | grads, _ = zip(*grads_and_vars) 47 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 48 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 49 | return features, labels, train_op, grads, eval_metric_ops, loss 50 | 51 | def set_params(self, model_params=None): 52 | if model_params is not None: 53 | with self.graph.as_default(): 54 | all_vars = tf.trainable_variables() 55 | for variable, value in zip(all_vars, model_params): 56 | variable.load(value, self.sess) 57 | 58 | def get_params(self): 59 | with self.graph.as_default(): 60 | model_params = self.sess.run(tf.trainable_variables()) 61 | return model_params 62 | 63 | def get_gradients(self, data, model_len): 64 | 65 | grads = np.zeros(model_len) 66 | num_samples = len(data['y']) 67 | 68 | with self.graph.as_default(): 69 | model_grads = self.sess.run(self.grads, 70 | feed_dict={self.features: data['x'], self.labels: data['y']}) 71 | grads = process_grad(model_grads) 72 | 73 | return num_samples, grads 74 | 75 | def solve_inner(self, data, num_epochs=1, batch_size=32): 76 | '''Solves local optimization problem''' 77 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 78 | for X, y in batch_data(data, batch_size): 79 | with self.graph.as_default(): 80 | self.sess.run(self.train_op, 81 | feed_dict={self.features: X, self.labels: y}) 82 | soln = self.get_params() 83 | comp = num_epochs * (len(data['y']) // 
batch_size) * batch_size * self.flops 84 | return soln, comp 85 | 86 | def solve_iters(self, data, num_iters=1, batch_size=32): 87 | '''Solves local optimization problem''' 88 | 89 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 90 | with self.graph.as_default(): 91 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 92 | soln = self.get_params() 93 | comp = 0 94 | return soln, comp 95 | 96 | def test(self, data): 97 | ''' 98 | Args: 99 | data: dict of the form {'x': [list], 'y': [list]} 100 | ''' 101 | with self.graph.as_default(): 102 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 103 | feed_dict={self.features: data['x'], self.labels: data['y']}) 104 | return tot_correct, loss 105 | 106 | def close(self): 107 | self.sess.close() 108 | -------------------------------------------------------------------------------- /flearn/models/mex/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/mex/__init__.py -------------------------------------------------------------------------------- /flearn/models/mex/dnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | 12 | def __init__(self, num_classes, optimizer, seed=1): 13 | 14 | # params 15 | self.num_classes = num_classes 16 | 17 | # create computation graph 18 | self.graph = tf.Graph() 19 | with self.graph.as_default(): 20 | tf.set_random_seed(123 + seed) 21 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model( 22 | optimizer) 23 | self.saver = tf.train.Saver() 24 | self.sess = tf.Session(graph=self.graph) 25 | 26 | # find memory footprint and compute cost of the model 27 | self.size = graph_size(self.graph) 28 | with self.graph.as_default(): 29 | self.sess.run(tf.global_variables_initializer()) 30 | metadata = tf.RunMetadata() 31 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 32 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 33 | 34 | def create_model(self, optimizer): 35 | """Model function for Deep neural network.""" 36 | features = tf.placeholder(tf.float32, shape=[None, 1280], name='features') 37 | labels = tf.placeholder(tf.int64, shape=[None, ], name='labels') 38 | 39 | second_layer = tf.layers.dense(units=1280, activation='relu', inputs=features) 40 | third_layer = tf.layers.dense(units=640, activation='relu', inputs=second_layer) 41 | fourth_layer = tf.layers.dense(units=120, activation='relu', inputs=third_layer) 42 | 43 | # second_layer = tf.layers.dense(units=1280, activation='relu', inputs=features) 44 | # second_layer_bn = tf.layers.batch_normalization(second_layer) 45 | # third_layer = tf.layers.dense(units=640, activation='relu', inputs=second_layer_bn) 46 | # third_layer_bn = tf.layers.batch_normalization(third_layer) 47 | # fourth_layer = tf.layers.dense(units=120, activation='relu', inputs=third_layer_bn) 48 | # fourth_layer_bn = tf.layers.batch_normalization(fourth_layer) 49 | 50 | logits = tf.layers.dense(inputs=fourth_layer, units=self.num_classes, 
kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 51 | 52 | predictions = { 53 | "classes": tf.argmax(input=logits, axis=1), 54 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 55 | } 56 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 57 | 58 | grads_and_vars = optimizer.compute_gradients(loss) 59 | grads, _ = zip(*grads_and_vars) 60 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 61 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 62 | return features, labels, train_op, grads, eval_metric_ops, loss 63 | 64 | def set_params(self, model_params=None): 65 | if model_params is not None: 66 | with self.graph.as_default(): 67 | all_vars = tf.trainable_variables() 68 | for variable, value in zip(all_vars, model_params): 69 | variable.load(value, self.sess) 70 | 71 | def get_params(self): 72 | with self.graph.as_default(): 73 | model_params = self.sess.run(tf.trainable_variables()) 74 | return model_params 75 | 76 | def get_gradients(self, data, model_len): 77 | 78 | grads = np.zeros(model_len) 79 | num_samples = len(data['y']) 80 | 81 | with self.graph.as_default(): 82 | model_grads = self.sess.run(self.grads, 83 | feed_dict={self.features: data['x'], self.labels: data['y']}) 84 | grads = process_grad(model_grads) 85 | 86 | return num_samples, grads 87 | 88 | def solve_inner(self, data, num_epochs=1, batch_size=32): 89 | '''Solves local optimization problem''' 90 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 91 | for X, y in batch_data(data, batch_size): 92 | with self.graph.as_default(): 93 | self.sess.run(self.train_op, 94 | feed_dict={self.features: X, self.labels: y}) 95 | soln = self.get_params() 96 | comp = num_epochs * (len(data['y']) // batch_size) * batch_size * self.flops 97 | return soln, comp 98 | 99 | def solve_iters(self, data, num_iters=1, batch_size=32): 100 | '''Solves local optimization problem''' 101 | 102 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 103 | with self.graph.as_default(): 104 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 105 | soln = self.get_params() 106 | comp = 0 107 | return soln, comp 108 | 109 | def test(self, data): 110 | ''' 111 | Args: 112 | data: dict of the form {'x': [list], 'y': [list]} 113 | ''' 114 | with self.graph.as_default(): 115 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 116 | feed_dict={self.features: data['x'], self.labels: data['y']}) 117 | return tot_correct, loss 118 | 119 | def close(self): 120 | self.sess.close() 121 | -------------------------------------------------------------------------------- /flearn/models/mex/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | 12 | 13 | def __init__(self, num_classes, optimizer, seed=1): 14 | 15 | # params 16 | self.num_classes = num_classes 17 | 18 | # create computation graph 19 | self.graph = tf.Graph() 20 | with self.graph.as_default(): 21 | tf.set_random_seed(123+seed) 22 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model(optimizer) 23 | self.saver = 
tf.train.Saver() 24 | self.sess = tf.Session(graph=self.graph) 25 | 26 | # find memory footprint and compute cost of the model 27 | self.size = graph_size(self.graph) 28 | with self.graph.as_default(): 29 | self.sess.run(tf.global_variables_initializer()) 30 | metadata = tf.RunMetadata() 31 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 32 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 33 | 34 | def create_model(self, optimizer): 35 | """Model function for Logistic Regression.""" 36 | features = tf.placeholder(tf.float32, shape=[None, 1280], name='features') 37 | labels = tf.placeholder(tf.int64, shape=[None,], name='labels') 38 | logits = tf.layers.dense(inputs=features, units=self.num_classes, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 39 | predictions = { 40 | "classes": tf.argmax(input=logits, axis=1), 41 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 42 | } 43 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 44 | 45 | grads_and_vars = optimizer.compute_gradients(loss) 46 | grads, _ = zip(*grads_and_vars) 47 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 48 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 49 | return features, labels, train_op, grads, eval_metric_ops, loss 50 | 51 | def set_params(self, model_params=None): 52 | if model_params is not None: 53 | with self.graph.as_default(): 54 | all_vars = tf.trainable_variables() 55 | for variable, value in zip(all_vars, model_params): 56 | variable.load(value, self.sess) 57 | 58 | def get_params(self): 59 | with self.graph.as_default(): 60 | model_params = self.sess.run(tf.trainable_variables()) 61 | return model_params 62 | 63 | def get_gradients(self, data, model_len): 64 | 65 | grads = np.zeros(model_len) 66 | num_samples = len(data['y']) 67 | 68 | with self.graph.as_default(): 69 | model_grads = self.sess.run(self.grads, 70 | feed_dict={self.features: data['x'], self.labels: data['y']}) 71 | grads = process_grad(model_grads) 72 | 73 | return num_samples, grads 74 | 75 | def solve_inner(self, data, num_epochs=1, batch_size=32): 76 | '''Solves local optimization problem''' 77 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 78 | for X, y in batch_data(data, batch_size): 79 | with self.graph.as_default(): 80 | self.sess.run(self.train_op, 81 | feed_dict={self.features: X, self.labels: y}) 82 | soln = self.get_params() 83 | comp = num_epochs * (len(data['y'])//batch_size) * batch_size * self.flops 84 | return soln, comp 85 | 86 | def solve_iters(self, data, num_iters=1, batch_size=32): 87 | '''Solves local optimization problem''' 88 | 89 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 90 | with self.graph.as_default(): 91 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 92 | soln = self.get_params() 93 | comp = 0 94 | return soln, comp 95 | 96 | def test(self, data): 97 | ''' 98 | Args: 99 | data: dict of the form {'x': [list], 'y': [list]} 100 | ''' 101 | with self.graph.as_default(): 102 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 103 | feed_dict={self.features: data['x'], self.labels: data['y']}) 104 | return tot_correct, loss 105 | 106 | def close(self): 107 | self.sess.close() 108 | -------------------------------------------------------------------------------- /flearn/models/mnist/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/mnist/__init__.py -------------------------------------------------------------------------------- /flearn/models/mnist/cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | ''' 12 | Assumes that images are 28px by 28px 13 | ''' 14 | 15 | def __init__(self, num_classes, optimizer, seed=1): 16 | 17 | # params 18 | self.num_classes = num_classes 19 | 20 | # create computation graph 21 | self.graph = tf.Graph() 22 | with self.graph.as_default(): 23 | tf.set_random_seed(123 + seed) 24 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model( 25 | optimizer) 26 | self.saver = tf.train.Saver() 27 | self.sess = tf.Session(graph=self.graph) 28 | 29 | # find memory footprint and compute cost of the model 30 | self.size = graph_size(self.graph) 31 | with self.graph.as_default(): 32 | self.sess.run(tf.global_variables_initializer()) 33 | metadata = tf.RunMetadata() 34 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 35 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 36 | 37 | def create_model(self, optimizer): 38 | """Model function for CNN.""" 39 | features = tf.placeholder(tf.float32, shape=[None, 28 * 28], name='features') 40 | labels = tf.placeholder(tf.int64, shape=[None], name='labels') 41 | input_layer = tf.reshape(features, [-1, 28, 28, 1]) 42 | conv1 = tf.layers.conv2d( 43 | inputs=input_layer, 44 | filters=32, 45 | kernel_size=[5, 5], 46 | padding="same", 47 | activation=tf.nn.relu) 48 | pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2) 49 | conv2 = tf.layers.conv2d( 50 | inputs=pool1, 51 | filters=64, 52 | kernel_size=[5, 5], 53 | padding="same", 54 | activation=tf.nn.relu) 55 | pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2) 56 | pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64]) 57 | dense = tf.layers.dense(inputs=pool2_flat, units=2048, activation=tf.nn.relu) 58 | logits = tf.layers.dense(inputs=dense, units=self.num_classes) 59 | predictions = { 60 | "classes": tf.argmax(input=logits, axis=1), 61 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 62 | } 63 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 64 | grads_and_vars = optimizer.compute_gradients(loss) 65 | grads, _ = zip(*grads_and_vars) 66 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 67 | 68 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 69 | return features, labels, train_op, grads, eval_metric_ops, loss 70 | 71 | def set_params(self, model_params=None): 72 | if model_params is not None: 73 | with self.graph.as_default(): 74 | all_vars = tf.trainable_variables() 75 | for variable, value in zip(all_vars, model_params): 76 | variable.load(value, self.sess) 77 | 78 | def get_params(self): 79 | with self.graph.as_default(): 80 | model_params = self.sess.run(tf.trainable_variables()) 81 | return model_params 82 | 83 | def 
get_gradients(self, data, model_len): 84 | 85 | grads = np.zeros(model_len) 86 | num_samples = len(data['y']) 87 | 88 | with self.graph.as_default(): 89 | model_grads = self.sess.run(self.grads, 90 | feed_dict={self.features: data['x'], self.labels: data['y']}) 91 | grads = process_grad(model_grads) 92 | 93 | return num_samples, grads 94 | 95 | def solve_inner(self, data, num_epochs=1, batch_size=32): 96 | '''Solves local optimization problem''' 97 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 98 | for X, y in batch_data(data, batch_size): 99 | with self.graph.as_default(): 100 | self.sess.run(self.train_op, 101 | feed_dict={self.features: X, self.labels: y}) 102 | soln = self.get_params() 103 | comp = num_epochs * (len(data['y']) // batch_size) * batch_size * self.flops 104 | return soln, comp 105 | 106 | def solve_iters(self, data, num_iters=1, batch_size=32): 107 | '''Solves local optimization problem''' 108 | 109 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 110 | with self.graph.as_default(): 111 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 112 | soln = self.get_params() 113 | comp = 0 114 | return soln, comp 115 | 116 | def test(self, data): 117 | ''' 118 | Args: 119 | data: dict of the form {'x': [list], 'y': [list]} 120 | ''' 121 | with self.graph.as_default(): 122 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 123 | feed_dict={self.features: data['x'], self.labels: data['y']}) 124 | return tot_correct, loss 125 | 126 | def close(self): 127 | self.sess.close() 128 | -------------------------------------------------------------------------------- /flearn/models/mnist/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | ''' 12 | Assumes that images are 28px by 28px 13 | ''' 14 | 15 | def __init__(self, num_classes, optimizer, seed=1): 16 | 17 | # params 18 | self.num_classes = num_classes 19 | 20 | # create computation graph 21 | self.graph = tf.Graph() 22 | with self.graph.as_default(): 23 | tf.set_random_seed(123+seed) 24 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model(optimizer) 25 | self.saver = tf.train.Saver() 26 | self.sess = tf.Session(graph=self.graph) 27 | 28 | # find memory footprint and compute cost of the model 29 | self.size = graph_size(self.graph) 30 | with self.graph.as_default(): 31 | self.sess.run(tf.global_variables_initializer()) 32 | metadata = tf.RunMetadata() 33 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 34 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 35 | 36 | def create_model(self, optimizer): 37 | """Model function for Logistic Regression.""" 38 | features = tf.placeholder(tf.float32, shape=[None, 784], name='features') 39 | labels = tf.placeholder(tf.int64, shape=[None,], name='labels') 40 | logits = tf.layers.dense(inputs=features, units=self.num_classes, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 41 | predictions = { 42 | "classes": tf.argmax(input=logits, axis=1), 43 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 44 | } 45 | loss = 
tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 46 | 47 | grads_and_vars = optimizer.compute_gradients(loss) 48 | grads, _ = zip(*grads_and_vars) 49 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 50 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 51 | return features, labels, train_op, grads, eval_metric_ops, loss 52 | 53 | def set_params(self, model_params=None): 54 | if model_params is not None: 55 | with self.graph.as_default(): 56 | all_vars = tf.trainable_variables() 57 | for variable, value in zip(all_vars, model_params): 58 | variable.load(value, self.sess) 59 | 60 | def get_params(self): 61 | with self.graph.as_default(): 62 | model_params = self.sess.run(tf.trainable_variables()) 63 | return model_params 64 | 65 | def get_gradients(self, data, model_len): 66 | 67 | grads = np.zeros(model_len) 68 | num_samples = len(data['y']) 69 | 70 | with self.graph.as_default(): 71 | model_grads = self.sess.run(self.grads, 72 | feed_dict={self.features: data['x'], self.labels: data['y']}) 73 | grads = process_grad(model_grads) 74 | 75 | return num_samples, grads 76 | 77 | def solve_inner(self, data, num_epochs=1, batch_size=32): 78 | '''Solves local optimization problem''' 79 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 80 | for X, y in batch_data(data, batch_size): 81 | with self.graph.as_default(): 82 | self.sess.run(self.train_op, 83 | feed_dict={self.features: X, self.labels: y}) 84 | soln = self.get_params() 85 | comp = num_epochs * (len(data['y'])//batch_size) * batch_size * self.flops 86 | return soln, comp 87 | 88 | def solve_iters(self, data, num_iters=1, batch_size=32): 89 | '''Solves local optimization problem''' 90 | 91 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 92 | with self.graph.as_default(): 93 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 94 | soln = self.get_params() 95 | comp = 0 96 | return soln, comp 97 | 98 | def test(self, data): 99 | ''' 100 | Args: 101 | data: dict of the form {'x': [list], 'y': [list]} 102 | ''' 103 | with self.graph.as_default(): 104 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 105 | feed_dict={self.features: data['x'], self.labels: data['y']}) 106 | return tot_correct, loss 107 | 108 | def close(self): 109 | self.sess.close() 110 | -------------------------------------------------------------------------------- /flearn/models/nist/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/nist/__init__.py -------------------------------------------------------------------------------- /flearn/models/nist/cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | ''' 12 | Assumes that images are 28px by 28px 13 | ''' 14 | 15 | def __init__(self, num_classes, optimizer, seed=1): 16 | 17 | # params 18 | self.num_classes = num_classes 19 | 20 | # create computation graph 21 | self.graph = tf.Graph() 22 | with self.graph.as_default(): 23 | tf.set_random_seed(123 + seed) 24 | 
self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model( 25 | optimizer) 26 | self.saver = tf.train.Saver() 27 | self.sess = tf.Session(graph=self.graph) 28 | 29 | # find memory footprint and compute cost of the model 30 | self.size = graph_size(self.graph) 31 | with self.graph.as_default(): 32 | self.sess.run(tf.global_variables_initializer()) 33 | metadata = tf.RunMetadata() 34 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 35 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 36 | 37 | def create_model(self, optimizer): 38 | """Model function for CNN.""" 39 | features = tf.placeholder(tf.float32, shape=[None, 28 * 28], name='features') 40 | labels = tf.placeholder(tf.int64, shape=[None], name='labels') 41 | input_layer = tf.reshape(features, [-1, 28, 28, 1]) 42 | conv1 = tf.layers.conv2d( 43 | inputs=input_layer, 44 | filters=32, 45 | kernel_size=[5, 5], 46 | padding="same", 47 | activation=tf.nn.relu) 48 | pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2) 49 | conv2 = tf.layers.conv2d( 50 | inputs=pool1, 51 | filters=64, 52 | kernel_size=[5, 5], 53 | padding="same", 54 | activation=tf.nn.relu) 55 | pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2) 56 | pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64]) 57 | dense = tf.layers.dense(inputs=pool2_flat, units=2048, activation=tf.nn.relu) 58 | logits = tf.layers.dense(inputs=dense, units=self.num_classes) 59 | predictions = { 60 | "classes": tf.argmax(input=logits, axis=1), 61 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 62 | } 63 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 64 | grads_and_vars = optimizer.compute_gradients(loss) 65 | grads, _ = zip(*grads_and_vars) 66 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 67 | 68 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 69 | return features, labels, train_op, grads, eval_metric_ops, loss 70 | 71 | def set_params(self, model_params=None): 72 | if model_params is not None: 73 | with self.graph.as_default(): 74 | all_vars = tf.trainable_variables() 75 | for variable, value in zip(all_vars, model_params): 76 | variable.load(value, self.sess) 77 | 78 | def get_params(self): 79 | with self.graph.as_default(): 80 | model_params = self.sess.run(tf.trainable_variables()) 81 | return model_params 82 | 83 | def get_gradients(self, data, model_len): 84 | 85 | grads = np.zeros(model_len) 86 | num_samples = len(data['y']) 87 | 88 | with self.graph.as_default(): 89 | model_grads = self.sess.run(self.grads, 90 | feed_dict={self.features: data['x'], self.labels: data['y']}) 91 | grads = process_grad(model_grads) 92 | 93 | return num_samples, grads 94 | 95 | def solve_inner(self, data, num_epochs=1, batch_size=32): 96 | '''Solves local optimization problem''' 97 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 98 | for X, y in batch_data(data, batch_size): 99 | with self.graph.as_default(): 100 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 101 | soln = self.get_params() 102 | comp = num_epochs * (len(data['y']) // batch_size) * batch_size * self.flops 103 | return soln, comp 104 | 105 | def solve_iters(self, data, num_iters=1, batch_size=32): 106 | '''Solves local optimization problem''' 107 | 108 | for X, y in batch_data_multiple_iters(data, batch_size, 
num_iters): 109 | with self.graph.as_default(): 110 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 111 | soln = self.get_params() 112 | comp = 0 113 | return soln, comp 114 | 115 | def test(self, data): 116 | ''' 117 | Args: 118 | data: dict of the form {'x': [list], 'y': [list]} 119 | ''' 120 | with self.graph.as_default(): 121 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 122 | feed_dict={self.features: data['x'], self.labels: data['y']}) 123 | return tot_correct, loss 124 | 125 | def close(self): 126 | self.sess.close() 127 | -------------------------------------------------------------------------------- /flearn/models/nist/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | ''' 12 | Assumes that images are 28px by 28px 13 | ''' 14 | 15 | def __init__(self, num_classes, optimizer, seed=1): 16 | 17 | # params 18 | self.num_classes = num_classes 19 | 20 | # create computation graph 21 | self.graph = tf.Graph() 22 | with self.graph.as_default(): 23 | tf.set_random_seed(123+seed) 24 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model(optimizer) 25 | self.saver = tf.train.Saver() 26 | self.sess = tf.Session(graph=self.graph) 27 | 28 | # find memory footprint and compute cost of the model 29 | self.size = graph_size(self.graph) 30 | with self.graph.as_default(): 31 | self.sess.run(tf.global_variables_initializer()) 32 | metadata = tf.RunMetadata() 33 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 34 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 35 | 36 | def create_model(self, optimizer): 37 | """Model function for Logistic Regression.""" 38 | features = tf.placeholder(tf.float32, shape=[None, 784], name='features') 39 | labels = tf.placeholder(tf.int64, shape=[None,], name='labels') 40 | logits = tf.layers.dense(inputs=features, units=self.num_classes, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 41 | predictions = { 42 | "classes": tf.argmax(input=logits, axis=1), 43 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 44 | } 45 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 46 | 47 | grads_and_vars = optimizer.compute_gradients(loss) 48 | grads, _ = zip(*grads_and_vars) 49 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 50 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 51 | return features, labels, train_op, grads, eval_metric_ops, loss 52 | 53 | def set_params(self, model_params=None): 54 | if model_params is not None: 55 | with self.graph.as_default(): 56 | all_vars = tf.trainable_variables() 57 | for variable, value in zip(all_vars, model_params): 58 | variable.load(value, self.sess) 59 | 60 | def get_params(self): 61 | with self.graph.as_default(): 62 | model_params = self.sess.run(tf.trainable_variables()) 63 | return model_params 64 | 65 | def get_gradients(self, data, model_len): 66 | 67 | grads = np.zeros(model_len) 68 | num_samples = len(data['y']) 69 | 70 | with self.graph.as_default(): 71 | model_grads = 
self.sess.run(self.grads, 72 | feed_dict={self.features: data['x'], self.labels: data['y']}) 73 | grads = process_grad(model_grads) 74 | 75 | return num_samples, grads 76 | 77 | def solve_inner(self, data, num_epochs=1, batch_size=32): 78 | '''Solves local optimization problem''' 79 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 80 | for X, y in batch_data(data, batch_size): 81 | with self.graph.as_default(): 82 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 83 | soln = self.get_params() 84 | comp = num_epochs * (len(data['y'])//batch_size) * batch_size * self.flops 85 | return soln, comp 86 | 87 | def solve_iters(self, data, num_iters=1, batch_size=32): 88 | '''Solves local optimization problem''' 89 | 90 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 91 | with self.graph.as_default(): 92 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 93 | soln = self.get_params() 94 | comp = 0 95 | return soln, comp 96 | 97 | def test(self, data): 98 | ''' 99 | Args: 100 | data: dict of the form {'x': [list], 'y': [list]} 101 | ''' 102 | with self.graph.as_default(): 103 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 104 | feed_dict={self.features: data['x'], self.labels: data['y']}) 105 | return tot_correct, loss 106 | 107 | def close(self): 108 | self.sess.close() 109 | -------------------------------------------------------------------------------- /flearn/models/synthetic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/synthetic/__init__.py -------------------------------------------------------------------------------- /flearn/models/synthetic/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 5 | from flearn.utils.tf_utils import graph_size 6 | from flearn.utils.tf_utils import process_grad 7 | 8 | 9 | class Model(object): 10 | ''' 11 | Assumes that images are 28px by 28px 12 | ''' 13 | 14 | def __init__(self, num_classes, optimizer, seed=1): 15 | 16 | # params 17 | self.num_classes = num_classes 18 | 19 | # create computation graph 20 | self.graph = tf.Graph() 21 | with self.graph.as_default(): 22 | tf.set_random_seed(123+seed) 23 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss, self.pred = self.create_model(optimizer) 24 | self.saver = tf.train.Saver() 25 | self.sess = tf.Session(graph=self.graph) 26 | 27 | # find memory footprint and compute cost of the model 28 | self.size = graph_size(self.graph) 29 | with self.graph.as_default(): 30 | self.sess.run(tf.global_variables_initializer()) 31 | metadata = tf.RunMetadata() 32 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 33 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 34 | 35 | def create_model(self, optimizer): 36 | """Model function for Logistic Regression.""" 37 | features = tf.placeholder(tf.float32, shape=[None, 60], name='features') 38 | labels = tf.placeholder(tf.int64, shape=[None,], name='labels') 39 | logits = tf.layers.dense(inputs=features, units=self.num_classes, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 40 | predictions = { 41 | "classes": 
tf.argmax(input=logits, axis=1), 42 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 43 | } 44 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 45 | 46 | grads_and_vars = optimizer.compute_gradients(loss) 47 | grads, _ = zip(*grads_and_vars) 48 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 49 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 50 | return features, labels, train_op, grads, eval_metric_ops, loss, predictions["classes"] 51 | 52 | def set_params(self, model_params=None): 53 | if model_params is not None: 54 | with self.graph.as_default(): 55 | all_vars = tf.trainable_variables() 56 | for variable, value in zip(all_vars, model_params): 57 | variable.load(value, self.sess) 58 | 59 | def get_params(self): 60 | with self.graph.as_default(): 61 | model_params = self.sess.run(tf.trainable_variables()) 62 | return model_params 63 | 64 | def get_gradients(self, data, model_len): 65 | 66 | grads = np.zeros(model_len) 67 | num_samples = len(data['y']) 68 | 69 | with self.graph.as_default(): 70 | model_grads = self.sess.run(self.grads, 71 | feed_dict={self.features: data['x'], self.labels: data['y']}) 72 | grads = process_grad(model_grads) 73 | 74 | return num_samples, grads 75 | 76 | def solve_inner(self, data, num_epochs=1, batch_size=32): 77 | '''Solves local optimization problem''' 78 | 79 | for _ in range(num_epochs): 80 | for X, y in batch_data(data, batch_size): 81 | with self.graph.as_default(): 82 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 83 | soln = self.get_params() 84 | comp = num_epochs * (len(data['y'])//batch_size) * batch_size * self.flops 85 | return soln, comp 86 | 87 | def solve_iters(self, data, num_iters=1, batch_size=32): 88 | '''Solves local optimization problem''' 89 | 90 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 91 | with self.graph.as_default(): 92 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 93 | soln = self.get_params() 94 | comp = 0 95 | return soln, comp 96 | 97 | def test(self, data): 98 | ''' 99 | Args: 100 | data: dict of the form {'x': [list], 'y': [list]} 101 | ''' 102 | with self.graph.as_default(): 103 | tot_correct, loss, pred = self.sess.run([self.eval_metric_ops, self.loss, self.pred], 104 | feed_dict={self.features: data['x'], self.labels: data['y']}) 105 | return tot_correct, loss 106 | 107 | def close(self): 108 | self.sess.close() 109 | -------------------------------------------------------------------------------- /flearn/optimizer/pgd.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.ops import control_flow_ops 2 | from tensorflow.python.ops import math_ops 3 | from tensorflow.python.ops import state_ops 4 | from tensorflow.python.framework import ops 5 | from tensorflow.python.training import optimizer 6 | import tensorflow as tf 7 | 8 | 9 | class PerturbedGradientDescent(optimizer.Optimizer): 10 | """Implementation of Perturbed Gradient Descent, i.e., FedProx optimizer""" 11 | def __init__(self, learning_rate=0.001, mu=0.01, use_locking=False, name="PGD"): 12 | super(PerturbedGradientDescent, self).__init__(use_locking, name) 13 | self._lr = learning_rate 14 | self._mu = mu 15 | 16 | # Tensor versions of the constructor arguments, created in _prepare(). 
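        # In _apply_dense below, lr_t and mu_t drive the FedProx proximal step
        #     var <- var - lr * (grad + mu * (var - vstar)),
        # where the "vstar" slot holds the global model loaded via set_params(),
        # so mu controls how strongly each local update is pulled back towards
        # the server solution (mu = 0 reduces to plain gradient descent).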
17 | self._lr_t = None 18 | self._mu_t = None 19 | 20 | def _prepare(self): 21 | self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate") 22 | self._mu_t = ops.convert_to_tensor(self._mu, name="prox_mu") 23 | 24 | def _create_slots(self, var_list): 25 | # Create slots for the global solution. 26 | for v in var_list: 27 | self._zeros_slot(v, "vstar", self._name) 28 | 29 | def _apply_dense(self, grad, var): 30 | lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) 31 | mu_t = math_ops.cast(self._mu_t, var.dtype.base_dtype) 32 | vstar = self.get_slot(var, "vstar") 33 | 34 | var_update = state_ops.assign_sub(var, lr_t*(grad + mu_t*(var-vstar))) 35 | 36 | return control_flow_ops.group(*[var_update,]) 37 | 38 | 39 | def _apply_sparse_shared(self, grad, var, indices, scatter_add): 40 | 41 | lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) 42 | mu_t = math_ops.cast(self._mu_t, var.dtype.base_dtype) 43 | vstar = self.get_slot(var, "vstar") 44 | 45 | v_diff = state_ops.assign(vstar, mu_t * (var - vstar), use_locking=self._use_locking) 46 | 47 | with ops.control_dependencies([v_diff]): # run v_diff operation before scatter_add 48 | scaled_grad = scatter_add(vstar, indices, grad) 49 | var_update = state_ops.assign_sub(var, lr_t * scaled_grad) 50 | 51 | return control_flow_ops.group(*[var_update,]) 52 | 53 | def _apply_sparse(self, grad, var): 54 | return self._apply_sparse_shared( 55 | grad.values, var, grad.indices, 56 | lambda x, i, v: state_ops.scatter_add(x, i, v)) 57 | 58 | 59 | def set_params(self, cog, client): 60 | with client.graph.as_default(): 61 | all_vars = tf.trainable_variables() 62 | for variable, value in zip(all_vars, cog): 63 | # print(value) 64 | # print(variable) 65 | vstar = self.get_slot(variable, "vstar") 66 | # print(vstar) 67 | vstar.load(value, client.sess) 68 | -------------------------------------------------------------------------------- /flearn/optimizer/pggd.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.ops import control_flow_ops 2 | from tensorflow.python.ops import math_ops 3 | from tensorflow.python.ops import state_ops 4 | from tensorflow.python.framework import ops 5 | from tensorflow.python.training import optimizer 6 | import tensorflow as tf 7 | 8 | 9 | class PerGodGradientDescent(optimizer.Optimizer): 10 | """Implementation of Perturbed gold Gradient Descent""" 11 | def __init__(self, learning_rate=0.001, mu=0.01, use_locking=False, name="PGD"): 12 | super(PerGodGradientDescent, self).__init__(use_locking, name) 13 | self._lr = learning_rate 14 | self._mu = mu 15 | 16 | # Tensor versions of the constructor arguments, created in _prepare(). 17 | self._lr_t = None 18 | self._mu_t = None 19 | 20 | def _prepare(self): 21 | self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate") 22 | self._mu_t = ops.convert_to_tensor(self._mu, name="prox_mu") 23 | 24 | def _create_slots(self, var_list): 25 | # Create slots for the global solution. 26 | for v in var_list: 27 | self._zeros_slot(v, "vstar", self._name) 28 | self._zeros_slot(v, "gold", self._name) 29 | 30 | def _apply_dense(self, grad, var): 31 | lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) 32 | mu_t = math_ops.cast(self._mu_t, var.dtype.base_dtype) 33 | 34 | vstar = self.get_slot(var, "vstar") 35 | gold = self.get_slot(var, "gold") 36 | 37 | var_update = state_ops.assign_sub(var, lr_t*(grad + gold + mu_t*(var-vstar))) #Update 'ref' by subtracting 'value 38 | #Create an op that groups multiple operations. 
39 | #When this op finishes, all ops in input have finished 40 | return control_flow_ops.group(*[var_update,]) 41 | 42 | def _apply_sparse(self, grad, var): 43 | raise NotImplementedError("Sparse gradient updates are not supported.") 44 | 45 | def set_params(self, cog, avg_gradient, client): 46 | with client.model.graph.as_default(): 47 | all_vars = tf.trainable_variables() 48 | for variable, value in zip(all_vars, cog): 49 | vstar = self.get_slot(variable, "vstar") 50 | vstar.load(value, client.model.sess) 51 | 52 | # get old gradient 53 | gprev = client.get_grads() 54 | 55 | # Find g_t - F'(old) 56 | gdiff = [g1-g2 for g1,g2 in zip(avg_gradient, gprev)] 57 | 58 | with client.model.graph.as_default(): 59 | all_vars = tf.trainable_variables() 60 | for variable, grad in zip(all_vars, gdiff): 61 | gold = self.get_slot(variable, "gold") 62 | gold.load(grad, client.model.sess) 63 | -------------------------------------------------------------------------------- /flearn/trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/trainers/__init__.py -------------------------------------------------------------------------------- /flearn/trainers/fedavg.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | 4 | import numpy as np 5 | from tqdm import trange, tqdm 6 | import tensorflow as tf 7 | 8 | from utils import csv_log 9 | from .fedbase import BaseFedarated 10 | from flearn.utils.tf_utils import process_grad 11 | 12 | 13 | class Server(BaseFedarated): 14 | def __init__(self, params, learner, dataset): 15 | print('Using Federated avg to Train') 16 | self.inner_opt = tf.train.GradientDescentOptimizer(params['learning_rate']) 17 | 18 | # Setup Log 19 | self.params_log = params 20 | # self.run_name = str(params["ex_name"])+"_fedavg_"+ str(datetime.datetime.now().strftime("%m%d-%H%M%S")) 21 | self.run_name = str(params["ex_name"])+"_fedavg" 22 | self.log_main = [] 23 | csv_log.log_start('avg',params,1, self.run_name) 24 | 25 | super(Server, self).__init__(params, learner, dataset) 26 | 27 | def train(self): 28 | '''Train using Federated Proximal''' 29 | print('Training with {} workers ---'.format(self.clients_per_round)) 30 | elapsed = [] 31 | for i in range(self.num_rounds): 32 | # test model 33 | if i % self.eval_every == 0: 34 | stats = self.test() # have set the latest model for all clients 35 | stats_train = self.train_error_and_loss() 36 | 37 | train_loss = np.dot(stats_train[4], stats_train[2]) * 1.0 / np.sum(stats_train[2]) 38 | train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]) 39 | test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2]) 40 | 41 | self.log_main.append([i, train_loss, train_acc, test_acc]) 42 | 43 | tqdm.write('At round {} accuracy: {}'.format(i, test_acc )) # testing accuracy 44 | tqdm.write('At round {} training accuracy: {}'.format(i,train_acc )) 45 | tqdm.write('At round {} training loss: {}'.format(i,train_loss )) 46 | 47 | start_time = time.time() 48 | 49 | indices, selected_clients = self.select_clients(i, num_clients=self.clients_per_round) # uniform sampling 50 | np.random.seed(i) 51 | active_clients = np.random.choice(selected_clients, round(self.clients_per_round * (1-self.drop_percent)), replace=False) 52 | 53 | csolns = [] # buffer for receiving client solutions 54 | 55 | for idx, c in enumerate(active_clients.tolist()): # simply drop 
the slow devices 56 | # communicate the latest model 57 | c.set_params(self.latest_model) 58 | 59 | # solve minimization locally 60 | soln, stats = c.solve_inner(num_epochs=self.num_epochs, batch_size=self.batch_size) 61 | 62 | # gather solutions from client 63 | csolns.append(soln) 64 | 65 | # track communication cost 66 | self.metrics.update(rnd=i, cid=c.id, stats=stats) 67 | 68 | # update models 69 | self.latest_model = self.aggregate(csolns) 70 | elapsed_time = time.time() - start_time 71 | elapsed.append(elapsed_time) 72 | 73 | # final test model 74 | stats = self.test() 75 | stats_train = self.train_error_and_loss() 76 | self.metrics.accuracies.append(stats) 77 | self.metrics.train_accuracies.append(stats_train) 78 | 79 | test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2]) 80 | train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]) 81 | 82 | tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, test_acc)) 83 | tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, train_acc)) 84 | 85 | self.log_main.append([self.num_rounds, train_loss, train_acc, test_acc]) 86 | csv_log.write_all('avg', self.log_main, [], 1, self.run_name) 87 | csv_log.graph_print('avg',self.params_log, 1, self.run_name) 88 | 89 | print("Time Taken Each Round: ") 90 | print(elapsed) 91 | print(np.mean(elapsed)) 92 | csv_log.write_time_taken(elapsed, self.run_name) 93 | 94 | -------------------------------------------------------------------------------- /flearn/trainers/fedbase.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import tqdm 4 | 5 | from flearn.models.client import Client 6 | from flearn.utils.model_utils import Metrics 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | class BaseFedarated(object): 10 | def __init__(self, params, learner, dataset): 11 | # transfer parameters to self 12 | for key, val in params.items(): setattr(self, key, val); 13 | 14 | # create worker nodes 15 | tf.reset_default_graph() 16 | self.client_model = learner(*params['model_params'], self.inner_opt, self.seed) 17 | self.clients = self.setup_clients(dataset, self.client_model) 18 | print('{} Clients in Total'.format(len(self.clients))) 19 | self.latest_model = self.client_model.get_params() 20 | self.params_ = params 21 | # initialize system metrics 22 | self.metrics = Metrics(self.clients, params) 23 | 24 | def __del__(self): 25 | self.client_model.close() 26 | 27 | def setup_clients(self, dataset, model=None): 28 | '''instantiates clients based on given train and test data directories 29 | 30 | Return: 31 | list of Clients 32 | ''' 33 | users, groups, train_data, test_data = dataset 34 | if len(groups) == 0: 35 | groups = [None for _ in users] 36 | all_clients = [Client(u, g, train_data[u], test_data[u], model) for u, g in zip(users, groups)] 37 | return all_clients 38 | 39 | def train_error_and_loss(self): 40 | num_samples = [] 41 | tot_correct = [] 42 | losses = [] 43 | 44 | for c in self.clients: 45 | ct, cl, ns = c.train_error_and_loss() 46 | tot_correct.append(ct*1.0) 47 | num_samples.append(ns) 48 | losses.append(cl*1.0) 49 | 50 | ids = [c.id for c in self.clients] 51 | groups = [c.group for c in self.clients] 52 | 53 | return ids, groups, num_samples, tot_correct, losses 54 | 55 | 56 | def show_grads(self): 57 | ''' 58 | Return: 59 | gradients on all workers and the global gradient 60 | ''' 61 | 62 | model_len = process_grad(self.latest_model).size 63 | global_grads = 
np.zeros(model_len) 64 | 65 | intermediate_grads = [] 66 | samples=[] 67 | 68 | self.client_model.set_params(self.latest_model) 69 | for c in self.clients: 70 | num_samples, client_grads = c.get_grads(self.latest_model) 71 | samples.append(num_samples) 72 | global_grads = np.add(global_grads, client_grads * num_samples) 73 | intermediate_grads.append(client_grads) 74 | 75 | global_grads = global_grads * 1.0 / np.sum(np.asarray(samples)) 76 | intermediate_grads.append(global_grads) 77 | 78 | return intermediate_grads 79 | 80 | 81 | def test(self): 82 | '''tests self.latest_model on given clients 83 | ''' 84 | num_samples = [] 85 | tot_correct = [] 86 | self.client_model.set_params(self.latest_model) 87 | for c in self.clients: 88 | ct, ns = c.test() 89 | tot_correct.append(ct*1.0) 90 | num_samples.append(ns) 91 | ids = [c.id for c in self.clients] 92 | groups = [c.group for c in self.clients] 93 | return ids, groups, num_samples, tot_correct 94 | 95 | def save(self): 96 | pass 97 | 98 | def select_clients(self, round, num_clients=20): 99 | '''selects num_clients clients weighted by number of samples from possible_clients 100 | 101 | Args: 102 | num_clients: number of clients to select; default 20 103 | note that within function, num_clients is set to 104 | min(num_clients, len(possible_clients)) 105 | 106 | Return: 107 | list of selected clients objects 108 | ''' 109 | 110 | num_clients = min(num_clients, len(self.clients)) 111 | np.random.seed(round+self.params_['seed']) # make sure for each comparison, we are selecting the same clients each round 112 | indices = np.random.choice(range(len(self.clients)), num_clients, replace=False) 113 | return indices, np.asarray(self.clients)[indices] 114 | 115 | def aggregate(self, wsolns): 116 | total_weight = 0.0 117 | base = [0]*len(wsolns[0][1]) 118 | for (w, soln) in wsolns: # w is the number of local samples 119 | total_weight += w 120 | for i, v in enumerate(soln): 121 | base[i] += w*v.astype(np.float64) 122 | 123 | averaged_soln = [v / total_weight for v in base] 124 | 125 | return averaged_soln 126 | 127 | -------------------------------------------------------------------------------- /flearn/trainers/feddane.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import trange, tqdm 3 | import tensorflow as tf 4 | 5 | from .fedbase import BaseFedarated 6 | from flearn.optimizer.pggd import PerGodGradientDescent 7 | from flearn.utils.tf_utils import process_grad, process_sparse_grad 8 | 9 | 10 | class Server(BaseFedarated): 11 | def __init__(self, params, learner, dataset): 12 | print('Using Federated Dane to Train') 13 | self.inner_opt = PerGodGradientDescent(params['learning_rate'], params['mu']) 14 | super(Server, self).__init__(params, learner, dataset) 15 | 16 | def train(self): 17 | '''Train using Federated Proximal''' 18 | print('Training with {} workers ---'.format(self.clients_per_round)) 19 | for i in trange(self.num_rounds, desc='Round: ', ncols=120): 20 | # test model 21 | if i % self.eval_every == 0: 22 | stats = self.test() # have set the latest model for all clients 23 | stats_train = self.train_error_and_loss() 24 | 25 | tqdm.write('At round {} accuracy: {}'.format(i, np.sum(stats[3])*1.0/np.sum(stats[2]))) # testing accuracy 26 | tqdm.write('At round {} training accuracy: {}'.format(i, np.sum(stats_train[3])*1.0/np.sum(stats_train[2]))) 27 | tqdm.write('At round {} training loss: {}'.format(i, np.dot(stats_train[4], stats_train[2])*1.0/np.sum(stats_train[2]))) 
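            # Each FedDANE round below runs two passes over sampled clients: the
            # first pass only collects local gradients at the current global model
            # and averages them into avg_gradient; the second pass hands that
            # average (together with the latest model) to PerGodGradientDescent via
            # set_params, so every client solves its local problem with the DANE
            # gradient-correction term before the solutions are re-aggregated.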
28 | 29 | # choose K clients prop to data size 30 | selected_clients = self.select_clients(i, num_clients=self.clients_per_round) 31 | 32 | cgrads = [] # buffer for receiving client solutions 33 | for c in tqdm(selected_clients, desc='Grads: ', leave=False, ncols=120): 34 | # communicate the latest model 35 | c.set_params(self.latest_model) 36 | 37 | # get the gradients 38 | grad, stats = c.solve_grad() 39 | 40 | # gather gradient from client 41 | cgrads.append(grad) 42 | 43 | # Total gradient 44 | avg_gradient = self.aggregate(cgrads) 45 | 46 | # Choose K clients prop to data size 47 | selected_clients = self.select_clients(i, num_clients=self.clients_per_round) 48 | 49 | csolns = [] # buffer for receiving client solutions 50 | for c in tqdm(selected_clients, desc='Solve: ', leave=False, ncols=120): 51 | # communicate the latest model 52 | c.set_params(self.latest_model) # w_{t-1} 53 | 54 | # setup local optimizer 55 | self.inner_opt.set_params(self.latest_model, avg_gradient, c) 56 | 57 | # solve minimization locally 58 | soln, stats = c.solve_inner(num_epochs=self.num_epochs, batch_size=self.batch_size) 59 | 60 | # gather solutions from client 61 | csolns.append(soln) 62 | 63 | # update model 64 | self.latest_model = self.aggregate(csolns) 65 | 66 | # final test model 67 | stats = self.test() 68 | stats_train = self.train_error_and_loss() 69 | tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, np.sum(stats[3])*1.0/np.sum(stats[2]))) 70 | tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, np.sum(stats_train[3])*1.0/np.sum(stats_train[2]))) 71 | -------------------------------------------------------------------------------- /flearn/trainers/fedprox.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | 4 | import numpy as np 5 | from tqdm import trange, tqdm 6 | import tensorflow as tf 7 | 8 | from utils import csv_log 9 | from .fedbase import BaseFedarated 10 | from flearn.optimizer.pgd import PerturbedGradientDescent 11 | from flearn.utils.tf_utils import process_grad, process_sparse_grad 12 | 13 | 14 | class Server(BaseFedarated): 15 | def __init__(self, params, learner, dataset): 16 | print('Using Federated prox to Train') 17 | self.inner_opt = PerturbedGradientDescent(params['learning_rate'], params['mu']) 18 | 19 | # Setup Log 20 | self.params_log = params 21 | # self.run_name = str(params["ex_name"])+"_fedprox_"+ str(datetime.datetime.now().strftime("%m%d-%H%M%S")) 22 | self.run_name = str(params["ex_name"])+"_fedprox" 23 | self.log_main = [] 24 | csv_log.log_start('prox',params,1, self.run_name) 25 | 26 | super(Server, self).__init__(params, learner, dataset) 27 | 28 | def train(self): 29 | '''Train using Federated Proximal''' 30 | print('Training with {} workers ---'.format(self.clients_per_round)) 31 | 32 | elapsed = [] 33 | 34 | for i in range(self.num_rounds): 35 | # test model 36 | if i % self.eval_every == 0: 37 | stats = self.test() # have set the latest model for all clients 38 | stats_train = self.train_error_and_loss() 39 | 40 | test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2]) 41 | train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]) 42 | train_loss = np.dot(stats_train[4], stats_train[2]) * 1.0 / np.sum(stats_train[2]) 43 | 44 | tqdm.write('At round {} accuracy: {}'.format(i, test_acc )) # testing accuracy 45 | tqdm.write('At round {} training accuracy: {}'.format(i,train_acc )) 46 | tqdm.write('At round {} training loss: {}'.format(i,train_loss )) 47 | 
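                # The metrics above are sample-weighted across all clients:
                # train_loss is np.dot(losses, num_samples) / sum(num_samples)
                # (stats_train[4] and stats_train[2] respectively), and both
                # accuracies divide the summed correct predictions by the same
                # total, so clients with more data contribute proportionally more.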
48 | self.log_main.append([i, train_loss, train_acc, test_acc]) 49 | 50 | start_time = time.time() 51 | 52 | model_len = process_grad(self.latest_model).size 53 | global_grads = np.zeros(model_len) 54 | client_grads = np.zeros(model_len) 55 | num_samples = [] 56 | local_grads = [] 57 | 58 | for c in self.clients: 59 | num, client_grad = c.get_grads(model_len) 60 | local_grads.append(client_grad) 61 | num_samples.append(num) 62 | global_grads = np.add(global_grads, client_grad * num) 63 | global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples)) 64 | 65 | difference = 0 66 | for idx in range(len(self.clients)): 67 | difference += np.sum(np.square(global_grads - local_grads[idx])) 68 | difference = difference * 1.0 / len(self.clients) 69 | tqdm.write('gradient difference: {}'.format(difference)) 70 | 71 | indices, selected_clients = self.select_clients(i, num_clients=self.clients_per_round) # uniform sampling 72 | np.random.seed(i) # make sure that the stragglers are the same for FedProx and FedAvg 73 | active_clients = np.random.choice(selected_clients, round(self.clients_per_round * (1 - self.drop_percent)), replace=False) 74 | 75 | csolns = [] # buffer for receiving client solutions 76 | 77 | self.inner_opt.set_params(self.latest_model, self.client_model) 78 | 79 | for idx, c in enumerate(selected_clients.tolist()): 80 | # communicate the latest model 81 | c.set_params(self.latest_model) 82 | 83 | total_iters = int(self.num_epochs * c.num_samples / self.batch_size)+2 # randint(low,high)=[low,high) 84 | 85 | # solve minimization locally 86 | if c in active_clients: 87 | soln, stats = c.solve_inner(num_epochs=self.num_epochs, batch_size=self.batch_size) 88 | else: 89 | #soln, stats = c.solve_iters(num_iters=np.random.randint(low=1, high=total_iters), batch_size=self.batch_size) 90 | soln, stats = c.solve_inner(num_epochs=np.random.randint(low=1, high=self.num_epochs), batch_size=self.batch_size) 91 | 92 | # gather solutions from client 93 | csolns.append(soln) 94 | 95 | # track communication cost 96 | self.metrics.update(rnd=i, cid=c.id, stats=stats) 97 | 98 | # update models 99 | self.latest_model = self.aggregate(csolns) 100 | self.client_model.set_params(self.latest_model) 101 | elapsed_time = time.time() - start_time 102 | elapsed.append(elapsed_time) 103 | 104 | # final test model 105 | stats = self.test() 106 | stats_train = self.train_error_and_loss() 107 | self.metrics.accuracies.append(stats) 108 | self.metrics.train_accuracies.append(stats_train) 109 | 110 | test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2]) 111 | train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]) 112 | 113 | tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, test_acc)) 114 | tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, train_acc)) 115 | 116 | self.log_main.append([self.num_rounds, train_loss, train_acc, test_acc]) 117 | csv_log.write_all('prox', self.log_main, [], 1, self.run_name) 118 | csv_log.graph_print('prox',self.params_log, 1, self.run_name) 119 | 120 | # tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, np.sum(stats[3])*1.0/np.sum(stats[2]))) 121 | # tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, np.sum(stats_train[3])*1.0/np.sum(stats_train[2]))) 122 | print("Time Taken Each Round: ") 123 | print(elapsed) 124 | print(np.mean(elapsed)) 125 | csv_log.write_time_taken(elapsed, self.run_name) 126 | -------------------------------------------------------------------------------- /flearn/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/utils/__init__.py -------------------------------------------------------------------------------- /flearn/utils/language_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for language models.""" 2 | 3 | import re 4 | 5 | 6 | # ------------------------ 7 | # utils for shakespeare dataset 8 | 9 | ALL_LETTERS = "\n !\"&'(),-.0123456789:;>?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]abcdefghijklmnopqrstuvwxyz}" 10 | NUM_LETTERS = len(ALL_LETTERS) 11 | 12 | 13 | def _one_hot(index, size): 14 | '''returns one-hot vector with given size and value 1 at given index 15 | ''' 16 | vec = [0 for _ in range(size)] 17 | vec[int(index)] = 1 18 | return vec 19 | 20 | 21 | def letter_to_vec(letter): 22 | '''returns one-hot representation of given letter 23 | ''' 24 | index = ALL_LETTERS.find(letter) 25 | return _one_hot(index, NUM_LETTERS) 26 | 27 | 28 | def word_to_indices(word): 29 | '''returns a list of character indices 30 | 31 | Args: 32 | word: string 33 | 34 | Return: 35 | indices: int list with length len(word) 36 | ''' 37 | indices = [] 38 | for c in word: 39 | indices.append(ALL_LETTERS.find(c)) 40 | return indices 41 | 42 | 43 | # ------------------------ 44 | # utils for sent140 dataset 45 | 46 | 47 | def split_line(line): 48 | '''split given line/phrase into list of words 49 | 50 | Args: 51 | line: string representing phrase to be split 52 | 53 | Return: 54 | list of strings, with each string representing a word 55 | ''' 56 | return re.findall(r"[\w']+|[.,!?;]", line) 57 | 58 | 59 | def _word_to_index(word, indd): 60 | '''returns index of given word based on given lookup dictionary 61 | 62 | returns the length of the lookup dictionary if word not found 63 | 64 | Args: 65 | word: string 66 | indd: dictionary with string words as keys and int indices as values 67 | ''' 68 | if word in indd: 69 | return indd[word] 70 | else: 71 | return len(indd) 72 | 73 | 74 | def line_to_indices(line, word2id, max_words=25): 75 | '''converts given phrase into list of word indices 76 | 77 | if the phrase has more than max_words words, returns a list containing 78 | indices of the first max_words words 79 | if the phrase has less than max_words words, repeatedly appends integer 80 | representing unknown index to returned list until the list's length is 81 | max_words 82 | 83 | Args: 84 | line: string representing phrase/sequence of words 85 | word2id: dictionary with string words as keys and int indices as values 86 | max_words: maximum number of word indices in returned list 87 | 88 | Return: 89 | indl: list of word indices, one index for each word in phrase 90 | ''' 91 | unk_id = len(word2id) 92 | line_list = split_line(line) # split phrase in words 93 | indl = [word2id[w] if w in word2id else unk_id for w in line_list[:max_words]] 94 | indl += [unk_id]*(max_words-len(indl)) 95 | return indl 96 | 97 | 98 | def bag_of_words(line, vocab): 99 | '''returns bag of words representation of given phrase using given vocab 100 | 101 | Args: 102 | line: string representing phrase to be parsed 103 | vocab: dictionary with words as keys and indices as values 104 | 105 | Return: 106 | integer list 107 | ''' 108 | bag = [0]*len(vocab) 109 | words = split_line(line) 110 | for w in words: 111 | if w in vocab: 112 | bag[vocab[w]] += 1 113 | return bag 114 | 
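# A minimal usage sketch; the toy vocabulary below is illustrative only and is
# not part of the repository's data. line_to_indices maps the first max_words
# tokens to vocabulary indices and pads with the unknown index len(word2id),
# while bag_of_words counts occurrences of in-vocabulary tokens.
if __name__ == '__main__':
    demo_vocab = {'i': 0, 'love': 1, 'book': 2}  # hypothetical toy vocab
    print(line_to_indices('i love this book', demo_vocab, max_words=5))
    # -> [0, 1, 3, 2, 3]   ('this' and the padding both map to unk_id == 3)
    print(bag_of_words('i love love this book', demo_vocab))
    # -> [1, 2, 1]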
-------------------------------------------------------------------------------- /flearn/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | 5 | def batch_data(data, batch_size): 6 | ''' 7 | data is a dict := {'x': [numpy array], 'y': [numpy array]} (on one client) 8 | returns x, y, which are both numpy array of length: batch_size 9 | ''' 10 | data_x = data['x'] 11 | data_y = data['y'] 12 | 13 | # randomly shuffle data 14 | np.random.seed(100) 15 | rng_state = np.random.get_state() 16 | np.random.shuffle(data_x) 17 | np.random.set_state(rng_state) 18 | np.random.shuffle(data_y) 19 | 20 | # loop through mini-batches 21 | for i in range(0, len(data_x), batch_size): 22 | batched_x = data_x[i:i+batch_size] 23 | batched_y = data_y[i:i+batch_size] 24 | yield (batched_x, batched_y) 25 | 26 | def batch_data_multiple_iters(data, batch_size, num_iters): 27 | data_x = data['x'] 28 | data_y = data['y'] 29 | 30 | np.random.seed(100) 31 | rng_state = np.random.get_state() 32 | np.random.shuffle(data_x) 33 | np.random.set_state(rng_state) 34 | np.random.shuffle(data_y) 35 | 36 | idx = 0 37 | 38 | for i in range(num_iters): 39 | if idx+batch_size >= len(data_x): 40 | idx = 0 41 | rng_state = np.random.get_state() 42 | np.random.shuffle(data_x) 43 | np.random.set_state(rng_state) 44 | np.random.shuffle(data_y) 45 | batched_x = data_x[idx: idx+batch_size] 46 | batched_y = data_y[idx: idx+batch_size] 47 | idx += batch_size 48 | yield (batched_x, batched_y) 49 | 50 | def read_data(train_data_dir, test_data_dir): 51 | '''parses data in given train and test data directories 52 | 53 | assumes: 54 | - the data in the input directories are .json files with 55 | keys 'users' and 'user_data' 56 | - the set of train set users is the same as the set of test set users 57 | 58 | Return: 59 | clients: list of client ids 60 | groups: list of group ids; empty list if none found 61 | train_data: dictionary of train data 62 | test_data: dictionary of test data 63 | ''' 64 | clients = [] 65 | groups = [] 66 | train_data = {} 67 | test_data = {} 68 | 69 | train_files = os.listdir(train_data_dir) 70 | train_files = [f for f in train_files if f.endswith('.json')] 71 | for f in train_files: 72 | file_path = os.path.join(train_data_dir,f) 73 | with open(file_path, 'r') as inf: 74 | cdata = json.load(inf) 75 | clients.extend(cdata['users']) 76 | if 'hierarchies' in cdata: 77 | groups.extend(cdata['hierarchies']) 78 | train_data.update(cdata['user_data']) 79 | 80 | test_files = os.listdir(test_data_dir) 81 | test_files = [f for f in test_files if f.endswith('.json')] 82 | for f in test_files: 83 | file_path = os.path.join(test_data_dir,f) 84 | with open(file_path, 'r') as inf: 85 | cdata = json.load(inf) 86 | test_data.update(cdata['user_data']) 87 | 88 | clients = list(sorted(train_data.keys())) 89 | 90 | return clients, groups, train_data, test_data 91 | 92 | 93 | class Metrics(object): 94 | def __init__(self, clients, params): 95 | self.params = params 96 | num_rounds = params['num_rounds'] 97 | self.bytes_written = {c.id: [0] * num_rounds for c in clients} 98 | self.client_computations = {c.id: [0] * num_rounds for c in clients} 99 | self.bytes_read = {c.id: [0] * num_rounds for c in clients} 100 | self.accuracies = [] 101 | self.train_accuracies = [] 102 | 103 | def update(self, rnd, cid, stats): 104 | bytes_w, comp, bytes_r = stats 105 | self.bytes_written[cid][rnd] += bytes_w 106 | self.client_computations[cid][rnd] += 
comp 107 | self.bytes_read[cid][rnd] += bytes_r 108 | 109 | def write(self): 110 | metrics = {} 111 | metrics['dataset'] = self.params['dataset'] 112 | metrics['num_rounds'] = self.params['num_rounds'] 113 | metrics['eval_every'] = self.params['eval_every'] 114 | metrics['learning_rate'] = self.params['learning_rate'] 115 | metrics['mu'] = self.params['mu'] 116 | metrics['num_epochs'] = self.params['num_epochs'] 117 | metrics['batch_size'] = self.params['batch_size'] 118 | metrics['accuracies'] = self.accuracies 119 | metrics['train_accuracies'] = self.train_accuracies 120 | metrics['client_computations'] = self.client_computations 121 | metrics['bytes_written'] = self.bytes_written 122 | metrics['bytes_read'] = self.bytes_read 123 | metrics_dir = os.path.join('out', self.params['dataset'], 'metrics_{}_{}_{}_{}_{}.json'.format(self.params['seed'], self.params['optimizer'], self.params['learning_rate'], self.params['num_epochs'], self.params['mu'])) 124 | #os.mkdir(os.path.join('out', self.params['dataset'])) 125 | if not os.path.exists(os.path.join('out', self.params['dataset'])): 126 | os.mkdir(os.path.join('out', self.params['dataset'])) 127 | with open(metrics_dir, 'w') as ouf: 128 | json.dump(metrics, ouf) 129 | -------------------------------------------------------------------------------- /flearn/utils/tf_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import tensorflow as tf 4 | 5 | def __num_elems(shape): 6 | '''Returns the number of elements in the given shape 7 | 8 | Args: 9 | shape: TensorShape 10 | 11 | Return: 12 | tot_elems: int 13 | ''' 14 | tot_elems = 1 15 | for s in shape: 16 | tot_elems *= int(s) 17 | return tot_elems 18 | 19 | def graph_size(graph): 20 | '''Returns the size of the given graph in bytes 21 | 22 | The size of the graph is calculated by summing up the sizes of each 23 | trainable variable. 
The sizes of variables are calculated by multiplying 24 | the number of bytes in their dtype with their number of elements, captured 25 | in their shape attribute 26 | 27 | Args: 28 | graph: TF graph 29 | Return: 30 | integer representing size of graph (in bytes) 31 | ''' 32 | tot_size = 0 33 | with graph.as_default(): 34 | vs = tf.trainable_variables() 35 | for v in vs: 36 | tot_elems = __num_elems(v.shape) 37 | dtype_size = int(v.dtype.size) 38 | var_size = tot_elems * dtype_size 39 | tot_size += var_size 40 | return tot_size 41 | 42 | def process_sparse_grad(grads): 43 | ''' 44 | Args: 45 | grads: grad returned by LSTM model (only for the shakespaere dataset) 46 | Return: 47 | a flattened grad in numpy (1-D array) 48 | ''' 49 | 50 | indices = grads[0].indices 51 | values = grads[0].values 52 | first_layer_dense = np.zeros((80,8)) 53 | for i in range(indices.shape[0]): 54 | first_layer_dense[indices[i], :] = values[i, :] 55 | 56 | client_grads = first_layer_dense 57 | for i in range(1, len(grads)): 58 | client_grads = np.append(client_grads, grads[i]) # output a flattened array 59 | 60 | 61 | return client_grads 62 | 63 | def process_grad(grads): 64 | ''' 65 | Args: 66 | grads: grad 67 | Return: 68 | a flattened grad in numpy (1-D array) 69 | ''' 70 | 71 | client_grads = grads[0] 72 | 73 | for i in range(1, len(grads)): 74 | client_grads = np.append(client_grads, grads[i]) # output a flattened array 75 | 76 | 77 | return client_grads 78 | 79 | def cosine_sim(a, b): 80 | '''Returns the cosine similarity between two arrays a and b 81 | ''' 82 | dot_product = np.dot(a, b) 83 | norm_a = np.linalg.norm(a) 84 | norm_b = np.linalg.norm(b) 85 | return dot_product * 1.0 / (norm_a * norm_b) 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /flearn/utils/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | def save_obj(obj, name): 4 | with open(name + '.pkl', 'wb') as f: 5 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 6 | 7 | def load_obj(name): 8 | with open(name + '.pkl', 'rb') as f: 9 | return pickle.load(f) 10 | 11 | def iid_divide(l, g): 12 | ''' 13 | divide list l among g groups 14 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements 15 | returns a list of groups 16 | ''' 17 | num_elems = len(l) 18 | group_size = int(len(l)/g) 19 | num_big_groups = num_elems - g * group_size 20 | num_small_groups = g - num_big_groups 21 | glist = [] 22 | for i in range(num_small_groups): 23 | glist.append(l[group_size*i:group_size*(i+1)]) 24 | bi = group_size*num_small_groups 25 | group_size += 1 26 | for i in range(num_big_groups): 27 | glist.append(l[bi+group_size*i:bi+group_size*(i+1)]) 28 | return glist -------------------------------------------------------------------------------- /full_results_real.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/full_results_real.pdf -------------------------------------------------------------------------------- /full_results_real_other.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/full_results_real_other.pdf -------------------------------------------------------------------------------- /full_results_synthetic.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/full_results_synthetic.pdf -------------------------------------------------------------------------------- /images/compare_results_real.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/images/compare_results_real.png -------------------------------------------------------------------------------- /images/full_results_real.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/images/full_results_real.png -------------------------------------------------------------------------------- /logs/sample/nist_0_fedsim.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/logs/sample/nist_0_fedsim.pdf -------------------------------------------------------------------------------- /logs/sample/params.json: -------------------------------------------------------------------------------- 1 | {"eval_every": 1, "batch_size": 10, "num_rounds": 200, "seed": 0, "clients_per_round": 20, "model_params": [26], "mu": 0, "ex_name": "nist_0", "num_groups": 9, "model": "mclr", "dataset": "nist", "num_epochs": 20, "num_iters": 1, "learning_rate": 0.003, "drop_percent": 0.0, "optimizer": "fedsim"} -------------------------------------------------------------------------------- /logs/sample/timetaken.csv: -------------------------------------------------------------------------------- 1 | 4.214725017547607 2 | 5.838357448577881 3 | 2.27826189994812 4 | 3.640329360961914 5 | 3.9433250427246094 6 | 4.371510982513428 7 | 4.614236116409302 8 | 3.6348283290863037 9 | 4.996238946914673 10 | 4.2246363162994385 11 | 3.8461761474609375 12 | 3.9423208236694336 13 | 4.81032657623291 14 | 4.6327292919158936 15 | 4.864189863204956 16 | 4.644549608230591 17 | 3.357804775238037 18 | 3.611405849456787 19 | 4.387576103210449 20 | 3.3639280796051025 21 | 5.5443501472473145 22 | 2.9019081592559814 23 | 2.857591152191162 24 | 3.4291489124298096 25 | 3.949673652648926 26 | 3.728832483291626 27 | 3.3412296772003174 28 | 3.3335158824920654 29 | 4.09697413444519 30 | 6.109966278076172 31 | 4.153619766235352 32 | 2.502737283706665 33 | 3.415215015411377 34 | 2.3686652183532715 35 | 3.5048797130584717 36 | 3.1543667316436768 37 | 4.710537672042847 38 | 3.802248477935791 39 | 2.779290199279785 40 | 3.647050380706787 41 | 2.7246694564819336 42 | 3.9133896827697754 43 | 3.7519407272338867 44 | 2.7095303535461426 45 | 3.036364793777466 46 | 3.8586699962615967 47 | 5.336363315582275 48 | 3.487077236175537 49 | 3.1062116622924805 50 | 4.089224338531494 51 | 4.3005688190460205 52 | 3.6237943172454834 53 | 5.833637237548828 54 | 4.375678300857544 55 | 3.372389316558838 56 | 5.044728994369507 57 | 3.2794342041015625 58 | 3.5190436840057373 59 | 3.564743995666504 60 | 3.7481768131256104 61 | 3.776792049407959 62 | 3.156827449798584 63 | 4.273126602172852 64 | 3.272702693939209 65 | 3.364558219909668 66 | 3.6134274005889893 67 | 4.213018178939819 68 | 3.009146213531494 69 | 2.7177224159240723 70 | 3.214456558227539 71 | 2.91222882270813 72 | 5.4962992668151855 73 | 5.531466484069824 74 | 
4.0989015102386475 75 | 4.856179475784302 76 | 3.8664450645446777 77 | 4.387919187545776 78 | 2.3184447288513184 79 | 3.5940182209014893 80 | 5.098527908325195 81 | 3.8258328437805176 82 | 3.7295711040496826 83 | 3.320307731628418 84 | 5.78113055229187 85 | 3.460848808288574 86 | 3.7840187549591064 87 | 3.936537504196167 88 | 4.583587884902954 89 | 4.4354448318481445 90 | 3.77303147315979 91 | 5.793760299682617 92 | 3.4721591472625732 93 | 4.305266618728638 94 | 3.124553680419922 95 | 4.849768400192261 96 | 6.200813055038452 97 | 4.458336591720581 98 | 5.42701268196106 99 | 4.517723083496094 100 | 3.4376020431518555 101 | 5.089099645614624 102 | 4.69443416595459 103 | 4.045530557632446 104 | 3.4828686714172363 105 | 3.4498486518859863 106 | 4.347341299057007 107 | 3.4853549003601074 108 | 3.4179494380950928 109 | 4.953992605209351 110 | 4.152960538864136 111 | 2.546747922897339 112 | 5.421801328659058 113 | 6.776229381561279 114 | 3.7474520206451416 115 | 4.845839262008667 116 | 2.8177096843719482 117 | 3.4865877628326416 118 | 3.222196102142334 119 | 3.5015692710876465 120 | 4.133359432220459 121 | 4.52689003944397 122 | 3.3103301525115967 123 | 4.601098299026489 124 | 3.8205862045288086 125 | 4.177612543106079 126 | 4.034154653549194 127 | 3.4860522747039795 128 | 3.5163798332214355 129 | 3.654008388519287 130 | 3.204397439956665 131 | 3.4267284870147705 132 | 4.247891664505005 133 | 3.4885201454162598 134 | 3.1615872383117676 135 | 4.064049243927002 136 | 3.8435089588165283 137 | 3.8735852241516113 138 | 4.101383209228516 139 | 3.107581615447998 140 | 3.2751734256744385 141 | 4.465871334075928 142 | 3.7008581161499023 143 | 3.2754745483398438 144 | 3.2202935218811035 145 | 4.118159770965576 146 | 4.30841326713562 147 | 3.80255389213562 148 | 3.1343297958374023 149 | 3.8692893981933594 150 | 4.835238933563232 151 | 3.2457480430603027 152 | 3.8979451656341553 153 | 4.613762855529785 154 | 2.955164909362793 155 | 3.2703709602355957 156 | 4.195127248764038 157 | 4.027691125869751 158 | 3.856401205062866 159 | 3.0798890590667725 160 | 2.8761911392211914 161 | 4.519295692443848 162 | 4.205658912658691 163 | 3.9707353115081787 164 | 4.714170694351196 165 | 4.722447633743286 166 | 3.4032065868377686 167 | 3.8246989250183105 168 | 4.494217157363892 169 | 3.7432897090911865 170 | 4.892650842666626 171 | 4.120609998703003 172 | 4.131703615188599 173 | 4.355011701583862 174 | 4.092770576477051 175 | 3.4949283599853516 176 | 3.3183343410491943 177 | 3.649127721786499 178 | 3.1163415908813477 179 | 3.616954803466797 180 | 5.093567132949829 181 | 2.825251579284668 182 | 3.7706644535064697 183 | 3.6947431564331055 184 | 4.169904470443726 185 | 3.7774462699890137 186 | 5.93481183052063 187 | 3.4171574115753174 188 | 3.385265588760376 189 | 2.6730124950408936 190 | 4.493683099746704 191 | 2.276113748550415 192 | 3.825378179550171 193 | 3.441279888153076 194 | 4.027428150177002 195 | 3.4155771732330322 196 | 4.0339367389678955 197 | 4.612143039703369 198 | 3.1436209678649902 199 | 3.6796154975891113 200 | 3.641611337661743 201 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import importlib 4 | import random 5 | import os 6 | import tensorflow as tf 7 | from flearn.utils.model_utils import read_data 8 | # print(tf.__version__) 9 | # tf = tf.compat.v1 10 | # GLOBAL PARAMETERS 11 | OPTIMIZERS = ['fedavg', 'fedprox', 'feddane', 
'fedddane', 'fedsgd', 'fedprox_origin', 'fedsim'] 12 | DATASETS = ['sent140', 'nist', 'shakespeare', 'mnist','mex' , 13 | 'synthetic_iid', 'synthetic_0_0', 'synthetic_0.5_0.5', 'synthetic_1_1', 14 | 'synthetic_0.25_0.25', 15 | 'synthetic_0.75_0.75', 16 | 'synthetic_0.25_0.75', 17 | 'synthetic_0.75_0.25', 18 | 'news', 19 | 'goodreads' 20 | 21 | ] # NIST is EMNIST in the paepr 22 | 23 | 24 | MODEL_PARAMS = { 25 | 'sent140.bag_dnn': (2,), # num_classes 26 | 'sent140.stacked_lstm': (25, 2, 100), # seq_len, num_classes, num_hidden 27 | 'sent140.stacked_lstm_no_embeddings': (25, 2, 100), # seq_len, num_classes, num_hidden 28 | 'nist.mclr': (26,), # num_classes 29 | 'nist.cnn': (26,), # num_classes 30 | 'mex.mclr': (7,), # num_classes 31 | 'mex.cnn': (7,), # num_classes 32 | 'mex.dnn': (7,), # num_classes 33 | 'mnist.mclr': (10,), # num_classes 34 | 'mnist.cnn': (10,), # num_classes 35 | 'shakespeare.stacked_lstm': (80, 80, 256), # seq_len, emb_dim, num_hidden 36 | 'synthetic.mclr': (10, ), # num_classes 37 | 'news.mclr': (20,), # num_classes 38 | 'goodreads.mclr': (2,), # num_classes 39 | 'goodreads.stacked_lstm': (25,2,10), # seq_len, num_classes, num_hidden 40 | 'goodreads.rnn': (40, 2, 128), # seq_len, num_classes, num_units 41 | 'goodreads.dnn': (2,), # num_classes 42 | 43 | } 44 | 45 | 46 | def read_options(): 47 | ''' Parse command line arguments or load defaults ''' 48 | parser = argparse.ArgumentParser() 49 | 50 | parser.add_argument('--optimizer', 51 | help='name of optimizer;', 52 | type=str, 53 | choices=OPTIMIZERS, 54 | default='fedavg') 55 | parser.add_argument('--dataset', 56 | help='name of dataset;', 57 | type=str, 58 | choices=DATASETS, 59 | default='nist') 60 | parser.add_argument('--model', 61 | help='name of model;', 62 | type=str, 63 | default='stacked_lstm.py') 64 | parser.add_argument('--num_rounds', 65 | help='number of rounds to simulate;', 66 | type=int, 67 | default=-1) 68 | parser.add_argument('--eval_every', 69 | help='evaluate every ____ rounds;', 70 | type=int, 71 | default=-1) 72 | parser.add_argument('--clients_per_round', 73 | help='number of clients trained per round;', 74 | type=int, 75 | default=-1) 76 | parser.add_argument('--batch_size', 77 | help='batch size when clients train on data;', 78 | type=int, 79 | default=10) 80 | parser.add_argument('--num_epochs', 81 | help='number of epochs when clients train on data;', 82 | type=int, 83 | default=1) 84 | parser.add_argument('--num_iters', 85 | help='number of iterations when clients train on data;', 86 | type=int, 87 | default=1) 88 | parser.add_argument('--learning_rate', 89 | help='learning rate for inner solver;', 90 | type=float, 91 | default=0.003) 92 | parser.add_argument('--mu', 93 | help='constant for prox;', 94 | type=float, 95 | default=0) 96 | parser.add_argument('--seed', 97 | help='seed for randomness;', 98 | type=int, 99 | default=0) 100 | parser.add_argument('--drop_percent', 101 | help='percentage of slow devices', 102 | type=float, 103 | default=0.1) 104 | parser.add_argument('--num_groups', 105 | help='Number of groups;', 106 | type=int, 107 | default=1) 108 | parser.add_argument('--ex_name', 109 | help='Run Name to identify;', 110 | type=str, 111 | default='dev') 112 | 113 | try: parsed = vars(parser.parse_args()) 114 | except IOError as msg: parser.error(str(msg)) 115 | 116 | # Set seeds 117 | random.seed(1 + parsed['seed']) 118 | np.random.seed(12 + parsed['seed']) 119 | tf.set_random_seed(123 + parsed['seed']) 120 | 121 | 122 | # load selected model 123 | if 
parsed['dataset'].startswith("synthetic"): # all synthetic datasets use the same model 124 | model_path = '%s.%s.%s.%s' % ('flearn', 'models', 'synthetic', parsed['model']) 125 | else: 126 | model_path = '%s.%s.%s.%s' % ('flearn', 'models', parsed['dataset'], parsed['model']) 127 | 128 | mod = importlib.import_module(model_path) 129 | learner = getattr(mod, 'Model') 130 | print(model_path) 131 | print(learner) 132 | 133 | # load selected trainer 134 | opt_path = 'flearn.trainers.%s' % parsed['optimizer'] 135 | mod = importlib.import_module(opt_path) 136 | optimizer = getattr(mod, 'Server') 137 | 138 | print(opt_path) 139 | print(mod) 140 | print(optimizer) 141 | 142 | # add selected model parameter 143 | parsed['model_params'] = MODEL_PARAMS['.'.join(model_path.split('.')[2:])] 144 | 145 | # print and return 146 | maxLen = max([len(ii) for ii in parsed.keys()]); 147 | fmtString = '\t%' + str(maxLen) + 's : %s'; 148 | print('Arguments:') 149 | for keyPair in sorted(parsed.items()): print(fmtString % keyPair) 150 | 151 | return parsed, learner, optimizer 152 | 153 | def main(): 154 | # suppress tf warnings 155 | # tf.logging.set_verbosity(tf.logging.WARN) 156 | tf.logging.set_verbosity(tf.logging.ERROR) 157 | # parse command line arguments 158 | options, learner, optimizer = read_options() 159 | 160 | # read data 161 | train_path = os.path.join('data', options['dataset'], 'data', 'train') 162 | test_path = os.path.join('data', options['dataset'], 'data', 'test') 163 | dataset = read_data(train_path, test_path) 164 | print(learner) 165 | print(options) 166 | # call appropriate trainer 167 | t = optimizer(options, learner, dataset) 168 | t.train() 169 | 170 | if __name__ == '__main__': 171 | main() 172 | -------------------------------------------------------------------------------- /plot_fedsim_other.py: -------------------------------------------------------------------------------- 1 | # Used to generate - full_results_real_other.pdf 2 | 3 | import json 4 | import os 5 | import csv 6 | import matplotlib.pyplot as plt 7 | import matplotlib 8 | 9 | color_avg ="#ff7f0e" 10 | color_prox ="#13CA91" 11 | color_sim ="#17becf" 12 | 13 | color_avg ="#ff7f0e" 14 | color_prox ="#fb99bc" 15 | color_sim ="#17becf" 16 | linewidth = 1.8 17 | 18 | ROUNDS = 501 19 | rounds = [i for i in range(ROUNDS)] 20 | 21 | datasets = ["mnist","femnist", "mex","goodreads"] 22 | 23 | all = {} 24 | for ds in datasets: 25 | dataset = "results/other/"+ds+".csv" 26 | avg_rounds = [] 27 | avg_test_acc = [] 28 | prox_rounds = [] 29 | prox_test_acc = [] 30 | sim_rounds = [] 31 | sim_test_acc = [] 32 | with open(dataset, 33 | mode='r') as csv_file: 34 | csv_reader = csv.DictReader(csv_file) 35 | line_count = 0 36 | for row in csv_reader: 37 | if line_count == 0: 38 | line_count += 1 39 | avg_rounds.append(float(row["round"])) 40 | avg_test_acc.append(float(row["avg"])) 41 | prox_rounds.append(float(row["round"])) 42 | prox_test_acc.append(float(row["prox"])) 43 | sim_rounds.append(float(row["round"])) 44 | sim_test_acc.append(float(row["sim"])) 45 | 46 | line_count += 1 47 | 48 | all[ds] = {"avg": [], "sim": [], "prox": []} 49 | all[ds]["avg"] = avg_test_acc 50 | all[ds]["sim"] = sim_test_acc 51 | all[ds]["prox"] = prox_test_acc 52 | 53 | 54 | if(False): 55 | print(False) 56 | else: 57 | fig, ax = plt.subplots(2, 2, figsize=[10, 8]) 58 | # linewidth = 1.2 59 | 60 | ax[0,0].plot([i for i in range(31)], all["mnist"]["sim"], linewidth=linewidth, color=color_sim, label="FedSim") 61 | ax[0,0].plot([i for i in range(31)], 
all["mnist"]["avg"],":", alpha=1, linewidth=linewidth, color=color_avg, label="FedAvg") 62 | ax[0,0].plot([i for i in range(31)], all["mnist"]["prox"], "-.", alpha=1, linewidth=linewidth, color=color_prox, label="FedProx") 63 | ax[0,0].set_title("MNIST - CNN",fontweight='bold') 64 | ax[0,0].set_xlim(0, 31) 65 | ax[0,0].set_ylim(0.2, 0.9) 66 | 67 | ax[0,1].plot([i for i in range(501)], all["femnist"]["sim"], linewidth=linewidth, color=color_sim) 68 | ax[0,1].plot([i for i in range(501)], all["femnist"]["avg"],":", alpha=1, linewidth=linewidth, color=color_avg) 69 | ax[0,1].plot([i for i in range(501)], all["femnist"]["prox"], "-.", alpha=1, linewidth=linewidth, color=color_prox) 70 | ax[0,1].set_title("FEMNIST - CNN", fontweight='bold') 71 | ax[0,1].set_xlim(0, 501) 72 | ax[0,1].set_ylim(0.6, 0.95) 73 | 74 | # 75 | ax[1,0].plot([i for i in range(201)], all["mex"]["sim"], linewidth=linewidth, color=color_sim) 76 | ax[1,0].plot([i for i in range(201)], all["mex"]["avg"],":", alpha=1, linewidth=linewidth, color=color_avg) 77 | ax[1,0].plot([i for i in range(201)], all["mex"]["prox"], "-.", alpha=1, linewidth=linewidth, color=color_prox) 78 | ax[1,0].set_title("Fed-MEx - MLP", fontweight='bold') 79 | ax[1,0].set_xlim(0, 201) 80 | ax[1,0].set_ylim(0.68, 0.98) 81 | # 82 | # 83 | ax[1,1].plot([i for i in range(251)], all["goodreads"]["sim"], linewidth=linewidth, color=color_sim) 84 | ax[1,1].plot([i for i in range(251)], all["goodreads"]["avg"],":", alpha=1, linewidth=linewidth, color=color_avg) 85 | ax[1,1].plot([i for i in range(251)], all["goodreads"]["prox"], "-.", alpha=1, linewidth=linewidth, color=color_prox) 86 | ax[1,1].set_title("Fed-Goodreads - RNN", fontweight='bold') 87 | ax[1,1].set_xlim(0, 251) 88 | ax[1,1].set_ylim(0.45, 0.6) 89 | 90 | 91 | plt.subplots_adjust(hspace=0.5) 92 | ax[0,0].set_xlabel("# Rounds") 93 | ax[0,0].set_ylabel('Test Accuracy') 94 | ax[1,0].set_xlabel("# Rounds") 95 | ax[1,0].set_ylabel('Test Accuracy') 96 | 97 | for i in range(2): 98 | for j in range(2): 99 | ax[j, i].spines['bottom'].set_color('#dddddd') 100 | ax[j, i].spines['top'].set_color('#dddddd') 101 | ax[j, i].spines['right'].set_color('#dddddd') 102 | ax[j, i].spines['left'].set_color('#dddddd') 103 | ax[j, i].tick_params(color='#dddddd') 104 | # ax[j, i].set_xlim(0, ROUNDS) 105 | 106 | fig.legend(frameon=False, loc='lower center', ncol=3, prop=dict(weight='normal', size=13), 107 | borderaxespad=-0.3) # note: different from plt.legend 108 | 109 | plt.tight_layout() 110 | plt.subplots_adjust(bottom=0.09) 111 | plt.subplots_adjust(left=0.061, wspace=0.11) 112 | 113 | plt.show() 114 | fig.savefig("full_results_real_cnn.pdf") 115 | 116 | exit(0) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | tensorflow-gpu==1.10 4 | Pillow 5 | matplotlib 6 | jupyter 7 | tqdm 8 | -------------------------------------------------------------------------------- /results/main/00.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.10686600221483941,0.10686600221483941,0.10686600221483941 3 | 1,0.19728682170542636,0.150609080841639,0.15498338870431894 4 | 2,0.28405315614617943,0.20049833887043192,0.20775193798449615 5 | 3,0.37547065337763014,0.2719822812846069,0.26904761904761904 6 | 4,0.42480620155038756,0.3246954595791805,0.3268549280177187 7 | 5,0.4662236987818384,0.37563676633444076,0.3945182724252492 8 | 
6,0.49689922480620163,0.4135658914728682,0.45127353266888154 9 | 7,0.5301218161683278,0.456312292358804,0.5066998892580287 10 | 8,0.5513289036544851,0.4846622369878184,0.5493355481727576 11 | 9,0.570764119601329,0.5093023255813953,0.5907530454042083 12 | 10,0.588704318936877,0.5301218161683279,0.6199889258028792 13 | 11,0.6077519379844961,0.5534330011074197,0.6521594684385382 14 | 12,0.6207087486157252,0.5707641196013289,0.6713178294573644 15 | 13,0.6317829457364342,0.5864894795127353,0.6911406423034328 16 | 14,0.6397009966777407,0.5996124031007751,0.7073643410852712 17 | 15,0.6477851605758582,0.6109634551495017,0.725249169435216 18 | 16,0.6532668881506091,0.620874861572536,0.7402547065337762 19 | 17,0.6604097452934662,0.6337763012181619,0.7567552602436324 20 | 18,0.6623477297895903,0.6428017718715393,0.7708748615725362 21 | 19,0.6667774086378738,0.6513842746400886,0.7817829457364341 22 | 20,0.6704872646733112,0.658139534883721,0.7903100775193798 23 | 21,0.6742524916943521,0.6646179401993355,0.8044296788482836 24 | 22,0.6779069767441862,0.6698228128460686,0.8104651162790698 25 | 23,0.6820044296788482,0.6768549280177188,0.8137873754152825 26 | 24,0.6867663344407531,0.6838870431893688,0.8161129568106312 27 | 25,0.6909745293466224,0.6895348837209302,0.8234219269102991 28 | 26,0.6937430786267996,0.6973975636766335,0.8285714285714285 29 | 27,0.6987264673311187,0.7022702104097452,0.8290697674418606 30 | 28,0.703266888150609,0.7080287929125139,0.8312292358803987 31 | 29,0.7064784053156146,0.7134551495016611,0.832281284606866 32 | 30,0.7094130675526025,0.7184939091915836,0.833499446290144 33 | 31,0.7136212624584718,0.7230343300110741,0.8342746400885938 34 | 32,0.7163898117386488,0.7266888150609081,0.8348837209302326 35 | 33,0.7213178294573643,0.7311184939091916,0.8361572535991141 36 | 34,0.7244739756367663,0.7364341085271319,0.8365448504983389 37 | 35,0.7274640088593576,0.74296788482835,0.8370431893687708 38 | 36,0.7304540420819491,0.7461240310077519,0.8398671096345515 39 | 37,0.7317275747508305,0.7501107419712071,0.8409745293466224 40 | 38,0.733610188261351,0.7538759689922481,0.8414174972314508 41 | 39,0.7357142857142857,0.7580287929125138,0.8410299003322259 42 | 40,0.736655592469546,0.7620155038759691,0.8426356589147287 43 | 41,0.7392580287929125,0.7665559246954596,0.8454595791805093 44 | 42,0.7423034330011075,0.7696566998892581,0.8468992248062016 45 | 43,0.743798449612403,0.7721483942414176,0.8476190476190476 46 | 44,0.7460132890365447,0.774418604651163,0.8479512735326691 47 | 45,0.7475083056478405,0.7775747508305647,0.8476190476190476 48 | 46,0.749390919158361,0.7799557032115171,0.8468992248062016 49 | 47,0.7510520487264674,0.7829457364341087,0.8472868217054264 50 | 48,0.7537652270210411,0.7868770764119601,0.8476190476190476 51 | 49,0.7552602436323366,0.7900332225913621,0.850498338870432 52 | 50,0.7580287929125138,0.7925249169435217,0.8503322259136213 53 | 51,0.759468438538206,0.7947397563676635,0.851550387596899 54 | 52,0.7617386489479513,0.7977297895902546,0.8501107419712072 55 | 53,0.7645071982281285,0.8,0.8493909191583611 56 | 54,0.7672203765227021,0.8032115171650055,0.8484496124031008 57 | 55,0.7688815060908084,0.8050387596899224,0.8501661129568107 58 | 56,0.7711517165005538,0.8075304540420819,0.8509966777408637 59 | 57,0.7724806201550388,0.8102436323366555,0.8516057585825028 60 | 58,0.7746954595791806,0.8132890365448503,0.8518826135105204 61 | 59,0.7766888150609081,0.8150055370985602,0.8532668881506089 62 | 60,0.7776301218161684,0.8167220376522701,0.8529346622369878 63 | 
61,0.7796788482834995,0.8182724252491694,0.8517165005537097 64 | 62,0.7821151716500554,0.8204872646733112,0.8513289036544851 65 | 63,0.7843853820598007,0.8217607973421929,0.8515503875968992 66 | 64,0.7863787375415283,0.8231450719822814,0.8523809523809524 67 | 65,0.7888704318936878,0.8241971207087487,0.8533776301218162 68 | 66,0.788372093023256,0.825249169435216,0.8553709856035437 69 | 67,0.7893687707641196,0.8265227021040974,0.8563676633444076 70 | 68,0.7904761904761906,0.8272978959025472,0.8575858250276854 71 | 69,0.7923034330011074,0.8287929125138428,0.8589147286821704 72 | 70,0.7946290143964563,0.8309523809523809,0.8598006644518272 73 | 71,0.796124031007752,0.8324473975636766,0.8606312292358803 74 | 72,0.7973421926910299,0.8334440753045403,0.8613510520487264 75 | 73,0.7987264673311185,0.8353266888150609,0.8617386489479514 76 | 74,0.7998338870431894,0.8368770764119602,0.8618493909191584 77 | 75,0.8021594684385382,0.837984496124031,0.8622923588039867 78 | 76,0.803156146179402,0.8379844961240309,0.861904761904762 79 | 77,0.8047065337763013,0.8393133997785162,0.8606866002214838 80 | 78,0.8062015503875968,0.8397563676633445,0.8615725359911407 81 | 79,0.8068106312292359,0.8395348837209302,0.8612956810631229 82 | 80,0.8078626799557033,0.8405869324473976,0.8621816168327796 83 | 81,0.8081949058693245,0.8414728682170542,0.8612956810631229 84 | 82,0.8084163898117386,0.8419158361018825,0.862126245847176 85 | 83,0.810077519379845,0.8431339977851606,0.8638981173864895 86 | 84,0.8106312292358804,0.8433554817275747,0.8640088593576967 87 | 85,0.8105758582502768,0.8434662236987818,0.8632890365448506 88 | 86,0.8119601328903654,0.8434108527131783,0.8636766334440752 89 | 87,0.8123477297895901,0.8441306755260243,0.8632336655592469 90 | 88,0.8130675526024363,0.8440753045404209,0.8638981173864896 91 | 89,0.8129014396456256,0.8454042081949058,0.8630675526024362 92 | 90,0.813233665559247,0.8460686600221484,0.8653377630121816 93 | 91,0.8131782945736433,0.8453488372093023,0.8666666666666666 94 | 92,0.813233665559247,0.8459025470653377,0.8666112956810631 95 | 93,0.8138427464008858,0.8460686600221484,0.8652823920265781 96 | 94,0.8140088593576965,0.8466223698781837,0.8659468438538206 97 | 95,0.8147840531561462,0.8466223698781837,0.8658361018826135 98 | 96,0.8152270210409746,0.8477297895902547,0.8661683277962346 99 | 97,0.8150055370985603,0.848671096345515,0.8668327796234773 100 | 98,0.8166112956810632,0.8494462901439643,0.8669435215946844 101 | 99,0.8160022148394241,0.8493909191583611,0.867940199335548 102 | 100,0.8158361018826136,0.8492248062015503,0.8684939091915835 103 | -------------------------------------------------------------------------------- /results/main/0505.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.09072681704260653,0.09072681704260653,0.09072681704260653 3 | 1,0.24445488721804512,0.17781954887218046,0.13348997493734335 4 | 2,0.3483709273182957,0.2517857142857143,0.1599624060150376 5 | 3,0.41810776942355893,0.3097431077694236,0.22465538847117797 6 | 4,0.4887218045112783,0.3969924812030075,0.31130952380952376 7 | 5,0.5201441102756892,0.4345238095238095,0.35037593984962406 8 | 6,0.5390037593984962,0.4683897243107769,0.40673558897243106 9 | 7,0.5485902255639097,0.4948621553884711,0.43170426065162903 10 | 8,0.5528508771929825,0.5153822055137843,0.4629385964912281 11 | 9,0.5605263157894737,0.5253446115288222,0.4854949874686717 12 | 10,0.5708959899749374,0.5400375939849623,0.5228070175438597 13 | 
11,0.5766917293233084,0.5589598997493734,0.5434523809523809 14 | 12,0.5821428571428572,0.5641290726817043,0.5586152882205514 15 | 13,0.5852443609022556,0.5731516290726816,0.5788847117794488 16 | 14,0.5906641604010026,0.5820175438596491,0.5803571428571428 17 | 15,0.5949561403508771,0.5899749373433583,0.5952694235588972 18 | 16,0.6005325814536341,0.6005325814536341,0.6047619047619048 19 | 17,0.6053884711779449,0.605983709273183,0.6109962406015036 20 | 18,0.6090852130325815,0.6094924812030075,0.6142543859649123 21 | 19,0.612092731829574,0.6137844611528822,0.6180764411027567 22 | 20,0.6142857142857144,0.619204260651629,0.6272556390977444 23 | 21,0.6162280701754387,0.6265977443609023,0.6341165413533835 24 | 22,0.6196741854636592,0.6302944862155389,0.6427318295739348 25 | 23,0.6208333333333333,0.6356516290726817,0.6488721804511278 26 | 24,0.6243421052631578,0.637750626566416,0.6519110275689224 27 | 25,0.6271929824561403,0.6433270676691729,0.6565162907268169 28 | 26,0.6286340852130325,0.6491541353383459,0.6625313283208021 29 | 27,0.6284774436090226,0.6515664160401003,0.6670739348370928 30 | 28,0.6306390977443609,0.6551065162907268,0.6715852130325815 31 | 29,0.6306704260651629,0.6569548872180451,0.6766290726817042 32 | 30,0.6312656641604011,0.6570175438596492,0.6796992481203008 33 | 31,0.6310150375939849,0.6579260651629072,0.6843984962406015 34 | 32,0.6324561403508772,0.6607769423558897,0.6880639097744362 35 | 33,0.6324874686716793,0.6630952380952381,0.6892230576441103 36 | 34,0.6328634085213032,0.6670426065162907,0.6920426065162907 37 | 35,0.6329260651629073,0.669016290726817,0.6954260651629074 38 | 36,0.6342105263157894,0.6716478696741854,0.6962719298245614 39 | 37,0.6366541353383459,0.6741228070175439,0.6974310776942356 40 | 38,0.6382205513784461,0.6788220551378448,0.6985275689223057 41 | 39,0.6385651629072682,0.680795739348371,0.7010964912280703 42 | 40,0.6403195488721805,0.6828320802005011,0.7032894736842105 43 | 41,0.6414473684210525,0.6838345864661654,0.7030075187969925 44 | 42,0.6416040100250626,0.6838972431077694,0.7036340852130327 45 | 43,0.6426378446115288,0.6844611528822055,0.7045426065162909 46 | 44,0.6445175438596491,0.6850250626566415,0.7037593984962406 47 | 45,0.6451441102756892,0.6864974937343359,0.7056390977443608 48 | 46,0.6458020050125313,0.6887531328320803,0.7061090225563909 49 | 47,0.6476817042606515,0.6892857142857143,0.706547619047619 50 | 48,0.649279448621554,0.6897869674185464,0.7080200501253133 51 | 49,0.6501253132832081,0.6914473684210527,0.7082393483709273 52 | 50,0.6510338345864662,0.6928884711779449,0.7099310776942356 53 | 51,0.650407268170426,0.6943295739348371,0.7090538847117794 54 | 52,0.6503446115288222,0.6966478696741855,0.7104010025062657 55 | 53,0.6494674185463658,0.6972431077694236,0.7110902255639098 56 | 54,0.650093984962406,0.699498746867168,0.7108082706766917 57 | 55,0.649780701754386,0.6997807017543859,0.7128759398496242 58 | 56,0.6503132832080201,0.7006892230576443,0.7130952380952381 59 | 57,0.6506265664160401,0.7012844611528822,0.7139097744360904 60 | 58,0.6508145363408523,0.7021616541353385,0.7148809523809524 61 | 59,0.6515037593984963,0.7016290726817042,0.7156641604010027 62 | 60,0.65,0.7035714285714286,0.7162280701754387 63 | 61,0.6513784461152882,0.7062969924812028,0.7163220551378447 64 | 62,0.6522556390977444,0.7083020050125314,0.7166666666666667 65 | 63,0.6521616541353383,0.7119674185463658,0.7180137844611528 66 | 64,0.6523496240601503,0.7130952380952381,0.718389724310777 67 | 65,0.6532581453634085,0.7153195488721804,0.718796992481203 68 | 
66,0.6536340852130327,0.7162593984962405,0.718984962406015 69 | 67,0.6536027568922306,0.7144736842105264,0.7197681704260651 70 | 68,0.6531641604010026,0.7185776942355889,0.7201754385964912 71 | 69,0.6533834586466165,0.7210526315789473,0.7205827067669174 72 | 70,0.6525062656641604,0.7226190476190476,0.7197994987468673 73 | 71,0.6514097744360902,0.7234335839598998,0.7222431077694236 74 | 72,0.6506578947368422,0.7256578947368421,0.724561403508772 75 | 73,0.64984335839599,0.7270989974937343,0.7246240601503761 76 | 74,0.6490601503759397,0.7283834586466165,0.7265664160401003 77 | 75,0.6490288220551379,0.7293859649122807,0.7272243107769425 78 | 76,0.6505012531328321,0.730482456140351,0.7276002506265664 79 | 77,0.6513471177944863,0.7319862155388471,0.7279448621553885 80 | 78,0.6525689223057644,0.7308583959899749,0.7290100250626566 81 | 79,0.6524122807017544,0.7328007518796993,0.7293546365914787 82 | 80,0.6539786967418547,0.7321428571428571,0.7302944862155388 83 | 81,0.655764411027569,0.7354010025062656,0.731265664160401 84 | 82,0.6556704260651629,0.7346177944862156,0.731766917293233 85 | 83,0.655451127819549,0.7359022556390978,0.7331140350877193 86 | 84,0.6561716791979949,0.736685463659148,0.7338345864661655 87 | 85,0.656234335839599,0.7375000000000002,0.7347431077694236 88 | 86,0.6555764411027569,0.7393170426065164,0.7350250626566417 89 | 87,0.6572368421052632,0.7400375939849624,0.7350877192982457 90 | 88,0.6591165413533834,0.7404448621553885,0.7354323308270676 91 | 89,0.661810776942356,0.7419172932330825,0.7355889724310779 92 | 90,0.6634711779448621,0.7439223057644112,0.7364661654135338 93 | 91,0.6654135338345865,0.7448308270676692,0.73640350877193 94 | 92,0.6691416040100251,0.7464285714285714,0.7354323308270676 95 | 93,0.6720864661654135,0.7473684210526315,0.7361528822055139 96 | 94,0.675281954887218,0.7473997493734337,0.73687343358396 97 | 95,0.6777568922305764,0.7484962406015038,0.7357769423558896 98 | 96,0.6766290726817044,0.7490914786967416,0.7360588972431078 99 | 97,0.676347117794486,0.7501566416040102,0.7348370927318296 100 | 98,0.6764724310776943,0.7509711779448622,0.7351817042606517 101 | 99,0.6766917293233082,0.7525689223057644,0.7351503759398496 102 | 100,0.6765037593984963,0.7526002506265663,0.7354949874686717 103 | -------------------------------------------------------------------------------- /results/main/11.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.10363732208750658,0.10363732208750658,0.10363732208750658 3 | 1,0.2700316288877175,0.2264891934633632,0.15213494992092777 4 | 2,0.3386399578281497,0.27108592514496577,0.22132314180284657 5 | 3,0.39891934633632053,0.3394306800210859,0.25761729045861886 6 | 4,0.42698998418555606,0.35972588297311553,0.3022667369530838 7 | 5,0.4405376910911966,0.3814443858724301,0.3090142329994729 8 | 6,0.4704533473906167,0.4059040590405904,0.3151818661043753 9 | 7,0.4796257248286768,0.413995782814971,0.3357933579335794 10 | 8,0.4809963099630997,0.434343700579863,0.36731681602530314 11 | 9,0.4937796520822352,0.4537954665260938,0.39156562994201377 12 | 10,0.48943068002108603,0.4445967316816026,0.4137322087506589 13 | 11,0.49583552978386924,0.4593832366895097,0.39973642593568787 14 | 12,0.49525566684238265,0.4679230363732209,0.4363468634686347 15 | 13,0.4990247759620452,0.4785714285714286,0.44064312071692147 16 | 14,0.4979177648919347,0.478360569319979,0.465919873484449 17 | 15,0.5045071164997363,0.49088033737480236,0.4722192936215077 18 | 
16,0.5127833421191356,0.5017923036373221,0.4804691618344755 19 | 17,0.5225355824986823,0.5089878755930416,0.4707432788613601 20 | 18,0.5233526620980496,0.5227464417501319,0.4813916710595678 21 | 19,0.5268318397469689,0.5226937269372693,0.49053769109119655 22 | 20,0.5406167633104904,0.5442540853979968,0.49095940959409595 23 | 21,0.5447285187137586,0.539799683711123,0.5121244069583553 24 | 22,0.5487348444913019,0.5357933579335793,0.49501845018450186 25 | 23,0.5534264628360569,0.5397469688982603,0.5076963626779125 26 | 24,0.559040590405904,0.5387453874538745,0.5034791776489194 27 | 25,0.5594623089088033,0.5386663152345809,0.5087243015287295 28 | 26,0.5601476014760148,0.5370848708487084,0.510411175540327 29 | 27,0.5624670532419611,0.540590405904059,0.513837638376384 30 | 28,0.5661834475487613,0.5385081707959937,0.512967843964154 31 | 29,0.5657880864522932,0.5330785450711648,0.5151555086979441 32 | 30,0.5696889826041118,0.5298892988929889,0.513995782814971 33 | 31,0.5737480231945178,0.5365050079072219,0.5191881918819188 34 | 32,0.5766736953083816,0.5496837111228255,0.5197416974169743 35 | 33,0.578492356352135,0.5531101739588824,0.5189509752240379 36 | 34,0.5808645229309436,0.5606220347917764,0.5162098049551925 37 | 35,0.582683183974697,0.5721929362150765,0.5173695308381655 38 | 36,0.5810753821823932,0.5747759620453348,0.5170795993674222 39 | 37,0.5853452820242488,0.5689509752240379,0.5087770163415919 40 | 38,0.5885081707959937,0.5779652082235108,0.5144702161307327 41 | 39,0.5952029520295202,0.593384290985767,0.5186083289404323 42 | 40,0.5986030574591461,0.5921982076963626,0.522509225092251 43 | 41,0.5952556668423828,0.5883500263574064,0.5198734844491302 44 | 42,0.5958882445967316,0.5838692672641012,0.5183447548761202 45 | 43,0.5954665260938324,0.5865050079072219,0.5216921454928836 46 | 44,0.594148655772272,0.594122298365841,0.5181075382182393 47 | 45,0.5919082762256194,0.5897469688982604,0.5137058513442276 48 | 46,0.6023194517659461,0.6024512387981024,0.5173695308381655 49 | 47,0.6032419609910384,0.5985767000527149,0.5187664733790195 50 | 48,0.6084080126515552,0.6055877701634158,0.5188982604111755 51 | 49,0.6109646810753823,0.6091460200316289,0.5225355824986821 52 | 50,0.6129151291512915,0.6134686346863468,0.5258566157090143 53 | 51,0.6136794939377965,0.6205587770163415,0.5225882973115445 54 | 52,0.6156826568265682,0.6253558249868212,0.5268845545598312 55 | 53,0.6143384290985766,0.6264628360569321,0.5294148655772272 56 | 54,0.61462836056932,0.6302319451765946,0.5343173431734318 57 | 55,0.6150764364786504,0.635239852398524,0.5350026357406431 58 | 56,0.615155508697944,0.6368212967843965,0.5400105429625726 59 | 57,0.6110173958882447,0.6427253558249868,0.5396151818661044 60 | 58,0.6124934106483924,0.6511070110701106,0.535740643120717 61 | 59,0.6074327886136004,0.6515814443858724,0.5371639430680022 62 | 60,0.6040590405904059,0.6511333684765419,0.5375856615709014 63 | 61,0.6079599367422244,0.653690036900369,0.5428307854507116 64 | 62,0.6108328940432262,0.6510279388508171,0.5505535055350554 65 | 63,0.6101739588824461,0.651897733263047,0.5512915129151292 66 | 64,0.6099367422245651,0.65392725355825,0.5512387981022667 67 | 65,0.6131523458091723,0.6544016868740117,0.5515550869794413 68 | 66,0.6105166051660517,0.6575118608328939,0.5531365313653136 69 | 67,0.6076963626779126,0.6504744333157617,0.5528202424881391 70 | 68,0.6022667369530837,0.6341328413284133,0.5575909330521878 71 | 69,0.6038481813389562,0.6340274117026886,0.558144438587243 72 | 70,0.6043753294675803,0.6538745387453874,0.5592250922509225 73 | 
71,0.6063257775434896,0.6521349499209278,0.5631259884027411 74 | 72,0.6079599367422247,0.6604638903531892,0.5588297311544543 75 | 73,0.6088033737480231,0.6615709014232999,0.5602793885081707 76 | 74,0.6081707959936741,0.6510806536636795,0.56004217185029 77 | 75,0.6076172904586188,0.6412229836584081,0.5653663679493938 78 | 76,0.6093305218766474,0.632577754348972,0.5670005271481285 79 | 77,0.6100421718502899,0.6287032156035846,0.5723510806536636 80 | 78,0.6076172904586188,0.6247759620453347,0.5748286768581973 81 | 79,0.6020822351080654,0.625303110173959,0.5699525566684238 82 | 80,0.6006062203479177,0.6178703215603586,0.5699525566684239 83 | 81,0.6038745387453874,0.6240115972588298,0.5685028993147073 84 | 82,0.607116499736426,0.6204269899841856,0.5692409066947812 85 | 83,0.6118608328940432,0.6275171323141802,0.5712177121771218 86 | 84,0.6216921454928835,0.6465735371639431,0.5710068529256722 87 | 85,0.6225882973115445,0.6595677385345283,0.5766473379019504 88 | 86,0.6228518713758566,0.6595413811280969,0.5786768581971535 89 | 87,0.6234580917237743,0.6676594623089088,0.5797575118608329 90 | 88,0.6206378492356354,0.6714022140221403,0.5823405376910912 91 | 89,0.6195308381655246,0.6719293621507644,0.5828413284132841 92 | 90,0.6206114918292039,0.6730890880337375,0.5814971006852926 93 | 91,0.6217185028993147,0.6721665788086453,0.5766736953083818 94 | 92,0.6236162361623616,0.6695835529783869,0.5771481286241434 95 | 93,0.6219293621507643,0.6693726937269372,0.5779124934106483 96 | 94,0.6179493937796521,0.6709541381128098,0.5767791249341064 97 | 95,0.6141539272535582,0.6710859251449658,0.5795466526093832 98 | 96,0.6111491829204004,0.6741433842909857,0.5835529783869267 99 | 97,0.6119926199261995,0.6785977859778599,0.5838165524512388 100 | 98,0.6115445440168686,0.6813916710595678,0.5914602003162889 101 | 99,0.610964681075382,0.6816552451238798,0.5910648392198209 102 | 100,0.6135477069056405,0.6783342119135476,0.5897733263046915 103 | -------------------------------------------------------------------------------- /results/main/2525.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.09173881673881674,0.09173881673881674,0.09173881673881674 3 | 1,0.2435425685425685,0.2174242424242424,0.17453102453102454 4 | 2,0.3255050505050505,0.2979076479076479,0.21313131313131312 5 | 3,0.3713924963924964,0.3396825396825397,0.27575757575757576 6 | 4,0.40584415584415584,0.38582251082251084,0.319011544011544 7 | 5,0.44062049062049063,0.42676767676767674,0.3725108225108225 8 | 6,0.4653318903318904,0.4591991341991343,0.41396103896103903 9 | 7,0.483982683982684,0.4845598845598844,0.45371572871572874 10 | 8,0.5003968253968254,0.5044733044733044,0.4821789321789322 11 | 9,0.5178932178932178,0.5377705627705627,0.5086940836940836 12 | 10,0.5334415584415585,0.5523088023088022,0.5408008658008657 13 | 11,0.536075036075036,0.5648268398268398,0.565873015873016 14 | 12,0.5435064935064935,0.5750360750360749,0.58997113997114 15 | 13,0.5496031746031745,0.5857864357864356,0.6139610389610388 16 | 14,0.5545454545454546,0.5962842712842713,0.6323232323232323 17 | 15,0.5563131313131312,0.6073953823953823,0.6495310245310244 18 | 16,0.5616161616161616,0.6179292929292929,0.666017316017316 19 | 17,0.5634559884559884,0.6275974025974026,0.6742784992784995 20 | 18,0.5669913419913419,0.6356060606060606,0.6850649350649352 21 | 19,0.5691558441558441,0.6441558441558441,0.6968975468975469 22 | 20,0.5748556998556997,0.6488455988455988,0.7085137085137084 23 | 
21,0.5788961038961038,0.6563131313131314,0.7137806637806637 24 | 22,0.5828643578643578,0.6627705627705628,0.7206349206349205 25 | 23,0.5854617604617605,0.6697691197691197,0.7276695526695528 26 | 24,0.5892496392496392,0.6744588744588744,0.7322510822510822 27 | 25,0.5939393939393939,0.6806637806637806,0.7375180375180374 28 | 26,0.5966810966810966,0.685064935064935,0.7399711399711398 29 | 27,0.6015873015873016,0.6893578643578643,0.7474025974025973 30 | 28,0.6053391053391052,0.6936147186147187,0.7512626262626262 31 | 29,0.6066017316017316,0.6975468975468976,0.7537157287157288 32 | 30,0.6103535353535352,0.7007575757575757,0.7601370851370851 33 | 31,0.6145743145743144,0.7052669552669553,0.7636363636363637 34 | 32,0.6153318903318903,0.7084776334776334,0.765981240981241 35 | 33,0.6179292929292929,0.7126984126984125,0.7686147186147186 36 | 34,0.6191919191919192,0.7161616161616161,0.7721139971139971 37 | 35,0.6210317460317459,0.7202741702741702,0.7752164502164501 38 | 36,0.6226190476190475,0.7229437229437229,0.7754329004329006 39 | 37,0.6234126984126984,0.7261904761904762,0.7795093795093796 40 | 38,0.624098124098124,0.729040404040404,0.780916305916306 41 | 39,0.6261904761904761,0.7331529581529582,0.7844155844155846 42 | 40,0.6258297258297257,0.7370851370851371,0.7843795093795095 43 | 41,0.6266233766233764,0.7413419913419914,0.7866883116883118 44 | 42,0.6272727272727272,0.7457792207792208,0.7908369408369409 45 | 43,0.6282106782106782,0.7491702741702742,0.7902597402597404 46 | 44,0.6271645021645021,0.7533189033189034,0.7931096681096682 47 | 45,0.6274531024531024,0.7554112554112554,0.7930375180375182 48 | 46,0.6275252525252525,0.7580086580086581,0.7946248196248197 49 | 47,0.6272727272727273,0.7605339105339106,0.7924963924963926 50 | 48,0.6283910533910534,0.7641053391053392,0.7919552669552671 51 | 49,0.6293650793650793,0.7653318903318903,0.7942279942279943 52 | 50,0.6295454545454545,0.7678571428571429,0.7955627705627707 53 | 51,0.6308441558441558,0.7703823953823955,0.796933621933622 54 | 52,0.6319624819624821,0.771789321789322,0.7982323232323234 55 | 53,0.6325757575757576,0.7731601731601732,0.7968614718614719 56 | 54,0.6332972582972584,0.7756854256854258,0.7970779220779223 57 | 55,0.6329365079365079,0.7772727272727273,0.7972582972582973 58 | 56,0.6348124098124098,0.7793290043290044,0.796933621933622 59 | 57,0.6375541125541127,0.7804112554112554,0.7969336219336219 60 | 58,0.6385281385281385,0.7821789321789323,0.7976551226551227 61 | 59,0.6414862914862915,0.7836940836940839,0.7993506493506496 62 | 60,0.6424963924963926,0.7847402597402598,0.8004329004329005 63 | 61,0.6447691197691199,0.7860750360750361,0.8024531024531025 64 | 62,0.6480519480519482,0.7875901875901877,0.8040043290043292 65 | 63,0.6510461760461762,0.7877344877344878,0.80487012987013 66 | 64,0.6537518037518036,0.7890331890331891,0.8065656565656566 67 | 65,0.6567460317460316,0.7897546897546898,0.8061688311688312 68 | 66,0.6594155844155842,0.7910533910533912,0.8069624819624821 69 | 67,0.663023088023088,0.7933261183261185,0.8077200577200578 70 | 68,0.666053391053391,0.7944083694083696,0.8064574314574315 71 | 69,0.6686868686868687,0.7959235209235208,0.8061688311688312 72 | 70,0.6734126984126984,0.7970418470418471,0.8075396825396826 73 | 71,0.6761544011544011,0.797979797979798,0.8082611832611833 74 | 72,0.6799062049062048,0.8003607503607505,0.8097402597402595 75 | 73,0.6836219336219335,0.8011183261183262,0.8103535353535353 76 | 74,0.6867604617604617,0.8024170274170274,0.8099567099567099 77 | 
75,0.690079365079365,0.8026695526695525,0.8123015873015872 78 | 76,0.6929292929292928,0.8037157287157288,0.8134920634920634 79 | 77,0.6961038961038961,0.8055916305916304,0.8129870129870128 80 | 78,0.6976551226551226,0.8072510822510821,0.8131673881673881 81 | 79,0.6995310245310244,0.8088023088023086,0.8137445887445888 82 | 80,0.7012626262626261,0.8099206349206348,0.8151515151515152 83 | 81,0.7027056277056277,0.8107503607503606,0.8159812409812413 84 | 82,0.7056998556998556,0.8119408369408367,0.8160533910533913 85 | 83,0.7078643578643578,0.8128066378066376,0.8157287157287157 86 | 84,0.7117965367965368,0.8139249639249638,0.8171717171717172 87 | 85,0.7125901875901876,0.8146464646464645,0.8175685425685426 88 | 86,0.7156565656565655,0.8151515151515152,0.8174963924963926 89 | 87,0.7175685425685424,0.814935064935065,0.8194083694083695 90 | 88,0.7203823953823953,0.8155122655122654,0.8203823953823954 91 | 89,0.7228354978354977,0.8163059163059162,0.8208874458874459 92 | 90,0.7254689754689754,0.8171717171717172,0.8216450216450214 93 | 91,0.7272005772005772,0.8172799422799424,0.8214285714285712 94 | 92,0.7288961038961039,0.8182900432900433,0.8216810966810965 95 | 93,0.7303030303030302,0.8190115440115441,0.8217171717171717 96 | 94,0.7313492063492063,0.820165945165945,0.8209235209235209 97 | 95,0.7337301587301587,0.820093795093795,0.82012987012987 98 | 96,0.7353896103896103,0.8206349206349206,0.8203823953823954 99 | 97,0.7342352092352092,0.8207431457431457,0.8203823953823953 100 | 98,0.7353174603174603,0.8211399711399711,0.82012987012987 101 | 99,0.7369047619047618,0.8208152958152957,0.82027417027417 102 | 100,0.738059163059163,0.8207792207792206,0.8208513708513707 103 | -------------------------------------------------------------------------------- /results/main/7575.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.08952205882352941,0.08952205882352941,0.08952205882352941 3 | 1,0.2142331932773109,0.1603991596638656,0.1470325630252101 4 | 2,0.30354516806722687,0.2290703781512605,0.1746323529411765 5 | 3,0.34868697478991606,0.2792804621848739,0.21701680672268908 6 | 4,0.39845063025210087,0.32796743697478986,0.25829831932773106 7 | 5,0.4363970588235294,0.3702993697478991,0.28332457983193277 8 | 6,0.44708508403361347,0.3951155462184874,0.30795693277310926 9 | 7,0.471218487394958,0.4271008403361345,0.32555147058823525 10 | 8,0.48981092436974794,0.4507352941176471,0.34908088235294116 11 | 9,0.49892331932773104,0.469905462184874,0.3680672268907563 12 | 10,0.522452731092437,0.4919117647058824,0.3959033613445378 13 | 11,0.5404149159663867,0.5110556722689076,0.42171743697478986 14 | 12,0.548765756302521,0.5204306722689076,0.439968487394958 15 | 13,0.551969537815126,0.5248949579831933,0.46302521008403363 16 | 14,0.5618172268907564,0.5340336134453781,0.47938550420168063 17 | 15,0.5740283613445378,0.5451943277310924,0.4914915966386555 18 | 16,0.5821691176470588,0.556827731092437,0.5103203781512605 19 | 17,0.5951418067226892,0.5686974789915966,0.5239758403361344 20 | 18,0.6059086134453782,0.5735819327731092,0.5410714285714285 21 | 19,0.6131302521008405,0.5841386554621848,0.5473476890756305 22 | 20,0.6197216386554621,0.5919117647058822,0.5544117647058824 23 | 21,0.6237394957983194,0.598844537815126,0.5591649159663864 24 | 22,0.6294117647058824,0.6061974789915968,0.5713760504201683 25 | 23,0.6328518907563024,0.6116596638655463,0.5852415966386555 26 | 24,0.6338497899159663,0.6178571428571429,0.5950892857142858 27 | 
25,0.6370798319327732,0.623424369747899,0.5972951680672268 28 | 26,0.6377888655462186,0.6293329831932774,0.5955094537815125 29 | 27,0.6387867647058824,0.6359506302521007,0.5985819327731093 30 | 28,0.6405987394957983,0.6394957983193278,0.6110294117647059 31 | 29,0.6407300420168067,0.6431460084033613,0.6109768907563027 32 | 30,0.6372636554621849,0.6464548319327732,0.6170693277310924 33 | 31,0.6387079831932775,0.6492647058823529,0.6209033613445378 34 | 32,0.6387342436974791,0.6518382352941176,0.6298844537815125 35 | 33,0.6405724789915966,0.6546218487394958,0.6314075630252101 36 | 34,0.6431985294117647,0.6580619747899159,0.6334033613445378 37 | 35,0.6461134453781513,0.6602678571428572,0.6369485294117646 38 | 36,0.6468224789915966,0.6630514705882352,0.6376838235294118 39 | 37,0.6479254201680672,0.6654936974789916,0.638813025210084 40 | 38,0.6498424369747899,0.666701680672269,0.6454831932773109 41 | 39,0.6508403361344537,0.6676207983193279,0.6427258403361343 42 | 40,0.6519695378151261,0.670561974789916,0.6432510504201682 43 | 41,0.6539128151260503,0.672951680672269,0.645745798319328 44 | 42,0.6534926470588235,0.6758665966386554,0.6477415966386555 45 | 43,0.652389705882353,0.6785976890756302,0.6514705882352941 46 | 44,0.6499474789915967,0.6817752100840336,0.6504726890756303 47 | 45,0.6492384453781511,0.6847689075630252,0.647531512605042 48 | 46,0.6474264705882353,0.6882615546218488,0.6496060924369749 49 | 47,0.6478466386554622,0.6898634453781513,0.6510504201680671 50 | 48,0.6493172268907562,0.6923581932773111,0.6509453781512604 51 | 49,0.651155462184874,0.6962447478991596,0.652127100840336 52 | 50,0.6530462184873951,0.6997636554621849,0.6529936974789915 53 | 51,0.6535976890756302,0.7014180672268907,0.6488970588235293 54 | 52,0.6536764705882353,0.7022321428571429,0.651811974789916 55 | 53,0.654359243697479,0.7033088235294118,0.6499212184873948 56 | 54,0.6546743697478992,0.7056460084033613,0.6545168067226891 57 | 55,0.6530724789915966,0.7067752100840335,0.6515493697478991 58 | 56,0.6511292016806722,0.7074579831932772,0.6495010504201681 59 | 57,0.6500525210084033,0.7096113445378152,0.6488182773109245 60 | 58,0.6492647058823529,0.710267857142857,0.6475840336134452 61 | 59,0.64968487394958,0.7115808823529411,0.6522846638655462 62 | 60,0.6492647058823529,0.7141806722689076,0.6503939075630252 63 | 61,0.6480829831932773,0.716281512605042,0.6551207983193278 64 | 62,0.6470325630252101,0.7173319327731092,0.6543855042016806 65 | 63,0.6470063025210084,0.719485294117647,0.6552258403361344 66 | 64,0.6489495798319329,0.7206932773109244,0.6561186974789915 67 | 65,0.6518644957983194,0.721796218487395,0.6572216386554622 68 | 66,0.6535714285714287,0.7226365546218487,0.6517594537815127 69 | 67,0.6552783613445379,0.7215861344537816,0.6505252100840336 70 | 68,0.6544905462184873,0.7237132352941177,0.6476102941176471 71 | 69,0.6541228991596638,0.7245273109243697,0.6434611344537815 72 | 70,0.6543067226890756,0.7277310924369749,0.6435399159663865 73 | 71,0.6564075630252102,0.7301207983193279,0.6418067226890757 74 | 72,0.6555672268907563,0.7304884453781512,0.6383928571428572 75 | 73,0.6566439075630253,0.7308035714285714,0.6384191176470588 76 | 74,0.6571953781512606,0.730908613445378,0.6362132352941177 77 | 75,0.6571165966386556,0.7312762605042018,0.635031512605042 78 | 76,0.6571691176470587,0.7308298319327732,0.6341386554621848 79 | 77,0.6576155462184874,0.7295693277310924,0.6344012605042016 80 | 78,0.6585084033613445,0.7300157563025209,0.6353203781512606 81 | 79,0.6604254201680672,0.7305147058823529,0.6372899159663865 82 | 
80,0.6595063025210084,0.731171218487395,0.6397058823529411 83 | 81,0.6568539915966387,0.7334821428571429,0.6371848739495797 84 | 82,0.6535976890756301,0.7341386554621848,0.6373686974789916 85 | 83,0.6554884453781512,0.7352153361344538,0.6379726890756302 86 | 84,0.6561974789915966,0.7357930672268908,0.6404149159663864 87 | 85,0.6573004201680672,0.7361607142857142,0.6392594537815124 88 | 86,0.6580882352941178,0.7375262605042017,0.6441439075630252 89 | 87,0.6607142857142857,0.7375262605042017,0.645640756302521 90 | 88,0.6634191176470589,0.7376050420168068,0.6487132352941177 91 | 89,0.6668329831932772,0.7373949579831932,0.6509716386554624 92 | 90,0.6717436974789915,0.7377626050420169,0.6519170168067226 93 | 91,0.673686974789916,0.7378939075630252,0.6540441176470589 94 | 92,0.676811974789916,0.7380514705882353,0.6541491596638656 95 | 93,0.6772321428571428,0.7382878151260505,0.6560136554621849 96 | 94,0.6781775210084036,0.7387342436974789,0.6535714285714287 97 | 95,0.6774947478991598,0.7395483193277309,0.6543329831932774 98 | 96,0.676890756302521,0.7401785714285716,0.6535714285714286 99 | 97,0.6757615546218487,0.7403886554621847,0.6537289915966387 100 | 98,0.671638655462185,0.7406775210084033,0.6519432773109243 101 | 99,0.6711922268907563,0.7419905462184874,0.6523897058823529 102 | 100,0.6710346638655462,0.742436974789916,0.6547531512605043 103 | -------------------------------------------------------------------------------- /results/main/IID.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.10736468500443656,0.10736468500443656,0.10736468500443656 3 | 1,0.5493877551020409,0.2987045252883762,0.5269210292812776 4 | 2,0.588890860692103,0.4537000887311447,0.5488198757763976 5 | 3,0.6124933451641527,0.5083939662821653,0.5590771960958296 6 | 4,0.6319432120674356,0.5220940550133096,0.5699378881987579 7 | 5,0.6501863354037266,0.52773735581189,0.5810115350488022 8 | 6,0.6649511978704525,0.5311446317657498,0.5907719609582964 9 | 7,0.6774090505767525,0.5336291038154393,0.6009937888198759 10 | 8,0.6871694764862466,0.5367879325643301,0.6100443655723159 11 | 9,0.6956166814551908,0.5388110026619343,0.6173558118899733 12 | 10,0.7048092280390418,0.5423957409050578,0.6253061224489797 13 | 11,0.7144631765749777,0.5459804791481812,0.6319432120674355 14 | 12,0.7219520851818989,0.5490683229813663,0.6384738243123336 15 | 13,0.7295119787045253,0.55226264418811,0.6441526175687667 16 | 14,0.7374622892635314,0.556131322094055,0.6497249334516415 17 | 15,0.7455545696539487,0.5592546583850931,0.6545519077196095 18 | 16,0.7527595385980479,0.5616681455190772,0.6602661934338954 19 | 17,0.7600709849157053,0.5647914818101154,0.6650576752440107 20 | 18,0.7663886424134871,0.5674889086069208,0.6693167701863354 21 | 19,0.7720319432120675,0.5708606921029281,0.6729724933451641 22 | 20,0.7786335403726709,0.573522626441881,0.6761668145519077 23 | 21,0.7837799467613131,0.5758651286601597,0.6788642413487133 24 | 22,0.7885714285714285,0.577639751552795,0.6825909494232475 25 | 23,0.7924401064773736,0.5795208518189885,0.6853593611357587 26 | 24,0.79527950310559,0.5819698314108253,0.6887666370896184 27 | 25,0.8001419698314107,0.5843478260869565,0.6919964507542146 28 | 26,0.8034782608695652,0.5876131322094055,0.6954037267080745 29 | 27,0.8064241348713398,0.5901330967169477,0.6976397515527951 30 | 28,0.809192546583851,0.5932209405501332,0.6995208518189885 31 | 29,0.8127772848269743,0.5953859804791481,0.7019343389529725 32 | 
30,0.8153682342502218,0.5972670807453415,0.7044543034605147 33 | 31,0.8188464951197871,0.5992546583850931,0.7062999112688554 34 | 32,0.820798580301686,0.6013132209405501,0.7074001774622892 35 | 33,0.8236379769299024,0.6040106477373559,0.709849157054126 36 | 34,0.8257320319432121,0.6054658385093168,0.7114463176574978 37 | 35,0.828039041703638,0.6070275066548358,0.7133274179236914 38 | 36,0.8301685891748004,0.6083762200532387,0.7155634427684118 39 | 37,0.8310204081632653,0.6105767524401065,0.7176574977817214 40 | 38,0.8333274179236912,0.6119254658385094,0.7194321206743568 41 | 39,0.835634427684117,0.61377107364685,0.7216326530612245 42 | 40,0.8370186335403726,0.6151907719609583,0.7236202307009761 43 | 41,0.8384738243123337,0.6166459627329193,0.7256787932564331 44 | 42,0.8413487133984028,0.6183141082519965,0.7280567879325643 45 | 43,0.8431233362910381,0.6200887311446319,0.7310736468500443 46 | 44,0.8435847382431233,0.6212954747116237,0.732919254658385 47 | 45,0.8457142857142856,0.6228216503992903,0.7362555456965396 48 | 46,0.8463531499556343,0.6238864241348714,0.7385625554569654 49 | 47,0.8475953859804791,0.6255190771960957,0.7405856255545696 50 | 48,0.8486601597160603,0.6269387755102039,0.7425022182786158 51 | 49,0.8506122448979592,0.6278970718722271,0.7448802129547472 52 | 50,0.8514640638864241,0.6290328305235138,0.7467258207630878 53 | 51,0.8524578527062999,0.6305590062111801,0.7495652173913042 54 | 52,0.8530257320319432,0.6313753327417924,0.7511978704525287 55 | 53,0.8537355811889975,0.6325110913930789,0.7537178349600708 56 | 54,0.8554037267080746,0.6338952972493346,0.7559893522626443 57 | 55,0.856184560780834,0.6351730257320318,0.7586157941437445 58 | 56,0.8578172138420586,0.6357763975155281,0.7600709849157053 59 | 57,0.8590239574090506,0.6371606033717835,0.7620585625554568 60 | 58,0.8605501330967169,0.637941437444543,0.7642590949423247 61 | 59,0.8615439219165927,0.6385803016858919,0.7661047027506654 62 | 60,0.8624312333629105,0.6395385980479149,0.7686601597160603 63 | 61,0.863815439219166,0.6402839396628217,0.7700088731144633 64 | 62,0.8648092280390418,0.6413842058562556,0.7718189884649512 65 | 63,0.8664063886424136,0.6423779946761313,0.7740550133096717 66 | 64,0.8667613132209405,0.643513753327418,0.7764685004436559 67 | 65,0.8683584738243124,0.644614019520852,0.7780656610470276 68 | 66,0.8691038154392191,0.6455013309671696,0.7796628216503994 69 | 67,0.8701330967169477,0.6467080745341613,0.7822892635314995 70 | 68,0.8706654835847383,0.6477728482697427,0.7839219165927241 71 | 69,0.8711978704525288,0.6484826974267968,0.7856965394853593 72 | 70,0.8720851818988465,0.6495829636202307,0.7869387755102041 73 | 71,0.872404614019521,0.6502573203194321,0.7891393078970719 74 | 72,0.8730434782608696,0.6511446317657498,0.7896007098491571 75 | 73,0.8729015084294587,0.6518189884649512,0.7908074534161491 76 | 74,0.8740017746228925,0.6527772848269743,0.7919432120674356 77 | 75,0.8742857142857142,0.6537355811889972,0.7937888198757763 78 | 76,0.8748890860692103,0.6543744454303462,0.7953504880212955 79 | 77,0.8753149955634427,0.6550488021295475,0.7960958296362023 80 | 78,0.8754569653948535,0.6557586512866017,0.7981898846495119 81 | 79,0.8762732919254658,0.6562555456965395,0.7988287488908608 82 | 80,0.876379769299024,0.6565039929015084,0.799751552795031 83 | 81,0.8769121561668145,0.6570718722271518,0.8013487133984029 84 | 82,0.8771251109139308,0.6574977817213842,0.8029103815439219 85 | 83,0.8777994676131321,0.658243123336291,0.8034782608695652 86 | 84,0.8782253771073647,0.6587755102040818,0.8047204968944098 
87 | 85,0.8785448092280391,0.6595563442768413,0.8054303460514641 88 | 86,0.878935226264419,0.6602661934338953,0.8064596273291927 89 | 87,0.8790771960958297,0.6612244897959183,0.8076308784383318 90 | 88,0.8793611357586515,0.6620053238686779,0.8090505767524401 91 | 89,0.8800354924578527,0.6624312333629103,0.8094764862466727 92 | 90,0.8803904170363798,0.6629991126885538,0.8108961845607807 93 | 91,0.8804968944099378,0.6640993788819874,0.8114640638864241 94 | 92,0.8807453416149068,0.6645962732919255,0.8122448979591838 95 | 93,0.8809937888198759,0.6649157054125997,0.8132741792369121 96 | 94,0.8816681455190771,0.6655190771960959,0.8143389529724933 97 | 95,0.882058562555457,0.6657675244010648,0.815581188997338 98 | 96,0.8820940550133096,0.6665483584738244,0.8158651286601597 99 | 97,0.8832653061224491,0.6675421472937001,0.81678793256433 100 | 98,0.8833007985803015,0.6680745341614907,0.8171073646850044 101 | 99,0.8839396628216504,0.6687133984028394,0.8184205856255545 102 | 100,0.8844365572315884,0.6695297249334515,0.8190594498669034 103 | -------------------------------------------------------------------------------- /results/main/mnist.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.10386650386650387,0.10386650386650387,0.10386650386650387 3 | 1,0.33560478322383086,0.2569761807857046,0.36892842607128323 4 | 2,0.46288350097873904,0.3878675116770354,0.5368722987770607 5 | 3,0.533531019245305,0.4857646762408666,0.639897668469097 6 | 4,0.5789871504157218,0.5537957633195727,0.6957109909490863 7 | 5,0.616016435064054,0.6107641917165727,0.7300928348547396 8 | 6,0.6454251216155978,0.6527976432738338,0.7580557009128438 9 | 7,0.6697598697598697,0.6853305424733998,0.781739248405915 10 | 8,0.6895517181231466,0.7119599976742833,0.7946857375428804 11 | 9,0.7086264705312323,0.7347791538267728,0.8068376068376069 12 | 10,0.7267360505455744,0.751853789949028,0.8137449851735565 13 | 11,0.7415624939434462,0.7665368141558618,0.8212958117720024 14 | 12,0.7505281314805123,0.7781692734073686,0.8269434269434269 15 | 13,0.7566641471403376,0.7872511967750062,0.8330096711049092 16 | 14,0.7602224935558268,0.7934724887105838,0.8378471616566854 17 | 15,0.7658662325328992,0.8009109056728103,0.8426691474310521 18 | 16,0.7717309145880575,0.8071903405236739,0.8458359982169505 19 | 17,0.7768087291896816,0.8138884043645948,0.8496075353218211 20 | 18,0.7811384382812955,0.8183150183150184,0.8478865050293622 21 | 19,0.7873984921603969,0.8229199372056516,0.8516192801907086 22 | 20,0.7918289823051727,0.8280210089733899,0.856084656084656 23 | 21,0.7985154175630367,0.8318274318274318,0.8583289726146869 24 | 22,0.8059344535535012,0.8357695214838072,0.8603794794270984 25 | 23,0.8123069170688219,0.8401418687132973,0.8627400817877008 26 | 24,0.8157993681803206,0.8430141287284145,0.8644223501366358 27 | 25,0.8177994844661511,0.8446227493846542,0.864100626005388 28 | 26,0.819570905285191,0.8465530941721418,0.8656588561350466 29 | 27,0.817869255964494,0.8484950675426866,0.8680233346900014 30 | 28,0.8187607806655426,0.8507975269880031,0.869015640444212 31 | 29,0.8183421516754851,0.8525340620578715,0.8699342985057271 32 | 30,0.8192142954047716,0.8537085489466443,0.8707676802914899 33 | -------------------------------------------------------------------------------- /results/other/mnist.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.09983913793437602,0.0998430141287284,0.09983913793437602 3 | 
1,0.19298796441653582,0.15719518576661437,0.1989379227474465 4 | 2,0.2186328662519139,0.18673178673178675,0.35703626179816655 5 | 3,0.24948737329689713,0.22424171947981472,0.47979533693819415 6 | 4,0.29881969881969883,0.26790704885942984,0.5500668643525786 7 | 5,0.35224916177297133,0.3162199352675543,0.6149659863945578 8 | 6,0.41871039013896166,0.36516851754946994,0.6573405430548288 9 | 7,0.4712056902533092,0.412570498284784,0.6916022249355582 10 | 8,0.5089598232455376,0.4419598038645658,0.7210070352927496 11 | 9,0.5529546291451052,0.47181425276663375,0.7384305289067192 12 | 10,0.5857239762001667,0.5139368567939997,0.7665872046824428 13 | 11,0.6176599414694652,0.5475085760800046,0.7657460705079753 14 | 12,0.6410992887183364,0.5525708859042192,0.7767195767195768 15 | 13,0.6699885652266605,0.5666337190146714,0.7943833943833943 16 | 14,0.685706533325581,0.5988061321394654,0.8004418861561718 17 | 15,0.6956761051999146,0.6290908386146482,0.8097292478244859 18 | 16,0.7112506541077969,0.6392387154291918,0.8210593639165068 19 | 17,0.7231195612147993,0.6671473147663624,0.8231486326724422 20 | 18,0.7406477120762835,0.6668527239955813,0.8191057619629049 21 | 19,0.7549082310987072,0.7028974552784076,0.8266953505048742 22 | 20,0.760365912746865,0.718723956819195,0.8400876019923639 23 | 21,0.7743822315250887,0.7304378161521019,0.8363858363858364 24 | 22,0.7887939221272553,0.7402833498071595,0.84097525049906 25 | 23,0.7947942709847471,0.7481403957594434,0.8497044401806307 26 | 24,0.8018450685117352,0.7621799717037813,0.8571544857259142 27 | 25,0.8108300870205633,0.7662848615229568,0.8632711204139776 28 | 26,0.8150822722251293,0.765180146132527,0.8630269201697772 29 | 27,0.8106711630521154,0.7673469387755102,0.8563288563288562 30 | 28,0.8151326627517103,0.7656569180378704,0.8681396205205728 31 | 29,0.8194274860941527,0.7711843711843711,0.8620578715816811 32 | 30,0.8227455084597942,0.7720720196910673,0.8614376804852995 33 | -------------------------------------------------------------------------------- /run_fedavg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -u main.py --dataset=$1 --optimizer='fedavg' \ 3 | --learning_rate=0.01 --num_rounds=200 --clients_per_round=$4 \ 4 | --eval_every=1 --batch_size=10 \ 5 | --num_epochs=20 \ 6 | --model='mclr' \ 7 | --drop_percent=$2 \ 8 | --num_groups=$3 \ 9 | --ex_name=$5 \ 10 | --seed=0 -------------------------------------------------------------------------------- /run_fedprox.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -u main.py --dataset=$1 --optimizer='fedprox' \ 3 | --learning_rate=0.01 --num_rounds=200 --clients_per_round=$4 \ 4 | --eval_every=1 --batch_size=10 \ 5 | --num_epochs=20 \ 6 | --model='mclr' \ 7 | --drop_percent=$2 \ 8 | --mu=$3 \ 9 | --ex_name=$5 \ 10 | 11 | -------------------------------------------------------------------------------- /run_fedsim.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -u main.py --dataset=$1 --optimizer='fedsim' \ 3 | --learning_rate=0.01 --num_rounds=200 --clients_per_round=$4 \ 4 | --eval_every=1 --batch_size=10 \ 5 | --num_epochs=20 \ 6 | --model='mclr' \ 7 | --drop_percent=$2 \ 8 | --num_groups=$3 \ 9 | --ex_name=$5 \ 10 | --seed=0 -------------------------------------------------------------------------------- /utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/utils/__init__.py -------------------------------------------------------------------------------- /utils/csv_log.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import csv 5 | import matplotlib.pyplot as plt 6 | import matplotlib 7 | 8 | matplotlib.rc('xtick', labelsize=17) 9 | matplotlib.rc('ytick', labelsize=17) 10 | 11 | def log_start(method, params, num_groups = 1, name="non"): 12 | logdir = "logs/"+name 13 | if not os.path.exists(logdir): 14 | os.makedirs(logdir) 15 | 16 | with open(logdir+"/params" + '.json', 'w') as json_file: 17 | json.dump(params, json_file) 18 | 19 | def write_dataset(arr, name="non"): 20 | logdir = "logs/"+name 21 | with open(logdir + '/dataset_analysis.csv', mode='a+', newline='') as log_file: 22 | writer = csv.DictWriter(log_file, fieldnames=arr[0].keys()) 23 | writer.writeheader() 24 | for data in arr: 25 | writer.writerow(data) 26 | 27 | 28 | def write_clusters(arr, name="non"): 29 | logdir = "logs/"+name 30 | with open(logdir + '/clusters.csv', mode='a+', newline='') as log_file: 31 | writer = csv.writer(log_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 32 | writer.writerow(arr) 33 | 34 | def write_all(method, log_data, log_groups, num_groups = 1, name="non"): 35 | logdir = "logs/"+name 36 | # with open(logdir + '/fed_' +method +'_'+str(num_groups)+'.csv', mode='w', newline='') as log_file: 37 | with open(logdir + '/' + name + '.csv', mode='w', newline='') as log_file: 38 | writer = csv.writer(log_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 39 | writer.writerow(['round', 'train_loss', 'train_acc', 'test_acc']) 40 | for line in log_data: 41 | writer.writerow(line) 42 | 43 | for idx,group in enumerate(log_groups): 44 | with open(logdir + '/fed_' +method +'_g_'+str(idx)+'.csv', mode='w', newline='') as log_file: 45 | writer = csv.writer(log_file) 46 | writer.writerow(['round', 'train_loss', 'train_acc', 'test_acc']) 47 | for line in log_groups[idx]: 48 | writer.writerow(line) 49 | 50 | def graph_print(method, params, num_groups = 1, name="non"): 51 | sim_rounds = [] 52 | sim_test_acc = [] 53 | avg_rounds = [] 54 | avg_test_acc = [] 55 | prox_rounds = [] 56 | prox_test_acc = [] 57 | groups = [] 58 | logdir = "logs/"+name 59 | 60 | with open(logdir + '/' + name + '.csv', mode='r') as csv_file: 61 | csv_reader = csv.DictReader(csv_file) 62 | line_count = 0 63 | for row in csv_reader: 64 | if line_count == 0: 65 | line_count += 1 66 | sim_rounds.append(float(row["round"])) 67 | sim_test_acc.append(float(row["test_acc"])) 68 | line_count += 1 69 | # print(f'Fed Processed {line_count} lines.') 70 | 71 | if method == "sim": 72 | for i in range(num_groups): 73 | group_data = [] 74 | with open(logdir + '/fed_sim_g_'+ str(i)+'.csv', mode='r') as csv_file: 75 | csv_reader = csv.DictReader(csv_file) 76 | line_count = 0 77 | for row in csv_reader: 78 | if line_count == 0: 79 | line_count += 1 80 | group_data.append(float(row["test_acc"])) 81 | line_count += 1 82 | groups.append(group_data) 83 | # print(f'FedSim Groups {line_count} lines.') 84 | 85 | with open('fedavg_original/'+str(params["dataset"])+'_'+str(params["clients_per_round"])+'.csv', mode='r') as csv_file: 86 | # with 
open('fedavg_original/seeds/'+str(params["dataset"])+'_'+str(params["seed"])+'_fedavg/'+str(params["dataset"])+'_'+str(params["seed"])+'_fedavg.csv', mode='r') as csv_file: 87 | csv_reader = csv.DictReader(csv_file) 88 | line_count = 0 89 | for row in csv_reader: 90 | if line_count == 0: 91 | line_count += 1 92 | avg_rounds.append(float(row["round"])) 93 | avg_test_acc.append(float(row["test_acc"])) 94 | line_count += 1 95 | # print(f'FedAvg log Processed {line_count} lines.') 96 | 97 | with open('fedavg_original/'+str(params["dataset"])+'_'+str(params["clients_per_round"])+'_prox.csv', mode='r') as csv_file: 98 | # with open('fedavg_original/seeds/'+str(params["dataset"])+'_'+str(params["seed"])+'_fedprox/'+str(params["dataset"])+'_'+str(params["seed"])+'_fedprox.csv', mode='r') as csv_file: 99 | csv_reader = csv.DictReader(csv_file) 100 | line_count = 0 101 | for row in csv_reader: 102 | if line_count == 0: 103 | line_count += 1 104 | prox_rounds.append(float(row["round"])) 105 | prox_test_acc.append(float(row["test_acc"])) 106 | line_count += 1 107 | # print(f'FedProx log Processed {line_count} lines.') 108 | 109 | fig, ax = plt.subplots(2,1, figsize=[12, 16]) 110 | 111 | ax[0].plot(sim_rounds, sim_test_acc, linewidth=3.0, color="#17becf", label="FedSim - G - "+str(num_groups)) 112 | ax[0].plot(avg_rounds, avg_test_acc, ":",alpha=0.6, linewidth=3.0, color="#ff7f0e", label="FedAvg") 113 | ax[0].plot(prox_rounds, prox_test_acc, "-",alpha=0.6, linewidth=3.0, color="#90C978", label="FedProx") 114 | 115 | ax[1].plot(sim_rounds, sim_test_acc, "-",linewidth=1.0, alpha=0.8, color="#0000ff", label="FedSim - G - "+str(num_groups)) 116 | ax[1].plot(avg_rounds, avg_test_acc, "-",linewidth=1.0, alpha=0.8, color="#ff0000", label="FedAvg") 117 | for idx,g in enumerate(groups): 118 | ax[1].plot(sim_rounds, g, linewidth=1.5, alpha=0.3, label="Group - " + str(idx)) 119 | 120 | ax[0].set_xlabel("# Rounds", fontsize=22) 121 | ax[0].set_ylabel('Testing Accuracy', fontsize=22) 122 | ax[0].set_title("FedSim comparison - Data:"+str(params["dataset"]) 123 | + " Clients/round: "+ str(params["clients_per_round"]) 124 | + " E: " + str(params["num_epochs"]) 125 | + " Groups: " + str(params["num_groups"]), fontsize=18) 126 | 127 | ax[0].legend(fontsize=22, loc='lower center') 128 | ax[0].grid() 129 | 130 | ax[1].set_xlabel("# Rounds", fontsize=22) 131 | ax[1].set_ylabel('Testing Accuracy', fontsize=22) 132 | ax[1].set_title("Group Accuracies", fontsize=22) 133 | ax[1].legend(fontsize=22, loc='lower right') 134 | ax[1].grid() 135 | 136 | # plt.xticks(fontsize=17) 137 | # plt.yticks(fontsize=17) 138 | # ax.tick_params(color='#dddddd') 139 | # ax.spines['bottom'].set_color('#dddddd') 140 | # ax.spines['top'].set_color('#dddddd') 141 | # ax.spines['right'].set_color('#dddddd') 142 | # ax.spines['left'].set_color('#dddddd') 143 | 144 | # fig.show() 145 | # fig.savefig(logdir+"/fed"+method+"_acc_"+str(num_groups)+".pdf") 146 | fig.savefig(logdir+"/"+name+".pdf") 147 | 148 | 149 | plt.close(fig) 150 | 151 | def write_time_taken(elapsed, name="non"): 152 | logdir = "logs/"+name 153 | with open(logdir + '/timetaken.csv', mode='w', newline='') as log_file: 154 | writer = csv.writer(log_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 155 | for line in elapsed: 156 | writer.writerow([line]) 157 | -------------------------------------------------------------------------------- /utils/language_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for
language models.""" 2 | 3 | import re 4 | 5 | 6 | # ------------------------ 7 | # utils for shakespeare dataset 8 | 9 | ALL_LETTERS = "\n !\"&'(),-.0123456789:;>?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]abcdefghijklmnopqrstuvwxyz}" 10 | NUM_LETTERS = len(ALL_LETTERS) 11 | 12 | 13 | def _one_hot(index, size): 14 | '''returns one-hot vector with given size and value 1 at given index 15 | ''' 16 | vec = [0 for _ in range(size)] 17 | vec[int(index)] = 1 18 | return vec 19 | 20 | 21 | def letter_to_vec(letter): 22 | '''returns one-hot representation of given letter 23 | ''' 24 | index = ALL_LETTERS.find(letter) 25 | return _one_hot(index, NUM_LETTERS) 26 | 27 | 28 | def word_to_indices(word): 29 | '''returns a list of character indices 30 | 31 | Args: 32 | word: string 33 | 34 | Return: 35 | indices: int list with length len(word) 36 | ''' 37 | indices = [] 38 | for c in word: 39 | indices.append(ALL_LETTERS.find(c)) 40 | return indices 41 | 42 | 43 | # ------------------------ 44 | # utils for sent140 dataset 45 | 46 | 47 | def split_line(line): 48 | '''split given line/phrase into list of words 49 | 50 | Args: 51 | line: string representing phrase to be split 52 | 53 | Return: 54 | list of strings, with each string representing a word 55 | ''' 56 | return re.findall(r"[\w']+|[.,!?;]", line) 57 | 58 | 59 | def _word_to_index(word, indd): 60 | '''returns index of given word based on given lookup dictionary 61 | 62 | returns the length of the lookup dictionary if word not found 63 | 64 | Args: 65 | word: string 66 | indd: dictionary with string words as keys and int indices as values 67 | ''' 68 | if word in indd: 69 | return indd[word] 70 | else: 71 | return len(indd) 72 | 73 | 74 | def line_to_indices(line, indd, max_words=25): 75 | '''converts given phrase into list of word indices 76 | 77 | if the phrase has more than max_words words, returns a list containing 78 | indices of the first max_words words 79 | if the phrase has less than max_words words, repeatedly appends integer 80 | representing unknown index to returned list until the list's length is 81 | max_words 82 | 83 | Args: 84 | line: string representing phrase/sequence of words 85 | indd: dictionary with string words as keys and int indices as values 86 | max_words: maximum number of word indices in returned list 87 | 88 | Return: 89 | indl: list of word indices, one index for each word in phrase 90 | ''' 91 | line_list = split_line(line) # split phrase in words 92 | indl = [] 93 | for word in line_list: 94 | cind = _word_to_index(word, indd) 95 | indl.append(cind) 96 | if (len(indl) == max_words): 97 | break 98 | for i in range(max_words - len(indl)): 99 | indl.append(len(indd)) 100 | return indl 101 | 102 | 103 | def bag_of_words(line, vocab): 104 | '''returns bag of words representation of given phrase using given vocab 105 | 106 | Args: 107 | line: string representing phrase to be parsed 108 | vocab: dictionary with words as keys and indices as values 109 | 110 | Return: 111 | integer list 112 | ''' 113 | bag = [0]*len(vocab) 114 | words = split_line(line) 115 | for w in words: 116 | if w in vocab: 117 | bag[vocab[w]] += 1 118 | return bag 119 | -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | import re 5 | import sys 6 | 7 | models_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | models_dir = os.path.join(models_dir, 'models') 9 
| sys.path.append(models_dir) 10 | 11 | from client import Client 12 | 13 | def batch_data(data, batch_size): 14 | ''' 15 | data is a dict := {'x': [list], 'y': [list]} 16 | returns x, y, which are both lists of size-batch_size lists 17 | ''' 18 | raw_x = data['x'] 19 | raw_y = data['y'] 20 | batched_x = [] 21 | batched_y = [] 22 | for i in range(0, len(raw_x), batch_size): 23 | batched_x.append(raw_x[i:i+batch_size]) 24 | batched_y.append(raw_y[i:i+batch_size]) 25 | return batched_x, batched_y 26 | 27 | def read_data(train_data_dir, test_data_dir): 28 | '''parses data in given train and test data directories 29 | 30 | assumes: 31 | - the data in the input directories are .json files with 32 | keys 'users' and 'user_data' 33 | - the set of train set users is the same as the set of test set users 34 | 35 | Return: 36 | clients: list of client ids 37 | groups: list of group ids; empty list if none found 38 | train_data: dictionary of train data 39 | test_data: dictionary of test data 40 | ''' 41 | clients = [] 42 | groups = [] 43 | train_data = {} 44 | test_data = {} 45 | 46 | train_files = os.listdir(train_data_dir) 47 | train_files = [f for f in train_files if f.endswith('.json')] 48 | for f in train_files: 49 | file_path = os.path.join(train_data_dir,f) 50 | with open(file_path, 'r') as inf: 51 | cdata = json.load(inf) 52 | clients.extend(cdata['users']) 53 | if 'hierarchies' in cdata: 54 | groups.extend(cdata['hierarchies']) 55 | train_data.update(cdata['user_data']) 56 | 57 | test_files = os.listdir(test_data_dir) 58 | test_files = [f for f in test_files if f.endswith('.json')] 59 | for f in test_files: 60 | file_path = os.path.join(test_data_dir,f) 61 | with open(file_path, 'r') as inf: 62 | cdata = json.load(inf) 63 | test_data.update(cdata['user_data']) 64 | 65 | clients = list(train_data.keys()) 66 | 67 | return clients, groups, train_data, test_data 68 | 69 | def setup_clients(train_data_dir, test_data_dir, model=None): 70 | '''instantiates clients based on given train and test data directories 71 | 72 | Return: 73 | list of Clients 74 | ''' 75 | users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir) 76 | if len(groups) == 0: 77 | groups = [None for _ in users] 78 | all_clients = [Client(u, g, train_data[u], test_data[u], model) for u, g in zip(users, groups)] 79 | return all_clients 80 | 81 | -------------------------------------------------------------------------------- /utils/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # script to preprocess data 4 | 5 | # -------------------- 6 | # parse arguments 7 | 8 | NAME="sent140" # name of the dataset, equivalent to directory name 9 | SAMPLE="na" # -s tag, iid or niid 10 | IUSER="" # --iu tag, # of users if iid sampling 11 | SFRAC="" # --sf tag, fraction of data to sample 12 | MINSAMPLES="na" # -k tag, minimum allowable # of samples per user 13 | TRAIN="na" # -t tag, user or sample 14 | TFRAC="" # --tf tag, fraction of data in training set 15 | 16 | while [[ $# -gt 0 ]] 17 | do 18 | key="$1" 19 | 20 | case $key in 21 | --name) 22 | NAME="$2" 23 | shift # past argument 24 | if [ ${SAMPLE:0:1} = "-" ]; then 25 | NAME="sent140" 26 | else 27 | shift # past value 28 | fi 29 | ;; 30 | -s) 31 | SAMPLE="$2" 32 | shift # past argument 33 | if [ ${SAMPLE:0:1} = "-" ]; then 34 | SAMPLE="" 35 | else 36 | shift # past value 37 | fi 38 | ;; 39 | --iu) 40 | IUSER="$2" 41 | shift # past argument 42 | if [ ${IUSER:0:1} = "-" ]; then 43 | 
IUSER="" 44 | else 45 | shift # past value 46 | fi 47 | ;; 48 | --sf) 49 | SFRAC="$2" 50 | shift # past argument 51 | if [ ${SFRAC:0:1} = "-" ]; then 52 | SFRAC="" 53 | else 54 | shift # past value 55 | fi 56 | ;; 57 | -k) 58 | MINSAMPLES="$2" 59 | shift # past argument 60 | if [ ${MINSAMPLES:0:1} = "-" ]; then 61 | MINSAMPLES="" 62 | else 63 | shift # past value 64 | fi 65 | ;; 66 | -t) 67 | TRAIN="$2" 68 | shift # past argument 69 | if [ -z "$TRAIN" ] || [ ${TRAIN:0:1} = "-" ]; then 70 | TRAIN="" 71 | else 72 | shift # past value 73 | fi 74 | ;; 75 | --tf) 76 | TFRAC="$2" 77 | shift # past argument 78 | if [ ${TFRAC:0:1} = "-" ]; then 79 | TFRAC="" 80 | else 81 | shift # past value 82 | fi 83 | ;; 84 | *) # unknown option 85 | shift # past argument 86 | ;; 87 | esac 88 | done 89 | 90 | # -------------------- 91 | # preprocess data 92 | 93 | CONT_SCRIPT=true 94 | cd ../data/$NAME 95 | 96 | # download data and convert to .json format 97 | 98 | if [ ! -d "data/all_data" ]; then 99 | cd preprocess 100 | ./data_to_json.sh 101 | cd .. 102 | fi 103 | 104 | NAMETAG="--name $NAME" 105 | 106 | # sample data 107 | IUSERTAG="" 108 | if [ ! -z $IUSER ]; then 109 | IUSERTAG="--u $IUSER" 110 | fi 111 | SFRACTAG="" 112 | if [ ! -z $SFRAC ]; then 113 | SFRACTAG="--fraction $SFRAC" 114 | fi 115 | 116 | if [ "$CONT_SCRIPT" = true ] && [ ! $SAMPLE = "na" ]; then 117 | if [ -d "data/sampled_data" ] && [ "$(ls -A data/sampled_data)" ]; then 118 | CONT_SCRIPT=false 119 | else 120 | if [ ! -d "data/sampled_data" ]; then 121 | mkdir data/sampled_data 122 | fi 123 | 124 | cd ../../utils 125 | 126 | if [ $SAMPLE = "iid" ]; then 127 | python3 sample.py $NAMETAG --iid $IUSERTAG $SFRACTAG 128 | else 129 | python3 sample.py $NAMETAG $SFRACTAG 130 | fi 131 | 132 | cd ../data/$NAME 133 | fi 134 | fi 135 | 136 | # remove users with less then given number of samples 137 | if [ "$CONT_SCRIPT" = true ] && [ ! $MINSAMPLES = "na" ]; then 138 | if [ -d "data/rem_user_data" ] && [ "$(ls -A data/rem_user_data)" ]; then 139 | CONT_SCRIPT=false 140 | else 141 | if [ ! -d "data/rem_user_data" ]; then 142 | mkdir data/rem_user_data 143 | fi 144 | 145 | cd ../../utils 146 | 147 | if [ -z $MINSAMPLES ]; then 148 | python3 remove_users.py $NAMETAG 149 | else 150 | python3 remove_users.py $NAMETAG --min_samples $MINSAMPLES 151 | fi 152 | 153 | cd ../data/$NAME 154 | fi 155 | fi 156 | 157 | # create train-test split 158 | TFRACTAG="" 159 | if [ ! -z $TFRAC ]; then 160 | TFRACTAG="--frac $TFRAC" 161 | fi 162 | 163 | if [ "$CONT_SCRIPT" = true ] && [ ! $TRAIN = "na" ]; then 164 | if [ -d "data/train" ] && [ "$(ls -A data/train)" ]; then 165 | CONT_SCRIPT=false 166 | else 167 | if [ ! -d "data/train" ]; then 168 | mkdir data/train 169 | fi 170 | if [ ! -d "data/test" ]; then 171 | mkdir data/test 172 | fi 173 | 174 | cd ../../utils 175 | 176 | if [ -z $TRAIN ]; then 177 | python3 split_data.py $NAMETAG $TFRACTAG 178 | elif [ $TRAIN = "user" ]; then 179 | python3 split_data.py $NAMETAG --by_user $TFRACTAG 180 | elif [ $TRAIN = "sample" ]; then 181 | python3 split_data.py $NAMETAG --by_sample $TFRACTAG 182 | fi 183 | 184 | cd ../data/$NAME 185 | fi 186 | fi 187 | 188 | if [ "$CONT_SCRIPT" = false ]; then 189 | echo "Data for one of the specified preprocessing tasks has already been" 190 | echo "generated. If you would like to re-generate data for this directory," 191 | echo "please delete the existing one. Otherwise, please remove the" 192 | echo "respective tag(s) from the preprocessing command." 
193 | fi -------------------------------------------------------------------------------- /utils/remove_users.py: -------------------------------------------------------------------------------- 1 | 2 | ''' 3 | removes users with less than the given number of samples 4 | ''' 5 | 6 | import argparse 7 | import json 8 | import os 9 | 10 | import numpy as np 11 | 12 | parser = argparse.ArgumentParser() 13 | 14 | parser.add_argument('--name', 15 | help='name of dataset to parse; default: sent140;', 16 | type=str, 17 | default='sent140') 18 | 19 | parser.add_argument('--min_samples', 20 | help='users with less than x samples are discarded; default: 10;', 21 | type=int, 22 | default=10) 23 | 24 | args = parser.parse_args() 25 | 26 | 27 | print('------------------------------') 28 | 29 | 30 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 31 | dir = os.path.join(parent_path, 'data', args.name, 'data') 32 | subdir = os.path.join(dir, 'sampled_data') 33 | files = [] 34 | if os.path.exists(subdir): 35 | files = os.listdir(subdir) 36 | if len(files) == 0: 37 | subdir = os.path.join(dir, 'all_data') 38 | files = os.listdir(subdir) 39 | files = [f for f in files if f.endswith('.json')] 40 | 41 | for f in files: 42 | users = [] 43 | hierarchies = [] 44 | num_samples = [] 45 | user_data = {} 46 | 47 | min_number = 0 48 | max_number = 0 49 | 50 | file_dir = os.path.join(subdir, f) 51 | with open(file_dir, 'r') as inf: 52 | data = json.load(inf) 53 | 54 | num_users = len(data['users']) 55 | for i in range(num_users): 56 | curr_user = data['users'][i] 57 | curr_hierarchy = None 58 | if 'hierarchies' in data: 59 | curr_hierarchy = data['hierarchies'][i] 60 | curr_num_samples = data['num_samples'][i] 61 | 62 | if curr_num_samples > args.min_samples: 63 | user_data[curr_user] = data['user_data'][curr_user] 64 | users.append(curr_user) 65 | max_number += 1 66 | if curr_hierarchy is not None: 67 | hierarchies.append(curr_hierarchy) 68 | num_samples.append(data['num_samples'][i]) 69 | 70 | 71 | all_data = {} 72 | all_data['users'] = users 73 | if len(hierarchies) == len(users): 74 | all_data['hierarchies'] = hierarchies 75 | all_data['num_samples'] = num_samples 76 | all_data['user_data'] = user_data 77 | 78 | file_name = '%s_keep_%d.json' % ((f[:-5]), args.min_samples) 79 | ouf_dir = os.path.join(dir, 'rem_user_data', file_name) 80 | 81 | print('writing %s' % file_name) 82 | with open(ouf_dir, 'w') as outfile: 83 | json.dump(all_data, outfile) 84 | 85 | -------------------------------------------------------------------------------- /utils/sample.py: -------------------------------------------------------------------------------- 1 | ''' 2 | samples from all raw data; 3 | by default samples in a non-iid manner; namely, randomly selects users from 4 | raw data until their cumulative amount of data exceeds the given number of 5 | datapoints to sample (specified by --fraction argument); 6 | ordering of original data points is not preserved in sampled data 7 | ''' 8 | 9 | import argparse 10 | import json 11 | import os 12 | import random 13 | 14 | from utils import iid_divide 15 | 16 | parser = argparse.ArgumentParser() 17 | 18 | parser.add_argument('--name', 19 | help='name of dataset to parse; default: sent140;', 20 | type=str, 21 | default='sent140') 22 | parser.add_argument('--iid', 23 | help='sample iid;', 24 | action="store_true") 25 | parser.add_argument('--niid', 26 | help="sample niid;", 27 | dest='iid', action='store_false') 28 | parser.add_argument('--fraction', 29 | 
help='fraction of all data to sample; default: 0.1;', 30 | type=float, 31 | default=0.1) 32 | parser.add_argument('--u', 33 | help=('number of users in iid data set; ignored in niid case;' 34 | 'represented as fraction of original total number of users; ' 35 | 'default: 0.01;'), 36 | type=float, 37 | default=0.01) 38 | parser.set_defaults(iid=False) 39 | 40 | args = parser.parse_args() 41 | 42 | print('------------------------------') 43 | print('sampling data') 44 | 45 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 46 | data_dir = os.path.join(parent_path, 'data', args.name, 'data') 47 | subdir = os.path.join(data_dir, 'all_data') 48 | files = os.listdir(subdir) 49 | files = [f for f in files if f.endswith('.json')] 50 | 51 | new_user_count = 0 # for iid case 52 | for f in files: 53 | file_dir = os.path.join(subdir, f) 54 | with open(file_dir, 'r') as inf: 55 | data = json.load(inf) 56 | 57 | num_users = len(data['users']) 58 | 59 | tot_num_samples = sum(data['num_samples']) 60 | print('Fraction: ', args.fraction) 61 | num_new_samples = int(args.fraction * tot_num_samples) 62 | 63 | hierarchies = None 64 | 65 | if(args.iid): 66 | raw_list = list(data['user_data'].values()) 67 | raw_x = [elem['x'] for elem in raw_list] 68 | raw_y = [elem['y'] for elem in raw_list] 69 | x_list = [item for sublist in raw_x for item in sublist] # flatten raw_x 70 | y_list = [item for sublist in raw_y for item in sublist] # flatten raw_y 71 | 72 | num_new_users = int(round(args.u * num_users)) 73 | if num_new_users == 0: 74 | num_new_users += 1 75 | 76 | indices = [i for i in range(tot_num_samples)] 77 | new_indices = random.sample(indices, num_new_samples) 78 | # TODO: seed this random 79 | 80 | users = [str(i+new_user_count) for i in range(num_new_users)] 81 | 82 | user_data = {} 83 | for user in users: 84 | user_data[user] = {'x': [], 'y': []} 85 | all_x_samples = [x_list[i] for i in new_indices] 86 | all_y_samples = [y_list[i] for i in new_indices] 87 | x_groups = iid_divide(all_x_samples, num_new_users) 88 | y_groups = iid_divide(all_y_samples, num_new_users) 89 | for i in range(num_new_users): 90 | user_data[users[i]]['x'] = x_groups[i] 91 | user_data[users[i]]['y'] = y_groups[i] 92 | 93 | num_samples = [len(user_data[u]['y']) for u in users] 94 | 95 | new_user_count += num_new_users 96 | 97 | else: 98 | 99 | ctot_num_samples = 0 100 | 101 | users = data['users'] 102 | users_and_hiers = None 103 | if 'hierarchies' in data: 104 | users_and_hiers = list(zip(users, data['hierarchies'])) 105 | random.shuffle(users_and_hiers) 106 | else: 107 | random.shuffle(users) 108 | user_i = 0 109 | num_samples = [] 110 | user_data = {} 111 | 112 | if 'hierarchies' in data: 113 | hierarchies = [] 114 | 115 | while(ctot_num_samples < num_new_samples): 116 | hierarchy = None 117 | if users_and_hiers is not None: 118 | user, hier = users_and_hiers[user_i] 119 | else: 120 | user = users[user_i] 121 | 122 | cdata = data['user_data'][user] 123 | 124 | cnum_samples = len(data['user_data'][user]['y']) 125 | 126 | if (ctot_num_samples + cnum_samples > num_new_samples): 127 | cnum_samples = num_new_samples - ctot_num_samples 128 | indices = [i for i in range(cnum_samples)] 129 | new_indices = random.sample(indices, cnum_samples) 130 | x = [] 131 | y = [] 132 | for i in new_indices: 133 | x.append(data['user_data'][user]['x'][i]) 134 | y.append(data['user_data'][user]['y'][i]) 135 | cdata = {'x': x, 'y': y} 136 | 137 | if 'hierarchies' in data: 138 | hierarchies.append(hier) 139 | 140 | 
num_samples.append(cnum_samples) 141 | user_data[user] = cdata 142 | 143 | ctot_num_samples += cnum_samples 144 | user_i += 1 145 | 146 | if 'hierarchies' in data: 147 | users = [u for u, h in users_and_hiers][:user_i] 148 | else: 149 | users = users[:user_i] 150 | 151 | # ------------ 152 | # create .json file 153 | 154 | all_data = {} 155 | all_data['users'] = users 156 | if hierarchies is not None: 157 | all_data['hierarchies'] = hierarchies 158 | all_data['num_samples'] = num_samples 159 | all_data['user_data'] = user_data 160 | 161 | slabel = '' 162 | if(args.iid): 163 | slabel = 'iid' 164 | else: 165 | slabel = 'niid' 166 | 167 | arg_frac = str(args.fraction) 168 | arg_frac = arg_frac[2:] 169 | arg_nu = str(args.u) 170 | arg_nu = arg_nu[2:] 171 | arg_label = arg_frac 172 | if(args.iid): 173 | arg_label = '%s_%s' % (arg_nu, arg_label) 174 | file_name = '%s_%s_%s.json' % ((f[:-5]), slabel, arg_label) 175 | ouf_dir = os.path.join(data_dir, 'sampled_data', file_name) 176 | 177 | print('writing %s' % file_name) 178 | with open(ouf_dir, 'w') as outfile: 179 | json.dump(all_data, outfile) 180 | -------------------------------------------------------------------------------- /utils/stats.py: -------------------------------------------------------------------------------- 1 | ''' 2 | assumes that the user has already generated .json file(s) containing data 3 | ''' 4 | 5 | import argparse 6 | import json 7 | import matplotlib.pyplot as plt 8 | import math 9 | import numpy as np 10 | import os 11 | 12 | from scipy import io 13 | from scipy import stats 14 | 15 | parser = argparse.ArgumentParser() 16 | 17 | parser.add_argument('--name', 18 | help='name of dataset to parse; default: sent140;', 19 | type=str, 20 | default='sent140') 21 | 22 | args = parser.parse_args() 23 | 24 | 25 | def load_data(name): 26 | 27 | users = [] 28 | num_samples = [] 29 | 30 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 31 | data_dir = os.path.join(parent_path, 'data', name, 'data') 32 | subdir = os.path.join(data_dir, 'all_data') 33 | 34 | files = os.listdir(subdir) 35 | files = [f for f in files if f.endswith('.json')] 36 | 37 | for f in files: 38 | file_dir = os.path.join(subdir, f) 39 | 40 | with open(file_dir) as inf: 41 | data = json.load(inf) 42 | 43 | users.extend(data['users']) 44 | num_samples.extend(data['num_samples']) 45 | 46 | return users, num_samples 47 | 48 | def print_dataset_stats(name): 49 | users, num_samples = load_data(name) 50 | num_users = len(users) 51 | 52 | print('####################################') 53 | print('DATASET: %s' % name) 54 | print('%d users' % num_users) 55 | print('%d samples (total)' % np.sum(num_samples)) 56 | print('%.2f samples per user (mean)' % np.mean(num_samples)) 57 | print('num_samples (std): %.2f' % np.std(num_samples)) 58 | print('num_samples (std/mean): %.2f' % (np.std(num_samples)/np.mean(num_samples))) 59 | print('num_samples (skewness): %.2f' % stats.skew(num_samples)) 60 | 61 | bins = [0,20,40,60,80,100,120,140,160,180,200] 62 | if args.name == 'shakespeare': 63 | bins = [0,2000,4000,6000,8000,10000,12000,14000,16000,18000,20000] 64 | if args.name == 'nist': 65 | bins = [0,20,40,60,80,100,120,140,160,180,200,220,240,260,280,300,320,340,360,380,400,420,440,460,480,500] 66 | 67 | hist, edges = np.histogram(num_samples,bins=bins) 68 | print("\nnum_sam\tnum_users") 69 | for e, h in zip(edges, hist): 70 | print(e, "\t", h) 71 | 72 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 73 | data_dir = 
os.path.join(parent_path, 'data', name, 'data') 74 | 75 | plt.hist(num_samples, bins = bins) 76 | fig_name = "%s_hist_nolabel.png" % name 77 | fig_dir = os.path.join(data_dir, fig_name) 78 | plt.savefig(fig_dir) 79 | plt.title(name) 80 | plt.xlabel("number of samples") 81 | plt.ylabel("number of users") 82 | fig_name = "%s_hist.png" % name 83 | fig_dir = os.path.join(data_dir, fig_name) 84 | plt.savefig(fig_dir) 85 | 86 | print_dataset_stats(args.name) -------------------------------------------------------------------------------- /utils/tf_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | 4 | def __num_elems(shape): 5 | '''Returns the number of elements in the given shape 6 | 7 | Args: 8 | shape: TensorShape 9 | 10 | Return: 11 | tot_elems: int 12 | ''' 13 | tot_elems = 1 14 | for s in shape: 15 | tot_elems *= int(s) 16 | return tot_elems 17 | 18 | def graph_size(graph): 19 | '''Returns the size of the given graph in bytes 20 | 21 | The size of the graph is calculated by summing up the sizes of each 22 | trainable variable. The sizes of variables are calculated by multiplying 23 | the number of bytes in their dtype with their number of elements, captured 24 | in their shape attribute 25 | 26 | Args: 27 | graph: TF graph 28 | Return: 29 | integer representing size of graph (in bytes) 30 | ''' 31 | tot_size = 0 32 | with graph.as_default(): 33 | vs = tf.trainable_variables() 34 | for v in vs: 35 | tot_elems = __num_elems(v.shape) 36 | dtype_size = int(v.dtype.size) 37 | var_size = tot_elems * dtype_size 38 | tot_size += var_size 39 | return tot_size 40 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | def save_obj(obj, name): 4 | with open(name + '.pkl', 'wb') as f: 5 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 6 | 7 | def load_obj(name): 8 | with open(name + '.pkl', 'rb') as f: 9 | return pickle.load(f) 10 | 11 | def iid_divide(l, g): 12 | ''' 13 | divide list l among g groups 14 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements 15 | returns a list of groups 16 | ''' 17 | num_elems = len(l) 18 | group_size = int(len(l)/g) 19 | num_big_groups = num_elems - g * group_size 20 | num_small_groups = g - num_big_groups 21 | glist = [] 22 | for i in range(num_small_groups): 23 | glist.append(l[group_size*i:group_size*(i+1)]) 24 | bi = group_size*num_small_groups 25 | group_size += 1 26 | for i in range(num_big_groups): 27 | glist.append(l[bi+group_size*i:bi+group_size*(i+1)]) 28 | return glist --------------------------------------------------------------------------------